[D3D12] Bindless textures/samplers

parent 9f789e01b6
commit 40e335e2a9
@@ -0,0 +1,30 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_BASE_HASH_H_
#define XENIA_BASE_HASH_H_

#include <cstddef>

namespace xe {
namespace hash {

// For use in unordered_sets and unordered_maps (primarily multisets and
// multimaps, with manual collision resolution), where the hash is calculated
// externally (for instance, as XXH64), possibly requiring context data rather
// than a pure function to calculate the hash
template <typename Key>
struct IdentityHasher {
  size_t operator()(const Key& key) const { return static_cast<size_t>(key); }
};

}  // namespace hash
}  // namespace xe

#endif  // XENIA_BASE_HASH_H_
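The hasher above is meant for containers whose keys are already high-quality hashes computed elsewhere (XXH64 in the rest of this commit), so rehashing them would only waste cycles. A minimal usage sketch, not part of the commit, assuming xxHash is available (include path assumed):

#include <unordered_map>

#include "third_party/xxhash/xxhash.h"  // Include path assumed.
#include "xenia/base/hash.h"

// Keys are XXH64 digests of the hashed blobs, so the identity hasher just
// forwards them. Where two different blobs could collide on the same digest,
// the commit uses unordered_multimap plus manual comparison of the stored
// layouts instead of a plain map.
std::unordered_map<uint64_t, int, xe::hash::IdentityHasher<uint64_t>> cache;

void Remember(const void* data, size_t size, int value) {
  cache.emplace(XXH64(data, size, 0), value);
}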
File diff suppressed because it is too large
@@ -29,6 +29,7 @@
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/d3d12/d3d12_context.h"
#include "xenia/ui/d3d12/d3d12_util.h"
#include "xenia/ui/d3d12/pools.h"

namespace xe {

@@ -53,8 +54,8 @@ class D3D12CommandProcessor : public CommandProcessor {
void RestoreEDRAMSnapshot(const void* snapshot) override;

// Needed by everything that owns transient objects.
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
ui::d3d12::D3D12Context* GetD3D12Context() const {
return static_cast<ui::d3d12::D3D12Context*>(context_.get());
}

// Returns the deferred drawing command list for the currently open

@@ -95,18 +96,43 @@ class D3D12CommandProcessor : public CommandProcessor {
ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
return constant_buffer_pool_.get();
}
// Request and automatically rebind descriptors on the draw command list.
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
uint64_t RequestViewDescriptors(uint64_t previous_heap_index,
uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
uint64_t RequestSamplerDescriptors(
uint64_t previous_heap_index, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);

D3D12_CPU_DESCRIPTOR_HANDLE GetViewBindlessHeapCPUStart() const {
assert_true(bindless_resources_used_);
return view_bindless_heap_cpu_start_;
}
D3D12_GPU_DESCRIPTOR_HANDLE GetViewBindlessHeapGPUStart() const {
assert_true(bindless_resources_used_);
return view_bindless_heap_gpu_start_;
}
// Returns UINT32_MAX if no free descriptors.
uint32_t RequestPersistentViewBindlessDescriptor();
void ReleaseViewBindlessDescriptorImmediately(uint32_t descriptor_index);
// Request non-contiguous SRV/UAV descriptors for use only within the next
// draw or dispatch command done for internal purposes. May change the current
// descriptor heap.
bool RequestOneUseSingleViewDescriptors(
uint32_t count, ui::d3d12::util::DescriptorCPUGPUHandlePair* handles_out);
// These are needed often, so they are always allocated.
enum class SystemBindlessView : uint32_t {
kNullTexture2DArray,
kNullTexture3D,
kNullTextureCube,

kSharedMemoryRawSRV,
kSharedMemoryRawUAV,

kEDRAMR32UintUAV,
kEDRAMRawSRV,
kEDRAMRawUAV,

kGammaRampNormalSRV,
kGammaRampPWLSRV,

kCount,
};
ui::d3d12::util::DescriptorCPUGPUHandlePair GetSystemBindlessViewHandlePair(
SystemBindlessView view) const;

// Returns a single temporary GPU-side buffer within a submission for tasks
// like texture untiling and resolving.
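RequestOneUseSingleViewDescriptors and GetSystemBindlessViewHandlePair are the two ways internal passes obtain SRV/UAV descriptors after this change. A rough caller-side fragment, not from the commit, mirroring the pattern used in RenderTargetCache::ResolveCopy further down (the caller is assumed to hold the bindless_resources_used_ flag it was constructed with):

ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
if (bindless_resources_used_) {
  // The always-allocated system view can be reused directly.
  descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
      D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
} else {
  // One-use descriptor valid only for the next draw or dispatch; the bindful
  // path must write the view itself.
  if (!command_processor_->RequestOneUseSingleViewDescriptors(
          1, &descriptor_edram)) {
    return false;
  }
  WriteEDRAMRawSRVDescriptor(descriptor_edram.first);
}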
@@ -148,6 +174,10 @@ class D3D12CommandProcessor : public CommandProcessor {
bool changing_viewport = true, bool changing_blend_factor = false,
bool changing_stencil_ref = false);

// For the pipeline state cache to call when binding layout UIDs may be
// reused.
void NotifyShaderBindingsLayoutUIDsInvalidated();

// Returns the text to display in the GPU backend name in the window title.
std::string GetWindowTitleText() const;

@@ -180,36 +210,66 @@ class D3D12CommandProcessor : public CommandProcessor {
static constexpr uint32_t kQueueFrames = 3;

enum RootParameter : UINT {
// Keep the size of the root signature at each stage 13 dwords or less
// (better 12 or less) so it fits in user data on AMD. Descriptor tables are
// 1 dword, root descriptors are 2 dwords (however, root descriptors require
// less setup on the CPU - balance needs to be maintained).

// CBVs are set in both bindful and bindless cases via root descriptors.

// - Bindful resources - multiple root signatures depending on extra
// parameters.

// These are always present.

// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants.
kRootParameter_FetchConstants,
kRootParameter_Bindful_FetchConstants = 0,  // +2 dwords = 2 in all.
// Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices).
kRootParameter_FloatConstantsVertex,
kRootParameter_Bindful_FloatConstantsVertex,  // +2 = 4 in VS.
// Less frequently changed (per-material).
kRootParameter_FloatConstantsPixel,
// Rarely changed - system constants like viewport and alpha testing.
kRootParameter_SystemConstants,
kRootParameter_Bindful_FloatConstantsPixel,  // +2 = 4 in PS.
// May stay the same across many draws.
kRootParameter_Bindful_SystemConstants,  // +2 = 6 in all.
// Pretty rarely used and rarely changed - flow control constants.
kRootParameter_BoolLoopConstants,
kRootParameter_Bindful_BoolLoopConstants,  // +2 = 8 in all.
// Never changed except for when starting a new descriptor heap - shared
// memory byte address buffer, and, if ROV is used for EDRAM, EDRAM UAV.
kRootParameter_SharedMemoryAndEDRAM,
// memory byte address buffer, and, if ROV is used for EDRAM, EDRAM R32_UINT
// UAV.
// SRV/UAV descriptor table.
kRootParameter_Bindful_SharedMemoryAndEDRAM,  // +1 = 9 in all.

kRootParameter_Count_Base,
kRootParameter_Bindful_Count_Base,

// Extra parameter that may or may not exist:
// - Pixel textures (t1+).
// - Pixel samplers (s0+).
// - Vertex textures (t1+).
// - Vertex samplers (s0+).
// - Pixel textures (+1 = 10 in PS).
// - Pixel samplers (+1 = 11 in PS).
// - Vertex textures (+1 = 10 in VS).
// - Vertex samplers (+1 = 11 in VS).

kRootParameter_Count_Max = kRootParameter_Count_Base + 4,
kRootParameter_Bindful_Count_Max = kRootParameter_Bindful_Count_Base + 4,

// - Bindless resources - two global root signatures (for non-tessellated
// and tessellated drawing), so these are always present.

kRootParameter_Bindless_FetchConstants = 0,  // +2 = 2 in all.
kRootParameter_Bindless_FloatConstantsVertex,  // +2 = 4 in VS.
kRootParameter_Bindless_FloatConstantsPixel,  // +2 = 4 in PS.
// Changed per-material, texture and sampler descriptor indices.
kRootParameter_Bindless_DescriptorIndicesPixel,  // +2 = 6 in PS.
kRootParameter_Bindless_DescriptorIndicesVertex,  // +2 = 6 in VS.
kRootParameter_Bindless_SystemConstants,  // +2 = 8 in all.
kRootParameter_Bindless_BoolLoopConstants,  // +2 = 10 in all.
// Unbounded sampler descriptor table - changed in case of overflow.
kRootParameter_Bindless_SamplerHeap,  // +1 = 11 in all.
// Unbounded SRV/UAV descriptor table - never changed.
kRootParameter_Bindless_ViewHeap,  // +1 = 12 in all.

kRootParameter_Bindless_Count,
};

struct RootExtraParameterIndices {
struct RootBindfulExtraParameterIndices {
uint32_t textures_pixel;
uint32_t samplers_pixel;
uint32_t textures_vertex;
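A quick sanity check of the dword accounting in the comments above (illustrative arithmetic, not part of the commit): the bindless root signature exposes five root CBVs and two descriptor tables to each shader stage.

// Root CBVs cost 2 dwords, descriptor tables 1 dword.
constexpr uint32_t kBindlessDwordsPerStage =
    2 * 5 +  // Fetch, float (VS or PS), descriptor indices, system, bool/loop.
    1 * 2;   // Sampler heap table + view heap table.
static_assert(kBindlessDwordsPerStage == 12,
              "Matches the 12-dword target noted above");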
@@ -218,9 +278,9 @@ class D3D12CommandProcessor : public CommandProcessor {
};
// Gets the indices of optional root parameters. Returns the total parameter
// count.
static uint32_t GetRootExtraParameterIndices(
static uint32_t GetRootBindfulExtraParameterIndices(
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
RootExtraParameterIndices& indices_out);
RootBindfulExtraParameterIndices& indices_out);

// BeginSubmission and EndSubmission may be called at any time. If there's an
// open non-frame submission, BeginSubmission(true) will promote it to a

@@ -247,6 +307,20 @@ class D3D12CommandProcessor : public CommandProcessor {
// Need to await submission completion before calling.
void ClearCommandAllocatorCache();

// Request descriptors and automatically rebind the descriptor heap on the
// draw command list. Refer to DescriptorHeapPool::Request for partial/full
// update explanation. Doesn't work when bindless descriptors are used.
uint64_t RequestViewBindfulDescriptors(
uint64_t previous_heap_index, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
uint64_t RequestSamplerBindfulDescriptors(
uint64_t previous_heap_index, uint32_t count_for_partial_update,
uint32_t count_for_full_update,
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);

void UpdateFixedFunctionState(bool primitive_two_faced);
void UpdateSystemConstantValues(
bool shared_memory_is_uav, bool primitive_two_faced,

@@ -268,6 +342,8 @@ class D3D12CommandProcessor : public CommandProcessor {
// synchronizing immediately after use. Always in COPY_DEST state.
ID3D12Resource* RequestReadbackBuffer(uint32_t size);

void WriteGammaRampSRV(bool is_pwl, D3D12_CPU_DESCRIPTOR_HANDLE handle) const;

bool cache_clear_requested_ = false;

bool submission_open_ = false;
@@ -298,28 +374,89 @@ class D3D12CommandProcessor : public CommandProcessor {
ID3D12GraphicsCommandList1* command_list_1_ = nullptr;
std::unique_ptr<DeferredCommandList> deferred_command_list_ = nullptr;

std::unique_ptr<SharedMemory> shared_memory_ = nullptr;

// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;

std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;

// Should bindless textures and samplers be used - many times faster
// UpdateBindings than bindful (that becomes a significant bottleneck with
// bindful - mainly because of CopyDescriptorsSimple, which takes the majority
// of UpdateBindings time, and that's outside the emulator's control even).
bool bindless_resources_used_ = false;
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
// and blending performed in pixel shaders be used instead of host render
// targets.
bool edram_rov_used_ = false;

std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;

static constexpr uint32_t kViewBindfulHeapSize = 32768;
static_assert(kViewBindfulHeapSize <=
D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1);
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_bindful_heap_pool_ =
nullptr;
// Currently bound descriptor heap - updated by RequestViewBindfulDescriptors.
ID3D12DescriptorHeap* view_bindful_heap_current_;
// Rationale: textures have 4 KB alignment in guest memory, and there can be
// 512 MB / 4 KB in total of them at most, and multiply by 3 for different
// swizzles, signedness, and multiple host textures for one guest texture, and
// transient descriptors. Though in reality there will be a lot fewer of
// course, this is just a "safe" value. The limit is 1000000 for resource
// binding tier 2.
static constexpr uint32_t kViewBindlessHeapSize = 262144;
static_assert(kViewBindlessHeapSize <=
D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_2);
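The rationale above works out roughly as follows (illustrative arithmetic only, not from the commit):

// 512 MB of guest memory / 4 KB minimum texture alignment = 131072 possible
// texture base addresses; 262144 roughly doubles that to leave room for
// multiple host views per guest texture (swizzles, signedness, stacked/3D
// variants) and for transient descriptors, while staying far below the
// 1,000,000-descriptor limit of resource binding tier 2.
static_assert((512u << 20) / (4u << 10) == 131072u, "");
static_assert(kViewBindlessHeapSize <= 1000000u, "");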
ID3D12DescriptorHeap* view_bindless_heap_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE view_bindless_heap_cpu_start_;
D3D12_GPU_DESCRIPTOR_HANDLE view_bindless_heap_gpu_start_;
uint32_t view_bindless_heap_allocated_ = 0;
std::vector<uint32_t> view_bindless_heap_free_;
// <Descriptor index, submission where requested>, sorted by the submission
// number.
std::deque<std::pair<uint32_t, uint64_t>> view_bindless_one_use_descriptors_;

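view_bindless_heap_allocated_ and view_bindless_heap_free_ form a bump allocator with a free list for persistent descriptors. A plausible sketch of the two methods declared earlier (the commit's real definitions live in the suppressed .cc diff and may differ):

uint32_t D3D12CommandProcessor::RequestPersistentViewBindlessDescriptor() {
  assert_true(bindless_resources_used_);
  if (!view_bindless_heap_free_.empty()) {
    // Prefer recycling a released slot.
    uint32_t index = view_bindless_heap_free_.back();
    view_bindless_heap_free_.pop_back();
    return index;
  }
  if (view_bindless_heap_allocated_ >= kViewBindlessHeapSize) {
    return UINT32_MAX;  // As documented: no free descriptors.
  }
  return view_bindless_heap_allocated_++;
}

void D3D12CommandProcessor::ReleaseViewBindlessDescriptorImmediately(
    uint32_t descriptor_index) {
  // "Immediately" - the caller guarantees no pending submission still
  // references the descriptor.
  view_bindless_heap_free_.push_back(descriptor_index);
}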
// Direct3D 12 only allows shader-visible heaps with no more than 2048
// samplers (due to Nvidia addressing). However, there's also possibly a weird
// bug in the Nvidia driver (tested on 440.97 and earlier on Windows 10 1803)
// that caused the sampler with index 2047 not to work if a heap with 8 or
// less samplers also exists - in case of Xenia, it's the immediate drawer's
// sampler heap.
// FIXME(Triang3l): Investigate the issue with the sampler 2047 on Nvidia.
static constexpr uint32_t kSamplerHeapSize = 2000;
static_assert(kSamplerHeapSize <= D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE);
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_bindful_heap_pool_ =
nullptr;
ID3D12DescriptorHeap* sampler_bindful_heap_current_;
ID3D12DescriptorHeap* sampler_bindless_heap_current_ = nullptr;
D3D12_CPU_DESCRIPTOR_HANDLE sampler_bindless_heap_cpu_start_;
D3D12_GPU_DESCRIPTOR_HANDLE sampler_bindless_heap_gpu_start_;
// Currently the sampler heap is used only for texture cache samplers, so
// individual samplers are never freed, and using a simple linear allocator
// inside the current heap without a free list.
uint32_t sampler_bindless_heap_allocated_ = 0;
// <Heap, overflow submission number>, if total sampler count used so far
// exceeds kSamplerHeapSize, and the heap has been switched (this is not a
// totally impossible situation considering Direct3D 9 has sampler parameter
// state instead of sampler objects, and having one "unimportant" parameter
// changed may result in doubling of sampler count). Sorted by the submission
// number (so checking if the first can be reused is enough).
std::deque<std::pair<ID3D12DescriptorHeap*, uint64_t>>
sampler_bindless_heaps_overflowed_;
// TextureCache::SamplerParameters::value -> indices within the current
// bindless sampler heap.
std::unordered_map<uint32_t, uint32_t> texture_cache_bindless_sampler_map_;

// Root signatures for different descriptor counts.
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_bindful_;
ID3D12RootSignature* root_signature_bindless_vs_ = nullptr;
ID3D12RootSignature* root_signature_bindless_ds_ = nullptr;

std::unique_ptr<SharedMemory> shared_memory_ = nullptr;

std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;

std::unique_ptr<TextureCache> texture_cache_ = nullptr;

std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;

std::unique_ptr<PrimitiveConverter> primitive_converter_ = nullptr;

std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;

// Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL
// ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D.
ID3D12Resource* gamma_ramp_texture_ = nullptr;
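texture_cache_bindless_sampler_map_ and the linear allocator above combine into a simple get-or-create path for sampler descriptors. A hedged sketch of that flow (assumed helper, not the commit's code; switching to a new heap on overflow past kSamplerHeapSize is omitted):

uint32_t GetOrCreateBindlessSampler(ID3D12Device* device,
                                    uint32_t sampler_parameters_value,
                                    const D3D12_SAMPLER_DESC& desc) {
  auto it = texture_cache_bindless_sampler_map_.find(sampler_parameters_value);
  if (it != texture_cache_bindless_sampler_map_.end()) {
    return it->second;  // Already written to the current bindless heap.
  }
  // Linear allocation - individual samplers are never freed (see above).
  uint32_t index = sampler_bindless_heap_allocated_++;
  D3D12_CPU_DESCRIPTOR_HANDLE handle = sampler_bindless_heap_cpu_start_;
  handle.ptr += size_t(index) *
                device->GetDescriptorHandleIncrementSize(
                    D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
  device->CreateSampler(&desc, handle);
  texture_cache_bindless_sampler_map_.emplace(sampler_parameters_value, index);
  return index;
}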
@@ -348,11 +485,8 @@ class D3D12CommandProcessor : public CommandProcessor {
// Unsubmitted barrier batch.
std::vector<D3D12_RESOURCE_BARRIER> barriers_;

struct BufferForDeletion {
ID3D12Resource* buffer;
uint64_t last_usage_submission;
};
std::deque<BufferForDeletion> buffers_for_deletion_;
// <Resource, submission where requested>, sorted by the submission number.
std::deque<std::pair<ID3D12Resource*, uint64_t>> buffers_for_deletion_;

static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024;
ID3D12Resource* scratch_buffer_ = nullptr;

@@ -390,18 +524,12 @@ class D3D12CommandProcessor : public CommandProcessor {
// Currently bound graphics root signature.
ID3D12RootSignature* current_graphics_root_signature_;
// Extra parameters which may or may not be present.
RootExtraParameterIndices current_graphics_root_extras_;
RootBindfulExtraParameterIndices current_graphics_root_bindful_extras_;
// Whether root parameters are up to date - reset if a new signature is bound.
uint32_t current_graphics_root_up_to_date_;

// Currently bound descriptor heaps - update by RequestViewDescriptors and
// RequestSamplerDescriptors.
ID3D12DescriptorHeap* current_view_heap_;
ID3D12DescriptorHeap* current_sampler_heap_;

// System shader constants.
DxbcShaderTranslator::SystemConstants system_constants_;
ColorRenderTargetFormat system_constants_color_formats_[4];

// Float constant usage masks of the last draw call.
uint64_t current_float_constant_map_vertex_[4];

@@ -409,45 +537,48 @@ class D3D12CommandProcessor : public CommandProcessor {

// Constant buffer bindings.
struct ConstantBufferBinding {
D3D12_GPU_VIRTUAL_ADDRESS buffer_address;
D3D12_GPU_VIRTUAL_ADDRESS address;
bool up_to_date;
};
ConstantBufferBinding cbuffer_bindings_system_;
ConstantBufferBinding cbuffer_bindings_float_vertex_;
ConstantBufferBinding cbuffer_bindings_float_pixel_;
ConstantBufferBinding cbuffer_bindings_bool_loop_;
ConstantBufferBinding cbuffer_bindings_fetch_;
ConstantBufferBinding cbuffer_binding_system_;
ConstantBufferBinding cbuffer_binding_float_vertex_;
ConstantBufferBinding cbuffer_binding_float_pixel_;
ConstantBufferBinding cbuffer_binding_bool_loop_;
ConstantBufferBinding cbuffer_binding_fetch_;
ConstantBufferBinding cbuffer_binding_descriptor_indices_vertex_;
ConstantBufferBinding cbuffer_binding_descriptor_indices_pixel_;

// Pages with the descriptors currently used for handling Xenos draw calls.
uint64_t draw_view_heap_index_;
uint64_t draw_sampler_heap_index_;
uint64_t draw_view_bindful_heap_index_;
uint64_t draw_sampler_bindful_heap_index_;

// Whether the last used texture bindings have been written to the current
// view descriptor heap.
bool texture_bindings_written_vertex_;
bool texture_bindings_written_pixel_;
// Hashes of the last texture bindings written to the current view descriptor
// heap with the last used descriptor layout. Valid only when the
// corresponding "written" variables are true.
uint64_t current_texture_bindings_hash_vertex_;
uint64_t current_texture_bindings_hash_pixel_;
// Whether the last used texture sampler bindings have been written to the
// current view descriptor heap.
bool bindful_textures_written_vertex_;
bool bindful_textures_written_pixel_;
bool bindful_samplers_written_vertex_;
bool bindful_samplers_written_pixel_;
// Layout UIDs and last texture and sampler bindings written to the current
// descriptor heaps (for bindful) or descriptor index constant buffer (for
// bindless) with the last used descriptor layout. Valid only when:
// - For bindful, when bindful_#_written_#_ is true.
// - For bindless, when cbuffer_binding_descriptor_indices_#_.up_to_date is
// true.
size_t current_texture_layout_uid_vertex_;
size_t current_texture_layout_uid_pixel_;
size_t current_sampler_layout_uid_vertex_;
size_t current_sampler_layout_uid_pixel_;
// Size of these should be ignored when checking whether these are up to date,
// layout UID should be checked first (they will be different for different
// binding counts).
std::vector<TextureCache::TextureSRVKey> current_texture_srv_keys_vertex_;
std::vector<TextureCache::TextureSRVKey> current_texture_srv_keys_pixel_;
std::vector<TextureCache::SamplerParameters> current_samplers_vertex_;
std::vector<TextureCache::SamplerParameters> current_samplers_pixel_;
std::vector<uint32_t> current_sampler_bindless_indices_vertex_;
std::vector<uint32_t> current_sampler_bindless_indices_pixel_;

// Whether the last used samplers have been written to the current sampler
// descriptor heap.
bool samplers_written_vertex_;
bool samplers_written_pixel_;
// Hashes of the last sampler parameters written to the current sampler
// descriptor heap with the last used descriptor layout. Valid only when the
// corresponding "written" variables are true.
uint64_t current_samplers_hash_vertex_;
uint64_t current_samplers_hash_pixel_;

// Latest descriptor handles used for handling Xenos draw calls.
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_system_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_vertex_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_pixel_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_bool_loop_constants_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_;
// Latest bindful descriptor handles used for handling Xenos draw calls.
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_;
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_;
@@ -9,6 +9,8 @@

#include "xenia/gpu/d3d12/d3d12_shader.h"

#include <cstring>

#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/gpu_flags.h"

@@ -18,8 +20,8 @@ namespace xe {
namespace gpu {
namespace d3d12 {

constexpr uint32_t D3D12Shader::kMaxTextureSRVIndexBits;
constexpr uint32_t D3D12Shader::kMaxTextureSRVs;
constexpr uint32_t D3D12Shader::kMaxTextureBindingIndexBits;
constexpr uint32_t D3D12Shader::kMaxTextureBindings;
constexpr uint32_t D3D12Shader::kMaxSamplerBindingIndexBits;
constexpr uint32_t D3D12Shader::kMaxSamplerBindings;

@@ -28,34 +30,40 @@ D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash,
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}

void D3D12Shader::SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count,
const DxbcShaderTranslator::TextureBinding* texture_bindings,
uint32_t texture_binding_count,
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
uint32_t sampler_binding_count) {
texture_srvs_.clear();
texture_srvs_.reserve(texture_srv_count);
texture_bindings_.clear();
texture_bindings_.reserve(texture_binding_count);
used_texture_mask_ = 0;
for (uint32_t i = 0; i < texture_srv_count; ++i) {
TextureSRV srv;
const DxbcShaderTranslator::TextureSRV& translator_srv = texture_srvs[i];
srv.fetch_constant = translator_srv.fetch_constant;
srv.dimension = translator_srv.dimension;
srv.is_signed = translator_srv.is_signed;
texture_srvs_.push_back(srv);
used_texture_mask_ |= 1u << translator_srv.fetch_constant;
for (uint32_t i = 0; i < texture_binding_count; ++i) {
TextureBinding& binding = texture_bindings_.emplace_back();
// For a stable hash.
std::memset(&binding, 0, sizeof(binding));
const DxbcShaderTranslator::TextureBinding& translator_binding =
texture_bindings[i];
binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
binding.fetch_constant = translator_binding.fetch_constant;
binding.dimension = translator_binding.dimension;
binding.is_signed = translator_binding.is_signed;
used_texture_mask_ |= 1u << translator_binding.fetch_constant;
}
sampler_bindings_.clear();
sampler_bindings_.reserve(sampler_binding_count);
for (uint32_t i = 0; i < sampler_binding_count; ++i) {
SamplerBinding sampler;
const DxbcShaderTranslator::SamplerBinding& translator_sampler =
SamplerBinding binding;
const DxbcShaderTranslator::SamplerBinding& translator_binding =
sampler_bindings[i];
sampler.fetch_constant = translator_sampler.fetch_constant;
sampler.mag_filter = translator_sampler.mag_filter;
sampler.min_filter = translator_sampler.min_filter;
sampler.mip_filter = translator_sampler.mip_filter;
sampler.aniso_filter = translator_sampler.aniso_filter;
sampler_bindings_.push_back(sampler);
binding.bindless_descriptor_index =
translator_binding.bindless_descriptor_index;
binding.fetch_constant = translator_binding.fetch_constant;
binding.mag_filter = translator_binding.mag_filter;
binding.min_filter = translator_binding.min_filter;
binding.mip_filter = translator_binding.mip_filter;
binding.aniso_filter = translator_binding.aniso_filter;
sampler_bindings_.push_back(binding);
}
}
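The std::memset of each new TextureBinding above ("for a stable hash") zeroes struct padding bytes, so whole arrays of these structs can later be hashed and memcmp-compared by the pipeline cache. A small consumer-side sketch of what that enables (hypothetical helper, not in the commit; assumes XXH64 from xxHash is available):

uint64_t HashTextureBindingLayout(const D3D12Shader& shader) {
  // Hash the shader's texture binding layout the same way
  // PipelineCache::TranslateShader does further below.
  uint32_t count;
  const D3D12Shader::TextureBinding* bindings =
      shader.GetTextureBindings(count);
  return count ? XXH64(bindings, count * sizeof(*bindings), 0) : 0;
}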
@@ -26,8 +26,8 @@ class D3D12Shader : public Shader {
const uint32_t* dword_ptr, uint32_t dword_count);

void SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureSRV* texture_srvs,
uint32_t texture_srv_count,
const DxbcShaderTranslator::TextureBinding* texture_bindings,
uint32_t texture_binding_count,
const DxbcShaderTranslator::SamplerBinding* sampler_bindings,
uint32_t sampler_binding_count);

@@ -44,18 +44,22 @@ class D3D12Shader : public Shader {

bool DisassembleDxbc(const ui::d3d12::D3D12Provider* provider);

static constexpr uint32_t kMaxTextureSRVIndexBits =
DxbcShaderTranslator::kMaxTextureSRVIndexBits;
static constexpr uint32_t kMaxTextureSRVs =
DxbcShaderTranslator::kMaxTextureSRVs;
struct TextureSRV {
static constexpr uint32_t kMaxTextureBindingIndexBits =
DxbcShaderTranslator::kMaxTextureBindingIndexBits;
static constexpr uint32_t kMaxTextureBindings =
DxbcShaderTranslator::kMaxTextureBindings;
struct TextureBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
// Stacked and 3D are separate TextureBindings, even for bindless for null
// descriptor handling simplicity.
TextureDimension dimension;
bool is_signed;
};
const TextureSRV* GetTextureSRVs(uint32_t& count_out) const {
count_out = uint32_t(texture_srvs_.size());
return texture_srvs_.data();
// Safe to hash and compare with memcmp for layout hashing.
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
count_out = uint32_t(texture_bindings_.size());
return texture_bindings_.data();
}
const uint32_t GetUsedTextureMask() const { return used_texture_mask_; }

@@ -64,6 +68,7 @@ class D3D12Shader : public Shader {
static constexpr uint32_t kMaxSamplerBindings =
DxbcShaderTranslator::kMaxSamplerBindings;
struct SamplerBinding {
uint32_t bindless_descriptor_index;
uint32_t fetch_constant;
TextureFilter mag_filter;
TextureFilter min_filter;

@@ -75,10 +80,29 @@ class D3D12Shader : public Shader {
return sampler_bindings_.data();
}

// For owning subsystems like the pipeline state cache, accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout was bound.
size_t GetTextureBindingLayoutUserUID() const {
return texture_binding_layout_user_uid_;
}
void SetTextureBindingLayoutUserUID(size_t uid) {
texture_binding_layout_user_uid_ = uid;
}
size_t GetSamplerBindingLayoutUserUID() const {
return sampler_binding_layout_user_uid_;
}
void SetSamplerBindingLayoutUserUID(size_t uid) {
sampler_binding_layout_user_uid_ = uid;
}

private:
std::vector<TextureSRV> texture_srvs_;
uint32_t used_texture_mask_ = 0;
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
size_t texture_binding_layout_user_uid_ = 0;
size_t sampler_binding_layout_user_uid_ = 0;
uint32_t used_texture_mask_ = 0;

std::vector<uint8_t> forced_early_z_shader_;
};
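The layout UIDs above replace raw hashes on the consumer side so equality can be checked without worrying about collisions. A plausible check in the command processor (sketch only, simplified; the members used here are declared in d3d12_command_processor.h above):

// Keep the previously written pixel-shader texture descriptors only if the
// newly bound shader uses the very same binding layout.
size_t new_uid = pixel_shader->GetTextureBindingLayoutUserUID();
bool write_textures_pixel =
    !bindful_textures_written_pixel_ ||
    current_texture_layout_uid_pixel_ != new_uid;
if (write_textures_pixel) {
  // Rewrite the descriptors (bindful) or the descriptor index constant buffer
  // (bindless), then remember the layout they were written for.
  current_texture_layout_uid_pixel_ = new_uid;
}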
@@ -61,19 +61,22 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/primitive_rectangle_list_gs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/tessellation_vs.h"

constexpr size_t PipelineCache::kLayoutUIDEmpty;
constexpr uint32_t PipelineCache::PipelineDescription::kVersion;

PipelineCache::PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, bool edram_rov_used,
RegisterFile* register_file,
bool bindless_resources_used, bool edram_rov_used,
uint32_t resolution_scale)
: command_processor_(command_processor),
register_file_(register_file),
bindless_resources_used_(bindless_resources_used),
edram_rov_used_(edram_rov_used),
resolution_scale_(resolution_scale) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();

shader_translator_ = std::make_unique<DxbcShaderTranslator>(
provider->GetAdapterVendorID(), edram_rov_used_,
provider->GetAdapterVendorID(), bindless_resources_used_, edram_rov_used_,
provider->GetGraphicsAnalysis() != nullptr);

if (edram_rov_used_) {

@@ -178,6 +181,13 @@ void PipelineCache::ClearCache(bool shutting_down) {
COUNT_profile_set("gpu/pipeline_cache/pipeline_states", 0);

// Destroy all shaders.
command_processor_->NotifyShaderBindingsLayoutUIDsInvalidated();
if (bindless_resources_used_) {
bindless_sampler_layout_map_.clear();
bindless_sampler_layouts_.clear();
}
texture_binding_layout_map_.clear();
texture_binding_layouts_.clear();
for (auto it : shader_map_) {
delete it.second;
}
@@ -264,8 +274,8 @@ void PipelineCache::InitializeShaderStorage(
auto shader_translation_thread_function = [&]() {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
DxbcShaderTranslator translator(
provider->GetAdapterVendorID(), edram_rov_used_,
provider->GetGraphicsAnalysis() != nullptr);
provider->GetAdapterVendorID(), bindless_resources_used_,
edram_rov_used_, provider->GetGraphicsAnalysis() != nullptr);
for (;;) {
std::pair<ShaderStoredHeader, D3D12Shader*> shader_to_translate;
for (;;) {

@@ -287,11 +297,11 @@ void PipelineCache::InitializeShaderStorage(
translator, shader_to_translate.second,
shader_to_translate.first.sq_program_cntl,
shader_to_translate.first.host_vertex_shader_type)) {
std::unique_lock<std::mutex> lock(shaders_failed_to_translate_mutex);
std::lock_guard<std::mutex> lock(shaders_failed_to_translate_mutex);
shaders_failed_to_translate.push_back(shader_to_translate.second);
}
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
--shader_translation_threads_busy;
}
}

@@ -340,7 +350,7 @@ void PipelineCache::InitializeShaderStorage(
// one.
size_t shader_translation_threads_needed;
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shader_translation_threads_needed =
std::min(shader_translation_threads_busy +
shaders_to_translate.size() + size_t(1),

@@ -353,7 +363,7 @@ void PipelineCache::InitializeShaderStorage(
shader_translation_threads.back()->set_name("Shader Translation");
}
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shaders_to_translate.emplace_back(shader_header, shader);
}
shaders_translation_thread_cond.notify_one();

@@ -362,7 +372,7 @@ void PipelineCache::InitializeShaderStorage(
}
if (!shader_translation_threads.empty()) {
{
std::unique_lock<std::mutex> lock(shaders_translation_thread_mutex);
std::lock_guard<std::mutex> lock(shaders_translation_thread_mutex);
shader_translation_threads_shutdown = true;
}
shaders_translation_thread_cond.notify_all();

@@ -662,7 +672,7 @@ void PipelineCache::EndSubmission() {
if (shader_storage_file_flush_needed_ ||
pipeline_state_storage_file_flush_needed_) {
{
std::unique_lock<std::mutex> lock(storage_write_request_lock_);
std::lock_guard<std::mutex> lock(storage_write_request_lock_);
if (shader_storage_file_flush_needed_) {
storage_write_flush_shaders_ = true;
}
@@ -955,47 +965,165 @@ bool PipelineCache::TranslateShader(
return false;
}

uint32_t texture_srv_count;
const DxbcShaderTranslator::TextureSRV* texture_srvs =
translator.GetTextureSRVs(texture_srv_count);
const char* host_shader_type;
if (shader->type() == ShaderType::kVertex) {
switch (shader->host_vertex_shader_type()) {
case Shader::HostVertexShaderType::kLineDomainCPIndexed:
host_shader_type = "control-point-indexed line domain";
break;
case Shader::HostVertexShaderType::kLineDomainPatchIndexed:
host_shader_type = "patch-indexed line domain";
break;
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
host_shader_type = "control-point-indexed triangle domain";
break;
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
host_shader_type = "patch-indexed triangle domain";
break;
case Shader::HostVertexShaderType::kQuadDomainCPIndexed:
host_shader_type = "control-point-indexed quad domain";
break;
case Shader::HostVertexShaderType::kQuadDomainPatchIndexed:
host_shader_type = "patch-indexed quad domain";
break;
default:
host_shader_type = "vertex";
}
} else {
host_shader_type = "pixel";
}
XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n", host_shader_type,
shader->ucode_dword_count() * 4, shader->ucode_data_hash(),
shader->ucode_disassembly().c_str());

// Set up texture and sampler bindings.
uint32_t texture_binding_count;
const DxbcShaderTranslator::TextureBinding* translator_texture_bindings =
translator.GetTextureBindings(texture_binding_count);
uint32_t sampler_binding_count;
const DxbcShaderTranslator::SamplerBinding* sampler_bindings =
translator.GetSamplerBindings(sampler_binding_count);
shader->SetTexturesAndSamplers(texture_srvs, texture_srv_count,
sampler_bindings, sampler_binding_count);

if (shader->is_valid()) {
const char* host_shader_type;
if (shader->type() == ShaderType::kVertex) {
switch (shader->host_vertex_shader_type()) {
case Shader::HostVertexShaderType::kLineDomainCPIndexed:
host_shader_type = "control-point-indexed line domain";
break;
case Shader::HostVertexShaderType::kLineDomainPatchIndexed:
host_shader_type = "patch-indexed line domain";
break;
case Shader::HostVertexShaderType::kTriangleDomainCPIndexed:
host_shader_type = "control-point-indexed triangle domain";
break;
case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed:
host_shader_type = "patch-indexed triangle domain";
break;
case Shader::HostVertexShaderType::kQuadDomainCPIndexed:
host_shader_type = "control-point-indexed quad domain";
break;
case Shader::HostVertexShaderType::kQuadDomainPatchIndexed:
host_shader_type = "patch-indexed quad domain";
break;
default:
host_shader_type = "vertex";
}
} else {
host_shader_type = "pixel";
}
XELOGGPU("Generated {} shader ({}b) - hash {:016X}:\n{}\n",
host_shader_type, shader->ucode_dword_count() * 4,
shader->ucode_data_hash(), shader->ucode_disassembly().c_str());
shader->SetTexturesAndSamplers(translator_texture_bindings,
texture_binding_count, sampler_bindings,
sampler_binding_count);
assert_false(bindless_resources_used_ &&
texture_binding_count + sampler_binding_count >
D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4);
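Context for the assert above (illustrative, not part of the diff): in the bindless case the per-draw descriptor indices travel in a constant buffer, and one CBV can address at most D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT float4 vectors, i.e. four 32-bit indices per vector.

static_assert(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT == 4096,
              "So up to 4096 * 4 = 16384 texture + sampler descriptor indices "
              "can be passed to one shader through the indices CBV");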
// Get hashable texture bindings, without translator-specific info.
const D3D12Shader::TextureBinding* texture_bindings =
shader->GetTextureBindings(texture_binding_count);
size_t texture_binding_layout_bytes =
texture_binding_count * sizeof(*texture_bindings);
uint64_t texture_binding_layout_hash = 0;
if (texture_binding_count) {
texture_binding_layout_hash =
XXH64(texture_bindings, texture_binding_layout_bytes, 0);
}
uint32_t bindless_sampler_count =
bindless_resources_used_ ? sampler_binding_count : 0;
uint64_t bindless_sampler_layout_hash = 0;
if (bindless_sampler_count) {
XXH64_state_t hash_state;
XXH64_reset(&hash_state, 0);
for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
XXH64_update(&hash_state, &sampler_bindings[i].bindless_descriptor_index,
sizeof(sampler_bindings[i].bindless_descriptor_index));
}
bindless_sampler_layout_hash = XXH64_digest(&hash_state);
}
// Obtain the unique IDs of binding layouts if there are any texture bindings
// or bindless samplers, for invalidation in the command processor.
size_t texture_binding_layout_uid = kLayoutUIDEmpty;
// Use sampler count for the bindful case because it's the only thing that
// must be the same for layouts to be compatible in this case
// (instruction-specified parameters are used as overrides for actual
// samplers).
static_assert(
kLayoutUIDEmpty == 0,
"Empty layout UID is assumed to be 0 because for bindful samplers, the "
"UID is their count");
size_t sampler_binding_layout_uid = bindless_resources_used_
? kLayoutUIDEmpty
: size_t(sampler_binding_count);
if (texture_binding_count || bindless_sampler_count) {
std::lock_guard<std::mutex> layouts_mutex_(layouts_mutex_);
if (texture_binding_count) {
auto found_range =
texture_binding_layout_map_.equal_range(texture_binding_layout_hash);
for (auto it = found_range.first; it != found_range.second; ++it) {
if (it->second.vector_span_length == texture_binding_count &&
!std::memcmp(
texture_binding_layouts_.data() + it->second.vector_span_offset,
texture_bindings, texture_binding_layout_bytes)) {
texture_binding_layout_uid = it->second.uid;
break;
}
}
if (texture_binding_layout_uid == kLayoutUIDEmpty) {
static_assert(
kLayoutUIDEmpty == 0,
"Layout UID is size + 1 because it's assumed that 0 is the UID for "
"an empty layout");
texture_binding_layout_uid = texture_binding_layout_map_.size() + 1;
LayoutUID new_uid;
new_uid.uid = texture_binding_layout_uid;
new_uid.vector_span_offset = texture_binding_layouts_.size();
new_uid.vector_span_length = texture_binding_count;
texture_binding_layouts_.resize(new_uid.vector_span_offset +
texture_binding_count);
std::memcpy(
texture_binding_layouts_.data() + new_uid.vector_span_offset,
texture_bindings, texture_binding_layout_bytes);
texture_binding_layout_map_.insert(
{texture_binding_layout_hash, new_uid});
}
}
if (bindless_sampler_count) {
auto found_range =
bindless_sampler_layout_map_.equal_range(sampler_binding_layout_uid);
for (auto it = found_range.first; it != found_range.second; ++it) {
if (it->second.vector_span_length != bindless_sampler_count) {
continue;
}
sampler_binding_layout_uid = it->second.uid;
const uint32_t* vector_bindless_sampler_layout =
bindless_sampler_layouts_.data() + it->second.vector_span_offset;
for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
if (vector_bindless_sampler_layout[i] !=
sampler_bindings[i].bindless_descriptor_index) {
sampler_binding_layout_uid = kLayoutUIDEmpty;
break;
}
}
if (sampler_binding_layout_uid != kLayoutUIDEmpty) {
break;
}
}
if (sampler_binding_layout_uid == kLayoutUIDEmpty) {
sampler_binding_layout_uid = bindless_sampler_layout_map_.size();
LayoutUID new_uid;
static_assert(
kLayoutUIDEmpty == 0,
"Layout UID is size + 1 because it's assumed that 0 is the UID for "
"an empty layout");
new_uid.uid = sampler_binding_layout_uid + 1;
new_uid.vector_span_offset = bindless_sampler_layouts_.size();
new_uid.vector_span_length = sampler_binding_count;
bindless_sampler_layouts_.resize(new_uid.vector_span_offset +
sampler_binding_count);
uint32_t* vector_bindless_sampler_layout =
bindless_sampler_layouts_.data() + new_uid.vector_span_offset;
for (uint32_t i = 0; i < bindless_sampler_count; ++i) {
vector_bindless_sampler_layout[i] =
sampler_bindings[i].bindless_descriptor_index;
}
bindless_sampler_layout_map_.insert(
{bindless_sampler_layout_hash, new_uid});
}
}
}
shader->SetTextureBindingLayoutUserUID(texture_binding_layout_uid);
shader->SetSamplerBindingLayoutUserUID(sampler_binding_layout_uid);

// Create a version of the shader with early depth/stencil forced by Xenia
// itself when it's safe to do so or when EARLY_Z_ENABLE is set in
@@ -1856,7 +1984,7 @@ void PipelineCache::CreationThread(size_t thread_index) {
// set the completion event if needed (at the next iteration, or in some
// other thread).
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
std::lock_guard<std::mutex> lock(creation_request_lock_);
--creation_threads_busy_;
}
}

@@ -1867,7 +1995,7 @@ void PipelineCache::CreateQueuedPipelineStatesOnProcessorThread() {
while (true) {
PipelineState* pipeline_state_to_create;
{
std::unique_lock<std::mutex> lock(creation_request_lock_);
std::lock_guard<std::mutex> lock(creation_request_lock_);
if (creation_queue_.empty()) {
break;
}
@@ -21,6 +21,7 @@
#include <utility>
#include <vector>

#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/base/threading.h"
#include "xenia/gpu/d3d12/d3d12_shader.h"

@@ -37,9 +38,11 @@ class D3D12CommandProcessor;

class PipelineCache {
public:
static constexpr size_t kLayoutUIDEmpty = 0;

PipelineCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, bool edram_rov_used,
uint32_t resolution_scale);
RegisterFile* register_file, bool bindless_resources_used,
bool edram_rov_used, uint32_t resolution_scale);
~PipelineCache();

bool Initialize();

@@ -217,6 +220,7 @@ class PipelineCache {
PipelineDescription description;
};

// Can be called from multiple threads.
bool TranslateShader(DxbcShaderTranslator& translator, D3D12Shader* shader,
reg::SQ_PROGRAM_CNTL cntl,
Shader::HostVertexShaderType host_vertex_shader_type =
@@ -233,13 +237,37 @@ class PipelineCache {

D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
bool bindless_resources_used_;
bool edram_rov_used_;
uint32_t resolution_scale_;

// Reusable shader translator.
std::unique_ptr<DxbcShaderTranslator> shader_translator_ = nullptr;
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, D3D12Shader*> shader_map_;
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
shader_map_;

struct LayoutUID {
size_t uid;
size_t vector_span_offset;
size_t vector_span_length;
};
std::mutex layouts_mutex_;
// Texture binding layouts of different shaders, for obtaining layout UIDs.
std::vector<D3D12Shader::TextureBinding> texture_binding_layouts_;
// Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
// are XXH64 hashes of layouts, values need manual collision resolution using
// layout_vector_offset:layout_length of texture_binding_layouts_.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
texture_binding_layout_map_;
// Bindless sampler indices of different shaders, for obtaining layout UIDs.
// For bindful, sampler count is used as the UID instead.
std::vector<uint32_t> bindless_sampler_layouts_;
// Keys are XXH64 hashes of used bindless sampler indices.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
bindless_sampler_layout_map_;

// Empty depth-only pixel shader for writing to depth buffer via ROV when no
// Xenos pixel shader provided.

@@ -252,7 +280,9 @@ class PipelineCache {
};
// All previously generated pipeline state objects identified by hash and the
// description.
std::unordered_multimap<uint64_t, PipelineState*> pipeline_states_;
std::unordered_multimap<uint64_t, PipelineState*,
xe::hash::IdentityHasher<uint64_t>>
pipeline_states_;

// Previously used pipeline state object. This matches our current state
// settings and allows us to quickly(ish) reuse the pipeline state if no
@@ -102,10 +102,12 @@ const RenderTargetCache::EDRAMLoadStoreModeInfo
RenderTargetCache::RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
TraceWriter* trace_writer,
bool bindless_resources_used,
bool edram_rov_used)
: command_processor_(command_processor),
register_file_(register_file),
trace_writer_(trace_writer),
bindless_resources_used_(bindless_resources_used),
edram_rov_used_(edram_rov_used) {}

RenderTargetCache::~RenderTargetCache() { Shutdown(); }

@@ -181,10 +183,10 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
edram_buffer_, nullptr, &edram_buffer_uint32_uav_desc,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)));
uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)));

// Create the root signature for EDRAM buffer load/store.
D3D12_ROOT_PARAMETER load_store_root_parameters[2];
D3D12_ROOT_PARAMETER load_store_root_parameters[3];
// Parameter 0 is constants (changed for each render target binding).
load_store_root_parameters[0].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS;
@@ -193,24 +195,32 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
load_store_root_parameters[0].Constants.Num32BitValues =
sizeof(EDRAMLoadStoreRootConstants) / sizeof(uint32_t);
load_store_root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is source and target.
D3D12_DESCRIPTOR_RANGE load_store_root_ranges[2];
load_store_root_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
load_store_root_ranges[0].NumDescriptors = 1;
load_store_root_ranges[0].BaseShaderRegister = 0;
load_store_root_ranges[0].RegisterSpace = 0;
load_store_root_ranges[0].OffsetInDescriptorsFromTableStart = 0;
load_store_root_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
load_store_root_ranges[1].NumDescriptors = 1;
load_store_root_ranges[1].BaseShaderRegister = 0;
load_store_root_ranges[1].RegisterSpace = 0;
load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 1;
// Parameter 1 is the destination.
D3D12_DESCRIPTOR_RANGE load_store_root_dest_range;
load_store_root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
load_store_root_dest_range.NumDescriptors = 1;
load_store_root_dest_range.BaseShaderRegister = 0;
load_store_root_dest_range.RegisterSpace = 0;
load_store_root_dest_range.OffsetInDescriptorsFromTableStart = 0;
load_store_root_parameters[1].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
load_store_root_parameters[1].DescriptorTable.pDescriptorRanges =
load_store_root_ranges;
&load_store_root_dest_range;
load_store_root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 2 is the source.
D3D12_DESCRIPTOR_RANGE load_store_root_source_range;
load_store_root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
load_store_root_source_range.NumDescriptors = 1;
load_store_root_source_range.BaseShaderRegister = 0;
load_store_root_source_range.RegisterSpace = 0;
load_store_root_source_range.OffsetInDescriptorsFromTableStart = 0;
load_store_root_parameters[2].ParameterType =
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
load_store_root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
load_store_root_parameters[2].DescriptorTable.pDescriptorRanges =
&load_store_root_source_range;
load_store_root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC load_store_root_desc;
load_store_root_desc.NumParameters =
UINT(xe::countof(load_store_root_parameters));
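The filled-out D3D12_ROOT_SIGNATURE_DESC is turned into an ID3D12RootSignature through the ui::d3d12::util::CreateRootSignature helper called a few lines below. Its implementation is not part of this diff; a minimal wrapper of the same shape (taking the device directly instead of the D3D12Provider, as an assumption) would be:

ID3D12RootSignature* CreateRootSignature(ID3D12Device* device,
                                         const D3D12_ROOT_SIGNATURE_DESC& desc) {
  ID3DBlob* blob;
  ID3DBlob* error_blob = nullptr;
  if (FAILED(D3D12SerializeRootSignature(&desc, D3D_ROOT_SIGNATURE_VERSION_1,
                                         &blob, &error_blob))) {
    if (error_blob) {
      error_blob->Release();
    }
    return nullptr;
  }
  if (error_blob) {
    error_blob->Release();
  }
  ID3D12RootSignature* root_signature = nullptr;
  device->CreateRootSignature(0, blob->GetBufferPointer(),
                              blob->GetBufferSize(),
                              IID_PPV_ARGS(&root_signature));
  blob->Release();
  return root_signature;
}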
@@ -226,10 +236,8 @@ bool RenderTargetCache::Initialize(const TextureCache* texture_cache) {
Shutdown();
return false;
}
// Create the clear root signature (the same, but with the UAV only).
load_store_root_ranges[1].OffsetInDescriptorsFromTableStart = 0;
load_store_root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
++load_store_root_parameters[1].DescriptorTable.pDescriptorRanges;
// Create the clear root signature (the same, but with the destination only).
load_store_root_desc.NumParameters = 2;
edram_clear_root_signature_ =
ui::d3d12::util::CreateRootSignature(provider, load_store_root_desc);
if (edram_clear_root_signature_ == nullptr) {

@@ -1359,8 +1367,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
// bilinear filtering), applying exponent bias and swapping red and blue in
// a format-agnostic way, then the resulting color is written to a temporary
// RTV of the destination format.
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
uint32_t resolution_scale_log2 = resolution_scale_2x_ ? 1 : 0;
// Check if we need to apply the hack to remove the gap on the left and top
// sides of the screen caused by half-pixel offset becoming whole pixel offset
@ -1423,33 +1431,50 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
}

// Write the source and destination descriptors.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
return false;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source;
if (bindless_resources_used_) {
if (resolution_scale_2x_) {
if (!command_processor_->RequestOneUseSingleViewDescriptors(
1, &descriptor_dest)) {
return false;
}
} else {
descriptor_dest = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kSharedMemoryRawUAV);
}
descriptor_source = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
} else {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(
2, descriptors)) {
return false;
}
descriptor_dest = descriptors[0];
if (!resolution_scale_2x_) {
shared_memory->WriteRawUAVDescriptor(descriptor_dest.first);
}
descriptor_source = descriptors[1];
WriteEDRAMRawSRVDescriptor(descriptor_source.first);
}
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
WriteEDRAMRawSRVDescriptor(descriptor_cpu_start);
if (resolution_scale_2x_) {
texture_cache->UseScaledResolveBufferForWriting();
// Can't address more than 512 MB directly on Nvidia - binding only a part
// of the buffer.
texture_cache->CreateScaledResolveBufferRawUAV(
provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
dest_address >> 12,
descriptor_dest.first, dest_address >> 12,
((dest_address + dest_size - 1) >> 12) - (dest_address >> 12) + 1);
} else {
shared_memory->UseForWriting();
shared_memory->WriteRawUAVDescriptor(
provider->OffsetViewDescriptor(descriptor_cpu_start, 1));
// Descriptor already written.
}
command_processor_->SubmitBarriers();

// Dispatch the computation.
command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_source.second);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_dest.second);
EDRAMLoadStoreRootConstants root_constants;
// Address is adjusted to the first modified tile, so using & 31 as the
// destination offset.
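The branch above is the recurring pattern of this change: in bindless mode the EDRAM and shared-memory views already exist at fixed slots in the shared heap, while the bindful path requests transient descriptors per use and then writes the views into them. A condensed sketch of that acquisition, assuming DescriptorCPUGPUHandlePair is a CPU/GPU handle pair and using template stand-ins for the Xenia types so the snippet stays self-contained:

#include <d3d12.h>
#include <utility>

using DescriptorCPUGPUHandlePair =
    std::pair<D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_GPU_DESCRIPTOR_HANDLE>;

// CommandProcessorT stands in for D3D12CommandProcessor, SystemViewT for its
// SystemBindlessView enum; the two methods are used exactly as in the hunk
// above.
template <typename CommandProcessorT, typename SystemViewT>
bool AcquireViewDescriptorSketch(CommandProcessorT& command_processor,
                                 bool bindless_resources_used,
                                 SystemViewT system_view,
                                 DescriptorCPUGPUHandlePair& descriptor_out,
                                 bool& needs_view_write_out) {
  if (bindless_resources_used) {
    // Bindless: the view was created once at startup; just fetch its handles.
    descriptor_out =
        command_processor.GetSystemBindlessViewHandlePair(system_view);
    needs_view_write_out = false;
    return true;
  }
  // Bindful: take a one-use slot from the transient heap; the caller must
  // create or copy the actual view into descriptor_out.first before use.
  needs_view_write_out = true;
  return command_processor.RequestOneUseSingleViewDescriptors(1,
                                                              &descriptor_out);
}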
@ -1488,10 +1513,11 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
}
command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);

command_processor_->SetComputePipeline(
src_64bpp ? edram_tile_sample_64bpp_pipeline_
: edram_tile_sample_32bpp_pipeline_);
command_processor_->SubmitBarriers();
// 1 group per destination 80x16 region.
uint32_t group_count_x = row_width_ss_div_80, group_count_y = rows;
if (msaa_samples >= MsaaSamples::k2X) {
@ -1572,14 +1598,30 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
if (resolve_target == nullptr) {
return false;
}
// Descriptors. 2 for EDRAM load, 1 for conversion.
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 3, 3,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
return false;
// Descriptors.
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_copy_buffer;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_rt;
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
if (bindless_resources_used_) {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
if (!command_processor_->RequestOneUseSingleViewDescriptors(
2, descriptors)) {
return false;
}
descriptor_copy_buffer = descriptors[0];
descriptor_rt = descriptors[1];
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
} else {
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[3];
if (!command_processor_->RequestOneUseSingleViewDescriptors(
3, descriptors)) {
return false;
}
descriptor_copy_buffer = descriptors[0];
descriptor_rt = descriptors[1];
descriptor_edram = descriptors[2];
WriteEDRAMRawSRVDescriptor(descriptor_edram.first);
}
// Buffer for copying.
D3D12_RESOURCE_STATES copy_buffer_state =
@ -1616,11 +1658,12 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
0, sizeof(load_root_constants) / sizeof(uint32_t), &load_root_constants,
0);

WriteEDRAMRawSRVDescriptor(descriptor_cpu_start);
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
copy_buffer, render_target->copy_buffer_size);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_edram.second);
ui::d3d12::util::CreateRawBufferUAV(device, descriptor_copy_buffer.first,
copy_buffer,
render_target->copy_buffer_size);
command_list->D3DSetComputeRootDescriptorTable(
1, descriptor_copy_buffer.second);

EDRAMLoadStoreMode mode = GetLoadStoreMode(false, src_format);
command_processor_->SetComputePipeline(
@ -1630,13 +1673,6 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
command_processor_->PushUAVBarrier(copy_buffer);

// Go to the next descriptor set.

descriptor_cpu_start =
provider->OffsetViewDescriptor(descriptor_cpu_start, 2);
descriptor_gpu_start =
provider->OffsetViewDescriptor(descriptor_gpu_start, 2);

// Copy the EDRAM buffer contents to the source texture.

#if 0
@ -1770,8 +1806,8 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
rt_srv_desc.Texture2D.PlaneSlice = 0;
rt_srv_desc.Texture2D.ResourceMinLODClamp = 0.0f;
device->CreateShaderResourceView(render_target->resource, &rt_srv_desc,
descriptor_cpu_start);
command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_gpu_start);
descriptor_rt.first);
command_list->D3DSetGraphicsRootDescriptorTable(1, descriptor_rt.second);

command_processor_->SubmitBarriers();
command_processor_->SetSamplePositions(MsaaSamples::k1X);
@ -1878,17 +1914,17 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
uint32_t samples_x_log2 = msaa_samples >= MsaaSamples::k4X ? 1 : 0;
uint32_t samples_y_log2 = msaa_samples >= MsaaSamples::k2X ? 1 : 0;

// Get everything needed for clearing.
auto command_list = command_processor_->GetDeferredCommandList();
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1,
descriptor_cpu_start, descriptor_gpu_start) ==
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
return false;
// Get transient data needed for clearing.
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
if (bindless_resources_used_) {
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawUAV);
} else {
if (!command_processor_->RequestOneUseSingleViewDescriptors(
1, &descriptor_edram)) {
return false;
}
WriteEDRAMRawUAVDescriptor(descriptor_edram.first);
}

// Submit the clear.
@ -1935,11 +1971,11 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
root_constants.clear_color_high = regs[reg].u32;
command_processor_->SetComputePipeline(edram_clear_32bpp_pipeline_);
}
auto command_list = command_processor_->GetDeferredCommandList();
command_list->D3DSetComputeRootSignature(edram_clear_root_signature_);
command_list->D3DSetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
WriteEDRAMRawUAVDescriptor(descriptor_cpu_start);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_edram.second);
// 1 group per 80x16 samples. Resolution scale handled in the shader itself.
command_list->D3DDispatch(row_width_ss_div_80, rows, 1);
CommitEDRAMBufferUAVWrites(true);
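CommitEDRAMBufferUAVWrites(true) after the dispatch presumably amounts to a UAV barrier on the EDRAM buffer so that following passes observe the cleared contents (the same goes for PushUAVBarrier used elsewhere in this file). A minimal sketch of that barrier with the raw command list:

#include <d3d12.h>

// Make prior unordered-access writes to a resource visible to subsequent
// dispatches and draws on the same queue.
void UAVBarrierSketch(ID3D12GraphicsCommandList* command_list,
                      ID3D12Resource* resource) {
  D3D12_RESOURCE_BARRIER barrier = {};
  barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV;
  barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE;
  barrier.UAV.pResource = resource;
  command_list->ResourceBarrier(1, &barrier);
}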
@ -2150,7 +2186,7 @@ void RenderTargetCache::FlushAndUnbindRenderTargets() {
ClearBindings();
}

void RenderTargetCache::WriteEDRAMUint32UAVDescriptor(
void RenderTargetCache::WriteEDRAMR32UintUAVDescriptor(
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
@ -2158,7 +2194,31 @@ void RenderTargetCache::WriteEDRAMUint32UAVDescriptor(
1, handle,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)),
uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}

void RenderTargetCache::WriteEDRAMRawSRVDescriptor(
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
device->CopyDescriptorsSimple(
1, handle,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kRawSRV)),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}

void RenderTargetCache::WriteEDRAMRawUAVDescriptor(
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
auto device = provider->GetDevice();
device->CopyDescriptorsSimple(
1, handle,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kRawUAV)),
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}
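These Write*Descriptor helpers copy views that were created once into a small non-shader-visible staging heap (edram_buffer_descriptor_heap_start_) instead of re-creating them on every request, since copying a descriptor is cheaper than a Create*View call. A minimal sketch of the two underlying operations, with the offset handling being what OffsetViewDescriptor presumably does:

#include <cstdint>
#include <d3d12.h>

// Advance a CBV/SRV/UAV descriptor handle by `index` slots.
D3D12_CPU_DESCRIPTOR_HANDLE OffsetViewDescriptorSketch(
    ID3D12Device* device, D3D12_CPU_DESCRIPTOR_HANDLE start, uint32_t index) {
  start.ptr += SIZE_T(index) * device->GetDescriptorHandleIncrementSize(
                                   D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
  return start;
}

// Copy one staged descriptor (from a CPU-only heap) into the caller-provided
// slot, for example a transient or bindless shader-visible heap slot.
void CopyStagedDescriptorSketch(ID3D12Device* device,
                                D3D12_CPU_DESCRIPTOR_HANDLE dest,
                                D3D12_CPU_DESCRIPTOR_HANDLE staged_source) {
  device->CopyDescriptorsSimple(1, dest, staged_source,
                                D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
}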
@ -2283,13 +2343,22 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
// Clear and ignore the old 32-bit float depth - the non-ROV path is
// inaccurate anyway, and this is backend-specific, not a part of a guest
// trace.
D3D12_CPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_cpu;
D3D12_GPU_DESCRIPTOR_HANDLE shader_visbile_descriptor_gpu;
if (command_processor_->RequestViewDescriptors(
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 1, 1,
shader_visbile_descriptor_cpu, shader_visbile_descriptor_gpu) !=
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
WriteEDRAMUint32UAVDescriptor(shader_visbile_descriptor_cpu);
bool edram_shader_visible_r32_uav_obtained;
ui::d3d12::util::DescriptorCPUGPUHandlePair edram_shader_visible_r32_uav;
if (bindless_resources_used_) {
edram_shader_visible_r32_uav_obtained = true;
edram_shader_visible_r32_uav =
command_processor_->GetSystemBindlessViewHandlePair(
D3D12CommandProcessor::SystemBindlessView::kEDRAMR32UintUAV);
} else {
edram_shader_visible_r32_uav_obtained =
command_processor_->RequestOneUseSingleViewDescriptors(
1, &edram_shader_visible_r32_uav);
if (edram_shader_visible_r32_uav_obtained) {
WriteEDRAMR32UintUAVDescriptor(edram_shader_visible_r32_uav.first);
}
}
if (edram_shader_visible_r32_uav_obtained) {
UINT clear_value[4] = {0, 0, 0, 0};
D3D12_RECT clear_rect;
clear_rect.left = kEDRAMSize >> 2;
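The clear in the next hunk goes through ID3D12GraphicsCommandList::ClearUnorderedAccessViewUint, which needs the same UAV twice: a GPU handle that lives in the currently bound shader-visible heap and a CPU handle from a non-shader-visible heap describing the same view. A minimal sketch of that call shape for a buffer UAV, with the cleared range purely illustrative:

#include <d3d12.h>

void ClearBufferUAVRegionSketch(ID3D12GraphicsCommandList* command_list,
                                D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle,
                                D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle,
                                ID3D12Resource* buffer, LONG first_element,
                                LONG element_count) {
  const UINT clear_value[4] = {0, 0, 0, 0};
  // For buffer UAVs the rectangle selects an element range: left/right are
  // element offsets, top/bottom span exactly one row.
  D3D12_RECT rect;
  rect.left = first_element;
  rect.top = 0;
  rect.right = first_element + element_count;
  rect.bottom = 1;
  command_list->ClearUnorderedAccessViewUint(gpu_handle, cpu_handle, buffer,
                                             clear_value, 1, &rect);
}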
@ -2301,13 +2370,11 @@ void RenderTargetCache::RestoreEDRAMSnapshot(const void* snapshot) {
// ClearUnorderedAccessView takes a shader-visible GPU descriptor and a
// non-shader-visible CPU descriptor.
command_list->D3DClearUnorderedAccessViewUint(
shader_visbile_descriptor_gpu,
edram_shader_visible_r32_uav.second,
provider->OffsetViewDescriptor(
edram_buffer_descriptor_heap_start_,
uint32_t(EDRAMBufferDescriptorIndex::kUint32UAV)),
uint32_t(EDRAMBufferDescriptorIndex::kR32UintUAV)),
edram_buffer_, clear_value, 1, &clear_rect);
} else {
XELOGE("Failed to get a UAV descriptor for invalidating 32-bit depth");
}
}
}
@ -2343,30 +2410,6 @@ void RenderTargetCache::CommitEDRAMBufferUAVWrites(bool force) {
|
|||
edram_buffer_modified_ = false;
|
||||
}
|
||||
|
||||
void RenderTargetCache::WriteEDRAMRawSRVDescriptor(
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
device->CopyDescriptorsSimple(
|
||||
1, handle,
|
||||
provider->OffsetViewDescriptor(
|
||||
edram_buffer_descriptor_heap_start_,
|
||||
uint32_t(EDRAMBufferDescriptorIndex::kRawSRV)),
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
|
||||
void RenderTargetCache::WriteEDRAMRawUAVDescriptor(
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
device->CopyDescriptorsSimple(
|
||||
1, handle,
|
||||
provider->OffsetViewDescriptor(
|
||||
edram_buffer_descriptor_heap_start_,
|
||||
uint32_t(EDRAMBufferDescriptorIndex::kRawUAV)),
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
}
|
||||
|
||||
void RenderTargetCache::ClearBindings() {
|
||||
current_surface_pitch_ = 0;
|
||||
current_msaa_samples_ = MsaaSamples::k1X;
|
||||
|
@ -2710,13 +2753,24 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
|||
}
|
||||
|
||||
// Allocate descriptors for the buffers.
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||
if (command_processor_->RequestViewDescriptors(
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
|
||||
descriptor_cpu_start, descriptor_gpu_start) ==
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
||||
return;
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_edram;
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_source;
|
||||
if (bindless_resources_used_) {
|
||||
if (!command_processor_->RequestOneUseSingleViewDescriptors(
|
||||
1, &descriptor_source)) {
|
||||
return;
|
||||
}
|
||||
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
|
||||
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawUAV);
|
||||
} else {
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
|
||||
if (!command_processor_->RequestOneUseSingleViewDescriptors(2,
|
||||
descriptors)) {
|
||||
return;
|
||||
}
|
||||
descriptor_edram = descriptors[0];
|
||||
WriteEDRAMRawUAVDescriptor(descriptor_edram.first);
|
||||
descriptor_source = descriptors[1];
|
||||
}
|
||||
|
||||
// Get the buffer for copying.
|
||||
|
@ -2740,14 +2794,13 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
|
|||
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_UNORDERED_ACCESS);
|
||||
|
||||
// Set up the bindings.
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
auto device =
|
||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||
command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
|
||||
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, copy_buffer,
|
||||
copy_buffer_size);
|
||||
WriteEDRAMRawUAVDescriptor(
|
||||
provider->OffsetViewDescriptor(descriptor_cpu_start, 1));
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_source.first,
|
||||
copy_buffer, copy_buffer_size);
|
||||
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_source.second);
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_edram.second);
|
||||
|
||||
// Sort the bindings in ascending order of EDRAM base so data in the render
|
||||
// targets placed farther in EDRAM isn't lost in case of overlap.
|
||||
|
@ -2857,13 +2910,23 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
|||
auto command_list = command_processor_->GetDeferredCommandList();
|
||||
|
||||
// Allocate descriptors for the buffers.
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||
if (command_processor_->RequestViewDescriptors(
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
|
||||
descriptor_cpu_start, descriptor_gpu_start) ==
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
||||
return;
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptor_dest, descriptor_edram;
|
||||
if (bindless_resources_used_) {
|
||||
if (!command_processor_->RequestOneUseSingleViewDescriptors(
|
||||
1, &descriptor_dest)) {
|
||||
return;
|
||||
}
|
||||
descriptor_edram = command_processor_->GetSystemBindlessViewHandlePair(
|
||||
D3D12CommandProcessor::SystemBindlessView::kEDRAMRawSRV);
|
||||
} else {
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
|
||||
if (!command_processor_->RequestOneUseSingleViewDescriptors(2,
|
||||
descriptors)) {
|
||||
return;
|
||||
}
|
||||
descriptor_dest = descriptors[0];
|
||||
descriptor_edram = descriptors[1];
|
||||
WriteEDRAMRawSRVDescriptor(descriptor_edram.first);
|
||||
}
|
||||
|
||||
// Get the buffer for copying.
|
||||
|
@ -2892,14 +2955,13 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
|
|||
TransitionEDRAMBuffer(D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE);
|
||||
|
||||
// Set up the bindings.
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
auto device =
|
||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||
command_list->D3DSetComputeRootSignature(edram_load_store_root_signature_);
|
||||
WriteEDRAMRawSRVDescriptor(descriptor_cpu_start);
|
||||
ui::d3d12::util::CreateRawBufferUAV(
|
||||
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
|
||||
copy_buffer, copy_buffer_size);
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||
command_list->D3DSetComputeRootDescriptorTable(2, descriptor_edram.second);
|
||||
ui::d3d12::util::CreateRawBufferUAV(device, descriptor_dest.first,
|
||||
copy_buffer, copy_buffer_size);
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_dest.second);
|
||||
|
||||
// Load each render target.
|
||||
for (uint32_t i = 0; i < render_target_count; ++i) {
|
||||
|
|
|
@ -251,7 +251,7 @@ class RenderTargetCache {

RenderTargetCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file, TraceWriter* trace_writer,
bool edram_rov_used);
bool bindless_resources_used, bool edram_rov_used);
~RenderTargetCache();

bool Initialize(const TextureCache* texture_cache);
@ -284,7 +284,9 @@ class RenderTargetCache {
// the command processor takes over framebuffer bindings to draw something
// special. May change the CBV/SRV/UAV descriptor heap.
void FlushAndUnbindRenderTargets();
void WriteEDRAMUint32UAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMR32UintUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);

// Totally necessary to rely on the base format - Too Human switches between
// 2_10_10_10_FLOAT and 2_10_10_10_FLOAT_AS_16_16_16_16 every draw.
@ -436,9 +438,6 @@ class RenderTargetCache {
void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state);
void CommitEDRAMBufferUAVWrites(bool force);

void WriteEDRAMRawSRVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);
void WriteEDRAMRawUAVDescriptor(D3D12_CPU_DESCRIPTOR_HANDLE handle);

void ClearBindings();

#if 0
@ -518,6 +517,7 @@ class RenderTargetCache {
D3D12CommandProcessor* command_processor_;
RegisterFile* register_file_;
TraceWriter* trace_writer_;
bool bindless_resources_used_;
bool edram_rov_used_;

// Whether 1 guest pixel is rendered as 2x2 host pixels (currently only
@ -538,7 +538,7 @@ class RenderTargetCache {
kRawSRV,
kRawUAV,
// For ROV access primarily.
kUint32UAV,
kR32UintUAV,

kCount,
};
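kRawUAV and kR32UintUAV (renamed from kUint32UAV) are two precreated views of the same EDRAM buffer: a raw ByteAddressBuffer-style UAV for the load/store shaders and a typed R32_UINT UAV used primarily by the ROV path and, above, for ClearUnorderedAccessViewUint. A minimal sketch of how the two view descriptions differ, with the element count illustrative:

#include <d3d12.h>

// Raw UAV: R32_TYPELESS plus the RAW flag, addressed as 4-byte words.
D3D12_UNORDERED_ACCESS_VIEW_DESC MakeRawBufferUAVDescSketch(UINT word_count) {
  D3D12_UNORDERED_ACCESS_VIEW_DESC desc = {};
  desc.Format = DXGI_FORMAT_R32_TYPELESS;
  desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
  desc.Buffer.FirstElement = 0;
  desc.Buffer.NumElements = word_count;
  desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_RAW;
  return desc;
}

// Typed UAV: R32_UINT elements, no raw flag.
D3D12_UNORDERED_ACCESS_VIEW_DESC MakeR32UintBufferUAVDescSketch(
    UINT element_count) {
  D3D12_UNORDERED_ACCESS_VIEW_DESC desc = {};
  desc.Format = DXGI_FORMAT_R32_UINT;
  desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
  desc.Buffer.FirstElement = 0;
  desc.Buffer.NumElements = element_count;
  return desc;
}

Either description is then passed to ID3D12Device::CreateUnorderedAccessView together with the EDRAM buffer and a CPU descriptor handle.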
|
|
@ -47,10 +47,10 @@ cbuffer XeEDRAMLoadStoreConstants : register(b0) {
#define xe_edram_clear_depth24 (xe_edram_load_store_constants.z)
#define xe_edram_clear_depth32 (xe_edram_load_store_constants.w)

RWByteAddressBuffer xe_edram_load_store_dest : register(u0);
#ifndef XE_EDRAM_WRITE_ONLY
ByteAddressBuffer xe_edram_load_store_source : register(t0);
#endif
RWByteAddressBuffer xe_edram_load_store_dest : register(u0);

uint2 XeEDRAMSampleCountLog2() {
return (xe_edram_base_samples_2x_depth_pitch >> uint2(12u, 11u)) & 1u;
@ -27,8 +27,8 @@ cbuffer XeTextureLoadConstants : register(b0) {

#define XeTextureLoadGuestPitchTiled 0xFFFFFFFFu

ByteAddressBuffer xe_texture_load_source : register(t0);
RWByteAddressBuffer xe_texture_load_dest : register(u0);
ByteAddressBuffer xe_texture_load_source : register(t0);

// bpb and bpb_log2 are separate because bpb may be not a power of 2 (like 96).
uint4 XeTextureLoadGuestBlockOffsets(uint3 block_index, uint bpb,
@ -494,7 +494,8 @@ bool SharedMemory::AreTiledResourcesUsed() const {
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
// As of October 8th, 2018, PIX doesn't support tiled buffers.
// FIXME(Triang3l): Re-enable tiled resources with PIX once fixed.
return provider->GetTiledResourcesTier() >= 1 &&
return provider->GetTiledResourcesTier() !=
D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED &&
provider->GetGraphicsAnalysis() == nullptr;
}
@ -12,6 +12,7 @@
#include "third_party/xxhash/xxhash.h"

#include <algorithm>
#include <cfloat>
#include <cstring>

#include "xenia/base/assert.h"
@ -92,7 +93,6 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_r10g11b11_rgba16_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_r11g11b10_rgba16_cs.h"

constexpr uint32_t TextureCache::Texture::kCachedSRVDescriptorSwizzleMissing;
constexpr uint32_t TextureCache::SRVDescriptorCachePage::kHeapSize;
constexpr uint32_t TextureCache::LoadConstants::kGuestPitchTiled;
constexpr uint32_t TextureCache::kScaledResolveBufferSizeLog2;
@ -905,9 +905,11 @@ const TextureCache::ResolveTileModeInfo

TextureCache::TextureCache(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
bool bindless_resources_used,
SharedMemory* shared_memory)
: command_processor_(command_processor),
register_file_(register_file),
bindless_resources_used_(bindless_resources_used),
shared_memory_(shared_memory) {}

TextureCache::~TextureCache() { Shutdown(); }
@ -920,7 +922,8 @@ bool TextureCache::Initialize(bool edram_rov_used) {
// Not currently supported with the RTV/DSV output path for various reasons.
// As of November 27th, 2018, PIX doesn't support tiled buffers.
if (cvars::d3d12_resolution_scale >= 2 && edram_rov_used &&
provider->GetTiledResourcesTier() >= 1 &&
provider->GetTiledResourcesTier() !=
D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED &&
provider->GetGraphicsAnalysis() == nullptr &&
provider->GetVirtualAddressBitsPerResource() >=
kScaledResolveBufferSizeLog2) {
|
@ -947,28 +950,34 @@ bool TextureCache::Initialize(bool edram_rov_used) {
scaled_resolve_heap_count_ = 0;

// Create the loading root signature.
D3D12_ROOT_PARAMETER root_parameters[2];
// Parameter 0 is constants (changed very often when untiling).
D3D12_ROOT_PARAMETER root_parameters[3];
// Parameter 0 is constants (changed multiple times when untiling).
root_parameters[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV;
root_parameters[0].Descriptor.ShaderRegister = 0;
root_parameters[0].Descriptor.RegisterSpace = 0;
root_parameters[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 1 is source and target.
D3D12_DESCRIPTOR_RANGE root_copy_ranges[2];
root_copy_ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_copy_ranges[0].NumDescriptors = 1;
root_copy_ranges[0].BaseShaderRegister = 0;
root_copy_ranges[0].RegisterSpace = 0;
root_copy_ranges[0].OffsetInDescriptorsFromTableStart = 0;
root_copy_ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_copy_ranges[1].NumDescriptors = 1;
root_copy_ranges[1].BaseShaderRegister = 0;
root_copy_ranges[1].RegisterSpace = 0;
root_copy_ranges[1].OffsetInDescriptorsFromTableStart = 1;
// Parameter 1 is the destination.
D3D12_DESCRIPTOR_RANGE root_dest_range;
root_dest_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
root_dest_range.NumDescriptors = 1;
root_dest_range.BaseShaderRegister = 0;
root_dest_range.RegisterSpace = 0;
root_dest_range.OffsetInDescriptorsFromTableStart = 0;
root_parameters[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 2;
root_parameters[1].DescriptorTable.pDescriptorRanges = root_copy_ranges;
root_parameters[1].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[1].DescriptorTable.pDescriptorRanges = &root_dest_range;
root_parameters[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
// Parameter 2 is the source.
D3D12_DESCRIPTOR_RANGE root_source_range;
root_source_range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
root_source_range.NumDescriptors = 1;
root_source_range.BaseShaderRegister = 0;
root_source_range.RegisterSpace = 0;
root_source_range.OffsetInDescriptorsFromTableStart = 0;
root_parameters[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE;
root_parameters[2].DescriptorTable.NumDescriptorRanges = 1;
root_parameters[2].DescriptorTable.pDescriptorRanges = &root_source_range;
root_parameters[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
D3D12_ROOT_SIGNATURE_DESC root_signature_desc;
root_signature_desc.NumParameters = UINT(xe::countof(root_parameters));
root_signature_desc.pParameters = root_parameters;
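Parameter 0 stays a root CBV rather than root constants because the load constants are rewritten for every untiling dispatch; later in this file each dispatch memcpys a fresh constants block into an upload allocation and rebinds its GPU address. A minimal sketch of that per-dispatch step using the raw command list (the constants layout and the upload allocation are illustrative; D3D12 requires the address to be aligned to D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT):

#include <cstdint>
#include <cstring>
#include <d3d12.h>

struct LoadConstantsSketch {
  uint32_t guest_base;
  uint32_t guest_pitch;
  uint32_t host_offset;
  uint32_t host_pitch;
};  // Illustrative only, not the real LoadConstants layout.

void BindLoadConstantsSketch(ID3D12GraphicsCommandList* command_list,
                             void* upload_mapping,
                             D3D12_GPU_VIRTUAL_ADDRESS upload_gpu_address,
                             const LoadConstantsSketch& constants) {
  // upload_mapping/upload_gpu_address point at a suballocated slice of a
  // persistently mapped upload heap.
  std::memcpy(upload_mapping, &constants, sizeof(constants));
  command_list->SetComputeRootConstantBufferView(0, upload_gpu_address);
}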
@ -1033,6 +1042,8 @@ bool TextureCache::Initialize(bool edram_rov_used) {
}
}

srv_descriptor_cache_allocated_ = 0;

// Create a heap with null SRV descriptors, since it's faster to copy a
// descriptor than to create an SRV, and null descriptors are used a lot (for
// the signed version when only unsigned is used, for instance).
@ -1137,6 +1148,14 @@ void TextureCache::ClearCache() {
Texture* texture = texture_pair.second;
shared_memory_->UnwatchMemoryRange(texture->base_watch_handle);
shared_memory_->UnwatchMemoryRange(texture->mip_watch_handle);
// Bindful descriptor cache will be cleared entirely now, so only release
// bindless descriptors.
if (bindless_resources_used_) {
for (auto descriptor_pair : texture->srv_descriptors) {
command_processor_->ReleaseViewBindlessDescriptorImmediately(
descriptor_pair.second);
}
}
texture->resource->Release();
delete texture;
}
@ -1148,6 +1167,7 @@ void TextureCache::ClearCache() {

// Clear texture descriptor cache.
srv_descriptor_cache_free_.clear();
srv_descriptor_cache_allocated_ = 0;
for (auto& page : srv_descriptor_cache_) {
page.heap->Release();
}
@ -1155,7 +1175,7 @@ void TextureCache::ClearCache() {
}

void TextureCache::TextureFetchConstantWritten(uint32_t index) {
texture_keys_in_sync_ &= ~(1u << index);
texture_bindings_in_sync_ &= ~(1u << index);
}

void TextureCache::BeginFrame() {
@ -1214,12 +1234,18 @@ void TextureCache::BeginFrame() {
|
|||
// Exclude the texture from the memory usage counter.
|
||||
textures_total_size_ -= texture->resource_size;
|
||||
// Destroy the texture.
|
||||
if (texture->cached_srv_descriptor_swizzle !=
|
||||
Texture::kCachedSRVDescriptorSwizzleMissing) {
|
||||
srv_descriptor_cache_free_.push_back(texture->cached_srv_descriptor);
|
||||
}
|
||||
shared_memory_->UnwatchMemoryRange(texture->base_watch_handle);
|
||||
shared_memory_->UnwatchMemoryRange(texture->mip_watch_handle);
|
||||
if (bindless_resources_used_) {
|
||||
for (auto descriptor_pair : texture->srv_descriptors) {
|
||||
command_processor_->ReleaseViewBindlessDescriptorImmediately(
|
||||
descriptor_pair.second);
|
||||
}
|
||||
} else {
|
||||
for (auto descriptor_pair : texture->srv_descriptors) {
|
||||
srv_descriptor_cache_free_.push_back(descriptor_pair.second);
|
||||
}
|
||||
}
|
||||
texture->resource->Release();
|
||||
delete texture;
|
||||
}
|
||||
|
@ -1262,8 +1288,10 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
// loading may be needed in some draw call later, which may have the same
|
||||
// key for some binding as before the invalidation, but texture_invalidated_
|
||||
// being false (menu background in Halo 3).
|
||||
std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
|
||||
texture_keys_in_sync_ = 0;
|
||||
for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
|
||||
texture_bindings_[i].Clear();
|
||||
}
|
||||
texture_bindings_in_sync_ = 0;
|
||||
}
|
||||
|
||||
// Update the texture keys and the textures.
|
||||
|
@ -1272,7 +1300,7 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
while (xe::bit_scan_forward(textures_remaining, &index)) {
|
||||
uint32_t index_bit = uint32_t(1) << index;
|
||||
textures_remaining &= ~index_bit;
|
||||
if (texture_keys_in_sync_ & index_bit) {
|
||||
if (texture_bindings_in_sync_ & index_bit) {
|
||||
continue;
|
||||
}
|
||||
TextureBinding& binding = texture_bindings_[index];
|
||||
|
@ -1282,10 +1310,12 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
uint8_t old_swizzled_signs = binding.swizzled_signs;
|
||||
BindingInfoFromFetchConstant(fetch, binding.key, &binding.host_swizzle,
|
||||
&binding.swizzled_signs);
|
||||
texture_keys_in_sync_ |= index_bit;
|
||||
texture_bindings_in_sync_ |= index_bit;
|
||||
if (binding.key.IsInvalid()) {
|
||||
binding.texture = nullptr;
|
||||
binding.texture_signed = nullptr;
|
||||
binding.descriptor_index = UINT32_MAX;
|
||||
binding.descriptor_index_signed = UINT32_MAX;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -1305,27 +1335,64 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
if (key_changed ||
|
||||
!texture_util::IsAnySignNotSigned(old_swizzled_signs)) {
|
||||
binding.texture = FindOrCreateTexture(binding.key);
|
||||
binding.descriptor_index =
|
||||
binding.texture
|
||||
? FindOrCreateTextureDescriptor(*binding.texture, false,
|
||||
binding.host_swizzle)
|
||||
: UINT32_MAX;
|
||||
load_unsigned_data = true;
|
||||
}
|
||||
} else {
|
||||
binding.texture = nullptr;
|
||||
binding.descriptor_index = UINT32_MAX;
|
||||
}
|
||||
if (texture_util::IsAnySignSigned(binding.swizzled_signs)) {
|
||||
if (key_changed || !texture_util::IsAnySignSigned(old_swizzled_signs)) {
|
||||
TextureKey signed_key = binding.key;
|
||||
signed_key.signed_separate = 1;
|
||||
binding.texture_signed = FindOrCreateTexture(signed_key);
|
||||
binding.descriptor_index_signed =
|
||||
binding.texture
|
||||
? FindOrCreateTextureDescriptor(*binding.texture_signed, true,
|
||||
binding.host_swizzle)
|
||||
: UINT32_MAX;
|
||||
load_signed_data = true;
|
||||
}
|
||||
} else {
|
||||
binding.texture_signed = nullptr;
|
||||
binding.descriptor_index_signed = UINT32_MAX;
|
||||
}
|
||||
} else {
|
||||
// Same resource for both unsigned and signed, but descriptor formats may
|
||||
// be different.
|
||||
if (key_changed) {
|
||||
binding.texture = FindOrCreateTexture(binding.key);
|
||||
load_unsigned_data = true;
|
||||
}
|
||||
binding.texture_signed = nullptr;
|
||||
if (texture_util::IsAnySignNotSigned(binding.swizzled_signs)) {
|
||||
if (key_changed ||
|
||||
!texture_util::IsAnySignNotSigned(old_swizzled_signs)) {
|
||||
binding.descriptor_index =
|
||||
binding.texture
|
||||
? FindOrCreateTextureDescriptor(*binding.texture, false,
|
||||
binding.host_swizzle)
|
||||
: UINT32_MAX;
|
||||
}
|
||||
} else {
|
||||
binding.descriptor_index = UINT32_MAX;
|
||||
}
|
||||
if (texture_util::IsAnySignSigned(binding.swizzled_signs)) {
|
||||
if (key_changed || !texture_util::IsAnySignSigned(old_swizzled_signs)) {
|
||||
binding.descriptor_index_signed =
|
||||
binding.texture
|
||||
? FindOrCreateTextureDescriptor(*binding.texture, true,
|
||||
binding.host_swizzle)
|
||||
: UINT32_MAX;
|
||||
}
|
||||
} else {
|
||||
binding.descriptor_index_signed = UINT32_MAX;
|
||||
}
|
||||
}
|
||||
if (load_unsigned_data && binding.texture != nullptr) {
|
||||
LoadTextureData(binding.texture);
|
||||
|
@ -1368,208 +1435,132 @@ void TextureCache::RequestTextures(uint32_t used_texture_mask) {
|
|||
}
|
||||
}
|
||||
|
||||
uint64_t TextureCache::GetDescriptorHashForActiveTextures(
|
||||
const D3D12Shader::TextureSRV* texture_srvs,
|
||||
uint32_t texture_srv_count) const {
|
||||
XXH64_state_t hash_state;
|
||||
XXH64_reset(&hash_state, 0);
|
||||
for (uint32_t i = 0; i < texture_srv_count; ++i) {
|
||||
const D3D12Shader::TextureSRV& texture_srv = texture_srvs[i];
|
||||
// There can be multiple SRVs of the same texture.
|
||||
XXH64_update(&hash_state, &texture_srv.dimension,
|
||||
sizeof(texture_srv.dimension));
|
||||
XXH64_update(&hash_state, &texture_srv.is_signed,
|
||||
sizeof(texture_srv.is_signed));
|
||||
bool TextureCache::AreActiveTextureSRVKeysUpToDate(
|
||||
const TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const {
|
||||
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
|
||||
const TextureSRVKey& key = keys[i];
|
||||
const TextureBinding& binding =
|
||||
texture_bindings_[texture_srv.fetch_constant];
|
||||
XXH64_update(&hash_state, &binding.key, sizeof(binding.key));
|
||||
XXH64_update(&hash_state, &binding.host_swizzle,
|
||||
sizeof(binding.host_swizzle));
|
||||
XXH64_update(&hash_state, &binding.swizzled_signs,
|
||||
sizeof(binding.swizzled_signs));
|
||||
texture_bindings_[host_shader_bindings[i].fetch_constant];
|
||||
if (key.key != binding.key || key.host_swizzle != binding.host_swizzle ||
|
||||
key.swizzled_signs != binding.swizzled_signs) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return XXH64_digest(&hash_state);
|
||||
return true;
|
||||
}
|
||||
|
||||
void TextureCache::WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||
D3D12_SHADER_RESOURCE_VIEW_DESC desc;
|
||||
desc.Format = DXGI_FORMAT_UNKNOWN;
|
||||
Dimension binding_dimension;
|
||||
uint32_t mip_max_level, array_size;
|
||||
void TextureCache::WriteActiveTextureSRVKeys(
|
||||
TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const {
|
||||
for (uint32_t i = 0; i < host_shader_binding_count; ++i) {
|
||||
TextureSRVKey& key = keys[i];
|
||||
const TextureBinding& binding =
|
||||
texture_bindings_[host_shader_bindings[i].fetch_constant];
|
||||
key.key = binding.key;
|
||||
key.host_swizzle = binding.host_swizzle;
|
||||
key.swizzled_signs = binding.swizzled_signs;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCache::WriteActiveTextureBindfulSRV(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle) {
|
||||
assert_false(bindless_resources_used_);
|
||||
const TextureBinding& binding =
|
||||
texture_bindings_[host_shader_binding.fetch_constant];
|
||||
uint32_t descriptor_index = UINT32_MAX;
|
||||
Texture* texture = nullptr;
|
||||
ID3D12Resource* resource = nullptr;
|
||||
|
||||
const TextureBinding& binding = texture_bindings_[texture_srv.fetch_constant];
|
||||
if (!binding.key.IsInvalid()) {
|
||||
TextureFormat format = binding.key.format;
|
||||
|
||||
if (IsSignedVersionSeparate(format) && texture_srv.is_signed) {
|
||||
texture = binding.texture_signed;
|
||||
} else {
|
||||
texture = binding.texture;
|
||||
}
|
||||
if (texture != nullptr) {
|
||||
resource = texture->resource;
|
||||
}
|
||||
|
||||
if (texture_srv.is_signed) {
|
||||
if (!binding.key.IsInvalid() &&
|
||||
AreDimensionsCompatible(host_shader_binding.dimension,
|
||||
binding.key.dimension)) {
|
||||
if (host_shader_binding.is_signed) {
|
||||
// Not supporting signed compressed textures - hopefully DXN and DXT5A are
|
||||
// not used as signed.
|
||||
if (texture_util::IsAnySignSigned(binding.swizzled_signs)) {
|
||||
desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm;
|
||||
if (desc.Format == DXGI_FORMAT_UNKNOWN) {
|
||||
unsupported_format_features_used_[uint32_t(format)] |=
|
||||
kUnsupportedSnormBit;
|
||||
}
|
||||
descriptor_index = binding.descriptor_index_signed;
|
||||
texture = IsSignedVersionSeparate(binding.key.format)
|
||||
? binding.texture_signed
|
||||
: binding.texture;
|
||||
}
|
||||
} else {
|
||||
if (texture_util::IsAnySignNotSigned(binding.swizzled_signs)) {
|
||||
desc.Format = GetDXGIUnormFormat(binding.key);
|
||||
if (desc.Format == DXGI_FORMAT_UNKNOWN) {
|
||||
unsupported_format_features_used_[uint32_t(format)] |=
|
||||
kUnsupportedUnormBit;
|
||||
}
|
||||
descriptor_index = binding.descriptor_index;
|
||||
texture = binding.texture;
|
||||
}
|
||||
}
|
||||
|
||||
binding_dimension = binding.key.dimension;
|
||||
mip_max_level = binding.key.mip_max_level;
|
||||
array_size = binding.key.depth;
|
||||
// XE_GPU_SWIZZLE and D3D12_SHADER_COMPONENT_MAPPING are the same except for
|
||||
// one bit.
|
||||
desc.Shader4ComponentMapping =
|
||||
binding.host_swizzle |
|
||||
D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES;
|
||||
} else {
|
||||
binding_dimension = Dimension::k2D;
|
||||
mip_max_level = 0;
|
||||
array_size = 1;
|
||||
desc.Shader4ComponentMapping = D3D12_ENCODE_SHADER_4_COMPONENT_MAPPING(
|
||||
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
|
||||
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
|
||||
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0,
|
||||
D3D12_SHADER_COMPONENT_MAPPING_FORCE_VALUE_0);
|
||||
}
|
||||
|
||||
if (desc.Format == DXGI_FORMAT_UNKNOWN) {
|
||||
// A null descriptor must still have a valid format.
|
||||
desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||
resource = nullptr;
|
||||
}
|
||||
NullSRVDescriptorIndex null_descriptor_index;
|
||||
switch (texture_srv.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
|
||||
desc.Texture3D.MostDetailedMip = 0;
|
||||
desc.Texture3D.MipLevels = mip_max_level + 1;
|
||||
desc.Texture3D.ResourceMinLODClamp = 0.0f;
|
||||
if (binding_dimension != Dimension::k3D) {
|
||||
// Create a null descriptor so it's safe to sample this texture even
|
||||
// though it has different dimensions.
|
||||
resource = nullptr;
|
||||
}
|
||||
null_descriptor_index = NullSRVDescriptorIndex::k3D;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
|
||||
desc.TextureCube.MostDetailedMip = 0;
|
||||
desc.TextureCube.MipLevels = mip_max_level + 1;
|
||||
desc.TextureCube.ResourceMinLODClamp = 0.0f;
|
||||
if (binding_dimension != Dimension::kCube) {
|
||||
resource = nullptr;
|
||||
}
|
||||
null_descriptor_index = NullSRVDescriptorIndex::kCube;
|
||||
break;
|
||||
default:
|
||||
desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
|
||||
desc.Texture2DArray.MostDetailedMip = 0;
|
||||
desc.Texture2DArray.MipLevels = mip_max_level + 1;
|
||||
desc.Texture2DArray.FirstArraySlice = 0;
|
||||
desc.Texture2DArray.ArraySize = array_size;
|
||||
desc.Texture2DArray.PlaneSlice = 0;
|
||||
desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
|
||||
if (binding_dimension == Dimension::k3D ||
|
||||
binding_dimension == Dimension::kCube) {
|
||||
resource = nullptr;
|
||||
}
|
||||
null_descriptor_index = NullSRVDescriptorIndex::k2DArray;
|
||||
break;
|
||||
}
|
||||
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE source_handle;
|
||||
if (descriptor_index != UINT32_MAX) {
|
||||
assert_not_null(texture);
|
||||
MarkTextureUsed(texture);
|
||||
source_handle = GetTextureDescriptorCPUHandle(descriptor_index);
|
||||
} else {
|
||||
NullSRVDescriptorIndex null_descriptor_index;
|
||||
switch (host_shader_binding.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
null_descriptor_index = NullSRVDescriptorIndex::k3D;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
null_descriptor_index = NullSRVDescriptorIndex::kCube;
|
||||
break;
|
||||
default:
|
||||
assert_true(host_shader_binding.dimension == TextureDimension::k1D ||
|
||||
host_shader_binding.dimension == TextureDimension::k2D);
|
||||
null_descriptor_index = NullSRVDescriptorIndex::k2DArray;
|
||||
}
|
||||
source_handle = provider->OffsetViewDescriptor(
|
||||
null_srv_descriptor_heap_start_, uint32_t(null_descriptor_index));
|
||||
}
|
||||
auto device = provider->GetDevice();
|
||||
if (resource == nullptr) {
|
||||
// Copy a pre-made null descriptor since it's faster than to create an SRV.
|
||||
device->CopyDescriptorsSimple(
|
||||
1, handle,
|
||||
provider->OffsetViewDescriptor(null_srv_descriptor_heap_start_,
|
||||
uint32_t(null_descriptor_index)),
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
return;
|
||||
}
|
||||
MarkTextureUsed(texture);
|
||||
// Take the descriptor from the cache if it's cached, or create a new one in
|
||||
// the cache, or directly if this texture was already used with a different
|
||||
// swizzle. Profiling results say that CreateShaderResourceView takes the
|
||||
// longest time of draw call processing, and it's very noticeable in many
|
||||
// games.
|
||||
bool cached_handle_available = false;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cached_handle = {};
|
||||
assert_not_null(texture);
|
||||
if (texture->cached_srv_descriptor_swizzle !=
|
||||
Texture::kCachedSRVDescriptorSwizzleMissing) {
|
||||
// Use an existing cached descriptor if it has the needed swizzle.
|
||||
if (binding.host_swizzle == texture->cached_srv_descriptor_swizzle) {
|
||||
cached_handle_available = true;
|
||||
cached_handle = texture->cached_srv_descriptor;
|
||||
}
|
||||
} else {
|
||||
// Try to create a new cached descriptor if it doesn't exist yet.
|
||||
if (!srv_descriptor_cache_free_.empty()) {
|
||||
cached_handle_available = true;
|
||||
cached_handle = srv_descriptor_cache_free_.back();
|
||||
srv_descriptor_cache_free_.pop_back();
|
||||
} else if (srv_descriptor_cache_.empty() ||
|
||||
srv_descriptor_cache_.back().current_usage >=
|
||||
SRVDescriptorCachePage::kHeapSize) {
|
||||
D3D12_DESCRIPTOR_HEAP_DESC new_heap_desc;
|
||||
new_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||
new_heap_desc.NumDescriptors = SRVDescriptorCachePage::kHeapSize;
|
||||
new_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
|
||||
new_heap_desc.NodeMask = 0;
|
||||
ID3D12DescriptorHeap* new_heap;
|
||||
if (SUCCEEDED(device->CreateDescriptorHeap(&new_heap_desc,
|
||||
IID_PPV_ARGS(&new_heap)))) {
|
||||
SRVDescriptorCachePage new_page;
|
||||
new_page.heap = new_heap;
|
||||
new_page.heap_start = new_heap->GetCPUDescriptorHandleForHeapStart();
|
||||
new_page.current_usage = 1;
|
||||
cached_handle_available = true;
|
||||
cached_handle = new_page.heap_start;
|
||||
srv_descriptor_cache_.push_back(new_page);
|
||||
}
|
||||
} else {
|
||||
SRVDescriptorCachePage& page = srv_descriptor_cache_.back();
|
||||
cached_handle_available = true;
|
||||
cached_handle =
|
||||
provider->OffsetViewDescriptor(page.heap_start, page.current_usage);
|
||||
++page.current_usage;
|
||||
}
|
||||
if (cached_handle_available) {
|
||||
device->CreateShaderResourceView(resource, &desc, cached_handle);
|
||||
texture->cached_srv_descriptor = cached_handle;
|
||||
texture->cached_srv_descriptor_swizzle = binding.host_swizzle;
|
||||
}
|
||||
}
|
||||
if (cached_handle_available) {
|
||||
device->CopyDescriptorsSimple(1, handle, cached_handle,
|
||||
{
|
||||
#if FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_i(
|
||||
"gpu",
|
||||
"xe::gpu::d3d12::TextureCache::WriteActiveTextureBindfulSRV->"
|
||||
"CopyDescriptorsSimple");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
device->CopyDescriptorsSimple(1, handle, source_handle,
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
|
||||
} else {
|
||||
device->CreateShaderResourceView(resource, &desc, handle);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t TextureCache::GetActiveTextureBindlessSRVIndex(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding) {
|
||||
assert_true(bindless_resources_used_);
|
||||
uint32_t descriptor_index = UINT32_MAX;
|
||||
const TextureBinding& binding =
|
||||
texture_bindings_[host_shader_binding.fetch_constant];
|
||||
if (!binding.key.IsInvalid() &&
|
||||
AreDimensionsCompatible(host_shader_binding.dimension,
|
||||
binding.key.dimension)) {
|
||||
descriptor_index = host_shader_binding.is_signed
|
||||
? binding.descriptor_index_signed
|
||||
: binding.descriptor_index;
|
||||
}
|
||||
if (descriptor_index == UINT32_MAX) {
|
||||
switch (host_shader_binding.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
descriptor_index =
|
||||
uint32_t(D3D12CommandProcessor::SystemBindlessView::kNullTexture3D);
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
descriptor_index = uint32_t(
|
||||
D3D12CommandProcessor::SystemBindlessView::kNullTextureCube);
|
||||
break;
|
||||
default:
|
||||
assert_true(host_shader_binding.dimension == TextureDimension::k1D ||
|
||||
host_shader_binding.dimension == TextureDimension::k2D);
|
||||
descriptor_index = uint32_t(
|
||||
D3D12CommandProcessor::SystemBindlessView::kNullTexture2DArray);
|
||||
}
|
||||
}
|
||||
return descriptor_index;
|
||||
}
|
||||
|
||||
TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const {
|
||||
auto& regs = *register_file_;
|
||||
|
@ -1583,12 +1574,11 @@ TextureCache::SamplerParameters TextureCache::GetSamplerParameters(
parameters.clamp_z = fetch.clamp_z;
parameters.border_color = fetch.border_color;

uint32_t mip_min_level, mip_max_level;
uint32_t mip_min_level;
texture_util::GetSubresourcesFromFetchConstant(
fetch, nullptr, nullptr, nullptr, nullptr, nullptr, &mip_min_level,
&mip_max_level, binding.mip_filter);
nullptr, binding.mip_filter);
parameters.mip_min_level = mip_min_level;
parameters.mip_max_level = std::max(mip_max_level, mip_min_level);

AnisoFilter aniso_filter = binding.aniso_filter == AnisoFilter::kUseFetchConst
? fetch.aniso_filter
@ -1675,7 +1665,8 @@ void TextureCache::WriteSampler(SamplerParameters parameters,
desc.BorderColor[3] = 0.0f;
}
desc.MinLOD = float(parameters.mip_min_level);
desc.MaxLOD = float(parameters.mip_max_level);
// Maximum mip level is in the texture resource itself.
desc.MaxLOD = FLT_MAX;
auto device =
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
device->CreateSampler(&desc, handle);
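Since the texture resource itself only contains mips up to the fetch constant's maximum level, the sampler no longer needs to carry that bound and MaxLOD can stay wide open; only the minimum level still comes from the fetch constant. A minimal sketch of a sampler description following that convention (filtering and addressing are illustrative):

#include <cfloat>
#include <d3d12.h>

D3D12_SAMPLER_DESC MakeSamplerDescSketch(float mip_min_level) {
  D3D12_SAMPLER_DESC desc = {};
  desc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR;
  desc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
  desc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
  desc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP;
  desc.MipLODBias = 0.0f;
  desc.MaxAnisotropy = 1;
  desc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER;
  desc.MinLOD = mip_min_level;
  // The upper bound is enforced by the resource's mip count, so leave it open.
  desc.MaxLOD = FLT_MAX;
  return desc;
}

The description is then written with ID3D12Device::CreateSampler into either a bindful heap slot or a persistent bindless sampler slot.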
@ -1737,8 +1728,8 @@ bool TextureCache::TileResolvedTexture(
|
|||
resolve_tile_mode_info_[uint32_t(resolve_tile_mode)];
|
||||
|
||||
auto command_list = command_processor_->GetDeferredCommandList();
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
auto device =
|
||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||
uint32_t resolution_scale_log2 = IsResolutionScale2X() ? 1 : 0;
|
||||
|
||||
texture_base &= 0x1FFFFFFF;
|
||||
|
@ -1811,12 +1802,8 @@ bool TextureCache::TileResolvedTexture(
|
|||
}
|
||||
|
||||
// Tile the texture.
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||
if (command_processor_->RequestViewDescriptors(
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, 2, 2,
|
||||
descriptor_cpu_start, descriptor_gpu_start) ==
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[2];
|
||||
if (!command_processor_->RequestOneUseSingleViewDescriptors(2, descriptors)) {
|
||||
return false;
|
||||
}
|
||||
if (resolution_scale_log2) {
|
||||
|
@ -1826,19 +1813,15 @@ bool TextureCache::TileResolvedTexture(
|
|||
}
|
||||
command_processor_->SubmitBarriers();
|
||||
command_list->D3DSetComputeRootSignature(resolve_tile_root_signature_);
|
||||
|
||||
ResolveTileConstants resolve_tile_constants;
|
||||
resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) |
|
||||
(resolution_scale_log2 << 9) |
|
||||
((texture_pitch >> 5) << 10) |
|
||||
(is_3d ? ((texture_height >> 5) << 19) : 0);
|
||||
resolve_tile_constants.offset = offset_x | (offset_y << 5) | (offset_z << 10);
|
||||
resolve_tile_constants.size = resolve_width | (resolve_height << 16);
|
||||
resolve_tile_constants.host_base = uint32_t(footprint.Offset);
|
||||
resolve_tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch);
|
||||
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start, buffer,
|
||||
|
||||
// TODO(Triang3l): Use precreated bindless descriptors here after overall
|
||||
// cleanup/optimization involving typed buffers.
|
||||
ui::d3d12::util::CreateRawBufferSRV(device, descriptors[1].first, buffer,
|
||||
buffer_size);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_uav =
|
||||
provider->OffsetViewDescriptor(descriptor_cpu_start, 1);
|
||||
command_list->D3DSetComputeRootDescriptorTable(2, descriptors[1].second);
|
||||
|
||||
if (resolve_tile_mode_info.typed_uav_format != DXGI_FORMAT_UNKNOWN) {
|
||||
// Not sure if this alignment is actually needed in Direct3D 12, but for
|
||||
// safety. Also not using the full 512 MB buffer as a typed UAV because
|
||||
|
@ -1862,22 +1845,32 @@ bool TextureCache::TileResolvedTexture(
|
|||
device->CreateUnorderedAccessView(resolution_scale_log2
|
||||
? scaled_resolve_buffer_
|
||||
: shared_memory_->GetBuffer(),
|
||||
nullptr, &uav_desc, descriptor_cpu_uav);
|
||||
nullptr, &uav_desc, descriptors[0].first);
|
||||
} else {
|
||||
if (resolution_scale_log2) {
|
||||
resolve_tile_constants.guest_base = texture_base & 0xFFF;
|
||||
CreateScaledResolveBufferRawUAV(
|
||||
descriptor_cpu_uav, texture_base >> 12,
|
||||
descriptors[0].first, texture_base >> 12,
|
||||
((texture_base + texture_size - 1) >> 12) - (texture_base >> 12) + 1);
|
||||
} else {
|
||||
resolve_tile_constants.guest_base = texture_base;
|
||||
shared_memory_->WriteRawUAVDescriptor(descriptor_cpu_uav);
|
||||
shared_memory_->WriteRawUAVDescriptor(descriptors[0].first);
|
||||
}
|
||||
}
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptors[0].second);
|
||||
|
||||
resolve_tile_constants.info = uint32_t(endian) | (uint32_t(format) << 3) |
|
||||
(resolution_scale_log2 << 9) |
|
||||
((texture_pitch >> 5) << 10) |
|
||||
(is_3d ? ((texture_height >> 5) << 19) : 0);
|
||||
resolve_tile_constants.offset = offset_x | (offset_y << 5) | (offset_z << 10);
|
||||
resolve_tile_constants.size = resolve_width | (resolve_height << 16);
|
||||
resolve_tile_constants.host_base = uint32_t(footprint.Offset);
|
||||
resolve_tile_constants.host_pitch = uint32_t(footprint.Footprint.RowPitch);
|
||||
command_list->D3DSetComputeRoot32BitConstants(
|
||||
0, sizeof(resolve_tile_constants) / sizeof(uint32_t),
|
||||
&resolve_tile_constants, 0);
|
||||
|
||||
command_processor_->SetComputePipeline(
|
||||
resolve_tile_pipelines_[uint32_t(resolve_tile_mode)]);
|
||||
// Each group processes 32x32 texels after resolution scaling has been
|
||||
|
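The comment above describes the dispatch granularity for the tiling pass: one thread group per 32x32 texels of the (resolution-scaled) region. A minimal sketch of the implied group-count arithmetic, assuming the extent is given in unscaled texels and rounded up to whole groups (the actual dispatch call is outside this hunk):

#include <cstdint>

void TileGroupCountSketch(uint32_t resolve_width, uint32_t resolve_height,
                          uint32_t resolution_scale_log2,
                          uint32_t& group_count_x, uint32_t& group_count_y) {
  uint32_t scaled_width = resolve_width << resolution_scale_log2;
  uint32_t scaled_height = resolve_height << resolution_scale_log2;
  // One 32x32-texel thread group per tile of the scaled area, rounding up.
  group_count_x = (scaled_width + 31) >> 5;
  group_count_y = (scaled_height + 31) >> 5;
}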
@ -2339,8 +2332,6 @@ TextureCache::Texture* TextureCache::FindOrCreateTexture(TextureKey key) {
|
|||
}
|
||||
texture->base_watch_handle = nullptr;
|
||||
texture->mip_watch_handle = nullptr;
|
||||
texture->cached_srv_descriptor_swizzle =
|
||||
Texture::kCachedSRVDescriptorSwizzleMissing;
|
||||
textures_.insert(std::make_pair(map_key, texture));
|
||||
COUNT_profile_set("gpu/texture_cache/textures", textures_.size());
|
||||
textures_total_size_ += texture->resource_size;
|
||||
|
@ -2364,8 +2355,8 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
}
|
||||
|
||||
auto command_list = command_processor_->GetDeferredCommandList();
|
||||
auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
|
||||
auto device = provider->GetDevice();
|
||||
auto device =
|
||||
command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
|
||||
|
||||
// Get the pipeline.
|
||||
LoadMode load_mode = GetLoadMode(texture->key);
|
||||
|
@ -2453,16 +2444,19 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
// descriptors for base and mips.
|
||||
bool separate_base_and_mips_descriptors =
|
||||
scaled_resolve && mip_first == 0 && mip_last != 0;
|
||||
// TODO(Triang3l): Use precreated bindless descriptors here after overall
|
||||
// cleanup/optimization involving typed buffers.
|
||||
uint32_t descriptor_count = separate_base_and_mips_descriptors ? 4 : 2;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE descriptor_cpu_start;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE descriptor_gpu_start;
|
||||
if (command_processor_->RequestViewDescriptors(
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, descriptor_count,
|
||||
descriptor_count, descriptor_cpu_start, descriptor_gpu_start) ==
|
||||
ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) {
|
||||
command_processor_->ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state);
|
||||
ui::d3d12::util::DescriptorCPUGPUHandlePair descriptors[4];
|
||||
if (!command_processor_->RequestOneUseSingleViewDescriptors(descriptor_count,
|
||||
descriptors)) {
|
||||
return false;
|
||||
}
|
||||
// Create two destination descriptors since the table has both.
|
||||
for (uint32_t i = 0; i < descriptor_count; i += 2) {
|
||||
ui::d3d12::util::CreateRawBufferUAV(device, descriptors[i].first,
|
||||
copy_buffer, uint32_t(host_slice_size));
|
||||
}
|
||||
if (scaled_resolve) {
|
||||
// TODO(Triang3l): Allow partial invalidation of scaled textures - send a
|
||||
// part of scaled_resolve_pages_ to the shader and choose the source
|
||||
|
@ -2470,35 +2464,28 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
// it's not, duplicate the texels from the unscaled version - will be
|
||||
// blocky with filtering, but better than nothing.
|
||||
UseScaledResolveBufferForReading();
|
||||
uint32_t srv_descriptor_offset = 0;
|
||||
uint32_t source_descriptor_index = 1;
|
||||
if (mip_first == 0) {
|
||||
CreateScaledResolveBufferRawSRV(
|
||||
provider->OffsetViewDescriptor(descriptor_cpu_start,
|
||||
srv_descriptor_offset),
|
||||
texture->key.base_page, (texture->base_size + 0xFFF) >> 12);
|
||||
srv_descriptor_offset += 2;
|
||||
descriptors[source_descriptor_index].first, texture->key.base_page,
|
||||
(texture->base_size + 0xFFF) >> 12);
|
||||
source_descriptor_index += 2;
|
||||
}
|
||||
if (mip_last != 0) {
|
||||
CreateScaledResolveBufferRawSRV(
|
||||
provider->OffsetViewDescriptor(descriptor_cpu_start,
|
||||
srv_descriptor_offset),
|
||||
texture->key.mip_page, (texture->mip_size + 0xFFF) >> 12);
|
||||
descriptors[source_descriptor_index].first, texture->key.mip_page,
|
||||
(texture->mip_size + 0xFFF) >> 12);
|
||||
}
|
||||
} else {
|
||||
shared_memory_->UseForReading();
|
||||
shared_memory_->WriteRawSRVDescriptor(descriptor_cpu_start);
|
||||
}
|
||||
// Create two destination descriptors since the table has both.
|
||||
for (uint32_t i = 1; i < descriptor_count; i += 2) {
|
||||
ui::d3d12::util::CreateRawBufferUAV(
|
||||
device, provider->OffsetViewDescriptor(descriptor_cpu_start, i),
|
||||
copy_buffer, uint32_t(host_slice_size));
|
||||
shared_memory_->WriteRawSRVDescriptor(descriptors[1].first);
|
||||
}
|
||||
command_processor_->SetComputePipeline(pipeline);
|
||||
command_list->D3DSetComputeRootSignature(load_root_signature_);
|
||||
if (!separate_base_and_mips_descriptors) {
|
||||
// Will be bound later.
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptor_gpu_start);
|
||||
// Will be bound later if separate base and mip descriptors.
|
||||
command_list->D3DSetComputeRootDescriptorTable(2, descriptors[1].second);
|
||||
command_list->D3DSetComputeRootDescriptorTable(1, descriptors[0].second);
|
||||
}
|
||||
|
||||
// Submit commands.
|
||||
|
@ -2575,14 +2562,11 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
|||
}
|
||||
std::memcpy(cbuffer_mapping, &load_constants, sizeof(load_constants));
|
||||
command_list->D3DSetComputeRootConstantBufferView(0, cbuffer_gpu_address);
|
||||
if (separate_base_and_mips_descriptors) {
|
||||
if (j == 0) {
|
||||
command_list->D3DSetComputeRootDescriptorTable(1,
|
||||
descriptor_gpu_start);
|
||||
} else if (j == 1) {
|
||||
command_list->D3DSetComputeRootDescriptorTable(
|
||||
1, provider->OffsetViewDescriptor(descriptor_gpu_start, 2));
|
||||
}
|
||||
if (separate_base_and_mips_descriptors && j <= 1) {
|
||||
command_list->D3DSetComputeRootDescriptorTable(
|
||||
2, descriptors[j * 2 + 1].second);
|
||||
command_list->D3DSetComputeRootDescriptorTable(
|
||||
1, descriptors[j * 2].second);
|
||||
}
|
||||
command_processor_->SubmitBarriers();
|
||||
// Each thread group processes 32x32x1 blocks after resolution scaling has
|
||||
|
@@ -2642,6 +2626,138 @@ bool TextureCache::LoadTextureData(Texture* texture) {
  return true;
}

uint32_t TextureCache::FindOrCreateTextureDescriptor(Texture& texture,
                                                      bool is_signed,
                                                      uint32_t host_swizzle) {
  uint32_t descriptor_key = uint32_t(is_signed) | (host_swizzle << 1);

  // Try to find an existing descriptor.
  auto it = texture.srv_descriptors.find(descriptor_key);
  if (it != texture.srv_descriptors.end()) {
    return it->second;
  }

  // Create a new bindless or cached descriptor if supported.
  D3D12_SHADER_RESOURCE_VIEW_DESC desc;

  TextureFormat format = texture.key.format;
  if (IsSignedVersionSeparate(format) &&
      texture.key.signed_separate != uint32_t(is_signed)) {
    // Not the version with the needed signedness.
    return UINT32_MAX;
  }
  if (is_signed) {
    // Not supporting signed compressed textures - hopefully DXN and DXT5A are
    // not used as signed.
    desc.Format = host_formats_[uint32_t(format)].dxgi_format_snorm;
  } else {
    desc.Format = GetDXGIUnormFormat(texture.key);
  }
  if (desc.Format == DXGI_FORMAT_UNKNOWN) {
    unsupported_format_features_used_[uint32_t(format)] |=
        is_signed ? kUnsupportedSnormBit : kUnsupportedUnormBit;
    return UINT32_MAX;
  }

  uint32_t mip_levels = texture.key.mip_max_level + 1;
  switch (texture.key.dimension) {
    case Dimension::k1D:
    case Dimension::k2D:
      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY;
      desc.Texture2DArray.MostDetailedMip = 0;
      desc.Texture2DArray.MipLevels = mip_levels;
      desc.Texture2DArray.FirstArraySlice = 0;
      desc.Texture2DArray.ArraySize = texture.key.depth;
      desc.Texture2DArray.PlaneSlice = 0;
      desc.Texture2DArray.ResourceMinLODClamp = 0.0f;
      break;
    case Dimension::k3D:
      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D;
      desc.Texture3D.MostDetailedMip = 0;
      desc.Texture3D.MipLevels = mip_levels;
      desc.Texture3D.ResourceMinLODClamp = 0.0f;
      break;
    case Dimension::kCube:
      desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE;
      desc.TextureCube.MostDetailedMip = 0;
      desc.TextureCube.MipLevels = mip_levels;
      desc.TextureCube.ResourceMinLODClamp = 0.0f;
      break;
    default:
      assert_unhandled_case(texture.key.dimension);
      return UINT32_MAX;
  }

  desc.Shader4ComponentMapping =
      host_swizzle |
      D3D12_SHADER_COMPONENT_MAPPING_ALWAYS_SET_BIT_AVOIDING_ZEROMEM_MISTAKES;

  auto device =
      command_processor_->GetD3D12Context()->GetD3D12Provider()->GetDevice();
  uint32_t descriptor_index;
  if (bindless_resources_used_) {
    descriptor_index =
        command_processor_->RequestPersistentViewBindlessDescriptor();
    if (descriptor_index == UINT32_MAX) {
      XELOGE(
          "Failed to create a texture descriptor - no free bindless view "
          "descriptors");
      return UINT32_MAX;
    }
  } else {
    if (!srv_descriptor_cache_free_.empty()) {
      descriptor_index = srv_descriptor_cache_free_.back();
      srv_descriptor_cache_free_.pop_back();
    } else {
      // Allocated + 1 (including the descriptor that is being added), rounded
      // up to SRVDescriptorCachePage::kHeapSize, (allocated + 1 + size - 1).
      uint32_t cache_pages_needed = (srv_descriptor_cache_allocated_ +
                                     SRVDescriptorCachePage::kHeapSize) /
                                    SRVDescriptorCachePage::kHeapSize;
      if (srv_descriptor_cache_.size() < cache_pages_needed) {
        D3D12_DESCRIPTOR_HEAP_DESC cache_heap_desc;
        cache_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
        cache_heap_desc.NumDescriptors = SRVDescriptorCachePage::kHeapSize;
        cache_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE;
        cache_heap_desc.NodeMask = 0;
        while (srv_descriptor_cache_.size() < cache_pages_needed) {
          SRVDescriptorCachePage cache_page;
          if (FAILED(device->CreateDescriptorHeap(
                  &cache_heap_desc, IID_PPV_ARGS(&cache_page.heap)))) {
            XELOGE(
                "Failed to create a texture descriptor - couldn't create a "
                "descriptor cache heap");
            return UINT32_MAX;
          }
          cache_page.heap_start =
              cache_page.heap->GetCPUDescriptorHandleForHeapStart();
          srv_descriptor_cache_.push_back(cache_page);
        }
      }
      descriptor_index = srv_descriptor_cache_allocated_++;
    }
  }
  device->CreateShaderResourceView(
      texture.resource, &desc, GetTextureDescriptorCPUHandle(descriptor_index));
  texture.srv_descriptors.insert({descriptor_key, descriptor_index});
  return descriptor_index;
}

D3D12_CPU_DESCRIPTOR_HANDLE TextureCache::GetTextureDescriptorCPUHandle(
    uint32_t descriptor_index) const {
  auto provider = command_processor_->GetD3D12Context()->GetD3D12Provider();
  if (bindless_resources_used_) {
    return provider->OffsetViewDescriptor(
        command_processor_->GetViewBindlessHeapCPUStart(), descriptor_index);
  }
  D3D12_CPU_DESCRIPTOR_HANDLE heap_start =
      srv_descriptor_cache_[descriptor_index /
                            SRVDescriptorCachePage::kHeapSize]
          .heap_start;
  uint32_t heap_offset = descriptor_index % SRVDescriptorCachePage::kHeapSize;
  return provider->OffsetViewDescriptor(heap_start, heap_offset);
}
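For reference, here is a minimal standalone sketch of the bindful descriptor-cache arithmetic used above; kPageSize, PagesNeededForOneMore and PageAndOffset are illustrative names, not the emulator's. Growing the cache so that one more descriptor fits needs ceil((allocated + 1) / page size) pages, which is exactly what (allocated + kHeapSize) / kHeapSize computes, and the bindful path of GetTextureDescriptorCPUHandle splits an index into a page (index / kHeapSize) and an offset within that page (index % kHeapSize):

#include <cassert>
#include <cstdint>
#include <utility>

// Illustrative page size; the cache above uses 65536 descriptors per heap.
constexpr uint32_t kPageSize = 65536;

// Pages needed so that one more descriptor (at index == allocated) fits:
// ceil((allocated + 1) / kPageSize) == (allocated + kPageSize) / kPageSize.
constexpr uint32_t PagesNeededForOneMore(uint32_t allocated) {
  return (allocated + kPageSize) / kPageSize;
}

// Splits a persistent descriptor index into (page, offset within the page),
// mirroring the bindful lookup in GetTextureDescriptorCPUHandle.
constexpr std::pair<uint32_t, uint32_t> PageAndOffset(uint32_t index) {
  return {index / kPageSize, index % kPageSize};
}

int main() {
  static_assert(PagesNeededForOneMore(0) == 1, "first descriptor needs a page");
  static_assert(PagesNeededForOneMore(65535) == 1, "index 65535 fits in page 0");
  static_assert(PagesNeededForOneMore(65536) == 2, "index 65536 starts page 1");
  std::pair<uint32_t, uint32_t> location = PageAndOffset(65537);
  assert(location.first == 1 && location.second == 1);
  return 0;
}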
|
||||
|
||||
void TextureCache::MarkTextureUsed(Texture* texture) {
|
||||
uint64_t current_frame = command_processor_->GetCurrentFrame();
|
||||
// This is called very frequently, don't relink unless needed for caching.
|
||||
|
@@ -2687,8 +2803,10 @@ void TextureCache::WatchCallback(Texture* texture, bool is_mip) {
}

void TextureCache::ClearBindings() {
  std::memset(texture_bindings_, 0, sizeof(texture_bindings_));
  texture_keys_in_sync_ = 0;
  for (size_t i = 0; i < xe::countof(texture_bindings_); ++i) {
    texture_bindings_[i].Clear();
  }
  texture_bindings_in_sync_ = 0;
  // Already reset everything.
  texture_invalidated_.store(false, std::memory_order_relaxed);
}
|
||||
|
|
|
@ -11,7 +11,9 @@
|
|||
#define XENIA_GPU_D3D12_TEXTURE_CACHE_H_
|
||||
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
||||
#include "xenia/base/mutex.h"
|
||||
#include "xenia/gpu/d3d12/d3d12_shader.h"
|
||||
|
@ -55,9 +57,84 @@ class D3D12CommandProcessor;
|
|||
// MipAddress but no BaseAddress to save memory because textures are streamed
|
||||
// this way anyway.
|
||||
class TextureCache {
|
||||
union TextureKey {
|
||||
struct {
|
||||
// Physical 4 KB page with the base mip level, disregarding A/C/E address
|
||||
// range prefix.
|
||||
uint32_t base_page : 17; // 17 total
|
||||
Dimension dimension : 2; // 19
|
||||
uint32_t width : 13; // 32
|
||||
|
||||
uint32_t height : 13; // 45
|
||||
uint32_t tiled : 1; // 46
|
||||
uint32_t packed_mips : 1; // 47
|
||||
// Physical 4 KB page with mip 1 and smaller.
|
||||
uint32_t mip_page : 17; // 64
|
||||
|
||||
// Layers for stacked and 3D, 6 for cube, 1 for other dimensions.
|
||||
uint32_t depth : 10; // 74
|
||||
uint32_t mip_max_level : 4; // 78
|
||||
TextureFormat format : 6; // 84
|
||||
Endian endianness : 2; // 86
|
||||
// Whether this texture is signed and has a different host representation
|
||||
// than an unsigned view of the same guest texture.
|
||||
uint32_t signed_separate : 1; // 87
|
||||
// Whether this texture is a 2x-scaled resolve target.
|
||||
uint32_t scaled_resolve : 1; // 88
|
||||
};
|
||||
struct {
|
||||
// The key used for unordered_multimap lookup. Single uint32_t instead of
|
||||
// a uint64_t so XXH hash can be calculated in a stable way due to no
|
||||
// padding.
|
||||
uint32_t map_key[2];
|
||||
// The key used to identify one texture within unordered_multimap buckets.
|
||||
uint32_t bucket_key;
|
||||
};
|
||||
TextureKey() { MakeInvalid(); }
|
||||
TextureKey(const TextureKey& key) {
|
||||
SetMapKey(key.GetMapKey());
|
||||
bucket_key = key.bucket_key;
|
||||
}
|
||||
TextureKey& operator=(const TextureKey& key) {
|
||||
SetMapKey(key.GetMapKey());
|
||||
bucket_key = key.bucket_key;
|
||||
return *this;
|
||||
}
|
||||
bool operator==(const TextureKey& key) const {
|
||||
return GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key;
|
||||
}
|
||||
bool operator!=(const TextureKey& key) const {
|
||||
return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
|
||||
}
|
||||
inline uint64_t GetMapKey() const {
|
||||
return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
|
||||
}
|
||||
inline void SetMapKey(uint64_t key) {
|
||||
map_key[0] = uint32_t(key);
|
||||
map_key[1] = uint32_t(key >> 32);
|
||||
}
|
||||
inline bool IsInvalid() const {
|
||||
// Zero base and zero width is enough for a binding to be invalid.
|
||||
return map_key[0] == 0;
|
||||
}
|
||||
inline void MakeInvalid() {
|
||||
// Reset all for a stable hash.
|
||||
SetMapKey(0);
|
||||
bucket_key = 0;
|
||||
}
|
||||
};
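As a side note, here is a minimal standalone sketch of the map_key packing declared above; the MapKey struct is an illustrative stand-in, not the emulator's type. The two uint32_t halves cover the first 64 bits of the key with no padding, so an external hash such as XXH64 can be computed over exactly 8 bytes, while the Get/Set pair reassembles and splits the 64-bit lookup value the same way GetMapKey/SetMapKey do:

#include <cassert>
#include <cstdint>

// Illustrative stand-in for the first 64 bits of TextureKey.
struct MapKey {
  uint32_t map_key[2];

  uint64_t Get() const {
    return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
  }
  void Set(uint64_t key) {
    map_key[0] = uint32_t(key);
    map_key[1] = uint32_t(key >> 32);
  }
};

int main() {
  static_assert(sizeof(MapKey) == 2 * sizeof(uint32_t),
                "no padding - hashable as a plain 8-byte block");
  MapKey key;
  key.Set(0x123456789ABCDEF0ull);
  assert(key.Get() == 0x123456789ABCDEF0ull);
  // An external hasher would consume the 8 bytes of key.map_key directly, for
  // example XXH64(key.map_key, sizeof(key.map_key), 0) - shown only as a
  // comment here since xxHash isn't pulled into this sketch.
  return 0;
}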
|
||||
|
||||
public:
|
||||
// Keys that can be stored for checking whether descriptors for host shader
// bindings are still up to date.
|
||||
struct TextureSRVKey {
|
||||
TextureKey key;
|
||||
uint32_t host_swizzle;
|
||||
uint8_t swizzled_signs;
|
||||
};
|
||||
|
||||
// Sampler parameters that can be directly converted to a host sampler or used
|
||||
// for binding hashing.
|
||||
// for checking whether samplers are up to date.
|
||||
union SamplerParameters {
|
||||
struct {
|
||||
ClampMode clamp_x : 3; // 3
|
||||
|
@ -70,7 +147,7 @@ class TextureCache {
|
|||
uint32_t mip_linear : 1; // 14
|
||||
AnisoFilter aniso_filter : 3; // 17
|
||||
uint32_t mip_min_level : 4; // 21
|
||||
uint32_t mip_max_level : 4; // 25
|
||||
// Maximum mip level is in the texture resource itself.
|
||||
};
|
||||
uint32_t value;
|
||||
|
||||
|
@ -91,7 +168,8 @@ class TextureCache {
|
|||
};
|
||||
|
||||
TextureCache(D3D12CommandProcessor* command_processor,
|
||||
RegisterFile* register_file, SharedMemory* shared_memory);
|
||||
RegisterFile* register_file, bool bindless_resources_used,
|
||||
SharedMemory* shared_memory);
|
||||
~TextureCache();
|
||||
|
||||
bool Initialize(bool edram_rov_used);
|
||||
|
@ -109,19 +187,33 @@ class TextureCache {
|
|||
// binding the actual drawing pipeline.
|
||||
void RequestTextures(uint32_t used_texture_mask);
|
||||
|
||||
// Returns the hash of the current bindings (must be called after
|
||||
// RequestTextures) for the provided SRV descriptor layout.
|
||||
uint64_t GetDescriptorHashForActiveTextures(
|
||||
const D3D12Shader::TextureSRV* texture_srvs,
|
||||
uint32_t texture_srv_count) const;
|
||||
// "ActiveTexture" means as of the latest RequestTextures call.
|
||||
|
||||
// Returns whether texture SRV keys stored externally are still valid for the
|
||||
// current bindings and host shader binding layout. Both keys and
|
||||
// host_shader_bindings must have host_shader_binding_count elements
|
||||
// (otherwise they are incompatible - like if this function returned false).
|
||||
bool AreActiveTextureSRVKeysUpToDate(
|
||||
const TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const;
|
||||
// Exports the current binding data to texture SRV keys so they can be stored
|
||||
// for checking whether subsequent draw calls can keep using the same
|
||||
// bindings. Write host_shader_binding_count keys.
|
||||
void WriteActiveTextureSRVKeys(
|
||||
TextureSRVKey* keys,
|
||||
const D3D12Shader::TextureBinding* host_shader_bindings,
|
||||
uint32_t host_shader_binding_count) const;
|
||||
// Returns the post-swizzle signedness of a currently bound texture (must be
|
||||
// called after RequestTextures).
|
||||
uint8_t GetActiveTextureSwizzledSigns(uint32_t index) const {
|
||||
return texture_bindings_[index].swizzled_signs;
|
||||
}
|
||||
|
||||
void WriteTextureSRV(const D3D12Shader::TextureSRV& texture_srv,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
void WriteActiveTextureBindfulSRV(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding,
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE handle);
|
||||
uint32_t GetActiveTextureBindlessSRVIndex(
|
||||
const D3D12Shader::TextureBinding& host_shader_binding);
|
||||
|
||||
SamplerParameters GetSamplerParameters(
|
||||
const D3D12Shader::SamplerBinding& binding) const;
|
||||
|
@ -276,73 +368,6 @@ class TextureCache {
|
|||
uint8_t swizzle[4];
|
||||
};
|
||||
|
||||
union TextureKey {
|
||||
struct {
|
||||
// Physical 4 KB page with the base mip level, disregarding A/C/E address
|
||||
// range prefix.
|
||||
uint32_t base_page : 17; // 17 total
|
||||
Dimension dimension : 2; // 19
|
||||
uint32_t width : 13; // 32
|
||||
|
||||
uint32_t height : 13; // 45
|
||||
uint32_t tiled : 1; // 46
|
||||
uint32_t packed_mips : 1; // 47
|
||||
// Physical 4 KB page with mip 1 and smaller.
|
||||
uint32_t mip_page : 17; // 64
|
||||
|
||||
// Layers for stacked and 3D, 6 for cube, 1 for other dimensions.
|
||||
uint32_t depth : 10; // 74
|
||||
uint32_t mip_max_level : 4; // 78
|
||||
TextureFormat format : 6; // 84
|
||||
Endian endianness : 2; // 86
|
||||
// Whether this texture is signed and has a different host representation
|
||||
// than an unsigned view of the same guest texture.
|
||||
uint32_t signed_separate : 1; // 87
|
||||
// Whether this texture is a 2x-scaled resolve target.
|
||||
uint32_t scaled_resolve : 1; // 88
|
||||
};
|
||||
struct {
|
||||
// The key used for unordered_multimap lookup. Single uint32_t instead of
|
||||
// a uint64_t so XXH hash can be calculated in a stable way due to no
|
||||
// padding.
|
||||
uint32_t map_key[2];
|
||||
// The key used to identify one texture within unordered_multimap buckets.
|
||||
uint32_t bucket_key;
|
||||
};
|
||||
TextureKey() { MakeInvalid(); }
|
||||
TextureKey(const TextureKey& key) {
|
||||
SetMapKey(key.GetMapKey());
|
||||
bucket_key = key.bucket_key;
|
||||
}
|
||||
TextureKey& operator=(const TextureKey& key) {
|
||||
SetMapKey(key.GetMapKey());
|
||||
bucket_key = key.bucket_key;
|
||||
return *this;
|
||||
}
|
||||
bool operator==(const TextureKey& key) const {
|
||||
return GetMapKey() == key.GetMapKey() && bucket_key == key.bucket_key;
|
||||
}
|
||||
bool operator!=(const TextureKey& key) const {
|
||||
return GetMapKey() != key.GetMapKey() || bucket_key != key.bucket_key;
|
||||
}
|
||||
inline uint64_t GetMapKey() const {
|
||||
return uint64_t(map_key[0]) | (uint64_t(map_key[1]) << 32);
|
||||
}
|
||||
inline void SetMapKey(uint64_t key) {
|
||||
map_key[0] = uint32_t(key);
|
||||
map_key[1] = uint32_t(key >> 32);
|
||||
}
|
||||
inline bool IsInvalid() const {
|
||||
// Zero base and zero width is enough for a binding to be invalid.
|
||||
return map_key[0] == 0;
|
||||
}
|
||||
inline void MakeInvalid() {
|
||||
// Reset all for a stable hash.
|
||||
SetMapKey(0);
|
||||
bucket_key = 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct Texture {
|
||||
TextureKey key;
|
||||
ID3D12Resource* resource;
|
||||
|
@ -367,13 +392,11 @@ class TextureCache {
|
|||
// Row pitches on each mip level (for linear layout mainly).
|
||||
uint32_t pitches[14];
|
||||
|
||||
// SRV descriptor from the cache, for the first swizzle the texture was used
|
||||
// with (which is usually determined by the format, such as RGBA or BGRA).
|
||||
// If swizzle is kCachedSRVDescriptorSwizzleMissing, the cached descriptor
|
||||
// doesn't exist yet (there are no invalid D3D descriptor handle values).
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cached_srv_descriptor;
|
||||
static constexpr uint32_t kCachedSRVDescriptorSwizzleMissing = UINT32_MAX;
|
||||
uint32_t cached_srv_descriptor_swizzle;
|
||||
// For bindful - indices in the non-shader-visible descriptor cache for
|
||||
// copying to the shader-visible heap (much faster than recreating, which,
|
||||
// according to profiling, was often a bottleneck in many games).
|
||||
// For bindless - indices in the global shader-visible descriptor heap.
|
||||
std::unordered_map<uint32_t, uint32_t> srv_descriptors;
|
||||
|
||||
// These are to be accessed within the global critical region to synchronize
|
||||
// with shared memory.
|
||||
|
@ -390,7 +413,6 @@ class TextureCache {
|
|||
static constexpr uint32_t kHeapSize = 65536;
|
||||
ID3D12DescriptorHeap* heap;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE heap_start;
|
||||
uint32_t current_usage;
|
||||
};
|
||||
|
||||
struct LoadConstants {
|
||||
|
@ -459,6 +481,14 @@ class TextureCache {
|
|||
// Signed version of the texture if the data in the signed version is
|
||||
// different on the host.
|
||||
Texture* texture_signed;
|
||||
// Descriptor indices of texture and texture_signed returned from
|
||||
// FindOrCreateTextureDescriptor.
|
||||
uint32_t descriptor_index;
|
||||
uint32_t descriptor_index_signed;
|
||||
void Clear() {
|
||||
std::memset(this, 0, sizeof(*this));
|
||||
descriptor_index = descriptor_index_signed = UINT32_MAX;
|
||||
}
|
||||
};
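A small standalone sketch of the Clear() pattern above; Binding here is an illustrative reduced struct, not the emulator's TextureBinding. Zeroing the whole object handles the pointers and packed fields, but descriptor indices use UINT32_MAX as the "no descriptor" sentinel, so they must be re-set after the memset:

#include <cassert>
#include <cstdint>
#include <cstring>

// Hypothetical reduced binding, mirroring the memset-then-sentinel idea.
struct Binding {
  void* texture;
  void* texture_signed;
  uint32_t descriptor_index;
  uint32_t descriptor_index_signed;

  void Clear() {
    std::memset(this, 0, sizeof(*this));
    // 0 is a valid descriptor index, so "no descriptor" must be UINT32_MAX.
    descriptor_index = descriptor_index_signed = UINT32_MAX;
  }
};

int main() {
  Binding binding;
  binding.Clear();
  assert(binding.texture == nullptr);
  assert(binding.descriptor_index == UINT32_MAX);
  return 0;
}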
|
||||
|
||||
// Whether the signed version of the texture has a different representation on
|
||||
|
@ -505,6 +535,22 @@ class TextureCache {
|
|||
const xenos::xe_gpu_texture_fetch_t& fetch, TextureKey& key_out,
|
||||
uint32_t* host_swizzle_out, uint8_t* swizzled_signs_out);
|
||||
|
||||
static constexpr bool AreDimensionsCompatible(
|
||||
TextureDimension binding_dimension, Dimension resource_dimension) {
|
||||
switch (binding_dimension) {
|
||||
case TextureDimension::k1D:
|
||||
case TextureDimension::k2D:
|
||||
return resource_dimension == Dimension::k1D ||
|
||||
resource_dimension == Dimension::k2D;
|
||||
case TextureDimension::k3D:
|
||||
return resource_dimension == Dimension::k3D;
|
||||
case TextureDimension::kCube:
|
||||
return resource_dimension == Dimension::kCube;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void LogTextureKeyAction(TextureKey key, const char* action);
|
||||
static void LogTextureAction(const Texture* texture, const char* action);
|
||||
|
||||
|
@ -517,6 +563,14 @@ class TextureCache {
|
|||
// allocates descriptors and copies!
|
||||
bool LoadTextureData(Texture* texture);
|
||||
|
||||
// Returns the index of an existing or a newly created non-shader-visible
|
||||
// cached (for bindful) or a shader-visible global (for bindless) descriptor,
|
||||
// or UINT32_MAX if failed to create.
|
||||
uint32_t FindOrCreateTextureDescriptor(Texture& texture, bool is_signed,
|
||||
uint32_t host_swizzle);
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE GetTextureDescriptorCPUHandle(
|
||||
uint32_t descriptor_index) const;
|
||||
|
||||
// For LRU caching - updates the last usage frame and moves the texture to
|
||||
// the end of the usage queue. Must be called any time the texture is
|
||||
// referenced by any command list to make sure it's not destroyed while still
|
||||
|
@ -552,6 +606,7 @@ class TextureCache {
|
|||
|
||||
D3D12CommandProcessor* command_processor_;
|
||||
RegisterFile* register_file_;
|
||||
bool bindless_resources_used_;
|
||||
SharedMemory* shared_memory_;
|
||||
|
||||
static const LoadModeInfo load_mode_info_[];
|
||||
|
@ -571,8 +626,9 @@ class TextureCache {
|
|||
uint64_t texture_current_usage_time_;
|
||||
|
||||
std::vector<SRVDescriptorCachePage> srv_descriptor_cache_;
|
||||
// Cached descriptors used by deleted textures, for reuse.
|
||||
std::vector<D3D12_CPU_DESCRIPTOR_HANDLE> srv_descriptor_cache_free_;
|
||||
uint32_t srv_descriptor_cache_allocated_;
|
||||
// Indices of cached descriptors used by deleted textures, for reuse.
|
||||
std::vector<uint32_t> srv_descriptor_cache_free_;
|
||||
|
||||
enum class NullSRVDescriptorIndex {
|
||||
k2DArray,
|
||||
|
@ -587,9 +643,9 @@ class TextureCache {
|
|||
D3D12_CPU_DESCRIPTOR_HANDLE null_srv_descriptor_heap_start_;
|
||||
|
||||
TextureBinding texture_bindings_[32] = {};
|
||||
// Bit vector with bits reset on fetch constant writes to avoid getting
|
||||
// texture keys from the fetch constants again and again.
|
||||
uint32_t texture_keys_in_sync_ = 0;
|
||||
// Bit vector with bits reset on fetch constant writes to avoid parsing fetch
|
||||
// constants again and again.
|
||||
uint32_t texture_bindings_in_sync_ = 0;
|
||||
|
||||
// Whether a texture has been invalidated (a watch has been triggered), so
|
||||
// need to try to reload textures, disregarding whether fetch constants have
|
||||
|
|
|
@ -62,8 +62,8 @@ using namespace ucode;
|
|||
// S#/T#/U# binding index, and the second is the s#/t#/u# register index
|
||||
// within its space.
|
||||
|
||||
constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVIndexBits;
|
||||
constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVs;
|
||||
constexpr uint32_t DxbcShaderTranslator::kMaxTextureBindingIndexBits;
|
||||
constexpr uint32_t DxbcShaderTranslator::kMaxTextureBindings;
|
||||
constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
|
||||
constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindings;
|
||||
constexpr uint32_t DxbcShaderTranslator::kInterpolatorCount;
|
||||
|
@ -74,13 +74,16 @@ constexpr uint32_t DxbcShaderTranslator::kSwizzleXXXX;
|
|||
constexpr uint32_t DxbcShaderTranslator::kSwizzleYYYY;
|
||||
constexpr uint32_t DxbcShaderTranslator::kSwizzleZZZZ;
|
||||
constexpr uint32_t DxbcShaderTranslator::kSwizzleWWWW;
|
||||
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
|
||||
constexpr uint32_t DxbcShaderTranslator::kBindingIndexUnallocated;
|
||||
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
|
||||
|
||||
DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
|
||||
bool bindless_resources_used,
|
||||
bool edram_rov_used,
|
||||
bool force_emit_source_map)
|
||||
: vendor_id_(vendor_id), edram_rov_used_(edram_rov_used) {
|
||||
: vendor_id_(vendor_id),
|
||||
bindless_resources_used_(bindless_resources_used),
|
||||
edram_rov_used_(edram_rov_used) {
|
||||
emit_source_map_ = force_emit_source_map || cvars::dxbc_source_map;
|
||||
// Don't allocate again and again for the first shader.
|
||||
shader_code_.reserve(8192);
|
||||
|
@ -154,9 +157,10 @@ void DxbcShaderTranslator::Reset() {
|
|||
cbuffer_count_ = 0;
|
||||
// System constants always used in prologues/epilogues.
|
||||
cbuffer_index_system_constants_ = cbuffer_count_++;
|
||||
cbuffer_index_float_constants_ = kCbufferIndexUnallocated;
|
||||
cbuffer_index_bool_loop_constants_ = kCbufferIndexUnallocated;
|
||||
cbuffer_index_fetch_constants_ = kCbufferIndexUnallocated;
|
||||
cbuffer_index_float_constants_ = kBindingIndexUnallocated;
|
||||
cbuffer_index_bool_loop_constants_ = kBindingIndexUnallocated;
|
||||
cbuffer_index_fetch_constants_ = kBindingIndexUnallocated;
|
||||
cbuffer_index_descriptor_indices_ = kBindingIndexUnallocated;
|
||||
|
||||
system_constants_used_ = 0;
|
||||
|
||||
|
@ -172,7 +176,19 @@ void DxbcShaderTranslator::Reset() {
|
|||
cf_instruction_predicate_if_open_ = false;
|
||||
cf_exec_predicate_written_ = false;
|
||||
|
||||
texture_srvs_.clear();
|
||||
srv_count_ = 0;
|
||||
srv_index_shared_memory_ = kBindingIndexUnallocated;
|
||||
srv_index_bindless_textures_2d_ = kBindingIndexUnallocated;
|
||||
srv_index_bindless_textures_3d_ = kBindingIndexUnallocated;
|
||||
srv_index_bindless_textures_cube_ = kBindingIndexUnallocated;
|
||||
|
||||
texture_bindings_.clear();
|
||||
texture_bindings_for_bindful_srv_indices_.clear();
|
||||
|
||||
uav_count_ = 0;
|
||||
uav_index_shared_memory_ = kBindingIndexUnallocated;
|
||||
uav_index_edram_ = kBindingIndexUnallocated;
|
||||
|
||||
sampler_bindings_.clear();
|
||||
|
||||
memexport_alloc_current_count_ = 0;
|
||||
|
@ -1369,7 +1385,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
|
|||
}
|
||||
} break;
|
||||
case InstructionStorageSource::kConstantFloat: {
|
||||
if (cbuffer_index_float_constants_ == kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_float_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_float_constants_ = cbuffer_count_++;
|
||||
}
|
||||
if (operand.storage_addressing_mode ==
|
||||
|
@ -1600,7 +1616,7 @@ void DxbcShaderTranslator::UpdateExecConditionalsAndEmitDisassembly(
|
|||
if (type == ParsedExecInstruction::Type::kConditional) {
|
||||
uint32_t bool_constant_test_temp = PushSystemTemp();
|
||||
// Check the bool constant value.
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
DxbcOpAnd(DxbcDest::R(bool_constant_test_temp, 0b0001),
|
||||
|
@ -1755,7 +1771,7 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
|
|||
|
||||
// Count (unsigned) in bits 0:7 of the loop constant, initial aL (unsigned) in
|
||||
// 8:15. Starting from vector 2 because of bool constants.
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
DxbcSrc loop_constant_src(
|
||||
|
@ -1843,7 +1859,7 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
|
|||
uint32_t aL_add_temp = PushSystemTemp();
|
||||
// Extract the value to add to aL (signed, in bits 16:23 of the loop
|
||||
// constant). Starting from vector 2 because of bool constants.
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
DxbcOpIBFE(DxbcDest::R(aL_add_temp, 0b0001), DxbcSrc::LU(8),
|
||||
|
@ -1963,6 +1979,10 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t(
|
|||
// kUint4Array48
|
||||
{nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4,
|
||||
48, 0, RdefTypeIndex::kUint4, nullptr},
|
||||
// kUint4DescriptorIndexArray - bindless descriptor indices - size written
|
||||
// dynamically.
|
||||
{nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4,
|
||||
0, 0, RdefTypeIndex::kUint4, nullptr},
|
||||
};
|
||||
|
||||
const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator::
|
||||
|
@ -2042,22 +2062,17 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
shader_object_.push_back(cbuffer_count_);
|
||||
// Constant buffer offset (set later).
|
||||
shader_object_.push_back(0);
|
||||
// Bound resource count (samplers, SRV, UAV, CBV).
|
||||
uint32_t resource_count = cbuffer_count_;
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// + 2 for shared memory SRV and UAV (vfetches can appear in pixel shaders
|
||||
// too, and the UAV is needed for memexport, however, the choice between
|
||||
// SRV and UAV is per-pipeline, not per-shader - a resource can't be in a
|
||||
// read-only state (SRV, IBV) if it's in a read/write state such as UAV).
|
||||
resource_count +=
|
||||
uint32_t(sampler_bindings_.size()) + 2 + uint32_t(texture_srvs_.size());
|
||||
}
|
||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||
// EDRAM.
|
||||
++resource_count;
|
||||
// Bindful resource count.
|
||||
uint32_t resource_count = srv_count_ + uav_count_ + cbuffer_count_;
|
||||
if (!sampler_bindings_.empty()) {
|
||||
if (bindless_resources_used_) {
|
||||
++resource_count;
|
||||
} else {
|
||||
resource_count += uint32_t(sampler_bindings_.size());
|
||||
}
|
||||
}
|
||||
shader_object_.push_back(resource_count);
|
||||
// Bound resource buffer offset (set later).
|
||||
// Bindful resource buffer offset (set later).
|
||||
shader_object_.push_back(0);
|
||||
if (IsDxbcVertexShader()) {
|
||||
// vs_5_1
|
||||
|
@ -2119,14 +2134,20 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
shader_object_.push_back(uint32_t(type.variable_class) |
|
||||
(uint32_t(type.variable_type) << 16));
|
||||
shader_object_.push_back(type.row_count | (type.column_count << 16));
|
||||
if (RdefTypeIndex(i) == RdefTypeIndex::kFloat4ConstantArray) {
|
||||
// Declaring a 0-sized array may not be safe, so write something valid
|
||||
// even if they aren't used.
|
||||
shader_object_.push_back(
|
||||
std::max(constant_register_map().float_count, uint32_t(1)));
|
||||
} else {
|
||||
shader_object_.push_back(type.element_count |
|
||||
(type.struct_member_count << 16));
|
||||
switch (RdefTypeIndex(i)) {
|
||||
case RdefTypeIndex::kFloat4ConstantArray:
|
||||
// Declaring a 0-sized array may not be safe, so write something valid
|
||||
// even if they aren't used.
|
||||
shader_object_.push_back(
|
||||
std::max(constant_register_map().float_count, uint32_t(1)));
|
||||
break;
|
||||
case RdefTypeIndex::kUint4DescriptorIndexArray:
|
||||
shader_object_.push_back(std::max(
|
||||
uint32_t((GetBindlessResourceCount() + 3) >> 2), uint32_t(1)));
|
||||
break;
|
||||
default:
|
||||
shader_object_.push_back(type.element_count |
|
||||
(type.struct_member_count << 16));
|
||||
}
|
||||
// Struct member offset (set later).
|
||||
shader_object_.push_back(0);
|
||||
|
@ -2177,33 +2198,37 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
uint32_t constant_name_offsets_system[kSysConst_Count];
|
||||
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
|
||||
constant_name_offsets_system[i] = new_offset;
|
||||
new_offset += AppendString(shader_object_, system_constant_rdef_[i].name);
|
||||
}
|
||||
}
|
||||
uint32_t constant_name_offset_float = new_offset;
|
||||
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_float_constants");
|
||||
}
|
||||
uint32_t constant_name_offset_bool = new_offset;
|
||||
uint32_t constant_name_offset_loop = constant_name_offset_bool;
|
||||
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
|
||||
uint32_t constant_name_offset_loop = new_offset;
|
||||
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_bool_constants");
|
||||
constant_name_offset_loop = new_offset;
|
||||
new_offset += AppendString(shader_object_, "xe_loop_constants");
|
||||
}
|
||||
uint32_t constant_name_offset_fetch = new_offset;
|
||||
if (constant_name_offset_fetch != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_fetch_constants");
|
||||
}
|
||||
uint32_t constant_name_offset_descriptor_indices = new_offset;
|
||||
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_descriptor_indices");
|
||||
}
|
||||
|
||||
const uint32_t constant_size = 10 * sizeof(uint32_t);
|
||||
|
||||
// System constants.
|
||||
uint32_t constant_offset_system = new_offset;
|
||||
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
uint32_t system_cbuffer_constant_offset = 0;
|
||||
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
|
||||
const SystemConstantRdef& constant = system_constant_rdef_[i];
|
||||
|
@ -2229,12 +2254,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
|
||||
// Float constants.
|
||||
uint32_t constant_offset_float = new_offset;
|
||||
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
|
||||
assert_not_zero(constant_register_map().float_count);
|
||||
shader_object_.push_back(constant_name_offset_float);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(
|
||||
std::max(constant_register_map().float_count, uint32_t(1)) * 4 *
|
||||
sizeof(float));
|
||||
shader_object_.push_back(constant_register_map().float_count * 4 *
|
||||
sizeof(float));
|
||||
shader_object_.push_back(kDxbcRdefVariableFlagUsed);
|
||||
shader_object_.push_back(types_offset +
|
||||
uint32_t(RdefTypeIndex::kFloat4ConstantArray) *
|
||||
|
@ -2249,7 +2274,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
|
||||
// Bool and loop constants.
|
||||
uint32_t constant_offset_bool_loop = new_offset;
|
||||
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
|
||||
shader_object_.push_back(constant_name_offset_bool);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
|
||||
|
@ -2279,7 +2304,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
|
||||
// Fetch constants.
|
||||
uint32_t constant_offset_fetch = new_offset;
|
||||
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
|
||||
shader_object_.push_back(constant_name_offset_fetch);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(32 * 6 * sizeof(uint32_t));
|
||||
|
@ -2294,6 +2319,26 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
new_offset += constant_size;
|
||||
}
|
||||
|
||||
// Bindless descriptor indices.
|
||||
uint32_t constant_offset_descriptor_indices = new_offset;
|
||||
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
|
||||
assert_not_zero(GetBindlessResourceCount());
|
||||
shader_object_.push_back(constant_name_offset_descriptor_indices);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(
|
||||
xe::align(GetBindlessResourceCount(), uint32_t(4)) * sizeof(uint32_t));
|
||||
shader_object_.push_back(kDxbcRdefVariableFlagUsed);
|
||||
shader_object_.push_back(
|
||||
types_offset +
|
||||
uint32_t(RdefTypeIndex::kUint4DescriptorIndexArray) * type_size);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(0);
|
||||
new_offset += constant_size;
|
||||
}
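For clarity, a small standalone sketch of the sizing math used for the bindless descriptor-index constant buffer; AlignTo4 and Uint4Vectors are illustrative names. N uint32_t indices are packed into uint4 registers, so the RDEF array needs (N + 3) >> 2 vectors, and the byte size pushed above, N aligned up to a multiple of 4 times sizeof(uint32_t), describes the same 16-byte register granularity:

#include <cassert>
#include <cstdint>

// Round a count up to a multiple of 4, like xe::align(count, uint32_t(4)).
constexpr uint32_t AlignTo4(uint32_t count) {
  return (count + 3) & ~uint32_t(3);
}

// Number of uint4 vectors (16-byte cbuffer registers) holding `count` indices.
constexpr uint32_t Uint4Vectors(uint32_t count) { return (count + 3) >> 2; }

int main() {
  for (uint32_t count = 1; count <= 9; ++count) {
    // Both formulations describe the same storage: vectors * 16 bytes.
    assert(Uint4Vectors(count) * 16 == AlignTo4(count) * sizeof(uint32_t));
  }
  assert(Uint4Vectors(5) == 2);  // 5 indices need two uint4 registers
  assert(AlignTo4(5) == 8);      // padded to 8 uint32_t values, 32 bytes
  return 0;
}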
|
||||
|
||||
// ***************************************************************************
|
||||
// Constant buffers
|
||||
// ***************************************************************************
|
||||
|
@ -2302,21 +2347,25 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
uint32_t cbuffer_name_offset_system = new_offset;
|
||||
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_system_cbuffer");
|
||||
}
|
||||
uint32_t cbuffer_name_offset_float = new_offset;
|
||||
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_float_cbuffer");
|
||||
}
|
||||
uint32_t cbuffer_name_offset_bool_loop = new_offset;
|
||||
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_bool_loop_cbuffer");
|
||||
}
|
||||
uint32_t cbuffer_name_offset_fetch = new_offset;
|
||||
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_fetch_cbuffer");
|
||||
}
|
||||
uint32_t cbuffer_name_offset_descriptor_indices = new_offset;
|
||||
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_descriptor_indices_cbuffer");
|
||||
}
|
||||
|
||||
// Write the offset to the header.
|
||||
shader_object_[chunk_position_dwords + 1] = new_offset;
|
||||
|
@ -2333,12 +2382,12 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// No D3D_SHADER_CBUFFER_FLAGS.
|
||||
shader_object_.push_back(0);
|
||||
} else if (i == cbuffer_index_float_constants_) {
|
||||
assert_not_zero(constant_register_map().float_count);
|
||||
shader_object_.push_back(cbuffer_name_offset_float);
|
||||
shader_object_.push_back(1);
|
||||
shader_object_.push_back(constant_offset_float);
|
||||
shader_object_.push_back(
|
||||
std::max(constant_register_map().float_count, uint32_t(1)) * 4 *
|
||||
sizeof(float));
|
||||
shader_object_.push_back(constant_register_map().float_count * 4 *
|
||||
sizeof(float));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
|
||||
shader_object_.push_back(0);
|
||||
} else if (i == cbuffer_index_bool_loop_constants_) {
|
||||
|
@ -2356,6 +2405,18 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
shader_object_.push_back(32 * 6 * sizeof(uint32_t));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
|
||||
shader_object_.push_back(0);
|
||||
} else if (i == cbuffer_index_descriptor_indices_) {
|
||||
assert_not_zero(GetBindlessResourceCount());
|
||||
shader_object_.push_back(cbuffer_name_offset_descriptor_indices);
|
||||
shader_object_.push_back(1);
|
||||
shader_object_.push_back(constant_offset_descriptor_indices);
|
||||
shader_object_.push_back(
|
||||
xe::align(GetBindlessResourceCount(), uint32_t(4)) *
|
||||
sizeof(uint32_t));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
|
||||
shader_object_.push_back(0);
|
||||
} else {
|
||||
assert_unhandled_case(i);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2367,138 +2428,219 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// their names already.
|
||||
new_offset = (uint32_t(shader_object_.size()) - chunk_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
uint32_t sampler_name_offset = 0;
|
||||
uint32_t shared_memory_srv_name_offset = 0;
|
||||
uint32_t texture_name_offset = 0;
|
||||
uint32_t shared_memory_uav_name_offset = 0;
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
sampler_name_offset = new_offset;
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
new_offset +=
|
||||
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
|
||||
uint32_t sampler_name_offset = new_offset;
|
||||
if (!sampler_bindings_.empty()) {
|
||||
if (bindless_resources_used_) {
|
||||
new_offset += AppendString(shader_object_, "xe_samplers");
|
||||
} else {
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
new_offset +=
|
||||
AppendString(shader_object_, sampler_bindings_[i].name.c_str());
|
||||
}
|
||||
}
|
||||
shared_memory_srv_name_offset = new_offset;
|
||||
}
|
||||
uint32_t shared_memory_srv_name_offset = new_offset;
|
||||
if (srv_index_shared_memory_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_shared_memory_srv");
|
||||
texture_name_offset = new_offset;
|
||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||
new_offset += AppendString(shader_object_, texture_srvs_[i].name.c_str());
|
||||
}
|
||||
uint32_t bindless_textures_2d_name_offset = new_offset;
|
||||
uint32_t bindless_textures_3d_name_offset = new_offset;
|
||||
uint32_t bindless_textures_cube_name_offset = new_offset;
|
||||
if (bindless_resources_used_) {
|
||||
if (srv_index_bindless_textures_2d_ != kBindingIndexUnallocated) {
|
||||
bindless_textures_2d_name_offset = new_offset;
|
||||
new_offset += AppendString(shader_object_, "xe_textures_2d");
|
||||
}
|
||||
shared_memory_uav_name_offset = new_offset;
|
||||
if (srv_index_bindless_textures_3d_ != kBindingIndexUnallocated) {
|
||||
bindless_textures_3d_name_offset = new_offset;
|
||||
new_offset += AppendString(shader_object_, "xe_textures_3d");
|
||||
}
|
||||
if (srv_index_bindless_textures_cube_ != kBindingIndexUnallocated) {
|
||||
bindless_textures_cube_name_offset = new_offset;
|
||||
new_offset += AppendString(shader_object_, "xe_textures_cube");
|
||||
}
|
||||
} else {
|
||||
for (TextureBinding& texture_binding : texture_bindings_) {
|
||||
texture_binding.bindful_srv_rdef_name_offset = new_offset;
|
||||
new_offset += AppendString(shader_object_, texture_binding.name.c_str());
|
||||
}
|
||||
}
|
||||
uint32_t shared_memory_uav_name_offset = new_offset;
|
||||
if (uav_index_shared_memory_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_shared_memory_uav");
|
||||
}
|
||||
uint32_t edram_name_offset = new_offset;
|
||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||
if (uav_index_edram_ != kBindingIndexUnallocated) {
|
||||
new_offset += AppendString(shader_object_, "xe_edram");
|
||||
}
|
||||
|
||||
// Write the offset to the header.
|
||||
shader_object_[chunk_position_dwords + 3] = new_offset;
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Samplers.
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
const SamplerBinding& sampler_binding = sampler_bindings_[i];
|
||||
// Samplers.
|
||||
if (!sampler_bindings_.empty()) {
|
||||
if (bindless_resources_used_) {
|
||||
// Bindless sampler heap.
|
||||
shader_object_.push_back(sampler_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown));
|
||||
// Multisampling not applicable.
|
||||
shader_object_.push_back(0);
|
||||
// Register s[i].
|
||||
shader_object_.push_back(i);
|
||||
// Registers s0:*.
|
||||
shader_object_.push_back(0);
|
||||
// Unbounded number of bindings.
|
||||
shader_object_.push_back(0);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
// Register space 0.
|
||||
shader_object_.push_back(0);
|
||||
// Sampler ID S0.
|
||||
shader_object_.push_back(0);
|
||||
} else {
|
||||
// Bindful samplers.
|
||||
uint32_t sampler_current_name_offset = sampler_name_offset;
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
const SamplerBinding& sampler_binding = sampler_bindings_[i];
|
||||
shader_object_.push_back(sampler_current_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown));
|
||||
// Multisampling not applicable.
|
||||
shader_object_.push_back(0);
|
||||
// Register s[i].
|
||||
shader_object_.push_back(i);
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
// Register space 0.
|
||||
shader_object_.push_back(0);
|
||||
// Sampler ID S[i].
|
||||
shader_object_.push_back(i);
|
||||
sampler_current_name_offset +=
|
||||
GetStringLength(sampler_binding.name.c_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shader resource views, sorted by binding index.
|
||||
for (uint32_t i = 0; i < srv_count_; ++i) {
|
||||
if (i == srv_index_shared_memory_) {
|
||||
// Shared memory (when memexport isn't used in the pipeline).
|
||||
shader_object_.push_back(shared_memory_srv_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kByteAddress));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVBuffer));
|
||||
// Multisampling not applicable.
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(SRVSpace::kMain));
|
||||
} else {
|
||||
uint32_t texture_name_offset;
|
||||
DxbcRdefDimension texture_dimension;
|
||||
uint32_t texture_register;
|
||||
uint32_t texture_register_count;
|
||||
SRVSpace texture_register_space;
|
||||
if (bindless_resources_used_) {
|
||||
// Bindless texture heap.
|
||||
if (i == srv_index_bindless_textures_3d_) {
|
||||
texture_name_offset = bindless_textures_3d_name_offset;
|
||||
texture_dimension = DxbcRdefDimension::kSRVTexture3D;
|
||||
texture_register_space = SRVSpace::kBindlessTextures3D;
|
||||
} else if (i == srv_index_bindless_textures_cube_) {
|
||||
texture_name_offset = bindless_textures_cube_name_offset;
|
||||
texture_dimension = DxbcRdefDimension::kSRVTextureCube;
|
||||
texture_register_space = SRVSpace::kBindlessTexturesCube;
|
||||
} else {
|
||||
assert_true(i == srv_index_bindless_textures_2d_);
|
||||
texture_name_offset = bindless_textures_2d_name_offset;
|
||||
texture_dimension = DxbcRdefDimension::kSRVTexture2DArray;
|
||||
texture_register_space = SRVSpace::kBindlessTextures2DArray;
|
||||
}
|
||||
texture_register = 0;
|
||||
texture_register_count = 0;
|
||||
} else {
|
||||
// Bindful texture.
|
||||
auto it = texture_bindings_for_bindful_srv_indices_.find(i);
|
||||
assert_true(it != texture_bindings_for_bindful_srv_indices_.end());
|
||||
uint32_t texture_binding_index = it->second;
|
||||
const TextureBinding& texture_binding =
|
||||
texture_bindings_[texture_binding_index];
|
||||
texture_name_offset = texture_binding.bindful_srv_rdef_name_offset;
|
||||
switch (texture_binding.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
texture_dimension = DxbcRdefDimension::kSRVTexture3D;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
texture_dimension = DxbcRdefDimension::kSRVTextureCube;
|
||||
break;
|
||||
default:
|
||||
assert_true(texture_binding.dimension == TextureDimension::k2D);
|
||||
texture_dimension = DxbcRdefDimension::kSRVTexture2DArray;
|
||||
}
|
||||
texture_register = uint32_t(SRVMainRegister::kBindfulTexturesStart) +
|
||||
texture_binding_index;
|
||||
texture_register_count = 1;
|
||||
texture_register_space = SRVSpace::kMain;
|
||||
}
|
||||
shader_object_.push_back(texture_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kTexture));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kFloat));
|
||||
shader_object_.push_back(uint32_t(texture_dimension));
|
||||
// Not multisampled.
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(texture_register);
|
||||
shader_object_.push_back(texture_register_count);
|
||||
// 4-component.
|
||||
shader_object_.push_back(DxbcRdefInputFlagsComponents);
|
||||
shader_object_.push_back(uint32_t(texture_register_space));
|
||||
}
|
||||
// SRV ID T[i].
|
||||
shader_object_.push_back(i);
|
||||
}
|
||||
|
||||
// Unordered access views, sorted by binding index.
|
||||
for (uint32_t i = 0; i < uav_count_; ++i) {
|
||||
if (i == uav_index_shared_memory_) {
|
||||
// Shared memory (when memexport is used in the pipeline).
|
||||
shader_object_.push_back(shared_memory_uav_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWByteAddress));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer));
|
||||
// Multisampling not applicable.
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
// Register space 0.
|
||||
shader_object_.push_back(0);
|
||||
// Sampler ID S[i].
|
||||
shader_object_.push_back(i);
|
||||
sampler_name_offset += GetStringLength(sampler_binding.name.c_str());
|
||||
}
|
||||
|
||||
// Shared memory (when memexport isn't used in the pipeline).
|
||||
shader_object_.push_back(shared_memory_srv_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kByteAddress));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVBuffer));
|
||||
// Multisampling not applicable.
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(SRVSpace::kMain));
|
||||
// SRV ID T0.
|
||||
shader_object_.push_back(0);
|
||||
|
||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||
const TextureSRV& texture_srv = texture_srvs_[i];
|
||||
shader_object_.push_back(texture_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kTexture));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kFloat));
|
||||
switch (texture_srv.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVTexture3D));
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
shader_object_.push_back(
|
||||
uint32_t(DxbcRdefDimension::kSRVTextureCube));
|
||||
break;
|
||||
default:
|
||||
shader_object_.push_back(
|
||||
uint32_t(DxbcRdefDimension::kSRVTexture2DArray));
|
||||
}
|
||||
} else if (i == uav_index_edram_) {
|
||||
// EDRAM R32_UINT buffer.
|
||||
shader_object_.push_back(edram_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWTyped));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kUInt));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer));
|
||||
// Not multisampled.
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) +
|
||||
i);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// 4-component.
|
||||
shader_object_.push_back(DxbcRdefInputFlagsComponents);
|
||||
shader_object_.push_back(uint32_t(SRVSpace::kMain));
|
||||
// SRV ID T[1 + i] - T0 is shared memory.
|
||||
shader_object_.push_back(1 + i);
|
||||
texture_name_offset += GetStringLength(texture_srv.name.c_str());
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
// Register space 0.
|
||||
shader_object_.push_back(0);
|
||||
} else {
|
||||
assert_unhandled_case(i);
|
||||
}
|
||||
|
||||
// Shared memory (when memexport is used in the pipeline).
|
||||
shader_object_.push_back(shared_memory_uav_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWByteAddress));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer));
|
||||
// Multisampling not applicable.
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
// Register space 0.
|
||||
shader_object_.push_back(0);
|
||||
// UAV ID U0.
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
|
||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||
// EDRAM uint32 buffer.
|
||||
shader_object_.push_back(edram_name_offset);
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWTyped));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kUInt));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer));
|
||||
// Not multisampled.
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||
// One binding.
|
||||
shader_object_.push_back(1);
|
||||
// No DxbcRdefInputFlags.
|
||||
shader_object_.push_back(0);
|
||||
// Register space 0.
|
||||
shader_object_.push_back(0);
|
||||
// UAV ID U1 or U0 depending on whether there's U0.
|
||||
shader_object_.push_back(ROV_GetEDRAMUAVIndex());
|
||||
// UAV ID U[i].
|
||||
shader_object_.push_back(i);
|
||||
}
|
||||
|
||||
// Constant buffers.
|
||||
|
@ -2516,6 +2658,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
} else if (i == cbuffer_index_fetch_constants_) {
|
||||
shader_object_.push_back(cbuffer_name_offset_fetch);
|
||||
register_index = uint32_t(CbufferRegister::kFetchConstants);
|
||||
} else if (i == cbuffer_index_descriptor_indices_) {
|
||||
shader_object_.push_back(cbuffer_name_offset_descriptor_indices);
|
||||
register_index = uint32_t(CbufferRegister::kDescriptorIndices);
|
||||
} else {
|
||||
assert_unhandled_case(i);
|
||||
}
|
||||
shader_object_.push_back(uint32_t(DxbcRdefInputType::kCbuffer));
|
||||
shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid));
|
||||
|
@ -3180,7 +3327,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
|
||||
// Constant buffers, from most frequenly accessed to least frequently accessed
|
||||
// (the order is a hint to the driver according to the DXBC header).
|
||||
if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_float_constants_ != kBindingIndexUnallocated) {
|
||||
assert_not_zero(constant_register_map().float_count);
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
|
@ -3196,7 +3344,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(constant_register_map().float_count);
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
|
@ -3210,7 +3358,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back((sizeof(SystemConstants) + 15) >> 4);
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_fetch_constants_ != kBindingIndexUnallocated) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
|
@ -3224,7 +3372,22 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(48);
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_descriptor_indices_ != kBindingIndexUnallocated) {
|
||||
assert_not_zero(GetBindlessResourceCount());
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(cbuffer_index_descriptor_indices_);
|
||||
shader_object_.push_back(uint32_t(CbufferRegister::kDescriptorIndices));
|
||||
shader_object_.push_back(uint32_t(CbufferRegister::kDescriptorIndices));
|
||||
shader_object_.push_back((GetBindlessResourceCount() + 3) >> 2);
|
||||
shader_object_.push_back(0);
|
||||
}
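A minimal sketch (not part of the commit) of the packing this size calculation assumes: N 32-bit descriptor indices are stored four per uint4 constant buffer element, so the buffer needs (N + 3) >> 2 vectors. Names here are illustrative only.

// Illustrative sketch: packing flat 32-bit descriptor indices into uint4
// constant buffer vectors, matching the (count + 3) >> 2 sizing above.
#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::array<uint32_t, 4>> PackDescriptorIndices(
    const std::vector<uint32_t>& indices) {
  // One uint4 element holds 4 indices; round the element count up.
  size_t vector_count = (indices.size() + 3) >> 2;
  std::vector<std::array<uint32_t, 4>> vectors(vector_count, {0, 0, 0, 0});
  for (size_t i = 0; i < indices.size(); ++i) {
    vectors[i >> 2][i & 3] = indices[i];
  }
  return vectors;
}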
|
||||
if (cbuffer_index_bool_loop_constants_ != kBindingIndexUnallocated) {
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
|
@ -3239,46 +3402,93 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(0);
|
||||
}
|
||||
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Samplers.
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
const SamplerBinding& sampler_binding = sampler_bindings_[i];
|
||||
// Samplers.
|
||||
if (!sampler_bindings_.empty()) {
|
||||
if (bindless_resources_used_) {
|
||||
// Bindless sampler heap.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) |
|
||||
ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(UINT32_MAX);
|
||||
shader_object_.push_back(0);
|
||||
} else {
|
||||
// Bindful samplers.
|
||||
for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) {
|
||||
const SamplerBinding& sampler_binding = sampler_bindings_[i];
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) |
|
||||
ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
}
|
||||
}
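A hedged sketch of the register-range difference between the two sampler declaration paths above: bindful samplers each get a one-register s# range, while the bindless path declares a single range spanning the whole shader-visible sampler heap (upper bound encoded as UINT32_MAX for "unbounded"). The helper and struct names are assumptions for illustration.

// Illustrative sketch: choosing the s# register range for a sampler
// declaration. Bindful bindings use [i, i]; bindless uses one unbounded range.
#include <cstdint>

struct SamplerRange {
  uint32_t register_first;
  uint32_t register_last;  // UINT32_MAX stands for an unbounded range.
};

SamplerRange ChooseSamplerRange(bool bindless, uint32_t binding_index) {
  if (bindless) {
    return {0, UINT32_MAX};
  }
  return {binding_index, binding_index};
}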
|
||||
|
||||
// Shader resources.
|
||||
// Shared memory ByteAddressBuffer.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
|
||||
shader_object_.push_back(uint32_t(SRVSpace::kMain));
|
||||
// Textures.
|
||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||
const TextureSRV& texture_srv = texture_srvs_[i];
|
||||
// Shader resource views, sorted by binding index.
|
||||
for (uint32_t i = 0; i < srv_count_; ++i) {
|
||||
if (i == srv_index_shared_memory_) {
|
||||
// Shared memory ByteAddressBuffer.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_DCL_RESOURCE_RAW) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(srv_index_shared_memory_);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory));
|
||||
shader_object_.push_back(uint32_t(SRVSpace::kMain));
|
||||
} else {
|
||||
// Texture or texture heap.
|
||||
D3D10_SB_RESOURCE_DIMENSION texture_srv_dimension;
|
||||
switch (texture_srv.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
uint32_t texture_register_first, texture_register_last;
|
||||
SRVSpace texture_register_space;
|
||||
if (bindless_resources_used_) {
|
||||
// Bindless texture heap.
|
||||
texture_register_first = 0;
|
||||
texture_register_last = UINT32_MAX;
|
||||
if (i == srv_index_bindless_textures_3d_) {
|
||||
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
texture_register_space = SRVSpace::kBindlessTextures3D;
|
||||
} else if (i == srv_index_bindless_textures_cube_) {
|
||||
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE;
|
||||
break;
|
||||
default:
|
||||
texture_register_space = SRVSpace::kBindlessTexturesCube;
|
||||
} else {
|
||||
assert_true(i == srv_index_bindless_textures_2d_);
|
||||
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY;
|
||||
texture_register_space = SRVSpace::kBindlessTextures2DArray;
|
||||
}
|
||||
} else {
|
||||
// Bindful texture.
|
||||
auto it = texture_bindings_for_bindful_srv_indices_.find(i);
|
||||
assert_true(it != texture_bindings_for_bindful_srv_indices_.end());
|
||||
uint32_t texture_binding_index = it->second;
|
||||
const TextureBinding& texture_binding =
|
||||
texture_bindings_[texture_binding_index];
|
||||
switch (texture_binding.dimension) {
|
||||
case TextureDimension::k3D:
|
||||
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE;
|
||||
break;
|
||||
default:
|
||||
assert_true(texture_binding.dimension == TextureDimension::k2D);
|
||||
texture_srv_dimension = D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY;
|
||||
}
|
||||
texture_register_first = texture_register_last =
|
||||
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
|
||||
texture_binding_index;
|
||||
texture_register_space = SRVSpace::kMain;
|
||||
}
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_RESOURCE) |
|
||||
|
@ -3286,54 +3496,55 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 3));
|
||||
// T0 is shared memory.
|
||||
shader_object_.push_back(1 + i);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) +
|
||||
i);
|
||||
shader_object_.push_back(uint32_t(SRVMainRegister::kBoundTexturesStart) +
|
||||
i);
|
||||
shader_object_.push_back(i);
|
||||
shader_object_.push_back(texture_register_first);
|
||||
shader_object_.push_back(texture_register_last);
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 0) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 1) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 2) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_FLOAT, 3));
|
||||
shader_object_.push_back(uint32_t(SRVSpace::kMain));
|
||||
shader_object_.push_back(uint32_t(texture_register_space));
|
||||
}
|
||||
}
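A hedged sketch of the t# register range and register space selection mirrored by the SRV declaration loop above: bindless textures are declared as one unbounded range per dimension-specific space (2D array, 3D, cube), while bindful textures get a single register in the main space offset by the binding index. Enum and parameter names are placeholders, not the commit's identifiers.

// Illustrative sketch: picking the register range and space for a texture
// SRV declaration, bindful vs bindless.
#include <cstdint>

enum class Space : uint32_t { kMain, kTextures2DArray, kTextures3D, kTexturesCube };

struct TextureRange {
  uint32_t register_first;
  uint32_t register_last;
  Space space;
};

TextureRange ChooseTextureRange(bool bindless, Space bindless_space,
                                uint32_t bindful_textures_start,
                                uint32_t binding_index) {
  if (bindless) {
    // The whole unbounded heap lives in a dimension-specific space.
    return {0, UINT32_MAX, bindless_space};
  }
  uint32_t reg = bindful_textures_start + binding_index;
  return {reg, reg, Space::kMain};
}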
|
||||
|
||||
// Unordered access views.
|
||||
if (!is_depth_only_pixel_shader_) {
|
||||
// Shared memory RWByteAddressBuffer.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
if (IsDxbcPixelShader() && edram_rov_used_) {
|
||||
// EDRAM uint32 rasterizer-ordered buffer.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
|
||||
ENCODE_D3D10_SB_RESOURCE_DIMENSION(D3D10_SB_RESOURCE_DIMENSION_BUFFER) |
|
||||
D3D11_SB_RASTERIZER_ORDERED_ACCESS |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(ROV_GetEDRAMUAVIndex());
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 2) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 3));
|
||||
shader_object_.push_back(0);
|
||||
// Unordered access views, sorted by binding index.
|
||||
for (uint32_t i = 0; i < uav_count_; ++i) {
|
||||
if (i == uav_index_shared_memory_) {
|
||||
// Shared memory RWByteAddressBuffer.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(uav_index_shared_memory_);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory));
|
||||
shader_object_.push_back(0);
|
||||
} else if (i == uav_index_edram_) {
|
||||
// EDRAM buffer R32_UINT rasterizer-ordered view.
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||
D3D11_SB_OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) |
|
||||
ENCODE_D3D10_SB_RESOURCE_DIMENSION(
|
||||
D3D10_SB_RESOURCE_DIMENSION_BUFFER) |
|
||||
D3D11_SB_RASTERIZER_ORDERED_ACCESS |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(uav_index_edram_);
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||
shader_object_.push_back(uint32_t(UAVRegister::kEDRAM));
|
||||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 0) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 1) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 2) |
|
||||
ENCODE_D3D10_SB_RESOURCE_RETURN_TYPE(D3D10_SB_RETURN_TYPE_UINT, 3));
|
||||
shader_object_.push_back(0);
|
||||
} else {
|
||||
assert_unhandled_case(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Inputs and outputs.
|
||||
|
|
|
@ -101,8 +101,8 @@ namespace gpu {
|
|||
// 0 for NaN.
|
||||
class DxbcShaderTranslator : public ShaderTranslator {
|
||||
public:
|
||||
DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used,
|
||||
bool force_emit_source_map = false);
|
||||
DxbcShaderTranslator(uint32_t vendor_id, bool bindless_resources_used,
|
||||
bool edram_rov_used, bool force_emit_source_map = false);
|
||||
~DxbcShaderTranslator() override;
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
|
@ -111,6 +111,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kFloatConstants,
|
||||
kBoolLoopConstants,
|
||||
kFetchConstants,
|
||||
kDescriptorIndices,
|
||||
};
|
||||
|
||||
// Some are referenced in xenos_draw.hlsli - check it too when updating!
|
||||
|
@ -331,30 +332,39 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
enum class SRVSpace {
|
||||
// SRVMainSpaceRegister t# layout.
|
||||
kMain,
|
||||
kBindlessTextures2DArray,
|
||||
kBindlessTextures3D,
|
||||
kBindlessTexturesCube,
|
||||
};
|
||||
|
||||
// Shader resource view bindings in SRVSpace::kMain.
|
||||
enum class SRVMainRegister {
|
||||
kSharedMemory,
|
||||
kBoundTexturesStart,
|
||||
kBindfulTexturesStart,
|
||||
};
|
||||
|
||||
// 192 textures at most because there are 32 fetch constants, and textures can
|
||||
// be 2D array, 3D or cube, and also signed and unsigned.
|
||||
static constexpr uint32_t kMaxTextureSRVIndexBits = 8;
|
||||
static constexpr uint32_t kMaxTextureSRVs =
|
||||
(1 << kMaxTextureSRVIndexBits) - 1;
|
||||
struct TextureSRV {
|
||||
static constexpr uint32_t kMaxTextureBindingIndexBits = 8;
|
||||
static constexpr uint32_t kMaxTextureBindings =
|
||||
(1 << kMaxTextureBindingIndexBits) - 1;
|
||||
struct TextureBinding {
|
||||
uint32_t bindful_srv_index;
|
||||
// Temporary for WriteResourceDefinitions.
|
||||
uint32_t bindful_srv_rdef_name_offset;
|
||||
uint32_t bindless_descriptor_index;
|
||||
uint32_t fetch_constant;
|
||||
// Stacked and 3D are separate TextureBindings, even for bindless for null
|
||||
// descriptor handling simplicity.
|
||||
TextureDimension dimension;
|
||||
bool is_signed;
|
||||
std::string name;
|
||||
};
|
||||
// The first binding returned is at t[SRVMainRegister::kBoundTexturesStart]
|
||||
// The first binding returned is at t[SRVMainRegister::kBindfulTexturesStart]
|
||||
// of space SRVSpace::kMain.
|
||||
const TextureSRV* GetTextureSRVs(uint32_t& count_out) const {
|
||||
count_out = uint32_t(texture_srvs_.size());
|
||||
return texture_srvs_.data();
|
||||
const TextureBinding* GetTextureBindings(uint32_t& count_out) const {
|
||||
count_out = uint32_t(texture_bindings_.size());
|
||||
return texture_bindings_.data();
|
||||
}
|
||||
|
||||
// Arbitrary limit - there can't be more than 2048 in a shader-visible
|
||||
|
@ -369,6 +379,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
static constexpr uint32_t kMaxSamplerBindings =
|
||||
(1 << kMaxSamplerBindingIndexBits) - 1;
|
||||
struct SamplerBinding {
|
||||
uint32_t bindless_descriptor_index;
|
||||
uint32_t fetch_constant;
|
||||
TextureFilter mag_filter;
|
||||
TextureFilter min_filter;
|
||||
|
@ -381,6 +392,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
return sampler_bindings_.data();
|
||||
}
|
||||
|
||||
// Returns the number of texture SRV and sampler offsets that need to be
|
||||
// passed via a constant buffer to the shader.
|
||||
uint32_t GetBindlessResourceCount() const {
|
||||
return uint32_t(texture_bindings_.size() + sampler_bindings_.size());
|
||||
}
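A small hedged sketch of the shared index space implied by GetBindlessResourceCount: texture and sampler bindings draw their bindless descriptor indices from one flat sequence, so a newly added binding simply takes the current total count as its index. The allocator type below is illustrative, not part of the commit.

// Illustrative sketch: textures and samplers share one flat bindless
// descriptor index space; a new binding takes the next index.
#include <cstdint>
#include <vector>

struct BindlessIndexAllocator {
  std::vector<uint32_t> texture_descriptor_indices;
  std::vector<uint32_t> sampler_descriptor_indices;

  uint32_t TotalCount() const {
    return uint32_t(texture_descriptor_indices.size() +
                    sampler_descriptor_indices.size());
  }
  uint32_t AddTexture() {
    uint32_t index = TotalCount();
    texture_descriptor_indices.push_back(index);
    return index;
  }
  uint32_t AddSampler() {
    uint32_t index = TotalCount();
    sampler_descriptor_indices.push_back(index);
    return index;
  }
};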
|
||||
|
||||
// Unordered access view bindings in space 0.
|
||||
enum class UAVRegister {
|
||||
kSharedMemory,
|
||||
|
@ -2144,11 +2161,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t piece_temp_component, uint32_t accumulator_temp,
|
||||
uint32_t accumulator_temp_component);
|
||||
|
||||
inline uint32_t ROV_GetEDRAMUAVIndex() const {
|
||||
// xe_edram is U1 when there's xe_shared_memory_uav which is U0, but when
|
||||
// there's no xe_shared_memory_uav, it's U0.
|
||||
return is_depth_only_pixel_shader_ ? 0 : 1;
|
||||
}
|
||||
// Whether it's possible and worth skipping running the translated shader for
|
||||
// 2x2 quads.
|
||||
bool ROV_IsDepthStencilEarly() const {
|
||||
|
@ -2328,19 +2340,19 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
void CloseInstructionPredication();
|
||||
void JumpToLabel(uint32_t address);
|
||||
|
||||
DxbcSrc FindOrAddTextureSRV(uint32_t fetch_constant,
|
||||
TextureDimension dimension, bool is_signed);
|
||||
DxbcSrc FindOrAddSamplerBinding(uint32_t fetch_constant,
|
||||
TextureFilter mag_filter,
|
||||
TextureFilter min_filter,
|
||||
TextureFilter mip_filter,
|
||||
AnisoFilter aniso_filter);
|
||||
uint32_t FindOrAddTextureBinding(uint32_t fetch_constant,
|
||||
TextureDimension dimension, bool is_signed);
|
||||
uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant,
|
||||
TextureFilter mag_filter,
|
||||
TextureFilter min_filter,
|
||||
TextureFilter mip_filter,
|
||||
AnisoFilter aniso_filter);
|
||||
// Marks fetch constants as used by the DXBC shader and returns DxbcSrc
|
||||
// for the words 01 (pair 0), 23 (pair 1) or 45 (pair 2) of the texture fetch
|
||||
// constant.
|
||||
DxbcSrc RequestTextureFetchConstantWordPair(uint32_t fetch_constant_index,
|
||||
uint32_t pair_index) {
|
||||
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_fetch_constants_ = cbuffer_count_++;
|
||||
}
|
||||
uint32_t total_pair_index = fetch_constant_index * 3 + pair_index;
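A hedged sketch of the addressing that total_pair_index feeds, assuming the 6-dword texture fetch constants are tightly packed as uint4 vectors (32 constants in 48 vectors, as declared elsewhere in this file): pair p of fetch constant f starts at dword 6f + 2p, so the vector is (3f + p) >> 1 and the pair occupies either .xy or .zw. The helper below is illustrative only.

// Illustrative sketch: locating word pair `pair_index` (0 = words 0-1,
// 1 = words 2-3, 2 = words 4-5) of a texture fetch constant in the packed
// fetch constant buffer.
#include <cstdint>

struct WordPairLocation {
  uint32_t vector;           // uint4 element of the constant buffer.
  uint32_t first_component;  // 0 = .xy, 2 = .zw.
};

WordPairLocation LocateFetchConstantWordPair(uint32_t fetch_constant_index,
                                             uint32_t pair_index) {
  uint32_t total_pair_index = fetch_constant_index * 3 + pair_index;
  return {total_pair_index >> 1, (total_pair_index & 1) ? 2u : 0u};
}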
|
||||
|
@ -2392,6 +2404,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Vendor ID of the GPU manufacturer, for toggling unsupported features.
|
||||
uint32_t vendor_id_;
|
||||
|
||||
// Whether textures and samplers should be bindless.
|
||||
bool bindless_resources_used_;
|
||||
|
||||
// Whether the output merger should be emulated in pixel shaders.
|
||||
bool edram_rov_used_;
|
||||
|
||||
|
@ -2422,6 +2437,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kUint4Array8,
|
||||
// Fetch constants.
|
||||
kUint4Array48,
|
||||
// Descriptor indices - size written dynamically.
|
||||
kUint4DescriptorIndexArray,
|
||||
|
||||
kCount,
|
||||
kUnknown = kCount
|
||||
|
@ -2448,14 +2465,16 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)];
|
||||
|
||||
static constexpr uint32_t kBindingIndexUnallocated = UINT32_MAX;
|
||||
|
||||
// Number of constant buffer bindings used in this shader - also used for
|
||||
// generation of indices of constant buffers that are optional.
|
||||
uint32_t cbuffer_count_;
|
||||
static constexpr uint32_t kCbufferIndexUnallocated = UINT32_MAX;
|
||||
uint32_t cbuffer_index_system_constants_;
|
||||
uint32_t cbuffer_index_float_constants_;
|
||||
uint32_t cbuffer_index_bool_loop_constants_;
|
||||
uint32_t cbuffer_index_fetch_constants_;
|
||||
uint32_t cbuffer_index_descriptor_indices_;
|
||||
|
||||
struct SystemConstantRdef {
|
||||
const char* name;
|
||||
|
@ -2582,7 +2601,24 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// predicate condition anymore.
|
||||
bool cf_exec_predicate_written_;
|
||||
|
||||
std::vector<TextureSRV> texture_srvs_;
|
||||
// Number of SRV resources used in this shader - also used for generation of
|
||||
// indices of SRV resources that are optional.
|
||||
uint32_t srv_count_;
|
||||
uint32_t srv_index_shared_memory_;
|
||||
uint32_t srv_index_bindless_textures_2d_;
|
||||
uint32_t srv_index_bindless_textures_3d_;
|
||||
uint32_t srv_index_bindless_textures_cube_;
|
||||
|
||||
std::vector<TextureBinding> texture_bindings_;
|
||||
std::unordered_map<uint32_t, uint32_t>
|
||||
texture_bindings_for_bindful_srv_indices_;
|
||||
|
||||
// Number of UAV resources used in this shader - also used for generation of
|
||||
// indices of UAV resources that are optional.
|
||||
uint32_t uav_count_;
|
||||
uint32_t uav_index_shared_memory_;
|
||||
uint32_t uav_index_edram_;
|
||||
|
||||
std::vector<SamplerBinding> sampler_bindings_;
|
||||
|
||||
// Number of `alloc export`s encountered so far in the translation. The index
|
||||
|
|
|
@ -44,7 +44,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
|
||||
// Create a 2-component DxbcSrc for the fetch constant (vf0 is in [0].xy of
|
||||
// the fetch constants array, vf1 is in [0].zw, vf2 is in [1].xy).
|
||||
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
|
||||
if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_fetch_constants_ = cbuffer_count_++;
|
||||
}
|
||||
DxbcSrc fetch_constant_src(DxbcSrc::CB(
|
||||
|
@ -135,13 +135,21 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
.Select(kSysConst_Flags_Comp),
|
||||
DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV));
|
||||
DxbcOpIf(false, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX));
|
||||
if (srv_index_shared_memory_ == kBindingIndexUnallocated) {
|
||||
srv_index_shared_memory_ = srv_count_++;
|
||||
}
|
||||
if (uav_index_shared_memory_ == kBindingIndexUnallocated) {
|
||||
uav_index_shared_memory_ = uav_count_++;
|
||||
}
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
if (i) {
|
||||
DxbcOpElse();
|
||||
}
|
||||
DxbcSrc shared_memory_src(
|
||||
i ? DxbcSrc::U(0, uint32_t(UAVRegister::kSharedMemory))
|
||||
: DxbcSrc::T(0, uint32_t(SRVMainRegister::kSharedMemory)));
|
||||
i ? DxbcSrc::U(uav_index_shared_memory_,
|
||||
uint32_t(UAVRegister::kSharedMemory))
|
||||
: DxbcSrc::T(srv_index_shared_memory_,
|
||||
uint32_t(SRVMainRegister::kSharedMemory)));
|
||||
uint32_t needed_words_remaining = needed_words;
|
||||
uint32_t word_index_previous = first_word_index;
|
||||
while (needed_words_remaining) {
|
||||
|
@ -438,7 +446,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
StoreResult(instr.result, DxbcSrc::R(system_temp_result_));
|
||||
}
|
||||
|
||||
DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV(
|
||||
uint32_t DxbcShaderTranslator::FindOrAddTextureBinding(
|
||||
uint32_t fetch_constant, TextureDimension dimension, bool is_signed) {
|
||||
// 1D and 2D textures (including stacked ones) are treated as 2D arrays for
|
||||
// binding and coordinate simplicity.
|
||||
|
@ -446,47 +454,52 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddTextureSRV(
|
|||
dimension = TextureDimension::k2D;
|
||||
}
|
||||
uint32_t srv_index = UINT32_MAX;
|
||||
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
|
||||
TextureSRV& texture_srv = texture_srvs_[i];
|
||||
if (texture_srv.fetch_constant == fetch_constant &&
|
||||
texture_srv.dimension == dimension &&
|
||||
texture_srv.is_signed == is_signed) {
|
||||
srv_index = i;
|
||||
for (uint32_t i = 0; i < uint32_t(texture_bindings_.size()); ++i) {
|
||||
TextureBinding& texture_binding = texture_bindings_[i];
|
||||
if (texture_binding.fetch_constant == fetch_constant &&
|
||||
texture_binding.dimension == dimension &&
|
||||
texture_binding.is_signed == is_signed) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (texture_bindings_.size() >= kMaxTextureBindings) {
|
||||
assert_always();
|
||||
return kMaxTextureBindings - 1;
|
||||
}
|
||||
uint32_t texture_binding_index = uint32_t(texture_bindings_.size());
|
||||
TextureBinding new_texture_binding;
|
||||
if (!bindless_resources_used_) {
|
||||
new_texture_binding.bindful_srv_index = srv_count_++;
|
||||
texture_bindings_for_bindful_srv_indices_.insert(
|
||||
{new_texture_binding.bindful_srv_index, texture_binding_index});
|
||||
} else {
|
||||
new_texture_binding.bindful_srv_index = kBindingIndexUnallocated;
|
||||
}
|
||||
new_texture_binding.bindful_srv_rdef_name_offset = 0;
|
||||
// Consistently 0 if not bindless as it may be used for hashing.
|
||||
new_texture_binding.bindless_descriptor_index =
|
||||
bindless_resources_used_ ? GetBindlessResourceCount() : 0;
|
||||
new_texture_binding.fetch_constant = fetch_constant;
|
||||
new_texture_binding.dimension = dimension;
|
||||
new_texture_binding.is_signed = is_signed;
|
||||
const char* dimension_name;
|
||||
switch (dimension) {
|
||||
case TextureDimension::k3D:
|
||||
dimension_name = "3d";
|
||||
break;
|
||||
}
|
||||
case TextureDimension::kCube:
|
||||
dimension_name = "cube";
|
||||
break;
|
||||
default:
|
||||
dimension_name = "2d";
|
||||
}
|
||||
if (srv_index == UINT32_MAX) {
|
||||
if (texture_srvs_.size() >= kMaxTextureSRVs) {
|
||||
assert_always();
|
||||
srv_index = kMaxTextureSRVs - 1;
|
||||
} else {
|
||||
TextureSRV new_texture_srv;
|
||||
new_texture_srv.fetch_constant = fetch_constant;
|
||||
new_texture_srv.dimension = dimension;
|
||||
new_texture_srv.is_signed = is_signed;
|
||||
const char* dimension_name;
|
||||
switch (dimension) {
|
||||
case TextureDimension::k3D:
|
||||
dimension_name = "3d";
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
dimension_name = "cube";
|
||||
break;
|
||||
default:
|
||||
dimension_name = "2d";
|
||||
}
|
||||
new_texture_srv.name = fmt::format("xe_texture{}_{}_{}", fetch_constant,
|
||||
new_texture_binding.name = fmt::format("xe_texture{}_{}_{}", fetch_constant,
|
||||
dimension_name, is_signed ? 's' : 'u');
|
||||
srv_index = uint32_t(texture_srvs_.size());
|
||||
texture_srvs_.emplace_back(std::move(new_texture_srv));
|
||||
}
|
||||
}
|
||||
// T0 is shared memory.
|
||||
return DxbcSrc::T(1 + srv_index,
|
||||
uint32_t(SRVMainRegister::kBoundTexturesStart) + srv_index);
|
||||
texture_bindings_.emplace_back(std::move(new_texture_binding));
|
||||
return texture_binding_index;
|
||||
}
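The function above follows a find-or-add pattern: a binding is keyed by (fetch constant, dimension, signedness), and repeated fetches of the same texture reuse one binding instead of declaring a new SRV each time. A minimal sketch of that pattern, with placeholder types:

// Illustrative sketch: deduplicating texture bindings by their key.
#include <cstdint>
#include <vector>

struct BindingKey {
  uint32_t fetch_constant;
  uint32_t dimension;
  bool is_signed;
  bool operator==(const BindingKey& other) const {
    return fetch_constant == other.fetch_constant &&
           dimension == other.dimension && is_signed == other.is_signed;
  }
};

uint32_t FindOrAdd(std::vector<BindingKey>& bindings, const BindingKey& key) {
  for (uint32_t i = 0; i < uint32_t(bindings.size()); ++i) {
    if (bindings[i] == key) {
      return i;  // Reuse the existing binding.
    }
  }
  bindings.push_back(key);
  return uint32_t(bindings.size() - 1);
}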
|
||||
|
||||
DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding(
|
||||
uint32_t DxbcShaderTranslator::FindOrAddSamplerBinding(
|
||||
uint32_t fetch_constant, TextureFilter mag_filter, TextureFilter min_filter,
|
||||
TextureFilter mip_filter, AnisoFilter aniso_filter) {
|
||||
// In Direct3D 12, anisotropic filtering implies linear filtering.
|
||||
|
@ -505,43 +518,42 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::FindOrAddSamplerBinding(
|
|||
sampler_binding.min_filter == min_filter &&
|
||||
sampler_binding.mip_filter == mip_filter &&
|
||||
sampler_binding.aniso_filter == aniso_filter) {
|
||||
sampler_index = i;
|
||||
break;
|
||||
return i;
|
||||
}
|
||||
}
|
||||
if (sampler_index == UINT32_MAX) {
|
||||
if (sampler_bindings_.size() >= kMaxSamplerBindings) {
|
||||
assert_always();
|
||||
sampler_index = kMaxSamplerBindings - 1;
|
||||
if (sampler_bindings_.size() >= kMaxSamplerBindings) {
|
||||
assert_always();
|
||||
return kMaxSamplerBindings - 1;
|
||||
}
|
||||
std::ostringstream name;
|
||||
name << "xe_sampler" << fetch_constant;
|
||||
if (aniso_filter != AnisoFilter::kUseFetchConst) {
|
||||
if (aniso_filter == AnisoFilter::kDisabled) {
|
||||
name << "_a0";
|
||||
} else {
|
||||
std::ostringstream name;
|
||||
name << "xe_sampler" << fetch_constant;
|
||||
if (aniso_filter != AnisoFilter::kUseFetchConst) {
|
||||
if (aniso_filter == AnisoFilter::kDisabled) {
|
||||
name << "_a0";
|
||||
} else {
|
||||
name << "_a" << (1u << (uint32_t(aniso_filter) - 1));
|
||||
}
|
||||
}
|
||||
if (aniso_filter == AnisoFilter::kDisabled ||
|
||||
aniso_filter == AnisoFilter::kUseFetchConst) {
|
||||
static const char* kFilterSuffixes[] = {"p", "l", "b", "f"};
|
||||
name << "_" << kFilterSuffixes[uint32_t(mag_filter)]
|
||||
<< kFilterSuffixes[uint32_t(min_filter)]
|
||||
<< kFilterSuffixes[uint32_t(mip_filter)];
|
||||
}
|
||||
SamplerBinding new_sampler_binding;
|
||||
new_sampler_binding.fetch_constant = fetch_constant;
|
||||
new_sampler_binding.mag_filter = mag_filter;
|
||||
new_sampler_binding.min_filter = min_filter;
|
||||
new_sampler_binding.mip_filter = mip_filter;
|
||||
new_sampler_binding.aniso_filter = aniso_filter;
|
||||
new_sampler_binding.name = name.str();
|
||||
sampler_index = uint32_t(sampler_bindings_.size());
|
||||
sampler_bindings_.emplace_back(std::move(new_sampler_binding));
|
||||
name << "_a" << (1u << (uint32_t(aniso_filter) - 1));
|
||||
}
|
||||
}
|
||||
return DxbcSrc::S(sampler_index, sampler_index);
|
||||
if (aniso_filter == AnisoFilter::kDisabled ||
|
||||
aniso_filter == AnisoFilter::kUseFetchConst) {
|
||||
static const char* kFilterSuffixes[] = {"p", "l", "b", "f"};
|
||||
name << "_" << kFilterSuffixes[uint32_t(mag_filter)]
|
||||
<< kFilterSuffixes[uint32_t(min_filter)]
|
||||
<< kFilterSuffixes[uint32_t(mip_filter)];
|
||||
}
|
||||
SamplerBinding new_sampler_binding;
|
||||
// Consistently 0 if not bindless as it may be used for hashing.
|
||||
new_sampler_binding.bindless_descriptor_index =
|
||||
bindless_resources_used_ ? GetBindlessResourceCount() : 0;
|
||||
new_sampler_binding.fetch_constant = fetch_constant;
|
||||
new_sampler_binding.mag_filter = mag_filter;
|
||||
new_sampler_binding.min_filter = min_filter;
|
||||
new_sampler_binding.mip_filter = mip_filter;
|
||||
new_sampler_binding.aniso_filter = aniso_filter;
|
||||
new_sampler_binding.name = name.str();
|
||||
uint32_t sampler_binding_index = uint32_t(sampler_bindings_.size());
|
||||
sampler_bindings_.emplace_back(std::move(new_sampler_binding));
|
||||
return sampler_binding_index;
|
||||
}
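A hedged, self-contained reconstruction of the sampler debug-name scheme visible in the stream operations above: "xe_sampler" plus the fetch constant index, an optional "_a<n>" anisotropy suffix when the override is not taken from the fetch constant, and point/linear/basemap/fetch-constant filter suffixes when anisotropy is disabled or deferred to the fetch constant. The enum values are placeholders, not the translator's actual enums.

// Illustrative sketch: composing a sampler binding's debug name.
#include <cstdint>
#include <sstream>
#include <string>

enum class Aniso : uint32_t { kDisabled, kMax1To1, kMax2To1, kMax4To1,
                              kMax8To1, kMax16To1, kUseFetchConst };
enum class Filter : uint32_t { kPoint, kLinear, kBaseMap, kUseFetchConst };

std::string MakeSamplerName(uint32_t fetch_constant, Filter mag, Filter min,
                            Filter mip, Aniso aniso) {
  std::ostringstream name;
  name << "xe_sampler" << fetch_constant;
  if (aniso != Aniso::kUseFetchConst) {
    if (aniso == Aniso::kDisabled) {
      name << "_a0";
    } else {
      name << "_a" << (1u << (uint32_t(aniso) - 1));
    }
  }
  if (aniso == Aniso::kDisabled || aniso == Aniso::kUseFetchConst) {
    static const char* kFilterSuffixes[] = {"p", "l", "b", "f"};
    name << "_" << kFilterSuffixes[uint32_t(mag)]
         << kFilterSuffixes[uint32_t(min)] << kFilterSuffixes[uint32_t(mip)];
  }
  return name.str();
}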
|
||||
|
||||
void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||
|
@ -893,7 +905,6 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
LoadOperand(instr.operands[0], used_result_nonzero_components,
|
||||
coord_operand_temp_pushed);
|
||||
DxbcSrc coord_src(coord_operand);
|
||||
uint32_t coord_temp = UINT32_MAX;
|
||||
uint32_t offsets_needed = offsets_not_zero & used_result_nonzero_components;
|
||||
if (!instr.attributes.unnormalized_coordinates || offsets_needed) {
|
||||
// Using system_temp_result_ as a temporary for coordinate denormalization
|
||||
|
@ -948,7 +959,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// - 1D, 2D array - need to be padded to 2D array coordinates.
|
||||
// - 3D - Z needs to be unnormalized for stacked and normalized for 3D.
|
||||
// - Cube - coordinates need to be transformed into the cube space.
|
||||
uint32_t coord_temp = PushSystemTemp();
|
||||
// Bindless sampler index will be loaded to W after loading the coordinates
|
||||
// (so W can be used as a temporary for coordinate loading).
|
||||
uint32_t coord_and_sampler_temp = PushSystemTemp();
|
||||
|
||||
// Need normalized coordinates (except for Z - keep it as is, will be
|
||||
// converted later according to whether the texture is 3D). For cube maps,
|
||||
|
@ -978,51 +991,54 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
normalized_components);
|
||||
if (offsets_not_zero & normalized_components) {
|
||||
// FIXME(Triang3l): Offsets need to be applied at the LOD being fetched.
|
||||
DxbcOpAdd(DxbcDest::R(coord_temp, normalized_components), coord_operand,
|
||||
DxbcSrc::LP(offsets));
|
||||
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, normalized_components),
|
||||
coord_operand, DxbcSrc::LP(offsets));
|
||||
assert_not_zero(normalized_components & 0b011);
|
||||
DxbcOpDiv(DxbcDest::R(coord_temp, normalized_components & 0b011),
|
||||
DxbcSrc::R(coord_temp), DxbcSrc::R(size_and_is_3d_temp));
|
||||
DxbcOpDiv(
|
||||
DxbcDest::R(coord_and_sampler_temp, normalized_components & 0b011),
|
||||
DxbcSrc::R(coord_and_sampler_temp),
|
||||
DxbcSrc::R(size_and_is_3d_temp));
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
// Normalize if 3D.
|
||||
assert_true((size_needed_components & 0b1100) == 0b1100);
|
||||
DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
|
||||
DxbcOpDiv(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
|
||||
DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ));
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
} else {
|
||||
DxbcOpDiv(DxbcDest::R(coord_temp, normalized_components), coord_operand,
|
||||
DxbcSrc::R(size_and_is_3d_temp));
|
||||
DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, normalized_components),
|
||||
coord_operand, DxbcSrc::R(size_and_is_3d_temp));
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
// Don't normalize if stacked.
|
||||
assert_true((size_needed_components & 0b1000) == 0b1000);
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
|
||||
coord_operand.SelectFromSwizzled(2));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Normalized coordinates - apply offsets to XY or copy them to
|
||||
// coord_temp, and if stacked, denormalize Z.
|
||||
// coord_and_sampler_temp, and if stacked, denormalize Z.
|
||||
uint32_t coords_with_offset = offsets_not_zero & normalized_components;
|
||||
if (coords_with_offset) {
|
||||
// FIXME(Triang3l): Offsets need to be applied at the LOD being fetched.
|
||||
assert_true((size_needed_components & coords_with_offset) ==
|
||||
coords_with_offset);
|
||||
DxbcOpDiv(DxbcDest::R(coord_temp, coords_with_offset),
|
||||
DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, coords_with_offset),
|
||||
DxbcSrc::LP(offsets), DxbcSrc::R(size_and_is_3d_temp));
|
||||
DxbcOpAdd(DxbcDest::R(coord_temp, coords_with_offset), coord_operand,
|
||||
DxbcSrc::R(coord_temp));
|
||||
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, coords_with_offset),
|
||||
coord_operand, DxbcSrc::R(coord_and_sampler_temp));
|
||||
}
|
||||
uint32_t coords_without_offset =
|
||||
~coords_with_offset & normalized_components;
|
||||
// 3D/stacked without offset is handled separately.
|
||||
if (coords_without_offset & 0b011) {
|
||||
DxbcOpMov(DxbcDest::R(coord_temp, coords_without_offset & 0b011),
|
||||
coord_operand);
|
||||
DxbcOpMov(
|
||||
DxbcDest::R(coord_and_sampler_temp, coords_without_offset & 0b011),
|
||||
coord_operand);
|
||||
}
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
assert_true((size_needed_components & 0b1100) == 0b1100);
|
||||
|
@ -1030,73 +1046,79 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// Denormalize and offset Z (re-apply the offset not to lose precision
|
||||
// as a result of division) if stacked.
|
||||
DxbcOpIf(false, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
|
||||
DxbcOpMAd(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
coord_operand.SelectFromSwizzled(2),
|
||||
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::LF(offsets[2]));
|
||||
DxbcOpEndIf();
|
||||
} else {
|
||||
// Denormalize Z if stacked, and revert to normalized if 3D.
|
||||
DxbcOpMul(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcOpMul(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
coord_operand.SelectFromSwizzled(2),
|
||||
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ));
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW),
|
||||
coord_operand.SelectFromSwizzled(2),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
}
|
||||
}
|
||||
}
|
||||
switch (instr.dimension) {
|
||||
case TextureDimension::k1D:
|
||||
// Pad to 2D array coordinates.
|
||||
DxbcOpMov(DxbcDest::R(coord_temp, 0b0110), DxbcSrc::LF(0.0f));
|
||||
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0110),
|
||||
DxbcSrc::LF(0.0f));
|
||||
break;
|
||||
case TextureDimension::k2D:
|
||||
// Pad to 2D array coordinates.
|
||||
DxbcOpMov(DxbcDest::R(coord_temp, 0b0100), DxbcSrc::LF(0.0f));
|
||||
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::LF(0.0f));
|
||||
break;
|
||||
case TextureDimension::kCube: {
|
||||
// Transform from the major axis SC/TC plus 1 into cube coordinates.
|
||||
// Move SC/TC from 1...2 to -1...1.
|
||||
DxbcOpMAd(DxbcDest::R(coord_temp, 0b0011), DxbcSrc::R(coord_temp),
|
||||
DxbcSrc::LF(2.0f), DxbcSrc::LF(-3.0f));
|
||||
DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0011),
|
||||
DxbcSrc::R(coord_and_sampler_temp), DxbcSrc::LF(2.0f),
|
||||
DxbcSrc::LF(-3.0f));
|
||||
// Get the face index (floored, within 0...5) as an integer to
|
||||
// coord_temp.z.
|
||||
// coord_and_sampler_temp.z.
|
||||
if (offsets[2]) {
|
||||
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
coord_operand.SelectFromSwizzled(2),
|
||||
DxbcSrc::LF(offsets[2]));
|
||||
DxbcOpFToU(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
} else {
|
||||
DxbcOpFToU(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
coord_operand.SelectFromSwizzled(2));
|
||||
}
|
||||
DxbcOpUMin(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), DxbcSrc::LU(5));
|
||||
DxbcOpUMin(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::LU(5));
|
||||
// Split the face index into axis and sign (0 - positive, 1 - negative)
|
||||
// to coord_temp.zw (sign in W so it won't be overwritten).
|
||||
DxbcOpUBFE(DxbcDest::R(coord_temp, 0b1100), DxbcSrc::LU(0, 0, 2, 1),
|
||||
DxbcSrc::LU(0, 0, 1, 0),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
// to coord_and_sampler_temp.zw (sign in W so it won't be overwritten).
|
||||
// Fine to overwrite W at this point, the sampler index hasn't been
|
||||
// loaded yet.
|
||||
DxbcOpUBFE(DxbcDest::R(coord_and_sampler_temp, 0b1100),
|
||||
DxbcSrc::LU(0, 0, 2, 1), DxbcSrc::LU(0, 0, 1, 0),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
// Remap the axes in a way opposite to the ALU cube instruction.
|
||||
DxbcOpSwitch(DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
DxbcOpSwitch(DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
DxbcOpCase(DxbcSrc::LU(0));
|
||||
{
|
||||
// X is the major axis.
|
||||
// Y = -TC (TC overwritten).
|
||||
DxbcOpMov(DxbcDest::R(coord_temp, 0b0010),
|
||||
-DxbcSrc::R(coord_temp, DxbcSrc::kYYYY));
|
||||
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010),
|
||||
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY));
|
||||
// Z = neg ? SC : -SC.
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kXXXX),
|
||||
-DxbcSrc::R(coord_temp, DxbcSrc::kXXXX));
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX),
|
||||
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX));
|
||||
// X = neg ? -1 : 1 (SC overwritten).
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0001),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f),
|
||||
DxbcSrc::LF(1.0f));
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
|
||||
DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f));
|
||||
}
|
||||
DxbcOpBreak();
|
||||
DxbcOpCase(DxbcSrc::LU(1));
|
||||
|
@ -1104,31 +1126,31 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// Y is the major axis.
|
||||
// X = SC (already there).
|
||||
// Z = neg ? -TC : TC.
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW),
|
||||
-DxbcSrc::R(coord_temp, DxbcSrc::kYYYY),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kYYYY));
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
|
||||
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY));
|
||||
// Y = neg ? -1 : 1 (TC overwritten).
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0010),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f),
|
||||
DxbcSrc::LF(1.0f));
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0010),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
|
||||
DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f));
|
||||
}
|
||||
DxbcOpBreak();
|
||||
DxbcOpDefault();
|
||||
{
|
||||
// Z is the major axis.
|
||||
// X = neg ? -SC : SC (SC overwritten).
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0001),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW),
|
||||
-DxbcSrc::R(coord_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kXXXX));
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
|
||||
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX));
|
||||
// Y = -TC (TC overwritten).
|
||||
DxbcOpMov(DxbcDest::R(coord_temp, 0b0010),
|
||||
-DxbcSrc::R(coord_temp, DxbcSrc::kYYYY));
|
||||
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010),
|
||||
-DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY));
|
||||
// Z = neg ? -1 : 1.
|
||||
DxbcOpMovC(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kWWWW), DxbcSrc::LF(-1.0f),
|
||||
DxbcSrc::LF(1.0f));
|
||||
DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW),
|
||||
DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f));
|
||||
}
|
||||
DxbcOpBreak();
|
||||
DxbcOpEndSwitch();
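A scalar sketch (an assumption for illustration, mirroring the DXBC switch above) of the cube coordinate remapping: the input is SC + 1, TC + 1 and a face index, the face index is split into major axis and sign, and the output is a direction for the Direct3D cube sampler.

// Illustrative sketch: inverse of the ALU cube instruction's face mapping.
#include <algorithm>
#include <cstdint>

struct Float3 { float x, y, z; };

Float3 CubeCoordsFromScTcFace(float sc_plus_1, float tc_plus_1, float face) {
  // Move SC/TC from 1...2 to -1...1.
  float sc = sc_plus_1 * 2.0f - 3.0f;
  float tc = tc_plus_1 * 2.0f - 3.0f;
  uint32_t face_index = std::min(uint32_t(std::max(face, 0.0f)), 5u);
  uint32_t axis = face_index >> 1;    // 0 = X, 1 = Y, 2 = Z major axis.
  bool neg = (face_index & 1) != 0;   // 0 - positive, 1 - negative.
  Float3 result;
  switch (axis) {
    case 0:  // X is the major axis.
      result = {neg ? -1.0f : 1.0f, -tc, neg ? sc : -sc};
      break;
    case 1:  // Y is the major axis.
      result = {sc, neg ? -1.0f : 1.0f, neg ? -tc : tc};
      break;
    default:  // Z is the major axis.
      result = {neg ? -sc : sc, -tc, neg ? -1.0f : 1.0f};
      break;
  }
  return result;
}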
|
||||
|
@ -1145,10 +1167,26 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// since the return value can be used with bias later, forcing linear mip
|
||||
// filtering (the XNA assembler also doesn't accept MipFilter overrides
|
||||
// for getCompTexLOD).
|
||||
DxbcSrc sampler(FindOrAddSamplerBinding(
|
||||
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
|
||||
tfetch_index, instr.attributes.mag_filter,
|
||||
instr.attributes.min_filter, TextureFilter::kLinear,
|
||||
instr.attributes.aniso_filter));
|
||||
instr.attributes.aniso_filter);
|
||||
DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index));
|
||||
if (bindless_resources_used_) {
|
||||
// Load the sampler index to coord_and_sampler_temp.w and use relative
|
||||
// sampler indexing.
|
||||
if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
|
||||
}
|
||||
uint32_t sampler_bindless_descriptor_index =
|
||||
sampler_bindings_[sampler_binding_index].bindless_descriptor_index;
|
||||
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000),
|
||||
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
|
||||
uint32_t(CbufferRegister::kDescriptorIndices),
|
||||
sampler_bindless_descriptor_index >> 2)
|
||||
.Select(sampler_bindless_descriptor_index & 3));
|
||||
sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3));
|
||||
}
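The CB operand built above fetches a bindless descriptor index out of the descriptor-indices constant buffer, which stores the flat index array packed as uint4 vectors: the register is index >> 2 and the component is index & 3. A minimal sketch of the equivalent lookup (illustrative names only):

// Illustrative sketch: reading one packed descriptor index.
#include <array>
#include <cstdint>
#include <vector>

uint32_t LoadDescriptorIndex(
    const std::vector<std::array<uint32_t, 4>>& descriptor_indices_cbuffer,
    uint32_t flat_index) {
  return descriptor_indices_cbuffer[flat_index >> 2][flat_index & 3];
}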
|
||||
// Check which SRV needs to be accessed - signed or unsigned. If there is
|
||||
// at least one non-signed component, will be using the unsigned one.
|
||||
uint32_t is_unsigned_temp = PushSystemTemp();
|
||||
|
@ -1158,13 +1196,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
DxbcOpINE(DxbcDest::R(is_unsigned_temp, 0b0001),
|
||||
DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LU(uint32_t(TextureSign::kSigned) * 0b01010101));
|
||||
DxbcOpIf(true, DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX));
|
||||
// Release is_unsigned_temp.
|
||||
PopSystemTemp();
|
||||
for (uint32_t is_signed = 0; is_signed < 2; ++is_signed) {
|
||||
if (is_signed) {
|
||||
DxbcOpElse();
|
||||
}
|
||||
if (bindless_resources_used_) {
|
||||
// Bindless path - select the SRV index between unsigned and signed to
|
||||
// query.
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
// Check if 3D.
|
||||
assert_true((size_needed_components & 0b1000) == 0b1000);
|
||||
|
@ -1173,37 +1207,119 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
for (uint32_t is_stacked = 0;
|
||||
is_stacked < (instr.dimension == TextureDimension::k3D ? 2u : 1u);
|
||||
++is_stacked) {
|
||||
TextureDimension srv_dimension = instr.dimension;
|
||||
if (is_stacked) {
|
||||
srv_dimension = TextureDimension::k2D;
|
||||
DxbcOpElse();
|
||||
}
|
||||
// Always 3 coordinate components (1D and 2D are padded to 2D arrays,
|
||||
// 3D and cube have 3 coordinate dimensions). Not caring about
|
||||
// normalization of the array layer because it doesn't participate in
|
||||
// LOD calculation in Direct3D 12.
|
||||
uint32_t texture_binding_index_unsigned =
|
||||
FindOrAddTextureBinding(tfetch_index, srv_dimension, false);
|
||||
uint32_t texture_binding_index_signed =
|
||||
FindOrAddTextureBinding(tfetch_index, srv_dimension, true);
|
||||
uint32_t texture_bindless_descriptor_index_unsigned =
|
||||
texture_bindings_[texture_binding_index_unsigned]
|
||||
.bindless_descriptor_index;
|
||||
uint32_t texture_bindless_descriptor_index_signed =
|
||||
texture_bindings_[texture_binding_index_signed]
|
||||
.bindless_descriptor_index;
|
||||
if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
|
||||
}
|
||||
DxbcOpMovC(
|
||||
DxbcDest::R(is_unsigned_temp, 0b0001),
|
||||
DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX),
|
||||
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
|
||||
uint32_t(CbufferRegister::kDescriptorIndices),
|
||||
texture_bindless_descriptor_index_unsigned >> 2)
|
||||
.Select(texture_bindless_descriptor_index_unsigned & 3),
|
||||
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
|
||||
uint32_t(CbufferRegister::kDescriptorIndices),
|
||||
texture_bindless_descriptor_index_signed >> 2)
|
||||
.Select(texture_bindless_descriptor_index_signed & 3));
|
||||
// Always 3 coordinate components (1D and 2D are padded to 2D
|
||||
// arrays, 3D and cube have 3 coordinate dimensions). Not caring
|
||||
// about normalization of the array layer because it doesn't
|
||||
// participate in LOD calculation in Direct3D 12.
|
||||
// The `lod` instruction returns the unclamped LOD (probably need
|
||||
// unclamped so it can be biased back into the range later) in the Y
|
||||
// component, and the resource swizzle is the return value swizzle.
|
||||
// FIXME(Triang3l): Gradient exponent adjustment from the fetch
|
||||
// constant needs to be applied here, would require SV_Position.xy & 1
|
||||
// math, replacing coordinates for one pixel with 0 and for another
|
||||
// with the adjusted gradient, but possibly not used by any games.
|
||||
// constant needs to be applied here, would require math involving
|
||||
// SV_Position parity, replacing coordinates for one pixel with 0
|
||||
// and for another with the adjusted gradient, but possibly not used
|
||||
// by any games.
|
||||
assert_true(used_result_nonzero_components == 0b0001);
|
||||
uint32_t* bindless_srv_index = nullptr;
|
||||
switch (srv_dimension) {
|
||||
case TextureDimension::k1D:
|
||||
case TextureDimension::k2D:
|
||||
bindless_srv_index = &srv_index_bindless_textures_2d_;
|
||||
break;
|
||||
case TextureDimension::k3D:
|
||||
bindless_srv_index = &srv_index_bindless_textures_3d_;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
bindless_srv_index = &srv_index_bindless_textures_cube_;
|
||||
break;
|
||||
}
|
||||
assert_not_null(bindless_srv_index);
|
||||
if (*bindless_srv_index == kBindingIndexUnallocated) {
|
||||
*bindless_srv_index = srv_count_++;
|
||||
}
|
||||
DxbcOpLOD(DxbcDest::R(system_temp_result_, 0b0001),
|
||||
DxbcSrc::R(coord_temp), 3,
|
||||
FindOrAddTextureSRV(
|
||||
tfetch_index,
|
||||
is_stacked ? TextureDimension::k2D : instr.dimension,
|
||||
is_signed != 0)
|
||||
.Select(1),
|
||||
DxbcSrc::R(coord_and_sampler_temp), 3,
|
||||
DxbcSrc::T(*bindless_srv_index,
|
||||
DxbcIndex(is_unsigned_temp, 0), DxbcSrc::kYYYY),
|
||||
sampler);
|
||||
}
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
// Close the 3D/stacked check.
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
} else {
|
||||
// Bindful path - conditionally query one of the SRVs.
|
||||
DxbcOpIf(true, DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX));
|
||||
for (uint32_t is_signed = 0; is_signed < 2; ++is_signed) {
|
||||
if (is_signed) {
|
||||
DxbcOpElse();
|
||||
}
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
// Check if 3D.
|
||||
assert_true((size_needed_components & 0b1000) == 0b1000);
|
||||
DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW));
|
||||
}
|
||||
for (uint32_t is_stacked = 0;
|
||||
is_stacked <
|
||||
(instr.dimension == TextureDimension::k3D ? 2u : 1u);
|
||||
++is_stacked) {
|
||||
if (is_stacked) {
|
||||
DxbcOpElse();
|
||||
}
|
||||
assert_true(used_result_nonzero_components == 0b0001);
|
||||
uint32_t texture_binding_index = FindOrAddTextureBinding(
|
||||
tfetch_index,
|
||||
is_stacked ? TextureDimension::k2D : instr.dimension,
|
||||
is_signed != 0);
|
||||
DxbcOpLOD(
|
||||
DxbcDest::R(system_temp_result_, 0b0001),
|
||||
DxbcSrc::R(coord_and_sampler_temp), 3,
|
||||
DxbcSrc::T(
|
||||
texture_bindings_[texture_binding_index].bindful_srv_index,
|
||||
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
|
||||
texture_binding_index,
|
||||
DxbcSrc::kYYYY),
|
||||
sampler);
|
||||
}
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
// Close the 3D/stacked check.
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
}
|
||||
// Close the signedness check.
|
||||
DxbcOpEndIf();
|
||||
}
|
||||
// Close the signedness check.
|
||||
DxbcOpEndIf();
|
||||
// Release is_unsigned_temp.
|
||||
PopSystemTemp();
|
||||
} else {
|
||||
// - Gradients or LOD to be passed to the sample_d/sample_l.
|
||||
|
||||
|
@ -1322,11 +1438,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
} else {
|
||||
// Coarse is according to the Direct3D 11.3 specification.
|
||||
DxbcOpDerivRTXCoarse(DxbcDest::R(grad_h_lod_temp, grad_mask),
|
||||
DxbcSrc::R(coord_temp));
|
||||
DxbcSrc::R(coord_and_sampler_temp));
|
||||
DxbcOpMul(DxbcDest::R(grad_h_lod_temp, grad_mask),
|
||||
DxbcSrc::R(grad_h_lod_temp), lod_src);
|
||||
DxbcOpDerivRTYCoarse(DxbcDest::R(grad_v_temp, grad_mask),
|
||||
DxbcSrc::R(coord_temp));
|
||||
DxbcSrc::R(coord_and_sampler_temp));
|
||||
// FIXME(Triang3l): Gradient exponent adjustment is currently not
|
||||
// done in getCompTexLOD, so don't do it here too.
|
||||
#if 0
|
||||
|
@ -1357,11 +1473,27 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// doesn't allow mixing anisotropic and point filtering. Possibly
|
||||
// anisotropic filtering should be disabled when explicit LOD is used - do
|
||||
// this here.
|
||||
DxbcSrc sampler(FindOrAddSamplerBinding(
|
||||
uint32_t sampler_binding_index = FindOrAddSamplerBinding(
|
||||
tfetch_index, instr.attributes.mag_filter,
|
||||
instr.attributes.min_filter, instr.attributes.mip_filter,
|
||||
use_computed_lod ? instr.attributes.aniso_filter
|
||||
: AnisoFilter::kDisabled));
|
||||
: AnisoFilter::kDisabled);
|
||||
DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index));
|
||||
if (bindless_resources_used_) {
|
||||
// Load the sampler index to coord_and_sampler_temp.w and use relative
|
||||
// sampler indexing.
|
||||
if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) {
|
||||
cbuffer_index_descriptor_indices_ = cbuffer_count_++;
|
||||
}
|
||||
uint32_t sampler_bindless_descriptor_index =
|
||||
sampler_bindings_[sampler_binding_index].bindless_descriptor_index;
|
||||
DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000),
|
||||
DxbcSrc::CB(cbuffer_index_descriptor_indices_,
|
||||
uint32_t(CbufferRegister::kDescriptorIndices),
|
||||
sampler_bindless_descriptor_index >> 2)
|
||||
.Select(sampler_bindless_descriptor_index & 3));
|
||||
sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3));
|
||||
}
|
||||
|
||||
// Break result register dependencies because textures will be sampled
|
||||
// conditionally, including the primary signs.
|
||||
|
@ -1389,9 +1521,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// - srv_selection_temp.z - if stacked and not forced to be point-sampled,
|
||||
// the lerp factor between two layers, wrapped by layer_lerp_factor_src
|
||||
// with l(0.0) fallback for the point sampling case.
|
||||
// - srv_selection_temp.w - scratch for calculations involving these.
|
||||
// - srv_selection_temp.w - first, scratch for calculations involving
|
||||
// these, then, unsigned or signed SRV descriptor index.
|
||||
DxbcSrc layer_lerp_factor_src(DxbcSrc::LF(0.0f));
|
||||
uint32_t srv_selection_temp = UINT32_MAX;
|
||||
// W is always needed for bindless.
|
||||
uint32_t srv_selection_temp =
|
||||
bindless_resources_used_ ? PushSystemTemp() : UINT32_MAX;
|
||||
if (instr.dimension == TextureDimension::k3D) {
|
||||
bool vol_mag_filter_is_fetch_const =
|
||||
instr.attributes.vol_mag_filter == TextureFilter::kUseFetchConst;
|
||||
|
@ -1469,10 +1604,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
}
|
||||
// For linear filtering, subtract 0.5 from the coordinates and store
|
||||
// the lerp factor. Flooring will be done later.
|
||||
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ), DxbcSrc::LF(-0.5f));
|
||||
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::LF(-0.5f));
|
||||
DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
// Close the linear check.
|
||||
DxbcOpEndIf();
|
||||
// Close the stacked check.
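A hedged scalar sketch of the layer blending setup for stacked (2D array) textures with linear filtering along Z, corresponding to the -0.5 bias and frc emitted above (the flooring happens later with round_ni): the caller samples layer and layer + 1 and blends them with the lerp factor. Names are illustrative only.

// Illustrative sketch: layer index and lerp factor for stacked textures.
#include <cmath>

struct StackedLayerSample {
  float layer;        // Bottom layer to sample (already floored).
  float lerp_factor;  // Weight of layer + 1.
};

StackedLayerSample SetUpStackedLayerLerp(float z_unnormalized) {
  float z_biased = z_unnormalized - 0.5f;
  StackedLayerSample result;
  result.lerp_factor = z_biased - std::floor(z_biased);  // frc(z)
  result.layer = std::floor(z_biased);                   // round_ni(z)
  return result;
}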
|
||||
|
@ -1505,11 +1641,11 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
}
|
||||
// For linear filtering, subtract 0.5 from the coordinates and store
|
||||
// the lerp factor. Flooring will be done later.
|
||||
DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
|
||||
DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
|
||||
DxbcSrc::LF(-0.5f));
|
||||
DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
if (vol_mag_filter_is_fetch_const) {
|
||||
// Close the fetch constant linear filtering mode check.
|
||||
DxbcOpEndIf();
|
||||
|
@ -1578,13 +1714,50 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// for the layer index, but on the Xbox 360, addressing is similar to
|
||||
// that of 3D textures). This is needed for both point and linear
|
||||
// filtering (with linear, 0.5 was subtracted previously).
|
||||
DxbcOpRoundNI(DxbcDest::R(coord_temp, 0b0100),
|
||||
DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ));
|
||||
DxbcOpRoundNI(DxbcDest::R(coord_and_sampler_temp, 0b0100),
|
||||
DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ));
|
||||
}
|
||||
uint32_t texture_binding_index_unsigned =
|
||||
FindOrAddTextureBinding(tfetch_index, srv_dimension, false);
|
||||
const TextureBinding& texture_binding_unsigned =
|
||||
texture_bindings_[texture_binding_index_unsigned];
|
||||
uint32_t texture_binding_index_signed =
|
||||
FindOrAddTextureBinding(tfetch_index, srv_dimension, true);
|
||||
const TextureBinding& texture_binding_signed =
|
||||
texture_bindings_[texture_binding_index_signed];
|
||||
DxbcSrc srv_unsigned(DxbcSrc::LF(0.0f)), srv_signed(DxbcSrc::LF(0.0f));
|
||||
if (bindless_resources_used_) {
|
||||
uint32_t* bindless_srv_index = nullptr;
|
||||
switch (srv_dimension) {
|
||||
case TextureDimension::k1D:
|
||||
case TextureDimension::k2D:
|
||||
bindless_srv_index = &srv_index_bindless_textures_2d_;
|
||||
break;
|
||||
case TextureDimension::k3D:
|
||||
bindless_srv_index = &srv_index_bindless_textures_3d_;
|
||||
break;
|
||||
case TextureDimension::kCube:
|
||||
bindless_srv_index = &srv_index_bindless_textures_cube_;
|
||||
break;
|
||||
}
|
||||
assert_not_null(bindless_srv_index);
|
||||
if (*bindless_srv_index == kBindingIndexUnallocated) {
|
||||
*bindless_srv_index = srv_count_++;
|
||||
}
|
||||
assert_true(srv_selection_temp != UINT32_MAX);
|
||||
srv_unsigned =
|
||||
DxbcSrc::T(*bindless_srv_index, DxbcIndex(srv_selection_temp, 3));
|
||||
srv_signed = srv_unsigned;
|
||||
} else {
|
||||
srv_unsigned =
|
||||
DxbcSrc::T(texture_binding_unsigned.bindful_srv_index,
|
||||
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
|
||||
texture_binding_index_unsigned);
|
||||
srv_signed =
|
||||
DxbcSrc::T(texture_binding_signed.bindful_srv_index,
|
||||
uint32_t(SRVMainRegister::kBindfulTexturesStart) +
|
||||
texture_binding_index_signed);
|
||||
}
|
||||
DxbcSrc srv_unsigned(
|
||||
FindOrAddTextureSRV(tfetch_index, srv_dimension, false));
|
||||
DxbcSrc srv_signed(
|
||||
FindOrAddTextureSRV(tfetch_index, srv_dimension, true));
|
||||
for (uint32_t layer = 0; layer < (layer_lerp_needed ? 2u : 1u);
|
||||
++layer) {
|
||||
uint32_t layer_value_temp = system_temp_result_;
|
||||
|
@@ -1596,8 +1769,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
 // If the lerp factor is not zero, sample the next layer.
 DxbcOpIf(true, DxbcSrc::R(layer_value_temp, DxbcSrc::kXXXX));
 // Go to the next layer.
-DxbcOpAdd(DxbcDest::R(coord_temp, 0b0100),
-          DxbcSrc::R(coord_temp, DxbcSrc::kZZZZ),
+DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100),
+          DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ),
           DxbcSrc::LF(1.0f));
 }
 // Always 3 coordinate components (1D and 2D are padded to 2D arrays,
@@ -1605,17 +1778,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
 DxbcOpIf(false, is_all_signed_src);
 {
   // Sample the unsigned texture.
+  if (bindless_resources_used_) {
+    // Load the unsigned texture descriptor index.
+    assert_true(srv_selection_temp != UINT32_MAX);
+    if (cbuffer_index_descriptor_indices_ ==
+        kBindingIndexUnallocated) {
+      cbuffer_index_descriptor_indices_ = cbuffer_count_++;
+    }
+    uint32_t texture_bindless_descriptor_index =
+        texture_binding_unsigned.bindless_descriptor_index;
+    DxbcOpMov(
+        DxbcDest::R(srv_selection_temp, 0b1000),
+        DxbcSrc::CB(cbuffer_index_descriptor_indices_,
+                    uint32_t(CbufferRegister::kDescriptorIndices),
+                    texture_bindless_descriptor_index >> 2)
+            .Select(texture_bindless_descriptor_index & 3));
+  }
   if (grad_v_temp != UINT32_MAX) {
     assert_not_zero(grad_component_count);
     DxbcOpSampleD(
         DxbcDest::R(layer_value_temp, used_result_nonzero_components),
-        DxbcSrc::R(coord_temp), 3, srv_unsigned, sampler,
+        DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler,
         DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp),
         srv_grad_component_count);
   } else {
     DxbcOpSampleL(
         DxbcDest::R(layer_value_temp, used_result_nonzero_components),
-        DxbcSrc::R(coord_temp), 3, srv_unsigned, sampler, lod_src);
+        DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler,
+        lod_src);
   }
 }
 DxbcOpEndIf();
@@ -1623,17 +1813,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
 {
   // Sample the signed texture.
   uint32_t signed_temp = PushSystemTemp();
+  if (bindless_resources_used_) {
+    // Load the signed texture descriptor index.
+    assert_true(srv_selection_temp != UINT32_MAX);
+    if (cbuffer_index_descriptor_indices_ ==
+        kBindingIndexUnallocated) {
+      cbuffer_index_descriptor_indices_ = cbuffer_count_++;
+    }
+    uint32_t texture_bindless_descriptor_index =
+        texture_binding_signed.bindless_descriptor_index;
+    DxbcOpMov(
+        DxbcDest::R(srv_selection_temp, 0b1000),
+        DxbcSrc::CB(cbuffer_index_descriptor_indices_,
+                    uint32_t(CbufferRegister::kDescriptorIndices),
+                    texture_bindless_descriptor_index >> 2)
+            .Select(texture_bindless_descriptor_index & 3));
+  }
   if (grad_v_temp != UINT32_MAX) {
     assert_not_zero(grad_component_count);
     DxbcOpSampleD(
         DxbcDest::R(signed_temp, used_result_nonzero_components),
-        DxbcSrc::R(coord_temp), 3, srv_signed, sampler,
+        DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler,
         DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp),
         srv_grad_component_count);
   } else {
     DxbcOpSampleL(
         DxbcDest::R(signed_temp, used_result_nonzero_components),
-        DxbcSrc::R(coord_temp), 3, srv_signed, sampler, lod_src);
+        DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler,
+        lod_src);
   }
   DxbcOpMovC(
       DxbcDest::R(layer_value_temp, used_result_nonzero_components),
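
Note on the `>> 2` / `& 3` addressing in the two descriptor-index loads above: bindless descriptor indices are packed four to a 16-byte constant register in the kDescriptorIndices constant buffer, so the register is selected by index / 4 and the component within it by index % 4. A minimal illustration of that mapping (the struct and helper below are explanatory only, not part of this commit):

#include <cstdint>

// Where a flat bindless descriptor index lives inside the kDescriptorIndices
// constant buffer: four uint32 indices are packed into each uint4 register.
struct DescriptorIndexLocation {
  uint32_t register_index;  // constant register within kDescriptorIndices
  uint32_t component;       // 0 = x, 1 = y, 2 = z, 3 = w
};

constexpr DescriptorIndexLocation LocateDescriptorIndex(uint32_t index) {
  return {index >> 2, index & 3};
}
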
@@ -1680,7 +1887,7 @@
   }
 }
 
-// Release coord_temp.
+// Release coord_and_sampler_temp.
 PopSystemTemp();
 
 // Apply the bias and gamma correction (gamma is after filtering here,
@@ -435,8 +435,12 @@ void DxbcShaderTranslator::ExportToMemory() {
 DxbcOpSwitch(element_size_src);
 for (uint32_t k = 1; k <= 4; k <<= 1) {
   DxbcOpCase(DxbcSrc::LU(k * 4));
+  if (uav_index_shared_memory_ == kBindingIndexUnallocated) {
+    uav_index_shared_memory_ = uav_count_++;
+  }
   DxbcOpStoreRaw(
-      DxbcDest::U(0, uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1),
+      DxbcDest::U(uav_index_shared_memory_,
+                  uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1),
       address_src, eM_src);
   DxbcOpBreak();
 }
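
This hunk is the first of many in this commit that replace fixed register numbers with lazily allocated binding indices: uav_index_shared_memory_, uav_index_edram_, and their SRV and constant-buffer counterparts start out as kBindingIndexUnallocated and only take a slot from a running counter the first time an instruction actually uses them, so a shader that never touches a resource never declares it. A minimal sketch of the idiom (the names and the UINT32_MAX sentinel are assumptions made for illustration):

#include <cstdint>

// Sketch of the on-demand binding allocation pattern repeated below: a binding
// receives its register index from a running counter only on first use.
constexpr uint32_t kBindingIndexUnallocated = UINT32_MAX;

struct BindingCounter {
  uint32_t count = 0;

  // Returns the binding's register index, assigning the next free slot if the
  // binding has not been referenced yet.
  uint32_t Use(uint32_t& binding_index) {
    if (binding_index == kBindingIndexUnallocated) {
      binding_index = count++;
    }
    return binding_index;
  }
};
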
@@ -1575,8 +1575,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
 DxbcOpIf(true, temp_x_src);
 {
   // Write the new depth/stencil.
+  if (uav_index_edram_ == kBindingIndexUnallocated) {
+    uav_index_edram_ = uav_count_++;
+  }
   DxbcOpStoreUAVTyped(
-      DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
+      DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
       DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
       DxbcSrc::R(system_temp_rov_depth_stencil_).Select(i));
 }
@@ -1955,10 +1958,13 @@ void DxbcShaderTranslator::
 // Load the old depth/stencil value to VGPR [0].z.
 // VGPR [0].x = new depth
 // VGPR [0].z = old depth/stencil
+if (uav_index_edram_ == kBindingIndexUnallocated) {
+  uav_index_edram_ = uav_count_++;
+}
 DxbcOpLdUAVTyped(DxbcDest::R(system_temps_subroutine_, 0b0100),
                  DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
-                 DxbcSrc::U(ROV_GetEDRAMUAVIndex(),
-                            uint32_t(UAVRegister::kEDRAM), DxbcSrc::kXXXX));
+                 DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
+                            DxbcSrc::kXXXX));
 // Extract the old depth part to VGPR [0].w.
 // VGPR [0].x = new depth
 // VGPR [0].z = old depth/stencil
@@ -2398,8 +2404,11 @@ void DxbcShaderTranslator::
 // Write the new depth/stencil.
 // VGPR [0].x = new depth/stencil
 // VGPR [0].y = depth/stencil test failure
+if (uav_index_edram_ == kBindingIndexUnallocated) {
+  uav_index_edram_ = uav_count_++;
+}
 DxbcOpStoreUAVTyped(
-    DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
+    DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
     DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1,
     DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
 if (depth_stencil_early) {
@@ -2499,10 +2508,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
 // Load the lower 32 bits of the 64bpp color to VGPR [0].z.
 // VGPRs [0].xy - packed source color/alpha if not blending.
 // VGPR [0].z - lower 32 bits of the packed color.
+if (uav_index_edram_ == kBindingIndexUnallocated) {
+  uav_index_edram_ = uav_count_++;
+}
 DxbcOpLdUAVTyped(
     DxbcDest::R(system_temps_subroutine_, 0b0100),
     DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1,
-    DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM),
+    DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
               DxbcSrc::kXXXX));
 // Get the address of the upper 32 bits of the color to VGPR [0].w.
 // VGPRs [0].xy - packed source color/alpha if not blending.
@@ -2514,10 +2526,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
   // Load the upper 32 bits of the 64bpp color to VGPR [0].w.
   // VGPRs [0].xy - packed source color/alpha if not blending.
   // VGPRs [0].zw - packed destination color/alpha.
+  if (uav_index_edram_ == kBindingIndexUnallocated) {
+    uav_index_edram_ = uav_count_++;
+  }
   DxbcOpLdUAVTyped(
       DxbcDest::R(system_temps_subroutine_, 0b1000),
       DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kWWWW), 1,
-      DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM),
+      DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
                 DxbcSrc::kXXXX));
 }
 // The color is 32bpp.
@@ -2526,10 +2541,13 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
 // Load the 32bpp color to VGPR [0].z.
 // VGPRs [0].xy - packed source color/alpha if not blending.
 // VGPR [0].z - packed 32bpp destination color.
+if (uav_index_edram_ == kBindingIndexUnallocated) {
+  uav_index_edram_ = uav_count_++;
+}
 DxbcOpLdUAVTyped(
     DxbcDest::R(system_temps_subroutine_, 0b0100),
     DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1,
-    DxbcSrc::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM),
+    DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM),
               DxbcSrc::kXXXX));
 // Break register dependency in VGPR [0].w if the color is 32bpp.
 // VGPRs [0].xy - packed source color/alpha if not blending.
@@ -3276,8 +3294,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
 DxbcOpIf(true, DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kZZZZ));
 {
   // Store the lower 32 bits of the 64bpp color.
+  if (uav_index_edram_ == kBindingIndexUnallocated) {
+    uav_index_edram_ = uav_count_++;
+  }
   DxbcOpStoreUAVTyped(
-      DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
+      DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
       DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1,
       DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
   // Get the address of the upper 32 bits of the color to VGPR [0].z (can't
@@ -3289,8 +3310,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
             DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW),
             DxbcSrc::LU(1));
   // Store the upper 32 bits of the 64bpp color.
+  if (uav_index_edram_ == kBindingIndexUnallocated) {
+    uav_index_edram_ = uav_count_++;
+  }
   DxbcOpStoreUAVTyped(
-      DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
+      DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
       DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kZZZZ), 1,
       DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY));
 }
@@ -3298,8 +3322,11 @@ void DxbcShaderTranslator::CompleteShaderCode_ROV_ColorSampleSubroutine(
 DxbcOpElse();
 {
   // Store the 32bpp color.
+  if (uav_index_edram_ == kBindingIndexUnallocated) {
+    uav_index_edram_ = uav_count_++;
+  }
   DxbcOpStoreUAVTyped(
-      DxbcDest::U(ROV_GetEDRAMUAVIndex(), uint32_t(UAVRegister::kEDRAM)),
+      DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEDRAM)),
       DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1,
       DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX));
 }
@@ -41,6 +41,8 @@ DEFINE_string(
     "[vertex or unspecified, linedomaincp, linedomainpatch, triangledomaincp, "
     "triangledomainpatch, quaddomaincp, quaddomainpatch].",
     "GPU");
+DEFINE_bool(shader_output_bindless_resources, false,
+            "Output host shader with bindless resources used.", "GPU");
 DEFINE_bool(shader_output_dxbc_rov, false,
             "Output ROV-based output-merger code in DXBC pixel shaders.",
             "GPU");
@@ -109,7 +111,8 @@ int shader_compiler_main(const std::vector<std::string>& args) {
 } else if (cvars::shader_output_type == "dxbc" ||
            cvars::shader_output_type == "dxbctext") {
   translator = std::make_unique<DxbcShaderTranslator>(
-      0, cvars::shader_output_dxbc_rov);
+      0, cvars::shader_output_bindless_resources,
+      cvars::shader_output_dxbc_rov);
 } else {
   translator = std::make_unique<UcodeShaderTranslator>();
 }
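
Any other place that constructs a DxbcShaderTranslator needs the same treatment, since the bindless flag now sits between the vendor ID and the ROV flag. A hedged sketch of an updated call site (treating the first argument as a GPU vendor ID is an assumption; 0 is simply what the standalone compiler passes):

#include <cstdint>
#include <memory>

#include "xenia/gpu/dxbc_shader_translator.h"

// Hypothetical caller updated for the new argument order:
// (vendor_id, bindless_resources_used, edram_rov_used).
std::unique_ptr<xe::gpu::DxbcShaderTranslator> MakeDxbcTranslator(
    uint32_t vendor_id, bool bindless_resources_used, bool edram_rov_used) {
  return std::make_unique<xe::gpu::DxbcShaderTranslator>(
      vendor_id, bindless_resources_used, edram_rov_used);
}
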
@@ -327,19 +327,22 @@ bool D3D12Provider::Initialize() {
 
 // Check if optional features are supported.
 rasterizer_ordered_views_supported_ = false;
-tiled_resources_tier_ = 0;
+resource_binding_tier_ = D3D12_RESOURCE_BINDING_TIER_1;
+tiled_resources_tier_ = D3D12_TILED_RESOURCES_TIER_NOT_SUPPORTED;
 D3D12_FEATURE_DATA_D3D12_OPTIONS options;
 if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS,
                                           &options, sizeof(options)))) {
   rasterizer_ordered_views_supported_ = options.ROVsSupported ? true : false;
-  tiled_resources_tier_ = uint32_t(options.TiledResourcesTier);
+  resource_binding_tier_ = options.ResourceBindingTier;
+  tiled_resources_tier_ = options.TiledResourcesTier;
 }
-programmable_sample_positions_tier_ = 0;
+programmable_sample_positions_tier_ =
+    D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER_NOT_SUPPORTED;
 D3D12_FEATURE_DATA_D3D12_OPTIONS2 options2;
 if (SUCCEEDED(device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS2,
                                           &options2, sizeof(options2)))) {
   programmable_sample_positions_tier_ =
-      uint32_t(options2.ProgrammableSamplePositionsTier);
+      options2.ProgrammableSamplePositionsTier;
 }
 virtual_address_bits_per_resource_ = 0;
 D3D12_FEATURE_DATA_GPU_VIRTUAL_ADDRESS_SUPPORT virtual_address_support;
@@ -349,14 +352,17 @@ bool D3D12Provider::Initialize() {
   virtual_address_bits_per_resource_ =
       virtual_address_support.MaxGPUVirtualAddressBitsPerResource;
 }
-XELOGD3D("Direct3D 12 device features:");
-XELOGD3D("* Max GPU virtual address bits per resource: {}",
-         virtual_address_bits_per_resource_);
-XELOGD3D("* Programmable sample positions: tier {}",
-         programmable_sample_positions_tier_);
-XELOGD3D("* Rasterizer-ordered views: {}",
-         rasterizer_ordered_views_supported_ ? "yes" : "no");
-XELOGD3D("* Tiled resources: tier {}", tiled_resources_tier_);
+XELOGD3D(
+    "Direct3D 12 device features:\n"
+    "Max GPU virtual address bits per resource: {}\n"
+    "Programmable sample positions: tier {}\n"
+    "Rasterizer-ordered views: {}\n"
+    "Resource binding: tier {}\n"
+    "Tiled resources: tier {}\n",
+    virtual_address_bits_per_resource_,
+    uint32_t(programmable_sample_positions_tier_),
+    rasterizer_ordered_views_supported_ ? "yes" : "no",
+    uint32_t(resource_binding_tier_), uint32_t(tiled_resources_tier_));
 
 // Get the graphics analysis interface, will silently fail if PIX is not
 // attached.
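
The newly stored ResourceBindingTier is what makes the bindless path gateable: higher binding tiers lift the limits on descriptors visible to shaders, which large shader-visible descriptor heaps rely on. A sketch of how a renderer might use the new getter to decide between the bindful and bindless paths (the tier threshold chosen here is an illustration, not something asserted by this diff):

#include "xenia/ui/d3d12/d3d12_provider.h"

// Illustrative check only: D3D12_RESOURCE_BINDING_TIER_2 and above raise the
// per-stage descriptor limits; whether Xenia uses exactly this condition is an
// assumption here.
bool MayUseBindlessResources(const xe::ui::d3d12::D3D12Provider& provider) {
  return provider.GetResourceBindingTier() >= D3D12_RESOURCE_BINDING_TIER_2;
}
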
@@ -68,13 +68,19 @@ class D3D12Provider : public GraphicsProvider {
 uint32_t GetAdapterVendorID() const { return adapter_vendor_id_; }
 
 // Device features.
-uint32_t GetProgrammableSamplePositionsTier() const {
+D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER
+GetProgrammableSamplePositionsTier() const {
   return programmable_sample_positions_tier_;
 }
 bool AreRasterizerOrderedViewsSupported() const {
   return rasterizer_ordered_views_supported_;
 }
-uint32_t GetTiledResourcesTier() const { return tiled_resources_tier_; }
+D3D12_RESOURCE_BINDING_TIER GetResourceBindingTier() const {
+  return resource_binding_tier_;
+}
+D3D12_TILED_RESOURCES_TIER GetTiledResourcesTier() const {
+  return tiled_resources_tier_;
+}
 uint32_t GetVirtualAddressBitsPerResource() const {
   return virtual_address_bits_per_resource_;
 }
@@ -128,9 +134,10 @@ class D3D12Provider : public GraphicsProvider {
 
   uint32_t adapter_vendor_id_;
 
-  uint32_t programmable_sample_positions_tier_;
+  D3D12_PROGRAMMABLE_SAMPLE_POSITIONS_TIER programmable_sample_positions_tier_;
   bool rasterizer_ordered_views_supported_;
-  uint32_t tiled_resources_tier_;
+  D3D12_RESOURCE_BINDING_TIER resource_binding_tier_;
+  D3D12_TILED_RESOURCES_TIER tiled_resources_tier_;
   uint32_t virtual_address_bits_per_resource_;
 };
 
@@ -10,6 +10,8 @@
 #ifndef XENIA_UI_D3D12_D3D12_UTIL_H_
 #define XENIA_UI_D3D12_D3D12_UTIL_H_
 
+#include <utility>
+
 #include "xenia/ui/d3d12/d3d12_provider.h"
 
 namespace xe {
@@ -17,6 +19,9 @@ namespace ui {
 namespace d3d12 {
 namespace util {
 
+using DescriptorCPUGPUHandlePair =
+    std::pair<D3D12_CPU_DESCRIPTOR_HANDLE, D3D12_GPU_DESCRIPTOR_HANDLE>;
+
 extern const D3D12_HEAP_PROPERTIES kHeapPropertiesDefault;
 extern const D3D12_HEAP_PROPERTIES kHeapPropertiesUpload;
 extern const D3D12_HEAP_PROPERTIES kHeapPropertiesReadback;
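
The new DescriptorCPUGPUHandlePair typedef pairs the two views of one slot in a shader-visible descriptor heap: the CPU handle is where the descriptor gets written, and the GPU handle is what gets bound through the root signature. A minimal usage sketch (the function and its parameters are placeholders; only the Direct3D 12 calls themselves are real API):

#include <utility>

#include "xenia/ui/d3d12/d3d12_util.h"

// Writes an SRV into the slot described by the pair and binds its table.
// root_parameter_index and the surrounding allocation scheme are assumptions.
void BindTexture(ID3D12Device* device, ID3D12GraphicsCommandList* command_list,
                 ID3D12Resource* texture, UINT root_parameter_index,
                 xe::ui::d3d12::util::DescriptorCPUGPUHandlePair handles) {
  // Null view description: use the resource's own format and full mip chain.
  device->CreateShaderResourceView(texture, nullptr, handles.first);
  command_list->SetGraphicsRootDescriptorTable(root_parameter_index,
                                               handles.second);
}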