453 lines
19 KiB
C++
453 lines
19 KiB
C++
/**
|
|
******************************************************************************
|
|
* Xenia : Xbox 360 Emulator Research Project *
|
|
******************************************************************************
|
|
* Copyright 2018 Ben Vanik. All rights reserved. *
|
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
|
******************************************************************************
|
|
*/
|
|
|
|
#ifndef XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|
|
#define XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|
|
|
|
#include <atomic>
#include <deque>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
|
|
#include "xenia/gpu/command_processor.h"
|
|
#include "xenia/gpu/d3d12/d3d12_graphics_system.h"
|
|
#include "xenia/gpu/d3d12/deferred_command_list.h"
|
|
#include "xenia/gpu/d3d12/pipeline_cache.h"
|
|
#include "xenia/gpu/d3d12/primitive_converter.h"
|
|
#include "xenia/gpu/d3d12/render_target_cache.h"
|
|
#include "xenia/gpu/d3d12/shared_memory.h"
|
|
#include "xenia/gpu/d3d12/texture_cache.h"
|
|
#include "xenia/gpu/dxbc_shader_translator.h"
|
|
#include "xenia/gpu/xenos.h"
|
|
#include "xenia/kernel/kernel_state.h"
|
|
#include "xenia/ui/d3d12/d3d12_context.h"
|
|
#include "xenia/ui/d3d12/pools.h"
|
|
|
|
namespace xe {
|
|
namespace gpu {
|
|
namespace d3d12 {
|
|
|
|
class D3D12CommandProcessor : public CommandProcessor {
|
|
public:
|
|
explicit D3D12CommandProcessor(D3D12GraphicsSystem* graphics_system,
|
|
kernel::KernelState* kernel_state);
|
|
~D3D12CommandProcessor();
|
|
|
|
void ClearCaches() override;
|
|
|
|
void RequestFrameTrace(const std::wstring& root_path) override;
|
|
|
|
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
|
|
|
|
void RestoreEDRAMSnapshot(const void* snapshot) override;
|
|
|
|
// Needed by everything that owns transient objects.
|
|
xe::ui::d3d12::D3D12Context* GetD3D12Context() const {
|
|
return static_cast<xe::ui::d3d12::D3D12Context*>(context_.get());
|
|
}
|
|
|
|
// Returns the deferred drawing command list for the currently open
|
|
// submission.
|
|
DeferredCommandList* GetDeferredCommandList() {
|
|
return deferred_command_list_.get();
|
|
}
|
|
|
|
// Should a rasterizer-ordered UAV of the EDRAM buffer with format conversion
|
|
// and blending performed in pixel shaders be used instead of host render
|
|
// targets.
|
|
bool IsROVUsedForEDRAM() const;
|
|
|
|
// Submission counters. The members returned here are described at their
// declarations as values of submission_fence_.
uint64_t GetCurrentSubmission() const { return submission_current_; }
|
|
uint64_t GetCompletedSubmission() const { return submission_completed_; }
|
|
|
|
// Guest frame counters (frame_current_ and frame_completed_).
uint64_t GetCurrentFrame() const { return frame_current_; }
|
|
uint64_t GetCompletedFrame() const { return frame_completed_; }
|
|
|
|
// Gets the current color write mask, taking the pixel shader's write mask
|
|
// into account. If a shader doesn't write to a render target, it shouldn't be
|
|
// written to and it shouldn't be even bound - otherwise, in Halo 3, one
|
|
// render target is being destroyed by a shader not writing anything, and in
|
|
// Banjo-Tooie, the result of clearing the top tile is being ignored because
|
|
// there are 4 render targets bound with the same EDRAM base (clearly not
|
|
// correct usage), but the shader only clears 1, and then EDRAM buffer stores
|
|
// conflict with each other.
|
|
uint32_t GetCurrentColorMask(const D3D12Shader* pixel_shader) const;
|
|
|
|
void PushTransitionBarrier(
|
|
ID3D12Resource* resource, D3D12_RESOURCE_STATES old_state,
|
|
D3D12_RESOURCE_STATES new_state,
|
|
UINT subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES);
|
|
void PushAliasingBarrier(ID3D12Resource* old_resource,
|
|
ID3D12Resource* new_resource);
|
|
void PushUAVBarrier(ID3D12Resource* resource);
|
|
void SubmitBarriers();
|
|
|
|
// Finds or creates root signature for a pipeline.
|
|
ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader,
|
|
const D3D12Shader* pixel_shader,
|
|
bool tessellated);
|
|
|
|
// Non-owning pointer to the upload buffer pool held in constant_buffer_pool_.
ui::d3d12::UploadBufferPool* GetConstantBufferPool() const {
  ui::d3d12::UploadBufferPool* const pool = constant_buffer_pool_.get();
  return pool;
}
|
|
// Request and automatically rebind descriptors on the draw command list.
|
|
// Refer to DescriptorHeapPool::Request for partial/full update explanation.
|
|
uint64_t RequestViewDescriptors(uint64_t previous_heap_index,
|
|
uint32_t count_for_partial_update,
|
|
uint32_t count_for_full_update,
|
|
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
|
|
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
|
|
uint64_t RequestSamplerDescriptors(
|
|
uint64_t previous_heap_index, uint32_t count_for_partial_update,
|
|
uint32_t count_for_full_update,
|
|
D3D12_CPU_DESCRIPTOR_HANDLE& cpu_handle_out,
|
|
D3D12_GPU_DESCRIPTOR_HANDLE& gpu_handle_out);
|
|
|
|
// Returns a single temporary GPU-side buffer within a submission for tasks
|
|
// like texture untiling and resolving.
|
|
ID3D12Resource* RequestScratchGPUBuffer(uint32_t size,
|
|
D3D12_RESOURCE_STATES state);
|
|
// This must be called when done with the scratch buffer, to notify the
|
|
// command processor about the new state in case the buffer was transitioned
|
|
// by its user.
|
|
void ReleaseScratchGPUBuffer(ID3D12Resource* buffer,
|
|
D3D12_RESOURCE_STATES new_state);
|
|
|
|
// Sets the current SSAA sample positions, needs to be done before setting
|
|
// render targets or copying to depth render targets.
|
|
void SetSamplePositions(MsaaSamples sample_positions);
|
|
|
|
// Returns a pipeline with deferred creation by its handle. May return nullptr
|
|
// if failed to create the pipeline.
|
|
inline ID3D12PipelineState* GetPipelineStateByHandle(void* handle) const {
|
|
return pipeline_cache_->GetPipelineStateByHandle(handle);
|
|
}
|
|
|
|
// Sets the current pipeline state to a compute pipeline. This is for cache
|
|
// invalidation primarily. A submission must be open.
|
|
void SetComputePipeline(ID3D12PipelineState* pipeline);
|
|
|
|
// Stores and unbinds render targets before changing render targets
|
|
// externally. This is separate from SetExternalGraphicsPipeline because it
|
|
// causes computations to be dispatched, and the scratch buffer may also be
|
|
// used.
|
|
void FlushAndUnbindRenderTargets();
|
|
|
|
// Sets the current pipeline state to a special drawing pipeline, invalidating
|
|
// various cached state variables. FlushAndUnbindRenderTargets may be needed
|
|
// before calling this. A submission must be open.
|
|
void SetExternalGraphicsPipeline(
|
|
ID3D12PipelineState* pipeline,
|
|
bool changing_rts_and_sample_positions = true,
|
|
bool changing_viewport = true, bool changing_blend_factor = false,
|
|
bool changing_stencil_ref = false);
|
|
|
|
// Returns the text to display in the GPU backend name in the window title.
|
|
std::wstring GetWindowTitleText() const;
|
|
|
|
std::unique_ptr<xe::ui::RawImage> Capture();
|
|
|
|
protected:
|
|
bool SetupContext() override;
|
|
void ShutdownContext() override;
|
|
|
|
void WriteRegister(uint32_t index, uint32_t value) override;
|
|
|
|
void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
|
|
uint32_t frontbuffer_height) override;
|
|
|
|
Shader* LoadShader(ShaderType shader_type, uint32_t guest_address,
|
|
const uint32_t* host_address,
|
|
uint32_t dword_count) override;
|
|
|
|
bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
|
|
IndexBufferInfo* index_buffer_info) override;
|
|
bool IssueCopy() override;
|
|
|
|
void InitializeTrace() override;
|
|
void FinalizeTrace() override;
|
|
|
|
private:
|
|
static constexpr uint32_t kQueueFrames = 3;
|
|
|
|
// Root parameter indices.
enum RootParameter : UINT {
  // The parameters up to kRootParameter_Count_Base are always present.

  // Vertex and texture fetch constants. Changed very frequently, especially
  // for UI draws and for models drawn in multiple parts.
  kRootParameter_FetchConstants = 0,
  // Vertex shader float constants. Changed quite frequently (for instance,
  // for one object drawn multiple times - may contain projection matrices).
  kRootParameter_FloatConstantsVertex,
  // Pixel shader float constants. Changed less frequently (per-material).
  kRootParameter_FloatConstantsPixel,
  // System constants such as viewport and alpha testing. Rarely changed.
  kRootParameter_SystemConstants,
  // Flow control (bool and loop) constants. Pretty rarely used and rarely
  // changed.
  kRootParameter_BoolLoopConstants,
  // Shared memory byte address buffer (t0) and, if ROV is used for EDRAM, the
  // EDRAM UAV (u0). Never changed except for when starting a new descriptor
  // heap.
  kRootParameter_SharedMemoryAndEDRAM,

  // Number of the always-present parameters.
  kRootParameter_Count_Base,

  // Extra parameters that may or may not exist:
  // - Pixel textures (t1+).
  // - Pixel samplers (s0+).
  // - Vertex textures (t1+).
  // - Vertex samplers (s0+).

  // Parameter count when all four extra parameters exist.
  kRootParameter_Count_Max = kRootParameter_Count_Base + 4,
};
|
|
|
|
// Root parameter indices of the optional texture and sampler parameters for
// the pixel and vertex stages.
struct RootExtraParameterIndices {
  // Equal to UINT32_MAX.
  // NOTE(review): presumably stored in a field whose parameter does not exist
  // in the root signature - confirm against GetRootExtraParameterIndices.
  static constexpr uint32_t kUnavailable = UINT32_MAX;

  uint32_t textures_pixel;
  uint32_t samplers_pixel;
  uint32_t textures_vertex;
  uint32_t samplers_vertex;
};
|
|
// Gets the indices of optional root parameters. Returns the total parameter
|
|
// count.
|
|
static uint32_t GetRootExtraParameterIndices(
|
|
const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader,
|
|
RootExtraParameterIndices& indices_out);
|
|
|
|
// BeginSubmission and EndSubmission may be called at any time. If there's an
|
|
// open non-frame submission, BeginSubmission(true) will promote it to a
|
|
// frame. EndSubmission(true) will close the frame no matter whether the
|
|
// submission has already been closed.
|
|
|
|
// If is_guest_command is true, a new full frame - with full cleanup of
|
|
// resources and, if needed, starting capturing - is opened if pending (as
|
|
// opposed to simply resuming after mid-frame synchronization).
|
|
void BeginSubmission(bool is_guest_command);
|
|
// If is_swap is true, a full frame is closed - with, if needed, cache
|
|
// clearing and stopping capturing. Returns whether the submission was done
|
|
// successfully; if it has failed, leaves it open.
|
|
bool EndSubmission(bool is_swap);
|
|
void AwaitAllSubmissionsCompletion();
|
|
// Need to await submission completion before calling.
|
|
void ClearCommandAllocatorCache();
|
|
|
|
void UpdateFixedFunctionState(bool primitive_two_faced);
|
|
void UpdateSystemConstantValues(
|
|
bool shared_memory_is_uav, bool primitive_two_faced,
|
|
uint32_t line_loop_closing_index, Endian index_endian,
|
|
uint32_t edge_factor_base, bool early_z, uint32_t color_mask,
|
|
const RenderTargetCache::PipelineRenderTarget render_targets[4]);
|
|
bool UpdateBindings(const D3D12Shader* vertex_shader,
|
|
const D3D12Shader* pixel_shader,
|
|
ID3D12RootSignature* root_signature);
|
|
|
|
// Returns dword count for one element for a memexport format, or 0 if it's
|
|
// not supported by the D3D12 command processor (if it's smaller than 1 dword,
|
|
// for instance).
|
|
// TODO(Triang3l): Check if any game uses memexport with formats smaller than
|
|
// 32 bits per element.
|
|
static uint32_t GetSupportedMemExportFormatSize(ColorFormat format);
|
|
|
|
// Returns a buffer for reading GPU data back to the CPU. Assuming
|
|
// synchronizing immediately after use. Always in COPY_DEST state.
|
|
ID3D12Resource* RequestReadbackBuffer(uint32_t size);
|
|
|
|
bool cache_clear_requested_ = false;
|
|
|
|
bool submission_open_ = false;
|
|
// Values of submission_fence_.
|
|
uint64_t submission_current_ = 1;
|
|
uint64_t submission_completed_ = 0;
|
|
HANDLE submission_fence_completion_event_ = nullptr;
|
|
ID3D12Fence* submission_fence_ = nullptr;
|
|
|
|
bool frame_open_ = false;
|
|
// Guest frame index, since some transient resources can be reused across
|
|
// submissions. Values updated in the beginning of a frame.
|
|
uint64_t frame_current_ = 1;
|
|
uint64_t frame_completed_ = 0;
|
|
// Submission indices of frames that have already been submitted.
|
|
uint64_t closed_frame_submissions_[kQueueFrames] = {};
|
|
|
|
// Node of a singly linked list of command allocators (linked through `next`).
struct CommandAllocator {
|
|
// The D3D12 command allocator held by this node.
ID3D12CommandAllocator* command_allocator;
|
|
// NOTE(review): presumably the index of the submission that last used this
// allocator - confirm against the implementation file.
uint64_t last_usage_submission;
|
|
// The following node in the list.
CommandAllocator* next;
|
|
};
|
|
CommandAllocator* command_allocator_writable_first_ = nullptr;
|
|
CommandAllocator* command_allocator_writable_last_ = nullptr;
|
|
CommandAllocator* command_allocator_submitted_first_ = nullptr;
|
|
CommandAllocator* command_allocator_submitted_last_ = nullptr;
|
|
ID3D12GraphicsCommandList* command_list_ = nullptr;
|
|
ID3D12GraphicsCommandList1* command_list_1_ = nullptr;
|
|
std::unique_ptr<DeferredCommandList> deferred_command_list_ = nullptr;
|
|
|
|
std::unique_ptr<SharedMemory> shared_memory_ = nullptr;
|
|
|
|
// Root signatures for different descriptor counts.
|
|
std::unordered_map<uint32_t, ID3D12RootSignature*> root_signatures_;
|
|
|
|
std::unique_ptr<PipelineCache> pipeline_cache_ = nullptr;
|
|
|
|
std::unique_ptr<TextureCache> texture_cache_ = nullptr;
|
|
|
|
std::unique_ptr<RenderTargetCache> render_target_cache_ = nullptr;
|
|
|
|
std::unique_ptr<PrimitiveConverter> primitive_converter_ = nullptr;
|
|
|
|
std::unique_ptr<ui::d3d12::UploadBufferPool> constant_buffer_pool_ = nullptr;
|
|
std::unique_ptr<ui::d3d12::DescriptorHeapPool> view_heap_pool_ = nullptr;
|
|
std::unique_ptr<ui::d3d12::DescriptorHeapPool> sampler_heap_pool_ = nullptr;
|
|
|
|
// Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL
|
|
// ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D.
|
|
ID3D12Resource* gamma_ramp_texture_ = nullptr;
|
|
D3D12_RESOURCE_STATES gamma_ramp_texture_state_;
|
|
// Upload buffer for an image matching gamma_ramp_texture_, but with
|
|
// kQueueFrames array layers.
|
|
ID3D12Resource* gamma_ramp_upload_ = nullptr;
|
|
uint8_t* gamma_ramp_upload_mapping_ = nullptr;
|
|
D3D12_PLACED_SUBRESOURCE_FOOTPRINT gamma_ramp_footprints_[kQueueFrames * 2];
|
|
|
|
static constexpr uint32_t kSwapTextureWidth = 1280;
|
|
static constexpr uint32_t kSwapTextureHeight = 720;
|
|
inline std::pair<uint32_t, uint32_t> GetSwapTextureSize() const {
|
|
if (texture_cache_->IsResolutionScale2X()) {
|
|
return std::make_pair(kSwapTextureWidth * 2, kSwapTextureHeight * 2);
|
|
}
|
|
return std::make_pair(kSwapTextureWidth, kSwapTextureHeight);
|
|
}
|
|
ID3D12Resource* swap_texture_ = nullptr;
|
|
D3D12_PLACED_SUBRESOURCE_FOOTPRINT swap_texture_copy_footprint_;
|
|
UINT64 swap_texture_copy_size_;
|
|
ID3D12DescriptorHeap* swap_texture_rtv_descriptor_heap_ = nullptr;
|
|
D3D12_CPU_DESCRIPTOR_HANDLE swap_texture_rtv_;
|
|
ID3D12DescriptorHeap* swap_texture_srv_descriptor_heap_ = nullptr;
|
|
|
|
// Unsubmitted barrier batch.
|
|
std::vector<D3D12_RESOURCE_BARRIER> barriers_;
|
|
|
|
// Pairs a buffer with a submission index. Entries of this type are queued in
// buffers_for_deletion_.
struct BufferForDeletion {
|
|
// The buffer resource awaiting deletion.
ID3D12Resource* buffer;
|
|
// NOTE(review): presumably the buffer may only be released once this
// submission has completed on the GPU - confirm against the implementation.
uint64_t last_usage_submission;
|
|
};
|
|
std::deque<BufferForDeletion> buffers_for_deletion_;
|
|
|
|
static constexpr uint32_t kScratchBufferSizeIncrement = 16 * 1024 * 1024;
|
|
ID3D12Resource* scratch_buffer_ = nullptr;
|
|
uint32_t scratch_buffer_size_ = 0;
|
|
D3D12_RESOURCE_STATES scratch_buffer_state_;
|
|
bool scratch_buffer_used_ = false;
|
|
|
|
static constexpr uint32_t kReadbackBufferSizeIncrement = 16 * 1024 * 1024;
|
|
ID3D12Resource* readback_buffer_ = nullptr;
|
|
uint32_t readback_buffer_size_ = 0;
|
|
|
|
std::atomic<bool> pix_capture_requested_ = false;
|
|
bool pix_capturing_;
|
|
|
|
// The current fixed-function drawing state.
|
|
D3D12_VIEWPORT ff_viewport_;
|
|
D3D12_RECT ff_scissor_;
|
|
float ff_blend_factor_[4];
|
|
uint32_t ff_stencil_ref_;
|
|
bool ff_viewport_update_needed_;
|
|
bool ff_scissor_update_needed_;
|
|
bool ff_blend_factor_update_needed_;
|
|
bool ff_stencil_ref_update_needed_;
|
|
|
|
// Current SSAA sample positions (to be updated by the render target cache).
|
|
MsaaSamples current_sample_positions_;
|
|
|
|
// Currently bound pipeline, either a graphics pipeline from the pipeline
|
|
// cache (with potentially deferred creation - current_external_pipeline_ is
|
|
// nullptr in this case) or a non-Xenos graphics or compute pipeline
|
|
// (current_cached_pipeline_ is nullptr in this case).
|
|
void* current_cached_pipeline_;
|
|
ID3D12PipelineState* current_external_pipeline_;
|
|
|
|
// Currently bound graphics root signature.
|
|
ID3D12RootSignature* current_graphics_root_signature_;
|
|
// Extra parameters which may or may not be present.
|
|
RootExtraParameterIndices current_graphics_root_extras_;
|
|
// Whether root parameters are up to date - reset if a new signature is bound.
|
|
uint32_t current_graphics_root_up_to_date_;
|
|
|
|
// Currently bound descriptor heaps - updated by RequestViewDescriptors and
|
|
// RequestSamplerDescriptors.
|
|
ID3D12DescriptorHeap* current_view_heap_;
|
|
ID3D12DescriptorHeap* current_sampler_heap_;
|
|
|
|
// System shader constants.
|
|
DxbcShaderTranslator::SystemConstants system_constants_;
|
|
ColorRenderTargetFormat system_constants_color_formats_[4];
|
|
|
|
// Float constant usage masks of the last draw call.
|
|
uint64_t current_float_constant_map_vertex_[4];
|
|
uint64_t current_float_constant_map_pixel_[4];
|
|
|
|
// Constant buffer bindings.
|
|
// State of one constant buffer binding: a GPU virtual address plus a flag
// telling whether the binding is up to date.
struct ConstantBufferBinding {
|
|
// GPU virtual address of the constant buffer.
D3D12_GPU_VIRTUAL_ADDRESS buffer_address;
|
|
// NOTE(review): presumably cleared whenever the constants must be rewritten
// or rebound - confirm against the implementation file.
bool up_to_date;
|
|
};
|
|
ConstantBufferBinding cbuffer_bindings_system_;
|
|
ConstantBufferBinding cbuffer_bindings_float_vertex_;
|
|
ConstantBufferBinding cbuffer_bindings_float_pixel_;
|
|
ConstantBufferBinding cbuffer_bindings_bool_loop_;
|
|
ConstantBufferBinding cbuffer_bindings_fetch_;
|
|
|
|
// Pages with the descriptors currently used for handling Xenos draw calls.
|
|
uint64_t draw_view_heap_index_;
|
|
uint64_t draw_sampler_heap_index_;
|
|
|
|
// Whether the last used texture bindings have been written to the current
|
|
// view descriptor heap.
|
|
bool texture_bindings_written_vertex_;
|
|
bool texture_bindings_written_pixel_;
|
|
// Hashes of the last texture bindings written to the current view descriptor
|
|
// heap with the last used descriptor layout. Valid only when the
|
|
// corresponding "written" variables are true.
|
|
uint64_t current_texture_bindings_hash_vertex_;
|
|
uint64_t current_texture_bindings_hash_pixel_;
|
|
|
|
// Whether the last used samplers have been written to the current sampler
|
|
// descriptor heap.
|
|
bool samplers_written_vertex_;
|
|
bool samplers_written_pixel_;
|
|
// Hashes of the last sampler parameters written to the current sampler
|
|
// descriptor heap with the last used descriptor layout. Valid only when the
|
|
// corresponding "written" variables are true.
|
|
uint64_t current_samplers_hash_vertex_;
|
|
uint64_t current_samplers_hash_pixel_;
|
|
|
|
// Latest descriptor handles used for handling Xenos draw calls.
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_system_constants_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_vertex_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_float_constants_pixel_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_bool_loop_constants_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_fetch_constants_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_shared_memory_and_edram_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_vertex_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_textures_pixel_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_samplers_vertex_;
|
|
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_samplers_pixel_;
|
|
|
|
// Current primitive topology.
|
|
D3D_PRIMITIVE_TOPOLOGY primitive_topology_;
|
|
};
|
|
|
|
} // namespace d3d12
|
|
} // namespace gpu
|
|
} // namespace xe
|
|
|
|
#endif // XENIA_GPU_D3D12_D3D12_COMMAND_PROCESSOR_H_
|