diff --git a/src/xenia/base/math.h b/src/xenia/base/math.h index c33e27019..e2d321702 100644 --- a/src/xenia/base/math.h +++ b/src/xenia/base/math.h @@ -26,23 +26,28 @@ namespace xe { template -size_t countof(T (&arr)[N]) { +constexpr size_t countof(T (&arr)[N]) { return std::extent::value; } +template +constexpr bool is_pow2(T value) { + return (value & (value - 1)) == 0; +} + // Rounds up the given value to the given alignment. template -T align(T value, T alignment) { +constexpr T align(T value, T alignment) { return (value + alignment - 1) & ~(alignment - 1); } // Rounds the given number up to the next highest multiple. template -T round_up(T value, V multiple) { +constexpr T round_up(T value, V multiple) { return value ? (((value + multiple - 1) / multiple) * multiple) : multiple; } -inline float saturate(float value) { +constexpr float saturate(float value) { return std::max(std::min(1.0f, value), -1.0f); } @@ -62,7 +67,7 @@ T next_pow2(T value) { #if __cpp_lib_gcd_lcm template -inline constexpr T greatest_common_divisor(T a, T b) { +constexpr T greatest_common_divisor(T a, T b) { return std::gcd(a, b); } #else @@ -77,14 +82,14 @@ constexpr T greatest_common_divisor(T a, T b) { #endif template -inline constexpr void reduce_fraction(T& numerator, T& denominator) { +constexpr void reduce_fraction(T& numerator, T& denominator) { auto gcd = greatest_common_divisor(numerator, denominator); numerator /= gcd; denominator /= gcd; } template -inline constexpr void reduce_fraction(std::pair& fraction) { +constexpr void reduce_fraction(std::pair& fraction) { reduce_fraction(fraction.first, fraction.second); } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 23163a609..ec45089a9 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -438,9 +438,10 @@ uint64_t D3D12CommandProcessor::RequestViewBindfulDescriptors( uint64_t current_heap_index = 
view_bindful_heap_pool_->Request( frame_current_, previous_heap_index, count_for_partial_update, count_for_full_update, descriptor_index); - if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + if (current_heap_index == + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. - return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + return ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; } ID3D12DescriptorHeap* heap = view_bindful_heap_pool_->GetLastRequestHeap(); if (view_bindful_heap_current_ != heap) { @@ -511,9 +512,9 @@ bool D3D12CommandProcessor::RequestOneUseSingleViewDescriptors( D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle_start; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle_start; if (RequestViewBindfulDescriptors( - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid, count, count, + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid, count, count, cpu_handle_start, gpu_handle_start) == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { return false; } for (uint32_t i = 0; i < count; ++i) { @@ -609,9 +610,10 @@ uint64_t D3D12CommandProcessor::RequestSamplerBindfulDescriptors( uint64_t current_heap_index = sampler_bindful_heap_pool_->Request( frame_current_, previous_heap_index, count_for_partial_update, count_for_full_update, descriptor_index); - if (current_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + if (current_heap_index == + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { // There was an error. - return ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + return ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; } ID3D12DescriptorHeap* heap = sampler_bindful_heap_pool_->GetLastRequestHeap(); if (sampler_bindful_heap_current_ != heap) { @@ -890,8 +892,10 @@ bool D3D12CommandProcessor::SetupContext() { cvars::d3d12_edram_rov && provider.AreRasterizerOrderedViewsSupported(); // Initialize resource binding. 
- constant_buffer_pool_ = - std::make_unique(provider, 1024 * 1024); + constant_buffer_pool_ = std::make_unique( + provider, + std::max(ui::d3d12::D3D12UploadBufferPool::kDefaultPageSize, + sizeof(float) * 4 * D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT)); if (bindless_resources_used_) { D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc; view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; @@ -927,10 +931,12 @@ bool D3D12CommandProcessor::SetupContext() { sampler_bindless_heap_current_->GetGPUDescriptorHandleForHeapStart(); sampler_bindless_heap_allocated_ = 0; } else { - view_bindful_heap_pool_ = std::make_unique( - device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, kViewBindfulHeapSize); + view_bindful_heap_pool_ = + std::make_unique( + device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + kViewBindfulHeapSize); sampler_bindful_heap_pool_ = - std::make_unique( + std::make_unique( device, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER, kSamplerHeapSize); } @@ -2506,9 +2512,9 @@ void D3D12CommandProcessor::BeginSubmission(bool is_guest_command) { cbuffer_binding_descriptor_indices_pixel_.up_to_date = false; } else { draw_view_bindful_heap_index_ = - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; draw_sampler_bindful_heap_index_ = - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; bindful_textures_written_vertex_ = false; bindful_textures_written_pixel_ = false; bindful_samplers_written_vertex_ = false; @@ -3519,13 +3525,6 @@ bool D3D12CommandProcessor::UpdateBindings( const Shader::ConstantRegisterMap& float_constant_map_vertex = vertex_shader->constant_register_map(); uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count; - // Even if the shader doesn't need any float constants, a valid binding must - // still be provided, so if the first draw in the frame with the current root - // signature doesn't have float constants at all, 
still allocate an empty - // buffer. - uint32_t float_constant_size_vertex = xe::align( - uint32_t(std::max(float_constant_count_vertex, 1u) * 4 * sizeof(float)), - 256u); for (uint32_t i = 0; i < 4; ++i) { if (current_float_constant_map_vertex_[i] != float_constant_map_vertex.float_bitmap[i]) { @@ -3557,15 +3556,13 @@ bool D3D12CommandProcessor::UpdateBindings( std::memset(current_float_constant_map_pixel_, 0, sizeof(current_float_constant_map_pixel_)); } - uint32_t float_constant_size_pixel = xe::align( - uint32_t(std::max(float_constant_count_pixel, 1u) * 4 * sizeof(float)), - 256u); // Write the constant buffer data. if (!cbuffer_binding_system_.up_to_date) { uint8_t* system_constants = constant_buffer_pool_->Request( - frame_current_, xe::align(uint32_t(sizeof(system_constants_)), 256u), - nullptr, nullptr, &cbuffer_binding_system_.address); + frame_current_, sizeof(system_constants_), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_binding_system_.address); if (system_constants == nullptr) { return false; } @@ -3576,8 +3573,14 @@ bool D3D12CommandProcessor::UpdateBindings( ~(1u << root_parameter_system_constants); } if (!cbuffer_binding_float_vertex_.up_to_date) { + // Even if the shader doesn't need any float constants, a valid binding must + // still be provided, so if the first draw in the frame with the current + // root signature doesn't have float constants at all, still allocate an + // empty buffer. 
uint8_t* float_constants = constant_buffer_pool_->Request( - frame_current_, float_constant_size_vertex, nullptr, nullptr, + frame_current_, + sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_float_vertex_.address); if (float_constants == nullptr) { return false; @@ -3603,7 +3606,9 @@ bool D3D12CommandProcessor::UpdateBindings( } if (!cbuffer_binding_float_pixel_.up_to_date) { uint8_t* float_constants = constant_buffer_pool_->Request( - frame_current_, float_constant_size_pixel, nullptr, nullptr, + frame_current_, + sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_float_pixel_.address); if (float_constants == nullptr) { return false; @@ -3632,28 +3637,33 @@ bool D3D12CommandProcessor::UpdateBindings( ~(1u << root_parameter_float_constants_pixel); } if (!cbuffer_binding_bool_loop_.up_to_date) { - uint8_t* bool_loop_constants = - constant_buffer_pool_->Request(frame_current_, 256, nullptr, nullptr, - &cbuffer_binding_bool_loop_.address); + constexpr uint32_t kBoolLoopConstantsSize = (8 + 32) * sizeof(uint32_t); + uint8_t* bool_loop_constants = constant_buffer_pool_->Request( + frame_current_, kBoolLoopConstantsSize, + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_binding_bool_loop_.address); if (bool_loop_constants == nullptr) { return false; } std::memcpy(bool_loop_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32, - (8 + 32) * sizeof(uint32_t)); + kBoolLoopConstantsSize); cbuffer_binding_bool_loop_.up_to_date = true; current_graphics_root_up_to_date_ &= ~(1u << root_parameter_bool_loop_constants); } if (!cbuffer_binding_fetch_.up_to_date) { + constexpr uint32_t kFetchConstantsSize = 32 * 6 * sizeof(uint32_t); uint8_t* fetch_constants = constant_buffer_pool_->Request( - frame_current_, 768, nullptr, nullptr, 
&cbuffer_binding_fetch_.address); + frame_current_, kFetchConstantsSize, + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_binding_fetch_.address); if (fetch_constants == nullptr) { return false; } std::memcpy(fetch_constants, ®s[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32, - 32 * 6 * sizeof(uint32_t)); + kFetchConstantsSize); cbuffer_binding_fetch_.up_to_date = true; current_graphics_root_up_to_date_ &= ~(1u << root_parameter_fetch_constants); @@ -3885,12 +3895,10 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t* descriptor_indices = reinterpret_cast(constant_buffer_pool_->Request( frame_current_, - xe::align( - uint32_t(std::max(texture_count_vertex + sampler_count_vertex, - uint32_t(1)) * - sizeof(uint32_t)), - uint32_t(256)), - nullptr, nullptr, + std::max(texture_count_vertex + sampler_count_vertex, + uint32_t(1)) * + sizeof(uint32_t), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_descriptor_indices_vertex_.address)); if (!descriptor_indices) { return false; @@ -3923,12 +3931,9 @@ bool D3D12CommandProcessor::UpdateBindings( uint32_t* descriptor_indices = reinterpret_cast(constant_buffer_pool_->Request( frame_current_, - xe::align( - uint32_t(std::max(texture_count_pixel + sampler_count_pixel, - uint32_t(1)) * - sizeof(uint32_t)), - uint32_t(256)), - nullptr, nullptr, + std::max(texture_count_pixel + sampler_count_pixel, uint32_t(1)) * + sizeof(uint32_t), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, &cbuffer_binding_descriptor_indices_pixel_.address)); if (!descriptor_indices) { return false; @@ -4003,7 +4008,8 @@ bool D3D12CommandProcessor::UpdateBindings( uint64_t view_heap_index = RequestViewBindfulDescriptors( draw_view_bindful_heap_index_, view_count_partial_update, view_count_full_update, view_cpu_handle, view_gpu_handle); - if (view_heap_index == ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + if (view_heap_index == + 
ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { XELOGE("Failed to allocate view descriptors"); return false; } @@ -4018,14 +4024,14 @@ bool D3D12CommandProcessor::UpdateBindings( D3D12_GPU_DESCRIPTOR_HANDLE sampler_gpu_handle = {}; uint32_t descriptor_size_sampler = provider.GetSamplerDescriptorSize(); uint64_t sampler_heap_index = - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid; + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid; if (sampler_count_vertex != 0 || sampler_count_pixel != 0) { sampler_heap_index = RequestSamplerBindfulDescriptors( draw_sampler_bindful_heap_index_, sampler_count_partial_update, sampler_count_vertex + sampler_count_pixel, sampler_cpu_handle, sampler_gpu_handle); if (sampler_heap_index == - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { XELOGE("Failed to allocate sampler descriptors"); return false; } @@ -4055,7 +4061,7 @@ bool D3D12CommandProcessor::UpdateBindings( ~(1u << kRootParameter_Bindful_SharedMemoryAndEdram); } if (sampler_heap_index != - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid && + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid && draw_sampler_bindful_heap_index_ != sampler_heap_index) { write_samplers_vertex = sampler_count_vertex != 0; write_samplers_pixel = sampler_count_pixel != 0; @@ -4139,7 +4145,7 @@ bool D3D12CommandProcessor::UpdateBindings( // Wrote new descriptors on the current page. 
draw_view_bindful_heap_index_ = view_heap_index; if (sampler_heap_index != - ui::d3d12::DescriptorHeapPool::kHeapIndexInvalid) { + ui::d3d12::D3D12DescriptorHeapPool::kHeapIndexInvalid) { draw_sampler_bindful_heap_index_ = sampler_heap_index; } } diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 4921d0e44..52adcd692 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -30,8 +30,9 @@ #include "xenia/gpu/xenos.h" #include "xenia/kernel/kernel_state.h" #include "xenia/ui/d3d12/d3d12_context.h" +#include "xenia/ui/d3d12/d3d12_descriptor_heap_pool.h" +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" #include "xenia/ui/d3d12/d3d12_util.h" -#include "xenia/ui/d3d12/pools.h" DECLARE_int32(internal_tile_height); DECLARE_int32(internal_tile_width); @@ -97,7 +98,7 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12RootSignature* GetRootSignature(const D3D12Shader* vertex_shader, const D3D12Shader* pixel_shader); - ui::d3d12::UploadBufferPool& GetConstantBufferPool() const { + ui::d3d12::D3D12UploadBufferPool& GetConstantBufferPool() const { return *constant_buffer_pool_; } @@ -315,8 +316,8 @@ class D3D12CommandProcessor : public CommandProcessor { void ClearCommandAllocatorCache(); // Request descriptors and automatically rebind the descriptor heap on the - // draw command list. Refer to DescriptorHeapPool::Request for partial/full - // update explanation. Doesn't work when bindless descriptors are used. + // draw command list. Refer to D3D12DescriptorHeapPool::Request for partial / + // full update explanation. Doesn't work when bindless descriptors are used. 
uint64_t RequestViewBindfulDescriptors( uint64_t previous_heap_index, uint32_t count_for_partial_update, uint32_t count_for_full_update, @@ -379,7 +380,7 @@ class D3D12CommandProcessor : public CommandProcessor { CommandAllocator* command_allocator_submitted_last_ = nullptr; ID3D12GraphicsCommandList* command_list_ = nullptr; ID3D12GraphicsCommandList1* command_list_1_ = nullptr; - std::unique_ptr deferred_command_list_ = nullptr; + std::unique_ptr deferred_command_list_; // Should bindless textures and samplers be used - many times faster // UpdateBindings than bindful (that becomes a significant bottleneck with @@ -391,13 +392,12 @@ class D3D12CommandProcessor : public CommandProcessor { // targets. bool edram_rov_used_ = false; - std::unique_ptr constant_buffer_pool_ = nullptr; + std::unique_ptr constant_buffer_pool_; static constexpr uint32_t kViewBindfulHeapSize = 32768; static_assert(kViewBindfulHeapSize <= D3D12_MAX_SHADER_VISIBLE_DESCRIPTOR_HEAP_SIZE_TIER_1); - std::unique_ptr view_bindful_heap_pool_ = - nullptr; + std::unique_ptr view_bindful_heap_pool_; // Currently bound descriptor heap - updated by RequestViewBindfulDescriptors. ID3D12DescriptorHeap* view_bindful_heap_current_; // Rationale: textures have 4 KB alignment in guest memory, and there can be @@ -427,8 +427,8 @@ class D3D12CommandProcessor : public CommandProcessor { // FIXME(Triang3l): Investigate the issue with the sampler 2047 on Nvidia. 
static constexpr uint32_t kSamplerHeapSize = 2000; static_assert(kSamplerHeapSize <= D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE); - std::unique_ptr sampler_bindful_heap_pool_ = - nullptr; + std::unique_ptr + sampler_bindful_heap_pool_; ID3D12DescriptorHeap* sampler_bindful_heap_current_; ID3D12DescriptorHeap* sampler_bindless_heap_current_ = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE sampler_bindless_heap_cpu_start_; @@ -454,15 +454,15 @@ class D3D12CommandProcessor : public CommandProcessor { ID3D12RootSignature* root_signature_bindless_vs_ = nullptr; ID3D12RootSignature* root_signature_bindless_ds_ = nullptr; - std::unique_ptr shared_memory_ = nullptr; + std::unique_ptr shared_memory_; - std::unique_ptr pipeline_cache_ = nullptr; + std::unique_ptr pipeline_cache_; - std::unique_ptr texture_cache_ = nullptr; + std::unique_ptr texture_cache_; - std::unique_ptr render_target_cache_ = nullptr; + std::unique_ptr render_target_cache_; - std::unique_ptr primitive_converter_ = nullptr; + std::unique_ptr primitive_converter_; // Mip 0 contains the normal gamma ramp (256 entries), mip 1 contains the PWL // ramp (128 entries). DXGI_FORMAT_R10G10B10A2_UNORM 1D. diff --git a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc index 835c94c07..e50bbbaac 100644 --- a/src/xenia/gpu/d3d12/d3d12_graphics_system.cc +++ b/src/xenia/gpu/d3d12/d3d12_graphics_system.cc @@ -142,6 +142,7 @@ X_STATUS D3D12GraphicsSystem::Setup(cpu::Processor* processor, stretch_pipeline_desc.VS.BytecodeLength = sizeof(fullscreen_vs); stretch_pipeline_desc.PS.pShaderBytecode = stretch_ps; stretch_pipeline_desc.PS.BytecodeLength = sizeof(stretch_ps); + // The shader will set alpha to 1, don't use output-merger to preserve it. 
stretch_pipeline_desc.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; stretch_pipeline_desc.SampleMask = UINT_MAX; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.h b/src/xenia/gpu/d3d12/pipeline_cache.h index 30fd68a4e..cdc6ed5f3 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.h +++ b/src/xenia/gpu/d3d12/pipeline_cache.h @@ -247,7 +247,7 @@ class PipelineCache { uint32_t resolution_scale_; // Reusable shader translator. - std::unique_ptr shader_translator_ = nullptr; + std::unique_ptr shader_translator_; // Command processor thread DXIL conversion/disassembly interfaces, if DXIL // disassembly is enabled. @@ -344,7 +344,7 @@ class PipelineCache { // Manual-reset event set when the last queued pipeline state object is // created and there are no more pipeline state objects to create. This is // triggered by the thread creating the last pipeline state object. - std::unique_ptr creation_completion_event_ = nullptr; + std::unique_ptr creation_completion_event_; // Whether setting the event on completion is queued. Protected with // creation_request_lock_, notify_one creation_request_cond_ when set. bool creation_completion_set_event_ = false; diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc index ab2138b47..eba3585ba 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.cc +++ b/src/xenia/gpu/d3d12/primitive_converter.cc @@ -52,11 +52,13 @@ bool PrimitiveConverter::Initialize() { D3D12_HEAP_FLAGS heap_flag_create_not_zeroed = provider.GetHeapFlagCreateNotZeroed(); - // There can be at most 65535 indices in a Xenos draw call, but they can be up - // to 4 bytes large, and conversion can add more indices (almost triple the - // count for triangle strips, for instance). 
- buffer_pool_ = - std::make_unique(provider, 4 * 1024 * 1024); + // There can be at most 65535 indices in a Xenos draw call (16 bit index + // count), but they can be up to 4 bytes large, and conversion can add more + // indices (almost triple the count for triangle strips or fans, for + // instance). + buffer_pool_ = std::make_unique( + provider, std::max(sizeof(uint32_t) * 3 * 65535, + ui::d3d12::D3D12UploadBufferPool::kDefaultPageSize)); // Create the static index buffer for non-indexed drawing. D3D12_RESOURCE_DESC static_ib_desc; @@ -697,8 +699,8 @@ void* PrimitiveConverter::AllocateIndices( } D3D12_GPU_VIRTUAL_ADDRESS gpu_address; uint8_t* mapping = - buffer_pool_->Request(command_processor_.GetCurrentFrame(), size, nullptr, - nullptr, &gpu_address); + buffer_pool_->Request(command_processor_.GetCurrentFrame(), size, 16, + nullptr, nullptr, &gpu_address); if (mapping == nullptr) { XELOGE("Failed to allocate space for {} converted {}-bit vertex indices", count, format == xenos::IndexFormat::kInt32 ? 32 : 16); diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h index 8da664ed2..4d5c80f2d 100644 --- a/src/xenia/gpu/d3d12/primitive_converter.h +++ b/src/xenia/gpu/d3d12/primitive_converter.h @@ -19,7 +19,7 @@ #include "xenia/gpu/xenos.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_context.h" -#include "xenia/ui/d3d12/pools.h" +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" namespace xe { namespace gpu { @@ -107,7 +107,7 @@ class PrimitiveConverter { Memory& memory_; TraceWriter& trace_writer_; - std::unique_ptr buffer_pool_ = nullptr; + std::unique_ptr buffer_pool_; // Static index buffers for emulating unsupported primitive types when drawing // without an index buffer. 
diff --git a/src/xenia/gpu/d3d12/render_target_cache.cc b/src/xenia/gpu/d3d12/render_target_cache.cc index cfc7f6f47..3cc9d8262 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.cc +++ b/src/xenia/gpu/d3d12/render_target_cache.cc @@ -26,7 +26,6 @@ #include "xenia/gpu/texture_info.h" #include "xenia/gpu/texture_util.h" #include "xenia/ui/d3d12/d3d12_util.h" -#include "xenia/ui/d3d12/pools.h" DEFINE_bool(d3d12_16bit_rtv_full_range, true, "Use full -32...32 range for RG16 and RGBA16 render targets " @@ -1501,13 +1500,13 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { auto& provider = command_processor_.GetD3D12Context().GetD3D12Provider(); if (!edram_snapshot_restore_pool_) { edram_snapshot_restore_pool_ = - std::make_unique(provider, - xenos::kEdramSizeBytes); + std::make_unique( + provider, xenos::kEdramSizeBytes); } ID3D12Resource* upload_buffer; - uint32_t upload_buffer_offset; + size_t upload_buffer_offset; void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request( - command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes, + command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes, 1, &upload_buffer, &upload_buffer_offset, nullptr); if (!upload_buffer_mapping) { XELOGE("Failed to get a buffer for restoring a EDRAM snapshot"); @@ -1518,7 +1517,7 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) { TransitionEdramBuffer(D3D12_RESOURCE_STATE_COPY_DEST); command_processor_.SubmitBarriers(); command_list.D3DCopyBufferRegion(edram_buffer_, 0, upload_buffer, - upload_buffer_offset, + UINT64(upload_buffer_offset), xenos::kEdramSizeBytes); if (!edram_rov_used_) { // Clear and ignore the old 32-bit float depth - the non-ROV path is diff --git a/src/xenia/gpu/d3d12/render_target_cache.h b/src/xenia/gpu/d3d12/render_target_cache.h index e343c6991..cf575dcdf 100644 --- a/src/xenia/gpu/d3d12/render_target_cache.h +++ b/src/xenia/gpu/d3d12/render_target_cache.h @@ -23,7 +23,7 @@ #include 
"xenia/gpu/xenos.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_api.h" -#include "xenia/ui/d3d12/pools.h" +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" DECLARE_bool(d3d12_16bit_rtv_full_range); @@ -602,8 +602,8 @@ class RenderTargetCache { // For traces only. ID3D12Resource* edram_snapshot_download_buffer_ = nullptr; - std::unique_ptr edram_snapshot_restore_pool_ = - nullptr; + std::unique_ptr + edram_snapshot_restore_pool_; }; } // namespace d3d12 diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso index 6a1fe6724..229d21858 100644 Binary files a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso and b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.cso differ diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h index c41cf6287..e6fd53b4f 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h +++ b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.h @@ -1,11 +1,11 @@ // generated from `xb buildhlsl` // source: stretch_gamma.ps.hlsl const uint8_t stretch_gamma_ps[] = { - 0x44, 0x58, 0x42, 0x43, 0x8D, 0x1B, 0xAE, 0x52, 0x96, 0x87, 0x7B, 0x8F, - 0xCB, 0xD3, 0xBC, 0x4B, 0xCF, 0xAF, 0x66, 0xA9, 0x01, 0x00, 0x00, 0x00, - 0x10, 0x05, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x46, 0xCD, 0x15, 0xA8, 0x4D, 0x5B, 0xFD, 0x07, + 0x4C, 0x62, 0xA7, 0xD0, 0x61, 0x89, 0x85, 0xA6, 0x01, 0x00, 0x00, 0x00, + 0x24, 0x05, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x14, 0x02, 0x00, 0x00, 0x48, 0x02, 0x00, 0x00, 0x7C, 0x02, 0x00, 0x00, - 0x74, 0x04, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD8, 0x01, 0x00, 0x00, + 0x88, 0x04, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD8, 0x01, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2C, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00, 0xB0, 0x01, 0x00, 0x00, 0x13, 
0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, @@ -16,7 +16,7 @@ const uint8_t stretch_gamma_ps[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF4, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, @@ -54,8 +54,8 @@ const uint8_t stretch_gamma_ps[] = { 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x53, 0x56, 0x5F, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x00, 0xAB, 0xAB, - 0x53, 0x48, 0x45, 0x58, 0xF0, 0x01, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, - 0x7C, 0x00, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, + 0x53, 0x48, 0x45, 0x58, 0x04, 0x02, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, + 0x81, 0x00, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x59, 0x00, 0x00, 0x07, 0x46, 0x8E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5A, 0x00, 0x00, 0x06, 0x46, 0x6E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -67,20 +67,20 @@ const uint8_t stretch_gamma_ps[] = { 0x55, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x10, 0x00, 0x03, 0x32, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x02, - 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x0D, 0xF2, 0x00, 0x10, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x0D, 0x72, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, - 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x80, + 0x82, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x80, 0x41, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, - 0x38, 0x00, 0x00, 0x09, 0x22, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x09, 0x12, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0A, 0x80, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3F, 0x32, 0x00, 0x00, 0x09, 0x72, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x46, 0x02, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x56, 0x05, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x46, 0x02, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0xF6, 0x0F, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x0D, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x60, 0x20, 0x00, @@ -94,19 +94,21 @@ const uint8_t stretch_gamma_ps[] = { 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x60, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x36, 0x00, 0x00, 0x05, 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x46, 0x0E, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3E, 0x00, 0x00, 0x01, - 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, - 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x36, 0x00, 0x00, 0x05, 0x72, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x02, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, + 0x82, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, + 0x00, 0x00, 0x80, 0x3F, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, + 0x94, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, }; diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt index c4addc470..2a06727e9 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt +++ b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_gamma_ps.txt @@ -17,7 +17,7 @@ // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // xe_sampler_linear_clamp sampler NA NA S0 s0 1 -// xe_texture texture float4 2d T0 t0 1 +// xe_texture texture float3 2d T0 t0 1 // 
xe_gamma_ramp texture float3 1d T1 t1 1 // XeStretchGammaRootConstants cbuffer NA NA CB0 cb0 1 // @@ -45,13 +45,14 @@ dcl_resource_texture1d (float,float,float,float) T1[1:1], space=0 dcl_input_ps linear v0.xy dcl_output o0.xyzw dcl_temps 2 -sample_l r0.xyzw, v0.xyxx, T0[0].xyzw, S0[0], l(0.000000) -add r1.x, -CB0[0][0].x, l(1.000000) -mul r1.y, CB0[0][0].x, l(0.500000) -mad r0.xyz, r0.xyzx, r1.xxxx, r1.yyyy +sample_l r0.xyz, v0.xyxx, T0[0].xyzw, S0[0], l(0.000000) +add r0.w, -CB0[0][0].x, l(1.000000) +mul r1.x, CB0[0][0].x, l(0.500000) +mad r0.xyz, r0.xyzx, r0.wwww, r1.xxxx sample_l r0.x, r0.x, T1[1].xyzw, S0[0], l(0.000000) sample_l r0.y, r0.y, T1[1].xyzw, S0[0], l(0.000000) sample_l r0.z, r0.z, T1[1].xyzw, S0[0], l(0.000000) -mov o0.xyzw, r0.xyzw +mov o0.xyz, r0.xyzx +mov o0.w, l(1.000000) ret -// Approximately 9 instruction slots used +// Approximately 10 instruction slots used diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.cso b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.cso index 1843dd639..c5a49abfd 100644 Binary files a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.cso and b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.cso differ diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h index 5aa4fdb1e..7f850d127 100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h +++ b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.h @@ -1,11 +1,11 @@ // generated from `xb buildhlsl` // source: stretch.ps.hlsl const uint8_t stretch_ps[] = { - 0x44, 0x58, 0x42, 0x43, 0x84, 0x8B, 0x62, 0x99, 0x4D, 0x9D, 0x1A, 0x13, - 0x49, 0x13, 0xF7, 0x6B, 0x9F, 0x69, 0x25, 0x8A, 0x01, 0x00, 0x00, 0x00, - 0xB0, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0x0B, 0x39, 0x5E, 0x9C, 0x0F, 0x3E, 0xAC, 0xF6, + 0x9E, 0xB3, 0x11, 0xC0, 0x9B, 0x25, 0x2E, 0x6E, 0x01, 0x00, 0x00, 0x00, + 0xE0, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x14, 0x01, 0x00, 0x00, 0x48, 0x01, 
0x00, 0x00, 0x7C, 0x01, 0x00, 0x00, - 0x14, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD8, 0x00, 0x00, 0x00, + 0x44, 0x02, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0xD8, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x3C, 0x00, 0x00, 0x00, 0x01, 0x05, 0xFF, 0xFF, 0x00, 0x05, 0x00, 0x00, 0xAF, 0x00, 0x00, 0x00, 0x13, 0x13, 0x44, 0x25, 0x3C, 0x00, 0x00, 0x00, @@ -16,7 +16,7 @@ const uint8_t stretch_ps[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xA4, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0C, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x73, 0x61, 0x6D, 0x70, 0x6C, 0x65, 0x72, 0x5F, 0x6C, 0x69, 0x6E, 0x65, 0x61, 0x72, 0x5F, 0x63, 0x6C, 0x61, 0x6D, 0x70, 0x00, 0x78, 0x65, 0x5F, 0x74, @@ -33,26 +33,30 @@ const uint8_t stretch_ps[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0F, 0x00, 0x00, 0x00, 0x53, 0x56, 0x5F, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x00, 0xAB, 0xAB, 0x53, 0x48, 0x45, 0x58, - 0x90, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0xC0, 0x00, 0x00, 0x00, 0x51, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x6A, 0x08, 0x00, 0x01, 0x5A, 0x00, 0x00, 0x06, 0x46, 0x6E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x58, 0x18, 0x00, 0x07, 0x46, 0x7E, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0x55, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x10, 0x00, 0x03, 0x32, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x03, - 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x0D, - 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x46, 0x10, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xF2, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x68, 0x00, 0x00, 0x02, + 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x0D, 0x72, 0x00, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x46, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x46, 0x7E, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x60, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, + 0x72, 0x20, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x46, 0x02, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x05, 0x82, 0x20, 0x10, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3F, 0x3E, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00, - 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt index ff7c711a5..8495d8f55 
100644 --- a/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt +++ b/src/xenia/gpu/d3d12/shaders/dxbc/stretch_ps.txt @@ -7,7 +7,7 @@ // Name Type Format Dim ID HLSL Bind Count // ------------------------------ ---------- ------- ----------- ------- -------------- ------ // xe_sampler_linear_clamp sampler NA NA S0 s0 1 -// xe_texture texture float4 2d T0 t0 1 +// xe_texture texture float3 2d T0 t0 1 // // // @@ -30,6 +30,9 @@ dcl_sampler S0[0:0], mode_default, space=0 dcl_resource_texture2d (float,float,float,float) T0[0:0], space=0 dcl_input_ps linear v0.xy dcl_output o0.xyzw -sample_l o0.xyzw, v0.xyxx, T0[0].xyzw, S0[0], l(0.000000) +dcl_temps 1 +sample_l r0.xyz, v0.xyxx, T0[0].xyzw, S0[0], l(0.000000) +mov o0.xyz, r0.xyzx +mov o0.w, l(1.000000) ret -// Approximately 2 instruction slots used +// Approximately 4 instruction slots used diff --git a/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl b/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl index 681552587..c7a568434 100644 --- a/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl +++ b/src/xenia/gpu/d3d12/shaders/stretch.ps.hlsl @@ -1,6 +1,8 @@ -Texture2D xe_texture : register(t0); +Texture2D xe_texture : register(t0); SamplerState xe_sampler_linear_clamp : register(s0); float4 main(float2 xe_texcoord : TEXCOORD) : SV_Target { - return xe_texture.SampleLevel(xe_sampler_linear_clamp, xe_texcoord, 0.0f); + // Force alpha to 1 to make sure the surface won't be translucent. 
+ return float4( + xe_texture.SampleLevel(xe_sampler_linear_clamp, xe_texcoord, 0.0f), 1.0f); } diff --git a/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl b/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl index cfa545aa1..a13e333a8 100644 --- a/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl +++ b/src/xenia/gpu/d3d12/shaders/stretch_gamma.ps.hlsl @@ -1,4 +1,4 @@ -Texture2D xe_texture : register(t0); +Texture2D xe_texture : register(t0); Texture1D xe_gamma_ramp : register(t1); SamplerState xe_sampler_linear_clamp : register(s0); cbuffer XeStretchGammaRootConstants : register(b0) { @@ -6,14 +6,15 @@ cbuffer XeStretchGammaRootConstants : register(b0) { }; float4 main(float2 xe_texcoord : TEXCOORD) : SV_Target { - float4 color = + float3 color = xe_texture.SampleLevel(xe_sampler_linear_clamp, xe_texcoord, 0.0f); // The center of the first texel of the LUT contains the value for 0, and the // center of the last texel contains the value for 1. - color.rgb = color.rgb * (1.0f - xe_gamma_ramp_inv_size) + - (0.5 * xe_gamma_ramp_inv_size); + color = + color * (1.0f - xe_gamma_ramp_inv_size) + (0.5 * xe_gamma_ramp_inv_size); color.r = xe_gamma_ramp.SampleLevel(xe_sampler_linear_clamp, color.r, 0.0f).r; color.g = xe_gamma_ramp.SampleLevel(xe_sampler_linear_clamp, color.g, 0.0f).g; color.b = xe_gamma_ramp.SampleLevel(xe_sampler_linear_clamp, color.b, 0.0f).b; - return color; + // Force alpha to 1 to make sure the surface won't be translucent. 
+ return float4(color, 1.0f); } diff --git a/src/xenia/gpu/d3d12/shared_memory.cc b/src/xenia/gpu/d3d12/shared_memory.cc index c24336664..bd57eb981 100644 --- a/src/xenia/gpu/d3d12/shared_memory.cc +++ b/src/xenia/gpu/d3d12/shared_memory.cc @@ -153,9 +153,9 @@ bool SharedMemory::Initialize() { system_page_flags_.clear(); system_page_flags_.resize((page_count_ + 63) / 64); - upload_buffer_pool_ = std::make_unique( - provider, - xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_)); + upload_buffer_pool_ = std::make_unique( + provider, xe::align(ui::d3d12::D3D12UploadBufferPool::kDefaultPageSize, + size_t(1) << page_size_log2_)); memory_invalidation_callback_handle_ = memory_.RegisterPhysicalMemoryInvalidationCallback( @@ -439,25 +439,26 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) { upload_range_length << page_size_log2_); while (upload_range_length != 0) { ID3D12Resource* upload_buffer; - uint32_t upload_buffer_offset, upload_buffer_size; + size_t upload_buffer_offset, upload_buffer_size; uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial( command_processor_.GetCurrentSubmission(), - upload_range_length << page_size_log2_, &upload_buffer, - &upload_buffer_offset, &upload_buffer_size, nullptr); + upload_range_length << page_size_log2_, size_t(1) << page_size_log2_, + &upload_buffer, &upload_buffer_offset, &upload_buffer_size, nullptr); if (upload_buffer_mapping == nullptr) { XELOGE("Shared memory: Failed to get an upload buffer"); return false; } - uint32_t upload_buffer_pages = upload_buffer_size >> page_size_log2_; MakeRangeValid(upload_range_start << page_size_log2_, - upload_buffer_pages << page_size_log2_, false); + uint32_t(upload_buffer_size), false); std::memcpy( upload_buffer_mapping, memory_.TranslatePhysical(upload_range_start << page_size_log2_), upload_buffer_size); command_list.D3DCopyBufferRegion( buffer_, upload_range_start << page_size_log2_, upload_buffer, - upload_buffer_offset, 
upload_buffer_size); + UINT64(upload_buffer_offset), UINT64(upload_buffer_size)); + uint32_t upload_buffer_pages = + uint32_t(upload_buffer_size >> page_size_log2_); upload_range_start += upload_buffer_pages; upload_range_length -= upload_buffer_pages; } diff --git a/src/xenia/gpu/d3d12/shared_memory.h b/src/xenia/gpu/d3d12/shared_memory.h index e1b48d0e6..86a55b2b7 100644 --- a/src/xenia/gpu/d3d12/shared_memory.h +++ b/src/xenia/gpu/d3d12/shared_memory.h @@ -18,7 +18,7 @@ #include "xenia/gpu/trace_writer.h" #include "xenia/memory.h" #include "xenia/ui/d3d12/d3d12_api.h" -#include "xenia/ui/d3d12/pools.h" +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" namespace xe { namespace gpu { @@ -212,7 +212,7 @@ class SharedMemory { std::vector upload_ranges_; void GetRangesToUpload(uint32_t request_page_first, uint32_t request_page_last); - std::unique_ptr upload_buffer_pool_ = nullptr; + std::unique_ptr upload_buffer_pool_; // GPU-written memory downloading for traces. // Start page, length in pages. 
diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 821a0136f..1c345b1bb 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -25,8 +25,8 @@ #include "xenia/gpu/gpu_flags.h" #include "xenia/gpu/texture_info.h" #include "xenia/gpu/texture_util.h" +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" #include "xenia/ui/d3d12/d3d12_util.h" -#include "xenia/ui/d3d12/pools.h" DEFINE_int32(d3d12_resolution_scale, 1, "Scale of rendering width and height (currently only 1 and 2 " @@ -2396,9 +2396,9 @@ bool TextureCache::LoadTextureData(Texture* texture) { } D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address; uint8_t* cbuffer_mapping = cbuffer_pool.Request( - command_processor_.GetCurrentFrame(), - xe::align(uint32_t(sizeof(load_constants)), uint32_t(256)), nullptr, - nullptr, &cbuffer_gpu_address); + command_processor_.GetCurrentFrame(), sizeof(load_constants), + D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr, + &cbuffer_gpu_address); if (cbuffer_mapping == nullptr) { command_processor_.ReleaseScratchGPUBuffer(copy_buffer, copy_buffer_state); diff --git a/src/xenia/gpu/vulkan/texture_cache.cc b/src/xenia/gpu/vulkan/texture_cache.cc index 39a88ef38..fa6cdcd69 100644 --- a/src/xenia/gpu/vulkan/texture_cache.cc +++ b/src/xenia/gpu/vulkan/texture_cache.cc @@ -672,17 +672,10 @@ TextureCache::TextureView* TextureCache::DemandView(Texture* texture, swizzle_component_map[(swizzle >> 9) & 0x7], }; -#define SWIZZLE_VECTOR(r, x) \ - { \ - assert_true(config.vector_swizzle.x >= 0 && \ - config.vector_swizzle.x < xe::countof(components)); \ - view_info.components.r = components[config.vector_swizzle.x]; \ - } - SWIZZLE_VECTOR(r, x); - SWIZZLE_VECTOR(g, y); - SWIZZLE_VECTOR(b, z); - SWIZZLE_VECTOR(a, w); -#undef SWIZZLE_CHANNEL + view_info.components.r = components[config.vector_swizzle.x]; + view_info.components.g = components[config.vector_swizzle.y]; + view_info.components.b = 
components[config.vector_swizzle.z]; + view_info.components.a = components[config.vector_swizzle.w]; if (texture->format == VK_FORMAT_D16_UNORM_S8_UINT || texture->format == VK_FORMAT_D24_UNORM_S8_UINT || diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_usbcam.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_usbcam.cc index 8c02e50c6..40c3f1987 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_usbcam.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_usbcam.cc @@ -17,11 +17,17 @@ namespace xe { namespace kernel { namespace xboxkrnl { -dword_result_t XUsbcamCreate(unknown_t unk1, // E - unknown_t unk2, // 0x4B000 +dword_result_t XUsbcamCreate(dword_t buffer, + dword_t buffer_size, // 0x4B000 640x480? lpunknown_t unk3_ptr) { - // 0 = success. - return X_ERROR_DEVICE_NOT_CONNECTED; + // This function should return success. + // It looks like it only allocates space for usbcam support. + // Returning an error code might cause games to initialize incorrectly. + // "Carcassonne" initialization function checks for result from this + // function. If value is different than 0 instead of loading + // rest of the game it returns from initialization function and tries + // to run game normally which causes crash, due to uninitialized data. + return X_STATUS_SUCCESS; } DECLARE_XBOXKRNL_EXPORT1(XUsbcamCreate, kNone, kStub); diff --git a/src/xenia/ui/d3d12/d3d12_context.h b/src/xenia/ui/d3d12/d3d12_context.h index c9f235b97..8ae3a0b2c 100644 --- a/src/xenia/ui/d3d12/d3d12_context.h +++ b/src/xenia/ui/d3d12/d3d12_context.h @@ -99,7 +99,7 @@ class D3D12Context : public GraphicsContext { // kSwapCommandAllocatorCount. 
ID3D12GraphicsCommandList* swap_command_list_ = nullptr; - std::unique_ptr immediate_drawer_ = nullptr; + std::unique_ptr immediate_drawer_; }; } // namespace d3d12 diff --git a/src/xenia/ui/d3d12/d3d12_descriptor_heap_pool.cc b/src/xenia/ui/d3d12/d3d12_descriptor_heap_pool.cc new file mode 100644 index 000000000..5d3dac99d --- /dev/null +++ b/src/xenia/ui/d3d12/d3d12_descriptor_heap_pool.cc @@ -0,0 +1,136 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/d3d12/d3d12_descriptor_heap_pool.h" + +#include "xenia/base/assert.h" +#include "xenia/base/logging.h" + +namespace xe { +namespace ui { +namespace d3d12 { + +D3D12DescriptorHeapPool::D3D12DescriptorHeapPool( + ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, uint32_t page_size) + : device_(device), type_(type), page_size_(page_size) {} + +D3D12DescriptorHeapPool::~D3D12DescriptorHeapPool() { ClearCache(); } + +void D3D12DescriptorHeapPool::Reclaim(uint64_t completed_submission_index) { + while (submitted_first_) { + if (submitted_first_->last_submission_index > completed_submission_index) { + break; + } + if (writable_last_) { + writable_last_->next = submitted_first_; + } else { + writable_first_ = submitted_first_; + } + writable_last_ = submitted_first_; + submitted_first_ = submitted_first_->next; + writable_last_->next = nullptr; + } + if (!submitted_first_) { + submitted_last_ = nullptr; + } +} + +void D3D12DescriptorHeapPool::ClearCache() { + // Not checking current_page_used_ != 0 because asking for 0 descriptors + // returns a valid heap also - but actually the new heap will be different now + 
// and the old one must be unbound since it doesn't exist anymore. + ++current_heap_index_; + current_page_used_ = 0; + while (submitted_first_) { + auto next = submitted_first_->next; + submitted_first_->heap->Release(); + delete submitted_first_; + submitted_first_ = next; + } + submitted_last_ = nullptr; + while (writable_first_) { + auto next = writable_first_->next; + writable_first_->heap->Release(); + delete writable_first_; + writable_first_ = next; + } + writable_last_ = nullptr; +} + +uint64_t D3D12DescriptorHeapPool::Request(uint64_t submission_index, + uint64_t previous_heap_index, + uint32_t count_for_partial_update, + uint32_t count_for_full_update, + uint32_t& index_out) { + assert_true(count_for_partial_update <= count_for_full_update); + assert_true(count_for_full_update <= page_size_); + if (count_for_partial_update > count_for_full_update || + count_for_full_update > page_size_) { + return kHeapIndexInvalid; + } + assert_true(!current_page_used_ || + submission_index >= writable_first_->last_submission_index); + assert_true(!submitted_last_ || + submission_index >= submitted_last_->last_submission_index); + // If the last full update happened on the current page, a partial update is + // possible. + uint32_t count = previous_heap_index == current_heap_index_ + ? count_for_partial_update + : count_for_full_update; + // Go to the next page if there's not enough free space on the current one, + // or because the previous page may be outdated. In this case, a full update + // is necessary. + if (page_size_ - current_page_used_ < count) { + // Close the page that was current. 
+ if (submitted_last_) { + submitted_last_->next = writable_first_; + } else { + submitted_first_ = writable_first_; + } + submitted_last_ = writable_first_; + writable_first_ = writable_first_->next; + submitted_last_->next = nullptr; + if (!writable_first_) { + writable_last_ = nullptr; + } + ++current_heap_index_; + current_page_used_ = 0; + count = count_for_full_update; + } + // Create the page if needed (may be the first call for the page). + if (!writable_first_) { + D3D12_DESCRIPTOR_HEAP_DESC new_heap_desc; + new_heap_desc.Type = type_; + new_heap_desc.NumDescriptors = page_size_; + new_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + new_heap_desc.NodeMask = 0; + ID3D12DescriptorHeap* new_heap; + if (FAILED(device_->CreateDescriptorHeap(&new_heap_desc, + IID_PPV_ARGS(&new_heap)))) { + XELOGE("Failed to create a heap for {} shader-visible descriptors", + page_size_); + return kHeapIndexInvalid; + } + writable_first_ = new Page; + writable_first_->heap = new_heap; + writable_first_->cpu_start = new_heap->GetCPUDescriptorHandleForHeapStart(); + writable_first_->gpu_start = new_heap->GetGPUDescriptorHandleForHeapStart(); + writable_first_->last_submission_index = submission_index; + writable_first_->next = nullptr; + writable_last_ = writable_first_; + } + writable_first_->last_submission_index = submission_index; + index_out = current_page_used_; + current_page_used_ += count; + return current_heap_index_; +} + +} // namespace d3d12 +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/d3d12/pools.h b/src/xenia/ui/d3d12/d3d12_descriptor_heap_pool.h similarity index 67% rename from src/xenia/ui/d3d12/pools.h rename to src/xenia/ui/d3d12/d3d12_descriptor_heap_pool.h index 21606cc42..fbc77aeef 100644 --- a/src/xenia/ui/d3d12/pools.h +++ b/src/xenia/ui/d3d12/d3d12_descriptor_heap_pool.h @@ -2,13 +2,13 @@ ****************************************************************************** * Xenia : Xbox 360 Emulator Research Project * 
****************************************************************************** - * Copyright 2018 Ben Vanik. All rights reserved. * + * Copyright 2020 Ben Vanik. All rights reserved. * * Released under the BSD license - see LICENSE in the root for more details. * ****************************************************************************** */ -#ifndef XENIA_UI_D3D12_POOLS_H_ -#define XENIA_UI_D3D12_POOLS_H_ +#ifndef XENIA_UI_D3D12_D3D12_DESCRIPTOR_HEAP_POOL_H_ +#define XENIA_UI_D3D12_D3D12_DESCRIPTOR_HEAP_POOL_H_ #include @@ -21,55 +21,13 @@ namespace d3d12 { // Submission index is the fence value or a value derived from it (if reclaiming // less often than once per fence value, for instance). -class UploadBufferPool { - public: - UploadBufferPool(D3D12Provider& provider, uint32_t page_size); - ~UploadBufferPool(); - - void Reclaim(uint64_t completed_submission_index); - void ClearCache(); - - // Request to write data in a single piece, creating a new page if the current - // one doesn't have enough free space. - uint8_t* Request(uint64_t submission_index, uint32_t size, - ID3D12Resource** buffer_out, uint32_t* offset_out, - D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); - // Request to write data in multiple parts, filling the buffer entirely. - uint8_t* RequestPartial(uint64_t submission_index, uint32_t size, - ID3D12Resource** buffer_out, uint32_t* offset_out, - uint32_t* size_out, - D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); - - private: - D3D12Provider& provider_; - uint32_t page_size_; - - struct Page { - ID3D12Resource* buffer; - D3D12_GPU_VIRTUAL_ADDRESS gpu_address; - void* mapping; - uint64_t last_submission_index; - Page* next; - }; - - // A list of buffers with free space, with the first buffer being the one - // currently being filled. - Page* writable_first_ = nullptr; - Page* writable_last_ = nullptr; - // A list of full buffers that can be reclaimed when the GPU doesn't use them - // anymore. 
- Page* submitted_first_ = nullptr; - Page* submitted_last_ = nullptr; - uint32_t current_page_used_ = 0; -}; - -class DescriptorHeapPool { +class D3D12DescriptorHeapPool { public: static constexpr uint64_t kHeapIndexInvalid = UINT64_MAX; - DescriptorHeapPool(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, - uint32_t page_size); - ~DescriptorHeapPool(); + D3D12DescriptorHeapPool(ID3D12Device* device, D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t page_size); + ~D3D12DescriptorHeapPool(); void Reclaim(uint64_t completed_submission_index); void ClearCache(); @@ -148,4 +106,4 @@ class DescriptorHeapPool { } // namespace ui } // namespace xe -#endif // XENIA_UI_D3D12_POOLS_H_ +#endif // XENIA_UI_D3D12_D3D12_DESCRIPTOR_HEAP_POOL_H_ diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc index 3c0ea8a3a..c1bcac5e6 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.cc @@ -158,7 +158,7 @@ bool D3D12ImmediateDrawer::Initialize() { } { auto& root_parameter = - root_parameters[size_t(RootParameter::kViewportInvSize)]; + root_parameters[size_t(RootParameter::kViewportSizeInv)]; root_parameter.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; root_parameter.Constants.ShaderRegister = 0; root_parameter.Constants.RegisterSpace = 0; @@ -179,7 +179,7 @@ bool D3D12ImmediateDrawer::Initialize() { return false; } - // Create the pipelines. + // Create the pipeline states. 
D3D12_GRAPHICS_PIPELINE_STATE_DESC pipeline_desc = {}; pipeline_desc.pRootSignature = root_signature_; pipeline_desc.VS.pShaderBytecode = immediate_vs; @@ -192,10 +192,13 @@ bool D3D12ImmediateDrawer::Initialize() { pipeline_blend_desc.SrcBlend = D3D12_BLEND_SRC_ALPHA; pipeline_blend_desc.DestBlend = D3D12_BLEND_INV_SRC_ALPHA; pipeline_blend_desc.BlendOp = D3D12_BLEND_OP_ADD; - pipeline_blend_desc.SrcBlendAlpha = D3D12_BLEND_SRC_ALPHA; - pipeline_blend_desc.DestBlendAlpha = D3D12_BLEND_INV_SRC_ALPHA; + // Don't change alpha (always 1). + pipeline_blend_desc.SrcBlendAlpha = D3D12_BLEND_ZERO; + pipeline_blend_desc.DestBlendAlpha = D3D12_BLEND_ONE; pipeline_blend_desc.BlendOpAlpha = D3D12_BLEND_OP_ADD; - pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + pipeline_blend_desc.RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_RED | + D3D12_COLOR_WRITE_ENABLE_GREEN | + D3D12_COLOR_WRITE_ENABLE_BLUE; pipeline_desc.SampleMask = UINT_MAX; pipeline_desc.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; pipeline_desc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; @@ -287,14 +290,15 @@ bool D3D12ImmediateDrawer::Initialize() { device->CreateSampler(&sampler_desc, sampler_handle); // Create pools for draws. - vertex_buffer_pool_ = - std::make_unique(provider, 2 * 1024 * 1024); - texture_descriptor_pool_ = std::make_unique( + vertex_buffer_pool_ = std::make_unique(provider); + texture_descriptor_pool_ = std::make_unique( device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048); - texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; + texture_descriptor_pool_heap_index_ = + D3D12DescriptorHeapPool::kHeapIndexInvalid; // Reset the current state. 
current_command_list_ = nullptr; + batch_open_ = false; return true; } @@ -415,6 +419,9 @@ void D3D12ImmediateDrawer::UpdateTexture(ImmediateTexture* texture, void D3D12ImmediateDrawer::Begin(int render_target_width, int render_target_height) { + assert_null(current_command_list_); + assert_false(batch_open_); + auto device = context_.GetD3D12Provider().GetDevice(); // Use the compositing command list. @@ -466,7 +473,8 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, vertex_buffer_pool_->Reclaim(completed_fence_value); texture_descriptor_pool_->Reclaim(completed_fence_value); - texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid; + texture_descriptor_pool_heap_index_ = + D3D12DescriptorHeapPool::kHeapIndexInvalid; current_render_target_width_ = render_target_width; current_render_target_height_ = render_target_height; @@ -478,13 +486,17 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, viewport.MinDepth = 0.0f; viewport.MaxDepth = 1.0f; current_command_list_->RSSetViewports(1, &viewport); + current_scissor_.left = 0; + current_scissor_.top = 0; + current_scissor_.right = 0; + current_scissor_.bottom = 0; current_command_list_->SetGraphicsRootSignature(root_signature_); float viewport_inv_size[2]; viewport_inv_size[0] = 1.0f / viewport.Width; viewport_inv_size[1] = 1.0f / viewport.Height; current_command_list_->SetGraphicsRoot32BitConstants( - UINT(RootParameter::kViewportInvSize), 2, viewport_inv_size, 0); + UINT(RootParameter::kViewportSizeInv), 2, viewport_inv_size, 0); current_primitive_topology_ = D3D_PRIMITIVE_TOPOLOGY_UNDEFINED; current_texture_ = nullptr; @@ -492,22 +504,19 @@ void D3D12ImmediateDrawer::Begin(int render_target_width, } void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { + assert_false(batch_open_); assert_not_null(current_command_list_); - if (current_command_list_ == nullptr) { - return; - } - uint64_t current_fence_value = context_.GetSwapCurrentFenceValue(); - 
batch_open_ = false; + uint64_t current_fence_value = context_.GetSwapCurrentFenceValue(); // Bind the vertices. D3D12_VERTEX_BUFFER_VIEW vertex_buffer_view; vertex_buffer_view.StrideInBytes = UINT(sizeof(ImmediateVertex)); vertex_buffer_view.SizeInBytes = - batch.vertex_count * uint32_t(sizeof(ImmediateVertex)); + UINT(sizeof(ImmediateVertex)) * batch.vertex_count; void* vertex_buffer_mapping = vertex_buffer_pool_->Request( - current_fence_value, vertex_buffer_view.SizeInBytes, nullptr, nullptr, - &vertex_buffer_view.BufferLocation); + current_fence_value, vertex_buffer_view.SizeInBytes, sizeof(float), + nullptr, nullptr, &vertex_buffer_view.BufferLocation); if (vertex_buffer_mapping == nullptr) { XELOGE("Failed to get a buffer for {} vertices in the immediate drawer", batch.vertex_count); @@ -521,11 +530,10 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { batch_has_index_buffer_ = batch.indices != nullptr; if (batch_has_index_buffer_) { D3D12_INDEX_BUFFER_VIEW index_buffer_view; - index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t); + index_buffer_view.SizeInBytes = UINT(sizeof(uint16_t)) * batch.index_count; index_buffer_view.Format = DXGI_FORMAT_R16_UINT; void* index_buffer_mapping = vertex_buffer_pool_->Request( - current_fence_value, - xe::align(index_buffer_view.SizeInBytes, UINT(sizeof(uint32_t))), + current_fence_value, index_buffer_view.SizeInBytes, sizeof(uint16_t), nullptr, nullptr, &index_buffer_view.BufferLocation); if (index_buffer_mapping == nullptr) { XELOGE("Failed to get a buffer for {} indices in the immediate drawer", @@ -541,11 +549,6 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { } void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { - assert_not_null(current_command_list_); - if (current_command_list_ == nullptr) { - return; - } - if (!batch_open_) { // Could be an error while obtaining the vertex and index buffers. 
return; @@ -554,6 +557,32 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { auto& provider = context_.GetD3D12Provider(); auto device = provider.GetDevice(); + // Set the scissor rectangle if enabled. + D3D12_RECT scissor; + if (draw.scissor) { + scissor.left = draw.scissor_rect[0]; + scissor.top = current_render_target_height_ - + (draw.scissor_rect[1] + draw.scissor_rect[3]); + scissor.right = scissor.left + draw.scissor_rect[2]; + scissor.bottom = scissor.top + draw.scissor_rect[3]; + } else { + scissor.left = 0; + scissor.top = 0; + scissor.right = current_render_target_width_; + scissor.bottom = current_render_target_height_; + } + if (scissor.right <= scissor.left || scissor.bottom <= scissor.top) { + // Nothing is visible (used as the default current_scissor_ value also). + return; + } + if (current_scissor_.left != scissor.left || + current_scissor_.top != scissor.top || + current_scissor_.right != scissor.right || + current_scissor_.bottom != scissor.bottom) { + current_scissor_ = scissor; + current_command_list_->RSSetScissorRects(1, &scissor); + } + // Bind the texture. auto texture = reinterpret_cast(draw.texture_handle); ID3D12Resource* texture_resource; @@ -569,7 +598,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { uint64_t texture_heap_index = texture_descriptor_pool_->Request( context_.GetSwapCurrentFenceValue(), texture_descriptor_pool_heap_index_, bind_texture ? 
1 : 0, 1, texture_descriptor_index); - if (texture_heap_index == DescriptorHeapPool::kHeapIndexInvalid) { + if (texture_heap_index == D3D12DescriptorHeapPool::kHeapIndexInvalid) { return; } if (texture_descriptor_pool_heap_index_ != texture_heap_index) { @@ -580,6 +609,7 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { current_command_list_->SetDescriptorHeaps(2, descriptor_heaps); } if (bind_texture) { + current_texture_ = texture; D3D12_SHADER_RESOURCE_VIEW_DESC texture_view_desc; texture_view_desc.Format = D3D12ImmediateTexture::kFormat; texture_view_desc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; @@ -599,7 +629,6 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { provider.OffsetViewDescriptor( texture_descriptor_pool_->GetLastRequestHeapGPUStart(), texture_descriptor_index)); - current_texture_ = texture; } // Bind the sampler. @@ -616,11 +645,11 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { sampler_index = SamplerIndex::kNearestClamp; } if (current_sampler_index_ != sampler_index) { + current_sampler_index_ = sampler_index; current_command_list_->SetGraphicsRootDescriptorTable( UINT(RootParameter::kSampler), provider.OffsetSamplerDescriptor(sampler_heap_gpu_start_, uint32_t(sampler_index))); - current_sampler_index_ = sampler_index; } // Set whether texture coordinates need to be restricted. @@ -646,27 +675,11 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { return; } if (current_primitive_topology_ != primitive_topology) { + current_primitive_topology_ = primitive_topology; current_command_list_->IASetPrimitiveTopology(primitive_topology); current_command_list_->SetPipelineState(pipeline); - current_primitive_topology_ = primitive_topology; } - // Set the scissor rectangle if enabled. 
- D3D12_RECT scissor; - if (draw.scissor) { - scissor.left = draw.scissor_rect[0]; - scissor.top = current_render_target_height_ - - (draw.scissor_rect[1] + draw.scissor_rect[3]); - scissor.right = scissor.left + draw.scissor_rect[2]; - scissor.bottom = scissor.top + draw.scissor_rect[3]; - } else { - scissor.left = 0; - scissor.top = 0; - scissor.right = current_render_target_width_; - scissor.bottom = current_render_target_height_; - } - current_command_list_->RSSetScissorRects(1, &scissor); - // Draw. if (batch_has_index_buffer_) { current_command_list_->DrawIndexedInstanced( @@ -678,7 +691,10 @@ void D3D12ImmediateDrawer::Draw(const ImmediateDraw& draw) { void D3D12ImmediateDrawer::EndDrawBatch() { batch_open_ = false; } -void D3D12ImmediateDrawer::End() { current_command_list_ = nullptr; } +void D3D12ImmediateDrawer::End() { + assert_false(batch_open_); + current_command_list_ = nullptr; +} } // namespace d3d12 } // namespace ui diff --git a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h index f2d5df829..53ae81054 100644 --- a/src/xenia/ui/d3d12/d3d12_immediate_drawer.h +++ b/src/xenia/ui/d3d12/d3d12_immediate_drawer.h @@ -15,7 +15,8 @@ #include #include "xenia/ui/d3d12/d3d12_api.h" -#include "xenia/ui/d3d12/pools.h" +#include "xenia/ui/d3d12/d3d12_descriptor_heap_pool.h" +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" #include "xenia/ui/immediate_drawer.h" namespace xe { @@ -53,7 +54,7 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { kRestrictTextureSamples, kTexture, kSampler, - kViewportInvSize, + kViewportSizeInv, kCount }; @@ -74,8 +75,8 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { D3D12_CPU_DESCRIPTOR_HANDLE sampler_heap_cpu_start_; D3D12_GPU_DESCRIPTOR_HANDLE sampler_heap_gpu_start_; - std::unique_ptr vertex_buffer_pool_ = nullptr; - std::unique_ptr texture_descriptor_pool_ = nullptr; + std::unique_ptr vertex_buffer_pool_; + std::unique_ptr texture_descriptor_pool_; uint64_t 
texture_descriptor_pool_heap_index_; struct PendingTextureUpload { @@ -94,6 +95,7 @@ class D3D12ImmediateDrawer : public ImmediateDrawer { int current_render_target_width_, current_render_target_height_; bool batch_open_ = false; bool batch_has_index_buffer_; + D3D12_RECT current_scissor_; D3D_PRIMITIVE_TOPOLOGY current_primitive_topology_; ImmediateTexture* current_texture_; SamplerIndex current_sampler_index_; diff --git a/src/xenia/ui/d3d12/d3d12_upload_buffer_pool.cc b/src/xenia/ui/d3d12/d3d12_upload_buffer_pool.cc new file mode 100644 index 000000000..4675280dc --- /dev/null +++ b/src/xenia/ui/d3d12/d3d12_upload_buffer_pool.cc @@ -0,0 +1,120 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/d3d12/d3d12_upload_buffer_pool.h" + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/ui/d3d12/d3d12_util.h" + +namespace xe { +namespace ui { +namespace d3d12 { + +// Align to D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT not to waste any space if +// it's smaller (the size of the heap backing the buffer will be aligned to +// D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT anyway). 
+D3D12UploadBufferPool::D3D12UploadBufferPool(D3D12Provider& provider, + size_t page_size) + : GraphicsUploadBufferPool(xe::align( + page_size, size_t(D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT))), + provider_(provider) {} + +uint8_t* D3D12UploadBufferPool::Request( + uint64_t submission_index, size_t size, size_t alignment, + ID3D12Resource** buffer_out, size_t* offset_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { + size_t offset; + const D3D12Page* page = + static_cast(GraphicsUploadBufferPool::Request( + submission_index, size, alignment, offset)); + if (!page) { + return nullptr; + } + if (buffer_out) { + *buffer_out = page->buffer_; + } + if (offset_out) { + *offset_out = offset; + } + if (gpu_address_out) { + *gpu_address_out = page->gpu_address_ + offset; + } + return reinterpret_cast(page->mapping_) + offset; +} + +uint8_t* D3D12UploadBufferPool::RequestPartial( + uint64_t submission_index, size_t size, size_t alignment, + ID3D12Resource** buffer_out, size_t* offset_out, size_t* size_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { + size_t offset, size_obtained; + const D3D12Page* page = + static_cast(GraphicsUploadBufferPool::RequestPartial( + submission_index, size, alignment, offset, size_obtained)); + if (!page) { + return nullptr; + } + if (buffer_out) { + *buffer_out = page->buffer_; + } + if (offset_out) { + *offset_out = offset; + } + if (size_out) { + *size_out = size_obtained; + } + if (gpu_address_out) { + *gpu_address_out = page->gpu_address_ + offset; + } + return reinterpret_cast(page->mapping_) + offset; +} + +GraphicsUploadBufferPool::Page* +D3D12UploadBufferPool::CreatePageImplementation() { + D3D12_RESOURCE_DESC buffer_desc; + util::FillBufferResourceDesc(buffer_desc, page_size_, + D3D12_RESOURCE_FLAG_NONE); + ID3D12Resource* buffer; + if (FAILED(provider_.GetDevice()->CreateCommittedResource( + &util::kHeapPropertiesUpload, provider_.GetHeapFlagCreateNotZeroed(), + &buffer_desc, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + 
IID_PPV_ARGS(&buffer)))) { + XELOGE("Failed to create a D3D upload buffer with {} bytes", page_size_); + return nullptr; + } + D3D12_RANGE read_range; + read_range.Begin = 0; + read_range.End = 0; + void* mapping; + if (FAILED(buffer->Map(0, &read_range, &mapping))) { + XELOGE("Failed to map a D3D upload buffer with {} bytes", page_size_); + buffer->Release(); + return nullptr; + } + D3D12Page* page = new D3D12Page(buffer, mapping); + // Owned by the page now. + buffer->Release(); + return page; +} + +D3D12UploadBufferPool::D3D12Page::D3D12Page(ID3D12Resource* buffer, + void* mapping) + : buffer_(buffer), mapping_(mapping) { + buffer_->AddRef(); + gpu_address_ = buffer_->GetGPUVirtualAddress(); +} + +D3D12UploadBufferPool::D3D12Page::~D3D12Page() { + // Unmapping is done implicitly when the buffer is destroyed. + buffer_->Release(); +} + +} // namespace d3d12 +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/d3d12/d3d12_upload_buffer_pool.h b/src/xenia/ui/d3d12/d3d12_upload_buffer_pool.h new file mode 100644 index 000000000..9fc2ce45e --- /dev/null +++ b/src/xenia/ui/d3d12/d3d12_upload_buffer_pool.h @@ -0,0 +1,54 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
* + ****************************************************************************** + */ + +#ifndef XENIA_UI_D3D12_D3D12_UPLOAD_BUFFER_POOL_H_ +#define XENIA_UI_D3D12_D3D12_UPLOAD_BUFFER_POOL_H_ + +#include "xenia/ui/d3d12/d3d12_provider.h" +#include "xenia/ui/graphics_upload_buffer_pool.h" + +namespace xe { +namespace ui { +namespace d3d12 { + +class D3D12UploadBufferPool : public GraphicsUploadBufferPool { + public: + D3D12UploadBufferPool(D3D12Provider& provider, + size_t page_size = kDefaultPageSize); + + uint8_t* Request(uint64_t submission_index, size_t size, size_t alignment, + ID3D12Resource** buffer_out, size_t* offset_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); + uint8_t* RequestPartial(uint64_t submission_index, size_t size, + size_t alignment, ID3D12Resource** buffer_out, + size_t* offset_out, size_t* size_out, + D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out); + + protected: + Page* CreatePageImplementation() override; + + private: + struct D3D12Page : public Page { + // Creates a reference to the buffer. It must not be unmapped until this + // D3D12Page is deleted. + D3D12Page(ID3D12Resource* buffer, void* mapping); + ~D3D12Page() override; + ID3D12Resource* buffer_; + void* mapping_; + D3D12_GPU_VIRTUAL_ADDRESS gpu_address_; + }; + + D3D12Provider& provider_; +}; + +} // namespace d3d12 +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_D3D12_D3D12_UPLOAD_BUFFER_POOL_H_ diff --git a/src/xenia/ui/d3d12/pools.cc b/src/xenia/ui/d3d12/pools.cc deleted file mode 100644 index 7b892caa9..000000000 --- a/src/xenia/ui/d3d12/pools.cc +++ /dev/null @@ -1,285 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2018 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. 
* - ****************************************************************************** - */ - -#include "xenia/ui/d3d12/pools.h" - -#include - -#include "xenia/base/assert.h" -#include "xenia/base/logging.h" -#include "xenia/ui/d3d12/d3d12_util.h" - -namespace xe { -namespace ui { -namespace d3d12 { - -UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size) - : provider_(provider), page_size_(page_size) {} - -UploadBufferPool::~UploadBufferPool() { ClearCache(); } - -void UploadBufferPool::Reclaim(uint64_t completed_submission_index) { - while (submitted_first_) { - if (submitted_first_->last_submission_index > completed_submission_index) { - break; - } - if (writable_last_) { - writable_last_->next = submitted_first_; - } else { - writable_first_ = submitted_first_; - } - writable_last_ = submitted_first_; - submitted_first_ = submitted_first_->next; - writable_last_->next = nullptr; - } - if (!submitted_first_) { - submitted_last_ = nullptr; - } -} - -void UploadBufferPool::ClearCache() { - current_page_used_ = 0; - // Deleting anyway, so assuming data not needed anymore. 
- D3D12_RANGE written_range; - written_range.Begin = 0; - written_range.End = 0; - while (submitted_first_) { - auto next = submitted_first_->next; - submitted_first_->buffer->Unmap(0, &written_range); - submitted_first_->buffer->Release(); - delete submitted_first_; - submitted_first_ = next; - } - submitted_last_ = nullptr; - while (writable_first_) { - auto next = writable_first_->next; - writable_first_->buffer->Unmap(0, &written_range); - writable_first_->buffer->Release(); - delete writable_first_; - writable_first_ = next; - } - writable_last_ = nullptr; -} - -uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size, - ID3D12Resource** buffer_out, - uint32_t* offset_out, - D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { - assert_true(size <= page_size_); - if (size > page_size_) { - return nullptr; - } - assert_true(!current_page_used_ || - submission_index >= writable_first_->last_submission_index); - assert_true(!submitted_last_ || - submission_index >= submitted_last_->last_submission_index); - if (page_size_ - current_page_used_ < size || !writable_first_) { - // Start a new page if can't fit all the bytes or don't have an open page. - if (writable_first_) { - // Close the page that was current. - if (submitted_last_) { - submitted_last_->next = writable_first_; - } else { - submitted_first_ = writable_first_; - } - submitted_last_ = writable_first_; - writable_first_ = writable_first_->next; - submitted_last_->next = nullptr; - if (!writable_first_) { - writable_last_ = nullptr; - } - } - if (!writable_first_) { - // Create a new page if none available. 
- D3D12_RESOURCE_DESC new_buffer_desc; - util::FillBufferResourceDesc(new_buffer_desc, page_size_, - D3D12_RESOURCE_FLAG_NONE); - ID3D12Resource* new_buffer; - if (FAILED(provider_.GetDevice()->CreateCommittedResource( - &util::kHeapPropertiesUpload, - provider_.GetHeapFlagCreateNotZeroed(), &new_buffer_desc, - D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, - IID_PPV_ARGS(&new_buffer)))) { - XELOGE("Failed to create a D3D upload buffer with {} bytes", - page_size_); - return nullptr; - } - D3D12_RANGE read_range; - read_range.Begin = 0; - read_range.End = 0; - void* new_buffer_mapping; - if (FAILED(new_buffer->Map(0, &read_range, &new_buffer_mapping))) { - XELOGE("Failed to map a D3D upload buffer with {} bytes", page_size_); - new_buffer->Release(); - return nullptr; - } - writable_first_ = new Page; - writable_first_->buffer = new_buffer; - writable_first_->gpu_address = new_buffer->GetGPUVirtualAddress(); - writable_first_->mapping = new_buffer_mapping; - writable_first_->last_submission_index = submission_index; - writable_first_->next = nullptr; - writable_last_ = writable_first_; - } - current_page_used_ = 0; - } - writable_first_->last_submission_index = submission_index; - if (buffer_out) { - *buffer_out = writable_first_->buffer; - } - if (offset_out) { - *offset_out = current_page_used_; - } - if (gpu_address_out) { - *gpu_address_out = writable_first_->gpu_address + current_page_used_; - } - uint8_t* mapping = - reinterpret_cast(writable_first_->mapping) + current_page_used_; - current_page_used_ += size; - return mapping; -} - -uint8_t* UploadBufferPool::RequestPartial( - uint64_t submission_index, uint32_t size, ID3D12Resource** buffer_out, - uint32_t* offset_out, uint32_t* size_out, - D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) { - size = std::min(size, page_size_); - if (current_page_used_ < page_size_) { - size = std::min(size, page_size_ - current_page_used_); - } - uint8_t* mapping = - Request(submission_index, size, buffer_out, offset_out, 
gpu_address_out); - if (!mapping) { - return nullptr; - } - if (size_out) { - *size_out = size; - } - return mapping; -} - -DescriptorHeapPool::DescriptorHeapPool(ID3D12Device* device, - D3D12_DESCRIPTOR_HEAP_TYPE type, - uint32_t page_size) - : device_(device), type_(type), page_size_(page_size) {} - -DescriptorHeapPool::~DescriptorHeapPool() { ClearCache(); } - -void DescriptorHeapPool::Reclaim(uint64_t completed_submission_index) { - while (submitted_first_) { - if (submitted_first_->last_submission_index > completed_submission_index) { - break; - } - if (writable_last_) { - writable_last_->next = submitted_first_; - } else { - writable_first_ = submitted_first_; - } - writable_last_ = submitted_first_; - submitted_first_ = submitted_first_->next; - writable_last_->next = nullptr; - } - if (!submitted_first_) { - submitted_last_ = nullptr; - } -} - -void DescriptorHeapPool::ClearCache() { - // Not checking current_page_used_ != 0 because asking for 0 descriptors - // returns a valid heap also - but actually the new heap will be different now - // and the old one must be unbound since it doesn't exist anymore. 
- ++current_heap_index_; - current_page_used_ = 0; - while (submitted_first_) { - auto next = submitted_first_->next; - submitted_first_->heap->Release(); - delete submitted_first_; - submitted_first_ = next; - } - submitted_last_ = nullptr; - while (writable_first_) { - auto next = writable_first_->next; - writable_first_->heap->Release(); - delete writable_first_; - writable_first_ = next; - } - writable_last_ = nullptr; -} - -uint64_t DescriptorHeapPool::Request(uint64_t submission_index, - uint64_t previous_heap_index, - uint32_t count_for_partial_update, - uint32_t count_for_full_update, - uint32_t& index_out) { - assert_true(count_for_partial_update <= count_for_full_update); - assert_true(count_for_full_update <= page_size_); - if (count_for_partial_update > count_for_full_update || - count_for_full_update > page_size_) { - return kHeapIndexInvalid; - } - assert_true(!current_page_used_ || - submission_index >= writable_first_->last_submission_index); - assert_true(!submitted_last_ || - submission_index >= submitted_last_->last_submission_index); - // If the last full update happened on the current page, a partial update is - // possible. - uint32_t count = previous_heap_index == current_heap_index_ - ? count_for_partial_update - : count_for_full_update; - // Go to the next page if there's not enough free space on the current one, - // or because the previous page may be outdated. In this case, a full update - // is necessary. - if (page_size_ - current_page_used_ < count) { - // Close the page that was current. - if (submitted_last_) { - submitted_last_->next = writable_first_; - } else { - submitted_first_ = writable_first_; - } - submitted_last_ = writable_first_; - writable_first_ = writable_first_->next; - submitted_last_->next = nullptr; - if (!writable_first_) { - writable_last_ = nullptr; - } - ++current_heap_index_; - current_page_used_ = 0; - count = count_for_full_update; - } - // Create the page if needed (may be the first call for the page). 
- if (!writable_first_) { - D3D12_DESCRIPTOR_HEAP_DESC new_heap_desc; - new_heap_desc.Type = type_; - new_heap_desc.NumDescriptors = page_size_; - new_heap_desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; - new_heap_desc.NodeMask = 0; - ID3D12DescriptorHeap* new_heap; - if (FAILED(device_->CreateDescriptorHeap(&new_heap_desc, - IID_PPV_ARGS(&new_heap)))) { - XELOGE("Failed to create a heap for {} shader-visible descriptors", - page_size_); - return kHeapIndexInvalid; - } - writable_first_ = new Page; - writable_first_->heap = new_heap; - writable_first_->cpu_start = new_heap->GetCPUDescriptorHandleForHeapStart(); - writable_first_->gpu_start = new_heap->GetGPUDescriptorHandleForHeapStart(); - writable_first_->last_submission_index = submission_index; - writable_first_->next = nullptr; - writable_last_ = writable_first_; - } - writable_first_->last_submission_index = submission_index; - index_out = current_page_used_; - current_page_used_ += count; - return current_heap_index_; -} - -} // namespace d3d12 -} // namespace ui -} // namespace xe diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.cso b/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.cso index 81917561c..5a4a854bf 100644 Binary files a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.cso and b/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.cso differ diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.h b/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.h index 03a09ce46..074264ab7 100644 --- a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.h +++ b/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.h @@ -1,8 +1,8 @@ // generated from `xb buildhlsl` // source: immediate.vs.hlsl const uint8_t immediate_vs[] = { - 0x44, 0x58, 0x42, 0x43, 0x6A, 0xC1, 0xC3, 0xE8, 0xA7, 0x09, 0x30, 0xF3, - 0x48, 0x84, 0xA3, 0x3C, 0x7D, 0x90, 0xA1, 0x09, 0x01, 0x00, 0x00, 0x00, + 0x44, 0x58, 0x42, 0x43, 0xC3, 0x3E, 0x79, 0xCB, 0x09, 0x65, 0x04, 0xF0, + 0x71, 0x43, 0x47, 0x45, 0xAC, 0xE1, 0xA9, 0x03, 0x01, 0x00, 0x00, 0x00, 0x00, 
0x04, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, 0x54, 0x01, 0x00, 0x00, 0xC4, 0x01, 0x00, 0x00, 0x34, 0x02, 0x00, 0x00, 0x64, 0x03, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x18, 0x01, 0x00, 0x00, @@ -21,8 +21,8 @@ const uint8_t immediate_vs[] = { 0x08, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0xCC, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x78, 0x65, 0x5F, 0x76, - 0x69, 0x65, 0x77, 0x70, 0x6F, 0x72, 0x74, 0x5F, 0x69, 0x6E, 0x76, 0x5F, - 0x73, 0x69, 0x7A, 0x65, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x32, 0x00, + 0x69, 0x65, 0x77, 0x70, 0x6F, 0x72, 0x74, 0x5F, 0x73, 0x69, 0x7A, 0x65, + 0x5F, 0x69, 0x6E, 0x76, 0x00, 0x66, 0x6C, 0x6F, 0x61, 0x74, 0x32, 0x00, 0x01, 0x00, 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xC5, 0x00, 0x00, 0x00, diff --git a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.txt b/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.txt index fe5f8134f..fbda7ff07 100644 --- a/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.txt +++ b/src/xenia/ui/d3d12/shaders/dxbc/immediate_vs.txt @@ -7,7 +7,7 @@ // cbuffer $Globals // { // -// float2 xe_viewport_inv_size; // Offset: 0 Size: 8 +// float2 xe_viewport_size_inv; // Offset: 0 Size: 8 // // } // diff --git a/src/xenia/ui/d3d12/shaders/immediate.vs.hlsl b/src/xenia/ui/d3d12/shaders/immediate.vs.hlsl index 7c3e22530..a79da7b1c 100644 --- a/src/xenia/ui/d3d12/shaders/immediate.vs.hlsl +++ b/src/xenia/ui/d3d12/shaders/immediate.vs.hlsl @@ -1,4 +1,4 @@ -float2 xe_viewport_inv_size : register(b0); +float2 xe_viewport_size_inv : register(b0); struct XeVertexShaderInput { float2 position : POSITION; @@ -15,7 +15,7 @@ struct XeVertexShaderOutput { XeVertexShaderOutput main(XeVertexShaderInput input) { XeVertexShaderOutput output; output.position = float4( - input.position * 
xe_viewport_inv_size * float2(2.0, -2.0) + + input.position * xe_viewport_size_inv * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); output.texcoord = input.texcoord; output.color = input.color; diff --git a/src/xenia/ui/graphics_upload_buffer_pool.cc b/src/xenia/ui/graphics_upload_buffer_pool.cc new file mode 100644 index 000000000..2a780b0c9 --- /dev/null +++ b/src/xenia/ui/graphics_upload_buffer_pool.cc @@ -0,0 +1,150 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/ui/graphics_upload_buffer_pool.h" + +#include + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" + +namespace xe { +namespace ui { + +GraphicsUploadBufferPool::~GraphicsUploadBufferPool() { ClearCache(); } + +void GraphicsUploadBufferPool::Reclaim(uint64_t completed_submission_index) { + while (submitted_first_) { + if (submitted_first_->last_submission_index_ > completed_submission_index) { + break; + } + if (writable_last_) { + writable_last_->next_ = submitted_first_; + } else { + writable_first_ = submitted_first_; + } + writable_last_ = submitted_first_; + submitted_first_ = submitted_first_->next_; + writable_last_->next_ = nullptr; + } + if (!submitted_first_) { + submitted_last_ = nullptr; + } +} + +void GraphicsUploadBufferPool::ClearCache() { + // Called from the destructor - must not call virtual functions here. 
+ current_page_flushed_ = 0; + current_page_used_ = 0; + while (submitted_first_) { + Page* next_ = submitted_first_->next_; + delete submitted_first_; + submitted_first_ = next_; + } + submitted_last_ = nullptr; + while (writable_first_) { + Page* next_ = writable_first_->next_; + delete writable_first_; + writable_first_ = next_; + } + writable_last_ = nullptr; +} + +GraphicsUploadBufferPool::Page::~Page() {} + +void GraphicsUploadBufferPool::FlushWrites() { + if (current_page_flushed_ >= current_page_used_) { + return; + } + assert_not_null(writable_first_); + FlushPageWrites(writable_first_, current_page_flushed_, + current_page_used_ - current_page_flushed_); + current_page_flushed_ = current_page_used_; +} + +GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::Request( + uint64_t submission_index, size_t size, size_t alignment, + size_t& offset_out) { + assert_not_zero(alignment); + assert_true(xe::is_pow2(alignment)); + size = xe::align(size, alignment); + assert_true(size <= page_size_); + if (size > page_size_) { + return nullptr; + } + assert_true(!current_page_used_ || + submission_index >= writable_first_->last_submission_index_); + assert_true(!submitted_last_ || + submission_index >= submitted_last_->last_submission_index_); + size_t current_page_used_aligned = xe::align(current_page_used_, alignment); + if (current_page_used_aligned + size > page_size_ || !writable_first_) { + // Start a new page if can't fit all the bytes or don't have an open page. + if (writable_first_) { + // Close the page that was current. + FlushWrites(); + if (submitted_last_) { + submitted_last_->next_ = writable_first_; + } else { + submitted_first_ = writable_first_; + } + submitted_last_ = writable_first_; + writable_first_ = writable_first_->next_; + submitted_last_->next_ = nullptr; + if (!writable_first_) { + writable_last_ = nullptr; + } + } + if (!writable_first_) { + // Create a new page if none available. 
+ writable_first_ = CreatePageImplementation(); + if (!writable_first_) { + // Failed to create. + return nullptr; + } + writable_first_->last_submission_index_ = submission_index; + writable_first_->next_ = nullptr; + writable_last_ = writable_first_; + // After CreatePageImplementation (more specifically, the first successful + // call), page_size_ may grow - but this doesn't matter here. + } + current_page_used_ = 0; + current_page_used_aligned = 0; + current_page_flushed_ = 0; + } + writable_first_->last_submission_index_ = submission_index; + offset_out = current_page_used_aligned; + current_page_used_ = current_page_used_aligned + size; + return writable_first_; +} + +GraphicsUploadBufferPool::Page* GraphicsUploadBufferPool::RequestPartial( + uint64_t submission_index, size_t size, size_t alignment, + size_t& offset_out, size_t& size_out) { + assert_not_zero(alignment); + assert_true(xe::is_pow2(alignment)); + size = xe::align(size, alignment); + size = std::min(size, page_size_); + size_t current_page_used_aligned = xe::align(current_page_used_, alignment); + if (current_page_used_aligned + alignment <= page_size_) { + size = std::min( + size, (page_size_ - current_page_used_aligned) & ~(alignment - 1)); + } + Page* page = Request(submission_index, size, alignment, offset_out); + if (!page) { + return nullptr; + } + size_out = size; + return page; +} + +void GraphicsUploadBufferPool::FlushPageWrites(Page* page, size_t offset, + size_t size) {} + +} // namespace ui +} // namespace xe diff --git a/src/xenia/ui/graphics_upload_buffer_pool.h b/src/xenia/ui/graphics_upload_buffer_pool.h new file mode 100644 index 000000000..7f5a714d3 --- /dev/null +++ b/src/xenia/ui/graphics_upload_buffer_pool.h @@ -0,0 +1,80 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2020 Ben Vanik. 
All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_UI_GRAPHICS_UPLOAD_BUFFER_POOL_H_ +#define XENIA_UI_GRAPHICS_UPLOAD_BUFFER_POOL_H_ + +#include +#include + +namespace xe { +namespace ui { + +// Submission index is the fence value or a value derived from it (if reclaiming +// less often than once per fence value, for instance). + +class GraphicsUploadBufferPool { + public: + // Taken from the Direct3D 12 MiniEngine sample (LinearAllocator + // kCpuAllocatorPageSize). Large enough for most cases. + static constexpr size_t kDefaultPageSize = 2 * 1024 * 1024; + + virtual ~GraphicsUploadBufferPool(); + + void Reclaim(uint64_t completed_submission_index); + void ClearCache(); + + // Should be called before submitting anything using this pool, unless the + // implementation doesn't require explicit flushing. + void FlushWrites(); + + protected: + // Extended by the implementation. + struct Page { + virtual ~Page(); + uint64_t last_submission_index_; + Page* next_; + }; + + GraphicsUploadBufferPool(size_t page_size) : page_size_(page_size) {} + + // Request to write data in a single piece, creating a new page if the current + // one doesn't have enough free space. + Page* Request(uint64_t submission_index, size_t size, size_t alignment, + size_t& offset_out); + // Request to write data in multiple parts, filling the buffer entirely. + Page* RequestPartial(uint64_t submission_index, size_t size, size_t alignment, + size_t& offset_out, size_t& size_out); + + virtual Page* CreatePageImplementation() = 0; + + virtual void FlushPageWrites(Page* page, size_t offset, size_t size); + + // May be increased by the implementation on creation or on first allocation + // to avoid wasting space if the real allocation turns out to be bigger than + // the specified page size. 
+ size_t page_size_; + + // A list of buffers with free space, with the first buffer being the one + // currently being filled. + Page* writable_first_ = nullptr; + Page* writable_last_ = nullptr; + // A list of full buffers that can be reclaimed when the GPU doesn't use them + // anymore. + Page* submitted_first_ = nullptr; + Page* submitted_last_ = nullptr; + + size_t current_page_used_ = 0; + size_t current_page_flushed_ = 0; +}; + +} // namespace ui +} // namespace xe + +#endif // XENIA_UI_GRAPHICS_UPLOAD_BUFFER_POOL_H_