From d1f185c744eed182833da7b87c308c7cf2d47400 Mon Sep 17 00:00:00 2001
From: Triang3l
Date: Sat, 15 Sep 2018 14:40:00 +0300
Subject: [PATCH] [D3D12] Triangle fan conversion (untested, only indexed and
 without cache)

---
 .../gpu/d3d12/d3d12_command_processor.cc      | 58 ++++-
 src/xenia/gpu/d3d12/d3d12_command_processor.h |  5 +-
 src/xenia/gpu/d3d12/primitive_converter.cc    | 201 ++++++++++++++++++
 src/xenia/gpu/d3d12/primitive_converter.h     | 108 ++++++++++
 4 files changed, 359 insertions(+), 13 deletions(-)
 create mode 100644 src/xenia/gpu/d3d12/primitive_converter.cc
 create mode 100644 src/xenia/gpu/d3d12/primitive_converter.h

diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
index 20d478d0e..3a0dccfdc 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc
@@ -623,6 +623,10 @@ bool D3D12CommandProcessor::SetupContext() {
     return false;
   }
 
+  primitive_converter_ = std::make_unique(
+      context, register_file_, memory_, shared_memory_.get());
+  primitive_converter_->Initialize();
+
   D3D12_HEAP_PROPERTIES swap_texture_heap_properties = {};
   swap_texture_heap_properties.Type = D3D12_HEAP_TYPE_DEFAULT;
   D3D12_RESOURCE_DESC swap_texture_desc;
@@ -719,6 +723,8 @@ void D3D12CommandProcessor::ShutdownContext() {
   view_heap_pool_.reset();
   constant_buffer_pool_.reset();
 
+  primitive_converter_.reset();
+
   render_target_cache_.reset();
 
   texture_cache_.reset();
@@ -826,6 +832,8 @@ void D3D12CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
     view_heap_pool_->ClearCache();
     constant_buffer_pool_->ClearCache();
 
+    primitive_converter_->ClearCache();
+
     render_target_cache_->ClearCache();
 
     texture_cache_->ClearCache();
@@ -945,8 +953,10 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
       render_target_cache_->GetCurrentPipelineRenderTargets();
 
   // Set the primitive topology.
+  PrimitiveType primitive_type_converted =
+      PrimitiveConverter::GetReplacementPrimitiveType(primitive_type);
   D3D_PRIMITIVE_TOPOLOGY primitive_topology;
-  switch (primitive_type) {
+  switch (primitive_type_converted) {
     case PrimitiveType::kPointList:
       primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST;
       break;
@@ -978,7 +988,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
   ID3D12PipelineState* pipeline;
   ID3D12RootSignature* root_signature;
   auto pipeline_status = pipeline_cache_->ConfigurePipeline(
-      vertex_shader, pixel_shader, primitive_type,
+      vertex_shader, pixel_shader, primitive_type_converted,
       indexed ? index_buffer_info->format : IndexFormat::kInt16,
       pipeline_render_targets, &pipeline, &root_signature);
   if (pipeline_status == PipelineCache::UpdateStatus::kError) {
@@ -1031,27 +1041,47 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type,
         regs[vfetch_constant_index + 1].u32 & 0x3FFFFFC);
     vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
   }
+
   if (indexed) {
-    uint32_t index_base = index_buffer_info->guest_base & 0x1FFFFFFF;
     uint32_t index_size = index_buffer_info->format == IndexFormat::kInt32
                               ? sizeof(uint32_t)
                               : sizeof(uint16_t);
-    index_base &= ~(index_size - 1);
-    uint32_t index_buffer_size = index_buffer_info->count * index_size;
-    shared_memory_->RequestRange(index_base, index_buffer_size);
-
-    shared_memory_->UseForReading();
+    assert_false(index_buffer_info->guest_base & (index_size - 1));
+    uint32_t index_base = index_buffer_info->guest_base & ~(index_size - 1);
     D3D12_INDEX_BUFFER_VIEW index_buffer_view;
-    index_buffer_view.BufferLocation =
-        shared_memory_->GetGPUAddress() + index_base;
-    index_buffer_view.SizeInBytes = index_buffer_size;
     index_buffer_view.Format = index_buffer_info->format == IndexFormat::kInt32
                                    ? DXGI_FORMAT_R32_UINT
                                    : DXGI_FORMAT_R16_UINT;
+    uint32_t converted_index_count = index_count;
+    PrimitiveConverter::ConversionResult conversion_result =
+        primitive_converter_->ConvertPrimitives(
+            primitive_type, index_buffer_info->guest_base,
+            index_buffer_info->count, index_buffer_info->format,
+            index_buffer_info->endianness, index_buffer_view.BufferLocation,
+            converted_index_count);
+    if (conversion_result == PrimitiveConverter::ConversionResult::kFailed) {
+      return false;
+    }
+    if (conversion_result ==
+        PrimitiveConverter::ConversionResult::kPrimitiveEmpty) {
+      return true;
+    }
+    if (conversion_result == PrimitiveConverter::ConversionResult::kConverted) {
+      index_buffer_view.SizeInBytes = converted_index_count * index_size;
+    } else {
+      uint32_t index_buffer_size = index_buffer_info->count * index_size;
+      shared_memory_->RequestRange(index_base, index_buffer_size);
+      index_buffer_view.BufferLocation =
+          shared_memory_->GetGPUAddress() + index_base;
+      index_buffer_view.SizeInBytes = index_buffer_size;
+    }
+    shared_memory_->UseForReading();
     command_list->IASetIndexBuffer(&index_buffer_view);
     SubmitBarriers();
-    command_list->DrawIndexedInstanced(index_count, 1, 0, 0, 0);
+    command_list->DrawIndexedInstanced(converted_index_count, 1, 0, 0, 0);
   } else {
+    // TODO(Triang3l): Get a static index buffer for unsupported primitive types
+    // from the primitive converter.
     shared_memory_->UseForReading();
     SubmitBarriers();
     command_list->DrawInstanced(index_count, 1, 0, 0);
@@ -1134,6 +1164,8 @@ bool D3D12CommandProcessor::BeginFrame() {
 
   render_target_cache_->BeginFrame();
 
+  primitive_converter_->BeginFrame(GetCurrentCommandList());
+
   return true;
 }
 
@@ -1144,6 +1176,8 @@ bool D3D12CommandProcessor::EndFrame() {
 
   assert_false(scratch_buffer_used_);
 
+  primitive_converter_->EndFrame();
+
   render_target_cache_->EndFrame();
 
   shared_memory_->EndFrame();
diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h
index fd1ca9557..9620c6eab 100644
--- a/src/xenia/gpu/d3d12/d3d12_command_processor.h
+++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h
@@ -17,6 +17,7 @@
 #include "xenia/gpu/command_processor.h"
 #include "xenia/gpu/d3d12/d3d12_graphics_system.h"
 #include "xenia/gpu/d3d12/pipeline_cache.h"
+#include "xenia/gpu/d3d12/primitive_converter.h"
 #include "xenia/gpu/d3d12/render_target_cache.h"
 #include "xenia/gpu/d3d12/shared_memory.h"
 #include "xenia/gpu/d3d12/texture_cache.h"
@@ -197,7 +198,7 @@ class D3D12CommandProcessor : public CommandProcessor {
   bool cache_clear_requested_ = false;
 
   std::unique_ptr
-    command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
+      command_lists_[ui::d3d12::D3D12Context::kQueuedFrames] = {};
 
   std::unique_ptr shared_memory_ = nullptr;
 
@@ -210,6 +211,8 @@ class D3D12CommandProcessor : public CommandProcessor {
 
   std::unique_ptr render_target_cache_ = nullptr;
 
+  std::unique_ptr primitive_converter_ = nullptr;
+
   std::unique_ptr constant_buffer_pool_ = nullptr;
   std::unique_ptr view_heap_pool_ = nullptr;
   std::unique_ptr sampler_heap_pool_ = nullptr;
diff --git a/src/xenia/gpu/d3d12/primitive_converter.cc b/src/xenia/gpu/d3d12/primitive_converter.cc
new file mode 100644
index 000000000..8375e124f
--- /dev/null
+++ b/src/xenia/gpu/d3d12/primitive_converter.cc
@@ -0,0 +1,201 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2018 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#include "xenia/gpu/d3d12/primitive_converter.h"
+
+#include "xenia/base/logging.h"
+#include "xenia/base/math.h"
+#include "xenia/base/platform.h"
+
+namespace xe {
+namespace gpu {
+namespace d3d12 {
+
+PrimitiveConverter::PrimitiveConverter(ui::d3d12::D3D12Context* context,
+                                       RegisterFile* register_file,
+                                       Memory* memory,
+                                       SharedMemory* shared_memory)
+    : context_(context),
+      register_file_(register_file),
+      memory_(memory),
+      shared_memory_(shared_memory) {}
+
+PrimitiveConverter::~PrimitiveConverter() { Shutdown(); }
+
+void PrimitiveConverter::Initialize() {
+  // There can be at most 65535 indices in a Xenos draw call, but they can be up
+  // to 4 bytes large, and conversion can add more indices (almost triple the
+  // count for triangle fans, for instance).
+  buffer_pool_ =
+      std::make_unique(context_, 4 * 1024 * 1024);
+}
+
+void PrimitiveConverter::Shutdown() { buffer_pool_.reset(); }
+
+void PrimitiveConverter::ClearCache() { buffer_pool_->ClearCache(); }
+
+void PrimitiveConverter::BeginFrame(ID3D12GraphicsCommandList* command_list) {
+  buffer_pool_->BeginFrame();
+  // TODO(Triang3l): Create the static index buffer for unindexed triangle fans.
+}
+
+void PrimitiveConverter::EndFrame() { buffer_pool_->EndFrame(); }
+
+PrimitiveType PrimitiveConverter::GetReplacementPrimitiveType(
+    PrimitiveType type) {
+  if (type == PrimitiveType::kTriangleFan) {
+    return PrimitiveType::kTriangleList;
+  }
+  return type;
+}
+
+PrimitiveConverter::ConversionResult PrimitiveConverter::ConvertPrimitives(
+    PrimitiveType source_type, uint32_t address, uint32_t index_count,
+    IndexFormat index_format, Endian index_endianness,
+    D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out, uint32_t& index_count_out) {
+  auto& regs = *register_file_;
+  bool reset = (regs[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21)) != 0;
+  // Swap the reset index because we will be comparing unswapped values to it.
+  uint32_t reset_index = xenos::GpuSwap(
+      regs[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32, index_endianness);
+  // If the specified reset index is the same as the one used by Direct3D 12
+  // (0xFFFF or 0xFFFFFFFF - in the pipeline cache, we use the former for
+  // 16-bit and the latter for 32-bit indices), we can use the buffer directly.
+  uint32_t reset_index_host =
+      index_format == IndexFormat::kInt32 ? 0xFFFFFFFFu : 0xFFFFu;
+
+  // Check if need to convert at all.
+  if (source_type != PrimitiveType::kTriangleFan) {
+    if (!reset || reset_index == reset_index_host) {
+      return ConversionResult::kConversionNotNeeded;
+    }
+    if (source_type != PrimitiveType::kTriangleStrip &&
+        source_type != PrimitiveType::kLineStrip) {
+      return ConversionResult::kConversionNotNeeded;
+    }
+    // TODO(Triang3l): Write conversion for triangle and line strip reset index
+    // and for indexed line loops.
+    return ConversionResult::kConversionNotNeeded;
+  }
+
+  // Exit early for clearly empty draws, without even reading the memory.
+  if (source_type == PrimitiveType::kTriangleFan ||
+      source_type == PrimitiveType::kTriangleStrip) {
+    if (index_count < 3) {
+      return ConversionResult::kPrimitiveEmpty;
+    }
+  } else if (source_type == PrimitiveType::kLineStrip ||
+             source_type == PrimitiveType::kLineLoop) {
+    if (index_count < 2) {
+      return ConversionResult::kPrimitiveEmpty;
+    }
+  }
+
+  // TODO(Triang3l): Find the converted data in the cache.
+
+  // Calculate the index count, and also check if there's nothing to convert in
+  // the buffer (for instance, if not using primitive reset).
+  uint32_t converted_index_count = 0;
+  bool conversion_needed = false;
+  bool simd = false;
+  if (source_type == PrimitiveType::kTriangleFan) {
+    // Triangle fans are not supported by Direct3D 12 at all.
+    conversion_needed = true;
+    if (reset) {
+      // TODO(Triang3l): Triangle fans with primitive reset.
+      return ConversionResult::kFailed;
+    } else {
+      converted_index_count = 3 * (index_count - 2);
+    }
+  }
+
+  union {
+    void* source;
+    uint16_t* source_16;
+    uint32_t* source_32;
+  };
+  source = memory_->TranslatePhysical(address);
+  union {
+    void* target;
+    uint16_t* target_16;
+    uint32_t* target_32;
+  };
+  D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
+  // The buffer receives the converted indices, so it must be sized for the
+  // converted count (a fan expands to 3 * (index_count - 2) indices).
+  target = AllocateIndices(index_format, converted_index_count,
+                           simd ? address & 15 : 0, gpu_address);
+  if (target == nullptr) {
+    return ConversionResult::kFailed;
+  }
+
+  if (source_type == PrimitiveType::kTriangleFan) {
+    // https://docs.microsoft.com/en-us/windows/desktop/direct3d9/triangle-fans
+    // Ordered as (v1, v2, v0), (v2, v3, v0).
+    if (reset) {
+      // TODO(Triang3l): Triangle fans with primitive restart.
+      return ConversionResult::kFailed;
+    } else {
+      if (index_format == IndexFormat::kInt32) {
+        for (uint32_t i = 2; i < index_count; ++i) {
+          *(target_32++) = source_32[i - 1];
+          *(target_32++) = source_32[i];
+          *(target_32++) = source_32[0];
+        }
+      } else {
+        for (uint32_t i = 2; i < index_count; ++i) {
+          *(target_16++) = source_16[i - 1];
+          *(target_16++) = source_16[i];
+          *(target_16++) = source_16[0];
+        }
+      }
+    }
+  }
+
+  // TODO(Triang3l): Replace primitive reset index in triangle and line strips.
+  // TODO(Triang3l): Line loops.
+
+  gpu_address_out = gpu_address;
+  index_count_out = converted_index_count;
+  return ConversionResult::kConverted;
+}
+
+void* PrimitiveConverter::AllocateIndices(
+    IndexFormat format, uint32_t count, uint32_t simd_offset,
+    D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out) {
+  if (count == 0) {
+    return nullptr;
+  }
+  uint32_t size = count * (format == IndexFormat::kInt32 ? sizeof(uint32_t)
+                                                         : sizeof(uint16_t));
+  // 16-align all index data because SIMD is used to replace the reset index
+  // (without that, 4-alignment would be required anyway to mix 16-bit and
+  // 32-bit indices in one buffer page).
+  size = xe::align(size, uint32_t(16));
+  // Add some space to align SIMD register components the same way in the source
+  // and the buffer.
+  simd_offset &= 15;
+  if (simd_offset != 0) {
+    size += 16;
+  }
+  D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
+  uint8_t* mapping =
+      buffer_pool_->RequestFull(size, nullptr, nullptr, &gpu_address);
+  if (mapping == nullptr) {
+    XELOGE("Failed to allocate space for %u converted %u-bit vertex indices",
+           count, format == IndexFormat::kInt32 ? 32 : 16);
+    return nullptr;
+  }
+  gpu_address_out = gpu_address + simd_offset;
+  return mapping + simd_offset;
+}
+
+}  // namespace d3d12
+}  // namespace gpu
+}  // namespace xe
diff --git a/src/xenia/gpu/d3d12/primitive_converter.h b/src/xenia/gpu/d3d12/primitive_converter.h
new file mode 100644
index 000000000..ed1969e50
--- /dev/null
+++ b/src/xenia/gpu/d3d12/primitive_converter.h
@@ -0,0 +1,108 @@
+/**
+ ******************************************************************************
+ * Xenia : Xbox 360 Emulator Research Project                                 *
+ ******************************************************************************
+ * Copyright 2018 Ben Vanik. All rights reserved.                             *
+ * Released under the BSD license - see LICENSE in the root for more details. *
+ ******************************************************************************
+ */
+
+#ifndef XENIA_GPU_D3D12_PRIMITIVE_CONVERTER_H_
+#define XENIA_GPU_D3D12_PRIMITIVE_CONVERTER_H_
+
+#include
+#include
+#include
+
+#include "xenia/gpu/d3d12/shared_memory.h"
+#include "xenia/gpu/register_file.h"
+#include "xenia/gpu/xenos.h"
+#include "xenia/memory.h"
+#include "xenia/ui/d3d12/d3d12_context.h"
+
+namespace xe {
+namespace gpu {
+namespace d3d12 {
+
+// Index buffer cache for primitive types not natively supported by Direct3D 12:
+// - Triangle and line strips with non-0xFFFF/0xFFFFFFFF reset index.
+// - Triangle fans.
+// - Line loops (only indexed ones - non-indexed are better handled in vertex
+//   shaders, otherwise a whole index buffer would have to be created for every
+//   vertex count value).
+class PrimitiveConverter {
+ public:
+  PrimitiveConverter(ui::d3d12::D3D12Context* context,
+                     RegisterFile* register_file, Memory* memory,
+                     SharedMemory* shared_memory);
+  ~PrimitiveConverter();
+
+  void Initialize();
+  void Shutdown();
+  void ClearCache();
+
+  void BeginFrame(ID3D12GraphicsCommandList* command_list);
+  void EndFrame();
+
+  // Returns the primitive type that the original type will be converted to.
+  static PrimitiveType GetReplacementPrimitiveType(PrimitiveType type);
+
+  enum class ConversionResult {
+    // Converted to a transient buffer.
+    kConverted,
+    // Conversion not required - use the index buffer in shared memory.
+    kConversionNotNeeded,
+    // No errors, but nothing to render.
+    kPrimitiveEmpty,
+    // Total failure of the draw call.
+    kFailed
+  };
+
+  // Converts an index buffer to the primitive type returned by
+  // GetReplacementPrimitiveType. If conversion has been performed, the returned
+  // buffer will be in the GENERIC_READ state (it's in an upload heap). The
+  // outputs are written only when kConverted is returned. The restart index
+  // is handled internally from the register values.
+  ConversionResult ConvertPrimitives(PrimitiveType source_type,
+                                     uint32_t address, uint32_t index_count,
+                                     IndexFormat index_format,
+                                     Endian index_endianness,
+                                     D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out,
+                                     uint32_t& index_count_out);
+
+  // TODO(Triang3l): A function that returns a static index buffer for
+  // non-indexed drawing of unsupported primitives.
+
+ private:
+  // simd_offset is source address & 15 - if SIMD is used, the source and the
+  // target must have the same alignment within one register. 0 is optimal when
+  // not using SIMD.
+  void* AllocateIndices(IndexFormat format, uint32_t count,
+                        uint32_t simd_offset,
+                        D3D12_GPU_VIRTUAL_ADDRESS& gpu_address_out);
+
+  ui::d3d12::D3D12Context* context_;
+  RegisterFile* register_file_;
+  Memory* memory_;
+  SharedMemory* shared_memory_;
+
+  std::unique_ptr buffer_pool_ = nullptr;
+
+  struct ConvertedIndices {
+    D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
+    PrimitiveType primitive_type;
+    uint32_t index_count;
+    IndexFormat index_format;
+    // Index pre-swapped - in guest storage endian.
+    uint32_t reset_index;
+    bool reset;
+  };
+  // Cache for a single frame.
+  std::unordered_multimap converted_indices_;
+};
+
+}  // namespace d3d12
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_D3D12_PRIMITIVE_CONVERTER_H_