From 90b772a330377d9aabaf8d96686152d20605630f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 24 Feb 2020 23:27:25 +0300 Subject: [PATCH] [GPU] Set VGT_DRAW_INITIATOR and use major mode from it --- src/xenia/gpu/command_processor.cc | 99 +++++++++++-------- src/xenia/gpu/command_processor.h | 3 +- .../gpu/d3d12/d3d12_command_processor.cc | 6 +- src/xenia/gpu/d3d12/d3d12_command_processor.h | 3 +- src/xenia/gpu/null/null_command_processor.cc | 3 +- src/xenia/gpu/null/null_command_processor.h | 3 +- src/xenia/gpu/register_table.inc | 3 + src/xenia/gpu/registers.h | 16 +++ src/xenia/gpu/vk/vulkan_command_processor.cc | 3 +- src/xenia/gpu/vk/vulkan_command_processor.h | 3 +- .../gpu/vulkan/vulkan_command_processor.cc | 3 +- .../gpu/vulkan/vulkan_command_processor.h | 3 +- src/xenia/gpu/xenos.h | 30 ++++-- 13 files changed, 118 insertions(+), 60 deletions(-) diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc index a66057e5b..8cdb7f331 100644 --- a/src/xenia/gpu/command_processor.cc +++ b/src/xenia/gpu/command_processor.cc @@ -1153,44 +1153,51 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingBuffer* reader, // ID = dword0 & 0x3F; // use = dword0 & 0x40; uint32_t dword0 = reader->ReadAndSwap(); // viz query info - uint32_t dword1 = reader->ReadAndSwap(); - uint32_t index_count = dword1 >> 16; - auto prim_type = static_cast(dword1 & 0x3F); + reg::VGT_DRAW_INITIATOR vgt_draw_initiator; + vgt_draw_initiator.value = reader->ReadAndSwap(); + WriteRegister(XE_GPU_REG_VGT_DRAW_INITIATOR, vgt_draw_initiator.value); + bool is_indexed = false; IndexBufferInfo index_buffer_info; - uint32_t src_sel = (dword1 >> 6) & 0x3; - if (src_sel == 0x0) { - // DI_SRC_SEL_DMA - // Indexed draw. - is_indexed = true; - index_buffer_info.guest_base = reader->ReadAndSwap(); - uint32_t index_size = reader->ReadAndSwap(); - index_buffer_info.endianness = static_cast(index_size >> 30); - index_size &= 0x00FFFFFF; - bool index_32bit = (dword1 >> 11) & 0x1; - index_buffer_info.format = - index_32bit ? IndexFormat::kInt32 : IndexFormat::kInt16; - index_size *= index_32bit ? 4 : 2; - index_buffer_info.length = index_size; - index_buffer_info.count = index_count; - } else if (src_sel == 0x1) { - // DI_SRC_SEL_IMMEDIATE - assert_always(); - } else if (src_sel == 0x2) { - // DI_SRC_SEL_AUTO_INDEX - // Auto draw. - index_buffer_info.guest_base = 0; - index_buffer_info.length = 0; - } else { - // Invalid source select. - assert_always(); + switch (vgt_draw_initiator.source_select) { + case xenos::SourceSelect::kDMA: { + // Indexed draw. + is_indexed = true; + index_buffer_info.guest_base = reader->ReadAndSwap(); + uint32_t index_size = reader->ReadAndSwap(); + index_buffer_info.endianness = static_cast(index_size >> 30); + index_size &= 0x00FFFFFF; + index_buffer_info.format = vgt_draw_initiator.index_size; + index_size *= + (vgt_draw_initiator.index_size == IndexFormat::kInt32) ? 4 : 2; + index_buffer_info.length = index_size; + index_buffer_info.count = vgt_draw_initiator.num_indices; + } break; + case xenos::SourceSelect::kImmediate: { + // TODO(Triang3l): VGT_IMMED_DATA. + assert_always(); + } break; + case xenos::SourceSelect::kAutoIndex: { + // Auto draw. + index_buffer_info.guest_base = 0; + index_buffer_info.length = 0; + } break; + default: { + // Invalid source select. + assert_always(); + } break; } - bool success = IssueDraw(prim_type, index_count, - is_indexed ? &index_buffer_info : nullptr); + bool success = + IssueDraw(vgt_draw_initiator.prim_type, vgt_draw_initiator.num_indices, + is_indexed ? &index_buffer_info : nullptr, + xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, + vgt_draw_initiator.prim_type)); if (!success) { - XELOGE("PM4_DRAW_INDX(%d, %d, %d): Failed in backend", index_count, - prim_type, src_sel); + XELOGE("PM4_DRAW_INDX(%d, %d, %d): Failed in backend", + vgt_draw_initiator.num_indices, + uint32_t(vgt_draw_initiator.prim_type), + uint32_t(vgt_draw_initiator.source_select)); } return true; @@ -1200,21 +1207,27 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingBuffer* reader, uint32_t packet, uint32_t count) { // draw using supplied indices in packet - uint32_t dword0 = reader->ReadAndSwap(); - uint32_t index_count = dword0 >> 16; - auto prim_type = static_cast(dword0 & 0x3F); - uint32_t src_sel = (dword0 >> 6) & 0x3; - assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' + reg::VGT_DRAW_INITIATOR vgt_draw_initiator; + vgt_draw_initiator.value = reader->ReadAndSwap(); + WriteRegister(XE_GPU_REG_VGT_DRAW_INITIATOR, vgt_draw_initiator.value); + assert_true(vgt_draw_initiator.source_select == + xenos::SourceSelect::kAutoIndex); // Index buffer unused as automatic. - // bool index_32bit = (dword0 >> 11) & 0x1; - // uint32_t indices_size = index_count * (index_32bit ? 4 : 2); + // uint32_t indices_size = + // vgt_draw_initiator.num_indices * + // (vgt_draw_initiator.index_size == IndexFormat::kInt32 ? 4 : 2); // uint32_t index_ptr = reader->ptr(); + // TODO(Triang3l): VGT_IMMED_DATA. reader->AdvanceRead((count - 1) * sizeof(uint32_t)); - bool success = IssueDraw(prim_type, index_count, nullptr); + bool success = IssueDraw( + vgt_draw_initiator.prim_type, vgt_draw_initiator.num_indices, nullptr, + xenos::IsMajorModeExplicit(vgt_draw_initiator.major_mode, + vgt_draw_initiator.prim_type)); if (!success) { - XELOGE("PM4_DRAW_INDX_IMM(%d, %d): Failed in backend", index_count, - prim_type); + XELOGE("PM4_DRAW_INDX_IMM(%d, %d): Failed in backend", + vgt_draw_initiator.num_indices, + uint32_t(vgt_draw_initiator.prim_type)); } return true; diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h index e613d5a03..53120ae4b 100644 --- a/src/xenia/gpu/command_processor.h +++ b/src/xenia/gpu/command_processor.h @@ -239,7 +239,8 @@ class CommandProcessor { uint32_t dword_count) = 0; virtual bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) = 0; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) = 0; virtual bool IssueCopy() = 0; virtual void InitializeTrace() = 0; diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 41af5d0e6..705480281 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1245,7 +1245,8 @@ Shader* D3D12CommandProcessor::LoadShader(ShaderType shader_type, bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { auto device = GetD3D12Context()->GetD3D12Provider()->GetDevice(); auto& regs = *register_file_; @@ -1272,8 +1273,7 @@ bool D3D12CommandProcessor::IssueDraw(PrimitiveType primitive_type, // Check if using tessellation to get the correct primitive type. bool tessellated; - if (uint32_t(primitive_type) >= - uint32_t(PrimitiveType::kExplicitMajorModeForceStart)) { + if (major_mode_explicit) { tessellated = regs.Get().path_select == xenos::VGTOutputPath::kTessellationEnable; } else { diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.h b/src/xenia/gpu/d3d12/d3d12_command_processor.h index 3c42e67bb..08fcf7510 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.h +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.h @@ -171,7 +171,8 @@ class D3D12CommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool IssueCopy() override; void InitializeTrace() override; diff --git a/src/xenia/gpu/null/null_command_processor.cc b/src/xenia/gpu/null/null_command_processor.cc index ba5500acd..b19e7fcb0 100644 --- a/src/xenia/gpu/null/null_command_processor.cc +++ b/src/xenia/gpu/null/null_command_processor.cc @@ -44,7 +44,8 @@ Shader* NullCommandProcessor::LoadShader(ShaderType shader_type, bool NullCommandProcessor::IssueDraw(PrimitiveType prim_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { return true; } diff --git a/src/xenia/gpu/null/null_command_processor.h b/src/xenia/gpu/null/null_command_processor.h index 916668269..f33c1b126 100644 --- a/src/xenia/gpu/null/null_command_processor.h +++ b/src/xenia/gpu/null/null_command_processor.h @@ -41,7 +41,8 @@ class NullCommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool IssueCopy() override; void InitializeTrace() override; diff --git a/src/xenia/gpu/register_table.inc b/src/xenia/gpu/register_table.inc index c305b5a03..8ac7ec5d2 100644 --- a/src/xenia/gpu/register_table.inc +++ b/src/xenia/gpu/register_table.inc @@ -131,7 +131,10 @@ XE_GPU_REGISTER(0x2182, kDword, SQ_INTERPOLATOR_CNTL) XE_GPU_REGISTER(0x2183, kDword, SQ_WRAPPING_0) XE_GPU_REGISTER(0x2184, kDword, SQ_WRAPPING_1) +// These three registers are set by the command processor. XE_GPU_REGISTER(0x21F9, kDword, VGT_EVENT_INITIATOR) +XE_GPU_REGISTER(0x21FC, kDword, VGT_DRAW_INITIATOR) +XE_GPU_REGISTER(0x21FD, kDword, VGT_IMMED_DATA) XE_GPU_REGISTER(0x2200, kDword, RB_DEPTHCONTROL) XE_GPU_REGISTER(0x2201, kDword, RB_BLENDCONTROL0) diff --git a/src/xenia/gpu/registers.h b/src/xenia/gpu/registers.h index 5b6fdc54b..af5ee74d9 100644 --- a/src/xenia/gpu/registers.h +++ b/src/xenia/gpu/registers.h @@ -145,6 +145,22 @@ union SQ_CONTEXT_MISC { *******************************************************************************/ +union VGT_DRAW_INITIATOR { + // Different than on A2xx and R6xx/R7xx. + struct { + PrimitiveType prim_type : 6; // +0 + xenos::SourceSelect source_select : 2; // +6 + xenos::MajorMode major_mode : 2; // +8 + uint32_t : 1; // +10 + IndexFormat index_size : 1; // +11 + uint32_t not_eop : 1; // +12 + uint32_t : 3; // +13 + uint32_t num_indices : 16; // +16 + }; + uint32_t value; + static constexpr Register register_index = XE_GPU_REG_VGT_DRAW_INITIATOR; +}; + union VGT_OUTPUT_PATH_CNTL { struct { xenos::VGTOutputPath path_select : 2; // +0 diff --git a/src/xenia/gpu/vk/vulkan_command_processor.cc b/src/xenia/gpu/vk/vulkan_command_processor.cc index 62bdb5677..d7c46090f 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.cc +++ b/src/xenia/gpu/vk/vulkan_command_processor.cc @@ -40,7 +40,8 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { return true; } diff --git a/src/xenia/gpu/vk/vulkan_command_processor.h b/src/xenia/gpu/vk/vulkan_command_processor.h index 8157c3590..43e621814 100644 --- a/src/xenia/gpu/vk/vulkan_command_processor.h +++ b/src/xenia/gpu/vk/vulkan_command_processor.h @@ -40,7 +40,8 @@ class VulkanCommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool IssueCopy() override; void InitializeTrace() override; diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 46244fbb4..43f734a62 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -598,7 +598,8 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type, bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) { + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) { auto& regs = *register_file_; #if FINE_GRAINED_DRAW_SCOPES diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.h b/src/xenia/gpu/vulkan/vulkan_command_processor.h index 94f7ae401..c4959fd0a 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.h +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.h @@ -81,7 +81,8 @@ class VulkanCommandProcessor : public CommandProcessor { uint32_t dword_count) override; bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count, - IndexBufferInfo* index_buffer_info) override; + IndexBufferInfo* index_buffer_info, + bool major_mode_explicit) override; bool PopulateConstants(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader); diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 0cda866ba..aa8174ecc 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -37,12 +37,11 @@ enum class PrimitiveType : uint32_t { kQuadStrip = 0x0E, kPolygon = 0x0F, - // Starting with this primitive mode, registers like VGT_OUTPUT_PATH_CNTL have - // effect (deduced from R6xx/R7xx registers, and Halo 3 also doesn't reset - // VGT_OUTPUT_PATH_CNTL after the first draw with tessellation). - // TODO(Triang3l): Find out if VGT_DRAW_INITIATOR (0x21FC on Adreno 2xx, but - // not seen being used in games) specifies the major mode (or if it's set - // somewhere else). + // Starting with this primitive type, explicit major mode is assumed (in the + // R6xx/R7xx registers, k2DCopyRectListV0 is 22, and implicit major mode is + // only used for primitive types 0 through 21) - and tessellation patches use + // the range that starts from k2DCopyRectListV0. + // TODO(Triang3l): Verify if this is also true for the Xenos. kExplicitMajorModeForceStart = 0x10, k2DCopyRectListV0 = 0x10, @@ -460,6 +459,25 @@ typedef enum { XE_GPU_INVALIDATE_MASK_ALL = 0x7FFF, } XE_GPU_INVALIDATE_MASK; +// VGT_DRAW_INITIATOR::DI_SRC_SEL_* +enum class SourceSelect : uint32_t { + kDMA, + kImmediate, + kAutoIndex, +}; + +// VGT_DRAW_INITIATOR::DI_MAJOR_MODE_* +enum class MajorMode : uint32_t { + kImplicit, + kExplicit, +}; + +inline bool IsMajorModeExplicit(MajorMode major_mode, + PrimitiveType primitive_type) { + return major_mode != MajorMode::kImplicit || + primitive_type >= PrimitiveType::kExplicitMajorModeForceStart; +} + // instr_arbitrary_filter_t enum class ArbitraryFilter : uint32_t { k2x4Sym = 0,