[Vulkan] 32-bit index fetch without fullDrawIndexUint32

This commit is contained in:
Triang3l 2022-07-25 16:53:12 +03:00
parent 37579d3bf0
commit 77e85ecaa4
6 changed files with 170 additions and 22 deletions

View File

@ -691,6 +691,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
// Writing to the trace irrespective of the cache lookup result // Writing to the trace irrespective of the cache lookup result
// because cache behavior depends on runtime configuration and // because cache behavior depends on runtime configuration and
// state. // state.
// Example of 16-bit reset index replacement: 415607D4.
trace_writer_.WriteMemoryRead(guest_index_base, trace_writer_.WriteMemoryRead(guest_index_base,
guest_index_buffer_needed_bytes); guest_index_buffer_needed_bytes);
// Not specifying the primitive type in the cache key because not // Not specifying the primitive type in the cache key because not

View File

@ -31,6 +31,7 @@ SpirvShaderTranslator::Features::Features(bool all)
max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)), max_storage_buffer_range(all ? UINT32_MAX : (128 * 1024 * 1024)),
clip_distance(all), clip_distance(all),
cull_distance(all), cull_distance(all),
full_draw_index_uint32(all),
image_view_format_swizzle(all), image_view_format_swizzle(all),
signed_zero_inf_nan_preserve_float32(all), signed_zero_inf_nan_preserve_float32(all),
denorm_flush_to_zero_float32(all) {} denorm_flush_to_zero_float32(all) {}
@ -40,7 +41,8 @@ SpirvShaderTranslator::Features::Features(
: max_storage_buffer_range( : max_storage_buffer_range(
provider.device_properties().limits.maxStorageBufferRange), provider.device_properties().limits.maxStorageBufferRange),
clip_distance(provider.device_features().shaderClipDistance), clip_distance(provider.device_features().shaderClipDistance),
cull_distance(provider.device_features().shaderCullDistance) { cull_distance(provider.device_features().shaderCullDistance),
full_draw_index_uint32(provider.device_features().fullDrawIndexUint32) {
uint32_t device_version = provider.device_properties().apiVersion; uint32_t device_version = provider.device_properties().apiVersion;
const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions = const ui::vulkan::VulkanProvider::DeviceExtensions& device_extensions =
provider.device_extensions(); provider.device_extensions();
@ -221,6 +223,8 @@ void SpirvShaderTranslator::StartTranslation() {
sizeof(uint32_t) * 4); sizeof(uint32_t) * 4);
const SystemConstant system_constants[] = { const SystemConstant system_constants[] = {
{"flags", offsetof(SystemConstants, flags), type_uint_}, {"flags", offsetof(SystemConstants, flags), type_uint_},
{"vertex_index_load_address",
offsetof(SystemConstants, vertex_index_load_address), type_uint_},
{"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian), {"vertex_index_endian", offsetof(SystemConstants, vertex_index_endian),
type_uint_}, type_uint_},
{"vertex_base_index", offsetof(SystemConstants, vertex_base_index), {"vertex_base_index", offsetof(SystemConstants, vertex_base_index),
@ -1129,18 +1133,73 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
if (register_count()) { if (register_count()) {
// TODO(Triang3l): Barycentric coordinates and patch index. // TODO(Triang3l): Barycentric coordinates and patch index.
if (IsSpirvVertexShader()) { if (IsSpirvVertexShader()) {
// TODO(Triang3l): Fetch the vertex index from the shared memory when
// fullDrawIndexUint32 isn't available and the index is 32-bit and needs
// endian swap.
// TODO(Triang3l): Close line loop primitive. // TODO(Triang3l): Close line loop primitive.
// Load the unswapped index as uint for swapping. // Load the unswapped index as uint for swapping, or for indirect loading
// if needed.
spv::Id vertex_index = builder_->createUnaryOp( spv::Id vertex_index = builder_->createUnaryOp(
spv::OpBitcast, type_uint_, spv::OpBitcast, type_uint_,
builder_->createLoad(input_vertex_index_, spv::NoPrecision)); builder_->createLoad(input_vertex_index_, spv::NoPrecision));
if (!features_.full_draw_index_uint32) {
// Check if the full 32-bit index needs to be loaded indirectly.
spv::Id load_vertex_index = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
builder_->makeUintConstant(
static_cast<unsigned int>(kSysFlag_VertexIndexLoad))),
const_uint_0_);
spv::Block& block_load_vertex_index_pre = *builder_->getBuildPoint();
spv::Block& block_load_vertex_index_start = builder_->makeNewBlock();
spv::Block& block_load_vertex_index_merge = builder_->makeNewBlock();
SpirvCreateSelectionMerge(block_load_vertex_index_merge.getId(),
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(load_vertex_index,
&block_load_vertex_index_start,
&block_load_vertex_index_merge);
builder_->setBuildPoint(&block_load_vertex_index_start);
// Load the 32-bit index.
// TODO(Triang3l): Bounds checking.
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantVertexIndexLoadAddress));
spv::Id loaded_vertex_index =
LoadUint32FromSharedMemory(builder_->createUnaryOp(
spv::OpBitcast, type_int_,
builder_->createBinOp(
spv::OpIAdd, type_uint_,
builder_->createBinOp(
spv::OpShiftRightLogical, type_uint_,
builder_->createLoad(
builder_->createAccessChain(
spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision),
builder_->makeUintConstant(2)),
vertex_index)));
// Get the actual build point for phi.
spv::Block& block_load_vertex_index_end = *builder_->getBuildPoint();
builder_->createBranch(&block_load_vertex_index_merge);
// Select between the loaded index and the original index from Vulkan.
builder_->setBuildPoint(&block_load_vertex_index_merge);
{
std::unique_ptr<spv::Instruction> loaded_vertex_index_phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_uint_, spv::OpPhi);
loaded_vertex_index_phi_op->addIdOperand(loaded_vertex_index);
loaded_vertex_index_phi_op->addIdOperand(
block_load_vertex_index_end.getId());
loaded_vertex_index_phi_op->addIdOperand(vertex_index);
loaded_vertex_index_phi_op->addIdOperand(
block_load_vertex_index_pre.getId());
vertex_index = loaded_vertex_index_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(loaded_vertex_index_phi_op));
}
}
// Endian-swap the index and convert to int. // Endian-swap the index and convert to int.
id_vector_temp_.clear(); id_vector_temp_.clear();
id_vector_temp_.push_back( id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantIndexVertexIndexEndian)); builder_->makeIntConstant(kSystemConstantVertexIndexEndian));
spv::Id vertex_index_endian = spv::Id vertex_index_endian =
builder_->createLoad(builder_->createAccessChain( builder_->createLoad(builder_->createAccessChain(
spv::StorageClassUniform, spv::StorageClassUniform,
@ -1152,7 +1211,7 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
// Add the base to the index. // Add the base to the index.
id_vector_temp_.clear(); id_vector_temp_.clear();
id_vector_temp_.push_back( id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantIndexVertexBaseIndex)); builder_->makeIntConstant(kSystemConstantVertexBaseIndex));
vertex_index = builder_->createBinOp( vertex_index = builder_->createBinOp(
spv::OpIAdd, type_int_, vertex_index, spv::OpIAdd, type_int_, vertex_index,
builder_->createLoad(builder_->createAccessChain( builder_->createLoad(builder_->createAccessChain(

View File

@ -83,6 +83,9 @@ class SpirvShaderTranslator : public ShaderTranslator {
}; };
enum : uint32_t { enum : uint32_t {
kSysFlag_VertexIndexLoad_Shift,
kSysFlag_ComputeOrPrimitiveVertexIndexLoad_Shift,
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit_Shift,
kSysFlag_XYDividedByW_Shift, kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW_Shift, kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal_Shift, kSysFlag_WNotReciprocal_Shift,
@ -98,6 +101,22 @@ class SpirvShaderTranslator : public ShaderTranslator {
kSysFlag_Count, kSysFlag_Count,
// For HostVertexShaderType kVertex, if fullDrawIndexUint32 is not
// supported (ignored otherwise), whether to fetch the index manually
// (32-bit only - 16-bit indices are always fetched via the Vulkan index
// buffer).
kSysFlag_VertexIndexLoad = 1u << kSysFlag_VertexIndexLoad_Shift,
// For HostVertexShaderTypes kMemexportCompute, kPointListAsTriangleStrip,
// kRectangleListAsTriangleStrip, whether the vertex index needs to be
// loaded from the index buffer (rather than using autogenerated indices),
// and whether it's 32-bit. This is separate from kSysFlag_VertexIndexLoad
// because the same system constants may be used for the memexporting
// compute shader and the vertex shader for the same draw, but
// kSysFlag_VertexIndexLoad may not be needed.
kSysFlag_ComputeOrPrimitiveVertexIndexLoad =
1u << kSysFlag_ComputeOrPrimitiveVertexIndexLoad_Shift,
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit =
1u << kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit_Shift,
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift, kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift, kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift, kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
@ -116,11 +135,14 @@ class SpirvShaderTranslator : public ShaderTranslator {
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED: // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - SystemConstantIndex enum. // - SystemConstantIndex enum.
// - Structure members in StartTranslation. // - Structure members in StartTranslation.
//
// Using the std140 layout - vec2 must be aligned to 8 bytes, vec3 and vec4 to
// 16 bytes.
struct SystemConstants { struct SystemConstants {
uint32_t flags; uint32_t flags;
uint32_t vertex_index_load_address;
xenos::Endian vertex_index_endian; xenos::Endian vertex_index_endian;
int32_t vertex_base_index; int32_t vertex_base_index;
uint32_t padding_vertex_base_index;
float ndc_scale[3]; float ndc_scale[3];
uint32_t padding_ndc_scale; uint32_t padding_ndc_scale;
@ -216,6 +238,7 @@ class SpirvShaderTranslator : public ShaderTranslator {
uint32_t max_storage_buffer_range; uint32_t max_storage_buffer_range;
bool clip_distance; bool clip_distance;
bool cull_distance; bool cull_distance;
bool full_draw_index_uint32;
bool image_view_format_swizzle; bool image_view_format_swizzle;
bool signed_zero_inf_nan_preserve_float32; bool signed_zero_inf_nan_preserve_float32;
bool denorm_flush_to_zero_float32; bool denorm_flush_to_zero_float32;
@ -576,8 +599,9 @@ class SpirvShaderTranslator : public ShaderTranslator {
enum SystemConstantIndex : unsigned int { enum SystemConstantIndex : unsigned int {
kSystemConstantFlags, kSystemConstantFlags,
kSystemConstantIndexVertexIndexEndian, kSystemConstantVertexIndexLoadAddress,
kSystemConstantIndexVertexBaseIndex, kSystemConstantVertexIndexEndian,
kSystemConstantVertexBaseIndex,
kSystemConstantNdcScale, kSystemConstantNdcScale,
kSystemConstantNdcOffset, kSystemConstantNdcOffset,
kSystemConstantTextureSwizzledSigns, kSystemConstantTextureSwizzledSigns,

View File

@ -2383,9 +2383,10 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
normalized_depth_control); normalized_depth_control);
// Update system constants before uploading them. // Update system constants before uploading them.
UpdateSystemConstantValues(primitive_polygonal, bool vertex_shader_index_load;
primitive_processing_result.host_index_endian, UpdateSystemConstantValues(primitive_polygonal, primitive_processing_result,
viewport_info, used_texture_mask); viewport_info, used_texture_mask,
vertex_shader_index_load);
// Update uniform buffers and descriptor sets after binding the pipeline with // Update uniform buffers and descriptor sets after binding the pipeline with
// the new layout. // the new layout.
@ -2451,7 +2452,8 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
// Draw. // Draw.
if (primitive_processing_result.index_buffer_type == if (primitive_processing_result.index_buffer_type ==
PrimitiveProcessor::ProcessedIndexBufferType::kNone) { PrimitiveProcessor::ProcessedIndexBufferType::kNone ||
vertex_shader_index_load) {
deferred_command_buffer_.CmdVkDraw( deferred_command_buffer_.CmdVkDraw(
primitive_processing_result.host_draw_vertex_count, 1, 0, 0); primitive_processing_result.host_draw_vertex_count, 1, 0, 0);
} else { } else {
@ -3338,8 +3340,10 @@ void VulkanCommandProcessor::UpdateDynamicState(
} }
void VulkanCommandProcessor::UpdateSystemConstantValues( void VulkanCommandProcessor::UpdateSystemConstantValues(
bool primitive_polygonal, xenos::Endian index_endian, bool primitive_polygonal,
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask) { const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
bool& vertex_shader_index_load_out) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES #if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu"); SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES #endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
@ -3362,6 +3366,52 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
// Flags. // Flags.
uint32_t flags = 0; uint32_t flags = 0;
// Vertex index shader loading.
bool vertex_shader_index_load = false;
// Only for ProcessedIndexBufferType kGuest since kHostConverted indices may
// not be loaded into the GPU memory (only read on the CPU), though
// kHostConverted must never be used for point lists and rectangle lists
// without geometry shaders anyway. For regular 32-bit index fetching without
// fullDrawIndexUint32, kHostConverted indices are already byte-swapped and
// truncated to 24 bits, so indirect fetch is not needed.
if (primitive_processing_result.index_buffer_type ==
PrimitiveProcessor::ProcessedIndexBufferType::kGuest) {
switch (primitive_processing_result.host_vertex_shader_type) {
case Shader::HostVertexShaderType::kVertex: {
// For guest (usually big-endian) 32-bit indices when they're not
// supported by the device.
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features =
provider.device_features();
if (!device_features.fullDrawIndexUint32) {
vertex_shader_index_load = true;
flags |= SpirvShaderTranslator::kSysFlag_VertexIndexLoad;
}
}
} break;
// kMemexportCompute never comes out of the PrimitiveProcessor, as
// memexport compute shaders are executed alongside their vertex
// counterparts, since they may still result in drawing.
case Shader::HostVertexShaderType::kPointListAsTriangleStrip:
case Shader::HostVertexShaderType::kRectangleListAsTriangleStrip: {
// Always loading the guest index buffer indirectly if it's used, as
// host indexing contains a part needed specifically for the host for
// the construction of the primitive - host vertices don't map 1:1 to
// guest ones.
vertex_shader_index_load = true;
flags |=
SpirvShaderTranslator::kSysFlag_ComputeOrPrimitiveVertexIndexLoad;
if (vgt_draw_initiator.index_size == xenos::IndexFormat::kInt32) {
flags |= SpirvShaderTranslator ::
kSysFlag_ComputeOrPrimitiveVertexIndexLoad32Bit;
}
} break;
default:
break;
}
}
vertex_shader_index_load_out = vertex_shader_index_load;
// W0 division control. // W0 division control.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0. // 8: VTX_XY_FMT = true: the incoming XY have already been multiplied by 1/W0.
@ -3404,9 +3454,21 @@ void VulkanCommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.flags != flags; dirty |= system_constants_.flags != flags;
system_constants_.flags = flags; system_constants_.flags = flags;
// Index buffer address for loading in the shaders.
if (flags &
(SpirvShaderTranslator::kSysFlag_VertexIndexLoad |
SpirvShaderTranslator::kSysFlag_ComputeOrPrimitiveVertexIndexLoad)) {
dirty |= system_constants_.vertex_index_load_address !=
primitive_processing_result.guest_index_base;
system_constants_.vertex_index_load_address =
primitive_processing_result.guest_index_base;
}
// Index or tessellation edge factor buffer endianness. // Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian; dirty |= system_constants_.vertex_index_endian !=
system_constants_.vertex_index_endian = index_endian; primitive_processing_result.host_index_endian;
system_constants_.vertex_index_endian =
primitive_processing_result.host_index_endian;
// Vertex index offset. // Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset; dirty |= system_constants_.vertex_base_index != vgt_indx_offset;

View File

@ -433,10 +433,11 @@ class VulkanCommandProcessor : public CommandProcessor {
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info, void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
bool primitive_polygonal, bool primitive_polygonal,
reg::RB_DEPTHCONTROL normalized_depth_control); reg::RB_DEPTHCONTROL normalized_depth_control);
void UpdateSystemConstantValues(bool primitive_polygonal, void UpdateSystemConstantValues(
xenos::Endian index_endian, bool primitive_polygonal,
const draw_util::ViewportInfo& viewport_info, const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
uint32_t used_texture_mask); const draw_util::ViewportInfo& viewport_info, uint32_t used_texture_mask,
bool& vertex_shader_index_load_out);
bool UpdateBindings(const VulkanShader* vertex_shader, bool UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader); const VulkanShader* pixel_shader);
// Allocates a descriptor set and fills one or two VkWriteDescriptorSet // Allocates a descriptor set and fills one or two VkWriteDescriptorSet

View File

@ -208,6 +208,7 @@ enum class Endian128 : uint32_t {
enum class IndexFormat : uint32_t { enum class IndexFormat : uint32_t {
kInt16, kInt16,
// Not very common, but used for some world draws in 545407E0.
kInt32, kInt32,
}; };