diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 91b9415c8..7259efc68 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -1822,13 +1822,13 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, uint32_t index_count, IndexBufferInfo* index_buffer_info, bool major_mode_explicit) { - auto device = GetD3D12Context().GetD3D12Provider().GetDevice(); - auto& regs = *register_file_; - #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES + ID3D12Device* device = GetD3D12Context().GetD3D12Provider().GetDevice(); + const RegisterFile& regs = *register_file_; + xenos::ModeControl edram_mode = regs.Get().edram_mode; if (edram_mode == xenos::ModeControl::kCopy) { // Special copy handling. @@ -3241,9 +3241,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( poly_offset_back_offset = poly_offset_front_offset; } } - // "slope computed in subpixels ([...] 1/16)" - R5xx Acceleration. - poly_offset_front_scale *= (1.0f / 16.0f) * resolution_scale; - poly_offset_back_scale *= (1.0f / 16.0f) * resolution_scale; + float poly_offset_scale_factor = + xenos::kPolygonOffsetScaleSubpixelUnit * resolution_scale; + poly_offset_front_scale *= poly_offset_scale_factor; + poly_offset_back_scale *= poly_offset_scale_factor; dirty |= system_constants_.edram_poly_offset_front_scale != poly_offset_front_scale; system_constants_.edram_poly_offset_front_scale = poly_offset_front_scale; diff --git a/src/xenia/gpu/d3d12/pipeline_cache.cc b/src/xenia/gpu/d3d12/pipeline_cache.cc index 97606a3cf..556c7cacf 100644 --- a/src/xenia/gpu/d3d12/pipeline_cache.cc +++ b/src/xenia/gpu/d3d12/pipeline_cache.cc @@ -36,6 +36,7 @@ #include "xenia/gpu/d3d12/d3d12_render_target_cache.h" #include "xenia/gpu/draw_util.h" #include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/xenos.h" #include "xenia/ui/d3d12/d3d12_util.h" DEFINE_bool(d3d12_dxbc_disasm, false, @@ -1340,7 +1341,6 @@ bool PipelineCache::GetCurrentStateDescription( break; case xenos::PrimitiveType::kLineList: case xenos::PrimitiveType::kLineStrip: - case xenos::PrimitiveType::kLineLoop: // Quads are emulated as line lists with adjacency. case xenos::PrimitiveType::kQuadList: case xenos::PrimitiveType::k2DLineStrip: @@ -1457,7 +1457,7 @@ bool PipelineCache::GetCurrentStateDescription( } } if (!edram_rov_used) { - float poly_offset_host_scale = draw_util::GetD3D10PolygonOffsetScale( + float poly_offset_host_scale = draw_util::GetD3D10PolygonOffsetFactor( regs.Get().depth_format, true); // Using ceil here just in case a game wants the offset but passes a value // that is too small - it's better to apply more offset than to make depth @@ -1468,7 +1468,7 @@ bool PipelineCache::GetCurrentStateDescription( (poly_offset < 0.0f ? -1 : 1); // "slope computed in subpixels ([...] 1/16)" - R5xx Acceleration. description_out.depth_bias_slope_scaled = - poly_offset_scale * (1.0f / 16.0f); + poly_offset_scale * xenos::kPolygonOffsetScaleSubpixelUnit; } if (tessellated && cvars::d3d12_tessellation_wireframe) { description_out.fill_mode_wireframe = 1; diff --git a/src/xenia/gpu/draw_util.h b/src/xenia/gpu/draw_util.h index 75d3d4053..d23d0d25a 100644 --- a/src/xenia/gpu/draw_util.h +++ b/src/xenia/gpu/draw_util.h @@ -106,7 +106,7 @@ inline reg::RB_DEPTHCONTROL GetDepthControlForCurrentEdramMode( return regs.Get(); } -constexpr float GetD3D10PolygonOffsetScale( +constexpr float GetD3D10PolygonOffsetFactor( xenos::DepthRenderTargetFormat depth_format, bool float24_as_0_to_0_5) { if (depth_format == xenos::DepthRenderTargetFormat::kD24S8) { return float(1 << 24); diff --git a/src/xenia/gpu/primitive_processor.cc b/src/xenia/gpu/primitive_processor.cc index 62864a256..b00e4ce50 100644 --- a/src/xenia/gpu/primitive_processor.cc +++ b/src/xenia/gpu/primitive_processor.cc @@ -458,16 +458,34 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) { } } else { if (regs.Get().multi_prim_ib_ena) { - guest_primitive_reset_index_guest_endian = xenos::GpuSwap( - regs.Get().reset_indx, - guest_index_endian); - // - VGT, what does the guest say about its primitive reset index? - // - It's over 0xFFFF!!! - // - What!? 0xFFFF!? There's no way that can be stored in 16 bits! - guest_primitive_reset_enabled = - guest_index_format == xenos::IndexFormat::kInt16 - ? guest_primitive_reset_index_guest_endian <= UINT16_MAX - : true; + switch (guest_primitive_type) { + case xenos::PrimitiveType::kLineStrip: + case xenos::PrimitiveType::kTriangleFan: + case xenos::PrimitiveType::kTriangleStrip: + case xenos::PrimitiveType::kLineLoop: + case xenos::PrimitiveType::kQuadStrip: + case xenos::PrimitiveType::kPolygon: + case xenos::PrimitiveType::k2DLineStrip: + case xenos::PrimitiveType::k2DTriStrip: + guest_primitive_reset_index_guest_endian = xenos::GpuSwap( + regs.Get().reset_indx, + guest_index_endian); + // - VGT, what does the guest say about its primitive reset index? + // - It's over 0xFFFF!!! + // - What!? 0xFFFF!? There's no way that can be stored in 16 bits! + guest_primitive_reset_enabled = + guest_index_format == xenos::IndexFormat::kInt16 + ? guest_primitive_reset_index_guest_endian <= UINT16_MAX + : true; + break; + default: + // Vulkan explicitly disallows primitive restart index for "list" + // topologies. In Direct3D 12, it's valid for non-strips, but has + // implementation-defined behavior. Make backend usage simpler by + // explicitly filtering lists out, and hope the guest never uses + // primitive reset for lists. + break; + } } } diff --git a/src/xenia/gpu/primitive_processor.h b/src/xenia/gpu/primitive_processor.h index 762b7f1f9..046928738 100644 --- a/src/xenia/gpu/primitive_processor.h +++ b/src/xenia/gpu/primitive_processor.h @@ -138,7 +138,8 @@ class PrimitiveProcessor { xenos::IndexFormat host_index_format; xenos::Endian host_index_endian; // The reset index, if enabled, is always 0xFFFF for host_index_format - // kInt16 and 0xFFFFFFFF for kInt32. + // kInt16 and 0xFFFFFFFF for kInt32. Never enabled for "list" primitive + // types, thus safe for direct usage on Vulkan. bool host_primitive_reset_enabled; // Backend-specific handle for the index buffer valid for the current draw, // only valid for index_buffer_type kHostConverted and kHostBuiltin. diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 97d668513..98fd8741b 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -347,6 +347,16 @@ constexpr float UNorm24To32(uint32_t n24) { return float(n24 + (n24 >> 23)) * (1.0f / float(1 << 24)); } +// Scale for conversion of slope scales from PA_SU_POLY_OFFSET_FRONT/BACK_SCALE +// units to those used when the slope is computed from the difference between +// adjacent pixels, for conversion from the guest to common host APIs or to +// calculation using max(|ddx(z)|, |ddy(z)|). +// "slope computed in subpixels (1/12 or 1/16)" - R5xx Acceleration. +// But the correct scale for conversion of the slope scale from subpixels to +// pixels is likely 1/16 according to: +// https://github.com/mesa3d/mesa/blob/54ad9b444c8e73da498211870e785239ad3ff1aa/src/gallium/drivers/radeonsi/si_state.c#L946 +constexpr float kPolygonOffsetScaleSubpixelUnit = 1.0f / 16.0f; + constexpr uint32_t kColorRenderTargetFormatBits = 4; constexpr uint32_t kDepthRenderTargetFormatBits = 1; constexpr uint32_t kRenderTargetFormatBits =