From aa73d652ba535eee112bb9284e651accdc3481c3 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 24 May 2020 19:25:35 +0300 Subject: [PATCH] [DXBC] OpenGL snorm vfetch because why not --- src/xenia/gpu/dxbc_shader_translator_fetch.cc | 79 +++++++++++++------ src/xenia/gpu/shader.h | 2 + src/xenia/gpu/shader_translator.cc | 1 + src/xenia/gpu/ucode.h | 5 +- src/xenia/gpu/xenos.h | 32 +++++--- 5 files changed, 83 insertions(+), 36 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index da95cab80..26c6004be 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -303,24 +303,43 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( DxbcOpIToF(result_unpacked_dest, result_src); if (!instr.attributes.is_integer) { float packed_scales[4] = {}; - uint32_t packed_scales_mask = 0b0000; - for (uint32_t i = 0; i < 4; ++i) { - if (!(used_format_components & (1 << i))) { - continue; - } - if (packed_widths[i] > 2) { - packed_scales[i] = - 1.0f / float((uint32_t(1) << (packed_widths[i] - 1)) - 1); - packed_scales_mask |= 1 << i; - } + switch (instr.attributes.signed_rf_mode) { + case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: { + uint32_t packed_scales_mask = 0b0000; + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_format_components & (1 << i))) { + continue; + } + if (packed_widths[i] > 2) { + packed_scales[i] = + 1.0f / float((uint32_t(1) << (packed_widths[i] - 1)) - 1); + packed_scales_mask |= 1 << i; + } + } + if (packed_scales_mask) { + DxbcOpMul(DxbcDest::R(system_temp_result_, packed_scales_mask), + result_src, DxbcSrc::LP(packed_scales)); + } + // Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1. + DxbcOpMax(result_unpacked_dest, result_src, DxbcSrc::LF(-1.0f)); + } break; + case xenos::SignedRepeatingFractionMode::kNoZero: { + float packed_zeros[4] = {}; + for (uint32_t i = 0; i < 4; ++i) { + if (!(used_format_components & (1 << i))) { + continue; + } + assert_not_zero(packed_widths[i]); + packed_zeros[i] = + 1.0f / float((uint32_t(1) << packed_widths[i]) - 1); + packed_scales[i] = 2.0f * packed_zeros[i]; + } + DxbcOpMAd(result_unpacked_dest, result_src, + DxbcSrc::LP(packed_scales), DxbcSrc::LP(packed_zeros)); + } break; + default: + assert_unhandled_case(instr.attributes.signed_rf_mode); } - if (packed_scales_mask) { - DxbcOpMul(DxbcDest::R(system_temp_result_, packed_scales_mask), - result_src, DxbcSrc::LP(packed_scales)); - } - // Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1, according to Direct3D - // snorm to float conversion rules. - DxbcOpMax(result_unpacked_dest, result_src, DxbcSrc::LF(-1.0f)); } } else { DxbcOpUBFE(result_unpacked_dest, DxbcSrc::LP(packed_widths), @@ -367,12 +386,26 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( DxbcOpUToF(result_unpacked_dest, result_src); } if (!instr.attributes.is_integer) { - DxbcOpMul( - result_unpacked_dest, result_src, - DxbcSrc::LF(instr.attributes.is_signed ? (1.0f / 2147483647.0f) - : (1.0f / 4294967295.0f))); - // No need to clamp to -1 if signed - 1/(2^31-1) is rounded to - // 1/(2^31) as float32. + if (instr.attributes.is_signed) { + switch (instr.attributes.signed_rf_mode) { + case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: + DxbcOpMul(result_unpacked_dest, result_src, + DxbcSrc::LF(1.0f / 2147483647.0f)); + // No need to clamp to -1 if signed - 1/(2^31-1) is rounded to + // 1/(2^31) as float32. + break; + case xenos::SignedRepeatingFractionMode::kNoZero: + DxbcOpMAd(result_unpacked_dest, result_src, + DxbcSrc::LF(1.0f / 2147483647.5f), + DxbcSrc::LF(0.5f / 2147483647.5f)); + break; + default: + assert_unhandled_case(instr.attributes.signed_rf_mode); + } + } else { + DxbcOpMul(result_unpacked_dest, result_src, + DxbcSrc::LF(1.0f / 4294967295.0f)); + } } break; case VertexFormat::k_32_FLOAT: diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index aa762cb02..c88e8f252 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -428,6 +428,8 @@ struct ParsedVertexFetchInstruction { int32_t exp_adjust = 0; // Prefetch count minus 1. uint32_t prefetch_count = 0; + xenos::SignedRepeatingFractionMode signed_rf_mode = + xenos::SignedRepeatingFractionMode::kZeroClampMinusOne; bool is_index_rounded = false; bool is_signed = false; bool is_integer = false; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 7da1a48e4..b2ba88a08 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -993,6 +993,7 @@ void ShaderTranslator::ParseVertexFetchInstruction( i.attributes.is_index_rounded = op.is_index_rounded(); i.attributes.is_signed = op.is_signed(); i.attributes.is_integer = !op.is_normalized(); + i.attributes.signed_rf_mode = op.signed_rf_mode(); // Store for later use by mini fetches. if (!op.is_mini_fetch()) { diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index 762b977fb..95b16e2d7 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -505,6 +505,9 @@ struct VertexFetchInstruction { int exp_adjust() const { return data_.exp_adjust; } bool is_signed() const { return data_.fomat_comp_all == 1; } bool is_normalized() const { return data_.num_format_all == 0; } + xenos::SignedRepeatingFractionMode signed_rf_mode() const { + return data_.signed_rf_mode_all; + } bool is_index_rounded() const { return data_.is_index_rounded == 1; } // Dword stride, [0, 255]. uint32_t stride() const { return data_.stride; } @@ -536,7 +539,7 @@ struct VertexFetchInstruction { uint32_t dst_swiz : 12; uint32_t fomat_comp_all : 1; uint32_t num_format_all : 1; - uint32_t signed_rf_mode_all : 1; + xenos::SignedRepeatingFractionMode signed_rf_mode_all : 1; uint32_t is_index_rounded : 1; VertexFormat format : 6; uint32_t reserved2 : 2; diff --git a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 271337c02..41169e0ff 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -183,11 +183,11 @@ enum class IndexFormat : uint32_t { kInt32, }; -// GPUSURFACENUMBER from a game .pdb. "Repeat" means repeating fraction, it's -// what ATI calls normalized. +// SurfaceNumberX from yamato_enum.h. enum class SurfaceNumFormat : uint32_t { - kUnsignedRepeat = 0, - kSignedRepeat = 1, + kUnsignedRepeatingFraction = 0, + // Microsoft-style, scale factor (2^(n-1))-1. + kSignedRepeatingFraction = 1, kUnsignedInteger = 2, kSignedInteger = 3, kFloat = 7, @@ -516,6 +516,14 @@ inline bool IsMajorModeExplicit(MajorMode major_mode, primitive_type >= PrimitiveType::kExplicitMajorModeForceStart; } +enum class SignedRepeatingFractionMode : uint32_t { + // Microsoft-style representation with two -1 representations (one is slightly + // past -1 but clamped). + kZeroClampMinusOne, + // OpenGL "alternate mapping" format lacking representation for zero. + kNoZero, +}; + // instr_arbitrary_filter_t enum class ArbitraryFilter : uint32_t { k2x4Sym = 0, @@ -699,14 +707,14 @@ XEPACKEDUNION(xe_gpu_texture_fetch_t, { // which can be texture components 0/1/2/3 or constant 0/1) and R6xx // (signedness is FORMAT_COMP_X/Y/Z/W, while the swizzle is DST_SEL_X/Y/Z/W, // which is named in resources the same as DST_SEL in fetch clauses). - TextureSign sign_x : 2; // +2 - TextureSign sign_y : 2; // +4 - TextureSign sign_z : 2; // +6 - TextureSign sign_w : 2; // +8 - ClampMode clamp_x : 3; // +10 - ClampMode clamp_y : 3; // +13 - ClampMode clamp_z : 3; // +16 - uint32_t signed_rf_mode_all : 1; // +19 + TextureSign sign_x : 2; // +2 + TextureSign sign_y : 2; // +4 + TextureSign sign_z : 2; // +6 + TextureSign sign_w : 2; // +8 + ClampMode clamp_x : 3; // +10 + ClampMode clamp_y : 3; // +13 + ClampMode clamp_z : 3; // +16 + xenos::SignedRepeatingFractionMode signed_rf_mode_all : 1; // +19 // TODO(Triang3l): 1 or 2 dim_tbd bits? uint32_t unk_0 : 2; // +20 uint32_t pitch : 9; // +22 byte_pitch >> 5