From de047eaa40afc9a75b562570cb4cb0962cf6a75e Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Tue, 12 Nov 2024 21:10:30 +0100 Subject: [PATCH 01/15] [Soft-Float] - Initial Interpreter Implementation of Ps2's floating point unit specification. This Pull Request implements the first take ever on real Soft-Float support in PCSX2. This work is a combination of several prior efforts and research. Credits: - https://www.gregorygaines.com/blog/emulating-ps2-floating-point-nums-ieee-754-diffs-part-1/ - https://github.com/GitHubProUser67/MultiServer3/blob/main/BackendServices/CastleLibrary/EmotionEngine.Emulator/Ps2Float.cs - https://github.com/Goatman13/pcsx2/tree/accurate_int_add_sub - PCSX2 Team for their help and support in this massive journey. This pull request should be tested with every game requiring a clamping/rounding mode (cf: GameDatabase). Currently, this PR fixes the following issues on the interpreters: - https://github.com/PCSX2/pcsx2/issues/354 - https://github.com/PCSX2/pcsx2/issues/11507 - https://github.com/PCSX2/pcsx2/issues/10519 - https://github.com/PCSX2/pcsx2/issues/8068 - https://github.com/PCSX2/pcsx2/issues/7642 - https://github.com/PCSX2/pcsx2/issues/5257 It is important to note that this implementation, while technically fixing Gran Turismo 4 and Klonoa 2, makes the games crash due to very high floats being passed in the emu code and failing at some point later in the process. This has not yet been ironed out. Other than that, this lays the groundwork for Soft-Float in PCSX2, a long-awaited contribution. 
--- pcsx2-qt/Settings/AdvancedSettingsWidget.cpp | 10 + pcsx2-qt/Settings/AdvancedSettingsWidget.ui | 294 +++-- pcsx2/CMakeLists.txt | 2 + pcsx2/Config.h | 23 + pcsx2/FPU.cpp | 262 +++- pcsx2/Pcsx2Config.cpp | 13 + pcsx2/Ps2Float.cpp | 865 ++++++++++++ pcsx2/Ps2Float.h | 104 ++ pcsx2/VU.h | 1 + pcsx2/VUflags.cpp | 48 +- pcsx2/VUflags.h | 8 +- pcsx2/VUops.cpp | 1245 +++++++++++------- pcsx2/pcsx2.vcxproj | 4 +- pcsx2/pcsx2.vcxproj.filters | 11 +- 14 files changed, 2221 insertions(+), 669 deletions(-) create mode 100644 pcsx2/Ps2Float.cpp create mode 100644 pcsx2/Ps2Float.h diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index acd4903fe4..fcae67fac7 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -48,6 +48,16 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget* connect(m_ui.vu0ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(0, index); }); connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", "vu0SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", 
"vu1SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gameFixes, "EmuCore", "EnableGameFixes", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 197fda8c7c..5a8b420b88 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -33,8 +33,8 @@ 0 -447 - 790 - 1049 + 793 + 1283 @@ -94,10 +94,10 @@ - - + + - Division Rounding Mode: + Clamping Mode: @@ -125,38 +125,7 @@ - - - - Clamping Mode: - - - - - - - - None - - - - - Normal (Default) - - - - - Extra + Preserve Sign - - - - - Full - - - - - + @@ -208,6 +177,67 @@ + + + + + + None + + + + + Normal (Default) + + + + + Extra + Preserve Sign + + + + + Full + + + + + + + + Division Rounding Mode: + + + + + + + Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + @@ -218,7 +248,7 @@ Vector Units (VU) - + VU1 Rounding Mode: @@ -249,7 +279,129 @@ - + + + + VU1 Clamping Mode: + + + + + + + VU0 Rounding Mode: + + + + + + + VU1 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Float Square Root + + + + + + + + + + VU0 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + @@ -281,30 +433,6 @@ - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - 
Extra + Preserve Sign - - - - @@ -312,45 +440,7 @@ - - - - VU0 Rounding Mode: - - - - - - - VU1 Clamping Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 5430675614..8f78ed7a63 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + Ps2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -173,6 +174,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + Ps2Float.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/Config.h b/pcsx2/Config.h index fc64f1c22b..9671b86fbd 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -596,17 +596,32 @@ struct Pcsx2Config vu0ExtraOverflow : 1, vu0SignOverflow : 1, vu0Underflow : 1; + + bool + vu0SoftAddSub : 1, + vu0SoftMulDiv : 1, + vu0SoftSqrt : 1; bool vu1Overflow : 1, vu1ExtraOverflow : 1, vu1SignOverflow : 1, vu1Underflow : 1; + + bool + vu1SoftAddSub : 1, + vu1SoftMulDiv : 1, + vu1SoftSqrt : 1; bool fpuOverflow : 1, fpuExtraOverflow : 1, fpuFullMode : 1; + + bool + fpuSoftAddSub : 1, + fpuSoftMulDiv : 1, + fpuSoftSqrt : 1; bool EnableEECache : 1; @@ -1426,11 +1441,19 @@ namespace EmuFolders #define CHECK_VU_SIGN_OVERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SignOverflow : EmuConfig.Cpu.Recompiler.vu1SignOverflow) #define CHECK_VU_UNDERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0Underflow : EmuConfig.Cpu.Recompiler.vu1Underflow) +#define CHECK_VU_SOFT_ADDSUB(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftAddSub : EmuConfig.Cpu.Recompiler.vu1SoftAddSub) +#define CHECK_VU_SOFT_MULDIV(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftMulDiv : EmuConfig.Cpu.Recompiler.vu1SoftMulDiv) +#define CHECK_VU_SOFT_SQRT(vunum) (((vunum) == 0) ? 
EmuConfig.Cpu.Recompiler.vu0SoftSqrt : EmuConfig.Cpu.Recompiler.vu1SoftSqrt) + #define CHECK_FPU_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuOverflow) #define CHECK_FPU_EXTRA_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuExtraOverflow) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions #define CHECK_FPU_FULL (EmuConfig.Cpu.Recompiler.fpuFullMode) +#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftAddSub) +#define CHECK_FPU_SOFT_MULDIV (EmuConfig.Cpu.Recompiler.fpuSoftMulDiv) +#define CHECK_FPU_SOFT_SQRT (EmuConfig.Cpu.Recompiler.fpuSoftSqrt) + //------------ EE Recompiler defines - Comment to disable a recompiler --------------- #define SHIFT_RECOMPILE // Speed majorly reduced if disabled diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 3ac1ae3fd1..f2136aff20 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "Ps2Float.h" #include // Helper Macros @@ -63,28 +63,57 @@ // If we have an infinity value, then Overflow has occured. 
bool checkOverflow(u32& xReg, u32 cFlagsToSet) { - if ((xReg & ~0x80000000) == PosInfinity) { - /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ - xReg = (xReg & 0x80000000) | posFmax; - _ContVal_ |= (cFlagsToSet); - return true; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (xReg == Ps2Float::MAX_FLOATING_POINT_VALUE || xReg == Ps2Float::MIN_FLOATING_POINT_VALUE) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; + } + else + { + if ((xReg & ~0x80000000) == PosInfinity) + { + /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ + xReg = (xReg & 0x80000000) | posFmax; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; } - else if (cFlagsToSet & FPUflagO) - _ContVal_ &= ~FPUflagO; return false; } // If we have a denormal value, then Underflow has occured. bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { - if ( ( (xReg & 0x7F800000) == 0 ) && ( (xReg & 0x007FFFFF) != 0 ) ) { - /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ - xReg &= 0x80000000; - _ContVal_ |= (cFlagsToSet); - return true; + + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (Ps2Float(xReg).IsDenormalized()) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; + } + else + { + if (((xReg & 0x7F800000) == 0) && ((xReg & 0x007FFFFF) != 0)) + { + /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ + xReg &= 0x80000000; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; } - else if (cFlagsToSet & FPUflagU) - _ContVal_ &= ~FPUflagU; return false; } @@ -106,9 +135,36 @@ __fi u32 fp_min(u32 a, u32 b) */ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsToSet1, u32 
cFlagsToSet2) { - if ( (yDivisorReg & 0x7F800000) == 0 ) { - _ContVal_ |= ( (zDividendReg & 0x7F800000) == 0 ) ? cFlagsToSet2 : cFlagsToSet1; - xReg = ( (yDivisorReg ^ zDividendReg) & 0x80000000 ) | posFmax; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + Ps2Float yMatrix = Ps2Float(yDivisorReg); + Ps2Float zMatrix = Ps2Float(zDividendReg); + + if (yMatrix.IsZero()) + { + bool dividendZero = zMatrix.IsZero(); + + _ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1; + + bool IsSigned = yMatrix.Sign ^ zMatrix.Sign; + + if (dividendZero) + xReg = IsSigned ? Ps2Float::MIN_FLOATING_POINT_VALUE : Ps2Float::MAX_FLOATING_POINT_VALUE; + else + { + Ps2Float zeroRes = Ps2Float(0); + + zeroRes.Sign = IsSigned; + xReg = zeroRes.AsUInt32(); + } + + return true; + } + } + else if ((yDivisorReg & 0x7F800000) == 0) + { + _ContVal_ |= ((zDividendReg & 0x7F800000) == 0) ? cFlagsToSet2 : cFlagsToSet1; + xReg = ((yDivisorReg ^ zDividendReg) & 0x80000000) | posFmax; return true; } @@ -182,19 +238,60 @@ float fpuDouble(u32 f) } } +static __fi uint32_t fpuAccurateAddSub(u32 a, u32 b, bool issub) +{ + if (CHECK_FPU_SOFT_ADDSUB) + { + if (issub) + return Ps2Float(a).Sub(Ps2Float(b), 1).AsUInt32(); + else + return Ps2Float(a).Add(Ps2Float(b), 1).AsUInt32(); + } + + if (issub) + return std::bit_cast(fpuDouble(a) - fpuDouble(b)); + else + return std::bit_cast(fpuDouble(a) + fpuDouble(b)); +} + +static __fi uint32_t fpuAccurateMulDiv(u32 a, u32 b, bool isdiv) +{ + if (CHECK_FPU_SOFT_MULDIV) + { + if (isdiv) + return Ps2Float(a).Div(Ps2Float(b)).AsUInt32(); + else + return Ps2Float(a).Mul(Ps2Float(b)).AsUInt32(); + } + + if (isdiv) + return std::bit_cast(fpuDouble(a) / fpuDouble(b)); + else + return std::bit_cast(fpuDouble(a) * fpuDouble(b)); +} + +static __fi s32 double_to_int(double value) +{ + if (value >= 2147483647.0) + return 2147483647LL; + if (value <= -2147483648.0) + return -2147483648LL; + return value; +} + void ABS_S() { _FdValUl_ = _FsValUl_ & 
0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); } void ADD_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void ADDA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -253,14 +350,30 @@ void CVT_S() { } void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } - else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } - else { _FdValUl_ = 0x80000000; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _FdValSl_ = double_to_int(Ps2Float(_FsValUl_).ToDouble()); + } + else + { + if ((_FsValUl_ & 0x7F800000) <= 0x4E800000) + { + _FdValSl_ = (s32)_FsValf_; + } + else if ((_FsValUl_ & 0x80000000) == 0) + { + _FdValUl_ = 0x7fffffff; + } + else + { + _FdValUl_ = 0x80000000; + } + } } void DIV_S() { if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 1); if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); } @@ -271,14 +384,16 @@ void DIV_S() { */ void MADD_S() { FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FdValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 0); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MADDA_S() { - _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + FPRreg temp; + temp.UL = 
fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FAValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 0); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -304,14 +419,16 @@ void MOV_S() { void MSUB_S() { FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FdValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 1); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MSUBA_S() { - _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + FPRreg temp; + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FAValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 1); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -321,13 +438,13 @@ void MTC1() { } void MUL_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MULA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -341,17 +458,45 @@ void RSQRT_S() { FPRreg temp; clearFPUFlags(FPUflagD | FPUflagI); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) - _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; - return; + if (CHECK_FPU_SOFT_SQRT) + { + Ps2Float value = Ps2Float(_FtValUl_); + + if (value.IsDenormalized()) + { + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = value.Sign ? 
Ps2Float::MIN_FLOATING_POINT_VALUE : Ps2Float::MAX_FLOATING_POINT_VALUE; + return; + } + else if (_FtValUl_ & 0x80000000) + { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = Ps2Float(_FsValUl_).Rsqrt(Ps2Float(value.Abs())).AsUInt32(); + } + else + { + _FdValUl_ = Ps2Float(_FsValUl_).Rsqrt(value).AsUInt32(); + } // Ft is positive and not zero } - else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative - _ContVal_ |= FPUflagI | FPUflagSI; - temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + else + { + if ((_FtValUl_ & 0x7F800000) == 0) + { // Ft is zero (Denormals are Zero) + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = (_FtValUl_ & 0x80000000) | posFmax; + return; + } + else if (_FtValUl_ & 0x80000000) + { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = sqrt(fabs(fpuDouble(_FtValUl_))); + _FdValf_ = fpuDouble(_FsValUl_) / fpuDouble(temp.UL); + } + else + { + _FdValf_ = fpuDouble(_FsValUl_) / sqrt(fpuDouble(_FtValUl_)); + } // Ft is positive and not zero } - else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); @@ -360,23 +505,40 @@ void RSQRT_S() { void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) // If Ft = +/-0 - _FdValUl_ = _FtValUl_ & 0x80000000;// result is 0 - else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative - _ContVal_ |= FPUflagI | FPUflagSI; - _FdValf_ = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - } else - _FdValf_ = sqrt( fpuDouble( _FtValUl_ ) ); // If Ft is Positive + if (CHECK_FPU_SOFT_SQRT) + { + Ps2Float value = Ps2Float(_FtValUl_); + + if (_FtValUl_ & 0x80000000) + { // If Ft is Negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = Ps2Float(value.Abs()).Sqrt().AsUInt32(); + } + else + _FdValUl_ = value.Sqrt().AsUInt32(); // If Ft is Positive + } + else + { + if 
((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 + _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 + else if (_FtValUl_ & 0x80000000) + { // If Ft is Negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); + } + else + _FdValf_ = sqrt(fpuDouble(_FtValUl_)); // If Ft is Positive + } } void SUB_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 1); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void SUBA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 1); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 071ff140eb..4c3f70a573 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -536,14 +536,27 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(vu0ExtraOverflow); SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); + + SettingsWrapBitBool(vu0SoftAddSub); + SettingsWrapBitBool(vu0SoftMulDiv); + SettingsWrapBitBool(vu0SoftSqrt); + SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); + + SettingsWrapBitBool(vu1SoftAddSub); + SettingsWrapBitBool(vu1SoftMulDiv); + SettingsWrapBitBool(vu1SoftSqrt); SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); + + SettingsWrapBitBool(fpuSoftAddSub); + SettingsWrapBitBool(fpuSoftMulDiv); + SettingsWrapBitBool(fpuSoftSqrt); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp new file mode 100644 index 0000000000..8156143272 --- /dev/null +++ b/pcsx2/Ps2Float.cpp @@ -0,0 +1,865 @@ +// 
SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include +#include +#include "Ps2Float.h" +#include "Common.h" + +const uint8_t Ps2Float::BIAS = 127; +const uint32_t Ps2Float::SIGNMASK = 0x80000000; +const uint32_t Ps2Float::MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; +const uint32_t Ps2Float::MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; +const uint32_t Ps2Float::POSITIVE_INFINITY_VALUE = 0x7F800000; +const uint32_t Ps2Float::NEGATIVE_INFINITY_VALUE = 0xFF800000; +const uint32_t Ps2Float::ONE = 0x3F800000; +const uint32_t Ps2Float::MIN_ONE = 0xBF800000; +const int32_t Ps2Float::IMPLICIT_LEADING_BIT_POS = 23; + +Ps2Float::Ps2Float(uint32_t value) + : Sign((value >> 31) & 1) + , Exponent((uint8_t)(((value >> 23) & 0xFF))) + , Mantissa(value & 0x7FFFFF) +{ +} + +Ps2Float::Ps2Float(bool sign, uint8_t exponent, uint32_t mantissa) + : Sign(sign) + , Exponent(exponent) + , Mantissa(mantissa) +{ +} + +Ps2Float Ps2Float::Max() +{ + return Ps2Float(MAX_FLOATING_POINT_VALUE); +} + +Ps2Float Ps2Float::Min() +{ + return Ps2Float(MIN_FLOATING_POINT_VALUE); +} + +Ps2Float Ps2Float::One() +{ + return Ps2Float(ONE); +} + +Ps2Float Ps2Float::MinOne() +{ + return Ps2Float(MIN_ONE); +} + +uint32_t Ps2Float::AsUInt32() const +{ + uint32_t result = 0; + result |= (Sign ? 1u : 0u) << 31; + result |= (uint32_t)(Exponent << 23); + result |= Mantissa; + return result; +} + +Ps2Float Ps2Float::Add(Ps2Float addend, bool COP1) +{ + if (IsDenormalized() || addend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, addend, true); + + if (IsAbnormal() && addend.IsAbnormal()) + return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true, COP1); + + uint32_t a = AsUInt32(); + uint32_t b = addend.AsUInt32(); + int32_t temp = 0; + + //exponent difference + int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff); + + //diff = 25 .. 
255 , expt < expd + if (exp_diff >= 25) + { + b = b & Ps2Float::SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + b = temp & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & Ps2Float::SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + a = a & temp; + } + + return Ps2Float(a).DoAdd(Ps2Float(b)); +} + +Ps2Float Ps2Float::Sub(Ps2Float subtrahend, bool COP1) +{ + if (IsDenormalized() || subtrahend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, subtrahend, false); + + if (IsAbnormal() && subtrahend.IsAbnormal()) + return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false, COP1); + + uint32_t a = AsUInt32(); + uint32_t b = subtrahend.AsUInt32(); + int32_t temp = 0; + + //exponent difference + int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & Ps2Float::SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + b = temp & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & Ps2Float::SIGNMASK; + } + + //diff = -24 .. 
-1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + a = a & temp; + } + + + return Ps2Float(a).DoAdd(Neg(Ps2Float(b))); +} + +Ps2Float Ps2Float::Mul(Ps2Float mulend) +{ + if (IsDenormalized() || mulend.IsDenormalized()) + return SolveMultiplicationDenormalizedOperation(*this, mulend); + + if (IsAbnormal() && mulend.IsAbnormal()) + return SolveAbnormalMultiplicationOrDivisionOperation(*this, mulend, true); + + if (IsZero() || mulend.IsZero()) + { + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, mulend); + return result; + } + + return DoMul(mulend); +} + +Ps2Float Ps2Float::Div(Ps2Float divend) +{ + if (IsDenormalized() || divend.IsDenormalized()) + return SolveDivisionDenormalizedOperation(*this, divend); + + if (IsAbnormal() && divend.IsAbnormal()) + return SolveAbnormalMultiplicationOrDivisionOperation(*this, divend, false); + + if (IsZero()) + { + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, divend); + return result; + } + else if (divend.IsZero()) + return DetermineMultiplicationDivisionOperationSign(*this, divend) ? Min() : Max(); + + return DoDiv(divend); +} + +Ps2Float Ps2Float::Sqrt() +{ + int32_t t; + int32_t s = 0; + int32_t q = 0; + uint32_t r = 0x01000000; /* r = moving bit from right to left */ + + if (IsDenormalized()) + return Ps2Float(0); + + // PS2 only takes positive numbers for SQRT, and convert if necessary. 
+ int32_t ix = (int32_t)(Ps2Float(false, Exponent, Mantissa).AsUInt32()); + + /* extract mantissa and unbias exponent */ + int32_t m = (ix >> 23) - BIAS; + + ix = (ix & 0x007fffff) | 0x00800000; + if ((m & 1) == 1) + { + /* odd m, double x to make it even */ + ix += ix; + } + + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + + while (r != 0) + { + t = s + (int32_t)(r); + if (t <= ix) + { + s = t + (int32_t)(r); + ix -= t; + q += (int32_t)(r); + } + + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix != 0) + { + q += q & 1; + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + + return Ps2Float((uint32_t)(ix)); +} + +Ps2Float Ps2Float::Rsqrt(Ps2Float other) +{ + return Div(other.Sqrt()); +} + +bool Ps2Float::IsDenormalized() +{ + return Exponent == 0; +} + +bool Ps2Float::IsAbnormal() +{ + uint32_t val = AsUInt32(); + return val == MAX_FLOATING_POINT_VALUE || val == MIN_FLOATING_POINT_VALUE || + val == POSITIVE_INFINITY_VALUE || val == NEGATIVE_INFINITY_VALUE; +} + +bool Ps2Float::IsZero() +{ + return (Abs()) == 0; +} + +uint32_t Ps2Float::Abs() +{ + return (AsUInt32() & MAX_FLOATING_POINT_VALUE); +} + +Ps2Float Ps2Float::RoundTowardsZero() +{ + return Ps2Float((uint32_t)(std::trunc((double)(AsUInt32())))); +} + +int32_t Ps2Float::CompareTo(Ps2Float other) +{ + int32_t selfTwoComplementVal = (int32_t)(Abs()); + if (Sign) + selfTwoComplementVal = -selfTwoComplementVal; + + int32_t otherTwoComplementVal = (int32_t)(other.Abs()); + if (other.Sign) + otherTwoComplementVal = -otherTwoComplementVal; + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +double Ps2Float::ToDouble() +{ + return std::bit_cast(((u64)Sign << 63) | ((((u64)Exponent - BIAS) + 1023ULL) << 52) | ((u64)Mantissa << 29)); +} + +std::string Ps2Float::ToString() +{ + double res = ToDouble(); + + uint32_t value = AsUInt32(); + 
std::ostringstream oss; + oss << std::fixed << std::setprecision(6); + + if (IsDenormalized()) + { + oss << "Denormalized(" << res << ")"; + } + else if (value == MAX_FLOATING_POINT_VALUE) + { + oss << "Fmax(" << res << ")"; + } + else if (value == MIN_FLOATING_POINT_VALUE) + { + oss << "-Fmax(" << res << ")"; + } + else if (value == POSITIVE_INFINITY_VALUE) + { + oss << "Inf(" << res << ")"; + } + else if (value == NEGATIVE_INFINITY_VALUE) + { + oss << "-Inf(" << res << ")"; + } + else + { + oss << "Ps2Float(" << res << ")"; + } + + return oss.str(); +} + +Ps2Float Ps2Float::DoAdd(Ps2Float other) +{ + const uint8_t roundingMultiplier = 6; + + uint8_t selfExponent = Exponent; + int32_t resExponent = selfExponent - other.Exponent; + + if (resExponent < 0) + return other.DoAdd(*this); + else if (resExponent >= 25) + return *this; + + // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate + uint32_t sign1 = (uint32_t)((int32_t)AsUInt32() >> 31); + int32_t selfMantissa = (int32_t)(((Mantissa | 0x800000) ^ sign1) - sign1); + uint32_t sign2 = (uint32_t)((int32_t)other.AsUInt32() >> 31); + int32_t otherMantissa = (int32_t)(((other.Mantissa | 0x800000) ^ sign2) - sign2); + + // PS2 multiply by 2 before doing the Math here. + int32_t man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + int32_t absMan = abs(man); + if (absMan == 0) + return Ps2Float(0); + + // Remove from exponent the PS2 Multiplier value. + int32_t rawExp = selfExponent - roundingMultiplier; + + int32_t amount = normalizeAmounts[clz(absMan)]; + rawExp -= amount; + absMan <<= amount; + + int32_t msbIndex = BitScanReverse8(absMan >> 23); + rawExp += msbIndex; + absMan >>= msbIndex; + + if (rawExp > 255) + return man < 0 ? 
Min() : Max(); + else if (rawExp <= 0) + return Ps2Float(man < 0, 0, 0); + + return Ps2Float((uint32_t)man & Ps2Float::SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)).RoundTowardsZero(); +} + +Ps2Float Ps2Float::DoMul(Ps2Float other) +{ + uint32_t selfMantissa = Mantissa | 0x800000; + uint32_t otherMantissa = other.Mantissa | 0x800000; + int32_t resExponent = Exponent + other.Exponent - BIAS; + + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); + + if (resExponent > 255) + return result.Sign ? Min() : Max(); + else if (resExponent <= 0) + return Ps2Float(result.Sign, 0, 0); + + uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not + int64_t res = 0; + uint64_t mask = 0xFFFFFFFFFFFFFFFF; + + result.Exponent = (uint8_t)(resExponent); + + otherMantissa <<= 1; + + uint32_t part[13]; // Partial products + uint32_t bit[13]; // More partial products. 0 or 1. 
+ + for (int i = 0; i <= 12; i++, otherMantissa >>= 2) + { + uint32_t test = otherMantissa & 7; + if (test == 0 || test == 7) + { + part[i] = 0; + bit[i] = 0; + } + else if (test == 3) + { + part[i] = (selfMantissa << 1); + bit[i] = 0; + } + else if (test == 4) + { + part[i] = ~(selfMantissa << 1); + bit[i] = 1; + } + else if (test < 4) + { + part[i] = selfMantissa; + bit[i] = 0; + } + else + { + part[i] = ~selfMantissa; + bit[i] = 1; + } + } + + for (int i = 0; i <= 12; i++) + { + res += (uint64_t)(int32_t)part[i] << (i * 2); + res &= mask; + res += bit[i] << (i * 2); + } + + result.Mantissa = (uint32_t)(res >> 23); + + if ((testImprecision & 0x000aaa) && !(res & 0x7FFFFF)) + result.Mantissa -= 1; + + if (result.Mantissa > 0) + { + int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa); + + while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) + { + if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa >>= 1; + + int32_t exp = ((int32_t)result.Exponent + 1); + + if (exp > 255) + return result.Sign ? Min() : Max(); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition--; + } + else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa <<= 1; + + int32_t exp = ((int32_t)result.Exponent - 1); + + if (exp <= 0) + return Ps2Float(result.Sign, 0, 0); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition++; + } + } + } + + result.Mantissa &= 0x7FFFFF; + return result.RoundTowardsZero(); +} + +Ps2Float Ps2Float::DoDiv(Ps2Float other) +{ + uint64_t selfMantissa64; + uint32_t selfMantissa = Mantissa | 0x800000; + uint32_t otherMantissa = other.Mantissa | 0x800000; + int resExponent = Exponent - other.Exponent + BIAS; + + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); + + if (resExponent > 255) + return result.Sign ? 
Min() : Max(); + else if (resExponent <= 0) + return Ps2Float(result.Sign, 0, 0); + + if (selfMantissa < otherMantissa) + { + --resExponent; + if (resExponent == 0) + return Ps2Float(result.Sign, 0, 0); + selfMantissa64 = (uint64_t)(selfMantissa) << 31; + } + else + { + selfMantissa64 = (uint64_t)(selfMantissa) << 30; + } + + uint32_t resMantissa = (uint32_t)(selfMantissa64 / otherMantissa); + if ((resMantissa & 0x3F) == 0) + resMantissa |= ((uint64_t)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0; + + result.Exponent = (uint8_t)(resExponent); + result.Mantissa = (resMantissa + 0x39U /* Non-standard value, 40U in IEEE754 (PS2: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D7 -> IEEE754: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D8 */) >> 7; + + if (result.Mantissa > 0) + { + int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa); + + while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) + { + if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa >>= 1; + + int32_t exp = ((int32_t)result.Exponent + 1); + + if (exp > 255) + return result.Sign ? Min() : Max(); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition--; + } + else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa <<= 1; + + int32_t exp = ((int32_t)result.Exponent - 1); + + if (exp <= 0) + return Ps2Float(result.Sign, 0, 0); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition++; + } + } + } + + result.Mantissa &= 0x7FFFFF; + return result.RoundTowardsZero(); +} + +Ps2Float Ps2Float::SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add, bool COP1) +{ + uint32_t aval = a.AsUInt32(); + uint32_t bval = b.AsUInt32(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Max() : Ps2Float(0); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? 
Min() : Ps2Float(0); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return COP1 ? Min() : (add ? Ps2Float(0) : Min()); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return COP1 ? Max() : (add ? Ps2Float(0) : Max()); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Max() : Ps2Float(0); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return COP1 ? Min() : (add ? Ps2Float(0) : Min()); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return COP1 ? Max() : (add ? Ps2Float(0) : Max()); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Min() : Ps2Float(0); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Max() : Ps2Float(0x7F7FFFFE); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Ps2Float(0x7F7FFFFE) : Max(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Ps2Float(0xFF7FFFFE) : Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Min() : Ps2Float(0xFF7FFFFE); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Max() : Ps2Float(0xFF7FFFFE); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? Ps2Float(0xFF7FFFFE) : Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Ps2Float(0x7F7FFFFE) : Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? 
Min() : Ps2Float(0x7F7FFFFE); + + Console.Error("Unhandled abnormal add/sub floating point operation"); +} + +Ps2Float Ps2Float::SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul) +{ + uint32_t aval = a.AsUInt32(); + uint32_t bval = b.AsUInt32(); + + if (mul) + { + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) + return Max(); + + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) + return Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Max(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Max(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Max(); + } + else + { + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) + return One(); + + if ((aval == MAX_FLOATING_POINT_VALUE && bval == 
MIN_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) + return MinOne(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return One(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return MinOne(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return MinOne(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return One(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Ps2Float(0x3FFFFFFF); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Ps2Float(0xBFFFFFFF); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Ps2Float(0xBFFFFFFF); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Ps2Float(0x3FFFFFFF); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Ps2Float(0x3F000001); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Ps2Float(0xBF000001); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Ps2Float(0xBF000001); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Ps2Float(0x3F000001); + } + + Console.Error("Unhandled abnormal mul/div floating point operation"); +} + +Ps2Float Ps2Float::SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool add) +{ + Ps2Float result = Ps2Float(0); + + if (a.IsDenormalized() && !b.IsDenormalized()) + result = b; + else if (!a.IsDenormalized() && b.IsDenormalized()) + result = a; + else if (a.IsDenormalized() && b.IsDenormalized()) + { + } + else + Console.Error("Both numbers are not denormalized"); + + result.Sign = add ? 
DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); + return result; +} + +Ps2Float Ps2Float::SolveMultiplicationDenormalizedOperation(Ps2Float a, Ps2Float b) +{ + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(a, b); + return result; +} + +Ps2Float Ps2Float::SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b) +{ + bool sign = DetermineMultiplicationDivisionOperationSign(a, b); + Ps2Float result = Ps2Float(0); + + if (a.IsDenormalized() && !b.IsDenormalized()) + { + } + else if (!a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else if (a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else + Console.Error("Both numbers are not denormalized"); + + result.Sign = sign; + return result; +} + +Ps2Float Ps2Float::Neg(Ps2Float self) +{ + return Ps2Float(self.AsUInt32() ^ SIGNMASK); +} + +bool Ps2Float::DetermineMultiplicationDivisionOperationSign(Ps2Float a, Ps2Float b) +{ + return a.Sign ^ b.Sign; +} + +bool Ps2Float::DetermineAdditionOperationSign(Ps2Float a, Ps2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign || !b.Sign) + return false; + else if (a.Sign && b.Sign) + return true; + else + Console.Error("Unhandled addition operation flags"); + } + else if (a.IsZero()) + return b.Sign; + + return a.Sign; +} + +bool Ps2Float::DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign || b.Sign) + return false; + else if (a.Sign && !b.Sign) + return true; + else + Console.Error("Unhandled subtraction operation flags"); + } + else if (a.IsZero()) + return !b.Sign; + else if (b.IsZero()) + return a.Sign; + + return a.CompareTo(b) >= 0 ? 
a.Sign : !b.Sign; +} + +int32_t Ps2Float::clz(int x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return debruijn32[(uint)x * 0x8c0b2891u >> 26]; +} + +int32_t Ps2Float::BitScanReverse8(int b) +{ + return msb[b]; +} + +int32_t Ps2Float::GetMostSignificantBitPosition(uint32_t value) +{ + for (int32_t i = 31; i >= 0; i--) + { + if (((value >> i) & 1) != 0) + return i; + } + return -1; +} + +const int8_t Ps2Float::msb[256] = + { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + +const int32_t Ps2Float::debruijn32[64] = + { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12}; + +const int32_t Ps2Float::normalizeAmounts[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; diff --git a/pcsx2/Ps2Float.h b/pcsx2/Ps2Float.h new file mode 100644 index 0000000000..2453ee1473 --- /dev/null +++ b/pcsx2/Ps2Float.h @@ -0,0 +1,104 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +#include + 
+class Ps2Float +{ +public: + bool Sign; + uint8_t Exponent; + uint32_t Mantissa; + + static const uint8_t BIAS; + static const uint32_t SIGNMASK; + static const uint32_t MAX_FLOATING_POINT_VALUE; + static const uint32_t MIN_FLOATING_POINT_VALUE; + static const uint32_t POSITIVE_INFINITY_VALUE; + static const uint32_t NEGATIVE_INFINITY_VALUE; + static const uint32_t ONE; + static const uint32_t MIN_ONE; + static const int IMPLICIT_LEADING_BIT_POS; + + static const int8_t msb[256]; + static const int32_t debruijn32[64]; + static const int32_t normalizeAmounts[]; + + Ps2Float(uint32_t value); + + Ps2Float(bool sign, uint8_t exponent, uint32_t mantissa); + + static Ps2Float Max(); + + static Ps2Float Min(); + + static Ps2Float One(); + + static Ps2Float MinOne(); + + static Ps2Float Neg(Ps2Float self); + + uint32_t AsUInt32() const; + + Ps2Float Add(Ps2Float addend, bool COP1); + + Ps2Float Sub(Ps2Float subtrahend, bool COP1); + + Ps2Float Mul(Ps2Float mulend); + + Ps2Float Div(Ps2Float divend); + + Ps2Float Sqrt(); + + Ps2Float Rsqrt(Ps2Float other); + + bool IsDenormalized(); + + bool IsAbnormal(); + + bool IsZero(); + + uint32_t Abs(); + + Ps2Float RoundTowardsZero(); + + int32_t CompareTo(Ps2Float other); + + double ToDouble(); + + std::string ToString(); + +protected: + +private: + + Ps2Float DoAdd(Ps2Float other); + + Ps2Float DoMul(Ps2Float other); + + Ps2Float DoDiv(Ps2Float other); + + static Ps2Float SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add, bool COP1); + + static Ps2Float SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul); + + static Ps2Float SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool add); + + static Ps2Float SolveMultiplicationDenormalizedOperation(Ps2Float a, Ps2Float b); + + static Ps2Float SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b); + + static bool DetermineMultiplicationDivisionOperationSign(Ps2Float a, Ps2Float b); + + static bool 
DetermineAdditionOperationSign(Ps2Float a, Ps2Float b); + + static bool DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b); + + static int32_t GetMostSignificantBitPosition(uint32_t value); + + static int32_t BitScanReverse8(int32_t b); + + static int32_t clz(int32_t x); +}; diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 1f8224bc39..ad55a0e12b 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -124,6 +124,7 @@ struct alignas(16) VURegs REG_VI q; REG_VI p; + VECTOR TMP; uint idx; // VU index (0 or 1) // flags/cycle are needed by VIF dma code, so they have to be here (for now) diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index 22632cf36b..85a38eb1b8 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "Ps2Float.h" #include #include @@ -12,21 +12,22 @@ /* NEW FLAGS */ //By asadr. Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, uint32_t f) { - u32 v = *(u32*)&f; - int exp = (v >> 23) & 0xff; - u32 s = v & 0x80000000; + Ps2Float ps2f = Ps2Float(f); + + uint exp = ps2f.Exponent; + u32 s = ps2f.AsUInt32() & Ps2Float::SIGNMASK; if (s) VU->macflag |= 0x0010<macflag &= ~(0x0010<macflag = (VU->macflag & ~(0x1100<macflag = (VU->macflag&~(0x1000<macflag = (VU->macflag&~(0x0101<macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } + else + return f; + } + else if (CHECK_VU_OVERFLOW((VU == &VU1) ? 
1 : 0)) + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return s | 0x7f7fffff; /* max IEEE754 allowed */ + } else - return v; + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } default: VU->macflag = (VU->macflag & ~(0x1101<(vuDouble(a) - vuDouble(b)); + else + return std::bit_cast(vuDouble(a) + vuDouble(b)); + +} + +static __fi uint32_t vuAccurateMulDiv(VURegs* VU, u32 a, u32 b, bool isdiv) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + if (isdiv) + return Ps2Float(a).Div(Ps2Float(b)).AsUInt32(); + else + return Ps2Float(a).Mul(Ps2Float(b)).AsUInt32(); + } + + if (isdiv) + return std::bit_cast(vuDouble(a) / vuDouble(b)); + else + return std::bit_cast(vuDouble(a) * vuDouble(b)); +} + static __fi float vuADD_TriAceHack(u32 a, u32 b) { // On VU0 TriAce Games use ADDi and expects these bit-perfect results: @@ -481,15 +515,17 @@ static __fi float vuADD_TriAceHack(u32 a, u32 b) // but VU interpreters don't seem to need it currently: // Update Sept 2021, now the interpreters don't suck, they do - Refraction - s32 aExp = (a >> 23) & 0xff; - s32 bExp = (b >> 23) & 0xff; - if (aExp - bExp >= 25) b &= 0x80000000; - if (aExp - bExp <=-25) a &= 0x80000000; - float ret = vuDouble(a) + vuDouble(b); + // s32 aExp = (a >> 23) & 0xff; + // s32 bExp = (b >> 23) & 0xff; + // if (aExp - bExp >= 25) b &= 0x80000000; + // if (aExp - bExp <=-25) a &= 0x80000000; + // float ret = vuDouble(a) + vuDouble(b); //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - return ret; + + // Update November 2024, now the interpreters has soft float support - GithubProUser67 + return vuDouble(Ps2Float(a).Add(Ps2Float(b), 0).AsUInt32()); } void _vuABS(VURegs* VU) @@ -497,10 +533,44 @@ void _vuABS(VURegs* VU) if (_Ft_ == 0) return; - if (_X){ VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); } 
- if (_Y){ VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); } - if (_Z){ VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); } - if (_W){ VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); } + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) + { + VU->VF[_Ft_].i.x = Ps2Float(VU->VF[_Fs_].i.x).Abs(); + } + if (_Y) + { + VU->VF[_Ft_].i.y = Ps2Float(VU->VF[_Fs_].i.y).Abs(); + } + if (_Z) + { + VU->VF[_Ft_].i.z = Ps2Float(VU->VF[_Fs_].i.z).Abs(); + } + if (_W) + { + VU->VF[_Ft_].i.w = Ps2Float(VU->VF[_Fs_].i.w).Abs(); + } + } + else + { + if (_X) + { + VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); + } + if (_Y) + { + VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); + } + if (_Z) + { + VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); + } + if (_W) + { + VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); + } + } } @@ -512,10 +582,10 @@ static __fi void _vuADD(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, 
VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -528,20 +598,11 @@ static __fi void _vuADDi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (!CHECK_VUADDSUBHACK) { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } - else { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuADDq(VURegs* VU) @@ -552,153 +613,133 @@ static __fi void _vuADDq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_Q].UL)); } else 
VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else 
VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + fty);} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + fty);} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + fty);} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + fty);} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, 
vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } 
else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ti); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ti); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ti); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ti); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAq(VURegs* VU) { - float tf = vuDouble(VU->VI[REG_Q].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tf); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tf); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tf); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tf); } else VU_MACw_CLEAR(VU); + if 
(_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ty); } else VU_MACz_CLEAR(VU); - if (_W){ 
VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tw); } else VU_MACy_CLEAR(VU); - if (_Z){ 
VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -711,11 +752,11 @@ static __fi void _vuSUB(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBi(VURegs* VU) @@ -726,10 +767,10 @@ static __fi void _vuSUBi(VURegs* VU) else dst = 
&VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 1));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 1));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 1));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -741,147 +782,131 @@ static __fi void _vuSUBq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, 
VU->VI[REG_Q].UL, 1));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - fty); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 1)); } else VU_MACx_CLEAR(VU); + if 
(_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = 
VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else 
VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAx(VURegs* VU) { - float tx = 
vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 1)); } 
else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 1)); } else VU_MACz_CLEAR(VU); + if 
(_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -893,10 +918,10 @@ static __fi void _vuMUL(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -908,10 +933,10 @@ static __fi void _vuMULi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ 
dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -923,498 +948,540 @@ static __fi void _vuMULq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftx); } else VU_MACz_CLEAR(VU); - if (_W){ 
dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftx); } else VU_MACw_CLEAR(VU); + uint32_t ftx = VU->VF[_Ft_].i.x; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * fty); } else VU_MACw_CLEAR(VU); + uint32_t fty = VU->VF[_Ft_].i.y; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, 
vuDouble(VU->VF[_Fs_].i.y) * ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftz); } else VU_MACw_CLEAR(VU); + uint32_t ftz = VU->VF[_Ft_].i.z; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftw); } else VU_MACw_CLEAR(VU); + uint32_t ftw = VU->VF[_Ft_].i.w; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * 
vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, 
vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAx(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 
0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = 
VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADD(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * 
vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDi(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, 
vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDq(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, 
VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftx)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftx)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftx)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftx)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * fty)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * 
fty)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * fty)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * fty)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftz)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftz)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftz)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftz)); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + tmp = &VU->TMP; + uint32_t ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = 
vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuMADDw(VURegs* VU) { - float ftw; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftw)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftw)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftw)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftw)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void 
_vuMADDA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDA(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * ti)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * ti)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * ti)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, 
vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * ti)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * tq)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * tq)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * tq)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * tq)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); 
VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAx(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAy(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y))); 
else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAz(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACw_CLEAR(VU); +static 
__fi void _vuMADDAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAw(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, 
VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } + static __fi void _vuMSUBi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = 
&RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ti ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ti ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ti ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ti ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tq ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tq ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tq ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tq ) ); else 
VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftx ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftx ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftx ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftx ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, 
vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * fty ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * fty ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * fty ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * fty ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftz ) 
); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftz ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftz ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftz ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBw(VURegs* VU) { - float ftw; - VECTOR * dst; - if (_Fd_ == 0) dst = &RDzero; - else dst = &VU->VF[_Fd_]; + VECTOR* tmp; + VECTOR* dst; + if (_Fd_ == 0) + dst = &RDzero; + else + dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftw ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftw ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftw ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftw ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftw = 
VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - -static __fi void _vuMSUBA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAi(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( 
vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAq(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAx(VURegs* VU) +static __fi void _vuMSUBA(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tx)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tx)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tx)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tx)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 
1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAy(VURegs* VU) +static __fi void _vuMSUBAi(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ty)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ty)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ty)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ty)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAz(VURegs* VU) +static __fi void _vuMSUBAq(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tz)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, 
vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tz)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tz)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tz)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAw(VURegs* VU) -{ - float tw = vuDouble(VU->VF[_Ft_].i.w); - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tw)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tw)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tw)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tw)); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t tx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tx, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, 
tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tx, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tx, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tx, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t ty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ty, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ty, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ty, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ty, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t tz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tz, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tz, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tz, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, 
vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tz, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t tw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tw, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tw, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tw, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tw, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1575,32 +1642,28 @@ static __fi void _vuMINIw(VURegs* VU) static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); VU_STAT_UPDATE(VU); } static __fi void _vuOPMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; - float ftx, fty, ftz; - float fsx, fsy, fsz; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx = 
vuDouble(VU->VF[_Ft_].i.x); - fty = vuDouble(VU->VF[_Ft_].i.y); - ftz = vuDouble(VU->VF[_Ft_].i.z); - fsx = vuDouble(VU->VF[_Fs_].i.x); - fsy = vuDouble(VU->VF[_Fs_].i.y); - fsz = vuDouble(VU->VF[_Fs_].i.z); - - dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); - dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); - dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + tmp = &VU->TMP; + tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0); + tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0); + tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1)); VU_STAT_UPDATE(VU); } @@ -1617,13 +1680,40 @@ static __fi s32 float_to_int(float value) return value; } +static __fi s32 double_to_int(double value) +{ + if (value >= 2147483647.0) + return 2147483647LL; + if (value <= -2147483648.0) + return -2147483648LL; + return value; +} + static __fi void _vuFTOI0(VURegs* VU) { if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) + VU->VF[_Ft_].SL[0] = double_to_int(Ps2Float(VU->VF[_Fs_].i.x).ToDouble()); + if (_Y) + VU->VF[_Ft_].SL[1] = double_to_int(Ps2Float(VU->VF[_Fs_].i.y).ToDouble()); + if (_Z) + VU->VF[_Ft_].SL[2] = double_to_int(Ps2Float(VU->VF[_Fs_].i.z).ToDouble()); + if (_W) + VU->VF[_Ft_].SL[3] = double_to_int(Ps2Float(VU->VF[_Fs_].i.w).ToDouble()); + } + else + { + if (_X) + VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); + if (_Y) + VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); + if (_Z) + VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); + if (_W) + VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + } } static __fi void _vuFTOI4(VURegs* VU) { @@ -1703,15 +1793,43 @@ static __fi void _vuITOF15(VURegs* VU) static __fi void _vuCLIP(VURegs* VU) { - float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + double value = Ps2Float(Ps2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); + + VU->clipflag <<= 6; + if (Ps2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) + VU->clipflag |= 0x01; + if (Ps2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) + VU->clipflag |= 0x02; + if (Ps2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) + VU->clipflag |= 0x04; + if (Ps2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) + VU->clipflag |= 0x08; + if (Ps2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) + VU->clipflag |= 0x10; + if (Ps2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) + VU->clipflag |= 0x20; + } + else + { + float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + + VU->clipflag <<= 6; + if (vuDouble(VU->VF[_Fs_].i.x) > +value) + VU->clipflag |= 0x01; + if (vuDouble(VU->VF[_Fs_].i.x) < -value) + VU->clipflag |= 0x02; + if (vuDouble(VU->VF[_Fs_].i.y) > +value) + VU->clipflag |= 0x04; + if (vuDouble(VU->VF[_Fs_].i.y) < -value) + VU->clipflag |= 0x08; + if (vuDouble(VU->VF[_Fs_].i.z) > +value) + VU->clipflag |= 0x10; + if (vuDouble(VU->VF[_Fs_].i.z) < -value) + VU->clipflag |= 0x20; + } - VU->clipflag <<= 6; - if ( vuDouble(VU->VF[_Fs_].i.x) > +value ) VU->clipflag|= 0x01; - if ( vuDouble(VU->VF[_Fs_].i.x) < -value ) VU->clipflag|= 0x02; - if ( vuDouble(VU->VF[_Fs_].i.y) > +value ) VU->clipflag|= 0x04; - if ( vuDouble(VU->VF[_Fs_].i.y) < -value ) VU->clipflag|= 0x08; - if ( vuDouble(VU->VF[_Fs_].i.z) > +value ) VU->clipflag|= 0x10; - if ( vuDouble(VU->VF[_Fs_].i.z) < -value ) VU->clipflag|= 0x20; VU->clipflag = VU->clipflag & 0xFFFFFF; } @@ -1721,28 +1839,56 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) { - if (fs == 0.0) - VU->statusflag |= 0x10; - else - VU->statusflag |= 0x20; + Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); + Ps2Float fs = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + if (fs.IsZero()) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = Ps2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = Ps2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + VU->q.UL = fs.Div(ft).AsUInt32(); + } } else { - VU->q.F = fs / ft; - VU->q.F = vuDouble(VU->q.UL); + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + if (fs == 0.0) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + VU->q.F = fs / ft; + VU->q.F = vuDouble(VU->q.UL); + } } } @@ -2442,56 +2588,118 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - VU->p.F = p; + VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0); } static __ri void _vuERSADD(VURegs* VU) { - float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + 
(vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - if (p != 0.0) - p = 1.0f / p; + Ps2Float p = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); - VU->p.F = p; + if (!p.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + p = Ps2Float::One().Div(p); + else + { + VU->p.F = 1.0f / vuDouble(p.AsUInt32()); + return; + } + } + + VU->p.UL = p.AsUInt32(); } static __ri void _vuELENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - if (p >= 0) + Ps2Float value = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - p = sqrt(p); + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(value.AsUInt32()); + + if (p >= 0) + { + p = sqrt(p); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuERLENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - if (p >= 0) + Ps2Float value = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p != 0) + if (value.ToDouble() >= 0) { - p = 1.0f / p; + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + { + value = Ps2Float::One().Div(value); + } + else + { + VU->p.F = 1.0 / vuDouble(value.AsUInt32()); + return; + } + } } + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(value.AsUInt32()); + + if (p >= 0) + { + p = sqrt(p); + if (p != 0) + { + p = 1.0f / p; + } + } + VU->p.F = p; } - VU->p.F = p; } -static __ri float _vuCalculateEATAN(float inputvalue) { +static __ri float _vuCalculateEATAN(uint32_t inputvalue) { + + float fvalue = vuDouble(inputvalue); + float eatanconst[9] = { 0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, 0.785398185253143f }; - float result = (eatanconst[0] * inputvalue) + (eatanconst[1] * pow(inputvalue, 3)) + (eatanconst[2] * pow(inputvalue, 5)) - + (eatanconst[3] * pow(inputvalue, 7)) + (eatanconst[4] * pow(inputvalue, 9)) + (eatanconst[5] * pow(inputvalue, 11)) - + (eatanconst[6] * pow(inputvalue, 13)) + (eatanconst[7] * pow(inputvalue, 15)); + float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) + + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15)); result += eatanconst[8]; @@ -2502,16 +2710,16 @@ static __ri float _vuCalculateEATAN(float inputvalue) { static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); + float p = _vuCalculateEATAN(VU->VF[_Fs_].UL[_Fsf_]); VU->p.F = p; } static __ri void _vuEATANxy(VURegs* VU) { float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!Ps2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x, 1)); } VU->p.F = p; } @@ -2519,57 +2727,104 @@ static __ri void _vuEATANxy(VURegs* VU) static __ri 
void _vuEATANxz(VURegs* VU) { float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!Ps2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x, 1)); } VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); - VU->p.F = p; + VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.y, 0), VU->VF[_Fs_].i.z, 0), VU->VF[_Fs_].i.w, 0); } static __ri void _vuERCPR(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + Ps2Float p = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - if (p != 0) + if (!p.IsZero()) { - p = 1.0 / p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + p = Ps2Float::One().Div(p); + } + else + { + VU->p.F = 1.0 / vuDouble(p.AsUInt32()); + return; + } } - VU->p.F = p; + VU->p.UL = p.AsUInt32(); } static __ri void _vuESQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - } + Ps2Float value = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + if (p >= 0) + { + p = sqrt(p); + } + + VU->p.F = p; + } } static __ri void _vuERSQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p) - { - p = 1.0f / p; - } - } + Ps2Float value = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + { + VU->p.F = 1.0f / vuDouble(value.AsUInt32()); + return; + } + else + { + value = Ps2Float::One().Div(value); + } + } + } + + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + if (p >= 0) + { + p = sqrt(p); + if (p) + { + p = 1.0f / p; + } + } + + VU->p.F = p; + } } static __ri void _vuESIN(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index c7f68793cf..53b9a41c37 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -281,6 +281,7 @@ + @@ -726,6 +727,7 @@ + @@ -1025,4 +1027,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 51782a5fcf..c7f2c7dd50 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -289,6 +289,9 @@ {cd8ec519-2196-43f7-86de-7faced2d4296} + + {9a40984b-cb23-4a54-a5e9-9c54f3c16c5b} + @@ -1443,6 +1446,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2399,6 +2405,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2428,4 +2437,4 @@ System\Ps2\GS - + \ No newline at end of file From b7f38061df28c3c620649de2056ecf9c615ed778 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Wed, 13 Nov 2024 16:09:02 +0100 Subject: [PATCH 02/15] [Soft-Float] - Fixes Tony Hawk Pro Skater 4 Mul issue. The game sends some super low floats to the Mul unit. On PS2, floats with exponent zero should return zero, but this is not the case in Mul, the multiplier can work with denormals internally. I love when undocumented stuff is used by some games for their 3D engine ^^. --- pcsx2/Ps2Float.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp index 8156143272..1d4e336a49 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/Ps2Float.cpp @@ -399,7 +399,7 @@ Ps2Float Ps2Float::DoMul(Ps2Float other) if (resExponent > 255) return result.Sign ? 
Min() : Max(); - else if (resExponent <= 0) + else if (resExponent < 0) return Ps2Float(result.Sign, 0, 0); uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not From b09bfb0c0234ea023abd51b60aea9b632f536695 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Wed, 13 Nov 2024 18:19:37 +0100 Subject: [PATCH 03/15] [FPU] - Uses Soft-Float comparison. More accurate approach to compare. --- pcsx2/FPU.cpp | 63 +++++++++++++++++++++++++++++++++------------------ 1 file changed, 41 insertions(+), 22 deletions(-) diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index f2136aff20..5eb286d4fb 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -181,25 +181,6 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT _ContVal_ &= ~( cFlags ) ; \ } -#ifdef comparePrecision -// This compare discards the least-significant bit(s) in order to solve some rounding issues. - #define C_cond_S(cond) { \ - FPRreg tempA, tempB; \ - tempA.UL = _FsValUl_ & comparePrecision; \ - tempB.UL = _FtValUl_ & comparePrecision; \ - _ContVal_ = ( ( tempA.f ) cond ( tempB.f ) ) ? \ - ( _ContVal_ | FPUflagC ) : \ - ( _ContVal_ & ~FPUflagC ); \ - } -#else -// Used for Comparing; This compares if the floats are exactly the same. - #define C_cond_S(cond) { \ - _ContVal_ = ( fpuDouble(_FsValUl_) cond fpuDouble(_FtValUl_) ) ? \ - ( _ContVal_ | FPUflagC ) : \ - ( _ContVal_ & ~FPUflagC ); \ - } -#endif - // Conditional Branch #define BC1(cond) \ if ( ( _ContVal_ & FPUflagC ) cond 0 ) { \ @@ -279,6 +260,44 @@ static __fi s32 double_to_int(double value) return value; } +static __fi void C_cond_S(uint8_t mode) +{ + switch (mode) + { + case 0: // == + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _ContVal_ = (Ps2Float(_FsValUl_).CompareTo(Ps2Float(_FtValUl_)) == 0) ? 
(_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) == fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + case 1: // <= + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + int32_t cmpResult = Ps2Float(_FsValUl_).CompareTo(Ps2Float(_FtValUl_)); + _ContVal_ = (cmpResult == 0 || cmpResult == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) <= fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + case 2: // < + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _ContVal_ = (Ps2Float(_FsValUl_).CompareTo(Ps2Float(_FtValUl_)) == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) < fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + } +} + void ABS_S() { _FdValUl_ = _FsValUl_ & 0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); @@ -313,7 +332,7 @@ void BC1TL() { } void C_EQ() { - C_cond_S(==); + C_cond_S(0); } void C_F() { @@ -321,11 +340,11 @@ void C_F() { } void C_LE() { - C_cond_S(<=); + C_cond_S(1); } void C_LT() { - C_cond_S(<); + C_cond_S(2); } void CFC1() { From 3bd88f7e8e822a3f67cc3414a3a2f2c676273d93 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Wed, 13 Nov 2024 21:15:21 +0100 Subject: [PATCH 04/15] [VUops] - Implements accurate SQRT/RSQRT + removal of TriAce hack. Implements accurate SQRT options, also removes Tri-Ace hack, which isn't needed anymore on the interpreter. 
--- pcsx2/VUops.cpp | 159 +++++++++++++++++++++++++++++------------------- 1 file changed, 95 insertions(+), 64 deletions(-) diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 05dedd522f..38728078d6 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -496,38 +496,6 @@ static __fi uint32_t vuAccurateMulDiv(VURegs* VU, u32 a, u32 b, bool isdiv) return std::bit_cast(vuDouble(a) * vuDouble(b)); } -static __fi float vuADD_TriAceHack(u32 a, u32 b) -{ - // On VU0 TriAce Games use ADDi and expects these bit-perfect results: - //if (a == 0xb3e2a619 && b == 0x42546666) return vuDouble(0x42546666); - //if (a == 0x8b5b19e9 && b == 0xc7f079b3) return vuDouble(0xc7f079b3); - //if (a == 0x4b1ed4a8 && b == 0x43a02666) return vuDouble(0x4b1ed5e7); - //if (a == 0x7d1ca47b && b == 0x42f23333) return vuDouble(0x7d1ca47b); - - // In the 3rd case, some other rounding error is giving us incorrect - // operands ('a' is wrong); and therefor an incorrect result. - // We're getting: 0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8 - // We should be getting: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7 - // microVU gets the correct operands and result. The interps likely - // don't get it due to rounding towards nearest in other calculations. 
- - // microVU uses something like this to get TriAce games working, - // but VU interpreters don't seem to need it currently: - - // Update Sept 2021, now the interpreters don't suck, they do - Refraction - // s32 aExp = (a >> 23) & 0xff; - // s32 bExp = (b >> 23) & 0xff; - // if (aExp - bExp >= 25) b &= 0x80000000; - // if (aExp - bExp <=-25) a &= 0x80000000; - // float ret = vuDouble(a) + vuDouble(b); - //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); - //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); - //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - - // Update November 2024, now the interpreters has soft float support - GithubProUser67 - return vuDouble(Ps2Float(a).Add(Ps2Float(b), 0).AsUInt32()); -} - void _vuABS(VURegs* VU) { if (_Ft_ == 0) @@ -1894,57 +1862,120 @@ static __fi void _vuDIV(VURegs* VU) static __fi void _vuSQRT(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); - VU->statusflag &= ~0x30; + VU->statusflag &= ~0x30; - if (ft < 0.0) - VU->statusflag |= 0x10; - VU->q.F = sqrt(fabs(ft)); - VU->q.F = vuDouble(VU->q.UL); + if (ft.ToDouble() < 0.0) + VU->statusflag |= 0x10; + VU->q.UL = Ps2Float(ft.Abs()).Sqrt().AsUInt32(); + } + else + { + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft < 0.0) + VU->statusflag |= 0x10; + VU->q.F = sqrt(fabs(ft)); + VU->q.F = vuDouble(VU->q.UL); + } } static __fi void _vuRSQRT(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - float temp; - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - VU->statusflag |= 0x20; + Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); + Ps2Float fs = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - if (fs != 0) + VU->statusflag &= ~0x30; + + if (ft.IsZero()) { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag |= 0x20; + + if (!fs.IsZero()) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = Ps2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = Ps2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } } else { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0x80000000; - else - VU->q.UL = 0; + if (ft.ToDouble() < 0.0) + { + VU->statusflag |= 0x10; + } - VU->statusflag |= 0x10; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + VU->q.UL = fs.Div(Ps2Float(ft.Abs()).Sqrt()).AsUInt32(); + else + { + float temp = sqrt(fabs(vuDouble(ft.AsUInt32()))); + VU->q.F = vuDouble(fs.AsUInt32()) / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } else { - if (ft < 0.0) - { - VU->statusflag |= 0x10; - } + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + float temp; - temp = sqrt(fabs(ft)); - VU->q.F = fs / temp; - VU->q.F = vuDouble(VU->q.UL); + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + VU->statusflag |= 0x20; + + if (fs != 0) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft < 0.0) + { + VU->statusflag |= 0x10; + } + + temp = sqrt(fabs(ft)); + VU->q.F = fs / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } From 3fe4277555ff653e74022ab0e40ccc88e4bffbf3 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sat, 16 Nov 2024 15:45:18 +0100 Subject: [PATCH 05/15] [Soft-Float] - Fixes Operand checking for denormals Add/Sub operations. 
--- pcsx2/Ps2Float.cpp | 25 ++++++++++++++++--------- pcsx2/Ps2Float.h | 2 ++ 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp index 1d4e336a49..020bc3fc80 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/Ps2Float.cpp @@ -303,6 +303,19 @@ int32_t Ps2Float::CompareTo(Ps2Float other) return 1; } +int32_t Ps2Float::CompareOperand(Ps2Float other) +{ + int32_t selfTwoComplementVal = (int32_t)(Abs()); + int32_t otherTwoComplementVal = (int32_t)(other.Abs()); + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + double Ps2Float::ToDouble() { return std::bit_cast(((u64)Sign << 63) | ((((u64)Exponent - BIAS) + 1023ULL) << 52) | ((u64)Mantissa << 29)); @@ -791,10 +804,8 @@ bool Ps2Float::DetermineAdditionOperationSign(Ps2Float a, Ps2Float b) else Console.Error("Unhandled addition operation flags"); } - else if (a.IsZero()) - return b.Sign; - - return a.Sign; + + return a.CompareOperand(b) >= 0 ? a.Sign : b.Sign; } bool Ps2Float::DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b) @@ -808,12 +819,8 @@ bool Ps2Float::DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b) else Console.Error("Unhandled subtraction operation flags"); } - else if (a.IsZero()) - return !b.Sign; - else if (b.IsZero()) - return a.Sign; - return a.CompareTo(b) >= 0 ? a.Sign : !b.Sign; + return a.CompareOperand(b) >= 0 ? 
a.Sign : !b.Sign; } int32_t Ps2Float::clz(int x) diff --git a/pcsx2/Ps2Float.h b/pcsx2/Ps2Float.h index 2453ee1473..9af889aa6b 100644 --- a/pcsx2/Ps2Float.h +++ b/pcsx2/Ps2Float.h @@ -66,6 +66,8 @@ public: int32_t CompareTo(Ps2Float other); + int32_t CompareOperand(Ps2Float other); + double ToDouble(); std::string ToString(); From d5e50284ff8a4873cbd457098e3401fc027a16db Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sat, 16 Nov 2024 19:02:09 +0100 Subject: [PATCH 06/15] [Soft-Float] - Removes "special" COP1 mode. It isn't accurate at all. --- pcsx2/FPU.cpp | 4 ++-- pcsx2/Ps2Float.cpp | 18 +++++++++--------- pcsx2/Ps2Float.h | 6 +++--- pcsx2/VUops.cpp | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 5eb286d4fb..5eefc00c66 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -224,9 +224,9 @@ static __fi uint32_t fpuAccurateAddSub(u32 a, u32 b, bool issub) if (CHECK_FPU_SOFT_ADDSUB) { if (issub) - return Ps2Float(a).Sub(Ps2Float(b), 1).AsUInt32(); + return Ps2Float(a).Sub(Ps2Float(b)).AsUInt32(); else - return Ps2Float(a).Add(Ps2Float(b), 1).AsUInt32(); + return Ps2Float(a).Add(Ps2Float(b)).AsUInt32(); } if (issub) diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp index 020bc3fc80..d1bb04bf98 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/Ps2Float.cpp @@ -64,13 +64,13 @@ uint32_t Ps2Float::AsUInt32() const return result; } -Ps2Float Ps2Float::Add(Ps2Float addend, bool COP1) +Ps2Float Ps2Float::Add(Ps2Float addend) { if (IsDenormalized() || addend.IsDenormalized()) return SolveAddSubDenormalizedOperation(*this, addend, true); if (IsAbnormal() && addend.IsAbnormal()) - return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true, COP1); + return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true); uint32_t a = AsUInt32(); uint32_t b = addend.AsUInt32(); @@ -111,13 +111,13 @@ Ps2Float Ps2Float::Add(Ps2Float addend, bool COP1) return 
Ps2Float(a).DoAdd(Ps2Float(b)); } -Ps2Float Ps2Float::Sub(Ps2Float subtrahend, bool COP1) +Ps2Float Ps2Float::Sub(Ps2Float subtrahend) { if (IsDenormalized() || subtrahend.IsDenormalized()) return SolveAddSubDenormalizedOperation(*this, subtrahend, false); if (IsAbnormal() && subtrahend.IsAbnormal()) - return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false, COP1); + return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false); uint32_t a = AsUInt32(); uint32_t b = subtrahend.AsUInt32(); @@ -581,7 +581,7 @@ Ps2Float Ps2Float::DoDiv(Ps2Float other) return result.RoundTowardsZero(); } -Ps2Float Ps2Float::SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add, bool COP1) +Ps2Float Ps2Float::SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add) { uint32_t aval = a.AsUInt32(); uint32_t bval = b.AsUInt32(); @@ -593,19 +593,19 @@ Ps2Float Ps2Float::SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Fl return add ? Min() : Ps2Float(0); if (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return COP1 ? Min() : (add ? Ps2Float(0) : Min()); + return add ? Ps2Float(0) : Min(); if (aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return COP1 ? Max() : (add ? Ps2Float(0) : Max()); + return add ? Ps2Float(0) : Max(); if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) return add ? Max() : Ps2Float(0); if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return COP1 ? Min() : (add ? Ps2Float(0) : Min()); + return add ? Ps2Float(0) : Min(); if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return COP1 ? Max() : (add ? Ps2Float(0) : Max()); + return add ? Ps2Float(0) : Max(); if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) return add ? 
Min() : Ps2Float(0); diff --git a/pcsx2/Ps2Float.h b/pcsx2/Ps2Float.h index 9af889aa6b..ceffb01af7 100644 --- a/pcsx2/Ps2Float.h +++ b/pcsx2/Ps2Float.h @@ -42,9 +42,9 @@ public: uint32_t AsUInt32() const; - Ps2Float Add(Ps2Float addend, bool COP1); + Ps2Float Add(Ps2Float addend); - Ps2Float Sub(Ps2Float subtrahend, bool COP1); + Ps2Float Sub(Ps2Float subtrahend); Ps2Float Mul(Ps2Float mulend); @@ -82,7 +82,7 @@ private: Ps2Float DoDiv(Ps2Float other); - static Ps2Float SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add, bool COP1); + static Ps2Float SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add); static Ps2Float SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 38728078d6..60e26a707f 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -468,9 +468,9 @@ static __fi uint32_t vuAccurateAddSub(VURegs* VU, u32 a, u32 b, bool issub) if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { if (issub) - return Ps2Float(a).Sub(Ps2Float(b), 0).AsUInt32(); + return Ps2Float(a).Sub(Ps2Float(b)).AsUInt32(); else - return Ps2Float(a).Add(Ps2Float(b), 0).AsUInt32(); + return Ps2Float(a).Add(Ps2Float(b)).AsUInt32(); } if (issub) From 98e3df3cb97ea43ff3d1c59283199a390a8af464 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Thu, 21 Nov 2024 20:45:02 +0100 Subject: [PATCH 07/15] [Soft-Float] - Implements fully accurate Mul operations. Fixes: https://github.com/PCSX2/pcsx2/issues/5169 All credit belongs to TellowKrinkle from the PCSX2 team. Also removes a useless rounding towards zero in DoAdd. 
--- pcsx2/BoothMultiplier.cpp | 64 +++++++++++++++++ pcsx2/BoothMultiplier.h | 30 ++++++++ pcsx2/CMakeLists.txt | 2 + pcsx2/Ps2Float.cpp | 137 ++++++++---------------------------- pcsx2/pcsx2.vcxproj | 2 + pcsx2/pcsx2.vcxproj.filters | 6 ++ 6 files changed, 132 insertions(+), 109 deletions(-) create mode 100644 pcsx2/BoothMultiplier.cpp create mode 100644 pcsx2/BoothMultiplier.h diff --git a/pcsx2/BoothMultiplier.cpp b/pcsx2/BoothMultiplier.cpp new file mode 100644 index 0000000000..a008ae3cb7 --- /dev/null +++ b/pcsx2/BoothMultiplier.cpp @@ -0,0 +1,64 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include "BoothMultiplier.h" + +BoothMultiplier::BoothRecode BoothMultiplier::Booth(uint32_t a, uint32_t b, uint32_t bit) +{ + uint32_t test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; + a <<= (bit * 2); + a += (test == 3 || test == 4) ? a : 0; + uint32_t neg = (test >= 4 && test <= 6) ? ~0u : 0; + uint32_t pos = 1 << (bit * 2); + a ^= (neg & -pos); + a &= (test >= 1 && test <= 6) ? 
~0u : 0; + return {a, neg & pos}; +} + +BoothMultiplier::AddResult BoothMultiplier::Add3(uint32_t a, uint32_t b, uint32_t c) +{ + uint32_t u = a ^ b; + return {u ^ c, ((u & c) | (a & b)) << 1}; +} + +uint64_t BoothMultiplier::MulMantissa(uint32_t a, uint32_t b) +{ + uint64_t full = static_cast(a) * static_cast(b); + BoothRecode b0 = Booth(a, b, 0); + BoothRecode b1 = Booth(a, b, 1); + BoothRecode b2 = Booth(a, b, 2); + BoothRecode b3 = Booth(a, b, 3); + BoothRecode b4 = Booth(a, b, 4); + BoothRecode b5 = Booth(a, b, 5); + BoothRecode b6 = Booth(a, b, 6); + BoothRecode b7 = Booth(a, b, 7); + + // First cycle + AddResult t0 = Add3(b1.data, b2.data, b3.data); + AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); + // A few adds get skipped, squeeze them back in + t1.hi |= b6.negate | (b5.data & 0x800); + b7.data |= (b5.data & 0x400) + b5.negate; + + // Second cycle + AddResult t2 = Add3(b0.data, t0.lo, t0.hi); + AddResult t3 = Add3(b7.data, t1.lo, t1.hi); + + // Third cycle + AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); + + // Fourth cycle + AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); + + // Discard bits and sum + t5.hi += b7.negate; + t5.lo &= ~0x7fffu; + t5.hi &= ~0x7fffu; + uint32_t ps2lo = t5.lo + t5.hi; + return full - ((ps2lo ^ full) & 0x8000); +} \ No newline at end of file diff --git a/pcsx2/BoothMultiplier.h b/pcsx2/BoothMultiplier.h new file mode 100644 index 0000000000..17bb602a75 --- /dev/null +++ b/pcsx2/BoothMultiplier.h @@ -0,0 +1,30 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +class BoothMultiplier +{ +public: + + static uint64_t MulMantissa(uint32_t a, uint32_t b); + +protected: + +private: + struct BoothRecode + { + uint32_t data; + uint32_t negate; + }; + + struct AddResult + { + uint32_t lo; + uint32_t hi; + }; + + static BoothRecode Booth(uint32_t a, uint32_t b, uint32_t bit); + + static AddResult Add3(uint32_t a, uint32_t b, uint32_t c); +}; \ No newline at end of file 
diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 8f78ed7a63..868fcbc786 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + BoothMultiplier.cpp Ps2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp @@ -174,6 +175,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + BoothMultiplier.h Ps2Float.h Patch.h PerformanceMetrics.h diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp index d1bb04bf98..354622e403 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/Ps2Float.cpp @@ -9,6 +9,7 @@ #include #include #include "Ps2Float.h" +#include "BoothMultiplier.h" #include "Common.h" const uint8_t Ps2Float::BIAS = 127; @@ -77,26 +78,26 @@ Ps2Float Ps2Float::Add(Ps2Float addend) int32_t temp = 0; //exponent difference - int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff); + int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); //diff = 25 .. 255 , expt < expd if (exp_diff >= 25) { - b = b & Ps2Float::SIGNMASK; + b = b & SIGNMASK; } //diff = 1 .. 24, expt < expd else if (exp_diff > 0) { exp_diff = exp_diff - 1; - temp = 0xffffffff << exp_diff; + temp = MIN_FLOATING_POINT_VALUE << exp_diff; b = temp & b; } //diff = -255 .. -25, expd < expt else if (exp_diff <= -25) { - a = a & Ps2Float::SIGNMASK; + a = a & SIGNMASK; } //diff = -24 .. -1 , expd < expt @@ -104,7 +105,7 @@ Ps2Float Ps2Float::Add(Ps2Float addend) { exp_diff = -exp_diff; exp_diff = exp_diff - 1; - temp = 0xffffffff << exp_diff; + temp = MIN_FLOATING_POINT_VALUE << exp_diff; a = a & temp; } @@ -124,26 +125,26 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend) int32_t temp = 0; //exponent difference - int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff); + int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); //diff = 25 .. 255 , expt < expd if (exp_diff >= 25) { - b = b & Ps2Float::SIGNMASK; + b = b & SIGNMASK; } //diff = 1 .. 
24, expt < expd else if (exp_diff > 0) { exp_diff = exp_diff - 1; - temp = 0xffffffff << exp_diff; + temp = MIN_FLOATING_POINT_VALUE << exp_diff; b = temp & b; } //diff = -255 .. -25, expd < expt else if (exp_diff <= -25) { - a = a & Ps2Float::SIGNMASK; + a = a & SIGNMASK; } //diff = -24 .. -1 , expd < expt @@ -151,7 +152,7 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend) { exp_diff = -exp_diff; exp_diff = exp_diff - 1; - temp = 0xffffffff << exp_diff; + temp = MIN_FLOATING_POINT_VALUE << exp_diff; a = a & temp; } @@ -215,7 +216,7 @@ Ps2Float Ps2Float::Sqrt() /* extract mantissa and unbias exponent */ int32_t m = (ix >> 23) - BIAS; - ix = (ix & 0x007fffff) | 0x00800000; + ix = (ix & 0x007FFFFF) | 0x00800000; if ((m & 1) == 1) { /* odd m, double x to make it even */ @@ -247,7 +248,7 @@ Ps2Float Ps2Float::Sqrt() q += q & 1; } - ix = (q >> 1) + 0x3f000000; + ix = (q >> 1) + 0x3F000000; ix += m << 23; return Ps2Float((uint32_t)(ix)); @@ -397,114 +398,32 @@ Ps2Float Ps2Float::DoAdd(Ps2Float other) else if (rawExp <= 0) return Ps2Float(man < 0, 0, 0); - return Ps2Float((uint32_t)man & Ps2Float::SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)).RoundTowardsZero(); + return Ps2Float((uint32_t)man & SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)); } Ps2Float Ps2Float::DoMul(Ps2Float other) { + uint8_t selfExponent = Exponent; + uint8_t otherExponent = other.Exponent; uint32_t selfMantissa = Mantissa | 0x800000; uint32_t otherMantissa = other.Mantissa | 0x800000; - int32_t resExponent = Exponent + other.Exponent - BIAS; + uint32_t sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK; - Ps2Float result = Ps2Float(0); + int32_t resExponent = selfExponent + otherExponent - 127; + uint32_t resMantissa = (uint32_t)(BoothMultiplier::MulMantissa(selfMantissa, otherMantissa) >> 23); - result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); + if (resMantissa > 0xFFFFFF) + { + resMantissa >>= 1; + resExponent++; + } if (resExponent > 
255) - return result.Sign ? Min() : Max(); - else if (resExponent < 0) - return Ps2Float(result.Sign, 0, 0); + return Ps2Float(sign | MAX_FLOATING_POINT_VALUE); + else if (resExponent <= 0) + return Ps2Float(sign); - uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not - int64_t res = 0; - uint64_t mask = 0xFFFFFFFFFFFFFFFF; - - result.Exponent = (uint8_t)(resExponent); - - otherMantissa <<= 1; - - uint32_t part[13]; // Partial products - uint32_t bit[13]; // More partial products. 0 or 1. - - for (int i = 0; i <= 12; i++, otherMantissa >>= 2) - { - uint32_t test = otherMantissa & 7; - if (test == 0 || test == 7) - { - part[i] = 0; - bit[i] = 0; - } - else if (test == 3) - { - part[i] = (selfMantissa << 1); - bit[i] = 0; - } - else if (test == 4) - { - part[i] = ~(selfMantissa << 1); - bit[i] = 1; - } - else if (test < 4) - { - part[i] = selfMantissa; - bit[i] = 0; - } - else - { - part[i] = ~selfMantissa; - bit[i] = 1; - } - } - - for (int i = 0; i <= 12; i++) - { - res += (uint64_t)(int32_t)part[i] << (i * 2); - res &= mask; - res += bit[i] << (i * 2); - } - - result.Mantissa = (uint32_t)(res >> 23); - - if ((testImprecision & 0x000aaa) && !(res & 0x7FFFFF)) - result.Mantissa -= 1; - - if (result.Mantissa > 0) - { - int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa); - - while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) - { - if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) - { - result.Mantissa >>= 1; - - int32_t exp = ((int32_t)result.Exponent + 1); - - if (exp > 255) - return result.Sign ? 
Min() : Max(); - - result.Exponent = (uint8_t)exp; - - leadingBitPosition--; - } - else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) - { - result.Mantissa <<= 1; - - int32_t exp = ((int32_t)result.Exponent - 1); - - if (exp <= 0) - return Ps2Float(result.Sign, 0, 0); - - result.Exponent = (uint8_t)exp; - - leadingBitPosition++; - } - } - } - - result.Mantissa &= 0x7FFFFF; - return result.RoundTowardsZero(); + return Ps2Float(sign | (uint32_t)(resExponent << 23) | (resMantissa & 0x7FFFFF)); } Ps2Float Ps2Float::DoDiv(Ps2Float other) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 53b9a41c37..9fb77e262a 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -126,6 +126,7 @@ true + @@ -583,6 +584,7 @@ true + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index c7f2c7dd50..2e6adfb733 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1449,6 +1449,9 @@ System\Ps2\EmotionEngine\Shared + + System\Ps2\EmotionEngine\Shared + @@ -2408,6 +2411,9 @@ System\Ps2\EmotionEngine\Shared + + System\Ps2\EmotionEngine\Shared + From 34753ae109122fa9ae72b915bc2b75690c8ca2d9 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Thu, 21 Nov 2024 22:50:15 +0100 Subject: [PATCH 08/15] Arrange code style to accommodate requested changes. More in line with the PCSX2 code-base. 
--- pcsx2/BoothMultiplier.cpp | 64 ------ pcsx2/BoothMultiplier.h | 30 --- pcsx2/CMakeLists.txt | 6 +- pcsx2/FPU.cpp | 46 ++-- pcsx2/Ps2Float.cpp | 408 +++++++++++++++++++++--------------- pcsx2/Ps2Float.h | 110 ++++++---- pcsx2/VU.h | 2 +- pcsx2/VUflags.cpp | 28 +-- pcsx2/VUflags.h | 18 +- pcsx2/VUops.cpp | 158 +++++++------- pcsx2/pcsx2.vcxproj | 6 +- pcsx2/pcsx2.vcxproj.filters | 10 +- 12 files changed, 431 insertions(+), 455 deletions(-) delete mode 100644 pcsx2/BoothMultiplier.cpp delete mode 100644 pcsx2/BoothMultiplier.h diff --git a/pcsx2/BoothMultiplier.cpp b/pcsx2/BoothMultiplier.cpp deleted file mode 100644 index a008ae3cb7..0000000000 --- a/pcsx2/BoothMultiplier.cpp +++ /dev/null @@ -1,64 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team -// SPDX-License-Identifier: GPL-3.0+ - -#include -#include -#include -#include -#include -#include "BoothMultiplier.h" - -BoothMultiplier::BoothRecode BoothMultiplier::Booth(uint32_t a, uint32_t b, uint32_t bit) -{ - uint32_t test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; - a <<= (bit * 2); - a += (test == 3 || test == 4) ? a : 0; - uint32_t neg = (test >= 4 && test <= 6) ? ~0u : 0; - uint32_t pos = 1 << (bit * 2); - a ^= (neg & -pos); - a &= (test >= 1 && test <= 6) ? 
~0u : 0; - return {a, neg & pos}; -} - -BoothMultiplier::AddResult BoothMultiplier::Add3(uint32_t a, uint32_t b, uint32_t c) -{ - uint32_t u = a ^ b; - return {u ^ c, ((u & c) | (a & b)) << 1}; -} - -uint64_t BoothMultiplier::MulMantissa(uint32_t a, uint32_t b) -{ - uint64_t full = static_cast(a) * static_cast(b); - BoothRecode b0 = Booth(a, b, 0); - BoothRecode b1 = Booth(a, b, 1); - BoothRecode b2 = Booth(a, b, 2); - BoothRecode b3 = Booth(a, b, 3); - BoothRecode b4 = Booth(a, b, 4); - BoothRecode b5 = Booth(a, b, 5); - BoothRecode b6 = Booth(a, b, 6); - BoothRecode b7 = Booth(a, b, 7); - - // First cycle - AddResult t0 = Add3(b1.data, b2.data, b3.data); - AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); - // A few adds get skipped, squeeze them back in - t1.hi |= b6.negate | (b5.data & 0x800); - b7.data |= (b5.data & 0x400) + b5.negate; - - // Second cycle - AddResult t2 = Add3(b0.data, t0.lo, t0.hi); - AddResult t3 = Add3(b7.data, t1.lo, t1.hi); - - // Third cycle - AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); - - // Fourth cycle - AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); - - // Discard bits and sum - t5.hi += b7.negate; - t5.lo &= ~0x7fffu; - t5.hi &= ~0x7fffu; - uint32_t ps2lo = t5.lo + t5.hi; - return full - ((ps2lo ^ full) & 0x8000); -} \ No newline at end of file diff --git a/pcsx2/BoothMultiplier.h b/pcsx2/BoothMultiplier.h deleted file mode 100644 index 17bb602a75..0000000000 --- a/pcsx2/BoothMultiplier.h +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team -// SPDX-License-Identifier: GPL-3.0+ - -#pragma once - -class BoothMultiplier -{ -public: - - static uint64_t MulMantissa(uint32_t a, uint32_t b); - -protected: - -private: - struct BoothRecode - { - uint32_t data; - uint32_t negate; - }; - - struct AddResult - { - uint32_t lo; - uint32_t hi; - }; - - static BoothRecode Booth(uint32_t a, uint32_t b, uint32_t bit); - - static AddResult Add3(uint32_t a, uint32_t b, uint32_t c); -}; \ No newline at end of 
file diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 868fcbc786..4e0a82db9e 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,8 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp - BoothMultiplier.cpp - Ps2Float.cpp + PS2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -175,8 +174,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h - BoothMultiplier.h - Ps2Float.h + PS2Float.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 5eefc00c66..bab1cd9bc1 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" -#include "Ps2Float.h" +#include "PS2Float.h" #include // Helper Macros @@ -65,7 +65,7 @@ bool checkOverflow(u32& xReg, u32 cFlagsToSet) { if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - if (xReg == Ps2Float::MAX_FLOATING_POINT_VALUE || xReg == Ps2Float::MIN_FLOATING_POINT_VALUE) + if (xReg == PS2Float::MAX_FLOATING_POINT_VALUE || xReg == PS2Float::MIN_FLOATING_POINT_VALUE) { _ContVal_ |= (cFlagsToSet); return true; @@ -94,7 +94,7 @@ bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - if (Ps2Float(xReg).IsDenormalized()) + if (PS2Float(xReg).IsDenormalized()) { _ContVal_ |= (cFlagsToSet); return true; @@ -137,8 +137,8 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - Ps2Float yMatrix = Ps2Float(yDivisorReg); - Ps2Float zMatrix = Ps2Float(zDividendReg); + PS2Float yMatrix = PS2Float(yDivisorReg); + PS2Float zMatrix = PS2Float(zDividendReg); if (yMatrix.IsZero()) { @@ -149,10 +149,10 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT bool IsSigned = yMatrix.Sign ^ zMatrix.Sign; if (dividendZero) - xReg = IsSigned ? 
Ps2Float::MIN_FLOATING_POINT_VALUE : Ps2Float::MAX_FLOATING_POINT_VALUE; + xReg = IsSigned ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; else { - Ps2Float zeroRes = Ps2Float(0); + PS2Float zeroRes = PS2Float(0); zeroRes.Sign = IsSigned; xReg = zeroRes.AsUInt32(); @@ -219,14 +219,14 @@ float fpuDouble(u32 f) } } -static __fi uint32_t fpuAccurateAddSub(u32 a, u32 b, bool issub) +static __fi u32 fpuAccurateAddSub(u32 a, u32 b, bool issub) { if (CHECK_FPU_SOFT_ADDSUB) { if (issub) - return Ps2Float(a).Sub(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); else - return Ps2Float(a).Add(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Add(PS2Float(b)).AsUInt32(); } if (issub) @@ -235,14 +235,14 @@ static __fi uint32_t fpuAccurateAddSub(u32 a, u32 b, bool issub) return std::bit_cast(fpuDouble(a) + fpuDouble(b)); } -static __fi uint32_t fpuAccurateMulDiv(u32 a, u32 b, bool isdiv) +static __fi u32 fpuAccurateMulDiv(u32 a, u32 b, bool isdiv) { if (CHECK_FPU_SOFT_MULDIV) { if (isdiv) - return Ps2Float(a).Div(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Div(PS2Float(b)).AsUInt32(); else - return Ps2Float(a).Mul(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); } if (isdiv) @@ -267,7 +267,7 @@ static __fi void C_cond_S(uint8_t mode) case 0: // == if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _ContVal_ = (Ps2Float(_FsValUl_).CompareTo(Ps2Float(_FtValUl_)) == 0) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + _ContVal_ = (PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)) == 0) ? 
(_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); } else { @@ -277,7 +277,7 @@ static __fi void C_cond_S(uint8_t mode) case 1: // <= if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - int32_t cmpResult = Ps2Float(_FsValUl_).CompareTo(Ps2Float(_FtValUl_)); + int32_t cmpResult = PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)); _ContVal_ = (cmpResult == 0 || cmpResult == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); } else @@ -288,7 +288,7 @@ static __fi void C_cond_S(uint8_t mode) case 2: // < if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _ContVal_ = (Ps2Float(_FsValUl_).CompareTo(Ps2Float(_FtValUl_)) == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + _ContVal_ = (PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)) == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); } else { @@ -371,7 +371,7 @@ void CVT_S() { void CVT_W() { if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _FdValSl_ = double_to_int(Ps2Float(_FsValUl_).ToDouble()); + _FdValSl_ = double_to_int(PS2Float(_FsValUl_).ToDouble()); } else { @@ -479,22 +479,22 @@ void RSQRT_S() { if (CHECK_FPU_SOFT_SQRT) { - Ps2Float value = Ps2Float(_FtValUl_); + PS2Float value = PS2Float(_FtValUl_); if (value.IsDenormalized()) { _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = value.Sign ? Ps2Float::MIN_FLOATING_POINT_VALUE : Ps2Float::MAX_FLOATING_POINT_VALUE; + _FdValUl_ = value.Sign ? 
PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; return; } else if (_FtValUl_ & 0x80000000) { // Ft is negative _ContVal_ |= FPUflagI | FPUflagSI; - _FdValUl_ = Ps2Float(_FsValUl_).Rsqrt(Ps2Float(value.Abs())).AsUInt32(); + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(PS2Float(value.Abs())).AsUInt32(); } else { - _FdValUl_ = Ps2Float(_FsValUl_).Rsqrt(value).AsUInt32(); + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(value).AsUInt32(); } // Ft is positive and not zero } else @@ -526,12 +526,12 @@ void SQRT_S() { if (CHECK_FPU_SOFT_SQRT) { - Ps2Float value = Ps2Float(_FtValUl_); + PS2Float value = PS2Float(_FtValUl_); if (_FtValUl_ & 0x80000000) { // If Ft is Negative _ContVal_ |= FPUflagI | FPUflagSI; - _FdValUl_ = Ps2Float(value.Abs()).Sqrt().AsUInt32(); + _FdValUl_ = PS2Float(value.Abs()).Sqrt().AsUInt32(); } else _FdValUl_ = value.Sqrt().AsUInt32(); // If Ft is Positive diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp index 354622e403..afe2619d46 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/Ps2Float.cpp @@ -8,64 +8,126 @@ #include #include #include -#include "Ps2Float.h" -#include "BoothMultiplier.h" +#include "PS2Float.h" #include "Common.h" -const uint8_t Ps2Float::BIAS = 127; -const uint32_t Ps2Float::SIGNMASK = 0x80000000; -const uint32_t Ps2Float::MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; -const uint32_t Ps2Float::MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; -const uint32_t Ps2Float::POSITIVE_INFINITY_VALUE = 0x7F800000; -const uint32_t Ps2Float::NEGATIVE_INFINITY_VALUE = 0xFF800000; -const uint32_t Ps2Float::ONE = 0x3F800000; -const uint32_t Ps2Float::MIN_ONE = 0xBF800000; -const int32_t Ps2Float::IMPLICIT_LEADING_BIT_POS = 23; +const u8 PS2Float::BIAS = 127; +const u32 PS2Float::SIGNMASK = 0x80000000; +const u32 PS2Float::MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; +const u32 PS2Float::MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; +const u32 PS2Float::POSITIVE_INFINITY_VALUE = 0x7F800000; +const u32 PS2Float::NEGATIVE_INFINITY_VALUE = 0xFF800000; +const u32 
PS2Float::ONE = 0x3F800000; +const u32 PS2Float::MIN_ONE = 0xBF800000; +const s32 PS2Float::IMPLICIT_LEADING_BIT_POS = 23; -Ps2Float::Ps2Float(uint32_t value) +//**************************************************************** +// Booth Multiplier +//**************************************************************** + +PS2Float::BoothRecode PS2Float::Booth(u32 a, u32 b, u32 bit) +{ + u32 test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; + a <<= (bit * 2); + a += (test == 3 || test == 4) ? a : 0; + u32 neg = (test >= 4 && test <= 6) ? ~0u : 0; + u32 pos = 1 << (bit * 2); + a ^= (neg & -pos); + a &= (test >= 1 && test <= 6) ? ~0u : 0; + return {a, neg & pos}; +} + +PS2Float::AddResult PS2Float::Add3(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + return {u ^ c, ((u & c) | (a & b)) << 1}; +} + +u64 PS2Float::MulMantissa(u32 a, u32 b) +{ + u64 full = static_cast(a) * static_cast(b); + BoothRecode b0 = Booth(a, b, 0); + BoothRecode b1 = Booth(a, b, 1); + BoothRecode b2 = Booth(a, b, 2); + BoothRecode b3 = Booth(a, b, 3); + BoothRecode b4 = Booth(a, b, 4); + BoothRecode b5 = Booth(a, b, 5); + BoothRecode b6 = Booth(a, b, 6); + BoothRecode b7 = Booth(a, b, 7); + + // First cycle + AddResult t0 = Add3(b1.data, b2.data, b3.data); + AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); + // A few adds get skipped, squeeze them back in + t1.hi |= b6.negate | (b5.data & 0x800); + b7.data |= (b5.data & 0x400) + b5.negate; + + // Second cycle + AddResult t2 = Add3(b0.data, t0.lo, t0.hi); + AddResult t3 = Add3(b7.data, t1.lo, t1.hi); + + // Third cycle + AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); + + // Fourth cycle + AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); + + // Discard bits and sum + t5.hi += b7.negate; + t5.lo &= ~0x7fffu; + t5.hi &= ~0x7fffu; + u32 ps2lo = t5.lo + t5.hi; + return full - ((ps2lo ^ full) & 0x8000); +} + +//**************************************************************** +// Float Processor 
+//**************************************************************** + +PS2Float::PS2Float(u32 value) : Sign((value >> 31) & 1) - , Exponent((uint8_t)(((value >> 23) & 0xFF))) + , Exponent((u8)(((value >> 23) & 0xFF))) , Mantissa(value & 0x7FFFFF) { } -Ps2Float::Ps2Float(bool sign, uint8_t exponent, uint32_t mantissa) +PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) : Sign(sign) , Exponent(exponent) , Mantissa(mantissa) { } -Ps2Float Ps2Float::Max() +PS2Float PS2Float::Max() { - return Ps2Float(MAX_FLOATING_POINT_VALUE); + return PS2Float(MAX_FLOATING_POINT_VALUE); } -Ps2Float Ps2Float::Min() +PS2Float PS2Float::Min() { - return Ps2Float(MIN_FLOATING_POINT_VALUE); + return PS2Float(MIN_FLOATING_POINT_VALUE); } -Ps2Float Ps2Float::One() +PS2Float PS2Float::One() { - return Ps2Float(ONE); + return PS2Float(ONE); } -Ps2Float Ps2Float::MinOne() +PS2Float PS2Float::MinOne() { - return Ps2Float(MIN_ONE); + return PS2Float(MIN_ONE); } -uint32_t Ps2Float::AsUInt32() const +u32 PS2Float::AsUInt32() const { - uint32_t result = 0; + u32 result = 0; result |= (Sign ? 1u : 0u) << 31; - result |= (uint32_t)(Exponent << 23); + result |= (u32)(Exponent << 23); result |= Mantissa; return result; } -Ps2Float Ps2Float::Add(Ps2Float addend) +PS2Float PS2Float::Add(PS2Float addend) { if (IsDenormalized() || addend.IsDenormalized()) return SolveAddSubDenormalizedOperation(*this, addend, true); @@ -73,12 +135,11 @@ Ps2Float Ps2Float::Add(Ps2Float addend) if (IsAbnormal() && addend.IsAbnormal()) return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true); - uint32_t a = AsUInt32(); - uint32_t b = addend.AsUInt32(); - int32_t temp = 0; + u32 a = AsUInt32(); + u32 b = addend.AsUInt32(); //exponent difference - int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); + s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); //diff = 25 .. 
255 , expt < expd if (exp_diff >= 25) @@ -90,8 +151,7 @@ Ps2Float Ps2Float::Add(Ps2Float addend) else if (exp_diff > 0) { exp_diff = exp_diff - 1; - temp = MIN_FLOATING_POINT_VALUE << exp_diff; - b = temp & b; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; } //diff = -255 .. -25, expd < expt @@ -105,14 +165,13 @@ Ps2Float Ps2Float::Add(Ps2Float addend) { exp_diff = -exp_diff; exp_diff = exp_diff - 1; - temp = MIN_FLOATING_POINT_VALUE << exp_diff; - a = a & temp; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); } - return Ps2Float(a).DoAdd(Ps2Float(b)); + return PS2Float(a).DoAdd(PS2Float(b)); } -Ps2Float Ps2Float::Sub(Ps2Float subtrahend) +PS2Float PS2Float::Sub(PS2Float subtrahend) { if (IsDenormalized() || subtrahend.IsDenormalized()) return SolveAddSubDenormalizedOperation(*this, subtrahend, false); @@ -120,12 +179,11 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend) if (IsAbnormal() && subtrahend.IsAbnormal()) return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false); - uint32_t a = AsUInt32(); - uint32_t b = subtrahend.AsUInt32(); - int32_t temp = 0; + u32 a = AsUInt32(); + u32 b = subtrahend.AsUInt32(); //exponent difference - int exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); + s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); //diff = 25 .. 255 , expt < expd if (exp_diff >= 25) @@ -137,8 +195,7 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend) else if (exp_diff > 0) { exp_diff = exp_diff - 1; - temp = MIN_FLOATING_POINT_VALUE << exp_diff; - b = temp & b; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; } //diff = -255 .. 
-25, expd < expt @@ -152,15 +209,14 @@ Ps2Float Ps2Float::Sub(Ps2Float subtrahend) { exp_diff = -exp_diff; exp_diff = exp_diff - 1; - temp = MIN_FLOATING_POINT_VALUE << exp_diff; - a = a & temp; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); } - return Ps2Float(a).DoAdd(Neg(Ps2Float(b))); + return PS2Float(a).DoAdd(Neg(PS2Float(b))); } -Ps2Float Ps2Float::Mul(Ps2Float mulend) +PS2Float PS2Float::Mul(PS2Float mulend) { if (IsDenormalized() || mulend.IsDenormalized()) return SolveMultiplicationDenormalizedOperation(*this, mulend); @@ -170,7 +226,7 @@ Ps2Float Ps2Float::Mul(Ps2Float mulend) if (IsZero() || mulend.IsZero()) { - Ps2Float result = Ps2Float(0); + PS2Float result = PS2Float(0); result.Sign = DetermineMultiplicationDivisionOperationSign(*this, mulend); return result; @@ -179,7 +235,7 @@ Ps2Float Ps2Float::Mul(Ps2Float mulend) return DoMul(mulend); } -Ps2Float Ps2Float::Div(Ps2Float divend) +PS2Float PS2Float::Div(PS2Float divend) { if (IsDenormalized() || divend.IsDenormalized()) return SolveDivisionDenormalizedOperation(*this, divend); @@ -189,7 +245,7 @@ Ps2Float Ps2Float::Div(Ps2Float divend) if (IsZero()) { - Ps2Float result = Ps2Float(0); + PS2Float result = PS2Float(0); result.Sign = DetermineMultiplicationDivisionOperationSign(*this, divend); return result; @@ -200,21 +256,22 @@ Ps2Float Ps2Float::Div(Ps2Float divend) return DoDiv(divend); } -Ps2Float Ps2Float::Sqrt() +// Rounding can be slightly off: (PS2: rsqrt(0x7FFFFFF0) -> 0x5FB504ED | SoftFloat/IEEE754 rsqrt(0x7FFFFFF0) -> 0x5FB504EE). +PS2Float PS2Float::Sqrt() { - int32_t t; - int32_t s = 0; - int32_t q = 0; - uint32_t r = 0x01000000; /* r = moving bit from right to left */ + s32 t; + s32 s = 0; + s32 q = 0; + u32 r = 0x01000000; /* r = moving bit from right to left */ if (IsDenormalized()) - return Ps2Float(0); + return PS2Float(0); // PS2 only takes positive numbers for SQRT, and convert if necessary. 
- int32_t ix = (int32_t)(Ps2Float(false, Exponent, Mantissa).AsUInt32()); + s32 ix = (s32)(PS2Float(false, Exponent, Mantissa).AsUInt32()); /* extract mantissa and unbias exponent */ - int32_t m = (ix >> 23) - BIAS; + s32 m = (ix >> 23) - BIAS; ix = (ix & 0x007FFFFF) | 0x00800000; if ((m & 1) == 1) @@ -230,12 +287,12 @@ Ps2Float Ps2Float::Sqrt() while (r != 0) { - t = s + (int32_t)(r); + t = s + (s32)(r); if (t <= ix) { - s = t + (int32_t)(r); + s = t + (s32)(r); ix -= t; - q += (int32_t)(r); + q += (s32)(r); } ix += ix; @@ -251,48 +308,48 @@ Ps2Float Ps2Float::Sqrt() ix = (q >> 1) + 0x3F000000; ix += m << 23; - return Ps2Float((uint32_t)(ix)); + return PS2Float((u32)(ix)); } -Ps2Float Ps2Float::Rsqrt(Ps2Float other) +PS2Float PS2Float::Rsqrt(PS2Float other) { return Div(other.Sqrt()); } -bool Ps2Float::IsDenormalized() +bool PS2Float::IsDenormalized() { return Exponent == 0; } -bool Ps2Float::IsAbnormal() +bool PS2Float::IsAbnormal() { - uint32_t val = AsUInt32(); + u32 val = AsUInt32(); return val == MAX_FLOATING_POINT_VALUE || val == MIN_FLOATING_POINT_VALUE || val == POSITIVE_INFINITY_VALUE || val == NEGATIVE_INFINITY_VALUE; } -bool Ps2Float::IsZero() +bool PS2Float::IsZero() { return (Abs()) == 0; } -uint32_t Ps2Float::Abs() +u32 PS2Float::Abs() { return (AsUInt32() & MAX_FLOATING_POINT_VALUE); } -Ps2Float Ps2Float::RoundTowardsZero() +PS2Float PS2Float::RoundTowardsZero() { - return Ps2Float((uint32_t)(std::trunc((double)(AsUInt32())))); + return PS2Float((u32)(std::trunc((double)(AsUInt32())))); } -int32_t Ps2Float::CompareTo(Ps2Float other) +s32 PS2Float::CompareTo(PS2Float other) { - int32_t selfTwoComplementVal = (int32_t)(Abs()); + s32 selfTwoComplementVal = (s32)(Abs()); if (Sign) selfTwoComplementVal = -selfTwoComplementVal; - int32_t otherTwoComplementVal = (int32_t)(other.Abs()); + s32 otherTwoComplementVal = (s32)(other.Abs()); if (other.Sign) otherTwoComplementVal = -otherTwoComplementVal; @@ -304,10 +361,10 @@ int32_t Ps2Float::CompareTo(Ps2Float 
other) return 1; } -int32_t Ps2Float::CompareOperand(Ps2Float other) +s32 PS2Float::CompareOperand(PS2Float other) { - int32_t selfTwoComplementVal = (int32_t)(Abs()); - int32_t otherTwoComplementVal = (int32_t)(other.Abs()); + s32 selfTwoComplementVal = (s32)(Abs()); + s32 otherTwoComplementVal = (s32)(other.Abs()); if (selfTwoComplementVal < otherTwoComplementVal) return -1; @@ -317,16 +374,16 @@ int32_t Ps2Float::CompareOperand(Ps2Float other) return 1; } -double Ps2Float::ToDouble() +double PS2Float::ToDouble() { return std::bit_cast(((u64)Sign << 63) | ((((u64)Exponent - BIAS) + 1023ULL) << 52) | ((u64)Mantissa << 29)); } -std::string Ps2Float::ToString() +std::string PS2Float::ToString() { double res = ToDouble(); - uint32_t value = AsUInt32(); + u32 value = AsUInt32(); std::ostringstream oss; oss << std::fixed << std::setprecision(6); @@ -358,12 +415,12 @@ std::string Ps2Float::ToString() return oss.str(); } -Ps2Float Ps2Float::DoAdd(Ps2Float other) +PS2Float PS2Float::DoAdd(PS2Float other) { - const uint8_t roundingMultiplier = 6; + const u8 roundingMultiplier = 6; - uint8_t selfExponent = Exponent; - int32_t resExponent = selfExponent - other.Exponent; + u8 selfExponent = Exponent; + s32 resExponent = selfExponent - other.Exponent; if (resExponent < 0) return other.DoAdd(*this); @@ -371,46 +428,46 @@ Ps2Float Ps2Float::DoAdd(Ps2Float other) return *this; // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate - uint32_t sign1 = (uint32_t)((int32_t)AsUInt32() >> 31); - int32_t selfMantissa = (int32_t)(((Mantissa | 0x800000) ^ sign1) - sign1); - uint32_t sign2 = (uint32_t)((int32_t)other.AsUInt32() >> 31); - int32_t otherMantissa = (int32_t)(((other.Mantissa | 0x800000) ^ sign2) - sign2); + u32 sign1 = (u32)((s32)AsUInt32() >> 31); + s32 selfMantissa = (s32)(((Mantissa | 0x800000) ^ sign1) - sign1); + u32 sign2 = (u32)((s32)other.AsUInt32() >> 31); + s32 otherMantissa = (s32)(((other.Mantissa | 0x800000) ^ sign2) - sign2); // PS2 multiply by 
2 before doing the Math here. - int32_t man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); - int32_t absMan = abs(man); + s32 man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + s32 absMan = abs(man); if (absMan == 0) - return Ps2Float(0); + return PS2Float(0); // Remove from exponent the PS2 Multiplier value. - int32_t rawExp = selfExponent - roundingMultiplier; + s32 rawExp = selfExponent - roundingMultiplier; - int32_t amount = normalizeAmounts[clz(absMan)]; + s32 amount = normalizeAmounts[clz(absMan)]; rawExp -= amount; absMan <<= amount; - int32_t msbIndex = BitScanReverse8(absMan >> 23); + s32 msbIndex = BitScanReverse8(absMan >> 23); rawExp += msbIndex; absMan >>= msbIndex; if (rawExp > 255) return man < 0 ? Min() : Max(); else if (rawExp <= 0) - return Ps2Float(man < 0, 0, 0); + return PS2Float(man < 0, 0, 0); - return Ps2Float((uint32_t)man & SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)); + return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << 23 | ((u32)absMan & 0x7FFFFF)); } -Ps2Float Ps2Float::DoMul(Ps2Float other) +PS2Float PS2Float::DoMul(PS2Float other) { - uint8_t selfExponent = Exponent; - uint8_t otherExponent = other.Exponent; - uint32_t selfMantissa = Mantissa | 0x800000; - uint32_t otherMantissa = other.Mantissa | 0x800000; - uint32_t sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK; + u8 selfExponent = Exponent; + u8 otherExponent = other.Exponent; + u32 selfMantissa = Mantissa | 0x800000; + u32 otherMantissa = other.Mantissa | 0x800000; + u32 sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK; - int32_t resExponent = selfExponent + otherExponent - 127; - uint32_t resMantissa = (uint32_t)(BoothMultiplier::MulMantissa(selfMantissa, otherMantissa) >> 23); + s32 resExponent = selfExponent + otherExponent - 127; + u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> 23); if (resMantissa > 0xFFFFFF) { @@ -419,51 
+476,52 @@ Ps2Float Ps2Float::DoMul(Ps2Float other) } if (resExponent > 255) - return Ps2Float(sign | MAX_FLOATING_POINT_VALUE); + return PS2Float(sign | MAX_FLOATING_POINT_VALUE); else if (resExponent <= 0) - return Ps2Float(sign); + return PS2Float(sign); - return Ps2Float(sign | (uint32_t)(resExponent << 23) | (resMantissa & 0x7FFFFF)); + return PS2Float(sign | (u32)(resExponent << 23) | (resMantissa & 0x7FFFFF)); } -Ps2Float Ps2Float::DoDiv(Ps2Float other) +// Rounding can be slightly off: (PS2: 0x3F800000 / 0x3F800001 = 0x3F7FFFFF | SoftFloat/IEEE754: 0x3F800000 / 0x3F800001 = 0x3F7FFFFE). +PS2Float PS2Float::DoDiv(PS2Float other) { - uint64_t selfMantissa64; - uint32_t selfMantissa = Mantissa | 0x800000; - uint32_t otherMantissa = other.Mantissa | 0x800000; - int resExponent = Exponent - other.Exponent + BIAS; + u64 selfMantissa64; + u32 selfMantissa = Mantissa | 0x800000; + u32 otherMantissa = other.Mantissa | 0x800000; + s32 resExponent = Exponent - other.Exponent + BIAS; - Ps2Float result = Ps2Float(0); + PS2Float result = PS2Float(0); result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); if (resExponent > 255) return result.Sign ? Min() : Max(); else if (resExponent <= 0) - return Ps2Float(result.Sign, 0, 0); + return PS2Float(result.Sign, 0, 0); if (selfMantissa < otherMantissa) { --resExponent; if (resExponent == 0) - return Ps2Float(result.Sign, 0, 0); - selfMantissa64 = (uint64_t)(selfMantissa) << 31; + return PS2Float(result.Sign, 0, 0); + selfMantissa64 = (u64)(selfMantissa) << 31; } else { - selfMantissa64 = (uint64_t)(selfMantissa) << 30; + selfMantissa64 = (u64)(selfMantissa) << 30; } - uint32_t resMantissa = (uint32_t)(selfMantissa64 / otherMantissa); + u32 resMantissa = (u32)(selfMantissa64 / otherMantissa); if ((resMantissa & 0x3F) == 0) - resMantissa |= ((uint64_t)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0; + resMantissa |= ((u64)(otherMantissa)*resMantissa != selfMantissa64) ? 
1U : 0; - result.Exponent = (uint8_t)(resExponent); + result.Exponent = (u8)(resExponent); result.Mantissa = (resMantissa + 0x39U /* Non-standard value, 40U in IEEE754 (PS2: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D7 -> IEEE754: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D8 */) >> 7; if (result.Mantissa > 0) { - int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa); + s32 leadingBitPosition = PS2Float::GetMostSignificantBitPosition(result.Mantissa); while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) { @@ -471,12 +529,12 @@ Ps2Float Ps2Float::DoDiv(Ps2Float other) { result.Mantissa >>= 1; - int32_t exp = ((int32_t)result.Exponent + 1); + s32 exp = ((s32)result.Exponent + 1); if (exp > 255) return result.Sign ? Min() : Max(); - result.Exponent = (uint8_t)exp; + result.Exponent = (u8)exp; leadingBitPosition--; } @@ -484,12 +542,12 @@ Ps2Float Ps2Float::DoDiv(Ps2Float other) { result.Mantissa <<= 1; - int32_t exp = ((int32_t)result.Exponent - 1); + s32 exp = ((s32)result.Exponent - 1); if (exp <= 0) - return Ps2Float(result.Sign, 0, 0); + return PS2Float(result.Sign, 0, 0); - result.Exponent = (uint8_t)exp; + result.Exponent = (u8)exp; leadingBitPosition++; } @@ -500,66 +558,68 @@ Ps2Float Ps2Float::DoDiv(Ps2Float other) return result.RoundTowardsZero(); } -Ps2Float Ps2Float::SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add) +PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add) { - uint32_t aval = a.AsUInt32(); - uint32_t bval = b.AsUInt32(); + u32 aval = a.AsUInt32(); + u32 bval = b.AsUInt32(); if (aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? Max() : Ps2Float(0); + return add ? Max() : PS2Float(0); if (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? Min() : Ps2Float(0); + return add ? 
Min() : PS2Float(0); if (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? Ps2Float(0) : Min(); + return add ? PS2Float(0) : Min(); if (aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? Ps2Float(0) : Max(); + return add ? PS2Float(0) : Max(); if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? Max() : Ps2Float(0); + return add ? Max() : PS2Float(0); if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? Ps2Float(0) : Min(); + return add ? PS2Float(0) : Min(); if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? Ps2Float(0) : Max(); + return add ? PS2Float(0) : Max(); if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? Min() : Ps2Float(0); + return add ? Min() : PS2Float(0); if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? Max() : Ps2Float(0x7F7FFFFE); + return add ? Max() : PS2Float(0x7F7FFFFE); if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? Ps2Float(0x7F7FFFFE) : Max(); + return add ? PS2Float(0x7F7FFFFE) : Max(); if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? Ps2Float(0xFF7FFFFE) : Min(); + return add ? PS2Float(0xFF7FFFFE) : Min(); if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? Min() : Ps2Float(0xFF7FFFFE); + return add ? Min() : PS2Float(0xFF7FFFFE); if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? Max() : Ps2Float(0xFF7FFFFE); + return add ? Max() : PS2Float(0xFF7FFFFE); if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? Ps2Float(0xFF7FFFFE) : Max(); + return add ? PS2Float(0xFF7FFFFE) : Max(); if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? Ps2Float(0x7F7FFFFE) : Min(); + return add ? 
PS2Float(0x7F7FFFFE) : Min(); if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? Min() : Ps2Float(0x7F7FFFFE); + return add ? Min() : PS2Float(0x7F7FFFFE); Console.Error("Unhandled abnormal add/sub floating point operation"); + + return PS2Float(0); } -Ps2Float Ps2Float::SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul) +PS2Float PS2Float::SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul) { - uint32_t aval = a.AsUInt32(); - uint32_t bval = b.AsUInt32(); + u32 aval = a.AsUInt32(); + u32 bval = b.AsUInt32(); if (mul) { @@ -630,36 +690,38 @@ Ps2Float Ps2Float::SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps return One(); if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return Ps2Float(0x3FFFFFFF); + return PS2Float(0x3FFFFFFF); if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return Ps2Float(0xBFFFFFFF); + return PS2Float(0xBFFFFFFF); if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return Ps2Float(0xBFFFFFFF); + return PS2Float(0xBFFFFFFF); if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return Ps2Float(0x3FFFFFFF); + return PS2Float(0x3FFFFFFF); if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return Ps2Float(0x3F000001); + return PS2Float(0x3F000001); if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return Ps2Float(0xBF000001); + return PS2Float(0xBF000001); if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return Ps2Float(0xBF000001); + return PS2Float(0xBF000001); if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return Ps2Float(0x3F000001); + return PS2Float(0x3F000001); } Console.Error("Unhandled abnormal mul/div floating point operation"); + + return PS2Float(0); } -Ps2Float Ps2Float::SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool 
add) +PS2Float PS2Float::SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add) { - Ps2Float result = Ps2Float(0); + PS2Float result = PS2Float(0); if (a.IsDenormalized() && !b.IsDenormalized()) result = b; @@ -675,18 +737,18 @@ Ps2Float Ps2Float::SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool return result; } -Ps2Float Ps2Float::SolveMultiplicationDenormalizedOperation(Ps2Float a, Ps2Float b) +PS2Float PS2Float::SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b) { - Ps2Float result = Ps2Float(0); + PS2Float result = PS2Float(0); result.Sign = DetermineMultiplicationDivisionOperationSign(a, b); return result; } -Ps2Float Ps2Float::SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b) +PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) { bool sign = DetermineMultiplicationDivisionOperationSign(a, b); - Ps2Float result = Ps2Float(0); + PS2Float result = PS2Float(0); if (a.IsDenormalized() && !b.IsDenormalized()) { @@ -702,17 +764,17 @@ Ps2Float Ps2Float::SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b) return result; } -Ps2Float Ps2Float::Neg(Ps2Float self) +PS2Float PS2Float::Neg(PS2Float self) { - return Ps2Float(self.AsUInt32() ^ SIGNMASK); + return PS2Float(self.AsUInt32() ^ SIGNMASK); } -bool Ps2Float::DetermineMultiplicationDivisionOperationSign(Ps2Float a, Ps2Float b) +bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) { return a.Sign ^ b.Sign; } -bool Ps2Float::DetermineAdditionOperationSign(Ps2Float a, Ps2Float b) +bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) { if (a.IsZero() && b.IsZero()) { @@ -727,7 +789,7 @@ bool Ps2Float::DetermineAdditionOperationSign(Ps2Float a, Ps2Float b) return a.CompareOperand(b) >= 0 ? 
a.Sign : b.Sign; } -bool Ps2Float::DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b) +bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) { if (a.IsZero() && b.IsZero()) { @@ -742,7 +804,7 @@ bool Ps2Float::DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b) return a.CompareOperand(b) >= 0 ? a.Sign : !b.Sign; } -int32_t Ps2Float::clz(int x) +s32 PS2Float::clz(s32 x) { x |= x >> 1; x |= x >> 2; @@ -750,17 +812,17 @@ int32_t Ps2Float::clz(int x) x |= x >> 8; x |= x >> 16; - return debruijn32[(uint)x * 0x8c0b2891u >> 26]; + return debruijn32[(u32)x * 0x8c0b2891u >> 26]; } -int32_t Ps2Float::BitScanReverse8(int b) +s32 PS2Float::BitScanReverse8(s32 b) { return msb[b]; } -int32_t Ps2Float::GetMostSignificantBitPosition(uint32_t value) +s32 PS2Float::GetMostSignificantBitPosition(u32 value) { - for (int32_t i = 31; i >= 0; i--) + for (s32 i = 31; i >= 0; i--) { if (((value >> i) & 1) != 0) return i; @@ -768,7 +830,7 @@ int32_t Ps2Float::GetMostSignificantBitPosition(uint32_t value) return -1; } -const int8_t Ps2Float::msb[256] = +const s8 PS2Float::msb[256] = { -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -779,13 +841,13 @@ const int8_t Ps2Float::msb[256] = 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; -const int32_t Ps2Float::debruijn32[64] = +const s32 PS2Float::debruijn32[64] = { 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12}; -const int32_t Ps2Float::normalizeAmounts[] = +const s32 PS2Float::normalizeAmounts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 
8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; diff --git a/pcsx2/Ps2Float.h b/pcsx2/Ps2Float.h index ceffb01af7..14ef962942 100644 --- a/pcsx2/Ps2Float.h +++ b/pcsx2/Ps2Float.h @@ -5,54 +5,72 @@ #include -class Ps2Float +class PS2Float { + struct BoothRecode + { + u32 data; + u32 negate; + }; + + struct AddResult + { + u32 lo; + u32 hi; + }; + + static u64 MulMantissa(u32 a, u32 b); + + static BoothRecode Booth(u32 a, u32 b, u32 bit); + + static AddResult Add3(u32 a, u32 b, u32 c); + public: bool Sign; - uint8_t Exponent; - uint32_t Mantissa; + u8 Exponent; + u32 Mantissa; - static const uint8_t BIAS; - static const uint32_t SIGNMASK; - static const uint32_t MAX_FLOATING_POINT_VALUE; - static const uint32_t MIN_FLOATING_POINT_VALUE; - static const uint32_t POSITIVE_INFINITY_VALUE; - static const uint32_t NEGATIVE_INFINITY_VALUE; - static const uint32_t ONE; - static const uint32_t MIN_ONE; + static const u8 BIAS; + static const u32 SIGNMASK; + static const u32 MAX_FLOATING_POINT_VALUE; + static const u32 MIN_FLOATING_POINT_VALUE; + static const u32 POSITIVE_INFINITY_VALUE; + static const u32 NEGATIVE_INFINITY_VALUE; + static const u32 ONE; + static const u32 MIN_ONE; static const int IMPLICIT_LEADING_BIT_POS; - static const int8_t msb[256]; - static const int32_t debruijn32[64]; - static const int32_t normalizeAmounts[]; + static const s8 msb[256]; + static const s32 debruijn32[64]; + static const s32 normalizeAmounts[]; - Ps2Float(uint32_t value); + PS2Float(u32 value); - Ps2Float(bool sign, uint8_t exponent, uint32_t mantissa); + PS2Float(bool sign, u8 exponent, u32 mantissa); - static Ps2Float Max(); + static PS2Float Max(); - static Ps2Float Min(); + static PS2Float Min(); - static Ps2Float One(); + static PS2Float One(); - static Ps2Float MinOne(); + static PS2Float MinOne(); - static Ps2Float Neg(Ps2Float self); + static PS2Float Neg(PS2Float self); - uint32_t AsUInt32() const; + u32 AsUInt32() const; - Ps2Float 
Add(Ps2Float addend); + PS2Float Add(PS2Float addend); - Ps2Float Sub(Ps2Float subtrahend); + PS2Float Sub(PS2Float subtrahend); - Ps2Float Mul(Ps2Float mulend); + PS2Float Mul(PS2Float mulend); - Ps2Float Div(Ps2Float divend); + PS2Float Div(PS2Float divend); - Ps2Float Sqrt(); + PS2Float Sqrt(); - Ps2Float Rsqrt(Ps2Float other); + PS2Float Rsqrt(PS2Float other); bool IsDenormalized(); @@ -60,13 +78,13 @@ public: bool IsZero(); - uint32_t Abs(); + u32 Abs(); - Ps2Float RoundTowardsZero(); + PS2Float RoundTowardsZero(); - int32_t CompareTo(Ps2Float other); + s32 CompareTo(PS2Float other); - int32_t CompareOperand(Ps2Float other); + s32 CompareOperand(PS2Float other); double ToDouble(); @@ -76,31 +94,31 @@ protected: private: - Ps2Float DoAdd(Ps2Float other); + PS2Float DoAdd(PS2Float other); - Ps2Float DoMul(Ps2Float other); + PS2Float DoMul(PS2Float other); - Ps2Float DoDiv(Ps2Float other); + PS2Float DoDiv(PS2Float other); - static Ps2Float SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add); + static PS2Float SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add); - static Ps2Float SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul); + static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul); - static Ps2Float SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool add); + static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); - static Ps2Float SolveMultiplicationDenormalizedOperation(Ps2Float a, Ps2Float b); + static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); - static Ps2Float SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b); + static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); - static bool DetermineMultiplicationDivisionOperationSign(Ps2Float a, Ps2Float b); + static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); - 
static bool DetermineAdditionOperationSign(Ps2Float a, Ps2Float b); + static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); - static bool DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b); + static bool DetermineSubtractionOperationSign(PS2Float a, PS2Float b); - static int32_t GetMostSignificantBitPosition(uint32_t value); + static s32 GetMostSignificantBitPosition(u32 value); - static int32_t BitScanReverse8(int32_t b); + static s32 BitScanReverse8(s32 b); - static int32_t clz(int32_t x); + static s32 clz(s32 x); }; diff --git a/pcsx2/VU.h b/pcsx2/VU.h index ad55a0e12b..72b519c8fa 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -124,7 +124,7 @@ struct alignas(16) VURegs REG_VI q; REG_VI p; - VECTOR TMP; + VECTOR TMP; // Temporary vector used to stack FMA operations uint idx; // VU index (0 or 1) // flags/cycle are needed by VIF dma code, so they have to be here (for now) diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index 85a38eb1b8..d3422c2587 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" -#include "Ps2Float.h" +#include "PS2Float.h" #include #include @@ -12,12 +12,12 @@ /* NEW FLAGS */ //By asadr. 
Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, uint32_t f) +static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, u32 f) { - Ps2Float ps2f = Ps2Float(f); + PS2Float ps2f = PS2Float(f); - uint exp = ps2f.Exponent; - u32 s = ps2f.AsUInt32() & Ps2Float::SIGNMASK; + u32 exp = ps2f.Exponent; + u32 s = ps2f.AsUInt32() & PS2Float::SIGNMASK; if (s) VU->macflag |= 0x0010<macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); return f; @@ -62,42 +62,42 @@ static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, uint32_t f) } } -__fi u32 VU_MACx_UPDATE(VURegs * VU, uint32_t x) +__fi u32 VU_MACx_UPDATE(VURegs* VU, u32 x) { return VU_MAC_UPDATE(3, VU, x); } -__fi u32 VU_MACy_UPDATE(VURegs* VU, uint32_t y) +__fi u32 VU_MACy_UPDATE(VURegs* VU, u32 y) { return VU_MAC_UPDATE(2, VU, y); } -__fi u32 VU_MACz_UPDATE(VURegs* VU, uint32_t z) +__fi u32 VU_MACz_UPDATE(VURegs* VU, u32 z) { return VU_MAC_UPDATE(1, VU, z); } -__fi u32 VU_MACw_UPDATE(VURegs* VU, uint32_t w) +__fi u32 VU_MACw_UPDATE(VURegs* VU, u32 w) { return VU_MAC_UPDATE(0, VU, w); } -__fi void VU_MACx_CLEAR(VURegs * VU) +__fi void VU_MACx_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<3); } -__fi void VU_MACy_CLEAR(VURegs * VU) +__fi void VU_MACy_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<2); } -__fi void VU_MACz_CLEAR(VURegs * VU) +__fi void VU_MACz_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<1); } -__fi void VU_MACw_CLEAR(VURegs * VU) +__fi void VU_MACw_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<0); } diff --git a/pcsx2/VUflags.h b/pcsx2/VUflags.h index 9a5a5fafa8..12bd316351 100644 --- a/pcsx2/VUflags.h +++ b/pcsx2/VUflags.h @@ -4,12 +4,12 @@ #pragma once #include "VU.h" -extern u32 VU_MACx_UPDATE(VURegs * VU, uint32_t x); -extern u32 VU_MACy_UPDATE(VURegs* VU, uint32_t y); -extern u32 VU_MACz_UPDATE(VURegs* VU, uint32_t z); -extern u32 VU_MACw_UPDATE(VURegs* VU, uint32_t w); -extern void VU_MACx_CLEAR(VURegs * VU); -extern void VU_MACy_CLEAR(VURegs * 
VU); -extern void VU_MACz_CLEAR(VURegs * VU); -extern void VU_MACw_CLEAR(VURegs * VU); -extern void VU_STAT_UPDATE(VURegs * VU); +extern u32 VU_MACx_UPDATE(VURegs* VU, u32 x); +extern u32 VU_MACy_UPDATE(VURegs* VU, u32 y); +extern u32 VU_MACz_UPDATE(VURegs* VU, u32 z); +extern u32 VU_MACw_UPDATE(VURegs* VU, u32 w); +extern void VU_MACx_CLEAR(VURegs* VU); +extern void VU_MACy_CLEAR(VURegs* VU); +extern void VU_MACz_CLEAR(VURegs* VU); +extern void VU_MACw_CLEAR(VURegs* VU); +extern void VU_STAT_UPDATE(VURegs* VU); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 60e26a707f..f12903b72d 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" -#include "Ps2Float.h" +#include "PS2Float.h" #include "VUops.h" #include "GS.h" #include "Gif_Unit.h" @@ -463,14 +463,14 @@ static __fi float vuDouble(u32 f) } #endif -static __fi uint32_t vuAccurateAddSub(VURegs* VU, u32 a, u32 b, bool issub) +static __fi u32 vuAccurateAddSub(VURegs* VU, u32 a, u32 b, bool issub) { if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { if (issub) - return Ps2Float(a).Sub(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); else - return Ps2Float(a).Add(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Add(PS2Float(b)).AsUInt32(); } if (issub) @@ -480,14 +480,14 @@ static __fi uint32_t vuAccurateAddSub(VURegs* VU, u32 a, u32 b, bool issub) } -static __fi uint32_t vuAccurateMulDiv(VURegs* VU, u32 a, u32 b, bool isdiv) +static __fi u32 vuAccurateMulDiv(VURegs* VU, u32 a, u32 b, bool isdiv) { if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) { if (isdiv) - return Ps2Float(a).Div(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Div(PS2Float(b)).AsUInt32(); else - return Ps2Float(a).Mul(Ps2Float(b)).AsUInt32(); + return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); } if (isdiv) @@ -505,19 +505,19 @@ void _vuABS(VURegs* VU) { if (_X) { - VU->VF[_Ft_].i.x = Ps2Float(VU->VF[_Fs_].i.x).Abs(); + VU->VF[_Ft_].i.x = PS2Float(VU->VF[_Fs_].i.x).Abs(); } if (_Y) { - VU->VF[_Ft_].i.y = Ps2Float(VU->VF[_Fs_].i.y).Abs(); + VU->VF[_Ft_].i.y = PS2Float(VU->VF[_Fs_].i.y).Abs(); } if (_Z) { - VU->VF[_Ft_].i.z = Ps2Float(VU->VF[_Fs_].i.z).Abs(); + VU->VF[_Ft_].i.z = PS2Float(VU->VF[_Fs_].i.z).Abs(); } if (_W) { - VU->VF[_Ft_].i.w = Ps2Float(VU->VF[_Fs_].i.w).Abs(); + VU->VF[_Ft_].i.w = PS2Float(VU->VF[_Fs_].i.w).Abs(); } } else @@ -750,11 +750,11 @@ static __fi void _vuSUBq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBx(VURegs* VU) @@ -931,7 +931,7 @@ 
static __fi void _vuMULx(VURegs* VU) else dst = &VU->VF[_Fd_]; - uint32_t ftx = VU->VF[_Ft_].i.x; + u32 ftx = VU->VF[_Ft_].i.x; if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0)); } else VU_MACx_CLEAR(VU); if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0)); } else VU_MACy_CLEAR(VU); if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0)); } else VU_MACz_CLEAR(VU); @@ -948,7 +948,7 @@ static __fi void _vuMULy(VURegs* VU) else dst = &VU->VF[_Fd_]; - uint32_t fty = VU->VF[_Ft_].i.y; + u32 fty = VU->VF[_Ft_].i.y; if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0)); } else VU_MACx_CLEAR(VU); if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0)); } else VU_MACy_CLEAR(VU); if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0)); } else VU_MACz_CLEAR(VU); @@ -964,7 +964,7 @@ static __fi void _vuMULz(VURegs* VU) else dst = &VU->VF[_Fd_]; - uint32_t ftz = VU->VF[_Ft_].i.z; + u32 ftz = VU->VF[_Ft_].i.z; if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0)); } else VU_MACx_CLEAR(VU); if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0)); } else VU_MACy_CLEAR(VU); if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0)); } else VU_MACz_CLEAR(VU); @@ -980,7 +980,7 @@ static __fi void _vuMULw(VURegs* VU) else dst = &VU->VF[_Fd_]; - uint32_t ftw = VU->VF[_Ft_].i.w; + u32 ftw = VU->VF[_Ft_].i.w; if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0)); } else VU_MACx_CLEAR(VU); if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0)); } else VU_MACy_CLEAR(VU); if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0)); } else VU_MACz_CLEAR(VU); @@ -1108,7 +1108,7 @@ static __fi void _vuMADDx(VURegs* VU) 
dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t ftx = VU->VF[_Ft_].i.x; + u32 ftx = VU->VF[_Ft_].i.x; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); @@ -1126,7 +1126,7 @@ static __fi void _vuMADDy(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t fty = VU->VF[_Ft_].i.y; + u32 fty = VU->VF[_Ft_].i.y; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); @@ -1144,7 +1144,7 @@ static __fi void _vuMADDz(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t ftz = VU->VF[_Ft_].i.z; + u32 ftz = VU->VF[_Ft_].i.z; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); @@ -1162,7 +1162,7 @@ 
static __fi void _vuMADDw(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t ftw = VU->VF[_Ft_].i.w; + u32 ftw = VU->VF[_Ft_].i.w; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); @@ -1309,7 +1309,7 @@ static __fi void _vuMSUBx(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t ftx = VU->VF[_Ft_].i.x; + u32 ftx = VU->VF[_Ft_].i.x; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1327,7 +1327,7 @@ static __fi void _vuMSUBy(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t fty = VU->VF[_Ft_].i.y; + u32 fty = VU->VF[_Ft_].i.y; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else 
VU_MACz_CLEAR(VU); @@ -1345,7 +1345,7 @@ static __fi void _vuMSUBz(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t ftz = VU->VF[_Ft_].i.z; + u32 ftz = VU->VF[_Ft_].i.z; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1363,7 +1363,7 @@ static __fi void _vuMSUBw(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - uint32_t ftw = VU->VF[_Ft_].i.w; + u32 ftw = VU->VF[_Ft_].i.w; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1409,7 +1409,7 @@ static __fi void _vuMSUBAx(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - uint32_t tx = VU->VF[_Ft_].i.x; + u32 tx = VU->VF[_Ft_].i.x; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tx, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tx, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tx, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, 
VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1421,7 +1421,7 @@ static __fi void _vuMSUBAy(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - uint32_t ty = VU->VF[_Ft_].i.y; + u32 ty = VU->VF[_Ft_].i.y; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ty, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ty, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ty, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1433,7 +1433,7 @@ static __fi void _vuMSUBAz(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - uint32_t tz = VU->VF[_Ft_].i.z; + u32 tz = VU->VF[_Ft_].i.z; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tz, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tz, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tz, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1445,7 +1445,7 @@ static __fi void _vuMSUBAw(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - uint32_t tw = VU->VF[_Ft_].i.w; + u32 tw = VU->VF[_Ft_].i.w; if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tw, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tw, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tw, 0); VU->ACC.i.z = 
VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); @@ -1663,13 +1663,13 @@ static __fi void _vuFTOI0(VURegs* VU) { if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { if (_X) - VU->VF[_Ft_].SL[0] = double_to_int(Ps2Float(VU->VF[_Fs_].i.x).ToDouble()); + VU->VF[_Ft_].SL[0] = double_to_int(PS2Float(VU->VF[_Fs_].i.x).ToDouble()); if (_Y) - VU->VF[_Ft_].SL[1] = double_to_int(Ps2Float(VU->VF[_Fs_].i.y).ToDouble()); + VU->VF[_Ft_].SL[1] = double_to_int(PS2Float(VU->VF[_Fs_].i.y).ToDouble()); if (_Z) - VU->VF[_Ft_].SL[2] = double_to_int(Ps2Float(VU->VF[_Fs_].i.z).ToDouble()); + VU->VF[_Ft_].SL[2] = double_to_int(PS2Float(VU->VF[_Fs_].i.z).ToDouble()); if (_W) - VU->VF[_Ft_].SL[3] = double_to_int(Ps2Float(VU->VF[_Fs_].i.w).ToDouble()); + VU->VF[_Ft_].SL[3] = double_to_int(PS2Float(VU->VF[_Fs_].i.w).ToDouble()); } else { @@ -1763,20 +1763,20 @@ static __fi void _vuCLIP(VURegs* VU) { if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - double value = Ps2Float(Ps2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); + double value = PS2Float(PS2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); VU->clipflag <<= 6; - if (Ps2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) VU->clipflag |= 0x01; - if (Ps2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) VU->clipflag |= 0x02; - if (Ps2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) VU->clipflag |= 0x04; - if (Ps2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) VU->clipflag |= 0x08; - if (Ps2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) VU->clipflag |= 0x10; - if (Ps2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) VU->clipflag |= 0x20; } else @@ -1809,8 +1809,8 @@ static __fi void _vuDIV(VURegs* VU) { if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { - Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); - Ps2Float fs = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); VU->statusflag &= ~0x30; @@ -1823,9 +1823,9 @@ static __fi void _vuDIV(VURegs* VU) if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = Ps2Float::MIN_FLOATING_POINT_VALUE; + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; else - VU->q.UL = Ps2Float::MAX_FLOATING_POINT_VALUE; + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; } else { @@ -1864,13 +1864,13 @@ static __fi void _vuSQRT(VURegs* VU) { if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); VU->statusflag &= ~0x30; if (ft.ToDouble() < 0.0) VU->statusflag |= 0x10; - VU->q.UL = Ps2Float(ft.Abs()).Sqrt().AsUInt32(); + VU->q.UL = PS2Float(ft.Abs()).Sqrt().AsUInt32(); } else { @@ -1889,8 +1889,8 @@ static __fi void _vuRSQRT(VURegs* VU) { if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); - Ps2Float fs = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); VU->statusflag &= ~0x30; @@ -1902,9 +1902,9 @@ static __fi void _vuRSQRT(VURegs* VU) { if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = Ps2Float::MIN_FLOATING_POINT_VALUE; + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; else - VU->q.UL = Ps2Float::MAX_FLOATING_POINT_VALUE; + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; } else { @@ -1925,7 +1925,7 @@ static __fi void _vuRSQRT(VURegs* VU) } if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) - VU->q.UL = fs.Div(Ps2Float(ft.Abs()).Sqrt()).AsUInt32(); + VU->q.UL = fs.Div(PS2Float(ft.Abs()).Sqrt()).AsUInt32(); else { float temp = sqrt(fabs(vuDouble(ft.AsUInt32()))); @@ -2619,25 +2619,25 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0); } static __ri void _vuERSADD(VURegs* VU) { - uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - Ps2Float p = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + PS2Float p = PS2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); if (!p.IsZero()) { if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) - p = Ps2Float::One().Div(p); + p = PS2Float::One().Div(p); else { VU->p.F = 1.0f / vuDouble(p.AsUInt32()); @@ -2650,11 +2650,11 @@ static __ri void _vuERSADD(VURegs* VU) static __ri void _vuELENG(VURegs* VU) { - uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - Ps2Float value = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + PS2Float value = PS2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { @@ -2678,11 +2678,11 @@ static __ri void _vuELENG(VURegs* VU) static __ri void _vuERLENG(VURegs* VU) { - uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - Ps2Float value = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + PS2Float value = PS2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { @@ -2693,7 +2693,7 @@ static __ri void _vuERLENG(VURegs* VU) { if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) { - value = Ps2Float::One().Div(value); + value = PS2Float::One().Div(value); } else { @@ -2721,7 +2721,7 @@ static __ri void _vuERLENG(VURegs* VU) } -static __ri float _vuCalculateEATAN(uint32_t inputvalue) { +static __ri float _vuCalculateEATAN(u32 inputvalue) { float fvalue = vuDouble(inputvalue); @@ -2748,7 +2748,7 @@ static __ri void _vuEATAN(VURegs* VU) static __ri void _vuEATANxy(VURegs* VU) { float p = 0; - if (!Ps2Float(VU->VF[_Fs_].i.x).IsZero()) + if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x, 1)); } @@ -2758,7 +2758,7 @@ static __ri void _vuEATANxy(VURegs* VU) static __ri void _vuEATANxz(VURegs* VU) { float p = 0; - if (!Ps2Float(VU->VF[_Fs_].i.x).IsZero()) + if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x, 1)); } @@ -2772,13 +2772,13 @@ static __ri void _vuESUM(VURegs* VU) static __ri void _vuERCPR(VURegs* VU) { - Ps2Float p = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); if (!p.IsZero()) { if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { - p = Ps2Float::One().Div(p); + p = PS2Float::One().Div(p); } else { @@ -2794,7 +2794,7 @@ static __ri void _vuESQRT(VURegs* VU) { if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - Ps2Float value = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float value = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); if (value.ToDouble() >= 0) { @@ -2820,7 +2820,7 @@ static __ri void _vuERSQRT(VURegs* VU) { if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - Ps2Float value = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float value = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); if (value.ToDouble() >= 0) { @@ -2834,7 +2834,7 @@ static __ri void _vuERSQRT(VURegs* VU) } else { - value = Ps2Float::One().Div(value); + value = PS2Float::One().Div(value); } } } diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 9fb77e262a..3a6555254b 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -126,7 +126,6 @@ true - @@ -282,7 +281,7 @@ - + @@ -584,7 +583,6 @@ true - @@ -729,7 +727,7 @@ - + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 2e6adfb733..0fa8c4cd9f 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1446,10 +1446,7 @@ System\Ps2\Iop\SIO\PAD - - System\Ps2\EmotionEngine\Shared - - + System\Ps2\EmotionEngine\Shared @@ -2408,10 +2405,7 @@ System\Ps2\Iop\SIO\PAD - - System\Ps2\EmotionEngine\Shared - - + System\Ps2\EmotionEngine\Shared From b0b65fa2489e681090dca9e72cb365a1819e1573 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Fri, 22 Nov 2024 00:11:26 +0100 Subject: [PATCH 09/15] [Soft-Float] - Removes Div "special" normalization constant. This broke stuff on very high floats, 0x40 is an un-biased value that can't be changed from IEEE standard. We now 100% match the PS3's SPEs, but not the PS2 Div result (can be off by one bit). However, this is still way better than IEEE754. --- pcsx2/Ps2Float.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp index afe2619d46..ab7b9ab1b6 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/Ps2Float.cpp @@ -517,7 +517,7 @@ PS2Float PS2Float::DoDiv(PS2Float other) resMantissa |= ((u64)(otherMantissa)*resMantissa != selfMantissa64) ? 
1U : 0; result.Exponent = (u8)(resExponent); - result.Mantissa = (resMantissa + 0x39U /* Non-standard value, 40U in IEEE754 (PS2: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D7 -> IEEE754: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D8 */) >> 7; + result.Mantissa = (resMantissa + 0x40U) >> 7; if (result.Mantissa > 0) { From 8bc2ed928281ae35a27a15aea89ad7bebf444b80 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sat, 23 Nov 2024 15:35:44 +0100 Subject: [PATCH 10/15] [Soft-Float] - Code review Part1. Applies some recommendations from the review. The next batch will come later. --- pcsx2-qt/Settings/AdvancedSettingsWidget.ui | 2 +- pcsx2/FPU.cpp | 104 ++- pcsx2/{Ps2Float.cpp => PS2Float.cpp} | 32 - pcsx2/{Ps2Float.h => PS2Float.h} | 58 +- pcsx2/VUops.cpp | 739 +++++++++----------- 5 files changed, 429 insertions(+), 506 deletions(-) rename pcsx2/{Ps2Float.cpp => PS2Float.cpp} (92%) rename pcsx2/{Ps2Float.h => PS2Float.h} (50%) diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 5a8b420b88..aa68c2c5c3 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -33,7 +33,7 @@ 0 -447 - 793 + 790 1283 diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index bab1cd9bc1..d4967dc467 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -219,36 +219,32 @@ float fpuDouble(u32 f) } } -static __fi u32 fpuAccurateAddSub(u32 a, u32 b, bool issub) +static __fi u32 fpuAccurateAdd(u32 a, u32 b) { - if (CHECK_FPU_SOFT_ADDSUB) - { - if (issub) - return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); - else - return PS2Float(a).Add(PS2Float(b)).AsUInt32(); - } - - if (issub) - return std::bit_cast(fpuDouble(a) - fpuDouble(b)); - else - return std::bit_cast(fpuDouble(a) + fpuDouble(b)); + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Add(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) + fpuDouble(b)); } -static __fi u32 
fpuAccurateMulDiv(u32 a, u32 b, bool isdiv) +static __fi u32 fpuAccurateSub(u32 a, u32 b) { - if (CHECK_FPU_SOFT_MULDIV) - { - if (isdiv) - return PS2Float(a).Div(PS2Float(b)).AsUInt32(); - else - return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); - } + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); - if (isdiv) - return std::bit_cast(fpuDouble(a) / fpuDouble(b)); - else - return std::bit_cast(fpuDouble(a) * fpuDouble(b)); + return std::bit_cast(fpuDouble(a) - fpuDouble(b)); +} + +static __fi u32 fpuAccurateMul(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) * fpuDouble(b)); +} + +static __fi u32 fpuAccurateDiv(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Div(PS2Float(b)).AsUInt32(); + + return std::bit_cast(fpuDouble(a) / fpuDouble(b)); } static __fi s32 double_to_int(double value) @@ -304,13 +300,13 @@ void ABS_S() { } void ADD_S() { - _FdValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 0); + _FdValUl_ = fpuAccurateAdd(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void ADDA_S() { - _FAValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 0); + _FAValUl_ = fpuAccurateAdd(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -392,7 +388,7 @@ void CVT_W() { void DIV_S() { if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 1); + _FdValUl_ = fpuAccurateDiv(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); } @@ -402,17 +398,13 @@ void DIV_S() { method provides a similar outcome and is faster. 
(cottonvibes) */ void MADD_S() { - FPRreg temp; - temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); - _FdValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 0); + _FdValUl_ = fpuAccurateAdd(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MADDA_S() { - FPRreg temp; - temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); - _FAValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 0); + _FAValUl_ = fpuAccurateAdd(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -437,17 +429,13 @@ void MOV_S() { } void MSUB_S() { - FPRreg temp; - temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); - _FdValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 1); + _FdValUl_ = fpuAccurateSub(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MSUBA_S() { - FPRreg temp; - temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); - _FAValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 1); + _FAValUl_ = fpuAccurateSub(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -457,13 +445,13 @@ void MTC1() { } void MUL_S() { - _FdValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FdValUl_ = fpuAccurateMul(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MULA_S() { - _FAValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FAValUl_ = fpuAccurateMul(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -487,34 +475,34 @@ void RSQRT_S() { _FdValUl_ = value.Sign ? 
PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; return; } - else if (_FtValUl_ & 0x80000000) - { // Ft is negative + else if (_FtValUl_ & 0x80000000) // Ft is negative + { _ContVal_ |= FPUflagI | FPUflagSI; _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(PS2Float(value.Abs())).AsUInt32(); } - else + else // Ft is positive and not zero { _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(value).AsUInt32(); - } // Ft is positive and not zero + } } else { - if ((_FtValUl_ & 0x7F800000) == 0) - { // Ft is zero (Denormals are Zero) + if ((_FtValUl_ & 0x7F800000) == 0) // Ft is zero (Denormals are Zero) + { _ContVal_ |= FPUflagD | FPUflagSD; _FdValUl_ = (_FtValUl_ & 0x80000000) | posFmax; return; } - else if (_FtValUl_ & 0x80000000) - { // Ft is negative + else if (_FtValUl_ & 0x80000000) // Ft is negative + { _ContVal_ |= FPUflagI | FPUflagSI; temp.f = sqrt(fabs(fpuDouble(_FtValUl_))); _FdValf_ = fpuDouble(_FsValUl_) / fpuDouble(temp.UL); } - else + else // Ft is positive and not zero { _FdValf_ = fpuDouble(_FsValUl_) / sqrt(fpuDouble(_FtValUl_)); - } // Ft is positive and not zero + } } if (checkOverflow( _FdValUl_, 0)) return; @@ -528,8 +516,8 @@ void SQRT_S() { { PS2Float value = PS2Float(_FtValUl_); - if (_FtValUl_ & 0x80000000) - { // If Ft is Negative + if (_FtValUl_ & 0x80000000) // If Ft is Negative + { _ContVal_ |= FPUflagI | FPUflagSI; _FdValUl_ = PS2Float(value.Abs()).Sqrt().AsUInt32(); } @@ -540,8 +528,8 @@ void SQRT_S() { { if ((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 - else if (_FtValUl_ & 0x80000000) - { // If Ft is Negative + else if (_FtValUl_ & 0x80000000) // If Ft is Negative + { _ContVal_ |= FPUflagI | FPUflagSI; _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); } @@ -551,13 +539,13 @@ void SQRT_S() { } void SUB_S() { - _FdValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 1); + _FdValUl_ = fpuAccurateSub(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; 
checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void SUBA_S() { - _FAValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 1); + _FAValUl_ = fpuAccurateSub(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/PS2Float.cpp similarity index 92% rename from pcsx2/Ps2Float.cpp rename to pcsx2/PS2Float.cpp index ab7b9ab1b6..9a9babeabf 100644 --- a/pcsx2/Ps2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -11,16 +11,6 @@ #include "PS2Float.h" #include "Common.h" -const u8 PS2Float::BIAS = 127; -const u32 PS2Float::SIGNMASK = 0x80000000; -const u32 PS2Float::MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; -const u32 PS2Float::MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; -const u32 PS2Float::POSITIVE_INFINITY_VALUE = 0x7F800000; -const u32 PS2Float::NEGATIVE_INFINITY_VALUE = 0xFF800000; -const u32 PS2Float::ONE = 0x3F800000; -const u32 PS2Float::MIN_ONE = 0xBF800000; -const s32 PS2Float::IMPLICIT_LEADING_BIT_POS = 23; - //**************************************************************** // Booth Multiplier //**************************************************************** @@ -829,25 +819,3 @@ s32 PS2Float::GetMostSignificantBitPosition(u32 value) } return -1; } - -const s8 PS2Float::msb[256] = - { - -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; - -const s32 PS2Float::debruijn32[64] = - { - 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, - 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, - -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12}; - -const s32 PS2Float::normalizeAmounts[] = - { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; diff --git a/pcsx2/Ps2Float.h b/pcsx2/PS2Float.h similarity index 50% rename from pcsx2/Ps2Float.h rename to pcsx2/PS2Float.h index 14ef962942..341bc10b56 100644 --- a/pcsx2/Ps2Float.h +++ b/pcsx2/PS2Float.h @@ -7,42 +7,60 @@ class PS2Float { - struct BoothRecode - { + struct BoothRecode + { u32 data; u32 negate; - }; + }; - struct AddResult - { + struct AddResult + { u32 lo; u32 hi; - }; + }; static u64 MulMantissa(u32 a, u32 b); static BoothRecode Booth(u32 a, u32 b, u32 bit); - static AddResult Add3(u32 a, u32 b, u32 c); + static AddResult Add3(u32 a, u32 b, u32 c); public: bool Sign; u8 Exponent; u32 Mantissa; - static const u8 BIAS; - static const u32 SIGNMASK; - static const u32 MAX_FLOATING_POINT_VALUE; - static const u32 MIN_FLOATING_POINT_VALUE; - static const u32 POSITIVE_INFINITY_VALUE; - static const u32 NEGATIVE_INFINITY_VALUE; - static const u32 ONE; - static const u32 MIN_ONE; - static const int IMPLICIT_LEADING_BIT_POS; + static constexpr u8 BIAS = 127; + static constexpr u32 SIGNMASK = 0x80000000; + static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; + static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; + static constexpr u32 POSITIVE_INFINITY_VALUE = 0x7F800000; + static constexpr u32 NEGATIVE_INFINITY_VALUE = 0xFF800000; + static constexpr u32 ONE = 0x3F800000; + static constexpr u32 MIN_ONE = 0xBF800000; + static constexpr int IMPLICIT_LEADING_BIT_POS = 23; - static const s8 msb[256]; - 
static const s32 debruijn32[64]; - static const s32 normalizeAmounts[]; + static constexpr s8 msb[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 + }; + + static constexpr s32 debruijn32[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12 + }; + + static constexpr s32 normalizeAmounts[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24 + }; PS2Float(u32 value); @@ -56,7 +74,7 @@ public: static PS2Float MinOne(); - static PS2Float Neg(PS2Float self); + static PS2Float Neg(PS2Float self); u32 AsUInt32() const; diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index f12903b72d..04da937e62 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -463,37 +463,32 @@ static __fi float vuDouble(u32 f) } #endif -static __fi u32 vuAccurateAddSub(VURegs* VU, u32 a, u32 b, bool issub) +static __fi u32 vuAccurateAdd(VURegs* VU, u32 a, u32 b) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) - { - if (issub) - return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); - else - return PS2Float(a).Add(PS2Float(b)).AsUInt32(); - } - - if (issub) - return std::bit_cast(vuDouble(a) - vuDouble(b)); - else - return std::bit_cast(vuDouble(a) + vuDouble(b)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Add(PS2Float(b)).AsUInt32(); + return std::bit_cast(vuDouble(a) + vuDouble(b)); } -static __fi u32 vuAccurateMulDiv(VURegs* VU, u32 a, u32 b, bool isdiv) +static __fi u32 vuAccurateSub(VURegs* VU, u32 a, u32 b) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) - { - if (isdiv) - return PS2Float(a).Div(PS2Float(b)).AsUInt32(); - else - return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); - } + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); - if (isdiv) - return std::bit_cast(vuDouble(a) / vuDouble(b)); - else - return std::bit_cast(vuDouble(a) * vuDouble(b)); + return std::bit_cast(vuDouble(a) - vuDouble(b)); +} + +static __fi u32 vuAccurateMul(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); + + return std::bit_cast(vuDouble(a) * vuDouble(b)); +} + +static __fi u32 vuAccurateDiv(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Div(PS2Float(b)).AsUInt32(); + + return std::bit_cast(vuDouble(a) / vuDouble(b)); } void _vuABS(VURegs* VU) @@ -501,44 +496,10 @@ void _vuABS(VURegs* VU) if (_Ft_ == 0) return; - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) - { - if (_X) - { - VU->VF[_Ft_].i.x = PS2Float(VU->VF[_Fs_].i.x).Abs(); - } - if (_Y) - { - VU->VF[_Ft_].i.y = PS2Float(VU->VF[_Fs_].i.y).Abs(); - } - if (_Z) - { - VU->VF[_Ft_].i.z = PS2Float(VU->VF[_Fs_].i.z).Abs(); - } - if (_W) - { - VU->VF[_Ft_].i.w = PS2Float(VU->VF[_Fs_].i.w).Abs(); - } - } - else - { - if (_X) - { - VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); - } - if (_Y) - { - VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); - } - if (_Z) - { - VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); - } - if (_W) - { - VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); - } - } + if (_X) VU->VF[_Ft_].i.x = PS2Float(VU->VF[_Fs_].i.x).Abs(); + if (_Y) VU->VF[_Ft_].i.y = PS2Float(VU->VF[_Fs_].i.y).Abs(); + if (_Z) VU->VF[_Ft_].i.z = PS2Float(VU->VF[_Fs_].i.z).Abs(); + if (_W) VU->VF[_Ft_].i.w = PS2Float(VU->VF[_Fs_].i.w).Abs(); } @@ -550,10 +511,10 @@ static __fi void _vuADD(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -566,10 
+527,10 @@ static __fi void _vuADDi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0));} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -581,10 +542,10 @@ static __fi void _vuADDq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0));} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, 
VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -597,10 +558,10 @@ static __fi void _vuADDx(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -612,10 +573,10 @@ static __fi void _vuADDy(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, 
VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -627,10 +588,10 @@ static __fi void _vuADDz(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -642,72 +603,72 @@ static __fi void _vuADDw(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } 
else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, 
VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAx(VURegs* VU) { - if 
(_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if 
(_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, 
VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -720,10 +681,10 @@ static __fi void _vuSUB(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -735,10 +696,10 @@ static __fi void _vuSUBi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 1));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 1));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 1));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, 
VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 1));} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -750,10 +711,10 @@ static __fi void _vuSUBq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1));} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -765,10 +726,10 @@ static __fi void _vuSUBx(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 1)); 
} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -780,10 +741,10 @@ static __fi void _vuSUBy(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -795,10 +756,10 @@ static __fi void _vuSUBz(VURegs* VU) else dst = &VU->VF[_Fd_]; - 
if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -810,71 +771,71 @@ static __fi void _vuSUBw(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + 
if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, 
VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAx(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 1)); } else VU_MACw_CLEAR(VU); 
+ if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, 
VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 1)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 1)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 1)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -886,10 +847,10 @@ static __fi void _vuMUL(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, 
VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -901,10 +862,10 @@ static __fi void _vuMULi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, 
VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -916,10 +877,10 @@ static __fi void _vuMULq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -932,10 +893,10 @@ static __fi void _vuMULx(VURegs* VU) dst = &VU->VF[_Fd_]; u32 ftx = VU->VF[_Ft_].i.x; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = 
VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -949,10 +910,10 @@ static __fi void _vuMULy(VURegs* VU) dst = &VU->VF[_Fd_]; u32 fty = VU->VF[_Ft_].i.y; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -965,10 +926,10 @@ static __fi void _vuMULz(VURegs* VU) dst = &VU->VF[_Fd_]; u32 ftz = VU->VF[_Ft_].i.z; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, 
VU->VF[_Fs_].i.y, ftz)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -981,68 +942,68 @@ static __fi void _vuMULw(VURegs* VU) dst = &VU->VF[_Fd_]; u32 ftw = VU->VF[_Ft_].i.w; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } 
else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, 
vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAx(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else 
VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, 
vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1056,10 +1017,10 @@ static __fi void _vuMADD(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, 
tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1074,10 +1035,10 @@ static __fi void _vuMADDi(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, 
VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1091,10 +1052,10 @@ static __fi void _vuMADDq(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1109,10 +1070,10 @@ static __fi void _vuMADDx(VURegs* VU) tmp = &VU->TMP; u32 ftx = VU->VF[_Ft_].i.x; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else 
VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1127,10 +1088,10 @@ static __fi void _vuMADDy(VURegs* VU) tmp = &VU->TMP; u32 fty = VU->VF[_Ft_].i.y; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else 
VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1145,10 +1106,10 @@ static __fi void _vuMADDz(VURegs* VU) tmp = &VU->TMP; u32 ftz = VU->VF[_Ft_].i.z; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = 
vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1163,10 +1124,10 @@ static __fi void _vuMADDw(VURegs* VU) tmp = &VU->TMP; u32 ftw = VU->VF[_Ft_].i.w; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1174,10 +1135,10 @@ static __fi void _vuMADDA(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); 
VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1185,10 +1146,10 @@ static __fi void _vuMADDAi(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); VU->ACC.i.w = 
VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1196,10 +1157,10 @@ static __fi void _vuMADDAq(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, 
tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1207,10 +1168,10 @@ static __fi void _vuMADDAx(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); 
VU_STAT_UPDATE(VU); } @@ -1218,10 +1179,10 @@ static __fi void _vuMADDAy(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1229,10 +1190,10 @@ static __fi void _vuMADDAz(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0); VU->ACC.i.y = 
VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1240,10 +1201,10 @@ static __fi void _vuMADDAw(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, 
vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1257,10 +1218,10 @@ static __fi void _vuMSUB(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else 
VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1275,10 +1236,10 @@ static __fi void _vuMSUBi(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1292,10 +1253,10 @@ static __fi 
void _vuMSUBq(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1310,10 +1271,10 @@ static __fi void _vuMSUBx(VURegs* VU) tmp = &VU->TMP; u32 ftx = VU->VF[_Ft_].i.x; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) 
{tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1328,10 +1289,10 @@ static __fi void _vuMSUBy(VURegs* VU) tmp = &VU->TMP; u32 fty = VU->VF[_Ft_].i.y; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = 
vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1346,10 +1307,10 @@ static __fi void _vuMSUBz(VURegs* VU) tmp = &VU->TMP; u32 ftz = VU->VF[_Ft_].i.z; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1364,10 +1325,10 @@ static __fi void 
_vuMSUBw(VURegs* VU) tmp = &VU->TMP; u32 ftw = VU->VF[_Ft_].i.w; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1375,10 +1336,10 @@ static __fi void _vuMSUBA(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z 
= VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1386,10 +1347,10 @@ static __fi void _vuMSUBAi(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, 
vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1397,10 +1358,10 @@ static __fi void _vuMSUBAq(VURegs* VU) { VECTOR* tmp; tmp = &VU->TMP; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else 
VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1410,10 +1371,10 @@ static __fi void _vuMSUBAx(VURegs* VU) VECTOR* tmp; tmp = &VU->TMP; u32 tx = VU->VF[_Ft_].i.x; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tx, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tx, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tx, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tx, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tx); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tx); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tx); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tx); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1422,10 +1383,10 @@ static __fi void _vuMSUBAy(VURegs* VU) VECTOR* tmp; tmp = &VU->TMP; u32 ty = VU->VF[_Ft_].i.y; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ty, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else 
VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ty, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ty, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ty, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ty); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ty); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ty); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ty); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1434,10 +1395,10 @@ static __fi void _vuMSUBAz(VURegs* VU) VECTOR* tmp; tmp = &VU->TMP; u32 tz = VU->VF[_Ft_].i.z; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tz, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tz, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tz, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tz, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, 
VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tz); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tz); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tz); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tz); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1446,10 +1407,10 @@ static __fi void _vuMSUBAw(VURegs* VU) VECTOR* tmp; tmp = &VU->TMP; u32 tw = VU->VF[_Ft_].i.w; - if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tw, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); - if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tw, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); - if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tw, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); - if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tw, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tw); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tw); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tw); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, 
VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tw); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1610,9 +1571,9 @@ static __fi void _vuMINIw(VURegs* VU) static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); VU_STAT_UPDATE(VU); } @@ -1626,12 +1587,12 @@ static __fi void _vuOPMSUB(VURegs* VU) dst = &VU->VF[_Fd_]; tmp = &VU->TMP; - tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0); - tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0); - tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0); - dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1)); - dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1)); - dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1)); + tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z); + tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x); + tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z)); VU_STAT_UPDATE(VU); } @@ -1766,36 +1727,24 @@ static __fi void _vuCLIP(VURegs* VU) 
double value = PS2Float(PS2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); VU->clipflag <<= 6; - if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) - VU->clipflag |= 0x01; - if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) - VU->clipflag |= 0x02; - if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) - VU->clipflag |= 0x04; - if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) - VU->clipflag |= 0x08; - if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) - VU->clipflag |= 0x10; - if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) - VU->clipflag |= 0x20; + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) VU->clipflag |= 0x01; + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) VU->clipflag |= 0x02; + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) VU->clipflag |= 0x04; + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) VU->clipflag |= 0x08; + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) VU->clipflag |= 0x10; + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) VU->clipflag |= 0x20; } else { float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); VU->clipflag <<= 6; - if (vuDouble(VU->VF[_Fs_].i.x) > +value) - VU->clipflag |= 0x01; - if (vuDouble(VU->VF[_Fs_].i.x) < -value) - VU->clipflag |= 0x02; - if (vuDouble(VU->VF[_Fs_].i.y) > +value) - VU->clipflag |= 0x04; - if (vuDouble(VU->VF[_Fs_].i.y) < -value) - VU->clipflag |= 0x08; - if (vuDouble(VU->VF[_Fs_].i.z) > +value) - VU->clipflag |= 0x10; - if (vuDouble(VU->VF[_Fs_].i.z) < -value) - VU->clipflag |= 0x20; + if (vuDouble(VU->VF[_Fs_].i.x) > +value) VU->clipflag |= 0x01; + if (vuDouble(VU->VF[_Fs_].i.x) < -value) VU->clipflag |= 0x02; + if (vuDouble(VU->VF[_Fs_].i.y) > +value) VU->clipflag |= 0x04; + if (vuDouble(VU->VF[_Fs_].i.y) < -value) VU->clipflag |= 0x08; + if (vuDouble(VU->VF[_Fs_].i.z) > +value) VU->clipflag |= 0x10; + if (vuDouble(VU->VF[_Fs_].i.z) < -value) VU->clipflag |= 0x20; } VU->clipflag = VU->clipflag & 0xFFFFFF; @@ -2619,20 +2568,20 @@ static __ri void _vuWAITP(VURegs* VU) static __ri 
void _vuESADD(VURegs* VU) { - u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0); + VU->p.UL = vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z); } static __ri void _vuERSADD(VURegs* VU) { - u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - PS2Float p = PS2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + PS2Float p = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); if (!p.IsZero()) { @@ -2650,11 +2599,11 @@ static __ri void _vuERSADD(VURegs* VU) static __ri void _vuELENG(VURegs* VU) { - u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - PS2Float value = PS2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + PS2Float value = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { @@ -2678,11 +2627,11 @@ static __ri void _vuELENG(VURegs* VU) static __ri void _vuERLENG(VURegs* VU) { - u32 x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); - u32 y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); - u32 z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - PS2Float value = PS2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + PS2Float value = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { @@ -2750,7 +2699,7 @@ static __ri void _vuEATANxy(VURegs* VU) float p = 0; if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x, 1)); + p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x)); } VU->p.F = p; } @@ -2760,14 +2709,14 @@ static __ri void _vuEATANxz(VURegs* VU) float p = 0; if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x, 1)); + p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x)); } VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.y, 0), VU->VF[_Fs_].i.z, 0), VU->VF[_Fs_].i.w, 0); + VU->p.UL = vuAccurateAdd(VU, vuAccurateAdd(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.y), VU->VF[_Fs_].i.z), VU->VF[_Fs_].i.w); } static __ri void _vuERCPR(VURegs* VU) From 745e4746fce6ec7e231216431478950c5d6763b9 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sat, 7 Dec 2024 22:38:10 +0100 Subject: [PATCH 11/15] [Soft-Float] - Improves the PS2Float class by using a raw float to 
speed-up simple calculations. This greatly improves performance while using Soft-Floats. --- pcsx2/FPU.cpp | 25 +++--- pcsx2/PS2Float.cpp | 198 +++++++++++++++++++-------------------------- pcsx2/PS2Float.h | 17 ++-- pcsx2/VUflags.cpp | 4 +- pcsx2/VUops.cpp | 42 +++++----- 5 files changed, 125 insertions(+), 161 deletions(-) diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index d4967dc467..96b51f10c5 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -146,16 +146,13 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT _ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1; - bool IsSigned = yMatrix.Sign ^ zMatrix.Sign; + bool IsSigned = yMatrix.Sign() ^ zMatrix.Sign(); if (dividendZero) xReg = IsSigned ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; else { - PS2Float zeroRes = PS2Float(0); - - zeroRes.Sign = IsSigned; - xReg = zeroRes.AsUInt32(); + xReg = PS2Float(IsSigned, 0, 0).raw; } return true; @@ -221,28 +218,28 @@ float fpuDouble(u32 f) static __fi u32 fpuAccurateAdd(u32 a, u32 b) { - if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Add(PS2Float(b)).AsUInt32(); + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Add(PS2Float(b)).raw; return std::bit_cast(fpuDouble(a) + fpuDouble(b)); } static __fi u32 fpuAccurateSub(u32 a, u32 b) { - if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Sub(PS2Float(b)).raw; return std::bit_cast(fpuDouble(a) - fpuDouble(b)); } static __fi u32 fpuAccurateMul(u32 a, u32 b) { - if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Mul(PS2Float(b)).raw; return std::bit_cast(fpuDouble(a) * fpuDouble(b)); } static __fi u32 fpuAccurateDiv(u32 a, u32 b) { - if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Div(PS2Float(b)).AsUInt32(); + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Div(PS2Float(b)).raw; return std::bit_cast(fpuDouble(a) / fpuDouble(b)); } 
@@ -472,17 +469,17 @@ void RSQRT_S() { if (value.IsDenormalized()) { _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = value.Sign ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; + _FdValUl_ = value.Sign() ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; return; } else if (_FtValUl_ & 0x80000000) // Ft is negative { _ContVal_ |= FPUflagI | FPUflagSI; - _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(PS2Float(value.Abs())).AsUInt32(); + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(PS2Float(value.Abs())).raw; } else // Ft is positive and not zero { - _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(value).AsUInt32(); + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(value).raw; } } else @@ -519,10 +516,10 @@ void SQRT_S() { if (_FtValUl_ & 0x80000000) // If Ft is Negative { _ContVal_ |= FPUflagI | FPUflagSI; - _FdValUl_ = PS2Float(value.Abs()).Sqrt().AsUInt32(); + _FdValUl_ = PS2Float(value.Abs()).Sqrt().raw; } else - _FdValUl_ = value.Sqrt().AsUInt32(); // If Ft is Positive + _FdValUl_ = value.Sqrt().raw; // If Ft is Positive } else { diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp index 9a9babeabf..955759c2e1 100644 --- a/pcsx2/PS2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -74,18 +74,14 @@ u64 PS2Float::MulMantissa(u32 a, u32 b) // Float Processor //**************************************************************** -PS2Float::PS2Float(u32 value) - : Sign((value >> 31) & 1) - , Exponent((u8)(((value >> 23) & 0xFF))) - , Mantissa(value & 0x7FFFFF) -{ -} +PS2Float::PS2Float(u32 value) { raw = value; } PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) - : Sign(sign) - , Exponent(exponent) - , Mantissa(mantissa) { + raw = 0; + raw |= (sign ? 1u : 0u) << 31; + raw |= (u32)(exponent << 23); + raw |= mantissa; } PS2Float PS2Float::Max() @@ -108,15 +104,6 @@ PS2Float PS2Float::MinOne() return PS2Float(MIN_ONE); } -u32 PS2Float::AsUInt32() const -{ - u32 result = 0; - result |= (Sign ? 
1u : 0u) << 31; - result |= (u32)(Exponent << 23); - result |= Mantissa; - return result; -} - PS2Float PS2Float::Add(PS2Float addend) { if (IsDenormalized() || addend.IsDenormalized()) @@ -125,8 +112,8 @@ PS2Float PS2Float::Add(PS2Float addend) if (IsAbnormal() && addend.IsAbnormal()) return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true); - u32 a = AsUInt32(); - u32 b = addend.AsUInt32(); + u32 a = raw; + u32 b = addend.raw; //exponent difference s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); @@ -169,8 +156,8 @@ PS2Float PS2Float::Sub(PS2Float subtrahend) if (IsAbnormal() && subtrahend.IsAbnormal()) return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false); - u32 a = AsUInt32(); - u32 b = subtrahend.AsUInt32(); + u32 a = raw; + u32 b = subtrahend.raw; //exponent difference s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); @@ -203,7 +190,7 @@ PS2Float PS2Float::Sub(PS2Float subtrahend) } - return PS2Float(a).DoAdd(Neg(PS2Float(b))); + return PS2Float(a).DoAdd(PS2Float(b).Negate()); } PS2Float PS2Float::Mul(PS2Float mulend) @@ -215,12 +202,7 @@ PS2Float PS2Float::Mul(PS2Float mulend) return SolveAbnormalMultiplicationOrDivisionOperation(*this, mulend, true); if (IsZero() || mulend.IsZero()) - { - PS2Float result = PS2Float(0); - - result.Sign = DetermineMultiplicationDivisionOperationSign(*this, mulend); - return result; - } + return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, mulend), 0, 0); return DoMul(mulend); } @@ -234,12 +216,7 @@ PS2Float PS2Float::Div(PS2Float divend) return SolveAbnormalMultiplicationOrDivisionOperation(*this, divend, false); if (IsZero()) - { - PS2Float result = PS2Float(0); - - result.Sign = DetermineMultiplicationDivisionOperationSign(*this, divend); - return result; - } + return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, divend), 0, 0); else if (divend.IsZero()) return DetermineMultiplicationDivisionOperationSign(*this, divend) ? 
Min() : Max(); @@ -258,7 +235,7 @@ PS2Float PS2Float::Sqrt() return PS2Float(0); // PS2 only takes positive numbers for SQRT, and convert if necessary. - s32 ix = (s32)(PS2Float(false, Exponent, Mantissa).AsUInt32()); + s32 ix = (s32)PS2Float(false, Exponent(), Mantissa()).raw; /* extract mantissa and unbias exponent */ s32 m = (ix >> 23) - BIAS; @@ -308,39 +285,44 @@ PS2Float PS2Float::Rsqrt(PS2Float other) bool PS2Float::IsDenormalized() { - return Exponent == 0; + return Exponent() == 0; } bool PS2Float::IsAbnormal() { - u32 val = AsUInt32(); + u32 val = raw; return val == MAX_FLOATING_POINT_VALUE || val == MIN_FLOATING_POINT_VALUE || val == POSITIVE_INFINITY_VALUE || val == NEGATIVE_INFINITY_VALUE; } bool PS2Float::IsZero() { - return (Abs()) == 0; + return Abs() == 0; } u32 PS2Float::Abs() { - return (AsUInt32() & MAX_FLOATING_POINT_VALUE); + return (raw & MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Negate() +{ + return PS2Float(raw ^ 0x80000000); } PS2Float PS2Float::RoundTowardsZero() { - return PS2Float((u32)(std::trunc((double)(AsUInt32())))); + return PS2Float((u32)std::trunc((double)raw)); } s32 PS2Float::CompareTo(PS2Float other) { - s32 selfTwoComplementVal = (s32)(Abs()); - if (Sign) + s32 selfTwoComplementVal = (s32)Abs(); + if (Sign()) selfTwoComplementVal = -selfTwoComplementVal; - s32 otherTwoComplementVal = (s32)(other.Abs()); - if (other.Sign) + s32 otherTwoComplementVal = (s32)other.Abs(); + if (other.Sign()) otherTwoComplementVal = -otherTwoComplementVal; if (selfTwoComplementVal < otherTwoComplementVal) @@ -353,8 +335,8 @@ s32 PS2Float::CompareTo(PS2Float other) s32 PS2Float::CompareOperand(PS2Float other) { - s32 selfTwoComplementVal = (s32)(Abs()); - s32 otherTwoComplementVal = (s32)(other.Abs()); + s32 selfTwoComplementVal = (s32)Abs(); + s32 otherTwoComplementVal = (s32)other.Abs(); if (selfTwoComplementVal < otherTwoComplementVal) return -1; @@ -366,14 +348,14 @@ s32 PS2Float::CompareOperand(PS2Float other) double 
PS2Float::ToDouble() { - return std::bit_cast(((u64)Sign << 63) | ((((u64)Exponent - BIAS) + 1023ULL) << 52) | ((u64)Mantissa << 29)); + return std::bit_cast(((u64)Sign() << 63) | ((((u64)Exponent() - BIAS) + 1023ULL) << 52) | ((u64)Mantissa() << 29)); } std::string PS2Float::ToString() { double res = ToDouble(); - u32 value = AsUInt32(); + u32 value = raw; std::ostringstream oss; oss << std::fixed << std::setprecision(6); @@ -409,8 +391,8 @@ PS2Float PS2Float::DoAdd(PS2Float other) { const u8 roundingMultiplier = 6; - u8 selfExponent = Exponent; - s32 resExponent = selfExponent - other.Exponent; + u8 selfExponent = Exponent(); + s32 resExponent = selfExponent - other.Exponent(); if (resExponent < 0) return other.DoAdd(*this); @@ -418,10 +400,10 @@ PS2Float PS2Float::DoAdd(PS2Float other) return *this; // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate - u32 sign1 = (u32)((s32)AsUInt32() >> 31); - s32 selfMantissa = (s32)(((Mantissa | 0x800000) ^ sign1) - sign1); - u32 sign2 = (u32)((s32)other.AsUInt32() >> 31); - s32 otherMantissa = (s32)(((other.Mantissa | 0x800000) ^ sign2) - sign2); + u32 sign1 = (u32)((s32)raw >> 31); + s32 selfMantissa = (s32)(((Mantissa() | 0x800000) ^ sign1) - sign1); + u32 sign2 = (u32)((s32)other.raw >> 31); + s32 otherMantissa = (s32)(((other.Mantissa() | 0x800000) ^ sign2) - sign2); // PS2 multiply by 2 before doing the Math here. 
s32 man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); @@ -450,11 +432,11 @@ PS2Float PS2Float::DoAdd(PS2Float other) PS2Float PS2Float::DoMul(PS2Float other) { - u8 selfExponent = Exponent; - u8 otherExponent = other.Exponent; - u32 selfMantissa = Mantissa | 0x800000; - u32 otherMantissa = other.Mantissa | 0x800000; - u32 sign = (AsUInt32() ^ other.AsUInt32()) & SIGNMASK; + u8 selfExponent = Exponent(); + u8 otherExponent = other.Exponent(); + u32 selfMantissa = Mantissa() | 0x800000; + u32 otherMantissa = other.Mantissa() | 0x800000; + u32 sign = (raw ^ other.raw) & SIGNMASK; s32 resExponent = selfExponent + otherExponent - 127; u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> 23); @@ -476,25 +458,22 @@ PS2Float PS2Float::DoMul(PS2Float other) // Rounding can be slightly off: (PS2: 0x3F800000 / 0x3F800001 = 0x3F7FFFFF | SoftFloat/IEEE754: 0x3F800000 / 0x3F800001 = 0x3F7FFFFE). PS2Float PS2Float::DoDiv(PS2Float other) { + bool sign = DetermineMultiplicationDivisionOperationSign(*this, other); + u32 selfMantissa = Mantissa() | 0x800000; + u32 otherMantissa = other.Mantissa() | 0x800000; + s32 resExponent = Exponent() - other.Exponent() + BIAS; u64 selfMantissa64; - u32 selfMantissa = Mantissa | 0x800000; - u32 otherMantissa = other.Mantissa | 0x800000; - s32 resExponent = Exponent - other.Exponent + BIAS; - - PS2Float result = PS2Float(0); - - result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); if (resExponent > 255) - return result.Sign ? Min() : Max(); + return sign ? 
Min() : Max(); else if (resExponent <= 0) - return PS2Float(result.Sign, 0, 0); + return PS2Float(sign, 0, 0); if (selfMantissa < otherMantissa) { --resExponent; if (resExponent == 0) - return PS2Float(result.Sign, 0, 0); + return PS2Float(sign, 0, 0); selfMantissa64 = (u64)(selfMantissa) << 31; } else @@ -503,55 +482,55 @@ PS2Float PS2Float::DoDiv(PS2Float other) } u32 resMantissa = (u32)(selfMantissa64 / otherMantissa); + if ((resMantissa & 0x3F) == 0) resMantissa |= ((u64)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0; - result.Exponent = (u8)(resExponent); - result.Mantissa = (resMantissa + 0x40U) >> 7; + resMantissa = (resMantissa + 0x40U) >> 7; - if (result.Mantissa > 0) + if (resMantissa > 0) { - s32 leadingBitPosition = PS2Float::GetMostSignificantBitPosition(result.Mantissa); + s32 leadingBitPosition = PS2Float::GetMostSignificantBitPosition(resMantissa); while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) { if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) { - result.Mantissa >>= 1; + resMantissa >>= 1; - s32 exp = ((s32)result.Exponent + 1); + s32 exp = resExponent + 1; if (exp > 255) - return result.Sign ? Min() : Max(); + return sign ? 
Min() : Max(); - result.Exponent = (u8)exp; + resExponent = exp; leadingBitPosition--; } else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) { - result.Mantissa <<= 1; + resMantissa <<= 1; - s32 exp = ((s32)result.Exponent - 1); + s32 exp = resExponent - 1; if (exp <= 0) - return PS2Float(result.Sign, 0, 0); + return PS2Float(sign, 0, 0); - result.Exponent = (u8)exp; + resExponent = exp; leadingBitPosition++; } } } - result.Mantissa &= 0x7FFFFF; - return result.RoundTowardsZero(); + resMantissa &= 0x7FFFFF; + return PS2Float(sign, (u8)resExponent, resMantissa).RoundTowardsZero(); } PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add) { - u32 aval = a.AsUInt32(); - u32 bval = b.AsUInt32(); + u32 aval = a.raw; + u32 bval = b.raw; if (aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) return add ? Max() : PS2Float(0); @@ -608,8 +587,8 @@ PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Fl PS2Float PS2Float::SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul) { - u32 aval = a.AsUInt32(); - u32 bval = b.AsUInt32(); + u32 aval = a.raw; + u32 bval = b.raw; if (mul) { @@ -711,38 +690,31 @@ PS2Float PS2Float::SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS PS2Float PS2Float::SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add) { - PS2Float result = PS2Float(0); + bool sign = add ? DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); if (a.IsDenormalized() && !b.IsDenormalized()) - result = b; + return PS2Float(sign, b.Exponent(), b.Mantissa()); else if (!a.IsDenormalized() && b.IsDenormalized()) - result = a; + return PS2Float(sign, a.Exponent(), a.Mantissa()); else if (a.IsDenormalized() && b.IsDenormalized()) - { - } + return PS2Float(sign, 0, 0); else Console.Error("Both numbers are not denormalized"); - result.Sign = add ? 
DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); - return result; + return PS2Float(0); } PS2Float PS2Float::SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b) { - PS2Float result = PS2Float(0); - - result.Sign = DetermineMultiplicationDivisionOperationSign(a, b); - return result; + return PS2Float(DetermineMultiplicationDivisionOperationSign(a, b), 0, 0); } PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) { bool sign = DetermineMultiplicationDivisionOperationSign(a, b); - PS2Float result = PS2Float(0); if (a.IsDenormalized() && !b.IsDenormalized()) - { - } + return PS2Float(sign, 0, 0); else if (!a.IsDenormalized() && b.IsDenormalized()) return sign ? Min() : Max(); else if (a.IsDenormalized() && b.IsDenormalized()) @@ -750,48 +722,42 @@ PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) else Console.Error("Both numbers are not denormalized"); - result.Sign = sign; - return result; -} - -PS2Float PS2Float::Neg(PS2Float self) -{ - return PS2Float(self.AsUInt32() ^ SIGNMASK); + return PS2Float(0); } bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) { - return a.Sign ^ b.Sign; + return a.Sign() ^ b.Sign(); } bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) { if (a.IsZero() && b.IsZero()) { - if (!a.Sign || !b.Sign) + if (!a.Sign() || !b.Sign()) return false; - else if (a.Sign && b.Sign) + else if (a.Sign() && b.Sign()) return true; else Console.Error("Unhandled addition operation flags"); } - return a.CompareOperand(b) >= 0 ? a.Sign : b.Sign; + return a.CompareOperand(b) >= 0 ? 
a.Sign() : b.Sign(); } bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) { if (a.IsZero() && b.IsZero()) { - if (!a.Sign || b.Sign) + if (!a.Sign() || b.Sign()) return false; - else if (a.Sign && !b.Sign) + else if (a.Sign() && !b.Sign()) return true; else Console.Error("Unhandled subtraction operation flags"); } - return a.CompareOperand(b) >= 0 ? a.Sign : !b.Sign; + return a.CompareOperand(b) >= 0 ? a.Sign() : !b.Sign(); } s32 PS2Float::clz(s32 x) diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h index 341bc10b56..89c0b94969 100644 --- a/pcsx2/PS2Float.h +++ b/pcsx2/PS2Float.h @@ -26,10 +26,7 @@ class PS2Float static AddResult Add3(u32 a, u32 b, u32 c); public: - bool Sign; - u8 Exponent; - u32 Mantissa; - + static constexpr u8 BIAS = 127; static constexpr u32 SIGNMASK = 0x80000000; static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; @@ -62,6 +59,12 @@ public: 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24 }; + u32 raw; + + constexpr u32 Mantissa() const { return raw & 0x7FFFFF; } + constexpr u8 Exponent() const { return (raw >> 23) & 0xFF; } + constexpr bool Sign() const { return ((raw >> 31) & 1) != 0; } + PS2Float(u32 value); PS2Float(bool sign, u8 exponent, u32 mantissa); @@ -74,10 +77,6 @@ public: static PS2Float MinOne(); - static PS2Float Neg(PS2Float self); - - u32 AsUInt32() const; - PS2Float Add(PS2Float addend); PS2Float Sub(PS2Float subtrahend); @@ -98,6 +97,8 @@ public: u32 Abs(); + PS2Float Negate(); + PS2Float RoundTowardsZero(); s32 CompareTo(PS2Float other); diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index d3422c2587..4640fcf405 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -16,8 +16,8 @@ static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, u32 f) { PS2Float ps2f = PS2Float(f); - u32 exp = ps2f.Exponent; - u32 s = ps2f.AsUInt32() & PS2Float::SIGNMASK; + u32 exp = ps2f.Exponent(); + u32 s = ps2f.raw & PS2Float::SIGNMASK; if (s) 
VU->macflag |= 0x0010<(vuDouble(a) + vuDouble(b)); } static __fi u32 vuAccurateSub(VURegs* VU, u32 a, u32 b) { - if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Sub(PS2Float(b)).AsUInt32(); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Sub(PS2Float(b)).raw; return std::bit_cast(vuDouble(a) - vuDouble(b)); } static __fi u32 vuAccurateMul(VURegs* VU, u32 a, u32 b) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Mul(PS2Float(b)).AsUInt32(); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Mul(PS2Float(b)).raw; return std::bit_cast(vuDouble(a) * vuDouble(b)); } static __fi u32 vuAccurateDiv(VURegs* VU, u32 a, u32 b) { - if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Div(PS2Float(b)).AsUInt32(); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Div(PS2Float(b)).raw; return std::bit_cast(vuDouble(a) / vuDouble(b)); } @@ -1778,7 +1778,7 @@ static __fi void _vuDIV(VURegs* VU) } else { - VU->q.UL = fs.Div(ft).AsUInt32(); + VU->q.UL = fs.Div(ft).raw; } } else @@ -1819,7 +1819,7 @@ static __fi void _vuSQRT(VURegs* VU) if (ft.ToDouble() < 0.0) VU->statusflag |= 0x10; - VU->q.UL = PS2Float(ft.Abs()).Sqrt().AsUInt32(); + VU->q.UL = PS2Float(ft.Abs()).Sqrt().raw; } else { @@ -1874,11 +1874,11 @@ static __fi void _vuRSQRT(VURegs* VU) } if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) - VU->q.UL = fs.Div(PS2Float(ft.Abs()).Sqrt()).AsUInt32(); + VU->q.UL = fs.Div(PS2Float(ft.Abs()).Sqrt()).raw; else { - float temp = sqrt(fabs(vuDouble(ft.AsUInt32()))); - VU->q.F = vuDouble(fs.AsUInt32()) / temp; + float temp = sqrt(fabs(vuDouble(ft.raw))); + VU->q.F = vuDouble(fs.raw) / temp; VU->q.F = vuDouble(VU->q.UL); } } @@ -2589,12 +2589,12 @@ static __ri void _vuERSADD(VURegs* VU) p = PS2Float::One().Div(p); else { - VU->p.F = 1.0f / vuDouble(p.AsUInt32()); + VU->p.F = 1.0f / vuDouble(p.raw); return; } } - VU->p.UL = p.AsUInt32(); + VU->p.UL = p.raw; } static __ri void _vuELENG(VURegs* VU) @@ -2611,11 +2611,11 @@ static __ri void _vuELENG(VURegs* VU) { value = value.Sqrt(); } - VU->p.UL = value.AsUInt32(); + VU->p.UL = value.raw; } else { - float p = vuDouble(value.AsUInt32()); + float p = vuDouble(value.raw); if (p >= 0) { @@ -2646,16 +2646,16 @@ static __ri void _vuERLENG(VURegs* VU) } else { - VU->p.F = 1.0 / vuDouble(value.AsUInt32()); + VU->p.F = 1.0 / vuDouble(value.raw); return; } } } - VU->p.UL = value.AsUInt32(); + VU->p.UL = value.raw; } else { - float p = vuDouble(value.AsUInt32()); + float p = vuDouble(value.raw); if (p >= 0) { @@ -2731,12 +2731,12 @@ static __ri void _vuERCPR(VURegs* VU) } else { - VU->p.F = 1.0 / vuDouble(p.AsUInt32()); + VU->p.F = 1.0 / vuDouble(p.raw); return; } } - VU->p.UL = p.AsUInt32(); + VU->p.UL = p.raw; } static __ri void _vuESQRT(VURegs* VU) @@ -2750,7 +2750,7 @@ static __ri void _vuESQRT(VURegs* VU) value = value.Sqrt(); } - VU->p.UL = value.AsUInt32(); + VU->p.UL = value.raw; } else { @@ -2778,7 +2778,7 @@ static __ri void _vuERSQRT(VURegs* VU) { if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) { - VU->p.F = 1.0f / vuDouble(value.AsUInt32()); + VU->p.F = 1.0f / vuDouble(value.raw); return; } else @@ -2788,7 +2788,7 @@ static __ri void _vuERSQRT(VURegs* VU) } } - VU->p.UL = value.AsUInt32(); + VU->p.UL = value.raw; } else { From 5b94f53efccff3c73c0393dd6ae6b1c0c91097eb Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:33:40 +0100 Subject: [PATCH 12/15] [Soft-Float] - Fixes macOS compile error + moves the bit-utility methods to their respective places. I don't like the SIMD way of doing it; it can be slower and less practical to use (expensive casting). --- common/BitUtils.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ pcsx2/PS2Float.cpp | 36 ++++++------------------------------ pcsx2/PS2Float.h | 28 ---------------------------- 3 files changed, 51 insertions(+), 58 deletions(-) diff --git a/common/BitUtils.h b/common/BitUtils.h index 64b6ff3047..93794038f8 100644 --- a/common/BitUtils.h +++ b/common/BitUtils.h @@ -28,6 +28,25 @@ static inline int _BitScanReverse(unsigned long* const Index, const unsigned lon namespace Common { + static constexpr s8 msb[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + + static constexpr s32
debruijn32[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12}; + + static constexpr s32 normalizeAmounts[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; + template static constexpr __fi bool IsAligned(T value, unsigned int alignment) { @@ -84,6 +103,32 @@ namespace Common // Perform our count leading zero. return std::countl_zero(static_cast(n)); } + + __fi static s32 clz(s32 x) + { + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return debruijn32[(u32)x * 0x8c0b2891u >> 26]; + } + + __fi static s32 BitScanReverse8(s32 b) + { + return msb[b]; + } + + __fi static s32 GetMostSignificantBitPosition(u32 value) + { + for (s32 i = 31; i >= 0; i--) + { + if (((value >> i) & 1) != 0) + return i; + } + return -1; + } } // namespace Common template diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp index 955759c2e1..a4f0e8bf0f 100644 --- a/pcsx2/PS2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -8,6 +8,8 @@ #include #include #include +#include "common/Pcsx2Defs.h" +#include "common/BitUtils.h" #include "PS2Float.h" #include "Common.h" @@ -381,7 +383,7 @@ std::string PS2Float::ToString() } else { - oss << "Ps2Float(" << res << ")"; + oss << "PS2Float(" << res << ")"; } return oss.str(); @@ -414,11 +416,11 @@ PS2Float PS2Float::DoAdd(PS2Float other) // Remove from exponent the PS2 Multiplier value. 
s32 rawExp = selfExponent - roundingMultiplier; - s32 amount = normalizeAmounts[clz(absMan)]; + s32 amount = Common::normalizeAmounts[Common::clz(absMan)]; rawExp -= amount; absMan <<= amount; - s32 msbIndex = BitScanReverse8(absMan >> 23); + s32 msbIndex = Common::BitScanReverse8(absMan >> 23); rawExp += msbIndex; absMan >>= msbIndex; @@ -490,7 +492,7 @@ PS2Float PS2Float::DoDiv(PS2Float other) if (resMantissa > 0) { - s32 leadingBitPosition = PS2Float::GetMostSignificantBitPosition(resMantissa); + s32 leadingBitPosition = Common::GetMostSignificantBitPosition(resMantissa); while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) { @@ -759,29 +761,3 @@ bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) return a.CompareOperand(b) >= 0 ? a.Sign() : !b.Sign(); } - -s32 PS2Float::clz(s32 x) -{ - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - - return debruijn32[(u32)x * 0x8c0b2891u >> 26]; -} - -s32 PS2Float::BitScanReverse8(s32 b) -{ - return msb[b]; -} - -s32 PS2Float::GetMostSignificantBitPosition(u32 value) -{ - for (s32 i = 31; i >= 0; i--) - { - if (((value >> i) & 1) != 0) - return i; - } - return -1; -} diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h index 89c0b94969..4927657df8 100644 --- a/pcsx2/PS2Float.h +++ b/pcsx2/PS2Float.h @@ -37,28 +37,6 @@ public: static constexpr u32 MIN_ONE = 0xBF800000; static constexpr int IMPLICIT_LEADING_BIT_POS = 23; - static constexpr s8 msb[256] = { - -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, - 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 - }; - - static constexpr s32 debruijn32[64] = { - 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, - 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, - -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12 - }; - - static constexpr s32 normalizeAmounts[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24 - }; - u32 raw; constexpr u32 Mantissa() const { return raw & 0x7FFFFF; } @@ -134,10 +112,4 @@ private: static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); static bool DetermineSubtractionOperationSign(PS2Float a, PS2Float b); - - static s32 GetMostSignificantBitPosition(u32 value); - - static s32 BitScanReverse8(s32 b); - - static s32 clz(s32 x); }; From 0a8c54514c22113d51bba4a52bbe1bfb241f7856 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Fri, 27 Dec 2024 19:45:14 +0100 Subject: [PATCH 13/15] [Soft-Float] - Implemented stop-gap Div rounding mode + implements softfloat on some extra obscure VU ops and fixes a denormals check on the checkDivideByZero method in the FPU. Fixes: - Final Fantasy X (fully playable) - Klonoa 2 Partially Fixes: - Mortal Kombat: Shaolin Monks - Gran Turismo 4 (game patch will be necessary to skip Licence Test CRC check). - Tourist Trophy (game patch will be necessary to skip Licence Test CRC check). The stop-gap div measure is not yet enough to fully fix GT4/TouristTrophy (they will need different rounding modes per licence). Currently this is bridged on the Div Rounding Mode setting.
--- common/BitUtils.h | 10 ----- pcsx2/FPU.cpp | 14 +++++-- pcsx2/PS2Float.cpp | 78 +++++++++++++++++------------------- pcsx2/PS2Float.h | 28 +++++++------ pcsx2/VUops.cpp | 98 ++++++++++++++++++++++++++++++++++------------ 5 files changed, 136 insertions(+), 92 deletions(-) diff --git a/common/BitUtils.h b/common/BitUtils.h index 93794038f8..194d04d274 100644 --- a/common/BitUtils.h +++ b/common/BitUtils.h @@ -119,16 +119,6 @@ namespace Common { return msb[b]; } - - __fi static s32 GetMostSignificantBitPosition(u32 value) - { - for (s32 i = 31; i >= 0; i--) - { - if (((value >> i) & 1) != 0) - return i; - } - return -1; - } } // namespace Common template diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 96b51f10c5..bbc0d335c1 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -140,13 +140,21 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT PS2Float yMatrix = PS2Float(yDivisorReg); PS2Float zMatrix = PS2Float(zDividendReg); - if (yMatrix.IsZero()) + // Check Final Fantasy X controls and Klonoa 2 to test this code, they send a bunch of denormals which are often hack-fixed on the game code. + if (zMatrix.IsDenormalized() || yMatrix.IsDenormalized()) { - bool dividendZero = zMatrix.IsZero(); + _ContVal_ |= 0; + xReg = PS2Float::SolveDivisionDenormalizedOperation(zMatrix, yMatrix).raw; + return true; + } + + if (zMatrix.IsZero()) + { + bool dividendZero = yMatrix.IsZero(); _ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1; - bool IsSigned = yMatrix.Sign() ^ zMatrix.Sign(); + bool IsSigned = zMatrix.Sign() ^ yMatrix.Sign(); if (dividendZero) xReg = IsSigned ? 
PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp index a4f0e8bf0f..39090d7f9d 100644 --- a/pcsx2/PS2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -76,14 +76,18 @@ u64 PS2Float::MulMantissa(u32 a, u32 b) // Float Processor //**************************************************************** +PS2Float::PS2Float(s32 value) { raw = (u32)value; } + PS2Float::PS2Float(u32 value) { raw = value; } +PS2Float::PS2Float(float value) { raw = std::bit_cast(value); } + PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) { raw = 0; raw |= (sign ? 1u : 0u) << 31; raw |= (u32)(exponent << 23); - raw |= mantissa; + raw |= mantissa & 0x7FFFFF; } PS2Float PS2Float::Max() @@ -285,6 +289,26 @@ PS2Float PS2Float::Rsqrt(PS2Float other) return Div(other.Sqrt()); } +PS2Float PS2Float::Pow(s32 exponent) +{ + PS2Float result = PS2Float::One(); // Start with 1, since any number raised to the power of 0 is 1 + + if (exponent != 0) + { + s32 exp = abs(exponent); + + for (s32 i = 0; i < exp; i++) + { + result = result.Mul(*this); + } + } + + if (exponent < 0) + return PS2Float::One().Div(result); + else + return result; +} + bool PS2Float::IsDenormalized() { return Exponent() == 0; @@ -312,11 +336,6 @@ PS2Float PS2Float::Negate() return PS2Float(raw ^ 0x80000000); } -PS2Float PS2Float::RoundTowardsZero() -{ - return PS2Float((u32)std::trunc((double)raw)); -} - s32 PS2Float::CompareTo(PS2Float other) { s32 selfTwoComplementVal = (s32)Abs(); @@ -486,47 +505,24 @@ PS2Float PS2Float::DoDiv(PS2Float other) u32 resMantissa = (u32)(selfMantissa64 / otherMantissa); if ((resMantissa & 0x3F) == 0) - resMantissa |= ((u64)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0; + resMantissa |= ((u64)otherMantissa * resMantissa != selfMantissa64) ? 
1U : 0; - resMantissa = (resMantissa + 0x40U) >> 7; + FPRoundMode roundingMode = EmuConfig.Cpu.FPUDivFPCR.GetRoundMode(); - if (resMantissa > 0) - { - s32 leadingBitPosition = Common::GetMostSignificantBitPosition(resMantissa); + bool roundNearEven = roundingMode == FPRoundMode::Nearest; + u32 roundIncrement = (!roundNearEven) ? ((roundingMode == (sign ? FPRoundMode::NegativeInfinity : FPRoundMode::PositiveInfinity)) ? 0x7FU : 0) : 0x40U; + u32 roundBits = resMantissa & 0x7F; - while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) - { - if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) - { - resMantissa >>= 1; + if (0x80000000 <= resMantissa + roundIncrement) + return sign ? Min() : Max(); - s32 exp = resExponent + 1; + resMantissa = (resMantissa + roundIncrement) >> 7; - if (exp > 255) - return sign ? Min() : Max(); + resMantissa &= ~(((roundBits ^ 0x40) == 0 & roundNearEven) ? 1U : 0U); + if (resMantissa == 0) + resExponent = 0; - resExponent = exp; - - leadingBitPosition--; - } - else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) - { - resMantissa <<= 1; - - s32 exp = resExponent - 1; - - if (exp <= 0) - return PS2Float(sign, 0, 0); - - resExponent = exp; - - leadingBitPosition++; - } - } - } - - resMantissa &= 0x7FFFFF; - return PS2Float(sign, (u8)resExponent, resMantissa).RoundTowardsZero(); + return PS2Float(sign, (u8)resExponent, resMantissa); } PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add) diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h index 4927657df8..ecddebeadd 100644 --- a/pcsx2/PS2Float.h +++ b/pcsx2/PS2Float.h @@ -43,8 +43,12 @@ public: constexpr u8 Exponent() const { return (raw >> 23) & 0xFF; } constexpr bool Sign() const { return ((raw >> 31) & 1) != 0; } + PS2Float(s32 value); + PS2Float(u32 value); + PS2Float(float value); + PS2Float(bool sign, u8 exponent, u32 mantissa); static PS2Float Max(); @@ -55,6 +59,16 @@ public: static PS2Float MinOne(); + static PS2Float 
SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add); + + static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul); + + static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); + + static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); + + static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); + PS2Float Add(PS2Float addend); PS2Float Sub(PS2Float subtrahend); @@ -67,6 +81,8 @@ public: PS2Float Rsqrt(PS2Float other); + PS2Float Pow(s32 exponent); + bool IsDenormalized(); bool IsAbnormal(); @@ -77,8 +93,6 @@ public: PS2Float Negate(); - PS2Float RoundTowardsZero(); - s32 CompareTo(PS2Float other); s32 CompareOperand(PS2Float other); @@ -97,16 +111,6 @@ private: PS2Float DoDiv(PS2Float other); - static PS2Float SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add); - - static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul); - - static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); - - static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); - - static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); - static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 8f2e10625c..825992676d 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2670,48 +2670,67 @@ static __ri void _vuERLENG(VURegs* VU) } -static __ri float _vuCalculateEATAN(u32 inputvalue) { - - float fvalue = vuDouble(inputvalue); - +static __ri u32 _vuCalculateEATAN(VURegs* VU, u32 inputvalue) +{ float eatanconst[9] = { 0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, 
0.785398185253143f }; - float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) - + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15)); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + { + PS2Float p = PS2Float(inputvalue); - result += eatanconst[8]; + return PS2Float(eatanconst[0]).Mul(p) + .Add(PS2Float(eatanconst[1]).Mul(p.Pow(3))) + .Add(PS2Float(eatanconst[2]).Mul(p.Pow(5))) + .Add(PS2Float(eatanconst[3]).Mul(p.Pow(7))) + .Add(PS2Float(eatanconst[4]).Mul(p.Pow(9))) + .Add(PS2Float(eatanconst[5]).Mul(p.Pow(11))) + .Add(PS2Float(eatanconst[6]).Mul(p.Pow(13))) + .Add(PS2Float(eatanconst[7]).Mul(p.Pow(15))) + .Add(PS2Float(eatanconst[8])).raw; + } + else + { + float fvalue = vuDouble(inputvalue); - result = vuDouble(*(u32*)&result); + float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15)); - return result; + result += eatanconst[8]; + + result = vuDouble(*(u32*)&result); + + return std::bit_cast(result); + } } static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + VU->p.UL = _vuCalculateEATAN(VU, VU->VF[_Fs_].UL[_Fsf_]); } static __ri void _vuEATANxy(VURegs* VU) { - float p = 0; if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x)); + VU->p.UL = _vuCalculateEATAN(VU, vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x)); + } + else + { + VU->p.UL = PS2Float(0).raw; } - VU->p.F = p; } static __ri void _vuEATANxz(VURegs* VU) { - float p = 0; if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p 
= _vuCalculateEATAN(vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x)); + VU->p.UL = _vuCalculateEATAN(VU, vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x)); + } + else + { + VU->p.UL = PS2Float(0).raw; } - VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) @@ -2810,24 +2829,51 @@ static __ri void _vuERSQRT(VURegs* VU) static __ri void _vuESIN(VURegs* VU) { float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); - VU->p.F = vuDouble(*(u32*)&p); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + { + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + VU->p.UL = PS2Float(sinconsts[0]).Mul(p).Add(PS2Float(sinconsts[1]).Mul(p.Pow(3))).Add(PS2Float(sinconsts[2]).Mul(p.Pow(5))).Add(PS2Float(sinconsts[3]).Mul(p.Pow(7))).Add(PS2Float(sinconsts[4]).Mul(p.Pow(9))).raw; + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); + VU->p.F = vuDouble(*(u32*)&p); + } } static __ri void _vuEEXP(VURegs* VU) { float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); - p = pow(p, 4); - p = vuDouble(*(u32*)&p); - p = 1 / p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) + { + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + VU->p.UL = PS2Float::One().Div(PS2Float::One() + .Add(PS2Float(consts[0]).Mul(p)) + .Add(PS2Float(consts[1]).Mul(p.Pow(2))) + .Add(PS2Float(consts[2]).Mul(p.Pow(3))) + .Add(PS2Float(consts[3]).Mul(p.Pow(4))) + .Add(PS2Float(consts[4]).Mul(p.Pow(5))) + .Add(PS2Float(consts[5]).Mul(p.Pow(6))) + .Pow(4)).raw; + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); + p = pow(p, 4); + p = vuDouble(*(u32*)&p); + p = 1 / p; + + VU->p.F = p; + } } static __ri void _vuXITOP(VURegs* VU) From dfb361d342332ae8c649b9e6997b4109ebe11b0d Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sat, 4 Jan 2025 21:18:56 +0100 Subject: [PATCH 14/15] [Soft-Float] - Implements fully accurate Div/Sqrt/Ftoi/Itof/U|O|I|D flags + uses built-in clz for Add/Sub. Fixes a TON of games. The flags are not yet in use in the Interpreters, this will ideally be committed next (requires VU code changes). The Div/Sqrt method is unoptimized for now, the team is working on a faster equivalent. 
--- common/BitUtils.h | 17 -- pcsx2/CMakeLists.txt | 2 + pcsx2/FPU.cpp | 28 +-- pcsx2/FpgaDiv.cpp | 477 ++++++++++++++++++++++++++++++++++++ pcsx2/FpgaDiv.h | 63 +++++ pcsx2/PS2Float.cpp | 432 +++++++++----------------------- pcsx2/PS2Float.h | 25 +- pcsx2/VUops.cpp | 163 +++++++----- pcsx2/pcsx2.vcxproj | 2 + pcsx2/pcsx2.vcxproj.filters | 6 + 10 files changed, 803 insertions(+), 412 deletions(-) create mode 100644 pcsx2/FpgaDiv.cpp create mode 100644 pcsx2/FpgaDiv.h diff --git a/common/BitUtils.h b/common/BitUtils.h index 194d04d274..3f42751417 100644 --- a/common/BitUtils.h +++ b/common/BitUtils.h @@ -38,12 +38,6 @@ namespace Common 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; - static constexpr s32 debruijn32[64] = { - 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, - 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, - -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12}; - static constexpr s32 normalizeAmounts[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; @@ -104,17 +98,6 @@ namespace Common return std::countl_zero(static_cast(n)); } - __fi static s32 clz(s32 x) - { - x |= x >> 1; - x |= x >> 2; - x |= x >> 4; - x |= x >> 8; - x |= x >> 16; - - return debruijn32[(u32)x * 0x8c0b2891u >> 26]; - } - __fi static s32 BitScanReverse8(s32 b) { return msb[b]; diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 4e0a82db9e..ae995173ea 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -94,6 +94,7 @@ set(pcsx2Sources MTVU.cpp Patch.cpp PS2Float.cpp + FpgaDiv.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -175,6 +176,7 @@ set(pcsx2Headers Memory.h MemoryTypes.h PS2Float.h + FpgaDiv.h Patch.h PerformanceMetrics.h 
PrecompiledHeader.h diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index bbc0d335c1..99575cef99 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -252,15 +252,6 @@ static __fi u32 fpuAccurateDiv(u32 a, u32 b) return std::bit_cast(fpuDouble(a) / fpuDouble(b)); } -static __fi s32 double_to_int(double value) -{ - if (value >= 2147483647.0) - return 2147483647LL; - if (value <= -2147483648.0) - return -2147483648LL; - return value; -} - static __fi void C_cond_S(uint8_t mode) { switch (mode) @@ -268,7 +259,7 @@ static __fi void C_cond_S(uint8_t mode) case 0: // == if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _ContVal_ = (PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)) == 0) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + _ContVal_ = (PS2Float(_FsValUl_).CompareToSign(PS2Float(_FtValUl_)) == 0) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); } else { @@ -278,7 +269,7 @@ static __fi void C_cond_S(uint8_t mode) case 1: // <= if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - int32_t cmpResult = PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)); + int32_t cmpResult = PS2Float(_FsValUl_).CompareToSign(PS2Float(_FtValUl_)); _ContVal_ = (cmpResult == 0 || cmpResult == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); } else @@ -289,7 +280,7 @@ static __fi void C_cond_S(uint8_t mode) case 2: // < if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _ContVal_ = (PS2Float(_FsValUl_).CompareTo(PS2Float(_FtValUl_)) == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + _ContVal_ = (PS2Float(_FsValUl_).CompareToSign(PS2Float(_FtValUl_)) == -1) ? 
(_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); } else { @@ -365,14 +356,21 @@ void CTC1() { } void CVT_S() { - _FdValf_ = (float)_FsValSl_; - _FdValf_ = fpuDouble( _FdValUl_ ); + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _FdValUl_ = PS2Float::Itof(0, _FsValSl_); + } + else + { + _FdValf_ = (float)_FsValSl_; + _FdValf_ = fpuDouble(_FdValUl_); + } } void CVT_W() { if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _FdValSl_ = double_to_int(PS2Float(_FsValUl_).ToDouble()); + _FdValSl_ = PS2Float::Ftoi(0, _FsValUl_); } else { diff --git a/pcsx2/FpgaDiv.cpp b/pcsx2/FpgaDiv.cpp new file mode 100644 index 0000000000..4d0e385319 --- /dev/null +++ b/pcsx2/FpgaDiv.cpp @@ -0,0 +1,477 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include "FpgaDiv.h" +#include "PS2Float.h" +#include "Common.h" + +FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2) +{ + FpgaDiv::divMode = divMode; + + if (divMode) + { + if (((f1 & 0x7F800000) == 0) && ((f2 & 0x7F800000) != 0)) + { + floatResult = 0; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31; + return; + } + if (((f1 & 0x7F800000) != 0) && ((f2 & 0x7F800000) == 0)) + { + dz = true; + floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31; + return; + } + if (((f1 & 0x7F800000) == 0) && ((f2 & 0x7F800000) == 0)) + { + iv = true; + floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 
1 : 0 & 1) << 31; + return; + } + } + else if ((f2 & 0x7F800000) == 0) + { + floatResult = 0; + iv = ((f2 >> 31) & 1) != 0; + return; + } + + u32 floatDivisor, floatDividend; + s32 i, j, csaRes; + s32 man = 0; + s32 QuotientValueDomain = 1; + + Product[0] = 1; + Carry[25] = 1; + + if (divMode) + { + floatDividend = f1; + floatDivisor = f2; + } + else + { + floatDividend = f2; + floatDivisor = f1; + } + + u8 Dvdtexp = (u8)((floatDividend >> 23) & 0xFF); + u8 Dvsrexp = (u8)((floatDivisor >> 23) & 0xFF); + s32 Dvdtsign = (s32)(floatDividend >> 31); + s32 Dvsrsign = (s32)(floatDivisor >> 31); + + Sum[0] = 1; + Sum[1] = ((floatDividend & 0x400000) != 0); + Sum[2] = ((floatDividend & 0x200000) != 0); + Sum[3] = ((floatDividend & 0x100000) != 0); + Sum[4] = ((floatDividend & 0x80000) != 0); + Sum[5] = ((floatDividend & 0x40000) != 0); + Sum[6] = ((floatDividend & 0x20000) != 0); + Sum[7] = (s32)((floatDividend >> 16) & 1); + Sum[8] = (s32)((floatDividend >> 15) & 1); + Sum[9] = ((floatDividend & 0x4000) != 0); + Sum[10] = ((floatDividend & 0x2000) != 0); + Sum[11] = ((floatDividend & 0x1000) != 0); + Sum[12] = ((floatDividend & 0x800) != 0); + Sum[13] = ((floatDividend & 0x400) != 0); + Sum[14] = ((floatDividend & 0x200) != 0); + Sum[15] = (s32)((floatDividend >> 8) & 1); + Sum[16] = (s32)((floatDividend >> 7) & 1); + Sum[17] = ((floatDividend & 0x40) != 0); + Sum[18] = ((floatDividend & 0x20) != 0); + Sum[19] = ((floatDividend & 0x10) != 0); + Sum[20] = ((floatDividend & 8) != 0); + Sum[21] = ((floatDividend & 4) != 0); + Sum[22] = ((floatDividend & 2) != 0); + Sum[23] = (s32)(floatDividend & 1); + Sum[24] = 0; + Sum[25] = 0; + + Divisor[0] = 1; + Divisor[1] = ((floatDivisor & 0x400000) != 0); + Divisor[2] = ((floatDivisor & 0x200000) != 0); + Divisor[3] = ((floatDivisor & 0x100000) != 0); + Divisor[4] = ((floatDivisor & 0x80000) != 0); + Divisor[5] = ((floatDivisor & 0x40000) != 0); + Divisor[6] = ((floatDivisor & 0x20000) != 0); + Divisor[7] = (s32)((floatDivisor >> 
16) & 1); + Divisor[8] = (s32)((floatDivisor >> 15) & 1); + Divisor[9] = ((floatDivisor & 0x4000) != 0); + Divisor[10] = ((floatDivisor & 0x2000) != 0); + Divisor[11] = ((floatDivisor & 0x1000) != 0); + Divisor[12] = ((floatDivisor & 0x800) != 0); + Divisor[13] = ((floatDivisor & 0x400) != 0); + Divisor[14] = ((floatDivisor & 0x200) != 0); + Divisor[15] = (s32)((floatDivisor >> 8) & 1); + Divisor[16] = (s32)((floatDivisor >> 7) & 1); + Divisor[17] = ((floatDivisor & 0x40) != 0); + Divisor[18] = ((floatDivisor & 0x20) != 0); + Divisor[19] = ((floatDivisor & 0x10) != 0); + Divisor[20] = ((floatDivisor & 8) != 0); + Divisor[21] = ((floatDivisor & 4) != 0); + Divisor[22] = ((floatDivisor & 2) != 0); + Divisor[23] = (s32)(floatDivisor & 1); + Divisor[24] = 0; + Divisor[25] = 0; + + if (!divMode && Dvdtexp % 2 == 1) + { + for (i = 0; i <= 24; i++) + { + Sum[25 - i] = Sum[24 - i]; + } + Sum[0] = 0; + } + + for (i = 0; i <= 24; ++i) + { + MultipleFormation(QuotientValueDomain); + csaRes = CSAQSLAdder(QuotientValueDomain); + ProductQuotientRestTransformation(i, QuotientValueDomain); + Carry[25] = csaRes > 0 ? 1 : 0; + QuotientValueDomain = csaRes; + } + + s32 sign = SignCalc(Dvdtsign, Dvsrsign) ? 
1 : 0; + s32 exp = ExpCalc(Dvdtexp, Dvsrexp); + + if (divMode && (Quotient[0] == 0)) + exp--; + + if (divMode) + { + if ((Dvdtexp == 0) && (Dvsrexp == 0)) + { + iv = true; + exp = 255; + for (i = 0; i < 25; i++) + { + Quotient[i] = 1; + } + } + else if ((Dvdtexp == 0) || (Dvsrexp != 0)) + { + if ((Dvdtexp == 0) && (Dvsrexp != 0)) + { + exp = 0; + for (i = 0; i < 25; i++) + { + Quotient[i] = 0; + } + } + } + else + { + dz = true; + exp = 255; + for (i = 0; i < 25; i++) + { + Quotient[i] = 1; + } + } + } + else + { + if (Dvdtexp == 0) + { + sign = 0; + exp = 0; + for (i = 0; i < 25; i++) + { + Quotient[i] = 0; + } + } + if (Dvdtsign == 1) + { + iv = true; + sign = 0; + } + } + + if (divMode) + { + if (exp < 256) + { + if (exp < 1) + { + uf = true; + exp = 0; + for (i = 0; i < 25; i++) + { + Quotient[i] = 0; + } + } + } + else + { + of = true; + exp = 255; + for (i = 0; i < 25; i++) + { + Quotient[i] = 1; + } + } + } + + if (divMode) + j = 2 - Quotient[0]; + else + j = 1; + + for (i = j; i < j + 23; i++) + { + man = man * 2 + Quotient[i]; + } + + floatResult = 0; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(sign & 1) << 31; + floatResult &= 0x807FFFFF; + floatResult |= (u32)(exp & 0xFF) << 23; + floatResult &= 0xFF800000; + floatResult |= (u32)man & 0x7FFFFF; +} + +bool FpgaDiv::SignCalc(s32 Dvdtsign, s32 Dvsrsign) +{ + return divMode && Dvsrsign != Dvdtsign; +} + +bool FpgaDiv::BitInvert(s32 val) +{ + return val < 1; +} + +s32 FpgaDiv::ExpCalc(s32 Dvdtexp, s32 Dvsrexp) +{ + s32 result; + + if (divMode) + return Dvdtexp - Dvsrexp + 127; + if ((Dvdtexp & 1) != 0) + result = (Dvdtexp - 127) / 2; + else + result = (Dvdtexp - 128) / 2; + return result + 127; +} + +s32 FpgaDiv::CSAQSLAdder(s32 QuotientValueDomain) +{ + s32 CarryArray[4]; + s32 SumArray[4]; + s32 i; + s32 tmpSum; + s32 tmpCarry; + + if (QuotientValueDomain == 0) + { + SumArray[0] = SubSum; + CarryArray[0] = SubCarry; + for (i = 1; i <= 3; i++) + { + SumArray[i] = Sum[i - 1]; + 
CarryArray[i] = Carry[i - 1]; + } + } + CSAAdder(SubSum, SubCarry, SubMult, tmpSum, tmpCarry); + SubSum0 = tmpSum; + CSAAdder(Sum[0], Carry[0], Mult[0], tmpSum, tmpCarry); + SubSum = tmpSum; + SubCarry0 = tmpCarry; + CSAAdder(Sum[1], Carry[1], Mult[1], tmpSum, tmpCarry); + Sum[0] = tmpSum; + SubCarry = tmpCarry; + for (i = 2; i <= 25; i++) + { + CSAAdder(Sum[i], Carry[i], Mult[i], tmpSum, tmpCarry); + Sum[i - 1] = tmpSum; + Carry[i - 2] = tmpCarry; + } + Sum[i - 1] = 0; + Carry[i - 2] = 0; + Carry[i - 1] = ~QuotientValueDomain; + Carry[i - 1] = (s32)((u32)Carry[i - 1] >> 31); + if (QuotientValueDomain != 0) + { + SumArray[0] = SubSum0; + CarryArray[0] = SubCarry0; + SumArray[1] = SubSum; + CarryArray[1] = SubCarry; + for (i = 2; i <= 3; i++) + { + SumArray[i] = Sum[i - 2]; + CarryArray[i] = Carry[i - 2]; + } + } + return QSLAdder(SumArray, CarryArray); +} + +s32 FpgaDiv::QSLAdder(s32 SumArray[], s32 CarryArray[]) +{ + s32 specialCondition = 0; + s32 result; + s32 claResult = CLAAdder(SumArray, CarryArray); + + if (SumArray[3] == 1 || CarryArray[3] == 1 || (claResult % 2 != 0)) + specialCondition = 1; + + switch (claResult) + { + case 0: + result = specialCondition; + break; + case 1: + result = specialCondition; + break; + case 2: + case 3: + result = 1; + break; + case 4: + case 5: + case 6: + result = -1; + break; + case 7: + result = 0; + break; + default: + result = 0; + break; + } + + return result; +} + +s32 FpgaDiv::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain) +{ + s32 i; + + Product[increment] = 0; + Product[increment + 1] = 1; + if (QuotientValueDomain == 0) + Rest[increment] = 1; + else + { + if (QuotientValueDomain == -1) + { + for (i = 0; i <= 25; i++) + Quotient[i] = Rest[i]; + Quotient[increment] = 1; + return 0; + } + else if (QuotientValueDomain == 1) + { + for (i = 0; i <= 25; ++i) + Rest[i] = Quotient[i]; + Quotient[increment] = 1; + return 0; + } + Console.Error("PQRTF: Quotient value domain error!"); + return -1; + } 
+ + return 0; +} + +s32 FpgaDiv::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry) +{ + s32 addResult = carry + sum + mult; + resCarry = 0; + resSum = 0; + if (addResult == 1) + resSum = 1; + else if (addResult == 2) + resCarry = 1; + else if (addResult == 3) + { + resSum = 1; + resCarry = 1; + } + + return 0; +} + +s32 FpgaDiv::CLAAdder(s32 SumArray[], s32 CarryArray[]) +{ + return (2 * CarryArray[1] + 4 * CarryArray[0] + CarryArray[2] + 2 * SumArray[1] + 4 * SumArray[0] + SumArray[2]) % 8; +} + +s32 FpgaDiv::MultipleFormation(s32 QuotientValueDomain) +{ + s32 i; + + if (QuotientValueDomain == 0) + { + SubMult = 0; + for (i = 0; i <= 25; i++) + Mult[i] = 0; + } + else if (divMode) + DivideModeFormation(QuotientValueDomain); + else + RootModeFormation(QuotientValueDomain); + + return 0; +} + +s32 FpgaDiv::DivideModeFormation(s32 QuotientValueDomain) +{ + s32 i; + + if (QuotientValueDomain <= 0) + { + SubMult = 0; + for (i = 0; i <= 25; i++) + Mult[i] = Divisor[i]; + } + else + { + SubMult = 1; + for (i = 0; i <= 25; i++) + Mult[i] = BitInvert(Divisor[i]) ? 1 : 0; + } + + return 0; +} + +s32 FpgaDiv::RootModeFormation(s32 QuotientValueDomain) +{ + s32 i; + + if (QuotientValueDomain <= 0) + { + SubMult = 0; + if (Product[0] == 1) + Mult[0] = 1; + else + Mult[0] = Rest[0]; + for (i = 1; i <= 25; i++) + { + if (Product[i - 1] == 1 || Product[i] == 1) + Mult[i] = 1; + else + Mult[i] = Rest[i]; + } + } + else + { + SubMult = 1; + Mult[0] = BitInvert(Quotient[0]) ? 1 : 0; + for (i = 1; i <= 25; i++) + { + if (Product[i - 1] == 1) + Mult[i] = 0; + else + Mult[i] = BitInvert(Quotient[i]) ? 
1 : 0; + } + } + + return 0; +} \ No newline at end of file diff --git a/pcsx2/FpgaDiv.h b/pcsx2/FpgaDiv.h new file mode 100644 index 0000000000..752f756868 --- /dev/null +++ b/pcsx2/FpgaDiv.h @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +#include +#include + +class FpgaDiv +{ +public: + + bool dz = false; + bool iv = false; + bool of = false; + bool uf = false; + + u32 floatResult; + + FpgaDiv(bool divMode, u32 f1, u32 f2); + +protected: + +private: + + s32 Rest[26] = {0}; + s32 Quotient[26] = {0}; + s32 Product[26] = {0}; + s32 Sum[26] = {0}; + s32 Divisor[26] = {0}; + s32 Carry[26] = {0}; + s32 Mult[26] = {0}; + + bool divMode; + + s32 SubCarry = 0; + s32 SubCarry0 = 0; + s32 SubSum = 0; + s32 SubSum0 = 0; + s32 SubMult = 0; + + bool SignCalc(s32 Dvdtsign, s32 Dvsrsign); + + bool BitInvert(s32 val); + + s32 ExpCalc(s32 Dvdtexp, s32 Dvsrexp); + + s32 CSAQSLAdder(s32 QuotientValueDomain); + + s32 QSLAdder(s32 SumArray[], s32 CarryArray[]); + + s32 ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain); + + s32 CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry); + + s32 CLAAdder(s32 SumArray[], s32 CarryArray[]); + + s32 MultipleFormation(s32 QuotientValueDomain); + + s32 DivideModeFormation(s32 QuotientValueDomain); + + s32 RootModeFormation(s32 QuotientValueDomain); +}; \ No newline at end of file diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp index 39090d7f9d..1ab3189210 100644 --- a/pcsx2/PS2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -10,6 +10,7 @@ #include #include "common/Pcsx2Defs.h" #include "common/BitUtils.h" +#include "FpgaDiv.h" #include "PS2Float.h" #include "Common.h" @@ -115,14 +116,11 @@ PS2Float PS2Float::Add(PS2Float addend) if (IsDenormalized() || addend.IsDenormalized()) return SolveAddSubDenormalizedOperation(*this, addend, true); - if (IsAbnormal() && addend.IsAbnormal()) - return 
SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true); - u32 a = raw; u32 b = addend.raw; //exponent difference - s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); + s32 exp_diff = Exponent() - addend.Exponent(); //diff = 25 .. 255 , expt < expd if (exp_diff >= 25) @@ -159,14 +157,11 @@ PS2Float PS2Float::Sub(PS2Float subtrahend) if (IsDenormalized() || subtrahend.IsDenormalized()) return SolveAddSubDenormalizedOperation(*this, subtrahend, false); - if (IsAbnormal() && subtrahend.IsAbnormal()) - return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false); - u32 a = raw; u32 b = subtrahend.raw; //exponent difference - s32 exp_diff = ((a >> 23) & 0xFF) - ((b >> 23) & 0xFF); + s32 exp_diff = Exponent() - subtrahend.Exponent(); //diff = 25 .. 255 , expt < expd if (exp_diff >= 25) @@ -195,7 +190,6 @@ PS2Float PS2Float::Sub(PS2Float subtrahend) a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); } - return PS2Float(a).DoAdd(PS2Float(b).Negate()); } @@ -204,9 +198,6 @@ PS2Float PS2Float::Mul(PS2Float mulend) if (IsDenormalized() || mulend.IsDenormalized()) return SolveMultiplicationDenormalizedOperation(*this, mulend); - if (IsAbnormal() && mulend.IsAbnormal()) - return SolveAbnormalMultiplicationOrDivisionOperation(*this, mulend, true); - if (IsZero() || mulend.IsZero()) return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, mulend), 0, 0); @@ -215,78 +206,34 @@ PS2Float PS2Float::Mul(PS2Float mulend) PS2Float PS2Float::Div(PS2Float divend) { - if (IsDenormalized() || divend.IsDenormalized()) - return SolveDivisionDenormalizedOperation(*this, divend); - - if (IsAbnormal() && divend.IsAbnormal()) - return SolveAbnormalMultiplicationOrDivisionOperation(*this, divend, false); - - if (IsZero()) - return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, divend), 0, 0); - else if (divend.IsZero()) - return DetermineMultiplicationDivisionOperationSign(*this, divend) ? 
Min() : Max(); - - return DoDiv(divend); + FpgaDiv fpga = FpgaDiv(true, raw, divend.raw); + PS2Float result = PS2Float(fpga.floatResult); + result.dz = fpga.dz; + result.iv = fpga.iv; + result.of = fpga.of; + result.uf = fpga.uf; + return result; } -// Rounding can be slightly off: (PS2: rsqrt(0x7FFFFFF0) -> 0x5FB504ED | SoftFloat/IEEE754 rsqrt(0x7FFFFFF0) -> 0x5FB504EE). PS2Float PS2Float::Sqrt() { - s32 t; - s32 s = 0; - s32 q = 0; - u32 r = 0x01000000; /* r = moving bit from right to left */ - - if (IsDenormalized()) - return PS2Float(0); - - // PS2 only takes positive numbers for SQRT, and convert if necessary. - s32 ix = (s32)PS2Float(false, Exponent(), Mantissa()).raw; - - /* extract mantissa and unbias exponent */ - s32 m = (ix >> 23) - BIAS; - - ix = (ix & 0x007FFFFF) | 0x00800000; - if ((m & 1) == 1) - { - /* odd m, double x to make it even */ - ix += ix; - } - - m >>= 1; /* m = [m/2] */ - - /* generate sqrt(x) bit by bit */ - ix += ix; - - while (r != 0) - { - t = s + (s32)(r); - if (t <= ix) - { - s = t + (s32)(r); - ix -= t; - q += (s32)(r); - } - - ix += ix; - r >>= 1; - } - - /* use floating add to find out rounding direction */ - if (ix != 0) - { - q += q & 1; - } - - ix = (q >> 1) + 0x3F000000; - ix += m << 23; - - return PS2Float((u32)(ix)); + FpgaDiv fpga = FpgaDiv(false, 0, PS2Float(false, Exponent(), Mantissa()).raw); + PS2Float result = PS2Float(fpga.floatResult); + result.dz = fpga.dz; + result.iv = fpga.iv; + return result; } PS2Float PS2Float::Rsqrt(PS2Float other) { - return Div(other.Sqrt()); + FpgaDiv fpgaSqrt = FpgaDiv(false, 0, PS2Float(false, other.Exponent(), other.Mantissa()).raw); + FpgaDiv fpgaDiv = FpgaDiv(true, raw, fpgaSqrt.floatResult); + PS2Float result = PS2Float(fpgaDiv.floatResult); + result.dz = fpgaSqrt.dz || fpgaDiv.dz; + result.iv = fpgaSqrt.iv || fpgaDiv.iv; + result.of = fpgaDiv.of; + result.uf = fpgaDiv.uf; + return result; } PS2Float PS2Float::Pow(s32 exponent) @@ -314,13 +261,6 @@ bool PS2Float::IsDenormalized() 
return Exponent() == 0; } -bool PS2Float::IsAbnormal() -{ - u32 val = raw; - return val == MAX_FLOATING_POINT_VALUE || val == MIN_FLOATING_POINT_VALUE || - val == POSITIVE_INFINITY_VALUE || val == NEGATIVE_INFINITY_VALUE; -} - bool PS2Float::IsZero() { return Abs() == 0; @@ -336,7 +276,7 @@ PS2Float PS2Float::Negate() return PS2Float(raw ^ 0x80000000); } -s32 PS2Float::CompareTo(PS2Float other) +s32 PS2Float::CompareToSign(PS2Float other) { s32 selfTwoComplementVal = (s32)Abs(); if (Sign()) @@ -354,7 +294,7 @@ s32 PS2Float::CompareTo(PS2Float other) return 1; } -s32 PS2Float::CompareOperand(PS2Float other) +s32 PS2Float::CompareTo(PS2Float other) { s32 selfTwoComplementVal = (s32)Abs(); s32 otherTwoComplementVal = (s32)other.Abs(); @@ -392,14 +332,6 @@ std::string PS2Float::ToString() { oss << "-Fmax(" << res << ")"; } - else if (value == POSITIVE_INFINITY_VALUE) - { - oss << "Inf(" << res << ")"; - } - else if (value == NEGATIVE_INFINITY_VALUE) - { - oss << "-Inf(" << res << ")"; - } else { oss << "PS2Float(" << res << ")"; @@ -435,7 +367,7 @@ PS2Float PS2Float::DoAdd(PS2Float other) // Remove from exponent the PS2 Multiplier value. s32 rawExp = selfExponent - roundingMultiplier; - s32 amount = Common::normalizeAmounts[Common::clz(absMan)]; + s32 amount = Common::normalizeAmounts[Common::CountLeadingSignBits(absMan)]; rawExp -= amount; absMan <<= amount; @@ -444,9 +376,17 @@ PS2Float PS2Float::DoAdd(PS2Float other) absMan >>= msbIndex; if (rawExp > 255) - return man < 0 ? Min() : Max(); - else if (rawExp <= 0) - return PS2Float(man < 0, 0, 0); + { + PS2Float result = man < 0 ? 
Min() : Max(); + result.of = true; + return result; + } + else if (rawExp < 1) + { + PS2Float result = PS2Float(man < 0, 0, 0); + result.uf = true; + return result; + } return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << 23 | ((u32)absMan & 0x7FFFFF)); } @@ -469,223 +409,21 @@ PS2Float PS2Float::DoMul(PS2Float other) } if (resExponent > 255) - return PS2Float(sign | MAX_FLOATING_POINT_VALUE); - else if (resExponent <= 0) - return PS2Float(sign); + { + PS2Float result = PS2Float(sign | MAX_FLOATING_POINT_VALUE); + result.of = true; + return result; + } + else if (resExponent < 1) + { + PS2Float result = PS2Float(sign); + result.uf = true; + return result; + } return PS2Float(sign | (u32)(resExponent << 23) | (resMantissa & 0x7FFFFF)); } -// Rounding can be slightly off: (PS2: 0x3F800000 / 0x3F800001 = 0x3F7FFFFF | SoftFloat/IEEE754: 0x3F800000 / 0x3F800001 = 0x3F7FFFFE). -PS2Float PS2Float::DoDiv(PS2Float other) -{ - bool sign = DetermineMultiplicationDivisionOperationSign(*this, other); - u32 selfMantissa = Mantissa() | 0x800000; - u32 otherMantissa = other.Mantissa() | 0x800000; - s32 resExponent = Exponent() - other.Exponent() + BIAS; - u64 selfMantissa64; - - if (resExponent > 255) - return sign ? Min() : Max(); - else if (resExponent <= 0) - return PS2Float(sign, 0, 0); - - if (selfMantissa < otherMantissa) - { - --resExponent; - if (resExponent == 0) - return PS2Float(sign, 0, 0); - selfMantissa64 = (u64)(selfMantissa) << 31; - } - else - { - selfMantissa64 = (u64)(selfMantissa) << 30; - } - - u32 resMantissa = (u32)(selfMantissa64 / otherMantissa); - - if ((resMantissa & 0x3F) == 0) - resMantissa |= ((u64)otherMantissa * resMantissa != selfMantissa64) ? 1U : 0; - - FPRoundMode roundingMode = EmuConfig.Cpu.FPUDivFPCR.GetRoundMode(); - - bool roundNearEven = roundingMode == FPRoundMode::Nearest; - u32 roundIncrement = (!roundNearEven) ? ((roundingMode == (sign ? FPRoundMode::NegativeInfinity : FPRoundMode::PositiveInfinity)) ? 
0x7FU : 0) : 0x40U; - u32 roundBits = resMantissa & 0x7F; - - if (0x80000000 <= resMantissa + roundIncrement) - return sign ? Min() : Max(); - - resMantissa = (resMantissa + roundIncrement) >> 7; - - resMantissa &= ~(((roundBits ^ 0x40) == 0 & roundNearEven) ? 1U : 0U); - if (resMantissa == 0) - resExponent = 0; - - return PS2Float(sign, (u8)resExponent, resMantissa); -} - -PS2Float PS2Float::SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add) -{ - u32 aval = a.raw; - u32 bval = b.raw; - - if (aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? Max() : PS2Float(0); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? Min() : PS2Float(0); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? PS2Float(0) : Min(); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? PS2Float(0) : Max(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? Max() : PS2Float(0); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? PS2Float(0) : Min(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? PS2Float(0) : Max(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? Min() : PS2Float(0); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? Max() : PS2Float(0x7F7FFFFE); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? PS2Float(0x7F7FFFFE) : Max(); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return add ? PS2Float(0xFF7FFFFE) : Min(); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return add ? 
Min() : PS2Float(0xFF7FFFFE); - - if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? Max() : PS2Float(0xFF7FFFFE); - - if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? PS2Float(0xFF7FFFFE) : Max(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return add ? PS2Float(0x7F7FFFFE) : Min(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return add ? Min() : PS2Float(0x7F7FFFFE); - - Console.Error("Unhandled abnormal add/sub floating point operation"); - - return PS2Float(0); -} - -PS2Float PS2Float::SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul) -{ - u32 aval = a.raw; - u32 bval = b.raw; - - if (mul) - { - if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || - (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) - return Max(); - - if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || - (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) - return Min(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return Max(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return Min(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return Min(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return Max(); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return Max(); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return Min(); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return Min(); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return Max(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return Max(); - - if (aval == POSITIVE_INFINITY_VALUE && 
bval == MIN_FLOATING_POINT_VALUE) - return Min(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return Min(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return Max(); - } - else - { - if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || - (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) - return One(); - - if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || - (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) - return MinOne(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return One(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) - return MinOne(); - - if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return MinOne(); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return One(); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return PS2Float(0x3FFFFFFF); - - if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return PS2Float(0xBFFFFFFF); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) - return PS2Float(0xBFFFFFFF); - - if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) - return PS2Float(0x3FFFFFFF); - - if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return PS2Float(0x3F000001); - - if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return PS2Float(0xBF000001); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) - return PS2Float(0xBF000001); - - if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) - return PS2Float(0x3F000001); - } - - Console.Error("Unhandled abnormal mul/div floating point operation"); - - return PS2Float(0); -} - PS2Float 
PS2Float::SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add) { bool sign = add ? DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); @@ -723,6 +461,76 @@ PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) return PS2Float(0); } +u32 PS2Float::Itof(s32 complement, s32 f1) +{ + u8 specialCondition; + u32 result; + s32 subExponent, newExponent, floatResult; + + if (f1 != 0) + { + specialCondition = 0; + subExponent = 158; + if (f1 < 0) + { + f1 = ~(f1 - 1); + specialCondition = 1; + } + while (f1 >= 0) + { + f1 *= 2; + --subExponent; + } + floatResult = (2 * f1) >> 9; + newExponent = subExponent - complement; + if (newExponent >= 0) + { + floatResult = (((u8)newExponent << 7) | ((floatResult >> 16) & 0x807F)) << 16 | (floatResult & 0xFFFF); + floatResult = (((specialCondition << 7) | ((floatResult >> 24) & 0x7F)) << 24) | (floatResult & 0xFFFFFF); + result = (u32)floatResult; + } + else + result = 0; + } + else + result = 0; + + return result; +} + +s32 PS2Float::Ftoi(s32 complement, u32 f1) +{ + u32 a, result; + + a = f1; + if ((f1 & 0x7F800000) == 0) + result = 0; + else + { + complement = (s32)(f1 >> 23 & 0xFF) + complement; + f1 &= 0x7FFFFF; + f1 |= 0x800000; + if (complement < 158) + { + if (complement >= 126) + { + f1 = (f1 << 7) >> (31 - ((u8)complement - 126)); + if ((s32)a < 0) + f1 = ~f1 + 1; + result = f1; + } + else + result = 0; + } + else if ((s32)a < 0) + result = SIGNMASK; + else + result = MAX_FLOATING_POINT_VALUE; + } + + return (s32)result; +} + bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) { return a.Sign() ^ b.Sign(); @@ -740,7 +548,7 @@ bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) Console.Error("Unhandled addition operation flags"); } - return a.CompareOperand(b) >= 0 ? a.Sign() : b.Sign(); + return a.CompareTo(b) >= 0 ? 
a.Sign() : b.Sign(); } bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) @@ -755,5 +563,5 @@ bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) Console.Error("Unhandled subtraction operation flags"); } - return a.CompareOperand(b) >= 0 ? a.Sign() : !b.Sign(); + return a.CompareTo(b) >= 0 ? a.Sign() : !b.Sign(); } diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h index ecddebeadd..e6501153ae 100644 --- a/pcsx2/PS2Float.h +++ b/pcsx2/PS2Float.h @@ -31,12 +31,15 @@ public: static constexpr u32 SIGNMASK = 0x80000000; static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; - static constexpr u32 POSITIVE_INFINITY_VALUE = 0x7F800000; - static constexpr u32 NEGATIVE_INFINITY_VALUE = 0xFF800000; static constexpr u32 ONE = 0x3F800000; static constexpr u32 MIN_ONE = 0xBF800000; static constexpr int IMPLICIT_LEADING_BIT_POS = 23; + bool dz = false; + bool iv = false; + bool of = false; + bool uf = false; + u32 raw; constexpr u32 Mantissa() const { return raw & 0x7FFFFF; } @@ -59,15 +62,15 @@ public: static PS2Float MinOne(); - static PS2Float SolveAbnormalAdditionOrSubtractionOperation(PS2Float a, PS2Float b, bool add); + static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); - static PS2Float SolveAbnormalMultiplicationOrDivisionOperation(PS2Float a, PS2Float b, bool mul); + static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); - static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); + static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); - static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); + static u32 Itof(s32 complement, s32 f1); - static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); + static s32 Ftoi(s32 complement, u32 f1); PS2Float Add(PS2Float addend); @@ -85,17 +88,15 @@ public: bool IsDenormalized(); 
- bool IsAbnormal(); - bool IsZero(); u32 Abs(); PS2Float Negate(); - s32 CompareTo(PS2Float other); + s32 CompareToSign(PS2Float other); - s32 CompareOperand(PS2Float other); + s32 CompareTo(PS2Float other); double ToDouble(); @@ -109,8 +110,6 @@ private: PS2Float DoMul(PS2Float other); - PS2Float DoDiv(PS2Float other); - static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 825992676d..cfa5a3bd22 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -1609,49 +1609,42 @@ static __fi s32 float_to_int(float value) return value; } -static __fi s32 double_to_int(double value) -{ - if (value >= 2147483647.0) - return 2147483647LL; - if (value <= -2147483648.0) - return -2147483648LL; - return value; -} - -static __fi void _vuFTOI0(VURegs* VU) { +static __fi void _vuFTOI0(VURegs* VU) { if (_Ft_ == 0) return; if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - if (_X) - VU->VF[_Ft_].SL[0] = double_to_int(PS2Float(VU->VF[_Fs_].i.x).ToDouble()); - if (_Y) - VU->VF[_Ft_].SL[1] = double_to_int(PS2Float(VU->VF[_Fs_].i.y).ToDouble()); - if (_Z) - VU->VF[_Ft_].SL[2] = double_to_int(PS2Float(VU->VF[_Fs_].i.z).ToDouble()); - if (_W) - VU->VF[_Ft_].SL[3] = double_to_int(PS2Float(VU->VF[_Fs_].i.w).ToDouble()); + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.w); } else { - if (_X) - VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); - if (_Y) - VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); - if (_Z) - VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); - if (_W) - VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); } } -static __fi void _vuFTOI4(VURegs* VU) { +static __fi void _vuFTOI4(VURegs* VU) { if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.x))); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.y))); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.z))); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.w))); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.x))); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.y))); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.z))); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.w))); + } } static __fi void _vuFTOI12(VURegs* VU) @@ -1659,10 +1652,20 @@ static __fi void _vuFTOI12(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.x))); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.y))); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.z))); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.w))); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.x))); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.y))); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.z))); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.w))); + } } static __fi void _vuFTOI15(VURegs* VU) @@ -1670,10 +1673,20 @@ static __fi void _vuFTOI15(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.x))); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.y))); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.z))); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.w))); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.x))); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.y))); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.z))); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.w))); + } } static __fi void _vuITOF0(VURegs* VU) @@ -1681,10 +1694,20 @@ static __fi void _vuITOF0(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = (float)VU->VF[_Fs_].SL[0]; - if (_Y) VU->VF[_Ft_].f.y = (float)VU->VF[_Fs_].SL[1]; - if (_Z) VU->VF[_Ft_].f.z = (float)VU->VF[_Fs_].SL[2]; - if (_W) VU->VF[_Ft_].f.w = (float)VU->VF[_Fs_].SL[3]; + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(0, VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(0, VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(0, VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(0, VU->VF[_Fs_].SL[3]); + } + else + { + if (_X) VU->VF[_Ft_].f.x = (float)VU->VF[_Fs_].SL[0]; + if (_Y) VU->VF[_Ft_].f.y = (float)VU->VF[_Fs_].SL[1]; + if (_Z) VU->VF[_Ft_].f.z = (float)VU->VF[_Fs_].SL[2]; + if (_W) VU->VF[_Ft_].f.w = (float)VU->VF[_Fs_].SL[3]; + } } static __fi void _vuITOF4(VURegs* VU) @@ -1692,10 +1715,20 @@ static __fi void _vuITOF4(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = int4_to_float(VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].f.y = int4_to_float(VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].f.z = int4_to_float(VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].f.w = int4_to_float(VU->VF[_Fs_].SL[3]); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(4, VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(4, VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(4, VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(4, VU->VF[_Fs_].SL[3]); + } + else + { + if (_X) VU->VF[_Ft_].f.x = int4_to_float(VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].f.y = int4_to_float(VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].f.z = int4_to_float(VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].f.w = int4_to_float(VU->VF[_Fs_].SL[3]); + } } static __fi void _vuITOF12(VURegs* VU) @@ -1703,10 +1736,20 @@ static __fi void _vuITOF12(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = int12_to_float(VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].f.y = int12_to_float(VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].f.z = int12_to_float(VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].f.w = int12_to_float(VU->VF[_Fs_].SL[3]); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(12, VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(12, VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(12, VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(12, VU->VF[_Fs_].SL[3]); + } + else + { + if (_X) VU->VF[_Ft_].f.x = int12_to_float(VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].f.y = int12_to_float(VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].f.z = int12_to_float(VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].f.w = int12_to_float(VU->VF[_Fs_].SL[3]); + } } static __fi void _vuITOF15(VURegs* VU) @@ -1714,10 +1757,20 @@ static __fi void _vuITOF15(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = int15_to_float(VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].f.y = int15_to_float(VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].f.z = int15_to_float(VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].f.w = int15_to_float(VU->VF[_Fs_].SL[3]); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(15, VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(15, VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(15, VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(15, VU->VF[_Fs_].SL[3]); + } + else + { + if (_X) VU->VF[_Ft_].f.x = int15_to_float(VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].f.y = int15_to_float(VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].f.z = int15_to_float(VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].f.w = int15_to_float(VU->VF[_Fs_].SL[3]); + } } static __fi void _vuCLIP(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 3a6555254b..6fc7a4c50b 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -201,6 +201,7 @@ + @@ -646,6 +647,7 @@ + diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 0fa8c4cd9f..8da173ff3f 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -1449,6 +1449,9 @@ System\Ps2\EmotionEngine\Shared + + System\Ps2\EmotionEngine\Shared + @@ -2408,6 +2411,9 @@ System\Ps2\EmotionEngine\Shared + + System\Ps2\EmotionEngine\Shared + From 99ee5d5975cf4867148dfd2be21a89e9529d1dd8 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Mon, 6 Jan 2025 21:33:49 +0100 Subject: [PATCH 15/15] [Soft-Float] - Fixes Itof method on Clang compiler. We use a faster checked method that achieves the same result. 
--- pcsx2/FPU.cpp | 2 +- pcsx2/PS2Float.cpp | 78 +++++++++++++++++++++++++--------------------- pcsx2/PS2Float.h | 4 +-- pcsx2/VUops.cpp | 32 +++++++++---------- 4 files changed, 62 insertions(+), 54 deletions(-) diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 99575cef99..23578d1b33 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -358,7 +358,7 @@ void CTC1() { void CVT_S() { if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { - _FdValUl_ = PS2Float::Itof(0, _FsValSl_); + _FdValUl_ = PS2Float::Itof(0, _FsValSl_).raw; } else { diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp index 1ab3189210..3bd0c5ede1 100644 --- a/pcsx2/PS2Float.cpp +++ b/pcsx2/PS2Float.cpp @@ -87,7 +87,7 @@ PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) { raw = 0; raw |= (sign ? 1u : 0u) << 31; - raw |= (u32)(exponent << 23); + raw |= (u32)(exponent << MANTISSA_BITS); raw |= mantissa & 0x7FFFFF; } @@ -371,7 +371,7 @@ PS2Float PS2Float::DoAdd(PS2Float other) rawExp -= amount; absMan <<= amount; - s32 msbIndex = Common::BitScanReverse8(absMan >> 23); + s32 msbIndex = Common::BitScanReverse8(absMan >> MANTISSA_BITS); rawExp += msbIndex; absMan >>= msbIndex; @@ -388,7 +388,7 @@ PS2Float PS2Float::DoAdd(PS2Float other) return result; } - return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << 23 | ((u32)absMan & 0x7FFFFF)); + return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << MANTISSA_BITS | ((u32)absMan & 0x7FFFFF)); } PS2Float PS2Float::DoMul(PS2Float other) @@ -400,7 +400,7 @@ PS2Float PS2Float::DoMul(PS2Float other) u32 sign = (raw ^ other.raw) & SIGNMASK; s32 resExponent = selfExponent + otherExponent - 127; - u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> 23); + u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> MANTISSA_BITS); if (resMantissa > 0xFFFFFF) { @@ -421,7 +421,7 @@ PS2Float PS2Float::DoMul(PS2Float other) return result; } - return PS2Float(sign | (u32)(resExponent << 23) | (resMantissa & 0x7FFFFF)); + 
return PS2Float(sign | (u32)(resExponent << MANTISSA_BITS) | (resMantissa & 0x7FFFFF)); } PS2Float PS2Float::SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add) @@ -461,41 +461,49 @@ PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) return PS2Float(0); } -u32 PS2Float::Itof(s32 complement, s32 f1) +PS2Float PS2Float::Itof(s32 complement, s32 f1) { - u8 specialCondition; - u32 result; - s32 subExponent, newExponent, floatResult; + if (f1 == 0) + return PS2Float(0); - if (f1 != 0) + s32 resExponent; + + if (f1 == -2147483648) { - specialCondition = 0; - subExponent = 158; - if (f1 < 0) - { - f1 = ~(f1 - 1); - specialCondition = 1; - } - while (f1 >= 0) - { - f1 *= 2; - --subExponent; - } - floatResult = (2 * f1) >> 9; - newExponent = subExponent - complement; - if (newExponent >= 0) - { - floatResult = (((u8)newExponent << 7) | ((floatResult >> 16) & 0x807F)) << 16 | (floatResult & 0xFFFF); - floatResult = (((specialCondition << 7) | ((floatResult >> 24) & 0x7F)) << 24) | (floatResult & 0xFFFFFF); - result = (u32)floatResult; - } - else - result = 0; + // special case + resExponent = 158 - complement; + + if (resExponent >= 0) + return PS2Float(true, (u8)resExponent, 0); + + return PS2Float(0); + } + + bool negative = f1 < 0; + s32 u = std::abs(f1); + + s32 shifts; + + s32 lzcnt = Common::CountLeadingSignBits(u); + if (lzcnt < 8) + { + s32 count = 8 - lzcnt; + u >>= count; + shifts = -count; } else - result = 0; + { + s32 count = lzcnt - 8; + u <<= count; + shifts = count; + } - return result; + resExponent = BIAS + MANTISSA_BITS - shifts - complement; + + if (resExponent >= 0) + return PS2Float(negative, (u8)resExponent, (u32)u); + + return PS2Float(0); } s32 PS2Float::Ftoi(s32 complement, u32 f1) @@ -507,7 +515,7 @@ s32 PS2Float::Ftoi(s32 complement, u32 f1) result = 0; else { - complement = (s32)(f1 >> 23 & 0xFF) + complement; + complement = (s32)(f1 >> MANTISSA_BITS & 0xFF) + complement; f1 &= 0x7FFFFF; f1 |= 0x800000; 
if (complement < 158) diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h index e6501153ae..9e68425be7 100644 --- a/pcsx2/PS2Float.h +++ b/pcsx2/PS2Float.h @@ -28,12 +28,12 @@ class PS2Float public: static constexpr u8 BIAS = 127; + static constexpr u8 MANTISSA_BITS = 23; static constexpr u32 SIGNMASK = 0x80000000; static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; static constexpr u32 ONE = 0x3F800000; static constexpr u32 MIN_ONE = 0xBF800000; - static constexpr int IMPLICIT_LEADING_BIT_POS = 23; bool dz = false; bool iv = false; @@ -68,7 +68,7 @@ public: static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); - static u32 Itof(s32 complement, s32 f1); + static PS2Float Itof(s32 complement, s32 f1); static s32 Ftoi(s32 complement, u32 f1); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index cfa5a3bd22..8fbfe150f0 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -1696,10 +1696,10 @@ static __fi void _vuITOF0(VURegs* VU) if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(0, VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(0, VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(0, VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(0, VU->VF[_Fs_].SL[3]); + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(0, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(0, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(0, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(0, VU->VF[_Fs_].SL[3]).raw; } else { @@ -1717,10 +1717,10 @@ static __fi void _vuITOF4(VURegs* VU) if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(4, VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(4, VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(4, VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(4, VU->VF[_Fs_].SL[3]); + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(4, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(4, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(4, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(4, VU->VF[_Fs_].SL[3]).raw; } else { @@ -1738,10 +1738,10 @@ static __fi void _vuITOF12(VURegs* VU) if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(12, VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(12, VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(12, VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(12, VU->VF[_Fs_].SL[3]); + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(12, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(12, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(12, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(12, VU->VF[_Fs_].SL[3]).raw; } else { @@ -1759,10 +1759,10 @@ static __fi void _vuITOF15(VURegs* VU) if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { - if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(15, VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(15, VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(15, VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(15, VU->VF[_Fs_].SL[3]); + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(15, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(15, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(15, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(15, VU->VF[_Fs_].SL[3]).raw; } else {