diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index acd4903fe4..fcae67fac7 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -48,6 +48,16 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget* connect(m_ui.vu0ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(0, index); }); connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", "vu0SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", "vu1SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gameFixes, "EmuCore", "EnableGameFixes", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 197fda8c7c..5a8b420b88 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -33,8 +33,8 @@ 0 -447 - 790 - 1049 + 793 + 1283 @@ -94,10 +94,10 @@ - - + + - Division Rounding Mode: + Clamping Mode: @@ -125,38 +125,7 @@ - - - - Clamping Mode: - - - - - - - - None - - - - - Normal (Default) - - - - - Extra + Preserve Sign - - - - - Full - - - - - + @@ -208,6 +177,67 @@ + + + + + + None + + + + + Normal (Default) + + + + + Extra + Preserve Sign + + + + + Full + + + + + + + + Division Rounding Mode: + + + + + + + Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + @@ -218,7 +248,7 @@ Vector Units (VU) - + VU1 Rounding Mode: @@ -249,7 +279,129 @@ - + + + + VU1 Clamping Mode: + + + + + + + VU0 Rounding Mode: + + + + + + + VU1 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Float Square Root + + + + + + + + + + VU0 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + @@ -281,30 +433,6 @@ - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - Extra + Preserve Sign - - - - @@ -312,45 +440,7 @@ - - - - VU0 Rounding Mode: - - - - - - - VU1 Clamping Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 5430675614..8f78ed7a63 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + Ps2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -173,6 +174,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + Ps2Float.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/Config.h b/pcsx2/Config.h index fc64f1c22b..9671b86fbd 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -596,17 +596,32 @@ struct Pcsx2Config vu0ExtraOverflow : 1, vu0SignOverflow : 1, vu0Underflow : 1; + + bool + vu0SoftAddSub : 1, + vu0SoftMulDiv : 1, + vu0SoftSqrt : 1; bool vu1Overflow : 1, vu1ExtraOverflow : 1, vu1SignOverflow : 1, vu1Underflow : 1; + + bool + vu1SoftAddSub : 1, + vu1SoftMulDiv : 1, + vu1SoftSqrt : 1; bool fpuOverflow : 1, fpuExtraOverflow : 1, fpuFullMode : 1; + + bool + fpuSoftAddSub : 1, + fpuSoftMulDiv : 1, + fpuSoftSqrt : 1; bool EnableEECache : 1; @@ -1426,11 +1441,19 @@ namespace EmuFolders #define CHECK_VU_SIGN_OVERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SignOverflow : EmuConfig.Cpu.Recompiler.vu1SignOverflow) #define CHECK_VU_UNDERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0Underflow : EmuConfig.Cpu.Recompiler.vu1Underflow) +#define CHECK_VU_SOFT_ADDSUB(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftAddSub : EmuConfig.Cpu.Recompiler.vu1SoftAddSub) +#define CHECK_VU_SOFT_MULDIV(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftMulDiv : EmuConfig.Cpu.Recompiler.vu1SoftMulDiv) +#define CHECK_VU_SOFT_SQRT(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftSqrt : EmuConfig.Cpu.Recompiler.vu1SoftSqrt) + #define CHECK_FPU_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuOverflow) #define CHECK_FPU_EXTRA_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuExtraOverflow) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions #define CHECK_FPU_FULL (EmuConfig.Cpu.Recompiler.fpuFullMode) +#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftAddSub) +#define CHECK_FPU_SOFT_MULDIV (EmuConfig.Cpu.Recompiler.fpuSoftMulDiv) +#define CHECK_FPU_SOFT_SQRT (EmuConfig.Cpu.Recompiler.fpuSoftSqrt) + //------------ EE Recompiler defines - Comment to disable a recompiler --------------- #define SHIFT_RECOMPILE // Speed majorly reduced if disabled diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 3ac1ae3fd1..f2136aff20 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "Ps2Float.h" #include // Helper Macros @@ -63,28 +63,57 @@ // If we have an infinity value, then Overflow has occured. bool checkOverflow(u32& xReg, u32 cFlagsToSet) { - if ((xReg & ~0x80000000) == PosInfinity) { - /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ - xReg = (xReg & 0x80000000) | posFmax; - _ContVal_ |= (cFlagsToSet); - return true; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (xReg == Ps2Float::MAX_FLOATING_POINT_VALUE || xReg == Ps2Float::MIN_FLOATING_POINT_VALUE) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; + } + else + { + if ((xReg & ~0x80000000) == PosInfinity) + { + /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ + xReg = (xReg & 0x80000000) | posFmax; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; } - else if (cFlagsToSet & FPUflagO) - _ContVal_ &= ~FPUflagO; return false; } // If we have a denormal value, then Underflow has occured. bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { - if ( ( (xReg & 0x7F800000) == 0 ) && ( (xReg & 0x007FFFFF) != 0 ) ) { - /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ - xReg &= 0x80000000; - _ContVal_ |= (cFlagsToSet); - return true; + + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (Ps2Float(xReg).IsDenormalized()) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; + } + else + { + if (((xReg & 0x7F800000) == 0) && ((xReg & 0x007FFFFF) != 0)) + { + /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ + xReg &= 0x80000000; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; } - else if (cFlagsToSet & FPUflagU) - _ContVal_ &= ~FPUflagU; return false; } @@ -106,9 +135,36 @@ __fi u32 fp_min(u32 a, u32 b) */ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsToSet1, u32 cFlagsToSet2) { - if ( (yDivisorReg & 0x7F800000) == 0 ) { - _ContVal_ |= ( (zDividendReg & 0x7F800000) == 0 ) ? cFlagsToSet2 : cFlagsToSet1; - xReg = ( (yDivisorReg ^ zDividendReg) & 0x80000000 ) | posFmax; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + Ps2Float yMatrix = Ps2Float(yDivisorReg); + Ps2Float zMatrix = Ps2Float(zDividendReg); + + if (yMatrix.IsZero()) + { + bool dividendZero = zMatrix.IsZero(); + + _ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1; + + bool IsSigned = yMatrix.Sign ^ zMatrix.Sign; + + if (dividendZero) + xReg = IsSigned ? Ps2Float::MIN_FLOATING_POINT_VALUE : Ps2Float::MAX_FLOATING_POINT_VALUE; + else + { + Ps2Float zeroRes = Ps2Float(0); + + zeroRes.Sign = IsSigned; + xReg = zeroRes.AsUInt32(); + } + + return true; + } + } + else if ((yDivisorReg & 0x7F800000) == 0) + { + _ContVal_ |= ((zDividendReg & 0x7F800000) == 0) ? cFlagsToSet2 : cFlagsToSet1; + xReg = ((yDivisorReg ^ zDividendReg) & 0x80000000) | posFmax; return true; } @@ -182,19 +238,60 @@ float fpuDouble(u32 f) } } +static __fi uint32_t fpuAccurateAddSub(u32 a, u32 b, bool issub) +{ + if (CHECK_FPU_SOFT_ADDSUB) + { + if (issub) + return Ps2Float(a).Sub(Ps2Float(b), 1).AsUInt32(); + else + return Ps2Float(a).Add(Ps2Float(b), 1).AsUInt32(); + } + + if (issub) + return std::bit_cast(fpuDouble(a) - fpuDouble(b)); + else + return std::bit_cast(fpuDouble(a) + fpuDouble(b)); +} + +static __fi uint32_t fpuAccurateMulDiv(u32 a, u32 b, bool isdiv) +{ + if (CHECK_FPU_SOFT_MULDIV) + { + if (isdiv) + return Ps2Float(a).Div(Ps2Float(b)).AsUInt32(); + else + return Ps2Float(a).Mul(Ps2Float(b)).AsUInt32(); + } + + if (isdiv) + return std::bit_cast(fpuDouble(a) / fpuDouble(b)); + else + return std::bit_cast(fpuDouble(a) * fpuDouble(b)); +} + +static __fi s32 double_to_int(double value) +{ + if (value >= 2147483647.0) + return 2147483647LL; + if (value <= -2147483648.0) + return -2147483648LL; + return value; +} + void ABS_S() { _FdValUl_ = _FsValUl_ & 0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); } void ADD_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void ADDA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -253,14 +350,30 @@ void CVT_S() { } void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } - else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } - else { _FdValUl_ = 0x80000000; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _FdValSl_ = double_to_int(Ps2Float(_FsValUl_).ToDouble()); + } + else + { + if ((_FsValUl_ & 0x7F800000) <= 0x4E800000) + { + _FdValSl_ = (s32)_FsValf_; + } + else if ((_FsValUl_ & 0x80000000) == 0) + { + _FdValUl_ = 0x7fffffff; + } + else + { + _FdValUl_ = 0x80000000; + } + } } void DIV_S() { if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 1); if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); } @@ -271,14 +384,16 @@ void DIV_S() { */ void MADD_S() { FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FdValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 0); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MADDA_S() { - _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + FPRreg temp; + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FAValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 0); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -304,14 +419,16 @@ void MOV_S() { void MSUB_S() { FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FdValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 1); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MSUBA_S() { - _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + FPRreg temp; + temp.UL = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); + _FAValUl_ = fpuAccurateAddSub(_FAValUl_, temp.UL, 1); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -321,13 +438,13 @@ void MTC1() { } void MUL_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MULA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateMulDiv(_FsValUl_, _FtValUl_, 0); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -341,17 +458,45 @@ void RSQRT_S() { FPRreg temp; clearFPUFlags(FPUflagD | FPUflagI); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) - _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; - return; + if (CHECK_FPU_SOFT_SQRT) + { + Ps2Float value = Ps2Float(_FtValUl_); + + if (value.IsDenormalized()) + { + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = value.Sign ? Ps2Float::MIN_FLOATING_POINT_VALUE : Ps2Float::MAX_FLOATING_POINT_VALUE; + return; + } + else if (_FtValUl_ & 0x80000000) + { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = Ps2Float(_FsValUl_).Rsqrt(Ps2Float(value.Abs())).AsUInt32(); + } + else + { + _FdValUl_ = Ps2Float(_FsValUl_).Rsqrt(value).AsUInt32(); + } // Ft is positive and not zero } - else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative - _ContVal_ |= FPUflagI | FPUflagSI; - temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + else + { + if ((_FtValUl_ & 0x7F800000) == 0) + { // Ft is zero (Denormals are Zero) + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = (_FtValUl_ & 0x80000000) | posFmax; + return; + } + else if (_FtValUl_ & 0x80000000) + { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = sqrt(fabs(fpuDouble(_FtValUl_))); + _FdValf_ = fpuDouble(_FsValUl_) / fpuDouble(temp.UL); + } + else + { + _FdValf_ = fpuDouble(_FsValUl_) / sqrt(fpuDouble(_FtValUl_)); + } // Ft is positive and not zero } - else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); @@ -360,23 +505,40 @@ void RSQRT_S() { void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) // If Ft = +/-0 - _FdValUl_ = _FtValUl_ & 0x80000000;// result is 0 - else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative - _ContVal_ |= FPUflagI | FPUflagSI; - _FdValf_ = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - } else - _FdValf_ = sqrt( fpuDouble( _FtValUl_ ) ); // If Ft is Positive + if (CHECK_FPU_SOFT_SQRT) + { + Ps2Float value = Ps2Float(_FtValUl_); + + if (_FtValUl_ & 0x80000000) + { // If Ft is Negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = Ps2Float(value.Abs()).Sqrt().AsUInt32(); + } + else + _FdValUl_ = value.Sqrt().AsUInt32(); // If Ft is Positive + } + else + { + if ((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 + _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 + else if (_FtValUl_ & 0x80000000) + { // If Ft is Negative + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); + } + else + _FdValf_ = sqrt(fpuDouble(_FtValUl_)); // If Ft is Positive + } } void SUB_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 1); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void SUBA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAddSub(_FsValUl_, _FtValUl_, 1); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 071ff140eb..4c3f70a573 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -536,14 +536,27 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(vu0ExtraOverflow); SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); + + SettingsWrapBitBool(vu0SoftAddSub); + SettingsWrapBitBool(vu0SoftMulDiv); + SettingsWrapBitBool(vu0SoftSqrt); + SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); + + SettingsWrapBitBool(vu1SoftAddSub); + SettingsWrapBitBool(vu1SoftMulDiv); + SettingsWrapBitBool(vu1SoftSqrt); SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); + + SettingsWrapBitBool(fpuSoftAddSub); + SettingsWrapBitBool(fpuSoftMulDiv); + SettingsWrapBitBool(fpuSoftSqrt); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/Ps2Float.cpp b/pcsx2/Ps2Float.cpp new file mode 100644 index 0000000000..8156143272 --- /dev/null +++ b/pcsx2/Ps2Float.cpp @@ -0,0 +1,865 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include +#include +#include "Ps2Float.h" +#include "Common.h" + +const uint8_t Ps2Float::BIAS = 127; +const uint32_t Ps2Float::SIGNMASK = 0x80000000; +const uint32_t Ps2Float::MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; +const uint32_t Ps2Float::MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; +const uint32_t Ps2Float::POSITIVE_INFINITY_VALUE = 0x7F800000; +const uint32_t Ps2Float::NEGATIVE_INFINITY_VALUE = 0xFF800000; +const uint32_t Ps2Float::ONE = 0x3F800000; +const uint32_t Ps2Float::MIN_ONE = 0xBF800000; +const int32_t Ps2Float::IMPLICIT_LEADING_BIT_POS = 23; + +Ps2Float::Ps2Float(uint32_t value) + : Sign((value >> 31) & 1) + , Exponent((uint8_t)(((value >> 23) & 0xFF))) + , Mantissa(value & 0x7FFFFF) +{ +} + +Ps2Float::Ps2Float(bool sign, uint8_t exponent, uint32_t mantissa) + : Sign(sign) + , Exponent(exponent) + , Mantissa(mantissa) +{ +} + +Ps2Float Ps2Float::Max() +{ + return Ps2Float(MAX_FLOATING_POINT_VALUE); +} + +Ps2Float Ps2Float::Min() +{ + return Ps2Float(MIN_FLOATING_POINT_VALUE); +} + +Ps2Float Ps2Float::One() +{ + return Ps2Float(ONE); +} + +Ps2Float Ps2Float::MinOne() +{ + return Ps2Float(MIN_ONE); +} + +uint32_t Ps2Float::AsUInt32() const +{ + uint32_t result = 0; + result |= (Sign ? 1u : 0u) << 31; + result |= (uint32_t)(Exponent << 23); + result |= Mantissa; + return result; +} + +Ps2Float Ps2Float::Add(Ps2Float addend, bool COP1) +{ + if (IsDenormalized() || addend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, addend, true); + + if (IsAbnormal() && addend.IsAbnormal()) + return SolveAbnormalAdditionOrSubtractionOperation(*this, addend, true, COP1); + + uint32_t a = AsUInt32(); + uint32_t b = addend.AsUInt32(); + int32_t temp = 0; + + //exponent difference + int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & Ps2Float::SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + b = temp & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & Ps2Float::SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + a = a & temp; + } + + return Ps2Float(a).DoAdd(Ps2Float(b)); +} + +Ps2Float Ps2Float::Sub(Ps2Float subtrahend, bool COP1) +{ + if (IsDenormalized() || subtrahend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, subtrahend, false); + + if (IsAbnormal() && subtrahend.IsAbnormal()) + return SolveAbnormalAdditionOrSubtractionOperation(*this, subtrahend, false, COP1); + + uint32_t a = AsUInt32(); + uint32_t b = subtrahend.AsUInt32(); + int32_t temp = 0; + + //exponent difference + int exp_diff = ((a >> 23) & 0xff) - ((b >> 23) & 0xff); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & Ps2Float::SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + b = temp & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & Ps2Float::SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + temp = 0xffffffff << exp_diff; + a = a & temp; + } + + + return Ps2Float(a).DoAdd(Neg(Ps2Float(b))); +} + +Ps2Float Ps2Float::Mul(Ps2Float mulend) +{ + if (IsDenormalized() || mulend.IsDenormalized()) + return SolveMultiplicationDenormalizedOperation(*this, mulend); + + if (IsAbnormal() && mulend.IsAbnormal()) + return SolveAbnormalMultiplicationOrDivisionOperation(*this, mulend, true); + + if (IsZero() || mulend.IsZero()) + { + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, mulend); + return result; + } + + return DoMul(mulend); +} + +Ps2Float Ps2Float::Div(Ps2Float divend) +{ + if (IsDenormalized() || divend.IsDenormalized()) + return SolveDivisionDenormalizedOperation(*this, divend); + + if (IsAbnormal() && divend.IsAbnormal()) + return SolveAbnormalMultiplicationOrDivisionOperation(*this, divend, false); + + if (IsZero()) + { + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, divend); + return result; + } + else if (divend.IsZero()) + return DetermineMultiplicationDivisionOperationSign(*this, divend) ? Min() : Max(); + + return DoDiv(divend); +} + +Ps2Float Ps2Float::Sqrt() +{ + int32_t t; + int32_t s = 0; + int32_t q = 0; + uint32_t r = 0x01000000; /* r = moving bit from right to left */ + + if (IsDenormalized()) + return Ps2Float(0); + + // PS2 only takes positive numbers for SQRT, and convert if necessary. + int32_t ix = (int32_t)(Ps2Float(false, Exponent, Mantissa).AsUInt32()); + + /* extract mantissa and unbias exponent */ + int32_t m = (ix >> 23) - BIAS; + + ix = (ix & 0x007fffff) | 0x00800000; + if ((m & 1) == 1) + { + /* odd m, double x to make it even */ + ix += ix; + } + + m >>= 1; /* m = [m/2] */ + + /* generate sqrt(x) bit by bit */ + ix += ix; + + while (r != 0) + { + t = s + (int32_t)(r); + if (t <= ix) + { + s = t + (int32_t)(r); + ix -= t; + q += (int32_t)(r); + } + + ix += ix; + r >>= 1; + } + + /* use floating add to find out rounding direction */ + if (ix != 0) + { + q += q & 1; + } + + ix = (q >> 1) + 0x3f000000; + ix += m << 23; + + return Ps2Float((uint32_t)(ix)); +} + +Ps2Float Ps2Float::Rsqrt(Ps2Float other) +{ + return Div(other.Sqrt()); +} + +bool Ps2Float::IsDenormalized() +{ + return Exponent == 0; +} + +bool Ps2Float::IsAbnormal() +{ + uint32_t val = AsUInt32(); + return val == MAX_FLOATING_POINT_VALUE || val == MIN_FLOATING_POINT_VALUE || + val == POSITIVE_INFINITY_VALUE || val == NEGATIVE_INFINITY_VALUE; +} + +bool Ps2Float::IsZero() +{ + return (Abs()) == 0; +} + +uint32_t Ps2Float::Abs() +{ + return (AsUInt32() & MAX_FLOATING_POINT_VALUE); +} + +Ps2Float Ps2Float::RoundTowardsZero() +{ + return Ps2Float((uint32_t)(std::trunc((double)(AsUInt32())))); +} + +int32_t Ps2Float::CompareTo(Ps2Float other) +{ + int32_t selfTwoComplementVal = (int32_t)(Abs()); + if (Sign) + selfTwoComplementVal = -selfTwoComplementVal; + + int32_t otherTwoComplementVal = (int32_t)(other.Abs()); + if (other.Sign) + otherTwoComplementVal = -otherTwoComplementVal; + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +double Ps2Float::ToDouble() +{ + return std::bit_cast(((u64)Sign << 63) | ((((u64)Exponent - BIAS) + 1023ULL) << 52) | ((u64)Mantissa << 29)); +} + +std::string Ps2Float::ToString() +{ + double res = ToDouble(); + + uint32_t value = AsUInt32(); + std::ostringstream oss; + oss << std::fixed << std::setprecision(6); + + if (IsDenormalized()) + { + oss << "Denormalized(" << res << ")"; + } + else if (value == MAX_FLOATING_POINT_VALUE) + { + oss << "Fmax(" << res << ")"; + } + else if (value == MIN_FLOATING_POINT_VALUE) + { + oss << "-Fmax(" << res << ")"; + } + else if (value == POSITIVE_INFINITY_VALUE) + { + oss << "Inf(" << res << ")"; + } + else if (value == NEGATIVE_INFINITY_VALUE) + { + oss << "-Inf(" << res << ")"; + } + else + { + oss << "Ps2Float(" << res << ")"; + } + + return oss.str(); +} + +Ps2Float Ps2Float::DoAdd(Ps2Float other) +{ + const uint8_t roundingMultiplier = 6; + + uint8_t selfExponent = Exponent; + int32_t resExponent = selfExponent - other.Exponent; + + if (resExponent < 0) + return other.DoAdd(*this); + else if (resExponent >= 25) + return *this; + + // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate + uint32_t sign1 = (uint32_t)((int32_t)AsUInt32() >> 31); + int32_t selfMantissa = (int32_t)(((Mantissa | 0x800000) ^ sign1) - sign1); + uint32_t sign2 = (uint32_t)((int32_t)other.AsUInt32() >> 31); + int32_t otherMantissa = (int32_t)(((other.Mantissa | 0x800000) ^ sign2) - sign2); + + // PS2 multiply by 2 before doing the Math here. + int32_t man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + int32_t absMan = abs(man); + if (absMan == 0) + return Ps2Float(0); + + // Remove from exponent the PS2 Multiplier value. + int32_t rawExp = selfExponent - roundingMultiplier; + + int32_t amount = normalizeAmounts[clz(absMan)]; + rawExp -= amount; + absMan <<= amount; + + int32_t msbIndex = BitScanReverse8(absMan >> 23); + rawExp += msbIndex; + absMan >>= msbIndex; + + if (rawExp > 255) + return man < 0 ? Min() : Max(); + else if (rawExp <= 0) + return Ps2Float(man < 0, 0, 0); + + return Ps2Float((uint32_t)man & Ps2Float::SIGNMASK | (uint32_t)rawExp << 23 | ((uint32_t)absMan & 0x7FFFFF)).RoundTowardsZero(); +} + +Ps2Float Ps2Float::DoMul(Ps2Float other) +{ + uint32_t selfMantissa = Mantissa | 0x800000; + uint32_t otherMantissa = other.Mantissa | 0x800000; + int32_t resExponent = Exponent + other.Exponent - BIAS; + + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); + + if (resExponent > 255) + return result.Sign ? Min() : Max(); + else if (resExponent <= 0) + return Ps2Float(result.Sign, 0, 0); + + uint32_t testImprecision = otherMantissa ^ ((otherMantissa >> 4) & 0x800); // For some reason, 0x808000 loses a bit and 0x800800 loses a bit, but 0x808800 does not + int64_t res = 0; + uint64_t mask = 0xFFFFFFFFFFFFFFFF; + + result.Exponent = (uint8_t)(resExponent); + + otherMantissa <<= 1; + + uint32_t part[13]; // Partial products + uint32_t bit[13]; // More partial products. 0 or 1. + + for (int i = 0; i <= 12; i++, otherMantissa >>= 2) + { + uint32_t test = otherMantissa & 7; + if (test == 0 || test == 7) + { + part[i] = 0; + bit[i] = 0; + } + else if (test == 3) + { + part[i] = (selfMantissa << 1); + bit[i] = 0; + } + else if (test == 4) + { + part[i] = ~(selfMantissa << 1); + bit[i] = 1; + } + else if (test < 4) + { + part[i] = selfMantissa; + bit[i] = 0; + } + else + { + part[i] = ~selfMantissa; + bit[i] = 1; + } + } + + for (int i = 0; i <= 12; i++) + { + res += (uint64_t)(int32_t)part[i] << (i * 2); + res &= mask; + res += bit[i] << (i * 2); + } + + result.Mantissa = (uint32_t)(res >> 23); + + if ((testImprecision & 0x000aaa) && !(res & 0x7FFFFF)) + result.Mantissa -= 1; + + if (result.Mantissa > 0) + { + int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa); + + while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) + { + if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa >>= 1; + + int32_t exp = ((int32_t)result.Exponent + 1); + + if (exp > 255) + return result.Sign ? Min() : Max(); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition--; + } + else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa <<= 1; + + int32_t exp = ((int32_t)result.Exponent - 1); + + if (exp <= 0) + return Ps2Float(result.Sign, 0, 0); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition++; + } + } + } + + result.Mantissa &= 0x7FFFFF; + return result.RoundTowardsZero(); +} + +Ps2Float Ps2Float::DoDiv(Ps2Float other) +{ + uint64_t selfMantissa64; + uint32_t selfMantissa = Mantissa | 0x800000; + uint32_t otherMantissa = other.Mantissa | 0x800000; + int resExponent = Exponent - other.Exponent + BIAS; + + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(*this, other); + + if (resExponent > 255) + return result.Sign ? Min() : Max(); + else if (resExponent <= 0) + return Ps2Float(result.Sign, 0, 0); + + if (selfMantissa < otherMantissa) + { + --resExponent; + if (resExponent == 0) + return Ps2Float(result.Sign, 0, 0); + selfMantissa64 = (uint64_t)(selfMantissa) << 31; + } + else + { + selfMantissa64 = (uint64_t)(selfMantissa) << 30; + } + + uint32_t resMantissa = (uint32_t)(selfMantissa64 / otherMantissa); + if ((resMantissa & 0x3F) == 0) + resMantissa |= ((uint64_t)(otherMantissa)*resMantissa != selfMantissa64) ? 1U : 0; + + result.Exponent = (uint8_t)(resExponent); + result.Mantissa = (resMantissa + 0x39U /* Non-standard value, 40U in IEEE754 (PS2: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D7 -> IEEE754: rsqrt(0x40400000, 0x40400000) = 0x3FDDB3D8 */) >> 7; + + if (result.Mantissa > 0) + { + int32_t leadingBitPosition = Ps2Float::GetMostSignificantBitPosition(result.Mantissa); + + while (leadingBitPosition != IMPLICIT_LEADING_BIT_POS) + { + if (leadingBitPosition > IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa >>= 1; + + int32_t exp = ((int32_t)result.Exponent + 1); + + if (exp > 255) + return result.Sign ? Min() : Max(); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition--; + } + else if (leadingBitPosition < IMPLICIT_LEADING_BIT_POS) + { + result.Mantissa <<= 1; + + int32_t exp = ((int32_t)result.Exponent - 1); + + if (exp <= 0) + return Ps2Float(result.Sign, 0, 0); + + result.Exponent = (uint8_t)exp; + + leadingBitPosition++; + } + } + } + + result.Mantissa &= 0x7FFFFF; + return result.RoundTowardsZero(); +} + +Ps2Float Ps2Float::SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add, bool COP1) +{ + uint32_t aval = a.AsUInt32(); + uint32_t bval = b.AsUInt32(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Max() : Ps2Float(0); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? Min() : Ps2Float(0); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return COP1 ? Min() : (add ? Ps2Float(0) : Min()); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return COP1 ? Max() : (add ? Ps2Float(0) : Max()); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Max() : Ps2Float(0); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return COP1 ? Min() : (add ? Ps2Float(0) : Min()); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return COP1 ? Max() : (add ? Ps2Float(0) : Max()); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Min() : Ps2Float(0); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Max() : Ps2Float(0x7F7FFFFE); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Ps2Float(0x7F7FFFFE) : Max(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return add ? Ps2Float(0xFF7FFFFE) : Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return add ? Min() : Ps2Float(0xFF7FFFFE); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Max() : Ps2Float(0xFF7FFFFE); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? Ps2Float(0xFF7FFFFE) : Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return add ? Ps2Float(0x7F7FFFFE) : Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return add ? Min() : Ps2Float(0x7F7FFFFE); + + Console.Error("Unhandled abnormal add/sub floating point operation"); +} + +Ps2Float Ps2Float::SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul) +{ + uint32_t aval = a.AsUInt32(); + uint32_t bval = b.AsUInt32(); + + if (mul) + { + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) + return Max(); + + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) + return Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Max(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Min(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Max(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Max(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Min(); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Max(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Min(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Max(); + } + else + { + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE)) + return One(); + + if ((aval == MAX_FLOATING_POINT_VALUE && bval == MIN_FLOATING_POINT_VALUE) || + (aval == MIN_FLOATING_POINT_VALUE && bval == MAX_FLOATING_POINT_VALUE)) + return MinOne(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return One(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == POSITIVE_INFINITY_VALUE) + return MinOne(); + + if (aval == POSITIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return MinOne(); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return One(); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Ps2Float(0x3FFFFFFF); + + if (aval == MAX_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Ps2Float(0xBFFFFFFF); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == POSITIVE_INFINITY_VALUE) + return Ps2Float(0xBFFFFFFF); + + if (aval == MIN_FLOATING_POINT_VALUE && bval == NEGATIVE_INFINITY_VALUE) + return Ps2Float(0x3FFFFFFF); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Ps2Float(0x3F000001); + + if (aval == POSITIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Ps2Float(0xBF000001); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MAX_FLOATING_POINT_VALUE) + return Ps2Float(0xBF000001); + + if (aval == NEGATIVE_INFINITY_VALUE && bval == MIN_FLOATING_POINT_VALUE) + return Ps2Float(0x3F000001); + } + + Console.Error("Unhandled abnormal mul/div floating point operation"); +} + +Ps2Float Ps2Float::SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool add) +{ + Ps2Float result = Ps2Float(0); + + if (a.IsDenormalized() && !b.IsDenormalized()) + result = b; + else if (!a.IsDenormalized() && b.IsDenormalized()) + result = a; + else if (a.IsDenormalized() && b.IsDenormalized()) + { + } + else + Console.Error("Both numbers are not denormalized"); + + result.Sign = add ? DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); + return result; +} + +Ps2Float Ps2Float::SolveMultiplicationDenormalizedOperation(Ps2Float a, Ps2Float b) +{ + Ps2Float result = Ps2Float(0); + + result.Sign = DetermineMultiplicationDivisionOperationSign(a, b); + return result; +} + +Ps2Float Ps2Float::SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b) +{ + bool sign = DetermineMultiplicationDivisionOperationSign(a, b); + Ps2Float result = Ps2Float(0); + + if (a.IsDenormalized() && !b.IsDenormalized()) + { + } + else if (!a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else if (a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else + Console.Error("Both numbers are not denormalized"); + + result.Sign = sign; + return result; +} + +Ps2Float Ps2Float::Neg(Ps2Float self) +{ + return Ps2Float(self.AsUInt32() ^ SIGNMASK); +} + +bool Ps2Float::DetermineMultiplicationDivisionOperationSign(Ps2Float a, Ps2Float b) +{ + return a.Sign ^ b.Sign; +} + +bool Ps2Float::DetermineAdditionOperationSign(Ps2Float a, Ps2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign || !b.Sign) + return false; + else if (a.Sign && b.Sign) + return true; + else + Console.Error("Unhandled addition operation flags"); + } + else if (a.IsZero()) + return b.Sign; + + return a.Sign; +} + +bool Ps2Float::DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign || b.Sign) + return false; + else if (a.Sign && !b.Sign) + return true; + else + Console.Error("Unhandled subtraction operation flags"); + } + else if (a.IsZero()) + return !b.Sign; + else if (b.IsZero()) + return a.Sign; + + return a.CompareTo(b) >= 0 ? a.Sign : !b.Sign; +} + +int32_t Ps2Float::clz(int x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return debruijn32[(uint)x * 0x8c0b2891u >> 26]; +} + +int32_t Ps2Float::BitScanReverse8(int b) +{ + return msb[b]; +} + +int32_t Ps2Float::GetMostSignificantBitPosition(uint32_t value) +{ + for (int32_t i = 31; i >= 0; i--) + { + if (((value >> i) & 1) != 0) + return i; + } + return -1; +} + +const int8_t Ps2Float::msb[256] = + { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + +const int32_t Ps2Float::debruijn32[64] = + { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12}; + +const int32_t Ps2Float::normalizeAmounts[] = + { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; diff --git a/pcsx2/Ps2Float.h b/pcsx2/Ps2Float.h new file mode 100644 index 0000000000..2453ee1473 --- /dev/null +++ b/pcsx2/Ps2Float.h @@ -0,0 +1,104 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +#include + +class Ps2Float +{ +public: + bool Sign; + uint8_t Exponent; + uint32_t Mantissa; + + static const uint8_t BIAS; + static const uint32_t SIGNMASK; + static const uint32_t MAX_FLOATING_POINT_VALUE; + static const uint32_t MIN_FLOATING_POINT_VALUE; + static const uint32_t POSITIVE_INFINITY_VALUE; + static const uint32_t NEGATIVE_INFINITY_VALUE; + static const uint32_t ONE; + static const uint32_t MIN_ONE; + static const int IMPLICIT_LEADING_BIT_POS; + + static const int8_t msb[256]; + static const int32_t debruijn32[64]; + static const int32_t normalizeAmounts[]; + + Ps2Float(uint32_t value); + + Ps2Float(bool sign, uint8_t exponent, uint32_t mantissa); + + static Ps2Float Max(); + + static Ps2Float Min(); + + static Ps2Float One(); + + static Ps2Float MinOne(); + + static Ps2Float Neg(Ps2Float self); + + uint32_t AsUInt32() const; + + Ps2Float Add(Ps2Float addend, bool COP1); + + Ps2Float Sub(Ps2Float subtrahend, bool COP1); + + Ps2Float Mul(Ps2Float mulend); + + Ps2Float Div(Ps2Float divend); + + Ps2Float Sqrt(); + + Ps2Float Rsqrt(Ps2Float other); + + bool IsDenormalized(); + + bool IsAbnormal(); + + bool IsZero(); + + uint32_t Abs(); + + Ps2Float RoundTowardsZero(); + + int32_t CompareTo(Ps2Float other); + + double ToDouble(); + + std::string ToString(); + +protected: + +private: + + Ps2Float DoAdd(Ps2Float other); + + Ps2Float DoMul(Ps2Float other); + + Ps2Float DoDiv(Ps2Float other); + + static Ps2Float SolveAbnormalAdditionOrSubtractionOperation(Ps2Float a, Ps2Float b, bool add, bool COP1); + + static Ps2Float SolveAbnormalMultiplicationOrDivisionOperation(Ps2Float a, Ps2Float b, bool mul); + + static Ps2Float SolveAddSubDenormalizedOperation(Ps2Float a, Ps2Float b, bool add); + + static Ps2Float SolveMultiplicationDenormalizedOperation(Ps2Float a, Ps2Float b); + + static Ps2Float SolveDivisionDenormalizedOperation(Ps2Float a, Ps2Float b); + + static bool DetermineMultiplicationDivisionOperationSign(Ps2Float a, Ps2Float b); + + static bool DetermineAdditionOperationSign(Ps2Float a, Ps2Float b); + + static bool DetermineSubtractionOperationSign(Ps2Float a, Ps2Float b); + + static int32_t GetMostSignificantBitPosition(uint32_t value); + + static int32_t BitScanReverse8(int32_t b); + + static int32_t clz(int32_t x); +}; diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 1f8224bc39..ad55a0e12b 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -124,6 +124,7 @@ struct alignas(16) VURegs REG_VI q; REG_VI p; + VECTOR TMP; uint idx; // VU index (0 or 1) // flags/cycle are needed by VIF dma code, so they have to be here (for now) diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index 22632cf36b..85a38eb1b8 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "Ps2Float.h" #include #include @@ -12,21 +12,22 @@ /* NEW FLAGS */ //By asadr. Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, uint32_t f) { - u32 v = *(u32*)&f; - int exp = (v >> 23) & 0xff; - u32 s = v & 0x80000000; + Ps2Float ps2f = Ps2Float(f); + + uint exp = ps2f.Exponent; + u32 s = ps2f.AsUInt32() & Ps2Float::SIGNMASK; if (s) VU->macflag |= 0x0010<macflag &= ~(0x0010<macflag = (VU->macflag & ~(0x1100<macflag = (VU->macflag&~(0x1000<macflag = (VU->macflag&~(0x0101<macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } + else + return f; + } + else if (CHECK_VU_OVERFLOW((VU == &VU1) ? 1 : 0)) + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return s | 0x7f7fffff; /* max IEEE754 allowed */ + } else - return v; + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } default: VU->macflag = (VU->macflag & ~(0x1101<(vuDouble(a) - vuDouble(b)); + else + return std::bit_cast(vuDouble(a) + vuDouble(b)); + +} + +static __fi uint32_t vuAccurateMulDiv(VURegs* VU, u32 a, u32 b, bool isdiv) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + if (isdiv) + return Ps2Float(a).Div(Ps2Float(b)).AsUInt32(); + else + return Ps2Float(a).Mul(Ps2Float(b)).AsUInt32(); + } + + if (isdiv) + return std::bit_cast(vuDouble(a) / vuDouble(b)); + else + return std::bit_cast(vuDouble(a) * vuDouble(b)); +} + static __fi float vuADD_TriAceHack(u32 a, u32 b) { // On VU0 TriAce Games use ADDi and expects these bit-perfect results: @@ -481,15 +515,17 @@ static __fi float vuADD_TriAceHack(u32 a, u32 b) // but VU interpreters don't seem to need it currently: // Update Sept 2021, now the interpreters don't suck, they do - Refraction - s32 aExp = (a >> 23) & 0xff; - s32 bExp = (b >> 23) & 0xff; - if (aExp - bExp >= 25) b &= 0x80000000; - if (aExp - bExp <=-25) a &= 0x80000000; - float ret = vuDouble(a) + vuDouble(b); + // s32 aExp = (a >> 23) & 0xff; + // s32 bExp = (b >> 23) & 0xff; + // if (aExp - bExp >= 25) b &= 0x80000000; + // if (aExp - bExp <=-25) a &= 0x80000000; + // float ret = vuDouble(a) + vuDouble(b); //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - return ret; + + // Update November 2024, now the interpreters has soft float support - GithubProUser67 + return vuDouble(Ps2Float(a).Add(Ps2Float(b), 0).AsUInt32()); } void _vuABS(VURegs* VU) @@ -497,10 +533,44 @@ void _vuABS(VURegs* VU) if (_Ft_ == 0) return; - if (_X){ VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); } - if (_Y){ VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); } - if (_Z){ VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); } - if (_W){ VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); } + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) + { + VU->VF[_Ft_].i.x = Ps2Float(VU->VF[_Fs_].i.x).Abs(); + } + if (_Y) + { + VU->VF[_Ft_].i.y = Ps2Float(VU->VF[_Fs_].i.y).Abs(); + } + if (_Z) + { + VU->VF[_Ft_].i.z = Ps2Float(VU->VF[_Fs_].i.z).Abs(); + } + if (_W) + { + VU->VF[_Ft_].i.w = Ps2Float(VU->VF[_Fs_].i.w).Abs(); + } + } + else + { + if (_X) + { + VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); + } + if (_Y) + { + VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); + } + if (_Z) + { + VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); + } + if (_W) + { + VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); + } + } } @@ -512,10 +582,10 @@ static __fi void _vuADD(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -528,20 +598,11 @@ static __fi void _vuADDi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (!CHECK_VUADDSUBHACK) { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } - else { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuADDq(VURegs* VU) @@ -552,153 +613,133 @@ static __fi void _vuADDq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + fty);} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + fty);} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + fty);} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + fty);} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ti); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ti); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ti); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ti); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAq(VURegs* VU) { - float tf = vuDouble(VU->VI[REG_Q].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tf); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tf); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tf); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tf); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -711,11 +752,11 @@ static __fi void _vuSUB(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBi(VURegs* VU) @@ -726,10 +767,10 @@ static __fi void _vuSUBi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 1));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 1));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 1));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -741,147 +782,131 @@ static __fi void _vuSUBq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - fty); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 1)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 1)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 1)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 1)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -893,10 +918,10 @@ static __fi void _vuMUL(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -908,10 +933,10 @@ static __fi void _vuMULi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -923,498 +948,540 @@ static __fi void _vuMULq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftx); } else VU_MACw_CLEAR(VU); + uint32_t ftx = VU->VF[_Ft_].i.x; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * fty); } else VU_MACw_CLEAR(VU); + uint32_t fty = VU->VF[_Ft_].i.y; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftz); } else VU_MACw_CLEAR(VU); + uint32_t ftz = VU->VF[_Ft_].i.z; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftw); } else VU_MACw_CLEAR(VU); + uint32_t ftw = VU->VF[_Ft_].i.w; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAx(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADD(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDi(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDq(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftx)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftx)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftx)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftx)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * fty)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * fty)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * fty)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * fty)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftz)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftz)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftz)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftz)); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + tmp = &VU->TMP; + uint32_t ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuMADDw(VURegs* VU) { - float ftw; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftw)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftw)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftw)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftw)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDA(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * ti)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * ti)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * ti)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * ti)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * tq)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * tq)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * tq)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * tq)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAx(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAy(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAz(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAw(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 0));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 0));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 0));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 0));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } + static __fi void _vuMSUBi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ti ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ti ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ti ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ti ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tq ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tq ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tq ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tq ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftx ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftx ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftx ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftx ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftx, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftx, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftx, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftx, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * fty ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * fty ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * fty ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * fty ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, fty, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, fty, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, fty, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, fty, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftz ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftz ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftz ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftz ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftz, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftz, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftz, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftz, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBw(VURegs* VU) { - float ftw; - VECTOR * dst; - if (_Fd_ == 0) dst = &RDzero; - else dst = &VU->VF[_Fd_]; + VECTOR* tmp; + VECTOR* dst; + if (_Fd_ == 0) + dst = &RDzero; + else + dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftw ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftw ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftw ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftw ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + uint32_t ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ftw, 0); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ftw, 0); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ftw, 0); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ftw, 0); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - -static __fi void _vuMSUBA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAi(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAq(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAx(VURegs* VU) +static __fi void _vuMSUBA(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tx)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tx)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tx)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tx)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAy(VURegs* VU) +static __fi void _vuMSUBAi(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ty)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ty)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ty)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ty)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAz(VURegs* VU) +static __fi void _vuMSUBAq(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tz)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tz)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tz)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tz)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAw(VURegs* VU) -{ - float tw = vuDouble(VU->VF[_Ft_].i.w); - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tw)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tw)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tw)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tw)); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t tx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tx, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tx, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tx, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tx, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t ty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, ty, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, ty, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, ty, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, ty, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t tz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tz, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tz, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tz, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tz, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + uint32_t tw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, tw, 0); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, tw, 0); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, tw, 0); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.w, tw, 0); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.w, tmp->i.w, 1));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1575,32 +1642,28 @@ static __fi void _vuMINIw(VURegs* VU) static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0)); VU_STAT_UPDATE(VU); } static __fi void _vuOPMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; - float ftx, fty, ftz; - float fsx, fsy, fsz; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx = vuDouble(VU->VF[_Ft_].i.x); - fty = vuDouble(VU->VF[_Ft_].i.y); - ftz = vuDouble(VU->VF[_Ft_].i.z); - fsx = vuDouble(VU->VF[_Fs_].i.x); - fsy = vuDouble(VU->VF[_Fs_].i.y); - fsz = vuDouble(VU->VF[_Fs_].i.z); - - dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); - dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); - dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + tmp = &VU->TMP; + tmp->i.x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z, 0); + tmp->i.y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x, 0); + tmp->i.z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y, 0); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.x, tmp->i.x, 1)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.y, tmp->i.y, 1)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAddSub(VU, VU->ACC.i.z, tmp->i.z, 1)); VU_STAT_UPDATE(VU); } @@ -1617,13 +1680,40 @@ static __fi s32 float_to_int(float value) return value; } +static __fi s32 double_to_int(double value) +{ + if (value >= 2147483647.0) + return 2147483647LL; + if (value <= -2147483648.0) + return -2147483648LL; + return value; +} + static __fi void _vuFTOI0(VURegs* VU) { if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) + VU->VF[_Ft_].SL[0] = double_to_int(Ps2Float(VU->VF[_Fs_].i.x).ToDouble()); + if (_Y) + VU->VF[_Ft_].SL[1] = double_to_int(Ps2Float(VU->VF[_Fs_].i.y).ToDouble()); + if (_Z) + VU->VF[_Ft_].SL[2] = double_to_int(Ps2Float(VU->VF[_Fs_].i.z).ToDouble()); + if (_W) + VU->VF[_Ft_].SL[3] = double_to_int(Ps2Float(VU->VF[_Fs_].i.w).ToDouble()); + } + else + { + if (_X) + VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); + if (_Y) + VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); + if (_Z) + VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); + if (_W) + VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + } } static __fi void _vuFTOI4(VURegs* VU) { @@ -1703,15 +1793,43 @@ static __fi void _vuITOF15(VURegs* VU) static __fi void _vuCLIP(VURegs* VU) { - float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + double value = Ps2Float(Ps2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); + + VU->clipflag <<= 6; + if (Ps2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) + VU->clipflag |= 0x01; + if (Ps2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) + VU->clipflag |= 0x02; + if (Ps2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) + VU->clipflag |= 0x04; + if (Ps2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) + VU->clipflag |= 0x08; + if (Ps2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) + VU->clipflag |= 0x10; + if (Ps2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) + VU->clipflag |= 0x20; + } + else + { + float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + + VU->clipflag <<= 6; + if (vuDouble(VU->VF[_Fs_].i.x) > +value) + VU->clipflag |= 0x01; + if (vuDouble(VU->VF[_Fs_].i.x) < -value) + VU->clipflag |= 0x02; + if (vuDouble(VU->VF[_Fs_].i.y) > +value) + VU->clipflag |= 0x04; + if (vuDouble(VU->VF[_Fs_].i.y) < -value) + VU->clipflag |= 0x08; + if (vuDouble(VU->VF[_Fs_].i.z) > +value) + VU->clipflag |= 0x10; + if (vuDouble(VU->VF[_Fs_].i.z) < -value) + VU->clipflag |= 0x20; + } - VU->clipflag <<= 6; - if ( vuDouble(VU->VF[_Fs_].i.x) > +value ) VU->clipflag|= 0x01; - if ( vuDouble(VU->VF[_Fs_].i.x) < -value ) VU->clipflag|= 0x02; - if ( vuDouble(VU->VF[_Fs_].i.y) > +value ) VU->clipflag|= 0x04; - if ( vuDouble(VU->VF[_Fs_].i.y) < -value ) VU->clipflag|= 0x08; - if ( vuDouble(VU->VF[_Fs_].i.z) > +value ) VU->clipflag|= 0x10; - if ( vuDouble(VU->VF[_Fs_].i.z) < -value ) VU->clipflag|= 0x20; VU->clipflag = VU->clipflag & 0xFFFFFF; } @@ -1721,28 +1839,56 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { - if (fs == 0.0) - VU->statusflag |= 0x10; - else - VU->statusflag |= 0x20; + Ps2Float ft = Ps2Float(VU->VF[_Ft_].UL[_Ftf_]); + Ps2Float fs = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + if (fs.IsZero()) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = Ps2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = Ps2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + VU->q.UL = fs.Div(ft).AsUInt32(); + } } else { - VU->q.F = fs / ft; - VU->q.F = vuDouble(VU->q.UL); + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + if (fs == 0.0) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + VU->q.F = fs / ft; + VU->q.F = vuDouble(VU->q.UL); + } } } @@ -2442,56 +2588,118 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - VU->p.F = p; + VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0); } static __ri void _vuERSADD(VURegs* VU) { - float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - if (p != 0.0) - p = 1.0f / p; + Ps2Float p = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); - VU->p.F = p; + if (!p.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + p = Ps2Float::One().Div(p); + else + { + VU->p.F = 1.0f / vuDouble(p.AsUInt32()); + return; + } + } + + VU->p.UL = p.AsUInt32(); } static __ri void _vuELENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - if (p >= 0) + Ps2Float value = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(value.AsUInt32()); + + if (p >= 0) + { + p = sqrt(p); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuERLENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + uint32_t x = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x, 0); + uint32_t y = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y, 0); + uint32_t z = vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z, 0); - if (p >= 0) + Ps2Float value = Ps2Float(vuAccurateAddSub(VU, vuAccurateAddSub(VU, x, y, 0), z, 0)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p != 0) + if (value.ToDouble() >= 0) { - p = 1.0f / p; + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + value = Ps2Float::One().Div(value); + } + else + { + VU->p.F = 1.0 / vuDouble(value.AsUInt32()); + return; + } + } } + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(value.AsUInt32()); + + if (p >= 0) + { + p = sqrt(p); + if (p != 0) + { + p = 1.0f / p; + } + } + VU->p.F = p; } - VU->p.F = p; } -static __ri float _vuCalculateEATAN(float inputvalue) { +static __ri float _vuCalculateEATAN(uint32_t inputvalue) { + + float fvalue = vuDouble(inputvalue); + float eatanconst[9] = { 0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, 0.785398185253143f }; - float result = (eatanconst[0] * inputvalue) + (eatanconst[1] * pow(inputvalue, 3)) + (eatanconst[2] * pow(inputvalue, 5)) - + (eatanconst[3] * pow(inputvalue, 7)) + (eatanconst[4] * pow(inputvalue, 9)) + (eatanconst[5] * pow(inputvalue, 11)) - + (eatanconst[6] * pow(inputvalue, 13)) + (eatanconst[7] * pow(inputvalue, 15)); + float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) + + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15)); result += eatanconst[8]; @@ -2502,16 +2710,16 @@ static __ri float _vuCalculateEATAN(float inputvalue) { static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); + float p = _vuCalculateEATAN(VU->VF[_Fs_].UL[_Fsf_]); VU->p.F = p; } static __ri void _vuEATANxy(VURegs* VU) { float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!Ps2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x, 1)); } VU->p.F = p; } @@ -2519,57 +2727,104 @@ static __ri void _vuEATANxy(VURegs* VU) static __ri void _vuEATANxz(VURegs* VU) { float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!Ps2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + p = _vuCalculateEATAN(vuAccurateMulDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x, 1)); } VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); - VU->p.F = p; + VU->p.UL = vuAccurateAddSub(VU, vuAccurateAddSub(VU, vuAccurateAddSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.y, 0), VU->VF[_Fs_].i.z, 0), VU->VF[_Fs_].i.w, 0); } static __ri void _vuERCPR(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + Ps2Float p = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - if (p != 0) + if (!p.IsZero()) { - p = 1.0 / p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + p = Ps2Float::One().Div(p); + } + else + { + VU->p.F = 1.0 / vuDouble(p.AsUInt32()); + return; + } } - VU->p.F = p; + VU->p.UL = p.AsUInt32(); } static __ri void _vuESQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - } + Ps2Float value = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + if (p >= 0) + { + p = sqrt(p); + } + + VU->p.F = p; + } } static __ri void _vuERSQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p) - { - p = 1.0f / p; - } - } + Ps2Float value = Ps2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + VU->p.F = 1.0f / vuDouble(value.AsUInt32()); + return; + } + else + { + value = Ps2Float::One().Div(value); + } + } + } + + VU->p.UL = value.AsUInt32(); + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + if (p >= 0) + { + p = sqrt(p); + if (p) + { + p = 1.0f / p; + } + } + + VU->p.F = p; + } } static __ri void _vuESIN(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index c7f68793cf..53b9a41c37 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -281,6 +281,7 @@ + @@ -726,6 +727,7 @@ + @@ -1025,4 +1027,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index 51782a5fcf..c7f2c7dd50 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -289,6 +289,9 @@ {cd8ec519-2196-43f7-86de-7faced2d4296} + + {9a40984b-cb23-4a54-a5e9-9c54f3c16c5b} + @@ -1443,6 +1446,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2399,6 +2405,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2428,4 +2437,4 @@ System\Ps2\GS - + \ No newline at end of file