diff --git a/common/BitUtils.h b/common/BitUtils.h index 64b6ff3047..3f42751417 100644 --- a/common/BitUtils.h +++ b/common/BitUtils.h @@ -28,6 +28,19 @@ static inline int _BitScanReverse(unsigned long* const Index, const unsigned lon namespace Common { + static constexpr s8 msb[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + + static constexpr s32 normalizeAmounts[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; + template static constexpr __fi bool IsAligned(T value, unsigned int alignment) { @@ -84,6 +97,11 @@ namespace Common // Perform our count leading zero. return std::countl_zero(static_cast(n)); } + + __fi static s32 BitScanReverse8(s32 b) + { + return msb[b]; + } } // namespace Common template diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index acd4903fe4..fcae67fac7 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -48,6 +48,16 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget* connect(m_ui.vu0ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(0, index); }); connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", "vu0SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", "vu1SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gameFixes, "EmuCore", "EnableGameFixes", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 197fda8c7c..aa68c2c5c3 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -34,7 +34,7 @@ 0 -447 790 - 1049 + 1283 @@ -94,10 +94,10 @@ - - + + - Division Rounding Mode: + Clamping Mode: @@ -125,38 +125,7 @@ - - - - Clamping Mode: - - - - - - - - None - - - - - Normal (Default) - - - - - Extra + Preserve Sign - - - - - Full - - - - - + @@ -208,6 +177,67 @@ + + + + + + None + + + + + Normal (Default) + + + + + Extra + Preserve Sign + + + + + Full + + + + + + + + Division Rounding Mode: + + + + + + + Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + @@ -218,7 +248,7 @@ Vector Units (VU) - + VU1 Rounding Mode: @@ -249,7 +279,129 @@ - + + + + VU1 Clamping Mode: + + + + + + + VU0 Rounding Mode: + + + + + + + VU1 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Float Square Root + + + + + + + + + + VU0 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + @@ -281,30 +433,6 @@ - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - Extra + Preserve Sign - - - - @@ -312,45 +440,7 @@ - - - - VU0 Rounding Mode: - - - - - - - VU1 Clamping Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index d99c68fb27..17eaa90d84 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,8 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + PS2Float.cpp + FpgaDiv.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -173,6 +175,8 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + PS2Float.h + FpgaDiv.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/Config.h b/pcsx2/Config.h index 403900ff2c..c55125ab97 100644 --- a/pcsx2/Config.h +++ b/pcsx2/Config.h @@ -598,17 +598,32 @@ struct Pcsx2Config vu0ExtraOverflow : 1, vu0SignOverflow : 1, vu0Underflow : 1; + + bool + vu0SoftAddSub : 1, + vu0SoftMulDiv : 1, + vu0SoftSqrt : 1; bool vu1Overflow : 1, vu1ExtraOverflow : 1, vu1SignOverflow : 1, vu1Underflow : 1; + + bool + vu1SoftAddSub : 1, + vu1SoftMulDiv : 1, + vu1SoftSqrt : 1; bool fpuOverflow : 1, fpuExtraOverflow : 1, fpuFullMode : 1; + + bool + fpuSoftAddSub : 1, + fpuSoftMulDiv : 1, + fpuSoftSqrt : 1; bool EnableEECache : 1; @@ -1428,11 +1443,19 @@ namespace EmuFolders #define CHECK_VU_SIGN_OVERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SignOverflow : EmuConfig.Cpu.Recompiler.vu1SignOverflow) #define CHECK_VU_UNDERFLOW(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0Underflow : EmuConfig.Cpu.Recompiler.vu1Underflow) +#define CHECK_VU_SOFT_ADDSUB(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftAddSub : EmuConfig.Cpu.Recompiler.vu1SoftAddSub) +#define CHECK_VU_SOFT_MULDIV(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftMulDiv : EmuConfig.Cpu.Recompiler.vu1SoftMulDiv) +#define CHECK_VU_SOFT_SQRT(vunum) (((vunum) == 0) ? EmuConfig.Cpu.Recompiler.vu0SoftSqrt : EmuConfig.Cpu.Recompiler.vu1SoftSqrt) + #define CHECK_FPU_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuOverflow) #define CHECK_FPU_EXTRA_OVERFLOW (EmuConfig.Cpu.Recompiler.fpuExtraOverflow) // If enabled, Operands are checked for infinities before being used in the FPU recs #define CHECK_FPU_EXTRA_FLAGS 1 // Always enabled now // Sets D/I flags on FPU instructions #define CHECK_FPU_FULL (EmuConfig.Cpu.Recompiler.fpuFullMode) +#define CHECK_FPU_SOFT_ADDSUB (EmuConfig.Cpu.Recompiler.fpuSoftAddSub) +#define CHECK_FPU_SOFT_MULDIV (EmuConfig.Cpu.Recompiler.fpuSoftMulDiv) +#define CHECK_FPU_SOFT_SQRT (EmuConfig.Cpu.Recompiler.fpuSoftSqrt) + //------------ EE Recompiler defines - Comment to disable a recompiler --------------- #define SHIFT_RECOMPILE // Speed majorly reduced if disabled diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index 3ac1ae3fd1..23578d1b33 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "PS2Float.h" #include // Helper Macros @@ -63,28 +63,57 @@ // If we have an infinity value, then Overflow has occured. bool checkOverflow(u32& xReg, u32 cFlagsToSet) { - if ((xReg & ~0x80000000) == PosInfinity) { - /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ - xReg = (xReg & 0x80000000) | posFmax; - _ContVal_ |= (cFlagsToSet); - return true; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (xReg == PS2Float::MAX_FLOATING_POINT_VALUE || xReg == PS2Float::MIN_FLOATING_POINT_VALUE) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; + } + else + { + if ((xReg & ~0x80000000) == PosInfinity) + { + /*Console.Warning( "FPU OVERFLOW!: Changing to +/-Fmax!!!!!!!!!!!!\n" );*/ + xReg = (xReg & 0x80000000) | posFmax; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= ~FPUflagO; } - else if (cFlagsToSet & FPUflagO) - _ContVal_ &= ~FPUflagO; return false; } // If we have a denormal value, then Underflow has occured. bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { - if ( ( (xReg & 0x7F800000) == 0 ) && ( (xReg & 0x007FFFFF) != 0 ) ) { - /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ - xReg &= 0x80000000; - _ContVal_ |= (cFlagsToSet); - return true; + + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + if (PS2Float(xReg).IsDenormalized()) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; + } + else + { + if (((xReg & 0x7F800000) == 0) && ((xReg & 0x007FFFFF) != 0)) + { + /*Console.Warning( "FPU UNDERFLOW!: Changing to +/-0!!!!!!!!!!!!\n" );*/ + xReg &= 0x80000000; + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagU) + _ContVal_ &= ~FPUflagU; } - else if (cFlagsToSet & FPUflagU) - _ContVal_ &= ~FPUflagU; return false; } @@ -106,9 +135,41 @@ __fi u32 fp_min(u32 a, u32 b) */ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsToSet1, u32 cFlagsToSet2) { - if ( (yDivisorReg & 0x7F800000) == 0 ) { - _ContVal_ |= ( (zDividendReg & 0x7F800000) == 0 ) ? cFlagsToSet2 : cFlagsToSet1; - xReg = ( (yDivisorReg ^ zDividendReg) & 0x80000000 ) | posFmax; + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + PS2Float yMatrix = PS2Float(yDivisorReg); + PS2Float zMatrix = PS2Float(zDividendReg); + + // Check Final Fantasy X controls and Klonoa 2 to test this code, they send a bunch of denormals which are often hack-fixed on the game code. + if (zMatrix.IsDenormalized() || yMatrix.IsDenormalized()) + { + _ContVal_ |= 0; + xReg = PS2Float::SolveDivisionDenormalizedOperation(zMatrix, yMatrix).raw; + return true; + } + + if (zMatrix.IsZero()) + { + bool dividendZero = yMatrix.IsZero(); + + _ContVal_ |= dividendZero ? cFlagsToSet2 : cFlagsToSet1; + + bool IsSigned = zMatrix.Sign() ^ yMatrix.Sign(); + + if (dividendZero) + xReg = IsSigned ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; + else + { + xReg = PS2Float(IsSigned, 0, 0).raw; + } + + return true; + } + } + else if ((yDivisorReg & 0x7F800000) == 0) + { + _ContVal_ |= ((zDividendReg & 0x7F800000) == 0) ? cFlagsToSet2 : cFlagsToSet1; + xReg = ((yDivisorReg ^ zDividendReg) & 0x80000000) | posFmax; return true; } @@ -125,25 +186,6 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT _ContVal_ &= ~( cFlags ) ; \ } -#ifdef comparePrecision -// This compare discards the least-significant bit(s) in order to solve some rounding issues. - #define C_cond_S(cond) { \ - FPRreg tempA, tempB; \ - tempA.UL = _FsValUl_ & comparePrecision; \ - tempB.UL = _FtValUl_ & comparePrecision; \ - _ContVal_ = ( ( tempA.f ) cond ( tempB.f ) ) ? \ - ( _ContVal_ | FPUflagC ) : \ - ( _ContVal_ & ~FPUflagC ); \ - } -#else -// Used for Comparing; This compares if the floats are exactly the same. - #define C_cond_S(cond) { \ - _ContVal_ = ( fpuDouble(_FsValUl_) cond fpuDouble(_FtValUl_) ) ? \ - ( _ContVal_ | FPUflagC ) : \ - ( _ContVal_ & ~FPUflagC ); \ - } -#endif - // Conditional Branch #define BC1(cond) \ if ( ( _ContVal_ & FPUflagC ) cond 0 ) { \ @@ -182,19 +224,85 @@ float fpuDouble(u32 f) } } +static __fi u32 fpuAccurateAdd(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Add(PS2Float(b)).raw; + + return std::bit_cast(fpuDouble(a) + fpuDouble(b)); +} + +static __fi u32 fpuAccurateSub(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_ADDSUB) return PS2Float(a).Sub(PS2Float(b)).raw; + + return std::bit_cast(fpuDouble(a) - fpuDouble(b)); +} + +static __fi u32 fpuAccurateMul(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Mul(PS2Float(b)).raw; + + return std::bit_cast(fpuDouble(a) * fpuDouble(b)); +} + +static __fi u32 fpuAccurateDiv(u32 a, u32 b) +{ + if (CHECK_FPU_SOFT_MULDIV) return PS2Float(a).Div(PS2Float(b)).raw; + + return std::bit_cast(fpuDouble(a) / fpuDouble(b)); +} + +static __fi void C_cond_S(uint8_t mode) +{ + switch (mode) + { + case 0: // == + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _ContVal_ = (PS2Float(_FsValUl_).CompareToSign(PS2Float(_FtValUl_)) == 0) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) == fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + case 1: // <= + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + int32_t cmpResult = PS2Float(_FsValUl_).CompareToSign(PS2Float(_FtValUl_)); + _ContVal_ = (cmpResult == 0 || cmpResult == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) <= fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + case 2: // < + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _ContVal_ = (PS2Float(_FsValUl_).CompareToSign(PS2Float(_FtValUl_)) == -1) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + else + { + _ContVal_ = (fpuDouble(_FsValUl_) < fpuDouble(_FtValUl_)) ? (_ContVal_ | FPUflagC) : (_ContVal_ & ~FPUflagC); + } + break; + } +} + void ABS_S() { _FdValUl_ = _FsValUl_ & 0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); } void ADD_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateAdd(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void ADDA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAdd(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -216,7 +324,7 @@ void BC1TL() { } void C_EQ() { - C_cond_S(==); + C_cond_S(0); } void C_F() { @@ -224,11 +332,11 @@ void C_F() { } void C_LE() { - C_cond_S(<=); + C_cond_S(1); } void C_LT() { - C_cond_S(<); + C_cond_S(2); } void CFC1() { @@ -248,19 +356,42 @@ void CTC1() { } void CVT_S() { - _FdValf_ = (float)_FsValSl_; - _FdValf_ = fpuDouble( _FdValUl_ ); + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _FdValUl_ = PS2Float::Itof(0, _FsValSl_).raw; + } + else + { + _FdValf_ = (float)_FsValSl_; + _FdValf_ = fpuDouble(_FdValUl_); + } } void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } - else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } - else { _FdValUl_ = 0x80000000; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) + { + _FdValSl_ = PS2Float::Ftoi(0, _FsValUl_); + } + else + { + if ((_FsValUl_ & 0x7F800000) <= 0x4E800000) + { + _FdValSl_ = (s32)_FsValf_; + } + else if ((_FsValUl_ & 0x80000000) == 0) + { + _FdValUl_ = 0x7fffffff; + } + else + { + _FdValUl_ = 0x80000000; + } + } } void DIV_S() { if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateDiv(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); } @@ -270,15 +401,13 @@ void DIV_S() { method provides a similar outcome and is faster. (cottonvibes) */ void MADD_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + _FdValUl_ = fpuAccurateAdd(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MADDA_S() { - _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateAdd(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -303,15 +432,13 @@ void MOV_S() { } void MSUB_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + _FdValUl_ = fpuAccurateSub(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MSUBA_S() { - _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateSub(_FAValUl_, fpuAccurateMul(_FsValUl_, _FtValUl_)); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -321,13 +448,13 @@ void MTC1() { } void MUL_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateMul(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void MULA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateMul(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } @@ -341,17 +468,45 @@ void RSQRT_S() { FPRreg temp; clearFPUFlags(FPUflagD | FPUflagI); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) - _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; - return; + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float value = PS2Float(_FtValUl_); + + if (value.IsDenormalized()) + { + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = value.Sign() ? PS2Float::MIN_FLOATING_POINT_VALUE : PS2Float::MAX_FLOATING_POINT_VALUE; + return; + } + else if (_FtValUl_ & 0x80000000) // Ft is negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(PS2Float(value.Abs())).raw; + } + else // Ft is positive and not zero + { + _FdValUl_ = PS2Float(_FsValUl_).Rsqrt(value).raw; + } } - else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative - _ContVal_ |= FPUflagI | FPUflagSI; - temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + else + { + if ((_FtValUl_ & 0x7F800000) == 0) // Ft is zero (Denormals are Zero) + { + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = (_FtValUl_ & 0x80000000) | posFmax; + return; + } + else if (_FtValUl_ & 0x80000000) // Ft is negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = sqrt(fabs(fpuDouble(_FtValUl_))); + _FdValf_ = fpuDouble(_FsValUl_) / fpuDouble(temp.UL); + } + else // Ft is positive and not zero + { + _FdValf_ = fpuDouble(_FsValUl_) / sqrt(fpuDouble(_FtValUl_)); + } } - else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero if (checkOverflow( _FdValUl_, 0)) return; checkUnderflow( _FdValUl_, 0); @@ -360,23 +515,40 @@ void RSQRT_S() { void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) // If Ft = +/-0 - _FdValUl_ = _FtValUl_ & 0x80000000;// result is 0 - else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative - _ContVal_ |= FPUflagI | FPUflagSI; - _FdValf_ = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - } else - _FdValf_ = sqrt( fpuDouble( _FtValUl_ ) ); // If Ft is Positive + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float value = PS2Float(_FtValUl_); + + if (_FtValUl_ & 0x80000000) // If Ft is Negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValUl_ = PS2Float(value.Abs()).Sqrt().raw; + } + else + _FdValUl_ = value.Sqrt().raw; // If Ft is Positive + } + else + { + if ((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 + _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 + else if (_FtValUl_ & 0x80000000) // If Ft is Negative + { + _ContVal_ |= FPUflagI | FPUflagSI; + _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); + } + else + _FdValf_ = sqrt(fpuDouble(_FtValUl_)); // If Ft is Positive + } } void SUB_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FdValUl_ = fpuAccurateSub(_FsValUl_, _FtValUl_); if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); } void SUBA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + _FAValUl_ = fpuAccurateSub(_FsValUl_, _FtValUl_); if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); } diff --git a/pcsx2/FpgaDiv.cpp b/pcsx2/FpgaDiv.cpp new file mode 100644 index 0000000000..4d0e385319 --- /dev/null +++ b/pcsx2/FpgaDiv.cpp @@ -0,0 +1,477 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include "FpgaDiv.h" +#include "PS2Float.h" +#include "Common.h" + +FpgaDiv::FpgaDiv(bool divMode, u32 f1, u32 f2) +{ + FpgaDiv::divMode = divMode; + + if (divMode) + { + if (((f1 & 0x7F800000) == 0) && ((f2 & 0x7F800000) != 0)) + { + floatResult = 0; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31; + return; + } + if (((f1 & 0x7F800000) != 0) && ((f2 & 0x7F800000) == 0)) + { + dz = true; + floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31; + return; + } + if (((f1 & 0x7F800000) == 0) && ((f2 & 0x7F800000) == 0)) + { + iv = true; + floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(f2 >> 31) != (s32)(f1 >> 31)) ? 1 : 0 & 1) << 31; + return; + } + } + else if ((f2 & 0x7F800000) == 0) + { + floatResult = 0; + iv = ((f2 >> 31) & 1) != 0; + return; + } + + u32 floatDivisor, floatDividend; + s32 i, j, csaRes; + s32 man = 0; + s32 QuotientValueDomain = 1; + + Product[0] = 1; + Carry[25] = 1; + + if (divMode) + { + floatDividend = f1; + floatDivisor = f2; + } + else + { + floatDividend = f2; + floatDivisor = f1; + } + + u8 Dvdtexp = (u8)((floatDividend >> 23) & 0xFF); + u8 Dvsrexp = (u8)((floatDivisor >> 23) & 0xFF); + s32 Dvdtsign = (s32)(floatDividend >> 31); + s32 Dvsrsign = (s32)(floatDivisor >> 31); + + Sum[0] = 1; + Sum[1] = ((floatDividend & 0x400000) != 0); + Sum[2] = ((floatDividend & 0x200000) != 0); + Sum[3] = ((floatDividend & 0x100000) != 0); + Sum[4] = ((floatDividend & 0x80000) != 0); + Sum[5] = ((floatDividend & 0x40000) != 0); + Sum[6] = ((floatDividend & 0x20000) != 0); + Sum[7] = (s32)((floatDividend >> 16) & 1); + Sum[8] = (s32)((floatDividend >> 15) & 1); + Sum[9] = ((floatDividend & 0x4000) != 0); + Sum[10] = ((floatDividend & 0x2000) != 0); + Sum[11] = ((floatDividend & 0x1000) != 0); + Sum[12] = ((floatDividend & 0x800) != 0); + Sum[13] = ((floatDividend & 0x400) != 0); + Sum[14] = ((floatDividend & 0x200) != 0); + Sum[15] = (s32)((floatDividend >> 8) & 1); + Sum[16] = (s32)((floatDividend >> 7) & 1); + Sum[17] = ((floatDividend & 0x40) != 0); + Sum[18] = ((floatDividend & 0x20) != 0); + Sum[19] = ((floatDividend & 0x10) != 0); + Sum[20] = ((floatDividend & 8) != 0); + Sum[21] = ((floatDividend & 4) != 0); + Sum[22] = ((floatDividend & 2) != 0); + Sum[23] = (s32)(floatDividend & 1); + Sum[24] = 0; + Sum[25] = 0; + + Divisor[0] = 1; + Divisor[1] = ((floatDivisor & 0x400000) != 0); + Divisor[2] = ((floatDivisor & 0x200000) != 0); + Divisor[3] = ((floatDivisor & 0x100000) != 0); + Divisor[4] = ((floatDivisor & 0x80000) != 0); + Divisor[5] = ((floatDivisor & 0x40000) != 0); + Divisor[6] = ((floatDivisor & 0x20000) != 0); + Divisor[7] = (s32)((floatDivisor >> 16) & 1); + Divisor[8] = (s32)((floatDivisor >> 15) & 1); + Divisor[9] = ((floatDivisor & 0x4000) != 0); + Divisor[10] = ((floatDivisor & 0x2000) != 0); + Divisor[11] = ((floatDivisor & 0x1000) != 0); + Divisor[12] = ((floatDivisor & 0x800) != 0); + Divisor[13] = ((floatDivisor & 0x400) != 0); + Divisor[14] = ((floatDivisor & 0x200) != 0); + Divisor[15] = (s32)((floatDivisor >> 8) & 1); + Divisor[16] = (s32)((floatDivisor >> 7) & 1); + Divisor[17] = ((floatDivisor & 0x40) != 0); + Divisor[18] = ((floatDivisor & 0x20) != 0); + Divisor[19] = ((floatDivisor & 0x10) != 0); + Divisor[20] = ((floatDivisor & 8) != 0); + Divisor[21] = ((floatDivisor & 4) != 0); + Divisor[22] = ((floatDivisor & 2) != 0); + Divisor[23] = (s32)(floatDivisor & 1); + Divisor[24] = 0; + Divisor[25] = 0; + + if (!divMode && Dvdtexp % 2 == 1) + { + for (i = 0; i <= 24; i++) + { + Sum[25 - i] = Sum[24 - i]; + } + Sum[0] = 0; + } + + for (i = 0; i <= 24; ++i) + { + MultipleFormation(QuotientValueDomain); + csaRes = CSAQSLAdder(QuotientValueDomain); + ProductQuotientRestTransformation(i, QuotientValueDomain); + Carry[25] = csaRes > 0 ? 1 : 0; + QuotientValueDomain = csaRes; + } + + s32 sign = SignCalc(Dvdtsign, Dvsrsign) ? 1 : 0; + s32 exp = ExpCalc(Dvdtexp, Dvsrexp); + + if (divMode && (Quotient[0] == 0)) + exp--; + + if (divMode) + { + if ((Dvdtexp == 0) && (Dvsrexp == 0)) + { + iv = true; + exp = 255; + for (i = 0; i < 25; i++) + { + Quotient[i] = 1; + } + } + else if ((Dvdtexp == 0) || (Dvsrexp != 0)) + { + if ((Dvdtexp == 0) && (Dvsrexp != 0)) + { + exp = 0; + for (i = 0; i < 25; i++) + { + Quotient[i] = 0; + } + } + } + else + { + dz = true; + exp = 255; + for (i = 0; i < 25; i++) + { + Quotient[i] = 1; + } + } + } + else + { + if (Dvdtexp == 0) + { + sign = 0; + exp = 0; + for (i = 0; i < 25; i++) + { + Quotient[i] = 0; + } + } + if (Dvdtsign == 1) + { + iv = true; + sign = 0; + } + } + + if (divMode) + { + if (exp < 256) + { + if (exp < 1) + { + uf = true; + exp = 0; + for (i = 0; i < 25; i++) + { + Quotient[i] = 0; + } + } + } + else + { + of = true; + exp = 255; + for (i = 0; i < 25; i++) + { + Quotient[i] = 1; + } + } + } + + if (divMode) + j = 2 - Quotient[0]; + else + j = 1; + + for (i = j; i < j + 23; i++) + { + man = man * 2 + Quotient[i]; + } + + floatResult = 0; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(sign & 1) << 31; + floatResult &= 0x807FFFFF; + floatResult |= (u32)(exp & 0xFF) << 23; + floatResult &= 0xFF800000; + floatResult |= (u32)man & 0x7FFFFF; +} + +bool FpgaDiv::SignCalc(s32 Dvdtsign, s32 Dvsrsign) +{ + return divMode && Dvsrsign != Dvdtsign; +} + +bool FpgaDiv::BitInvert(s32 val) +{ + return val < 1; +} + +s32 FpgaDiv::ExpCalc(s32 Dvdtexp, s32 Dvsrexp) +{ + s32 result; + + if (divMode) + return Dvdtexp - Dvsrexp + 127; + if ((Dvdtexp & 1) != 0) + result = (Dvdtexp - 127) / 2; + else + result = (Dvdtexp - 128) / 2; + return result + 127; +} + +s32 FpgaDiv::CSAQSLAdder(s32 QuotientValueDomain) +{ + s32 CarryArray[4]; + s32 SumArray[4]; + s32 i; + s32 tmpSum; + s32 tmpCarry; + + if (QuotientValueDomain == 0) + { + SumArray[0] = SubSum; + CarryArray[0] = SubCarry; + for (i = 1; i <= 3; i++) + { + SumArray[i] = Sum[i - 1]; + CarryArray[i] = Carry[i - 1]; + } + } + CSAAdder(SubSum, SubCarry, SubMult, tmpSum, tmpCarry); + SubSum0 = tmpSum; + CSAAdder(Sum[0], Carry[0], Mult[0], tmpSum, tmpCarry); + SubSum = tmpSum; + SubCarry0 = tmpCarry; + CSAAdder(Sum[1], Carry[1], Mult[1], tmpSum, tmpCarry); + Sum[0] = tmpSum; + SubCarry = tmpCarry; + for (i = 2; i <= 25; i++) + { + CSAAdder(Sum[i], Carry[i], Mult[i], tmpSum, tmpCarry); + Sum[i - 1] = tmpSum; + Carry[i - 2] = tmpCarry; + } + Sum[i - 1] = 0; + Carry[i - 2] = 0; + Carry[i - 1] = ~QuotientValueDomain; + Carry[i - 1] = (s32)((u32)Carry[i - 1] >> 31); + if (QuotientValueDomain != 0) + { + SumArray[0] = SubSum0; + CarryArray[0] = SubCarry0; + SumArray[1] = SubSum; + CarryArray[1] = SubCarry; + for (i = 2; i <= 3; i++) + { + SumArray[i] = Sum[i - 2]; + CarryArray[i] = Carry[i - 2]; + } + } + return QSLAdder(SumArray, CarryArray); +} + +s32 FpgaDiv::QSLAdder(s32 SumArray[], s32 CarryArray[]) +{ + s32 specialCondition = 0; + s32 result; + s32 claResult = CLAAdder(SumArray, CarryArray); + + if (SumArray[3] == 1 || CarryArray[3] == 1 || (claResult % 2 != 0)) + specialCondition = 1; + + switch (claResult) + { + case 0: + result = specialCondition; + break; + case 1: + result = specialCondition; + break; + case 2: + case 3: + result = 1; + break; + case 4: + case 5: + case 6: + result = -1; + break; + case 7: + result = 0; + break; + default: + result = 0; + break; + } + + return result; +} + +s32 FpgaDiv::ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain) +{ + s32 i; + + Product[increment] = 0; + Product[increment + 1] = 1; + if (QuotientValueDomain == 0) + Rest[increment] = 1; + else + { + if (QuotientValueDomain == -1) + { + for (i = 0; i <= 25; i++) + Quotient[i] = Rest[i]; + Quotient[increment] = 1; + return 0; + } + else if (QuotientValueDomain == 1) + { + for (i = 0; i <= 25; ++i) + Rest[i] = Quotient[i]; + Quotient[increment] = 1; + return 0; + } + Console.Error("PQRTF: Quotient value domain error!"); + return -1; + } + + return 0; +} + +s32 FpgaDiv::CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry) +{ + s32 addResult = carry + sum + mult; + resCarry = 0; + resSum = 0; + if (addResult == 1) + resSum = 1; + else if (addResult == 2) + resCarry = 1; + else if (addResult == 3) + { + resSum = 1; + resCarry = 1; + } + + return 0; +} + +s32 FpgaDiv::CLAAdder(s32 SumArray[], s32 CarryArray[]) +{ + return (2 * CarryArray[1] + 4 * CarryArray[0] + CarryArray[2] + 2 * SumArray[1] + 4 * SumArray[0] + SumArray[2]) % 8; +} + +s32 FpgaDiv::MultipleFormation(s32 QuotientValueDomain) +{ + s32 i; + + if (QuotientValueDomain == 0) + { + SubMult = 0; + for (i = 0; i <= 25; i++) + Mult[i] = 0; + } + else if (divMode) + DivideModeFormation(QuotientValueDomain); + else + RootModeFormation(QuotientValueDomain); + + return 0; +} + +s32 FpgaDiv::DivideModeFormation(s32 QuotientValueDomain) +{ + s32 i; + + if (QuotientValueDomain <= 0) + { + SubMult = 0; + for (i = 0; i <= 25; i++) + Mult[i] = Divisor[i]; + } + else + { + SubMult = 1; + for (i = 0; i <= 25; i++) + Mult[i] = BitInvert(Divisor[i]) ? 1 : 0; + } + + return 0; +} + +s32 FpgaDiv::RootModeFormation(s32 QuotientValueDomain) +{ + s32 i; + + if (QuotientValueDomain <= 0) + { + SubMult = 0; + if (Product[0] == 1) + Mult[0] = 1; + else + Mult[0] = Rest[0]; + for (i = 1; i <= 25; i++) + { + if (Product[i - 1] == 1 || Product[i] == 1) + Mult[i] = 1; + else + Mult[i] = Rest[i]; + } + } + else + { + SubMult = 1; + Mult[0] = BitInvert(Quotient[0]) ? 1 : 0; + for (i = 1; i <= 25; i++) + { + if (Product[i - 1] == 1) + Mult[i] = 0; + else + Mult[i] = BitInvert(Quotient[i]) ? 1 : 0; + } + } + + return 0; +} \ No newline at end of file diff --git a/pcsx2/FpgaDiv.h b/pcsx2/FpgaDiv.h new file mode 100644 index 0000000000..752f756868 --- /dev/null +++ b/pcsx2/FpgaDiv.h @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +#include +#include + +class FpgaDiv +{ +public: + + bool dz = false; + bool iv = false; + bool of = false; + bool uf = false; + + u32 floatResult; + + FpgaDiv(bool divMode, u32 f1, u32 f2); + +protected: + +private: + + s32 Rest[26] = {0}; + s32 Quotient[26] = {0}; + s32 Product[26] = {0}; + s32 Sum[26] = {0}; + s32 Divisor[26] = {0}; + s32 Carry[26] = {0}; + s32 Mult[26] = {0}; + + bool divMode; + + s32 SubCarry = 0; + s32 SubCarry0 = 0; + s32 SubSum = 0; + s32 SubSum0 = 0; + s32 SubMult = 0; + + bool SignCalc(s32 Dvdtsign, s32 Dvsrsign); + + bool BitInvert(s32 val); + + s32 ExpCalc(s32 Dvdtexp, s32 Dvsrexp); + + s32 CSAQSLAdder(s32 QuotientValueDomain); + + s32 QSLAdder(s32 SumArray[], s32 CarryArray[]); + + s32 ProductQuotientRestTransformation(s32 increment, s32 QuotientValueDomain); + + s32 CSAAdder(s32 sum, s32 carry, s32 mult, s32& resSum, s32& resCarry); + + s32 CLAAdder(s32 SumArray[], s32 CarryArray[]); + + s32 MultipleFormation(s32 QuotientValueDomain); + + s32 DivideModeFormation(s32 QuotientValueDomain); + + s32 RootModeFormation(s32 QuotientValueDomain); +}; \ No newline at end of file diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp new file mode 100644 index 0000000000..3bd0c5ede1 --- /dev/null +++ b/pcsx2/PS2Float.cpp @@ -0,0 +1,575 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include +#include +#include "common/Pcsx2Defs.h" +#include "common/BitUtils.h" +#include "FpgaDiv.h" +#include "PS2Float.h" +#include "Common.h" + +//**************************************************************** +// Booth Multiplier +//**************************************************************** + +PS2Float::BoothRecode PS2Float::Booth(u32 a, u32 b, u32 bit) +{ + u32 test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; + a <<= (bit * 2); + a += (test == 3 || test == 4) ? a : 0; + u32 neg = (test >= 4 && test <= 6) ? ~0u : 0; + u32 pos = 1 << (bit * 2); + a ^= (neg & -pos); + a &= (test >= 1 && test <= 6) ? ~0u : 0; + return {a, neg & pos}; +} + +PS2Float::AddResult PS2Float::Add3(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + return {u ^ c, ((u & c) | (a & b)) << 1}; +} + +u64 PS2Float::MulMantissa(u32 a, u32 b) +{ + u64 full = static_cast(a) * static_cast(b); + BoothRecode b0 = Booth(a, b, 0); + BoothRecode b1 = Booth(a, b, 1); + BoothRecode b2 = Booth(a, b, 2); + BoothRecode b3 = Booth(a, b, 3); + BoothRecode b4 = Booth(a, b, 4); + BoothRecode b5 = Booth(a, b, 5); + BoothRecode b6 = Booth(a, b, 6); + BoothRecode b7 = Booth(a, b, 7); + + // First cycle + AddResult t0 = Add3(b1.data, b2.data, b3.data); + AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); + // A few adds get skipped, squeeze them back in + t1.hi |= b6.negate | (b5.data & 0x800); + b7.data |= (b5.data & 0x400) + b5.negate; + + // Second cycle + AddResult t2 = Add3(b0.data, t0.lo, t0.hi); + AddResult t3 = Add3(b7.data, t1.lo, t1.hi); + + // Third cycle + AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); + + // Fourth cycle + AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); + + // Discard bits and sum + t5.hi += b7.negate; + t5.lo &= ~0x7fffu; + t5.hi &= ~0x7fffu; + u32 ps2lo = t5.lo + t5.hi; + return full - ((ps2lo ^ full) & 0x8000); +} + +//**************************************************************** +// Float Processor +//**************************************************************** + +PS2Float::PS2Float(s32 value) { raw = (u32)value; } + +PS2Float::PS2Float(u32 value) { raw = value; } + +PS2Float::PS2Float(float value) { raw = std::bit_cast(value); } + +PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) +{ + raw = 0; + raw |= (sign ? 1u : 0u) << 31; + raw |= (u32)(exponent << MANTISSA_BITS); + raw |= mantissa & 0x7FFFFF; +} + +PS2Float PS2Float::Max() +{ + return PS2Float(MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Min() +{ + return PS2Float(MIN_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::One() +{ + return PS2Float(ONE); +} + +PS2Float PS2Float::MinOne() +{ + return PS2Float(MIN_ONE); +} + +PS2Float PS2Float::Add(PS2Float addend) +{ + if (IsDenormalized() || addend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, addend, true); + + u32 a = raw; + u32 b = addend.raw; + + //exponent difference + s32 exp_diff = Exponent() - addend.Exponent(); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b)); +} + +PS2Float PS2Float::Sub(PS2Float subtrahend) +{ + if (IsDenormalized() || subtrahend.IsDenormalized()) + return SolveAddSubDenormalizedOperation(*this, subtrahend, false); + + u32 a = raw; + u32 b = subtrahend.raw; + + //exponent difference + s32 exp_diff = Exponent() - subtrahend.Exponent(); + + //diff = 25 .. 255 , expt < expd + if (exp_diff >= 25) + { + b = b & SIGNMASK; + } + + //diff = 1 .. 24, expt < expd + else if (exp_diff > 0) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -255 .. -25, expd < expt + else if (exp_diff <= -25) + { + a = a & SIGNMASK; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b).Negate()); +} + +PS2Float PS2Float::Mul(PS2Float mulend) +{ + if (IsDenormalized() || mulend.IsDenormalized()) + return SolveMultiplicationDenormalizedOperation(*this, mulend); + + if (IsZero() || mulend.IsZero()) + return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, mulend), 0, 0); + + return DoMul(mulend); +} + +PS2Float PS2Float::Div(PS2Float divend) +{ + FpgaDiv fpga = FpgaDiv(true, raw, divend.raw); + PS2Float result = PS2Float(fpga.floatResult); + result.dz = fpga.dz; + result.iv = fpga.iv; + result.of = fpga.of; + result.uf = fpga.uf; + return result; +} + +PS2Float PS2Float::Sqrt() +{ + FpgaDiv fpga = FpgaDiv(false, 0, PS2Float(false, Exponent(), Mantissa()).raw); + PS2Float result = PS2Float(fpga.floatResult); + result.dz = fpga.dz; + result.iv = fpga.iv; + return result; +} + +PS2Float PS2Float::Rsqrt(PS2Float other) +{ + FpgaDiv fpgaSqrt = FpgaDiv(false, 0, PS2Float(false, other.Exponent(), other.Mantissa()).raw); + FpgaDiv fpgaDiv = FpgaDiv(true, raw, fpgaSqrt.floatResult); + PS2Float result = PS2Float(fpgaDiv.floatResult); + result.dz = fpgaSqrt.dz || fpgaDiv.dz; + result.iv = fpgaSqrt.iv || fpgaDiv.iv; + result.of = fpgaDiv.of; + result.uf = fpgaDiv.uf; + return result; +} + +PS2Float PS2Float::Pow(s32 exponent) +{ + PS2Float result = PS2Float::One(); // Start with 1, since any number raised to the power of 0 is 1 + + if (exponent != 0) + { + s32 exp = abs(exponent); + + for (s32 i = 0; i < exp; i++) + { + result = result.Mul(*this); + } + } + + if (exponent < 0) + return PS2Float::One().Div(result); + else + return result; +} + +bool PS2Float::IsDenormalized() +{ + return Exponent() == 0; +} + +bool PS2Float::IsZero() +{ + return Abs() == 0; +} + +u32 PS2Float::Abs() +{ + return (raw & MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Negate() +{ + return PS2Float(raw ^ 0x80000000); +} + +s32 PS2Float::CompareToSign(PS2Float other) +{ + s32 selfTwoComplementVal = (s32)Abs(); + if (Sign()) + selfTwoComplementVal = -selfTwoComplementVal; + + s32 otherTwoComplementVal = (s32)other.Abs(); + if (other.Sign()) + otherTwoComplementVal = -otherTwoComplementVal; + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +s32 PS2Float::CompareTo(PS2Float other) +{ + s32 selfTwoComplementVal = (s32)Abs(); + s32 otherTwoComplementVal = (s32)other.Abs(); + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +double PS2Float::ToDouble() +{ + return std::bit_cast(((u64)Sign() << 63) | ((((u64)Exponent() - BIAS) + 1023ULL) << 52) | ((u64)Mantissa() << 29)); +} + +std::string PS2Float::ToString() +{ + double res = ToDouble(); + + u32 value = raw; + std::ostringstream oss; + oss << std::fixed << std::setprecision(6); + + if (IsDenormalized()) + { + oss << "Denormalized(" << res << ")"; + } + else if (value == MAX_FLOATING_POINT_VALUE) + { + oss << "Fmax(" << res << ")"; + } + else if (value == MIN_FLOATING_POINT_VALUE) + { + oss << "-Fmax(" << res << ")"; + } + else + { + oss << "PS2Float(" << res << ")"; + } + + return oss.str(); +} + +PS2Float PS2Float::DoAdd(PS2Float other) +{ + const u8 roundingMultiplier = 6; + + u8 selfExponent = Exponent(); + s32 resExponent = selfExponent - other.Exponent(); + + if (resExponent < 0) + return other.DoAdd(*this); + else if (resExponent >= 25) + return *this; + + // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate + u32 sign1 = (u32)((s32)raw >> 31); + s32 selfMantissa = (s32)(((Mantissa() | 0x800000) ^ sign1) - sign1); + u32 sign2 = (u32)((s32)other.raw >> 31); + s32 otherMantissa = (s32)(((other.Mantissa() | 0x800000) ^ sign2) - sign2); + + // PS2 multiply by 2 before doing the Math here. + s32 man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + s32 absMan = abs(man); + if (absMan == 0) + return PS2Float(0); + + // Remove from exponent the PS2 Multiplier value. + s32 rawExp = selfExponent - roundingMultiplier; + + s32 amount = Common::normalizeAmounts[Common::CountLeadingSignBits(absMan)]; + rawExp -= amount; + absMan <<= amount; + + s32 msbIndex = Common::BitScanReverse8(absMan >> MANTISSA_BITS); + rawExp += msbIndex; + absMan >>= msbIndex; + + if (rawExp > 255) + { + PS2Float result = man < 0 ? Min() : Max(); + result.of = true; + return result; + } + else if (rawExp < 1) + { + PS2Float result = PS2Float(man < 0, 0, 0); + result.uf = true; + return result; + } + + return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << MANTISSA_BITS | ((u32)absMan & 0x7FFFFF)); +} + +PS2Float PS2Float::DoMul(PS2Float other) +{ + u8 selfExponent = Exponent(); + u8 otherExponent = other.Exponent(); + u32 selfMantissa = Mantissa() | 0x800000; + u32 otherMantissa = other.Mantissa() | 0x800000; + u32 sign = (raw ^ other.raw) & SIGNMASK; + + s32 resExponent = selfExponent + otherExponent - 127; + u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> MANTISSA_BITS); + + if (resMantissa > 0xFFFFFF) + { + resMantissa >>= 1; + resExponent++; + } + + if (resExponent > 255) + { + PS2Float result = PS2Float(sign | MAX_FLOATING_POINT_VALUE); + result.of = true; + return result; + } + else if (resExponent < 1) + { + PS2Float result = PS2Float(sign); + result.uf = true; + return result; + } + + return PS2Float(sign | (u32)(resExponent << MANTISSA_BITS) | (resMantissa & 0x7FFFFF)); +} + +PS2Float PS2Float::SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add) +{ + bool sign = add ? DetermineAdditionOperationSign(a, b) : DetermineSubtractionOperationSign(a, b); + + if (a.IsDenormalized() && !b.IsDenormalized()) + return PS2Float(sign, b.Exponent(), b.Mantissa()); + else if (!a.IsDenormalized() && b.IsDenormalized()) + return PS2Float(sign, a.Exponent(), a.Mantissa()); + else if (a.IsDenormalized() && b.IsDenormalized()) + return PS2Float(sign, 0, 0); + else + Console.Error("Both numbers are not denormalized"); + + return PS2Float(0); +} + +PS2Float PS2Float::SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b) +{ + return PS2Float(DetermineMultiplicationDivisionOperationSign(a, b), 0, 0); +} + +PS2Float PS2Float::SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b) +{ + bool sign = DetermineMultiplicationDivisionOperationSign(a, b); + + if (a.IsDenormalized() && !b.IsDenormalized()) + return PS2Float(sign, 0, 0); + else if (!a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else if (a.IsDenormalized() && b.IsDenormalized()) + return sign ? Min() : Max(); + else + Console.Error("Both numbers are not denormalized"); + + return PS2Float(0); +} + +PS2Float PS2Float::Itof(s32 complement, s32 f1) +{ + if (f1 == 0) + return PS2Float(0); + + s32 resExponent; + + if (f1 == -2147483648) + { + // special case + resExponent = 158 - complement; + + if (resExponent >= 0) + return PS2Float(true, (u8)resExponent, 0); + + return PS2Float(0); + } + + bool negative = f1 < 0; + s32 u = std::abs(f1); + + s32 shifts; + + s32 lzcnt = Common::CountLeadingSignBits(u); + if (lzcnt < 8) + { + s32 count = 8 - lzcnt; + u >>= count; + shifts = -count; + } + else + { + s32 count = lzcnt - 8; + u <<= count; + shifts = count; + } + + resExponent = BIAS + MANTISSA_BITS - shifts - complement; + + if (resExponent >= 0) + return PS2Float(negative, (u8)resExponent, (u32)u); + + return PS2Float(0); +} + +s32 PS2Float::Ftoi(s32 complement, u32 f1) +{ + u32 a, result; + + a = f1; + if ((f1 & 0x7F800000) == 0) + result = 0; + else + { + complement = (s32)(f1 >> MANTISSA_BITS & 0xFF) + complement; + f1 &= 0x7FFFFF; + f1 |= 0x800000; + if (complement < 158) + { + if (complement >= 126) + { + f1 = (f1 << 7) >> (31 - ((u8)complement - 126)); + if ((s32)a < 0) + f1 = ~f1 + 1; + result = f1; + } + else + result = 0; + } + else if ((s32)a < 0) + result = SIGNMASK; + else + result = MAX_FLOATING_POINT_VALUE; + } + + return (s32)result; +} + +bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) +{ + return a.Sign() ^ b.Sign(); +} + +bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign() || !b.Sign()) + return false; + else if (a.Sign() && b.Sign()) + return true; + else + Console.Error("Unhandled addition operation flags"); + } + + return a.CompareTo(b) >= 0 ? a.Sign() : b.Sign(); +} + +bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign() || b.Sign()) + return false; + else if (a.Sign() && !b.Sign()) + return true; + else + Console.Error("Unhandled subtraction operation flags"); + } + + return a.CompareTo(b) >= 0 ? a.Sign() : !b.Sign(); +} diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h new file mode 100644 index 0000000000..9e68425be7 --- /dev/null +++ b/pcsx2/PS2Float.h @@ -0,0 +1,118 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +#include + +class PS2Float +{ + struct BoothRecode + { + u32 data; + u32 negate; + }; + + struct AddResult + { + u32 lo; + u32 hi; + }; + + static u64 MulMantissa(u32 a, u32 b); + + static BoothRecode Booth(u32 a, u32 b, u32 bit); + + static AddResult Add3(u32 a, u32 b, u32 c); + +public: + + static constexpr u8 BIAS = 127; + static constexpr u8 MANTISSA_BITS = 23; + static constexpr u32 SIGNMASK = 0x80000000; + static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; + static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; + static constexpr u32 ONE = 0x3F800000; + static constexpr u32 MIN_ONE = 0xBF800000; + + bool dz = false; + bool iv = false; + bool of = false; + bool uf = false; + + u32 raw; + + constexpr u32 Mantissa() const { return raw & 0x7FFFFF; } + constexpr u8 Exponent() const { return (raw >> 23) & 0xFF; } + constexpr bool Sign() const { return ((raw >> 31) & 1) != 0; } + + PS2Float(s32 value); + + PS2Float(u32 value); + + PS2Float(float value); + + PS2Float(bool sign, u8 exponent, u32 mantissa); + + static PS2Float Max(); + + static PS2Float Min(); + + static PS2Float One(); + + static PS2Float MinOne(); + + static PS2Float SolveAddSubDenormalizedOperation(PS2Float a, PS2Float b, bool add); + + static PS2Float SolveMultiplicationDenormalizedOperation(PS2Float a, PS2Float b); + + static PS2Float SolveDivisionDenormalizedOperation(PS2Float a, PS2Float b); + + static PS2Float Itof(s32 complement, s32 f1); + + static s32 Ftoi(s32 complement, u32 f1); + + PS2Float Add(PS2Float addend); + + PS2Float Sub(PS2Float subtrahend); + + PS2Float Mul(PS2Float mulend); + + PS2Float Div(PS2Float divend); + + PS2Float Sqrt(); + + PS2Float Rsqrt(PS2Float other); + + PS2Float Pow(s32 exponent); + + bool IsDenormalized(); + + bool IsZero(); + + u32 Abs(); + + PS2Float Negate(); + + s32 CompareToSign(PS2Float other); + + s32 CompareTo(PS2Float other); + + double ToDouble(); + + std::string ToString(); + +protected: + +private: + + PS2Float DoAdd(PS2Float other); + + PS2Float DoMul(PS2Float other); + + static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineSubtractionOperationSign(PS2Float a, PS2Float b); +}; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index 655c11af58..0c7ded2eff 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -536,14 +536,27 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(vu0ExtraOverflow); SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); + + SettingsWrapBitBool(vu0SoftAddSub); + SettingsWrapBitBool(vu0SoftMulDiv); + SettingsWrapBitBool(vu0SoftSqrt); + SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); + + SettingsWrapBitBool(vu1SoftAddSub); + SettingsWrapBitBool(vu1SoftMulDiv); + SettingsWrapBitBool(vu1SoftSqrt); SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); + + SettingsWrapBitBool(fpuSoftAddSub); + SettingsWrapBitBool(fpuSoftMulDiv); + SettingsWrapBitBool(fpuSoftSqrt); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 1f8224bc39..72b519c8fa 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -124,6 +124,7 @@ struct alignas(16) VURegs REG_VI q; REG_VI p; + VECTOR TMP; // Temporary vector used to stack FMA operations uint idx; // VU index (0 or 1) // flags/cycle are needed by VIF dma code, so they have to be here (for now) diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index 22632cf36b..4640fcf405 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,7 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" - +#include "PS2Float.h" #include #include @@ -12,21 +12,22 @@ /* NEW FLAGS */ //By asadr. Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE(int shift, VURegs* VU, u32 f) { - u32 v = *(u32*)&f; - int exp = (v >> 23) & 0xff; - u32 s = v & 0x80000000; + PS2Float ps2f = PS2Float(f); + + u32 exp = ps2f.Exponent(); + u32 s = ps2f.raw & PS2Float::SIGNMASK; if (s) VU->macflag |= 0x0010<macflag &= ~(0x0010<macflag = (VU->macflag & ~(0x1100<macflag = (VU->macflag&~(0x1000<macflag = (VU->macflag&~(0x0101<macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } + else + return f; + } + else if (CHECK_VU_OVERFLOW((VU == &VU1) ? 1 : 0)) + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return s | 0x7f7fffff; /* max IEEE754 allowed */ + } else - return v; + { + VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); + return f; + } default: VU->macflag = (VU->macflag & ~(0x1101<macflag&= ~(0x1111<<3); } -__fi void VU_MACy_CLEAR(VURegs * VU) +__fi void VU_MACy_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<2); } -__fi void VU_MACz_CLEAR(VURegs * VU) +__fi void VU_MACz_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<1); } -__fi void VU_MACw_CLEAR(VURegs * VU) +__fi void VU_MACw_CLEAR(VURegs* VU) { VU->macflag&= ~(0x1111<<0); } diff --git a/pcsx2/VUflags.h b/pcsx2/VUflags.h index 86e844ae62..12bd316351 100644 --- a/pcsx2/VUflags.h +++ b/pcsx2/VUflags.h @@ -4,12 +4,12 @@ #pragma once #include "VU.h" -extern u32 VU_MACx_UPDATE(VURegs * VU, float x); -extern u32 VU_MACy_UPDATE(VURegs * VU, float y); -extern u32 VU_MACz_UPDATE(VURegs * VU, float z); -extern u32 VU_MACw_UPDATE(VURegs * VU, float w); -extern void VU_MACx_CLEAR(VURegs * VU); -extern void VU_MACy_CLEAR(VURegs * VU); -extern void VU_MACz_CLEAR(VURegs * VU); -extern void VU_MACw_CLEAR(VURegs * VU); -extern void VU_STAT_UPDATE(VURegs * VU); +extern u32 VU_MACx_UPDATE(VURegs* VU, u32 x); +extern u32 VU_MACy_UPDATE(VURegs* VU, u32 y); +extern u32 VU_MACz_UPDATE(VURegs* VU, u32 z); +extern u32 VU_MACw_UPDATE(VURegs* VU, u32 w); +extern void VU_MACx_CLEAR(VURegs* VU); +extern void VU_MACy_CLEAR(VURegs* VU); +extern void VU_MACz_CLEAR(VURegs* VU); +extern void VU_MACw_CLEAR(VURegs* VU); +extern void VU_STAT_UPDATE(VURegs* VU); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index dfa777e8ae..8fbfe150f0 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" +#include "PS2Float.h" #include "VUops.h" #include "GS.h" #include "Gif_Unit.h" @@ -462,34 +463,32 @@ static __fi float vuDouble(u32 f) } #endif -static __fi float vuADD_TriAceHack(u32 a, u32 b) +static __fi u32 vuAccurateAdd(VURegs* VU, u32 a, u32 b) { - // On VU0 TriAce Games use ADDi and expects these bit-perfect results: - //if (a == 0xb3e2a619 && b == 0x42546666) return vuDouble(0x42546666); - //if (a == 0x8b5b19e9 && b == 0xc7f079b3) return vuDouble(0xc7f079b3); - //if (a == 0x4b1ed4a8 && b == 0x43a02666) return vuDouble(0x4b1ed5e7); - //if (a == 0x7d1ca47b && b == 0x42f23333) return vuDouble(0x7d1ca47b); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Add(PS2Float(b)).raw; - // In the 3rd case, some other rounding error is giving us incorrect - // operands ('a' is wrong); and therefor an incorrect result. - // We're getting: 0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8 - // We should be getting: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7 - // microVU gets the correct operands and result. The interps likely - // don't get it due to rounding towards nearest in other calculations. + return std::bit_cast(vuDouble(a) + vuDouble(b)); +} - // microVU uses something like this to get TriAce games working, - // but VU interpreters don't seem to need it currently: +static __fi u32 vuAccurateSub(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) return PS2Float(a).Sub(PS2Float(b)).raw; - // Update Sept 2021, now the interpreters don't suck, they do - Refraction - s32 aExp = (a >> 23) & 0xff; - s32 bExp = (b >> 23) & 0xff; - if (aExp - bExp >= 25) b &= 0x80000000; - if (aExp - bExp <=-25) a &= 0x80000000; - float ret = vuDouble(a) + vuDouble(b); - //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); - //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); - //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - return ret; + return std::bit_cast(vuDouble(a) - vuDouble(b)); +} + +static __fi u32 vuAccurateMul(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Mul(PS2Float(b)).raw; + + return std::bit_cast(vuDouble(a) * vuDouble(b)); +} + +static __fi u32 vuAccurateDiv(VURegs* VU, u32 a, u32 b) +{ + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) return PS2Float(a).Div(PS2Float(b)).raw; + + return std::bit_cast(vuDouble(a) / vuDouble(b)); } void _vuABS(VURegs* VU) @@ -497,10 +496,10 @@ void _vuABS(VURegs* VU) if (_Ft_ == 0) return; - if (_X){ VU->VF[_Ft_].f.x = fabs(vuDouble(VU->VF[_Fs_].i.x)); } - if (_Y){ VU->VF[_Ft_].f.y = fabs(vuDouble(VU->VF[_Fs_].i.y)); } - if (_Z){ VU->VF[_Ft_].f.z = fabs(vuDouble(VU->VF[_Fs_].i.z)); } - if (_W){ VU->VF[_Ft_].f.w = fabs(vuDouble(VU->VF[_Fs_].i.w)); } + if (_X) VU->VF[_Ft_].i.x = PS2Float(VU->VF[_Fs_].i.x).Abs(); + if (_Y) VU->VF[_Ft_].i.y = PS2Float(VU->VF[_Fs_].i.y).Abs(); + if (_Z) VU->VF[_Ft_].i.z = PS2Float(VU->VF[_Fs_].i.z).Abs(); + if (_W) VU->VF[_Ft_].i.w = PS2Float(VU->VF[_Fs_].i.w).Abs(); } @@ -512,10 +511,10 @@ static __fi void _vuADD(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -528,20 +527,11 @@ static __fi void _vuADDi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (!CHECK_VUADDSUBHACK) { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } - else { - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuADD_TriAceHack(VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); - } + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuADDq(VURegs* VU) @@ -552,153 +542,133 @@ static __fi void _vuADDq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + fty);} else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + fty);} else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + fty);} else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + fty);} else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ti); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ti); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ti); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ti); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAq(VURegs* VU) { - float tf = vuDouble(VU->VI[REG_Q].UL); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tf); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tf); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tf); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tf); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuADDAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) + tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) + tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) + tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) + tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -711,11 +681,11 @@ static __fi void _vuSUB(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuSUBi(VURegs* VU) @@ -726,10 +696,10 @@ static __fi void _vuSUBi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -741,147 +711,131 @@ static __fi void _vuSUBq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL));} else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL));} else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL));} else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftx); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - fty); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftz); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ftw); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAx(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tx); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tx); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tx); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tx); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAy(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - ty); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - ty); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - ty); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - ty); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAz(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tz); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tz); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tz); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tz); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuSUBAw(VURegs* VU) { - float tw = vuDouble(VU->VF[_Ft_].i.w); - - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) - tw); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) - tw); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) - tw); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) - tw); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -893,10 +847,10 @@ static __fi void _vuMUL(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -908,10 +862,10 @@ static __fi void _vuMULi(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -923,498 +877,540 @@ static __fi void _vuMULq(VURegs* VU) else dst = &VU->VF[_Fd_]; - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULx(VURegs* VU) { - float ftx; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftx); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftx); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftx); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftx); } else VU_MACw_CLEAR(VU); + u32 ftx = VU->VF[_Ft_].i.x; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULy(VURegs* VU) { - float fty; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * fty); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * fty); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * fty); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * fty); } else VU_MACw_CLEAR(VU); + u32 fty = VU->VF[_Ft_].i.y; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULz(VURegs* VU) { - float ftz; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftz); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftz); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftz); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftz); } else VU_MACw_CLEAR(VU); + u32 ftz = VU->VF[_Ft_].i.z; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULw(VURegs* VU) { - float ftw; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * ftw); } else VU_MACx_CLEAR(VU); - if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * ftw); } else VU_MACy_CLEAR(VU); - if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * ftw); } else VU_MACz_CLEAR(VU); - if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * ftw); } else VU_MACw_CLEAR(VU); + u32 ftw = VU->VF[_Ft_].i.w; + if (_X){ dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw)); } else VU_MACx_CLEAR(VU); + if (_Y){ dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw)); } else VU_MACy_CLEAR(VU); + if (_Z){ dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw)); } else VU_MACz_CLEAR(VU); + if (_W){ dst->i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULA(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAi(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAq(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAx(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAy(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAz(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMULAw(VURegs* VU) { - if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); - if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); - if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); - if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + if (_X){ VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w)); } else VU_MACx_CLEAR(VU); + if (_Y){ VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w)); } else VU_MACy_CLEAR(VU); + if (_Z){ VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w)); } else VU_MACz_CLEAR(VU); + if (_W){ VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADD(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDi(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDq(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftx)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftx)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftx)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftx)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * fty)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * fty)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * fty)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * fty)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftz)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftz)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftz)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftz)); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); + tmp = &VU->TMP; + u32 ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); } static __fi void _vuMADDw(VURegs* VU) { - float ftw; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * ftw)); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * ftw)); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * ftw)); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * ftw)); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + (vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDA(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * ti)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * ti)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * ti)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * ti)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMADDAq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * tq)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * tq)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * tq)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * tq)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAx(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.x); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.x); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAy(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.y); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.y); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAz(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.z); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.z); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMADDAw(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) + ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) + ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) + ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) + ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); +static __fi void _vuMADDAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.w); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.w); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.w); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateAdd(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } + static __fi void _vuMSUBi(VURegs* VU) { - float ti = vuDouble(VU->VI[REG_I].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ti ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ti ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ti ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ti ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBq(VURegs* VU) { - float tq = vuDouble(VU->VI[REG_Q].UL); + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tq ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tq ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tq ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tq ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBx(VURegs* VU) { - float ftx; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx=vuDouble(VU->VF[_Ft_].i.x); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftx ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftx ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftx ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftx ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftx); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftx); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftx); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftx); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBy(VURegs* VU) { - float fty; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - fty=vuDouble(VU->VF[_Ft_].i.y); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * fty ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * fty ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * fty ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * fty ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 fty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, fty); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, fty); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, fty); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, fty); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - static __fi void _vuMSUBz(VURegs* VU) { - float ftz; + VECTOR* tmp; VECTOR* dst; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftz=vuDouble(VU->VF[_Ft_].i.z); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftz ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftz ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftz ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftz ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftz); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftz); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftz); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftz); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } static __fi void _vuMSUBw(VURegs* VU) { - float ftw; - VECTOR * dst; - if (_Fd_ == 0) dst = &RDzero; - else dst = &VU->VF[_Fd_]; + VECTOR* tmp; + VECTOR* dst; + if (_Fd_ == 0) + dst = &RDzero; + else + dst = &VU->VF[_Fd_]; - ftw=vuDouble(VU->VF[_Ft_].i.w); - if (_X) dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ftw ) ); else VU_MACx_CLEAR(VU); - if (_Y) dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ftw ) ); else VU_MACy_CLEAR(VU); - if (_Z) dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ftw ) ); else VU_MACz_CLEAR(VU); - if (_W) dst->i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ftw ) ); else VU_MACw_CLEAR(VU); + tmp = &VU->TMP; + u32 ftw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ftw); dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ftw); dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ftw); dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ftw); dst->i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } - -static __fi void _vuMSUBA(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.x))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.y))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.z))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VF[_Ft_].i.w))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAi(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_I].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_I].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_I].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_I].UL))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAq(VURegs* VU) { - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * vuDouble(VU->VI[REG_Q].UL))); else VU_MACw_CLEAR(VU); - VU_STAT_UPDATE(VU); -} - -static __fi void _vuMSUBAx(VURegs* VU) +static __fi void _vuMSUBA(VURegs* VU) { - float tx = vuDouble(VU->VF[_Ft_].i.x); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tx)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tx)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tx)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tx)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAy(VURegs* VU) +static __fi void _vuMSUBAi(VURegs* VU) { - float ty = vuDouble(VU->VF[_Ft_].i.y); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * ty)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * ty)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * ty)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * ty)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_I].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_I].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_I].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_I].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAz(VURegs* VU) +static __fi void _vuMSUBAq(VURegs* VU) { - float tz = vuDouble(VU->VF[_Ft_].i.z); - - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tz)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tz)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tz)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tz)); else VU_MACw_CLEAR(VU); + VECTOR* tmp; + tmp = &VU->TMP; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VI[REG_Q].UL); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VI[REG_Q].UL); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VI[REG_Q].UL); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, VU->VI[REG_Q].UL); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } -static __fi void _vuMSUBAw(VURegs* VU) -{ - float tw = vuDouble(VU->VF[_Ft_].i.w); - if (_X) VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - ( vuDouble(VU->VF[_Fs_].i.x) * tw)); else VU_MACx_CLEAR(VU); - if (_Y) VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - ( vuDouble(VU->VF[_Fs_].i.y) * tw)); else VU_MACy_CLEAR(VU); - if (_Z) VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - ( vuDouble(VU->VF[_Fs_].i.z) * tw)); else VU_MACz_CLEAR(VU); - if (_W) VU->ACC.i.w = VU_MACw_UPDATE(VU, vuDouble(VU->ACC.i.w) - ( vuDouble(VU->VF[_Fs_].i.w) * tw)); else VU_MACw_CLEAR(VU); +static __fi void _vuMSUBAx(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + u32 tx = VU->VF[_Ft_].i.x; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tx); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tx); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tx); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tx); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAy(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + u32 ty = VU->VF[_Ft_].i.y; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, ty); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, ty); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, ty); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, ty); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAz(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + u32 tz = VU->VF[_Ft_].i.z; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tz); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tz); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tz); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tz); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +static __fi void _vuMSUBAw(VURegs* VU) +{ + VECTOR* tmp; + tmp = &VU->TMP; + u32 tw = VU->VF[_Ft_].i.w; + if (_X) {tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, tw); VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x));} else VU_MACx_CLEAR(VU); + if (_Y) {tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, tw); VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y));} else VU_MACy_CLEAR(VU); + if (_Z) {tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, tw); VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z));} else VU_MACz_CLEAR(VU); + if (_W) {tmp->i.w = vuAccurateMul(VU, VU->VF[_Fs_].i.w, tw); VU->ACC.i.w = VU_MACw_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.w, tmp->i.w));} else VU_MACw_CLEAR(VU); VU_STAT_UPDATE(VU); } @@ -1575,32 +1571,28 @@ static __fi void _vuMINIw(VURegs* VU) static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); VU_STAT_UPDATE(VU); } static __fi void _vuOPMSUB(VURegs* VU) { + VECTOR* tmp; VECTOR* dst; - float ftx, fty, ftz; - float fsx, fsy, fsz; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx = vuDouble(VU->VF[_Ft_].i.x); - fty = vuDouble(VU->VF[_Ft_].i.y); - ftz = vuDouble(VU->VF[_Ft_].i.z); - fsx = vuDouble(VU->VF[_Fs_].i.x); - fsy = vuDouble(VU->VF[_Fs_].i.y); - fsz = vuDouble(VU->VF[_Fs_].i.z); - - dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); - dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); - dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + tmp = &VU->TMP; + tmp->i.x = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z); + tmp->i.y = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x); + tmp->i.z = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.x, tmp->i.x)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.y, tmp->i.y)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateSub(VU, VU->ACC.i.z, tmp->i.z)); VU_STAT_UPDATE(VU); } @@ -1617,22 +1609,42 @@ static __fi s32 float_to_int(float value) return value; } -static __fi void _vuFTOI0(VURegs* VU) { +static __fi void _vuFTOI0(VURegs* VU) { if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(0, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(vuDouble(VU->VF[_Fs_].i.x)); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(vuDouble(VU->VF[_Fs_].i.y)); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(vuDouble(VU->VF[_Fs_].i.z)); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(vuDouble(VU->VF[_Fs_].i.w)); + } } -static __fi void _vuFTOI4(VURegs* VU) { +static __fi void _vuFTOI4(VURegs* VU) { if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.x))); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.y))); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.z))); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.w))); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(4, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.x))); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.y))); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.z))); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int4(vuDouble(VU->VF[_Fs_].i.w))); + } } static __fi void _vuFTOI12(VURegs* VU) @@ -1640,10 +1652,20 @@ static __fi void _vuFTOI12(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.x))); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.y))); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.z))); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.w))); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(12, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.x))); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.y))); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.z))); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int12(vuDouble(VU->VF[_Fs_].i.w))); + } } static __fi void _vuFTOI15(VURegs* VU) @@ -1651,10 +1673,20 @@ static __fi void _vuFTOI15(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.x))); - if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.y))); - if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.z))); - if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.w))); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].SL[0] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.x); + if (_Y) VU->VF[_Ft_].SL[1] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.y); + if (_Z) VU->VF[_Ft_].SL[2] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.z); + if (_W) VU->VF[_Ft_].SL[3] = PS2Float::Ftoi(15, VU->VF[_Fs_].i.w); + } + else + { + if (_X) VU->VF[_Ft_].SL[0] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.x))); + if (_Y) VU->VF[_Ft_].SL[1] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.y))); + if (_Z) VU->VF[_Ft_].SL[2] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.z))); + if (_W) VU->VF[_Ft_].SL[3] = float_to_int(float_to_int15(vuDouble(VU->VF[_Fs_].i.w))); + } } static __fi void _vuITOF0(VURegs* VU) @@ -1662,10 +1694,20 @@ static __fi void _vuITOF0(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = (float)VU->VF[_Fs_].SL[0]; - if (_Y) VU->VF[_Ft_].f.y = (float)VU->VF[_Fs_].SL[1]; - if (_Z) VU->VF[_Ft_].f.z = (float)VU->VF[_Fs_].SL[2]; - if (_W) VU->VF[_Ft_].f.w = (float)VU->VF[_Fs_].SL[3]; + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(0, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(0, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(0, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(0, VU->VF[_Fs_].SL[3]).raw; + } + else + { + if (_X) VU->VF[_Ft_].f.x = (float)VU->VF[_Fs_].SL[0]; + if (_Y) VU->VF[_Ft_].f.y = (float)VU->VF[_Fs_].SL[1]; + if (_Z) VU->VF[_Ft_].f.z = (float)VU->VF[_Fs_].SL[2]; + if (_W) VU->VF[_Ft_].f.w = (float)VU->VF[_Fs_].SL[3]; + } } static __fi void _vuITOF4(VURegs* VU) @@ -1673,10 +1715,20 @@ static __fi void _vuITOF4(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = int4_to_float(VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].f.y = int4_to_float(VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].f.z = int4_to_float(VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].f.w = int4_to_float(VU->VF[_Fs_].SL[3]); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(4, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(4, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(4, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(4, VU->VF[_Fs_].SL[3]).raw; + } + else + { + if (_X) VU->VF[_Ft_].f.x = int4_to_float(VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].f.y = int4_to_float(VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].f.z = int4_to_float(VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].f.w = int4_to_float(VU->VF[_Fs_].SL[3]); + } } static __fi void _vuITOF12(VURegs* VU) @@ -1684,10 +1736,20 @@ static __fi void _vuITOF12(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = int12_to_float(VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].f.y = int12_to_float(VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].f.z = int12_to_float(VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].f.w = int12_to_float(VU->VF[_Fs_].SL[3]); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(12, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(12, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(12, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(12, VU->VF[_Fs_].SL[3]).raw; + } + else + { + if (_X) VU->VF[_Ft_].f.x = int12_to_float(VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].f.y = int12_to_float(VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].f.z = int12_to_float(VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].f.w = int12_to_float(VU->VF[_Fs_].SL[3]); + } } static __fi void _vuITOF15(VURegs* VU) @@ -1695,23 +1757,49 @@ static __fi void _vuITOF15(VURegs* VU) if (_Ft_ == 0) return; - if (_X) VU->VF[_Ft_].f.x = int15_to_float(VU->VF[_Fs_].SL[0]); - if (_Y) VU->VF[_Ft_].f.y = int15_to_float(VU->VF[_Fs_].SL[1]); - if (_Z) VU->VF[_Ft_].f.z = int15_to_float(VU->VF[_Fs_].SL[2]); - if (_W) VU->VF[_Ft_].f.w = int15_to_float(VU->VF[_Fs_].SL[3]); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + if (_X) VU->VF[_Ft_].i.x = PS2Float::Itof(15, VU->VF[_Fs_].SL[0]).raw; + if (_Y) VU->VF[_Ft_].i.y = PS2Float::Itof(15, VU->VF[_Fs_].SL[1]).raw; + if (_Z) VU->VF[_Ft_].i.z = PS2Float::Itof(15, VU->VF[_Fs_].SL[2]).raw; + if (_W) VU->VF[_Ft_].i.w = PS2Float::Itof(15, VU->VF[_Fs_].SL[3]).raw; + } + else + { + if (_X) VU->VF[_Ft_].f.x = int15_to_float(VU->VF[_Fs_].SL[0]); + if (_Y) VU->VF[_Ft_].f.y = int15_to_float(VU->VF[_Fs_].SL[1]); + if (_Z) VU->VF[_Ft_].f.z = int15_to_float(VU->VF[_Fs_].SL[2]); + if (_W) VU->VF[_Ft_].f.w = int15_to_float(VU->VF[_Fs_].SL[3]); + } } static __fi void _vuCLIP(VURegs* VU) { - float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) || CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + double value = PS2Float(PS2Float(VU->VF[_Ft_].i.w).Abs()).ToDouble(); + + VU->clipflag <<= 6; + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() > +value) VU->clipflag |= 0x01; + if (PS2Float(VU->VF[_Fs_].i.x).ToDouble() < -value) VU->clipflag |= 0x02; + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() > +value) VU->clipflag |= 0x04; + if (PS2Float(VU->VF[_Fs_].i.y).ToDouble() < -value) VU->clipflag |= 0x08; + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() > +value) VU->clipflag |= 0x10; + if (PS2Float(VU->VF[_Fs_].i.z).ToDouble() < -value) VU->clipflag |= 0x20; + } + else + { + float value = fabs(vuDouble(VU->VF[_Ft_].i.w)); + + VU->clipflag <<= 6; + if (vuDouble(VU->VF[_Fs_].i.x) > +value) VU->clipflag |= 0x01; + if (vuDouble(VU->VF[_Fs_].i.x) < -value) VU->clipflag |= 0x02; + if (vuDouble(VU->VF[_Fs_].i.y) > +value) VU->clipflag |= 0x04; + if (vuDouble(VU->VF[_Fs_].i.y) < -value) VU->clipflag |= 0x08; + if (vuDouble(VU->VF[_Fs_].i.z) > +value) VU->clipflag |= 0x10; + if (vuDouble(VU->VF[_Fs_].i.z) < -value) VU->clipflag |= 0x20; + } - VU->clipflag <<= 6; - if ( vuDouble(VU->VF[_Fs_].i.x) > +value ) VU->clipflag|= 0x01; - if ( vuDouble(VU->VF[_Fs_].i.x) < -value ) VU->clipflag|= 0x02; - if ( vuDouble(VU->VF[_Fs_].i.y) > +value ) VU->clipflag|= 0x04; - if ( vuDouble(VU->VF[_Fs_].i.y) < -value ) VU->clipflag|= 0x08; - if ( vuDouble(VU->VF[_Fs_].i.z) > +value ) VU->clipflag|= 0x10; - if ( vuDouble(VU->VF[_Fs_].i.z) < -value ) VU->clipflag|= 0x20; VU->clipflag = VU->clipflag & 0xFFFFFF; } @@ -1721,57 +1809,45 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { - if (fs == 0.0) - VU->statusflag |= 0x10; - else - VU->statusflag |= 0x20; + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + if (fs.IsZero()) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + VU->q.UL = fs.Div(ft).raw; + } } else { - VU->q.F = fs / ft; - VU->q.F = vuDouble(VU->q.UL); - } -} + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); -static __fi void _vuSQRT(VURegs* VU) -{ - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + VU->statusflag &= ~0x30; - VU->statusflag &= ~0x30; - - if (ft < 0.0) - VU->statusflag |= 0x10; - VU->q.F = sqrt(fabs(ft)); - VU->q.F = vuDouble(VU->q.UL); -} - -static __fi void _vuRSQRT(VURegs* VU) -{ - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - float temp; - - VU->statusflag &= ~0x30; - - if (ft == 0.0) - { - VU->statusflag |= 0x20; - - if (fs != 0) + if (ft == 0.0) { + if (fs == 0.0) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) VU->q.UL = 0xFF7FFFFF; @@ -1780,25 +1856,128 @@ static __fi void _vuRSQRT(VURegs* VU) } else { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0x80000000; - else - VU->q.UL = 0; + VU->q.F = fs / ft; + VU->q.F = vuDouble(VU->q.UL); + } + } +} +static __fi void _vuSQRT(VURegs* VU) +{ + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft.ToDouble() < 0.0) VU->statusflag |= 0x10; + VU->q.UL = PS2Float(ft.Abs()).Sqrt().raw; + } + else + { + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft < 0.0) + VU->statusflag |= 0x10; + VU->q.F = sqrt(fabs(ft)); + VU->q.F = vuDouble(VU->q.UL); + } +} + +static __fi void _vuRSQRT(VURegs* VU) +{ + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + VU->statusflag |= 0x20; + + if (!fs.IsZero()) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft.ToDouble() < 0.0) + { + VU->statusflag |= 0x10; + } + + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + VU->q.UL = fs.Div(PS2Float(ft.Abs()).Sqrt()).raw; + else + { + float temp = sqrt(fabs(vuDouble(ft.raw))); + VU->q.F = vuDouble(fs.raw) / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } else { - if (ft < 0.0) - { - VU->statusflag |= 0x10; - } + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + float temp; - temp = sqrt(fabs(ft)); - VU->q.F = fs / temp; - VU->q.F = vuDouble(VU->q.UL); + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + VU->statusflag |= 0x20; + + if (fs != 0) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft < 0.0) + { + VU->statusflag |= 0x10; + } + + temp = sqrt(fabs(ft)); + VU->q.F = fs / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } @@ -2442,157 +2621,312 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - VU->p.F = p; + VU->p.UL = vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z); } static __ri void _vuERSADD(VURegs* VU) { - float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - if (p != 0.0) - p = 1.0f / p; + PS2Float p = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); - VU->p.F = p; + if (!p.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + p = PS2Float::One().Div(p); + else + { + VU->p.F = 1.0f / vuDouble(p.raw); + return; + } + } + + VU->p.UL = p.raw; } static __ri void _vuELENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - if (p >= 0) + PS2Float value = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + VU->p.UL = value.raw; + } + else + { + float p = vuDouble(value.raw); + + if (p >= 0) + { + p = sqrt(p); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuERLENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + u32 x = vuAccurateMul(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.x); + u32 y = vuAccurateMul(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.y); + u32 z = vuAccurateMul(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.z); - if (p >= 0) + PS2Float value = PS2Float(vuAccurateAdd(VU, vuAccurateAdd(VU, x, y), z)); + + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p != 0) + if (value.ToDouble() >= 0) { - p = 1.0f / p; + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + value = PS2Float::One().Div(value); + } + else + { + VU->p.F = 1.0 / vuDouble(value.raw); + return; + } + } } + VU->p.UL = value.raw; + } + else + { + float p = vuDouble(value.raw); + + if (p >= 0) + { + p = sqrt(p); + if (p != 0) + { + p = 1.0f / p; + } + } + VU->p.F = p; } - VU->p.F = p; } -static __ri float _vuCalculateEATAN(float inputvalue) { +static __ri u32 _vuCalculateEATAN(VURegs* VU, u32 inputvalue) +{ float eatanconst[9] = { 0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, 0.785398185253143f }; - float result = (eatanconst[0] * inputvalue) + (eatanconst[1] * pow(inputvalue, 3)) + (eatanconst[2] * pow(inputvalue, 5)) - + (eatanconst[3] * pow(inputvalue, 7)) + (eatanconst[4] * pow(inputvalue, 9)) + (eatanconst[5] * pow(inputvalue, 11)) - + (eatanconst[6] * pow(inputvalue, 13)) + (eatanconst[7] * pow(inputvalue, 15)); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + { + PS2Float p = PS2Float(inputvalue); - result += eatanconst[8]; + return PS2Float(eatanconst[0]).Mul(p) + .Add(PS2Float(eatanconst[1]).Mul(p.Pow(3))) + .Add(PS2Float(eatanconst[2]).Mul(p.Pow(5))) + .Add(PS2Float(eatanconst[3]).Mul(p.Pow(7))) + .Add(PS2Float(eatanconst[4]).Mul(p.Pow(9))) + .Add(PS2Float(eatanconst[5]).Mul(p.Pow(11))) + .Add(PS2Float(eatanconst[6]).Mul(p.Pow(13))) + .Add(PS2Float(eatanconst[7]).Mul(p.Pow(15))) + .Add(PS2Float(eatanconst[8])).raw; + } + else + { + float fvalue = vuDouble(inputvalue); - result = vuDouble(*(u32*)&result); + float result = (eatanconst[0] * fvalue) + (eatanconst[1] * pow(fvalue, 3)) + (eatanconst[2] * pow(fvalue, 5)) + (eatanconst[3] * pow(fvalue, 7)) + (eatanconst[4] * pow(fvalue, 9)) + (eatanconst[5] * pow(fvalue, 11)) + (eatanconst[6] * pow(fvalue, 13)) + (eatanconst[7] * pow(fvalue, 15)); - return result; + result += eatanconst[8]; + + result = vuDouble(*(u32*)&result); + + return std::bit_cast(result); + } } static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); - VU->p.F = p; + VU->p.UL = _vuCalculateEATAN(VU, VU->VF[_Fs_].UL[_Fsf_]); } static __ri void _vuEATANxy(VURegs* VU) { - float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + VU->p.UL = _vuCalculateEATAN(VU, vuAccurateDiv(VU, VU->VF[_Fs_].i.y, VU->VF[_Fs_].i.x)); + } + else + { + VU->p.UL = PS2Float(0).raw; } - VU->p.F = p; } static __ri void _vuEATANxz(VURegs* VU) { - float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (!PS2Float(VU->VF[_Fs_].i.x).IsZero()) { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + VU->p.UL = _vuCalculateEATAN(VU, vuAccurateDiv(VU, VU->VF[_Fs_].i.z, VU->VF[_Fs_].i.x)); + } + else + { + VU->p.UL = PS2Float(0).raw; } - VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); - VU->p.F = p; + VU->p.UL = vuAccurateAdd(VU, vuAccurateAdd(VU, vuAccurateAdd(VU, VU->VF[_Fs_].i.x, VU->VF[_Fs_].i.y), VU->VF[_Fs_].i.z), VU->VF[_Fs_].i.w); } static __ri void _vuERCPR(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - if (p != 0) + if (!p.IsZero()) { - p = 1.0 / p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + p = PS2Float::One().Div(p); + } + else + { + VU->p.F = 1.0 / vuDouble(p.raw); + return; + } } - VU->p.F = p; + VU->p.UL = p.raw; } static __ri void _vuESQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - } + PS2Float value = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + } + + VU->p.UL = value.raw; + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + if (p >= 0) + { + p = sqrt(p); + } + + VU->p.F = p; + } } static __ri void _vuERSQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { - p = sqrt(p); - if (p) - { - p = 1.0f / p; - } - } + PS2Float value = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (value.ToDouble() >= 0) + { + value = value.Sqrt(); + if (!value.IsZero()) + { + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + { + VU->p.F = 1.0f / vuDouble(value.raw); + return; + } + else + { + value = PS2Float::One().Div(value); + } + } + } + + VU->p.UL = value.raw; + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + if (p >= 0) + { + p = sqrt(p); + if (p) + { + p = 1.0f / p; + } + } + + VU->p.F = p; + } } static __ri void _vuESIN(VURegs* VU) { float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); - VU->p.F = vuDouble(*(u32*)&p); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + { + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + VU->p.UL = PS2Float(sinconsts[0]).Mul(p).Add(PS2Float(sinconsts[1]).Mul(p.Pow(3))).Add(PS2Float(sinconsts[2]).Mul(p.Pow(5))).Add(PS2Float(sinconsts[3]).Mul(p.Pow(7))).Add(PS2Float(sinconsts[4]).Mul(p.Pow(9))).raw; + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); + VU->p.F = vuDouble(*(u32*)&p); + } } static __ri void _vuEEXP(VURegs* VU) { float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); - p = pow(p, 4); - p = vuDouble(*(u32*)&p); - p = 1 / p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + { + PS2Float p = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + VU->p.UL = PS2Float::One().Div(PS2Float::One() + .Add(PS2Float(consts[0]).Mul(p)) + .Add(PS2Float(consts[1]).Mul(p.Pow(2))) + .Add(PS2Float(consts[2]).Mul(p.Pow(3))) + .Add(PS2Float(consts[3]).Mul(p.Pow(4))) + .Add(PS2Float(consts[4]).Mul(p.Pow(5))) + .Add(PS2Float(consts[5]).Mul(p.Pow(6))) + .Pow(4)).raw; + } + else + { + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + + p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); + p = pow(p, 4); + p = vuDouble(*(u32*)&p); + p = 1 / p; + + VU->p.F = p; + } } static __ri void _vuXITOP(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 1d2efe083b..ca31dee902 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -201,6 +201,7 @@ + @@ -281,6 +282,7 @@ + @@ -645,6 +647,7 @@ + @@ -726,6 +729,7 @@ + @@ -1025,4 +1029,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index e72297d4b7..902d75bf51 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -289,6 +289,9 @@ {cd8ec519-2196-43f7-86de-7faced2d4296} + + {9a40984b-cb23-4a54-a5e9-9c54f3c16c5b} + @@ -1443,6 +1446,12 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + + + System\Ps2\EmotionEngine\Shared + @@ -2399,6 +2408,12 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + + + System\Ps2\EmotionEngine\Shared + @@ -2428,4 +2443,4 @@ System\Ps2\GS - + \ No newline at end of file