From 6383a3a14761dda4eb4adbfe03fb9721e9df1c97 Mon Sep 17 00:00:00 2001 From: GitHubProUser67 <127040195+GitHubProUser67@users.noreply.github.com> Date: Sun, 20 Apr 2025 11:54:44 +0200 Subject: [PATCH] Core: Interpreter Implementation of PS2's floating point unit specification. This commit implements software floating-point support in PCSX2's interpreters. This work is a combination of several prior efforts and research. Credits: - https://www.gregorygaines.com/blog/emulating-ps2-floating-point-nums-ieee-754-diffs-part-1/ - https://github.com/GitHubProUser67/MultiServer3/tree/main/BackendServices/PS2FloatLibrary - https://github.com/Goatman13/pcsx2/tree/accurate_int_add_sub - PCSX2 Team for their help and support in this massive journey. --- common/BitUtils.h | 18 + pcsx2-qt/Settings/AdvancedSettingsWidget.cpp | 10 + pcsx2-qt/Settings/AdvancedSettingsWidget.ui | 294 ++++-- pcsx2/CMakeLists.txt | 2 + pcsx2/FPU.cpp | 333 +++++-- pcsx2/PS2Float.cpp | 965 +++++++++++++++++++ pcsx2/PS2Float.h | 123 +++ pcsx2/Pcsx2Config.cpp | 13 + pcsx2/VU.h | 4 +- pcsx2/VUflags.cpp | 51 +- pcsx2/VUflags.h | 6 + pcsx2/VUops.cpp | 702 ++++++++++---- pcsx2/pcsx2.vcxproj | 4 +- pcsx2/pcsx2.vcxproj.filters | 11 +- 14 files changed, 2179 insertions(+), 357 deletions(-) create mode 100644 pcsx2/PS2Float.cpp create mode 100644 pcsx2/PS2Float.h diff --git a/common/BitUtils.h b/common/BitUtils.h index 4d12ba2b07..536b6a5f16 100644 --- a/common/BitUtils.h +++ b/common/BitUtils.h @@ -28,6 +28,19 @@ static inline int _BitScanReverse(unsigned long* const Index, const unsigned lon namespace Common { + static constexpr s8 msb[256] = { + -1, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 
6, 6, 6, 6, 6, 6, 6, 6, 6, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}; + + static constexpr s32 normalizeAmounts[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 8, 8, 8, 8, 8, 8, 16, 16, 16, 16, 16, 16, 16, 16, 24, 24, 24, 24, 24, 24, 24}; + template static constexpr __fi bool IsAligned(T value, unsigned int alignment) { @@ -71,6 +84,11 @@ namespace Common return Common::AlignUpPow2(size, __pagesize); } + __fi static s32 BitScanReverse8(s32 b) + { + return msb[b]; + } + __fi static u32 CountLeadingSignBits(s32 n) { // If the sign bit is 1, we invert the bits to 0 for count-leading-zero. diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp index 6fe70187a2..96f5524a64 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.cpp @@ -48,6 +48,16 @@ AdvancedSettingsWidget::AdvancedSettingsWidget(SettingsWindow* dialog, QWidget* connect(m_ui.vu0ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(0, index); }); connect(m_ui.vu1ClampMode, QOverload::of(&QComboBox::currentIndexChanged), [this](int index) { setClampingMode(1, index); }); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftAddSub, "EmuCore/CPU/Recompiler", "fpuSoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftMulDiv, "EmuCore/CPU/Recompiler", "fpuSoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.eeSoftSqrt, "EmuCore/CPU/Recompiler", "fpuSoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftAddSub, "EmuCore/CPU/Recompiler", 
"vu0SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftMulDiv, "EmuCore/CPU/Recompiler", "vu0SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu0SoftSqrt, "EmuCore/CPU/Recompiler", "vu0SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftAddSub, "EmuCore/CPU/Recompiler", "vu1SoftAddSub", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftMulDiv, "EmuCore/CPU/Recompiler", "vu1SoftMulDiv", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.vu1SoftSqrt, "EmuCore/CPU/Recompiler", "vu1SoftSqrt", false); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.iopRecompiler, "EmuCore/CPU/Recompiler", "EnableIOP", true); SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.gameFixes, "EmuCore", "EnableGameFixes", true); diff --git a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui index 197fda8c7c..2094043473 100644 --- a/pcsx2-qt/Settings/AdvancedSettingsWidget.ui +++ b/pcsx2-qt/Settings/AdvancedSettingsWidget.ui @@ -32,9 +32,9 @@ 0 - -447 + 0 790 - 1049 + 1317 @@ -94,10 +94,10 @@ - - + + - Division Rounding Mode: + Clamping Mode: @@ -125,38 +125,7 @@ - - - - Clamping Mode: - - - - - - - - None - - - - - Normal (Default) - - - - - Extra + Preserve Sign - - - - - Full - - - - - + @@ -209,6 +178,67 @@ + + + + + None + + + + + Normal (Default) + + + + + Extra + Preserve Sign + + + + + Full + + + + + + + + Division Rounding Mode: + + + + + + + Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + @@ -218,7 +248,7 @@ Vector Units (VU) - + VU1 Rounding Mode: @@ -249,7 +279,129 @@ - + + + + VU1 Clamping Mode: + + + + + + + VU0 Rounding Mode: + + + + + + + VU1 Software Float + + + + + + Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Float Square Root + + + + + + + + + + VU0 Software Float + + + + + + 
Multiplication/Division + + + + + + + Addition/Subtraction + + + + + + + Square Root + + + + + + + + + + + Nearest + + + + + Negative + + + + + Positive + + + + + Chop/Zero (Default) + + + + + + + + + None + + + + + Normal (Default) + + + + + Extra + + + + + Extra + Preserve Sign + + + + + @@ -281,30 +433,6 @@ - - - - - None - - - - - Normal (Default) - - - - - Extra - - - - - Extra + Preserve Sign - - - - @@ -312,45 +440,7 @@ - - - - VU0 Rounding Mode: - - - - - - - VU1 Clamping Mode: - - - - - - - - Nearest - - - - - Negative - - - - - Positive - - - - - Chop/Zero (Default) - - - - - + diff --git a/pcsx2/CMakeLists.txt b/pcsx2/CMakeLists.txt index 20560112e5..cb1132f127 100644 --- a/pcsx2/CMakeLists.txt +++ b/pcsx2/CMakeLists.txt @@ -93,6 +93,7 @@ set(pcsx2Sources MTGS.cpp MTVU.cpp Patch.cpp + PS2Float.cpp Pcsx2Config.cpp PerformanceMetrics.cpp PrecompiledHeader.cpp @@ -173,6 +174,7 @@ set(pcsx2Headers MTVU.h Memory.h MemoryTypes.h + PS2Float.h Patch.h PerformanceMetrics.h PrecompiledHeader.h diff --git a/pcsx2/FPU.cpp b/pcsx2/FPU.cpp index f8c5a74d44..5be32e5de8 100644 --- a/pcsx2/FPU.cpp +++ b/pcsx2/FPU.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" +#include "PS2Float.h" #include @@ -89,6 +90,19 @@ bool checkUnderflow(u32& xReg, u32 cFlagsToSet) { return false; } +bool checkOverflowUnderfowSoft(PS2Float xReg, u32 cFlagsToSet, bool oflw) +{ + if ((oflw && xReg.of) || (!oflw && xReg.uf)) + { + _ContVal_ |= (cFlagsToSet); + return true; + } + else if (cFlagsToSet & FPUflagO) + _ContVal_ &= oflw ? ~FPUflagO : ~FPUflagU; + + return false; +} + __fi u32 fp_max(u32 a, u32 b) { return ((s32)a < 0 && (s32)b < 0) ? 
std::min(a, b) : std::max(a, b); @@ -115,6 +129,22 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT return false; } +bool checkDivideByZeroInvalidSoft(PS2Float xReg, u32 cFlagsToSet1, u32 cFlagsToSet2) +{ + if (xReg.dz) + { + _ContVal_ |= cFlagsToSet1; + return true; + } + else if (xReg.iv) + { + _ContVal_ |= cFlagsToSet2; + return true; + } + + return false; +} + /* Clears the "Cause Flags" of the Control/Status Reg The "EE Core Users Manual" implies that all the Cause flags are cleared every instruction... But, the "EE Core Instruction Set Manual" says that only certain Cause Flags are cleared @@ -138,7 +168,7 @@ bool checkDivideByZero(u32& xReg, u32 yDivisorReg, u32 zDividendReg, u32 cFlagsT #else // Used for Comparing; This compares if the floats are exactly the same. #define C_cond_S(cond) { \ - _ContVal_ = ( fpuDouble(_FsValUl_) cond fpuDouble(_FtValUl_) ) ? \ + _ContVal_ = (fpuCompareFull(_FsValUl_) cond fpuCompareFull(_FtValUl_)) ? \ ( _ContVal_ | FPUflagC ) : \ ( _ContVal_ & ~FPUflagC ); \ } @@ -182,21 +212,81 @@ float fpuDouble(u32 f) } } +static s32 fpuCompareFull(u32 f) +{ + if (!(f & 0x7f800000)) + f = 0; + // If f is negative, flip the non-sign bits so integer compares work like fp compares + if (f & 0x80000000) + f ^= 0x7fffffff; + return static_cast(f); +} + +static __fi PS2Float fpuAccurateAdd(u32 a, u32 b) +{ + return PS2Float(a).Add(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateSub(u32 a, u32 b) +{ + return PS2Float(a).Sub(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateMul(u32 a, u32 b) +{ + return PS2Float(a).Mul(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateDiv(u32 a, u32 b) +{ + return PS2Float(a).Div(PS2Float(b)); +} + +static __fi PS2Float fpuAccurateMulAdd(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulAdd(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float fpuAccurateMulSub(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulSub(PS2Float(b), PS2Float(c)); +} + void ABS_S() { _FdValUl_ = 
_FsValUl_ & 0x7fffffff; clearFPUFlags( FPUflagO | FPUflagU ); } void ADD_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float addres = fpuAccurateAdd(_FsValUl_, _FtValUl_); + _FdValUl_ = addres.raw; + if (checkOverflowUnderfowSoft(addres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(addres, FPUflagU | FPUflagSU, false); + } + else + { + _FdValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void ADDA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float addres = fpuAccurateAdd(_FsValUl_, _FtValUl_); + _FAValUl_ = addres.raw; + if (checkOverflowUnderfowSoft(addres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(addres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ = fpuDouble( _FsValUl_ ) + fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void BC1F() { @@ -248,39 +338,75 @@ void CTC1() { } void CVT_S() { - _FdValf_ = (float)_FsValSl_; - _FdValf_ = fpuDouble( _FdValUl_ ); + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { _FdValUl_ = PS2Float::Itof(0, _FsValSl_).raw; } + else + { + _FdValf_ = (float)_FsValSl_; + _FdValf_ = fpuDouble(_FdValUl_); + } } void CVT_W() { - if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = (s32)_FsValf_; } + if (CHECK_FPU_SOFT_ADDSUB || CHECK_FPU_SOFT_MULDIV || CHECK_FPU_SOFT_SQRT) { _FdValSl_ = PS2Float::Ftoi(0, _FsValUl_); } + else if ( ( _FsValUl_ & 0x7F800000 ) <= 0x4E800000 ) { _FdValSl_ = 
(s32)_FsValf_; } else if ( ( _FsValUl_ & 0x80000000 ) == 0 ) { _FdValUl_ = 0x7fffffff; } else { _FdValUl_ = 0x80000000; } } void DIV_S() { - if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, 0)) return; - checkUnderflow( _FdValUl_, 0); + if (CHECK_FPU_SOFT_MULDIV) + { + PS2Float divres = fpuAccurateDiv(_FsValUl_, _FtValUl_); + _FdValUl_ = divres.raw; + if (checkDivideByZeroInvalidSoft(divres, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + if (checkOverflowUnderfowSoft(divres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(divres, FPUflagU | FPUflagSU, false); + } + else + { + if (checkDivideByZero( _FdValUl_, _FtValUl_, _FsValUl_, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, 0)) return; + checkUnderflow( _FdValUl_, 0); + } } -/* The Instruction Set manual has an overly complicated way of +/* The Instruction Set manual has an overly complicated way of determining the flags that are set. Hopefully this shorter method provides a similar outcome and is faster. 
(cottonvibes) */ void MADD_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulAdd(_FAValUl_, _FsValUl_, _FtValUl_); + _FdValUl_ = fmacres.raw; + if (checkOverflowUnderfowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + FPRreg temp; + temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValf_ = fpuDouble( _FAValUl_ ) + fpuDouble( temp.UL ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void MADDA_S() { - _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulAdd(_FAValUl_, _FsValUl_, _FtValUl_); + _FAValUl_ = fmacres.raw; + if (checkOverflowUnderfowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ += fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void MAX_S() { @@ -303,17 +429,37 @@ void MOV_S() { } void MSUB_S() { - FPRreg temp; - temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulSub(_FAValUl_, _FsValUl_, 
_FtValUl_); + _FdValUl_ = fmacres.raw; + if (checkOverflowUnderfowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + FPRreg temp; + temp.f = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + _FdValf_ = fpuDouble( _FAValUl_ ) - fpuDouble( temp.UL ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void MSUBA_S() { - _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB && CHECK_FPU_SOFT_MULDIV) + { + PS2Float fmacres = fpuAccurateMulSub(_FAValUl_, _FsValUl_, _FtValUl_); + _FAValUl_ = fmacres.raw; + if (checkOverflowUnderfowSoft(fmacres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(fmacres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ -= fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void MTC1() { @@ -321,15 +467,35 @@ void MTC1() { } void MUL_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_MULDIV) + { + PS2Float mulres = fpuAccurateMul(_FsValUl_, _FtValUl_); + _FdValUl_ = mulres.raw; + if (checkOverflowUnderfowSoft(mulres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(mulres, FPUflagU | FPUflagSU, false); + } + else + { + _FdValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void MULA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; - 
checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_MULDIV) + { + PS2Float mulres = fpuAccurateMul(_FsValUl_, _FtValUl_); + _FAValUl_ = mulres.raw; + if (checkOverflowUnderfowSoft(mulres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(mulres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ = fpuDouble( _FsValUl_ ) * fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } void NEG_S() { @@ -338,47 +504,90 @@ void NEG_S() { } void RSQRT_S() { - FPRreg temp; clearFPUFlags(FPUflagD | FPUflagI); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) - _ContVal_ |= FPUflagD | FPUflagSD; - _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; - return; + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float rsqrtres = PS2Float(_FsValUl_).Rsqrt(_FtValUl_); + _FdValUl_ = rsqrtres.raw; + if (checkDivideByZeroInvalidSoft(rsqrtres, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + if (checkOverflowUnderfowSoft(rsqrtres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(rsqrtres, FPUflagU | FPUflagSU, false); } - else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative - _ContVal_ |= FPUflagI | FPUflagSI; - temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); - } - else { _FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero + else + { + FPRreg temp; - if (checkOverflow( _FdValUl_, 0)) return; - checkUnderflow( _FdValUl_, 0); + if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) { // Ft is zero (Denormals are Zero) + _ContVal_ |= FPUflagD | FPUflagSD; + _FdValUl_ = ( _FtValUl_ & 0x80000000 ) | posFmax; + return; + } + else if ( _FtValUl_ & 0x80000000 ) { // Ft is negative + _ContVal_ |= FPUflagI | FPUflagSI; + temp.f = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); + _FdValf_ = fpuDouble( _FsValUl_ ) / fpuDouble( temp.UL ); + } + else { 
_FdValf_ = fpuDouble( _FsValUl_ ) / sqrt( fpuDouble( _FtValUl_ ) ); } // Ft is positive and not zero + + if (checkOverflow( _FdValUl_, 0)) return; + checkUnderflow( _FdValUl_, 0); + } } void SQRT_S() { clearFPUFlags(FPUflagI | FPUflagD); - if ( ( _FtValUl_ & 0x7F800000 ) == 0 ) // If Ft = +/-0 - _FdValUl_ = _FtValUl_ & 0x80000000;// result is 0 - else if ( _FtValUl_ & 0x80000000 ) { // If Ft is Negative + if (CHECK_FPU_SOFT_SQRT) + { + PS2Float sqrtres = PS2Float(_FtValUl_).Sqrt(); + _FdValUl_ = sqrtres.raw; + if (checkDivideByZeroInvalidSoft(sqrtres, FPUflagD | FPUflagSD, FPUflagI | FPUflagSI)) return; + if (checkOverflowUnderfowSoft(sqrtres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(sqrtres, FPUflagU | FPUflagSU, false); + } + else if ((_FtValUl_ & 0x7F800000) == 0) // If Ft = +/-0 + _FdValUl_ = _FtValUl_ & 0x80000000; // result is 0 + else if (_FtValUl_ & 0x80000000) + { + // If Ft is Negative _ContVal_ |= FPUflagI | FPUflagSI; - _FdValf_ = sqrt( fabs( fpuDouble( _FtValUl_ ) ) ); - } else - _FdValf_ = sqrt( fpuDouble( _FtValUl_ ) ); // If Ft is Positive + _FdValf_ = sqrt(fabs(fpuDouble(_FtValUl_))); + } + else + _FdValf_ = sqrt(fpuDouble(_FtValUl_)); // If Ft is Positive } void SUB_S() { - _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); - if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; - checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float subres = fpuAccurateSub(_FsValUl_, _FtValUl_); + _FdValUl_ = subres.raw; + if (checkOverflowUnderfowSoft(subres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(subres, FPUflagU | FPUflagSU, false); + } + else + { + _FdValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + if (checkOverflow( _FdValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FdValUl_, FPUflagU | FPUflagSU); + } } void SUBA_S() { - _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); - if (checkOverflow( _FAValUl_, FPUflagO | 
FPUflagSO)) return; - checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + if (CHECK_FPU_SOFT_ADDSUB) + { + PS2Float subres = fpuAccurateSub(_FsValUl_, _FtValUl_); + _FAValUl_ = subres.raw; + if (checkOverflowUnderfowSoft(subres, FPUflagO | FPUflagSO, true)) return; + checkOverflowUnderfowSoft(subres, FPUflagU | FPUflagSU, false); + } + else + { + _FAValf_ = fpuDouble( _FsValUl_ ) - fpuDouble( _FtValUl_ ); + if (checkOverflow( _FAValUl_, FPUflagO | FPUflagSO)) return; + checkUnderflow( _FAValUl_, FPUflagU | FPUflagSU); + } } } // End Namespace COP1 diff --git a/pcsx2/PS2Float.cpp b/pcsx2/PS2Float.cpp new file mode 100644 index 0000000000..84cc27e904 --- /dev/null +++ b/pcsx2/PS2Float.cpp @@ -0,0 +1,965 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#include +#include +#include +#include +#include +#include +#include +#include "common/Pcsx2Defs.h" +#include "common/BitUtils.h" +#include "PS2Float.h" +#include "Common.h" + +//**************************************************************** +// Radix Divisor +// Algorithm reference: DOI 10.1109/ARITH.1995.465363 +//**************************************************************** + +struct CSAResult +{ + u32 sum; + u32 carry; +}; + +static struct CSAResult CSA(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + u32 h = (a & b) | (u & c); + u32 l = u ^ c; + return {l, h << 1}; +} + +static s32 quotientSelect(struct CSAResult current) +{ + // Note: Decimal point is between bits 24 and 25 + u32 mask = (1 << 24) - 1; // Bit 23 needs to be or'd in instead of added + s32 test = ((current.sum & ~mask) + current.carry) | (current.sum & mask); + if (test >= 1 << 23) + { // test >= 0.25 + return 1; + } + else if (test < (s32)(~0u << 24)) + { // test < -0.5 + return -1; + } + else + { + return 0; + } +} + +static u32 mantissa(u32 x) +{ + return (x & 0x7fffff) | 0x800000; +} + +static u32 exponent(u32 x) +{ + return (x >> 23) & 0xff; +} + 
+//**************************************************************** +// Booth Multiplier +//**************************************************************** + +struct BoothRecode +{ + u32 data; + u32 negate; +}; + +struct AddResult +{ + u32 lo; + u32 hi; +}; + +static BoothRecode Booth(u32 a, u32 b, u32 bit) +{ + u32 test = (bit ? b >> (bit * 2 - 1) : b << 1) & 7; + a <<= (bit * 2); + a += (test == 3 || test == 4) ? a : 0; + u32 neg = (test >= 4 && test <= 6) ? ~0u : 0; + u32 pos = 1 << (bit * 2); + a ^= (neg & -pos); + a &= (test >= 1 && test <= 6) ? ~0u : 0; + return {a, neg & pos}; +} + +static AddResult Add3(u32 a, u32 b, u32 c) +{ + u32 u = a ^ b; + return {u ^ c, ((u & c) | (a & b)) << 1}; +} + +static u64 MulMantissa(u32 a, u32 b) +{ + u64 full = static_cast(a) * static_cast(b); + BoothRecode b0 = Booth(a, b, 0); + BoothRecode b1 = Booth(a, b, 1); + BoothRecode b2 = Booth(a, b, 2); + BoothRecode b3 = Booth(a, b, 3); + BoothRecode b4 = Booth(a, b, 4); + BoothRecode b5 = Booth(a, b, 5); + BoothRecode b6 = Booth(a, b, 6); + BoothRecode b7 = Booth(a, b, 7); + + // First cycle + AddResult t0 = Add3(b1.data, b2.data, b3.data); + AddResult t1 = Add3(b4.data & ~0x7ffu, b5.data & ~0xfffu, b6.data); + // A few adds get skipped, squeeze them back in + t1.hi |= b6.negate | (b5.data & 0x800); + b7.data |= (b5.data & 0x400) + b5.negate; + + // Second cycle + AddResult t2 = Add3(b0.data, t0.lo, t0.hi); + AddResult t3 = Add3(b7.data, t1.lo, t1.hi); + + // Third cycle + AddResult t4 = Add3(t2.hi, t3.lo, t3.hi); + + // Fourth cycle + AddResult t5 = Add3(t2.lo, t4.lo, t4.hi); + + // Discard bits and sum + t5.hi += b7.negate; + t5.lo &= ~0x7fffu; + t5.hi &= ~0x7fffu; + u32 ps2lo = t5.lo + t5.hi; + return full - ((ps2lo ^ full) & 0x8000); +} + +//**************************************************************** +// Float Processor +//**************************************************************** + +PS2Float::PS2Float(s32 value) { raw = (u32)value; } + +PS2Float::PS2Float(u32 
value) { raw = value; } + +PS2Float::PS2Float(float value) { raw = std::bit_cast(value); } + +PS2Float::PS2Float(bool sign, u8 exponent, u32 mantissa) +{ + raw = 0; + raw |= (sign ? 1u : 0u) << 31; + raw |= (u32)(exponent << MANTISSA_BITS); + raw |= mantissa & 0x7FFFFF; +} + +PS2Float PS2Float::Max() +{ + return PS2Float(MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Min() +{ + return PS2Float(MIN_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::One() +{ + return PS2Float(ONE); +} + +PS2Float PS2Float::MinOne() +{ + return PS2Float(MIN_ONE); +} + +PS2Float PS2Float::Add(PS2Float addend) +{ + if (IsDenormalized() || addend.IsDenormalized()) + { + bool sign = DetermineAdditionOperationSign(*this, addend); + + if (IsDenormalized() && !addend.IsDenormalized()) + return PS2Float(sign, addend.Exponent(), addend.Mantissa()); + else if (!IsDenormalized() && addend.IsDenormalized()) + return PS2Float(sign, Exponent(), Mantissa()); + else if (IsDenormalized() && addend.IsDenormalized()) + return PS2Float(sign, 0, 0); + else + Console.Error("Both numbers are not denormalized"); + + return PS2Float(0); + } + + u32 a = raw; + u32 b = addend.raw; + + //exponent difference + s32 exp_diff = Exponent() - addend.Exponent(); + + //diff = 1 .. 24, expt < expd + if (exp_diff > 0 && exp_diff < 25) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -24 .. 
-1 , expd < expt + else if (exp_diff < 0 && exp_diff > -25) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b)); +} + +PS2Float PS2Float::Sub(PS2Float subtrahend) +{ + if (IsDenormalized() || subtrahend.IsDenormalized()) + { + bool sign = DetermineSubtractionOperationSign(*this, subtrahend); + + if (IsDenormalized() && !subtrahend.IsDenormalized()) + return PS2Float(sign, subtrahend.Exponent(), subtrahend.Mantissa()); + else if (!IsDenormalized() && subtrahend.IsDenormalized()) + return PS2Float(sign, Exponent(), Mantissa()); + else if (IsDenormalized() && subtrahend.IsDenormalized()) + return PS2Float(sign, 0, 0); + else + Console.Error("Both numbers are not denormalized"); + + return PS2Float(0); + } + + u32 a = raw; + u32 b = subtrahend.raw; + + //exponent difference + s32 exp_diff = Exponent() - subtrahend.Exponent(); + + //diff = 1 .. 24, expt < expd + if (exp_diff > 0 && exp_diff < 25) + { + exp_diff = exp_diff - 1; + b = (MIN_FLOATING_POINT_VALUE << exp_diff) & b; + } + + //diff = -24 .. -1 , expd < expt + else if (exp_diff < 0 && exp_diff > -25) + { + exp_diff = -exp_diff; + exp_diff = exp_diff - 1; + a = a & (MIN_FLOATING_POINT_VALUE << exp_diff); + } + + return PS2Float(a).DoAdd(PS2Float(b).Negate()); +} + +PS2Float PS2Float::Mul(PS2Float mulend) +{ + if (IsDenormalized() || mulend.IsDenormalized() || IsZero() || mulend.IsZero()) + return PS2Float(DetermineMultiplicationDivisionOperationSign(*this, mulend), 0, 0); + + return DoMul(mulend); +} + +PS2Float PS2Float::MulAdd(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float addres = Add(mulres); + u32 rawres = addres.raw; + bool oflw = addres.of; + bool uflw = addres.uf; + DetermineMacException(3, raw, of, mulres.of, mulres.Sign() ? 
1 : 0, rawres, oflw, uflw); + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::MulAddAcc(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float addres = Add(mulres); + u32 rawres = addres.raw; + bool oflw = addres.of; + bool uflw = addres.uf; + DetermineMacException(8, raw, of, mulres.of, mulres.Sign() ? 1 : 0, rawres, oflw, uflw); + raw = rawres; + of = oflw; + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::MulSub(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float subres = Sub(mulres); + u32 rawres = subres.raw; + bool oflw = subres.of; + bool uflw = subres.uf; + DetermineMacException(4, raw, of, mulres.of, mulres.Sign() ? 1 : 0, rawres, oflw, uflw); + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::MulSubAcc(PS2Float opsend, PS2Float optend) +{ + PS2Float mulres = opsend.Mul(optend); + PS2Float subres = Sub(mulres); + u32 rawres = subres.raw; + bool oflw = subres.of; + bool uflw = subres.uf; + DetermineMacException(9, raw, of, mulres.of, mulres.Sign() ? 1 : 0, rawres, oflw, uflw); + raw = rawres; + of = oflw; + PS2Float result = PS2Float(rawres); + result.of = oflw; + result.uf = uflw; + return result; +} + +PS2Float PS2Float::Div(PS2Float divend) +{ + u32 a = raw; + u32 b = divend.raw; + if (((a & 0x7F800000) == 0) && ((b & 0x7F800000) != 0)) + { + u32 floatResult = 0; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(b >> 31) != (s32)(a >> 31)) ? 1 : 0 & 1) << 31; + return PS2Float(floatResult); + } + if (((a & 0x7F800000) != 0) && ((b & 0x7F800000) == 0)) + { + u32 floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(b >> 31) != (s32)(a >> 31)) ? 
1 : 0 & 1) << 31; + PS2Float result = PS2Float(floatResult); + result.dz = true; + return result; + } + if (((a & 0x7F800000) == 0) && ((b & 0x7F800000) == 0)) + { + u32 floatResult = PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult &= PS2Float::MAX_FLOATING_POINT_VALUE; + floatResult |= (u32)(((s32)(b >> 31) != (s32)(a >> 31)) ? 1 : 0 & 1) << 31; + PS2Float result = PS2Float(floatResult); + result.iv = true; + return result; + } + u32 am = mantissa(a) << 2; + u32 bm = mantissa(b) << 2; + struct CSAResult current = {am, 0}; + u32 quotient = 0; + int quotientBit = 1; + for (int i = 0; i < 25; i++) + { + quotient = (quotient << 1) + quotientBit; + u32 add = quotientBit > 0 ? ~bm : quotientBit < 0 ? bm : 0; + current.carry += quotientBit > 0; + struct CSAResult csa = CSA(current.sum, current.carry, add); + quotientBit = quotientSelect(quotientBit ? csa : current); + current.sum = csa.sum << 1; + current.carry = csa.carry << 1; + } + u32 sign = ((a ^ b) & 0x80000000); + u32 Dvdtexp = exponent(a); + u32 Dvsrexp = exponent(b); + s32 cexp = Dvdtexp - Dvsrexp + 126; + if (quotient >= (1 << 24)) + { + cexp += 1; + quotient >>= 1; + } + if (Dvdtexp == 0 && Dvsrexp == 0) + { + PS2Float result = PS2Float(sign | PS2Float::MAX_FLOATING_POINT_VALUE); + result.iv = true; + return result; + } + else if (Dvdtexp == 0 || Dvsrexp != 0) + { + if (Dvdtexp == 0 && Dvsrexp != 0) { return PS2Float(sign); } + } + else + { + PS2Float result = PS2Float(sign | PS2Float::MAX_FLOATING_POINT_VALUE); + result.dz = true; + return result; + } + if (cexp > 255) + { + PS2Float result = PS2Float(sign | PS2Float::MAX_FLOATING_POINT_VALUE); + result.of = true; + return result; + } + else if (cexp < 1) + { + PS2Float result = PS2Float(sign); + result.uf = true; + return result; + } + return (quotient & 0x7fffff) | (cexp << 23) | sign; +} + +PS2Float PS2Float::Sqrt() +{ + u32 a = raw; + if ((a & 0x7F800000) == 0) + { + PS2Float result = PS2Float(0); + result.iv = ((a >> 31) & 1) != 0; + return result; + } 
+ u32 m = mantissa(a) << 1; + if (!(a & 0x800000)) // If exponent is odd after subtracting bias of 127 + m <<= 1; + struct CSAResult current = {m, 0}; + u32 quotient = 0; + s32 quotientBit = 1; + for (s32 i = 0; i < 25; i++) + { + // Adding n to quotient adds n * (2*quotient + n) to quotient^2 + // (which is what we need to subtract from the remainder) + u32 adjust = quotient + (quotientBit << (24 - i)); + quotient += quotientBit << (25 - i); + u32 add = quotientBit > 0 ? ~adjust : quotientBit < 0 ? adjust : 0; + current.carry += quotientBit > 0; + struct CSAResult csa = CSA(current.sum, current.carry, add); + quotientBit = quotientSelect(quotientBit ? csa : current); + current.sum = csa.sum << 1; + current.carry = csa.carry << 1; + } + s32 Dvdtexp = exponent(a); + if (Dvdtexp == 0) + return PS2Float(0); + Dvdtexp = (Dvdtexp + 127) >> 1; + PS2Float result = PS2Float(((quotient >> 2) & 0x7fffff) | (Dvdtexp << 23)); + if (Sign()) + { + if (result.Sign()) + result = result.Negate(); + result.iv = true; + } + return result; +} + +PS2Float PS2Float::Rsqrt(PS2Float other) +{ + PS2Float sqrt = PS2Float(false, other.Exponent(), other.Mantissa()).Sqrt(); + PS2Float div = Div(sqrt); + PS2Float result = PS2Float(div.raw); + result.dz = sqrt.dz || div.dz; + result.iv = sqrt.iv || div.iv; + result.of = div.of; + result.uf = div.uf; + return result; +} + +PS2Float PS2Float::ELENG(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + PS2Float p = ACC.MulAdd(z, z); + return p.Sqrt(); +} + +PS2Float PS2Float::ERCPR() +{ + return PS2Float(ONE).Div(*this); +} + +PS2Float PS2Float::ERLENG(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + PS2Float p = ACC.MulAdd(z, z); + p = PS2Float(ONE).Rsqrt(p); + return p; +} + +PS2Float PS2Float::ERSADD(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + PS2Float p = ACC.MulAdd(z, z); + p = PS2Float(ONE).Div(p); + return p; +} + +PS2Float PS2Float::ESQRT() +{ + 
return Sqrt(); +} + +PS2Float PS2Float::ESQUR() +{ + return Mul(*this); +} + +PS2Float PS2Float::ESUM(PS2Float y, PS2Float z, PS2Float w) +{ + PS2Float ACC = Mul(PS2Float(ONE)); + ACC.MulAddAcc(y, PS2Float(ONE)); + ACC.MulAddAcc(z, PS2Float(ONE)); + return ACC.MulAdd(w, PS2Float(ONE)); +} + +PS2Float PS2Float::ERSQRT() +{ + return PS2Float(ONE).Rsqrt(*this); +} + +PS2Float PS2Float::ESADD(PS2Float y, PS2Float z) +{ + PS2Float ACC = Mul(*this); + ACC.MulAddAcc(y, y); + return ACC.MulAdd(z, z); +} + +PS2Float PS2Float::EEXP() +{ + float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, + 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; + + PS2Float tmp1 = Mul(*this); + PS2Float ACC = Mul(PS2Float(consts[0])); + PS2Float tmp2 = tmp1.Mul(*this); + ACC.MulAddAcc(tmp1, PS2Float(consts[1])); + tmp1 = tmp2.Mul(*this); + ACC.MulAddAcc(tmp2, PS2Float(consts[2])); + tmp2 = tmp1.Mul(*this); + ACC.MulAddAcc(tmp1, PS2Float(consts[3])); + tmp1 = tmp2.Mul(*this); + ACC.MulAddAcc(tmp2, PS2Float(consts[4])); + ACC.MulAddAcc(PS2Float(ONE), PS2Float(ONE)); + PS2Float p = ACC.MulAdd(tmp1, PS2Float(consts[5])); + p = p.Mul(p); + p = p.Mul(p); + p = PS2Float(ONE).Div(p); + + return p; +} + +PS2Float PS2Float::EATAN() +{ + float eatanconst[9] = {0.999999344348907f, -0.333298563957214f, 0.199465364217758f, -0.13085337519646f, + 0.096420042216778f, -0.055909886956215f, 0.021861229091883f, -0.004054057877511f, + 0.785398185253143f}; + + PS2Float tmp1 = Add(PS2Float(ONE)); + PS2Float tmp2 = Sub(PS2Float(ONE)); + *this = tmp2.Div(tmp1); + PS2Float tmp3 = Mul(*this); + PS2Float ACC = PS2Float(eatanconst[0]).Mul(*this); + tmp1 = tmp3.Mul(*this); + tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(eatanconst[1])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(eatanconst[2])); + tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(eatanconst[3])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(eatanconst[4])); + tmp2 = tmp1.Mul(tmp3); + 
ACC.MulAddAcc(tmp1, PS2Float(eatanconst[5])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(eatanconst[6])); + ACC.MulAddAcc(PS2Float(ONE), PS2Float(eatanconst[8])); + + return ACC.MulAdd(tmp1, PS2Float(eatanconst[7])); +} + +PS2Float PS2Float::ESIN() +{ + float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; + + PS2Float tmp3 = Mul(*this); + PS2Float ACC = Mul(PS2Float(sinconsts[0])); + PS2Float tmp1 = tmp3.Mul(*this); + PS2Float tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(sinconsts[1])); + tmp1 = tmp2.Mul(tmp3); + ACC.MulAddAcc(tmp2, PS2Float(sinconsts[2])); + tmp2 = tmp1.Mul(tmp3); + ACC.MulAddAcc(tmp1, PS2Float(sinconsts[3])); + + return ACC.MulAdd(tmp2, PS2Float(sinconsts[4])); +} + +bool PS2Float::IsDenormalized() +{ + return Exponent() == 0; +} + +bool PS2Float::IsZero() +{ + return Abs() == 0; +} + +u32 PS2Float::Abs() +{ + return (raw & MAX_FLOATING_POINT_VALUE); +} + +PS2Float PS2Float::Negate() +{ + return PS2Float(raw ^ SIGNMASK); +} + +s32 PS2Float::CompareTo(PS2Float other) +{ + s32 selfTwoComplementVal = (s32)Abs(); + if (Sign()) + selfTwoComplementVal = -selfTwoComplementVal; + + s32 otherTwoComplementVal = (s32)other.Abs(); + if (other.Sign()) + otherTwoComplementVal = -otherTwoComplementVal; + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +s32 PS2Float::CompareOperands(PS2Float other) +{ + u32 selfTwoComplementVal = Abs(); + u32 otherTwoComplementVal = other.Abs(); + + if (selfTwoComplementVal < otherTwoComplementVal) + return -1; + else if (selfTwoComplementVal == otherTwoComplementVal) + return 0; + else + return 1; +} + +double PS2Float::ToDouble() +{ + return std::bit_cast<double>(((u64)Sign() << 63) | ((((u64)Exponent() - BIAS) + 1023ULL) << 52) | ((u64)Mantissa() << 29)); // sign to bit 63, exponent rebiased by 1023 into bits 52 and up, 23-bit mantissa shifted left by 29, then the 64-bit pattern is reinterpreted as a double +} + +std::string PS2Float::ToString() +{ + double res = ToDouble(); + + u32 value = raw; +
std::ostringstream oss; + oss << std::fixed << std::setprecision(6); + + if (IsDenormalized()) + { + oss << "Denormalized(" << res << ")"; + } + else if (value == MAX_FLOATING_POINT_VALUE) + { + oss << "Fmax(" << res << ")"; + } + else if (value == MIN_FLOATING_POINT_VALUE) + { + oss << "-Fmax(" << res << ")"; + } + else + { + oss << "PS2Float(" << res << ")"; + } + + return oss.str(); +} + +PS2Float PS2Float::DoAdd(PS2Float other) +{ + u8 selfExponent = Exponent(); + s32 resExponent = selfExponent - other.Exponent(); + + if (resExponent < 0) + return other.DoAdd(*this); + else if (resExponent >= 25) + return *this; + + const u8 roundingMultiplier = 6; + + // http://graphics.stanford.edu/~seander/bithacks.html#ConditionalNegate + u32 sign1 = (u32)((s32)raw >> 31); + s32 selfMantissa = (s32)(((Mantissa() | 0x800000) ^ sign1) - sign1); + u32 sign2 = (u32)((s32)other.raw >> 31); + s32 otherMantissa = (s32)(((other.Mantissa() | 0x800000) ^ sign2) - sign2); + + // PS2 multiply by 2 before doing the Math here. + s32 man = (selfMantissa << roundingMultiplier) + ((otherMantissa << roundingMultiplier) >> resExponent); + s32 absMan = abs(man); + if (absMan == 0) + return PS2Float(0); + + // Remove from exponent the PS2 Multiplier value. + s32 rawExp = selfExponent - roundingMultiplier; + + s32 amount = Common::normalizeAmounts[Common::CountLeadingSignBits(absMan)]; + rawExp -= amount; + absMan <<= amount; + + s32 msbIndex = Common::BitScanReverse8(absMan >> MANTISSA_BITS); + rawExp += msbIndex; + absMan >>= msbIndex; + + if (rawExp > 255) + { + PS2Float result = man < 0 ? 
Min() : Max(); + result.of = true; + return result; + } + else if (rawExp < 1) + { + PS2Float result = PS2Float(man < 0, 0, 0); + result.uf = true; + return result; + } + + return PS2Float(((u32)man & SIGNMASK) | (u32)rawExp << MANTISSA_BITS | ((u32)absMan & 0x7FFFFF)); +} + +PS2Float PS2Float::DoMul(PS2Float other) +{ + u8 selfExponent = Exponent(); + u8 otherExponent = other.Exponent(); + u32 selfMantissa = Mantissa() | 0x800000; + u32 otherMantissa = other.Mantissa() | 0x800000; + u32 sign = (raw ^ other.raw) & SIGNMASK; + + s32 resExponent = selfExponent + otherExponent - 127; + u32 resMantissa = (u32)(MulMantissa(selfMantissa, otherMantissa) >> MANTISSA_BITS); + + if (resMantissa > 0xFFFFFF) + { + resMantissa >>= 1; + resExponent++; + } + + if (resExponent > 255) + { + PS2Float result = PS2Float(sign | MAX_FLOATING_POINT_VALUE); + result.of = true; + return result; + } + else if (resExponent < 1) + { + PS2Float result = PS2Float(sign); + result.uf = true; + return result; + } + + return PS2Float(sign | (u32)(resExponent << MANTISSA_BITS) | (resMantissa & 0x7FFFFF)); +} + +PS2Float PS2Float::Itof(s32 complement, s32 f1) +{ + if (f1 == 0) + return PS2Float(0); + + s32 resExponent; + + bool negative = f1 < 0; + + if (f1 == -2147483648) + { + if (complement <= 0) + // special case + return PS2Float(0xcf000000); + else + f1 = 2147483647; + } + + s32 u = std::abs(f1); + + s32 shifts; + + s32 lzcnt = Common::CountLeadingSignBits(u); + if (lzcnt < 8) + { + s32 count = 8 - lzcnt; + u >>= count; + shifts = -count; + } + else + { + s32 count = lzcnt - 8; + u <<= count; + shifts = count; + } + + resExponent = BIAS + MANTISSA_BITS - shifts - complement; + + if (resExponent >= 158) + return negative ? 
PS2Float(0xcf000000) : PS2Float(0x4f000000); + else if (resExponent >= 0) + return PS2Float(negative, (u8)resExponent, (u32)u); + + return PS2Float(0); +} + +s32 PS2Float::Ftoi(s32 complement, u32 f1) +{ + u32 a, result; + + a = f1; + if ((f1 & 0x7F800000) == 0) + result = 0; + else + { + complement = (s32)(f1 >> MANTISSA_BITS & 0xFF) + complement; + f1 &= 0x7FFFFF; + f1 |= 0x800000; + if (complement < 158) + { + if (complement > 126) + { + f1 = (f1 << 7) >> (31 - ((u8)complement - 126)); + if ((s32)a < 0) + f1 = ~f1 + 1; + result = f1; + } + else + result = 0; + } + else if ((s32)a < 0) + result = SIGNMASK; + else + result = MAX_FLOATING_POINT_VALUE; + } + + return (s32)result; +} + +u8 PS2Float::Clip(u32 f1, u32 f2, bool& cplus, bool& cminus) +{ + bool resultPlus = false; + bool resultMinus = false; + u32 a; + + if ((f1 & 0x7F800000) == 0) + { + f1 &= 0xFF800000; + } + + a = f1; + + if ((f2 & 0x7F800000) == 0) + { + f2 &= 0xFF800000; + } + + f1 = f1 & MAX_FLOATING_POINT_VALUE; + f2 = f2 & MAX_FLOATING_POINT_VALUE; + + if ((-1 < (int)a) && (f2 < f1)) + resultPlus = true; + + cplus = resultPlus; + + if (((int)a < 0) && (f2 < f1)) + resultMinus = true; + + cminus = resultMinus; + + return 0; +} + +bool PS2Float::DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b) +{ + return a.Sign() ^ b.Sign(); +} + +bool PS2Float::DetermineAdditionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign() || !b.Sign()) + return false; + else if (a.Sign() && b.Sign()) + return true; + else + Console.Error("Unhandled addition operation flags"); + } + + return a.CompareOperands(b) >= 0 ? a.Sign() : b.Sign(); +} + +bool PS2Float::DetermineSubtractionOperationSign(PS2Float a, PS2Float b) +{ + if (a.IsZero() && b.IsZero()) + { + if (!a.Sign() || b.Sign()) + return false; + else if (a.Sign() && !b.Sign()) + return true; + else + Console.Error("Unhandled subtraction operation flags"); + } + + return a.CompareOperands(b) >= 0 ? 
a.Sign() : !b.Sign(); +} + +u8 PS2Float::DetermineMacException(u8 mode, u32 acc, bool acc_oflw, bool moflw, s32 msign, u32& addsubres, bool& oflw, bool& uflw) +{ + bool roundToMax; + + if ((mode == 3) || (mode == 8)) + roundToMax = msign == 0; + else + { + if ((mode != 4) && (mode != 9)) + { + Console.Error("Unhandled MacFlag operation flags"); + return 1; + } + + roundToMax = msign != 0; + } + + if (!acc_oflw) + { + if (moflw) + { + if (roundToMax) + { + addsubres = MAX_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + else + { + addsubres = MIN_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + } + } + else if (!moflw) + { + addsubres = acc; + uflw = false; + oflw = true; + } + else if (roundToMax) + { + addsubres = MAX_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + else + { + addsubres = MIN_FLOATING_POINT_VALUE; + uflw = false; + oflw = true; + } + + return 0; +} diff --git a/pcsx2/PS2Float.h b/pcsx2/PS2Float.h new file mode 100644 index 0000000000..5744f60e1a --- /dev/null +++ b/pcsx2/PS2Float.h @@ -0,0 +1,123 @@ +// SPDX-FileCopyrightText: 2002-2024 PCSX2 Dev Team +// SPDX-License-Identifier: GPL-3.0+ + +#pragma once + +class PS2Float +{ +public: + static constexpr u8 BIAS = 127; + static constexpr u8 MANTISSA_BITS = 23; + static constexpr u32 SIGNMASK = 0x80000000; + static constexpr u32 MAX_FLOATING_POINT_VALUE = 0x7FFFFFFF; + static constexpr u32 MIN_FLOATING_POINT_VALUE = 0xFFFFFFFF; + static constexpr u32 ONE = 0x3F800000; + static constexpr u32 MIN_ONE = 0xBF800000; + + bool dz = false; + bool iv = false; + bool of = false; + bool uf = false; + + u32 raw; + + constexpr u32 Mantissa() const { return raw & 0x7FFFFF; } + constexpr u8 Exponent() const { return (raw >> 23) & 0xFF; } + constexpr bool Sign() const { return ((raw >> 31) & 1) != 0; } + + PS2Float(s32 value); + + PS2Float(u32 value); + + PS2Float(float value); + + PS2Float(bool sign, u8 exponent, u32 mantissa); + + static PS2Float Max(); + + static PS2Float Min(); + + 
static PS2Float One(); + + static PS2Float MinOne(); + + static PS2Float Itof(s32 complement, s32 f1); + + static s32 Ftoi(s32 complement, u32 f1); + + static u8 Clip(u32 f1, u32 f2, bool& cplus, bool& cminus); + + PS2Float Add(PS2Float addend); + + PS2Float Sub(PS2Float subtrahend); + + PS2Float Mul(PS2Float mulend); + + PS2Float MulAdd(PS2Float opsend, PS2Float optend); + + PS2Float MulAddAcc(PS2Float opsend, PS2Float optend); + + PS2Float MulSub(PS2Float opsend, PS2Float optend); + + PS2Float MulSubAcc(PS2Float opsend, PS2Float optend); + + PS2Float Div(PS2Float divend); + + PS2Float Sqrt(); + + PS2Float Rsqrt(PS2Float other); + + PS2Float ELENG(PS2Float y, PS2Float z); + + PS2Float ERCPR(); + + PS2Float ERLENG(PS2Float y, PS2Float z); + + PS2Float ERSADD(PS2Float y, PS2Float z); + + PS2Float ESQRT(); + + PS2Float ESQUR(); + + PS2Float ESUM(PS2Float y, PS2Float z, PS2Float w); + + PS2Float ERSQRT(); + + PS2Float ESADD(PS2Float y, PS2Float z); + + PS2Float EEXP(); + + PS2Float EATAN(); + + PS2Float ESIN(); + + bool IsDenormalized(); + + bool IsZero(); + + u32 Abs(); + + PS2Float Negate(); + + s32 CompareTo(PS2Float other); + + s32 CompareOperands(PS2Float other); + + double ToDouble(); + + std::string ToString(); + +protected: +private: + PS2Float DoAdd(PS2Float other); + + PS2Float DoMul(PS2Float other); + + static bool DetermineMultiplicationDivisionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineAdditionOperationSign(PS2Float a, PS2Float b); + + static bool DetermineSubtractionOperationSign(PS2Float a, PS2Float b); + + static u8 DetermineMacException(u8 mode, u32 acc, bool acc_oflw, bool moflw, s32 msign, u32& addsubres, bool& oflw, bool& uflw); +}; diff --git a/pcsx2/Pcsx2Config.cpp b/pcsx2/Pcsx2Config.cpp index c70f1e9480..95bb1aaa42 100644 --- a/pcsx2/Pcsx2Config.cpp +++ b/pcsx2/Pcsx2Config.cpp @@ -537,14 +537,27 @@ void Pcsx2Config::RecompilerOptions::LoadSave(SettingsWrapper& wrap) SettingsWrapBitBool(vu0ExtraOverflow); 
SettingsWrapBitBool(vu0SignOverflow); SettingsWrapBitBool(vu0Underflow); + + SettingsWrapBitBool(vu0SoftAddSub); + SettingsWrapBitBool(vu0SoftMulDiv); + SettingsWrapBitBool(vu0SoftSqrt); + SettingsWrapBitBool(vu1Overflow); SettingsWrapBitBool(vu1ExtraOverflow); SettingsWrapBitBool(vu1SignOverflow); SettingsWrapBitBool(vu1Underflow); + SettingsWrapBitBool(vu1SoftAddSub); + SettingsWrapBitBool(vu1SoftMulDiv); + SettingsWrapBitBool(vu1SoftSqrt); + SettingsWrapBitBool(fpuOverflow); SettingsWrapBitBool(fpuExtraOverflow); SettingsWrapBitBool(fpuFullMode); + + SettingsWrapBitBool(fpuSoftAddSub); + SettingsWrapBitBool(fpuSoftMulDiv); + SettingsWrapBitBool(fpuSoftSqrt); } u32 Pcsx2Config::RecompilerOptions::GetEEClampMode() const diff --git a/pcsx2/VU.h b/pcsx2/VU.h index 15d944c90b..f81ea9b52d 100644 --- a/pcsx2/VU.h +++ b/pcsx2/VU.h @@ -149,8 +149,8 @@ struct alignas(16) VURegs alignas(16) u32 micro_macflags[4]; alignas(16) u32 micro_clipflags[4]; alignas(16) u32 micro_statusflags[4]; - // MAC/Status flags -- these are used by interpreters but are kind of hacky - // and shouldn't be relied on for any useful/valid info. Would like to move them out of + // MAC/Status flags -- these are used by interpreters but are kind of hacky without soft floats + // and shouldn't be relied on for any useful/valid info without using soft floats. Would like to move them out of // this struct eventually. u32 macflag; u32 statusflag; diff --git a/pcsx2/VUflags.cpp b/pcsx2/VUflags.cpp index dd5921a69d..f5506d4ef2 100644 --- a/pcsx2/VUflags.cpp +++ b/pcsx2/VUflags.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: GPL-3.0+ #include "Common.h" +#include "PS2Float.h" #include #include @@ -12,10 +13,10 @@ /* NEW FLAGS */ //By asadr.
Thnkx F|RES :p /*****************************************/ -static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) +static __ri u32 VU_MAC_UPDATE( s32 shift, VURegs* VU, float f) { u32 v = *(u32*)&f; - int exp = (v >> 23) & 0xff; + s32 exp = (v >> 23) & 0xff; u32 s = v & 0x80000000; if (s) @@ -46,6 +47,32 @@ static __ri u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f ) } } +static __ri u32 VU_MAC_UPDATE(s32 shift, VURegs* VU, PS2Float f) +{ + u32 v = f.raw; + + if (v & PS2Float::SIGNMASK) + VU->macflag |= 0x0010 << shift; + else + VU->macflag &= ~(0x0010 << shift); + + if (f.uf) // tested before IsZero(): underflowed results carry a zero magnitude, so a zero test placed first would make this branch unreachable and the 0x0100 bit would never be raised + { + VU->macflag = (VU->macflag & ~(0x1000 << shift)) | (0x0101 << shift); + return v; + } + else if (f.IsZero()) { VU->macflag = (VU->macflag & ~(0x1100 << shift)) | (0x0001 << shift); } + else if (f.of) { VU->macflag = (VU->macflag & ~(0x0101 << shift)) | (0x1000 << shift); } + else { VU->macflag = (VU->macflag & ~(0x1101 << shift)); } + + return v; +} + +__fi bool IsOverflowSet(VURegs* VU, s32 shift) +{ + return (VU->macflag & (0x1000 << shift)); +} + __fi u32 VU_MACx_UPDATE(VURegs * VU, float x) { return VU_MAC_UPDATE(3, VU, x); @@ -66,6 +93,26 @@ __fi u32 VU_MACw_UPDATE(VURegs * VU, float w) return VU_MAC_UPDATE(0, VU, w); } +__fi u32 VU_MACx_UPDATE(VURegs* VU, PS2Float x) +{ + return VU_MAC_UPDATE(3, VU, x); +} + +__fi u32 VU_MACy_UPDATE(VURegs* VU, PS2Float y) +{ + return VU_MAC_UPDATE(2, VU, y); +} + +__fi u32 VU_MACz_UPDATE(VURegs* VU, PS2Float z) +{ + return VU_MAC_UPDATE(1, VU, z); +} + +__fi u32 VU_MACw_UPDATE(VURegs* VU, PS2Float w) +{ + return VU_MAC_UPDATE(0, VU, w); +} + __fi void VU_MACx_CLEAR(VURegs * VU) { VU->macflag&= ~(0x1111<<3); diff --git a/pcsx2/VUflags.h b/pcsx2/VUflags.h index 3ac149d5fe..75c9eade7f 100644 --- a/pcsx2/VUflags.h +++ b/pcsx2/VUflags.h @@ -3,11 +3,17 @@ #pragma once #include "VU.h" +#include "PS2Float.h" +extern bool IsOverflowSet(VURegs* VU, s32 shift); extern u32 VU_MACx_UPDATE(VURegs * VU, float x); extern u32 VU_MACy_UPDATE(VURegs
* VU, float y); extern u32 VU_MACz_UPDATE(VURegs * VU, float z); extern u32 VU_MACw_UPDATE(VURegs * VU, float w); +extern u32 VU_MACx_UPDATE(VURegs* VU, PS2Float x); +extern u32 VU_MACy_UPDATE(VURegs* VU, PS2Float y); +extern u32 VU_MACz_UPDATE(VURegs* VU, PS2Float z); +extern u32 VU_MACw_UPDATE(VURegs* VU, PS2Float w); extern void VU_MACx_CLEAR(VURegs * VU); extern void VU_MACy_CLEAR(VURegs * VU); extern void VU_MACz_CLEAR(VURegs * VU); diff --git a/pcsx2/VUops.cpp b/pcsx2/VUops.cpp index 81f14646fb..bfb1310e71 100644 --- a/pcsx2/VUops.cpp +++ b/pcsx2/VUops.cpp @@ -3,6 +3,7 @@ #include "Common.h" #include "VUops.h" +#include "PS2Float.h" #include "GS.h" #include "Gif_Unit.h" #include "MTVU.h" @@ -462,34 +463,48 @@ static __fi float vuDouble(u32 f) } #endif -static __fi float vuADD_TriAceHack(u32 a, u32 b) +static __fi PS2Float vuAccurateAdd(u32 a, u32 b) { - // On VU0 TriAce Games use ADDi and expects these bit-perfect results: - //if (a == 0xb3e2a619 && b == 0x42546666) return vuDouble(0x42546666); - //if (a == 0x8b5b19e9 && b == 0xc7f079b3) return vuDouble(0xc7f079b3); - //if (a == 0x4b1ed4a8 && b == 0x43a02666) return vuDouble(0x4b1ed5e7); - //if (a == 0x7d1ca47b && b == 0x42f23333) return vuDouble(0x7d1ca47b); + return PS2Float(a).Add(PS2Float(b)); +} - // In the 3rd case, some other rounding error is giving us incorrect - // operands ('a' is wrong); and therefor an incorrect result. - // We're getting: 0x4b1ed4a8 + 0x43a02666 = 0x4b1ed5e8 - // We should be getting: 0x4b1ed4a7 + 0x43a02666 = 0x4b1ed5e7 - // microVU gets the correct operands and result. The interps likely - // don't get it due to rounding towards nearest in other calculations. 
+static __fi PS2Float vuAccurateSub(u32 a, u32 b) +{ + return PS2Float(a).Sub(PS2Float(b)); +} - // microVU uses something like this to get TriAce games working, - // but VU interpreters don't seem to need it currently: +static __fi PS2Float vuAccurateMul(u32 a, u32 b) +{ + return PS2Float(a).Mul(PS2Float(b)); +} - // Update Sept 2021, now the interpreters don't suck, they do - Refraction - s32 aExp = (a >> 23) & 0xff; - s32 bExp = (b >> 23) & 0xff; - if (aExp - bExp >= 25) b &= 0x80000000; - if (aExp - bExp <=-25) a &= 0x80000000; - float ret = vuDouble(a) + vuDouble(b); - //DevCon.WriteLn("aExp = %d, bExp = %d", aExp, bExp); - //DevCon.WriteLn("0x%08x + 0x%08x = 0x%08x", a, b, (u32&)ret); - //DevCon.WriteLn("%f + %f = %f", vuDouble(a), vuDouble(b), ret); - return ret; +static __fi PS2Float vuAccurateDiv(u32 a, u32 b) +{ + return PS2Float(a).Div(PS2Float(b)); +} + +static __fi PS2Float vuAccurateMulAdd(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulAdd(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float vuAccurateMulSub(u32 a, u32 b, u32 c) +{ + return PS2Float(a).MulSub(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float vuAccurateMulAddAcc(u32 a, u32 b, u32 c, bool oflw) +{ + PS2Float acc = PS2Float(a); + acc.of = oflw; // set only the overflow flag; assigning the bool to acc itself would go through an implicit PS2Float constructor and replace the accumulator value + return acc.MulAddAcc(PS2Float(b), PS2Float(c)); +} + +static __fi PS2Float vuAccurateMulSubAcc(u32 a, u32 b, u32 c, bool oflw) +{ + PS2Float acc = PS2Float(a); + acc.of = oflw; // set only the overflow flag; assigning the bool to acc itself would go through an implicit PS2Float constructor and replace the accumulator value + return acc.MulSubAcc(PS2Float(b), PS2Float(c)); } template @@ -549,34 +564,55 @@ static __fi void applyBinaryMACOpBroadcast(VURegs* VU, u32 bc) VU_STAT_UPDATE(VU); } +template +static __fi void applyAccurateBinaryMACOp(VURegs* VU) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else
VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateBinaryMACOpBroadcast(VURegs* VU, u32 bc) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->VF[_Fs_].i.x, bc)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->VF[_Fs_].i.y, bc)); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->VF[_Fs_].i.z, bc)); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->VF[_Fs_].i.w, bc)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + static __fi float _vuOpADD(u32 fs, u32 ft) { return vuDouble(fs) + vuDouble(ft); } +static __fi PS2Float _vuAccurateOpADD(u32 fs, u32 ft) +{ + return PS2Float(fs).Add(PS2Float(ft)); +} + static __fi void _vuADD(VURegs* VU) { - applyBinaryMACOp<_vuOpADD, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpADD, MACOpDst::Fd>(VU); + else + applyBinaryMACOp<_vuOpADD, MACOpDst::Fd>(VU); } static __fi void vuADDbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Fd>(VU, bc); -} - -static __fi void vuADDbc_addsubhack(VURegs* VU, u32 bc) -{ - if (CHECK_VUADDSUBHACK) - applyBinaryMACOpBroadcast(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpADD, MACOpDst::Fd>(VU, bc); else applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Fd>(VU, bc); } -static __fi void _vuADDi(VURegs* VU) -{ - vuADDbc_addsubhack(VU, VU->VI[REG_I].UL); -} - +static __fi void _vuADDi(VURegs* VU) { vuADDbc(VU, VU->VI[REG_I].UL); } static __fi void _vuADDq(VURegs* VU) { vuADDbc(VU, VU->VI[REG_Q].UL); } static __fi void _vuADDx(VURegs* VU) { vuADDbc(VU, VU->VF[_Ft_].i.x); } static __fi void _vuADDy(VURegs* VU) { vuADDbc(VU, VU->VF[_Ft_].i.y); } @@ -585,12 +621,18 @@ static __fi void _vuADDw(VURegs* VU) { vuADDbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuADDA(VURegs* VU) { - applyBinaryMACOp<_vuOpADD, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpADD, MACOpDst::Acc>(VU); + else + applyBinaryMACOp<_vuOpADD, MACOpDst::Acc>(VU); } static __fi void vuADDAbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpADD, MACOpDst::Acc>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpADD, MACOpDst::Acc>(VU, bc); } static __fi void _vuADDAi(VURegs* VU) { vuADDAbc(VU, VU->VI[REG_I].UL); } @@ -605,14 +647,25 @@ static __fi float _vuOpSUB(u32 fs, u32 ft) return vuDouble(fs) - vuDouble(ft); } +static __fi PS2Float _vuAccurateOpSUB(u32 fs, u32 ft) +{ + return PS2Float(fs).Sub(PS2Float(ft)); +} + static __fi void _vuSUB(VURegs* VU) { - applyBinaryMACOp<_vuOpSUB, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpSUB, MACOpDst::Fd>(VU); + else + applyBinaryMACOp<_vuOpSUB, MACOpDst::Fd>(VU); } static __fi void vuSUBbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpSUB, MACOpDst::Fd>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Fd>(VU, bc); } static __fi void _vuSUBi(VURegs* VU) { vuSUBbc(VU, VU->VI[REG_I].UL); } @@ -624,12 +677,18 @@ static __fi void _vuSUBw(VURegs* VU) { vuSUBbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuSUBA(VURegs* VU) { - applyBinaryMACOp<_vuOpSUB, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpSUB, MACOpDst::Acc>(VU); + else + applyBinaryMACOp<_vuOpSUB, MACOpDst::Acc>(VU); } static __fi void vuSUBAbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpSUB, MACOpDst::Acc>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpSUB, MACOpDst::Acc>(VU, bc); } static __fi void _vuSUBAi(VURegs* VU) { vuSUBAbc(VU, VU->VI[REG_I].UL); } @@ -644,14 +703,25 @@ static __fi float _vuOpMUL(u32 fs, u32 ft) return vuDouble(fs) * vuDouble(ft); } +static __fi PS2Float _vuAccurateOpMUL(u32 fs, u32 ft) +{ + return PS2Float(fs).Mul(PS2Float(ft)); +} + static __fi void _vuMUL(VURegs* VU) { - applyBinaryMACOp<_vuOpMUL, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpMUL, MACOpDst::Fd>(VU); + else + applyBinaryMACOp<_vuOpMUL, MACOpDst::Fd>(VU); } static __fi void vuMULbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpMUL, MACOpDst::Fd>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Fd>(VU, bc); } static __fi void _vuMULi(VURegs* VU) { vuMULbc(VU, VU->VI[REG_I].UL); } @@ -664,12 +734,18 @@ static __fi void _vuMULw(VURegs* VU) { vuMULbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMULA(VURegs* VU) { - applyBinaryMACOp<_vuOpMUL, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOp<_vuAccurateOpMUL, MACOpDst::Acc>(VU); + else + applyBinaryMACOp<_vuOpMUL, MACOpDst::Acc>(VU); } static __fi void vuMULAbc(VURegs* VU, u32 bc) { - applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateBinaryMACOpBroadcast<_vuAccurateOpMUL, MACOpDst::Acc>(VU, bc); + else + applyBinaryMACOpBroadcast<_vuOpMUL, MACOpDst::Acc>(VU, bc); } static __fi void _vuMULAi(VURegs* VU) { vuMULAbc(VU, VU->VI[REG_I].UL); } @@ -701,19 +777,81 @@ static __fi void applyTernaryMACOpBroadcast(VURegs* VU, u32 bc) VU_STAT_UPDATE(VU); } +template +static __fi void applyAccurateTernaryMACOp(VURegs* VU) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y)); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z)); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateTernaryMACOpBroadcast(VURegs* VU, u32 bc) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, bc)); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, bc)); } else 
VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, bc)); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, bc)); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateAccumulatorTernaryMACOp(VURegs* VU) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.x, IsOverflowSet(VU, 3))); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.y, IsOverflowSet(VU, 2))); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.z, IsOverflowSet(VU, 1))); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, VU->VF[_Ft_].i.w, IsOverflowSet(VU, 0))); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + +template +static __fi void applyAccurateAccumulatorTernaryMACOpBroadcast(VURegs* VU, u32 bc) +{ + VECTOR* dst = _getDst(VU); + if (_X) { dst->i.x = VU_MACx_UPDATE(VU, Fn(VU->ACC.i.x, VU->VF[_Fs_].i.x, bc, IsOverflowSet(VU, 3))); } else VU_MACx_CLEAR(VU); + if (_Y) { dst->i.y = VU_MACy_UPDATE(VU, Fn(VU->ACC.i.y, VU->VF[_Fs_].i.y, bc, IsOverflowSet(VU, 2))); } else VU_MACy_CLEAR(VU); + if (_Z) { dst->i.z = VU_MACz_UPDATE(VU, Fn(VU->ACC.i.z, VU->VF[_Fs_].i.z, bc, IsOverflowSet(VU, 1))); } else VU_MACz_CLEAR(VU); + if (_W) { dst->i.w = VU_MACw_UPDATE(VU, Fn(VU->ACC.i.w, VU->VF[_Fs_].i.w, bc, IsOverflowSet(VU, 0))); } else VU_MACw_CLEAR(VU); + VU_STAT_UPDATE(VU); +} + static __fi float _vuOpMADD(u32 acc, u32 fs, u32 ft) { return vuDouble(acc) + vuDouble(fs) * vuDouble(ft); } +static __fi PS2Float _vuAccurateOpMADD(u32 acc, u32 fs, u32 ft) +{ + return PS2Float(acc).MulAdd(PS2Float(fs), PS2Float(ft)); +} + +static __fi PS2Float _vuAccurateOpMADDA(u32 acc, u32 fs, u32 ft, bool oflw) +{ + PS2Float accfloat = 
PS2Float(acc); + accfloat.of = oflw; + return accfloat.MulAddAcc(PS2Float(fs), PS2Float(ft)); +} + static __fi void _vuMADD(VURegs* VU) { - applyTernaryMACOp<_vuOpMADD, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOp<_vuAccurateOpMADD, MACOpDst::Fd>(VU); + else + applyTernaryMACOp<_vuOpMADD, MACOpDst::Fd>(VU); } static __fi void vuMADDbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOpBroadcast<_vuAccurateOpMADD, MACOpDst::Fd>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Fd>(VU, bc); } static __fi void _vuMADDi(VURegs* VU) { vuMADDbc(VU, VU->VI[REG_I].UL); } @@ -725,12 +863,18 @@ static __fi void _vuMADDw(VURegs* VU) { vuMADDbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMADDA(VURegs* VU) { - applyTernaryMACOp<_vuOpMADD, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateAccumulatorTernaryMACOp<_vuAccurateOpMADDA, MACOpDst::Acc>(VU); + else + applyTernaryMACOp<_vuOpMADD, MACOpDst::Acc>(VU); } static __fi void vuMADDAbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + applyAccurateAccumulatorTernaryMACOpBroadcast<_vuAccurateOpMADDA, MACOpDst::Acc>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMADD, MACOpDst::Acc>(VU, bc); } static __fi void _vuMADDAi(VURegs* VU) { vuMADDAbc(VU, VU->VI[REG_I].UL); } @@ -745,14 +889,32 @@ static __fi float _vuOpMSUB(u32 acc, u32 fs, u32 ft) return vuDouble(acc) - vuDouble(fs) * vuDouble(ft); } +static __fi PS2Float _vuAccurateOpMSUB(u32 acc, u32 fs, u32 ft) +{ + return PS2Float(acc).MulSub(PS2Float(fs), PS2Float(ft)); +} + +static __fi PS2Float _vuAccurateOpMSUBA(u32 acc, u32 fs, u32 ft, bool oflw) +{ + PS2Float accfloat = PS2Float(acc); + accfloat.of = oflw; + return accfloat.MulSubAcc(PS2Float(fs), PS2Float(ft)); +} + static __fi void _vuMSUB(VURegs* VU) { - applyTernaryMACOp<_vuOpMSUB, MACOpDst::Fd>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOp<_vuAccurateOpMSUB, MACOpDst::Fd>(VU); + else + applyTernaryMACOp<_vuOpMSUB, MACOpDst::Fd>(VU); } static __fi void vuMSUBbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Fd>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateTernaryMACOpBroadcast<_vuAccurateOpMSUB, MACOpDst::Fd>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Fd>(VU, bc); } static __fi void _vuMSUBi(VURegs* VU) { vuMSUBbc(VU, VU->VI[REG_I].UL); } @@ -764,12 +926,18 @@ static __fi void _vuMSUBw(VURegs* VU) { vuMSUBbc(VU, VU->VF[_Ft_].i.w); } static __fi void _vuMSUBA(VURegs* VU) { - applyTernaryMACOp<_vuOpMSUB, MACOpDst::Acc>(VU); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + applyAccurateAccumulatorTernaryMACOp<_vuAccurateOpMSUBA, MACOpDst::Acc>(VU); + else + applyTernaryMACOp<_vuOpMSUB, MACOpDst::Acc>(VU); } static __fi void vuMSUBAbc(VURegs* VU, u32 bc) { - applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Acc>(VU, bc); + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) + applyAccurateAccumulatorTernaryMACOpBroadcast<_vuAccurateOpMSUBA, MACOpDst::Acc>(VU, bc); + else + applyTernaryMACOpBroadcast<_vuOpMSUB, MACOpDst::Acc>(VU, bc); } static __fi void _vuMSUBAi(VURegs* VU) { vuMSUBAbc(VU, VU->VI[REG_I].UL); } @@ -840,32 +1008,55 @@ static __fi void _vuMINIw(VURegs* VU) { applyMinMaxBroadcast(VU, VU->VF[ static __fi void _vuOPMULA(VURegs* VU) { - VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); - VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); - VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + { + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.y, VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.z, VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuAccurateMul(VU->VF[_Fs_].i.x, VU->VF[_Ft_].i.y)); + } + else + { + VU->ACC.i.x = VU_MACx_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Ft_].i.z)); + VU->ACC.i.y = VU_MACy_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Ft_].i.x)); + VU->ACC.i.z = VU_MACz_UPDATE(VU, vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Ft_].i.y)); + } VU_STAT_UPDATE(VU); } static __fi void _vuOPMSUB(VURegs* VU) { VECTOR* dst; - float ftx, fty, ftz; - float fsx, fsy, fsz; if (_Fd_ == 0) dst = &RDzero; else dst = &VU->VF[_Fd_]; - ftx = vuDouble(VU->VF[_Ft_].i.x); - fty = vuDouble(VU->VF[_Ft_].i.y); - ftz = vuDouble(VU->VF[_Ft_].i.z); - fsx = vuDouble(VU->VF[_Fs_].i.x); - fsy = vuDouble(VU->VF[_Fs_].i.y); - fsz = vuDouble(VU->VF[_Fs_].i.z); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) + { + u32 ftx = VU->VF[_Ft_].i.x; + u32 fty = VU->VF[_Ft_].i.y; + u32 ftz = VU->VF[_Ft_].i.z; + u32 fsx = VU->VF[_Fs_].i.x; + u32 fsy = VU->VF[_Fs_].i.y; + u32 fsz = VU->VF[_Fs_].i.z; - dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); - dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); - dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + dst->i.x = VU_MACx_UPDATE(VU, vuAccurateMulSub(VU->ACC.i.x, fsy, ftz)); + dst->i.y = VU_MACy_UPDATE(VU, vuAccurateMulSub(VU->ACC.i.y, fsz, ftx)); + dst->i.z = VU_MACz_UPDATE(VU, vuAccurateMulSub(VU->ACC.i.z, fsx, fty)); + } + else + { + float ftx = vuDouble(VU->VF[_Ft_].i.x); + float fty = vuDouble(VU->VF[_Ft_].i.y); + float ftz = vuDouble(VU->VF[_Ft_].i.z); + float fsx = vuDouble(VU->VF[_Fs_].i.x); + float fsy = vuDouble(VU->VF[_Fs_].i.y); + float fsz = vuDouble(VU->VF[_Fs_].i.z); + + dst->i.x = VU_MACx_UPDATE(VU, vuDouble(VU->ACC.i.x) - fsy * ftz); + dst->i.y = VU_MACy_UPDATE(VU, vuDouble(VU->ACC.i.y) - fsz * ftx); + dst->i.z = VU_MACz_UPDATE(VU, vuDouble(VU->ACC.i.z) - fsx * fty); + } VU_STAT_UPDATE(VU); } @@ -932,57 +1123,45 @@ static __fi void _vuCLIP(VURegs* VU) static __fi void _vuDIV(VURegs* VU) { - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - VU->statusflag &= ~0x30; - - if (ft == 0.0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 
1 : 0)) { - if (fs == 0.0) - VU->statusflag |= 0x10; - else - VU->statusflag |= 0x20; + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0xFF7FFFFF; + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + if (fs.IsZero()) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } else - VU->q.UL = 0x7F7FFFFF; + { + VU->q.UL = fs.Div(ft).raw; + } } else { - VU->q.F = fs / ft; - VU->q.F = vuDouble(VU->q.UL); - } -} + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); -static __fi void _vuSQRT(VURegs* VU) -{ - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + VU->statusflag &= ~0x30; - VU->statusflag &= ~0x30; - - if (ft < 0.0) - VU->statusflag |= 0x10; - VU->q.F = sqrt(fabs(ft)); - VU->q.F = vuDouble(VU->q.UL); -} - -static __fi void _vuRSQRT(VURegs* VU) -{ - float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); - float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - float temp; - - VU->statusflag &= ~0x30; - - if (ft == 0.0) - { - VU->statusflag |= 0x20; - - if (fs != 0) + if (ft == 0.0) { + if (fs == 0.0) + VU->statusflag |= 0x10; + else + VU->statusflag |= 0x20; + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) VU->q.UL = 0xFF7FFFFF; @@ -991,25 +1170,117 @@ static __fi void _vuRSQRT(VURegs* VU) } else { - if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ - (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) - VU->q.UL = 0x80000000; - else - VU->q.UL = 0; + VU->q.F = fs / ft; + VU->q.F = vuDouble(VU->q.UL); + } + } +} +static __fi void _vuSQRT(VURegs* VU) +{ + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft.ToDouble() < 0.0) VU->statusflag |= 0x10; + VU->q.UL = PS2Float(ft).Sqrt().raw; + } + else + { + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + + VU->statusflag &= ~0x30; + + if (ft < 0.0) + VU->statusflag |= 0x10; + VU->q.F = sqrt(fabs(ft)); + VU->q.F = vuDouble(VU->q.UL); + } +} + +static __fi void _vuRSQRT(VURegs* VU) +{ + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) + { + PS2Float ft = PS2Float(VU->VF[_Ft_].UL[_Ftf_]); + PS2Float fs = PS2Float(VU->VF[_Fs_].UL[_Fsf_]); + + VU->statusflag &= ~0x30; + + if (ft.IsZero()) + { + VU->statusflag |= 0x20; + + if (!fs.IsZero()) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = PS2Float::MIN_FLOATING_POINT_VALUE; + else + VU->q.UL = PS2Float::MAX_FLOATING_POINT_VALUE; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft.ToDouble() < 0.0) + VU->statusflag |= 0x10; + + VU->q.UL = fs.Rsqrt(PS2Float(ft)).raw; } } else { - if (ft < 0.0) - { - VU->statusflag |= 0x10; - } + float ft = vuDouble(VU->VF[_Ft_].UL[_Ftf_]); + float fs = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + float temp; - temp = sqrt(fabs(ft)); - VU->q.F = fs / temp; - VU->q.F = vuDouble(VU->q.UL); + VU->statusflag &= ~0x30; + + if (ft == 0.0) + { + VU->statusflag |= 0x20; + + if (fs != 0) + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0xFF7FFFFF; + else + VU->q.UL = 0x7F7FFFFF; + } + else + { + if ((VU->VF[_Ft_].UL[_Ftf_] & 0x80000000) ^ + (VU->VF[_Fs_].UL[_Fsf_] & 0x80000000)) + VU->q.UL = 0x80000000; + else + VU->q.UL = 0; + + VU->statusflag |= 0x10; + } + } + else + { + if (ft < 0.0) + VU->statusflag |= 0x10; + + temp = sqrt(fabs(ft)); + VU->q.F = fs / temp; + VU->q.F = vuDouble(VU->q.UL); + } } } @@ 
-1653,45 +1924,61 @@ static __ri void _vuWAITP(VURegs* VU) static __ri void _vuESADD(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ESADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else + { + float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); - VU->p.F = p; + VU->p.F = p; + } } static __ri void _vuERSADD(VURegs* VU) { - float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERSADD(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else + { + float p = (vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x)) + (vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y)) + (vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z)); - if (p != 0.0) - p = 1.0f / p; + if (p != 0.0) + p = 1.0f / p; - VU->p.F = p; + VU->p.F = p; + } } static __ri void _vuELENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); - - if (p >= 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ELENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else { - p = sqrt(p); + float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + + if (p >= 0) + { + p = sqrt(p); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuERLENG(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); - - if (p >= 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ERLENG(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z)).raw; } + else { - p = sqrt(p); - if (p != 0) + float p = vuDouble(VU->VF[_Fs_].i.x) * vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) * vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) * vuDouble(VU->VF[_Fs_].i.z); + + if (p >= 0) { - p = 1.0f / p; + p = sqrt(p); + if (p != 0) + { + p = 1.0f / p; + } } + VU->p.F = p; } - VU->p.F = p; } @@ -1711,99 +1998,140 @@ static __ri float _vuCalculateEATAN(float inputvalue) { return result; } +static __ri PS2Float _vuCalculateAccurateEATAN(PS2Float inputvalue) +{ + return inputvalue.EATAN(); +} + static __ri void _vuEATAN(VURegs* VU) { - float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); - VU->p.F = p; + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].UL[_Fsf_])).raw; } + else + { + float p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].UL[_Fsf_])); + VU->p.F = p; + } } static __ri void _vuEATANxy(VURegs* VU) { - float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.y).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } + else { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + float p = 0; + if (vuDouble(VU->VF[_Fs_].i.x) != 0) + { + p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.y) / vuDouble(VU->VF[_Fs_].i.x)); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuEATANxz(VURegs* VU) { - float p = 0; - if (vuDouble(VU->VF[_Fs_].i.x) != 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = _vuCalculateAccurateEATAN(PS2Float(VU->VF[_Fs_].i.z).Div(PS2Float(VU->VF[_Fs_].i.x))).raw; } + else { - p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + float p = 0; + if (vuDouble(VU->VF[_Fs_].i.x) != 0) + { + p = _vuCalculateEATAN(vuDouble(VU->VF[_Fs_].i.z) / vuDouble(VU->VF[_Fs_].i.x)); + } + VU->p.F = p; } - VU->p.F = p; } static __ri void _vuESUM(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); - VU->p.F = p; + if (CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].i.x).ESUM(PS2Float(VU->VF[_Fs_].i.y), PS2Float(VU->VF[_Fs_].i.z), PS2Float(VU->VF[_Fs_].i.w)).raw; } + else + { + float p = vuDouble(VU->VF[_Fs_].i.x) + vuDouble(VU->VF[_Fs_].i.y) + vuDouble(VU->VF[_Fs_].i.z) + vuDouble(VU->VF[_Fs_].i.w); + VU->p.F = p; + } } static __ri void _vuERCPR(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p != 0) + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERCPR().raw; } + else { - p = 1.0 / p; - } + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p != 0) + { + p = 1.0 / p; + } + + VU->p.F = p; + } } static __ri void _vuESQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESQRT().raw; } + else { - p = sqrt(p); - } + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p >= 0) + { + p = sqrt(p); + } + + VU->p.F = p; + } } static __ri void _vuERSQRT(VURegs* VU) { - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - - if (p >= 0) + if (CHECK_VU_SOFT_SQRT((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ERSQRT().raw; } + else { - p = sqrt(p); - if (p) - { - p = 1.0f / p; - } - } + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - VU->p.F = p; + if (p >= 0) + { + p = sqrt(p); + if (p) + { + p = 1.0f / p; + } + } + + VU->p.F = p; + } } static __ri void _vuESIN(VURegs* VU) { - float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).ESIN().raw; } + else + { + float sinconsts[5] = {1.0f, -0.166666567325592f, 0.008333025500178f, -0.000198074136279f, 0.000002601886990f}; + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); - VU->p.F = vuDouble(*(u32*)&p); + p = (sinconsts[0] * p) + (sinconsts[1] * pow(p, 3)) + (sinconsts[2] * pow(p, 5)) + (sinconsts[3] * pow(p, 7)) + (sinconsts[4] * pow(p, 9)); + VU->p.F = vuDouble(*(u32*)&p); + } } static __ri void _vuEEXP(VURegs* VU) { - float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, - 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; - float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); + if (CHECK_VU_SOFT_MULDIV((VU == &VU1) ? 1 : 0) && CHECK_VU_SOFT_ADDSUB((VU == &VU1) ? 
1 : 0)) { VU->p.UL = PS2Float(VU->VF[_Fs_].UL[_Fsf_]).EEXP().raw; } + else + { + float consts[6] = {0.249998688697815f, 0.031257584691048f, 0.002591371303424f, + 0.000171562001924f, 0.000005430199963f, 0.000000690600018f}; + float p = vuDouble(VU->VF[_Fs_].UL[_Fsf_]); - p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); - p = pow(p, 4); - p = vuDouble(*(u32*)&p); - p = 1 / p; + p = 1.0f + (consts[0] * p) + (consts[1] * pow(p, 2)) + (consts[2] * pow(p, 3)) + (consts[3] * pow(p, 4)) + (consts[4] * pow(p, 5)) + (consts[5] * pow(p, 6)); + p = pow(p, 4); + p = vuDouble(*(u32*)&p); + p = 1 / p; - VU->p.F = p; + VU->p.F = p; + } } static __ri void _vuXITOP(VURegs* VU) diff --git a/pcsx2/pcsx2.vcxproj b/pcsx2/pcsx2.vcxproj index 4535dbb8a8..c5570cee58 100644 --- a/pcsx2/pcsx2.vcxproj +++ b/pcsx2/pcsx2.vcxproj @@ -281,6 +281,7 @@ + @@ -726,6 +727,7 @@ + @@ -1025,4 +1027,4 @@ - + \ No newline at end of file diff --git a/pcsx2/pcsx2.vcxproj.filters b/pcsx2/pcsx2.vcxproj.filters index e72297d4b7..3e9a547e8b 100644 --- a/pcsx2/pcsx2.vcxproj.filters +++ b/pcsx2/pcsx2.vcxproj.filters @@ -289,6 +289,9 @@ {cd8ec519-2196-43f7-86de-7faced2d4296} + + {e244cd3f-4431-4628-a294-d22c9614133b} + @@ -1443,6 +1446,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2399,6 +2405,9 @@ System\Ps2\Iop\SIO\PAD + + System\Ps2\EmotionEngine\Shared + @@ -2428,4 +2437,4 @@ System\Ps2\GS - + \ No newline at end of file