From 464575702b2a5ebeac7223a5540781b3036bc4ed Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 17 Aug 2021 17:27:42 -0700 Subject: [PATCH 01/11] DSPSpy: Replace less_test with cond_test This new test covers all conditions, and also tests overflow, carry, logical zero, and the behavior of NEG --- Source/DSPSpy/tests/cond_test.ds | 248 +++++++++++++++++++++++++++++++ Source/DSPSpy/tests/less_test.ds | 17 --- 2 files changed, 248 insertions(+), 17 deletions(-) create mode 100644 Source/DSPSpy/tests/cond_test.ds delete mode 100644 Source/DSPSpy/tests/less_test.ds diff --git a/Source/DSPSpy/tests/cond_test.ds b/Source/DSPSpy/tests/cond_test.ds new file mode 100644 index 0000000000..151f3e3a9a --- /dev/null +++ b/Source/DSPSpy/tests/cond_test.ds @@ -0,0 +1,248 @@ +incdir "tests" +include "dsp_base.inc" + +test_main: + CLR $acc0 + CLR $acc1 + CALL test_cond + ; 1. ar0: 9969. ac0.h: 0000. sr: 2224 + + LRI $ac0.h, #0x0050 + CALL test_cond + ; 2. ar0: 9969. ac0.h: 0050. sr: 2224. LRI doesn't change sr. + + TST $acc0 + CALL test_cond + ; 3. ar0: 9655. ac0.h: 0050. sr: 2230 + + LRI $ac1.h, #0x0050 + ADD $acc0, $acc1 ; Causes acc0 to overflow, and thus also become negative + CALL test_cond + ; 4. ar0: d655. ac0.h: ffa0. sr: 22ba + + ADD $acc0, $acc1 ; acc0 is now negative, but not overflowed + CALL test_cond + ; 5. ar0: 965a. ac0.h: fff0. sr: 22b8 + + ADD $acc0, $acc1 ; Triggers carry + CALL test_cond + ; 6. ar0: 9695. ac0.h: 0040. sr: 22b1 + + CLR $acc1 + ADD $acc0, $acc1 ; Adding 0 should do nothing + CALL test_cond + ; 7. ar0: 9655. ac0.h: 0040. sr: 22b0 + + SUB $acc0, $acc1 ; Subtracting 0 sets the carry flag + CALL test_cond + ; 8. ar0: 9695. ac0.h: 0040. sr: 22b1 + + LRI $ac1.h, #0x0050 + SUB $acc0, $acc1 ; No carry + CALL test_cond + ; 9. ar0: 965a. ac0.h: fff0. sr: 22b8 + + SUB $acc0, $acc1 ; Carry + CALL test_cond + ; 10. ar0: 969a. ac0.h: ffa0. sr: 22b9 + + SUB $acc0, $acc1 ; Carry and overflow + CALL test_cond + ; 11. ar0: d69a. ac0.h: 0050. 
sr: 22b3 + + SUB $acc0, $acc1 ; Carry + CALL test_cond + ; 12. ar0: 99a9. ac0.h: 0000. sr: 22a5 + + LRI $ac1.h, #0xffb0 ; -0x50 + SUB $acc0, $acc1 ; No carry or overflow + CALL test_cond + ; 13. ar0: 9655. ac0.h: 0050. sr: 22b0 + + SUB $acc0, $acc1 ; Overflow, no carry + CALL test_cond + ; 14. ar0: d655. ac0.h: ffa0. sr: 22ba + + SUB $acc0, $acc1 ; No carry or overflow + CALL test_cond + ; 15. ar0: 965a. ac0.h: fff0. sr: 22b8 + + SUB $acc0, $acc1 ; Carry + CALL test_cond + ; 16. ar0: 9695. ac0.h: 0040. sr: 22b1 + + LRI $ac1.h, #0xff80 + SUB $acc0, $acc1 ; Overflow, no carry + CALL test_cond + ; 17. ar0: d655. ac0.h: ffc0. sr: 22ba + + ADD $acc0, $acc1 ; Overflow and carry + CALL test_cond + ; 18. ar0: d69a. ac0.h: 0040. sr: 22b3 + + LRI $ac1.h, #0xffb0 + ADD $acc0, $acc1 ; No overflow or carry + CALL test_cond + ; 19. ar0: 965a. ac0.h: fff0. sr: 22b8 + + ADD $acc0, $acc1 ; Carry + CALL test_cond + ; 20. ar0: 969a. ac0.h: ffa0. sr: 22b9 + + ADD $acc0, $acc1 ; Overflow and carry + CALL test_cond + ; 21. ar0: d69a. ac0.h: 0050. sr: 22b3 + + ADD $acc0, $acc1 ; Carry + CALL test_cond + ; 22. ar0: 99a9. ac0.h: 0000. sr: 22a5 + + CLR $acc1 + CMP ; Compare 0 with 0. Results in 0 and carry. + CALL test_cond + ; 23. ar0: 99a9. sr: 22a5 + + ; Logic zero tests + LRIS $ac0.m, #0x01 + ANDF $ac0.m, #0x0000 + CALL test_cond + ; 24. ar0: a9a9. sr: 22e5 + + ANDCF $ac0.m, #0x0000 + CALL test_cond + ; 25. ar0: a9a9. sr: 22e5 + + ANDF $ac0.m, #0x0001 + CALL test_cond + ; 26. ar0: 99a9. sr: 22a5 + + ANDCF $ac0.m, #0x0001 + CALL test_cond + ; 27. ar0: a9a9. sr: 22e5 + + ANDF $ac0.m, #0x0002 + CALL test_cond + ; 28. ar0: a9a9. sr: 22e5 + + ANDCF $ac0.m, #0x0002 + CALL test_cond + ; 29. ar0: 99a9. sr: 22a5 + + ANDF $ac0.m, #0x0003 + CALL test_cond + ; 30. ar0: 99a9. sr: 22a5 + + ANDCF $ac0.m, #0x0003 + CALL test_cond + ; 31. ar0: 99a9. sr: 22a5 + + CLR $acc0 + NEG $acc0 ; 0 - 0, marked as carry + CALL test_cond + ; 32. ar0: 99a9. ac0.h: 0000. 
sr: 22a5 + + LRI $ac0.h, #0x0010 + NEG $acc0 + CALL test_cond + ; 33. ar0: 965a. ac0.h: fff0. sr: 22b8 + + NEG $acc0 + CALL test_cond + ; 34. ar0: 9655. ac0.h: 0010. sr: 22b0 + + LRI $ac0.h, #0xff80 + NEG $acc0 ; -INT_MIN is INT_MIN. This generates an overflow. + CALL test_cond + ; 35. ar0: d655. ac0.h: ff80. sr: 22ba + + CMP ; Compare INT_MIN with 0. Carry but no overflow. + CALL test_cond + ; 36. ar0: 969a. ac0.h: ff80. sr: 22b9 + + MOV $acc1, $acc0 + CALL test_cond + ; 37. ar0: 965a. ac0.h: ff80. sr: 22b8 + + TST $acc1 + CALL test_cond + ; 38. ar0: 965a. ac0.h: ff80. sr: 22b8 + + CLR $acc0 + CMP ; Compare 0 with INT_MIN. Overflow but no carry. + CALL test_cond + ; 39. ar0: d655. ac0.h: 0000. sr: 22ba + +; We're done, DO NOT DELETE THIS LINE + JMP end_of_test + +; Test all conditionals, setting bits in $AR0 based on it. +; $AR0 is used because ADDARN does not update flags. +test_cond: + LRI $AR0, #0x0000 + + LRI $IX0, #0x0001 + IFGE + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0002 + IFL + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0004 + IFG + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0008 + IFLE + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0010 + IFNZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0020 + IFZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0040 + IFNC + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0080 + IFC + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0100 + CW 0x0278 ; IFx8 + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0200 + CW 0x0279 ; IFx9 + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0400 + CW 0x027A ; IFxA + ADDARN $AR0, $IX0 + + LRI $IX0, #0x0800 + CW 0x027B ; IFxB + ADDARN $AR0, $IX0 + + LRI $IX0, #0x1000 + IFLNZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x2000 + IFLZ + ADDARN $AR0, $IX0 + + LRI $IX0, #0x4000 + IFO + ADDARN $AR0, $IX0 + + LRI $IX0, #0x8000 + IF ; Always true + ADDARN $AR0, $IX0 + + CALL send_back + RET diff --git a/Source/DSPSpy/tests/less_test.ds b/Source/DSPSpy/tests/less_test.ds deleted file mode 100644 index 20be209b73..0000000000 --- a/Source/DSPSpy/tests/less_test.ds +++ /dev/null @@ -1,17 +0,0 @@ -incdir 
"tests" -include "dsp_base.inc" - -test_main: - CLR $acc0 - CLR $acc1 - LRI $ac0.h, #0x0050 - LRI $ac1.h, #0x0050 - ADD $acc0, $acc1 ; Causes acc0 to overflow, and thus also become negative - - LRI $AX0.L, #0x0000 - IFL - LRI $AX0.L, #0x0001 - CALL send_back - -; We're done, DO NOT DELETE THIS LINE - JMP end_of_test From 105d8860fb1032ca4a1b5341b9a925f09e3b8a03 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 19 Aug 2021 10:04:42 -0700 Subject: [PATCH 02/11] DSPAnalyzer: Disable update SR analysis It doesn't work right in all situations, including in the cond_test hardware test. It could definitely be fixed, but it would be a hassle to do so. --- Source/Core/Core/DSP/DSPAnalyzer.cpp | 4 ++++ Source/Core/Core/DSP/DSPAnalyzer.h | 11 +++++++++++ 2 files changed, 15 insertions(+) diff --git a/Source/Core/Core/DSP/DSPAnalyzer.cpp b/Source/Core/Core/DSP/DSPAnalyzer.cpp index 19d563f4b2..a62de02771 100644 --- a/Source/Core/Core/DSP/DSPAnalyzer.cpp +++ b/Source/Core/Core/DSP/DSPAnalyzer.cpp @@ -91,7 +91,9 @@ void Analyzer::FindInstructionStarts(const SDSP& dsp, u16 start_addr, u16 end_ad { // This may not be 100% accurate in case of jump tables! // It could get desynced, which would be bad. We'll see if that's an issue. +#ifndef DISABLE_UPDATE_SR_ANALYSIS u16 last_arithmetic = 0; +#endif for (u16 addr = start_addr; addr < end_addr;) { const UDSPInstruction inst = dsp.ReadIMEM(addr); @@ -117,6 +119,7 @@ void Analyzer::FindInstructionStarts(const SDSP& dsp, u16 start_addr, u16 end_ad m_code_flags[static_cast(addr + 1u)] |= CODE_LOOP_END; } +#ifndef DISABLE_UPDATE_SR_ANALYSIS // Mark the last arithmetic/multiplier instruction before a branch. 
// We must update the SR reg at these instructions if (opcode->updates_sr) @@ -128,6 +131,7 @@ void Analyzer::FindInstructionStarts(const SDSP& dsp, u16 start_addr, u16 end_ad { m_code_flags[last_arithmetic] |= CODE_UPDATE_SR; } +#endif // If an instruction potentially raises exceptions, mark the following // instruction as needing to check for exceptions diff --git a/Source/Core/Core/DSP/DSPAnalyzer.h b/Source/Core/Core/DSP/DSPAnalyzer.h index c5875e3701..5f172f48be 100644 --- a/Source/Core/Core/DSP/DSPAnalyzer.h +++ b/Source/Core/Core/DSP/DSPAnalyzer.h @@ -6,6 +6,13 @@ #include #include "Common/CommonTypes.h" +// The update SR analysis is not perfect: it does not properly handle modified SR values if SR is +// only read within a function call, and it's possible that a previous instruction sets SR (e.g. the +// logical zero bit, or the sticky overflow bit) but is marked as not changing SR as a later +// instruction sets it. When this flag is set, we always treat instructions as updating SR, and +// disable the analysis for if SR needs to be set. +#define DISABLE_UPDATE_SR_ANALYSIS + namespace DSP { struct SDSP; @@ -63,7 +70,11 @@ public: // Whether or not the address describes an instruction that requires updating the SR register. [[nodiscard]] bool IsUpdateSR(u16 address) const { +#ifdef DISABLE_UPDATE_SR_ANALYSIS + return true; +#else return (GetCodeFlags(address) & CODE_UPDATE_SR) != 0; +#endif } // Whether or not the address describes instructions that potentially raise exceptions. From 74440c468f7adbf48ca007e6c90dfc763df06bd5 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Wed, 18 Aug 2021 15:34:01 -0700 Subject: [PATCH 03/11] DSPInterpreter: Sign-extend acS.h to 32 bits Thus, the 40-bit accumulator is treated as properly sign-extended when read as a 64-bit number. This affects e.g. overflow detection. 
--- Source/Core/Core/DSP/DSPCore.h | 2 +- Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/DSP/DSPCore.h b/Source/Core/Core/DSP/DSPCore.h index 506e3995d4..6455349a09 100644 --- a/Source/Core/Core/DSP/DSPCore.h +++ b/Source/Core/Core/DSP/DSPCore.h @@ -271,7 +271,7 @@ struct DSP_Regs { u16 l; u16 m; - u16 h; + u32 h; // 32 bits so that val is fully sign-extended (only 8 bits are actually used) }; } ac[2]; }; diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index 1fd3aa3ff3..9cd7804fbc 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -769,7 +769,7 @@ void Interpreter::ConditionalExtendAccum(int reg) // Sign extend into whole accum. auto& state = m_dsp_core.DSPState(); const u16 val = state.r.ac[reg - DSP_REG_ACM0].m; - state.r.ac[reg - DSP_REG_ACM0].h = (val & 0x8000) != 0 ? 0xFFFF : 0x0000; + state.r.ac[reg - DSP_REG_ACM0].h = (val & 0x8000) != 0 ? 0xFFFFFFFF : 0x0000; state.r.ac[reg - DSP_REG_ACM0].l = 0; } From dc2eab17789b39d8fcd6e32e98f8920078f08841 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Wed, 18 Aug 2021 15:34:22 -0700 Subject: [PATCH 04/11] DSPJit: Sign-extend acS.h to 32 bits Thus, the 40-bit accumulator is treated as properly sign-extended when read as a 64-bit number. This affects e.g. overflow detection. 
--- .../Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp | 25 +++---------------- 1 file changed, 3 insertions(+), 22 deletions(-) diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp index 1d3b2768de..2354915b31 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitRegCache.cpp @@ -704,23 +704,6 @@ OpArg DSPJitRegCache::GetReg(int reg, bool load) const OpArg oparg = m_regs[real_reg].loc; m_regs[real_reg].used = true; - // do some register specific fixup - switch (reg) - { - case DSP_REG_ACC0_64: - case DSP_REG_ACC1_64: - if (load) - { - // need to do this because interpreter only does 48 bits - // (and PutReg does the same) - m_emitter.SHL(64, oparg, Imm8(64 - 40)); // sign extend - m_emitter.SAR(64, oparg, Imm8(64 - 40)); - } - break; - default: - break; - } - return oparg; } @@ -738,15 +721,13 @@ void DSPJitRegCache::PutReg(int reg, bool dirty) case DSP_REG_ACH1: if (dirty) { - // no need to extend to full 64bit here until interpreter - // uses that if (oparg.IsSimpleReg()) { // register is already shifted correctly // (if at all) // sign extend from the bottom 8 bits. - m_emitter.MOVSX(16, 8, oparg.GetSimpleReg(), oparg); + m_emitter.MOVSX(32, 8, oparg.GetSimpleReg(), oparg); } else if (oparg.IsImm()) { @@ -759,8 +740,8 @@ void DSPJitRegCache::PutReg(int reg, bool dirty) // of real_reg, since it has the right loc X64Reg tmp = GetFreeXReg(); // Sign extend from the bottom 8 bits. 
- m_emitter.MOVSX(16, 8, tmp, m_regs[reg].loc); - m_emitter.MOV(16, m_regs[reg].loc, R(tmp)); + m_emitter.MOVSX(32, 8, tmp, m_regs[reg].loc); + m_emitter.MOV(32, m_regs[reg].loc, R(tmp)); PutXReg(tmp); } } From 4865b322d03e8ab5953d3a367b90320464e10464 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 19 Aug 2021 13:03:17 -0700 Subject: [PATCH 05/11] DSPInterpreter: Improve CMPAR variable names --- Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp index 41ff79cc67..dcc6fc2a8d 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp @@ -134,12 +134,12 @@ void Interpreter::cmpar(const UDSPInstruction opc) const u8 rreg = (opc >> 12) & 0x1; const u8 sreg = (opc >> 11) & 0x1; - const s64 sr = GetLongAcc(sreg); - s64 rr = GetAXHigh(rreg); - rr <<= 16; - const s64 res = dsp_convert_long_acc(sr - rr); + const s64 acc = GetLongAcc(sreg); + s64 ax = GetAXHigh(rreg); + ax <<= 16; + const s64 res = dsp_convert_long_acc(acc - ax); - UpdateSR64(res, isCarry2(sr, res), isOverflow(sr, -rr, res)); + UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -ax, res)); ZeroWriteBackLog(); } From e5d130c4b37ccaeec4355487fafb3dabb602113c Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 19 Aug 2021 10:27:31 -0700 Subject: [PATCH 06/11] DSPLLE: O is the mnemonic for overflow, not 0 --- Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp | 2 +- Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index 9cd7804fbc..a005ba6ca5 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -289,7 +289,7 @@ 
bool Interpreter::CheckCondition(u8 condition) const return !IsLogicZero(); case 0xd: // LZ - Logic Zero return IsLogicZero(); - case 0xe: // 0 - Overflow + case 0xe: // O - Overflow return IsOverflow(); default: return true; diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp index 043e5f2043..4768106a02 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp @@ -65,7 +65,7 @@ void DSPEmitter::ReJitConditional(const UDSPInstruction opc, case 0xd: // LZ - Logic Zero TEST(16, R(EAX), Imm16(SR_LOGIC_ZERO)); break; - case 0xe: // 0 - Overflow + case 0xe: // O - Overflow TEST(16, R(EAX), Imm16(SR_OVERFLOW)); break; } From a30a186d30a2e571a99ec4bf3e28f3b0586fb887 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 19 Aug 2021 11:56:41 -0700 Subject: [PATCH 07/11] DSPInterpreter: Replace IsConditionA with IsConditionB Although it's not clear what the xA and xB conditions are intended to do, the pattern indicates that xB is the regular version and xA is the inverted version, so for consistency, IsConditionB should be the main function. 
--- Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index a005ba6ca5..dfa687be08 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -253,8 +253,8 @@ bool Interpreter::CheckCondition(u8 condition) const const auto IsLess = [this] { return IsSRFlagSet(SR_OVERFLOW) != IsSRFlagSet(SR_SIGN); }; const auto IsZero = [this] { return IsSRFlagSet(SR_ARITH_ZERO); }; const auto IsLogicZero = [this] { return IsSRFlagSet(SR_LOGIC_ZERO); }; - const auto IsConditionA = [this] { - return (IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS)) && !IsSRFlagSet(SR_ARITH_ZERO); + const auto IsConditionB = [this] { + return (!(IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS))) || IsSRFlagSet(SR_ARITH_ZERO); }; switch (condition & 0xf) @@ -282,9 +282,9 @@ bool Interpreter::CheckCondition(u8 condition) const case 0x9: // ? - Over s32 return IsOverS32(); case 0xa: // ? - return IsConditionA(); + return !IsConditionB(); case 0xb: // ? - return !IsConditionA(); + return IsConditionB(); case 0xc: // LNZ - Logic Not Zero return !IsLogicZero(); case 0xd: // LZ - Logic Zero From 7cd1b2c4d1a6963ccaa453ab072ebbdea988aae3 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 19 Aug 2021 10:55:33 -0700 Subject: [PATCH 08/11] DSPJit: Fix xA and xB --- Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp | 33 +++++++++++++++---- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp index 4768106a02..81ee4f6d8d 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitBranch.cpp @@ -54,12 +54,25 @@ void DSPEmitter::ReJitConditional(const UDSPInstruction opc, break; case 0xa: // ? case 0xb: // ? 
+ // We want to test this expression, which corresponds to xB: + // (!(IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS))) || IsSRFlagSet(SR_ARITH_ZERO) + // The xB expression is used due to even instructions (i.e. xA) looking for the expression to + // evaluate to false, while odd ones look for it to be true. + + // Since SR_OVER_S32 is bit 4 (0x10) and SR_TOP2BITS is bit 5 (0x20), + // set EDX to 2*EAX, so that SR_OVER_S32 is in bit 5 of EDX. LEA(16, EDX, MRegSum(EAX, EAX)); - OR(16, R(EAX), R(EDX)); - SHL(16, R(EDX), Imm8(3)); - NOT(16, R(EAX)); - OR(16, R(EAX), R(EDX)); - TEST(16, R(EAX), Imm16(0x20)); + // Now OR them together, so bit 5 of EDX is + // (IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS)) + OR(16, R(EDX), R(EAX)); + // EDX bit 5 is !(IsSRFlagSet(SR_OVER_S32) || IsSRFlagSet(SR_TOP2BITS)) + NOT(16, R(EDX)); + // SR_ARITH_ZERO is bit 2 (0x04). We want that in bit 5, so shift left by 3. + SHL(16, R(EAX), Imm8(3)); + // Bit 5 of EAX is now IsSRFlagSet(SR_ARITH_ZERO), so or-ing EDX with EAX gives our target expression. + OR(16, R(EDX), R(EAX)); + // Test bit 5 + TEST(16, R(EDX), Imm16(0x20)); break; case 0xc: // LNZ - Logic Not Zero case 0xd: // LZ - Logic Zero @@ -70,8 +83,14 @@ void DSPEmitter::ReJitConditional(const UDSPInstruction opc, break; } DSPJitRegCache c1(m_gpr); - FixupBranch skip_code = - cond == 0xe ? 
J_CC(CC_E, true) : J_CC((CCFlags)(CC_NE - (cond & 1)), true); + CCFlags flag; + if (cond == 0xe) // Overflow, special case as there is no inverse case + flag = CC_Z; + else if ((cond & 1) == 0) // Even conditions run if the bit is zero, so jump if it IS NOT zero + flag = CC_NZ; + else // Odd conditions run if the bit IS NOT zero, so jump if it IS zero + flag = CC_Z; + FixupBranch skip_code = J_CC(flag, true); (this->*conditional_fn)(opc); m_gpr.FlushRegs(c1); SetJumpTarget(skip_code); From 4508ca6734a2e05d3df3cdc42362a7d75179844c Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Tue, 17 Aug 2021 21:13:59 -0700 Subject: [PATCH 09/11] DSPInterpreter: Rework overflow and carry handling --- .../Core/DSP/Interpreter/DSPIntArithmetic.cpp | 109 ++++++++---------- .../Core/Core/DSP/Interpreter/DSPIntCCUtil.h | 5 +- .../Core/DSP/Interpreter/DSPIntMultiplier.cpp | 2 +- .../Core/DSP/Interpreter/DSPInterpreter.cpp | 33 +++++- .../Core/DSP/Interpreter/DSPInterpreter.h | 2 + 5 files changed, 84 insertions(+), 67 deletions(-) diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp index dcc6fc2a8d..4cfa8d446e 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp @@ -119,8 +119,7 @@ void Interpreter::cmp(const UDSPInstruction) const s64 acc1 = GetLongAcc(1); const s64 res = dsp_convert_long_acc(acc0 - acc1); - UpdateSR64(res, isCarry2(acc0, res), - isOverflow(acc0, -acc1, res)); // CF -> influence on ABS/0xa100 + UpdateSR64Sub(acc0, acc1, res); ZeroWriteBackLog(); } @@ -139,7 +138,7 @@ void Interpreter::cmpar(const UDSPInstruction opc) ax <<= 16; const s64 res = dsp_convert_long_acc(acc - ax); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -ax, res)); + UpdateSR64Sub(acc, ax, res); ZeroWriteBackLog(); } @@ -157,10 +156,11 @@ void Interpreter::cmpi(const UDSPInstruction opc) const s64 val = GetLongAcc(reg); // Immediate is considered to be 
at M level in the 40-bit accumulator. - const s64 imm = (s64)(s16)state.FetchInstruction() << 16; + s64 imm = static_cast(state.FetchInstruction()); + imm <<= 16; const s64 res = dsp_convert_long_acc(val - imm); - UpdateSR64(res, isCarry2(val, res), isOverflow(val, -imm, res)); + UpdateSR64Sub(val, imm, res); } // CMPIS $acD, #I @@ -175,11 +175,11 @@ void Interpreter::cmpis(const UDSPInstruction opc) const u8 areg = (opc >> 8) & 0x1; const s64 acc = GetLongAcc(areg); - s64 val = (s8)opc; - val <<= 16; - const s64 res = dsp_convert_long_acc(acc - val); + s64 imm = static_cast(opc); + imm <<= 16; + const s64 res = dsp_convert_long_acc(acc - imm); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -val, res)); + UpdateSR64Sub(acc, imm, res); } //---- @@ -401,13 +401,12 @@ void Interpreter::addr(const UDSPInstruction opc) } ax <<= 16; - s64 res = acc + ax; + const s64 res = acc + ax; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + UpdateSR64Add(acc, ax, GetLongAcc(dreg)); } // ADDAX $acD, $axS @@ -422,13 +421,12 @@ void Interpreter::addax(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 ax = GetLongACX(sreg); - s64 res = acc + ax; + const s64 res = acc + ax; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + UpdateSR64Add(acc, ax, GetLongAcc(dreg)); } // ADD $acD, $ac(1-D) @@ -442,13 +440,12 @@ void Interpreter::add(const UDSPInstruction opc) const s64 acc0 = GetLongAcc(dreg); const s64 acc1 = GetLongAcc(1 - dreg); - s64 res = acc0 + acc1; + const s64 res = acc0 + acc1; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc0, res), isOverflow(acc0, acc1, res)); + UpdateSR64Add(acc0, acc1, GetLongAcc(dreg)); } // ADDP $acD @@ -462,13 +459,12 @@ void Interpreter::addp(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const 
s64 prod = GetLongProduct(); - s64 res = acc + prod; + const s64 res = acc + prod; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, prod, res)); + UpdateSR64Add(acc, prod, GetLongAcc(dreg)); } // ADDAXL $acD, $axS.l @@ -484,15 +480,12 @@ void Interpreter::addaxl(const UDSPInstruction opc) const u64 acc = GetLongAcc(dreg); const u16 acx = static_cast(GetAXLow(sreg)); - - u64 res = acc + acx; + const u64 res = acc + acx; ZeroWriteBackLog(); SetLongAcc(dreg, static_cast(res)); - res = GetLongAcc(dreg); - UpdateSR64(static_cast(res), isCarry(acc, res), - isOverflow(static_cast(acc), static_cast(acx), static_cast(res))); + UpdateSR64Add(acc, acx, GetLongAcc(dreg)); } // ADDI $amR, #I @@ -509,11 +502,10 @@ void Interpreter::addi(const UDSPInstruction opc) const s64 acc = GetLongAcc(areg); s64 imm = static_cast(state.FetchInstruction()); imm <<= 16; - s64 res = acc + imm; + const s64 res = acc + imm; SetLongAcc(areg, res); - res = GetLongAcc(areg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + UpdateSR64Add(acc, imm, GetLongAcc(areg)); } // ADDIS $acD, #I @@ -526,13 +518,12 @@ void Interpreter::addis(const UDSPInstruction opc) const u8 dreg = (opc >> 8) & 0x1; const s64 acc = GetLongAcc(dreg); - s64 imm = static_cast(static_cast(opc)); + s64 imm = static_cast(opc); imm <<= 16; - s64 res = acc + imm; + const s64 res = acc + imm; SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + UpdateSR64Add(acc, imm, GetLongAcc(dreg)); } // INCM $acsD @@ -546,13 +537,12 @@ void Interpreter::incm(const UDSPInstruction opc) const s64 sub = 0x10000; const s64 acc = GetLongAcc(dreg); - s64 res = acc + sub; + const s64 res = acc + sub; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, sub, res)); + UpdateSR64Add(acc, sub, GetLongAcc(dreg)); } // INC $acD @@ -565,13 
+555,12 @@ void Interpreter::inc(const UDSPInstruction opc) const u8 dreg = (opc >> 8) & 0x1; const s64 acc = GetLongAcc(dreg); - s64 res = acc + 1; + const s64 res = acc + 1; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(acc, res), isOverflow(acc, 1, res)); + UpdateSR64Add(acc, 1, GetLongAcc(dreg)); } //---- @@ -606,13 +595,12 @@ void Interpreter::subr(const UDSPInstruction opc) } ax <<= 16; - s64 res = acc - ax; + const s64 res = acc - ax; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -ax, res)); + UpdateSR64Sub(acc, ax, GetLongAcc(dreg)); } // SUBAX $acD, $axS @@ -627,13 +615,12 @@ void Interpreter::subax(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 acx = GetLongACX(sreg); - s64 res = acc - acx; + const s64 res = acc - acx; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -acx, res)); + UpdateSR64Sub(acc, acx, GetLongAcc(dreg)); } // SUB $acD, $ac(1-D) @@ -647,13 +634,12 @@ void Interpreter::sub(const UDSPInstruction opc) const s64 acc1 = GetLongAcc(dreg); const s64 acc2 = GetLongAcc(1 - dreg); - s64 res = acc1 - acc2; + const s64 res = acc1 - acc2; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc1, res), isOverflow(acc1, -acc2, res)); + UpdateSR64Sub(acc1, acc2, GetLongAcc(dreg)); } // SUBP $acD @@ -667,13 +653,12 @@ void Interpreter::subp(const UDSPInstruction opc) const s64 acc = GetLongAcc(dreg); const s64 prod = GetLongProduct(); - s64 res = acc - prod; + const s64 res = acc - prod; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -prod, res)); + UpdateSR64Sub(acc, prod, GetLongAcc(dreg)); } // DECM $acsD @@ -687,13 +672,12 @@ void Interpreter::decm(const UDSPInstruction opc) const s64 sub = 0x10000; const s64 
acc = GetLongAcc(dreg); - s64 res = acc - sub; + const s64 res = acc - sub; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -sub, res)); + UpdateSR64Sub(acc, sub, GetLongAcc(dreg)); } // DEC $acD @@ -706,13 +690,12 @@ void Interpreter::dec(const UDSPInstruction opc) const u8 dreg = (opc >> 8) & 0x01; const s64 acc = GetLongAcc(dreg); - s64 res = acc - 1; + const s64 res = acc - 1; ZeroWriteBackLog(); SetLongAcc(dreg, res); - res = GetLongAcc(dreg); - UpdateSR64(res, isCarry2(acc, res), isOverflow(acc, -1, res)); + UpdateSR64Sub(acc, 1, GetLongAcc(dreg)); } //---- @@ -752,7 +735,7 @@ void Interpreter::abs(const UDSPInstruction opc) ZeroWriteBackLog(); SetLongAcc(dreg, acc); - UpdateSR64(GetLongAcc(dreg)); + UpdateSR64(GetLongAcc(dreg)); // TODO: Is this right? } //---- @@ -856,7 +839,7 @@ void Interpreter::lsr16(const UDSPInstruction opc) u64 acc = GetLongAcc(areg); // Lop off the extraneous sign extension our 64-bit fake accum causes - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; acc >>= 16; ZeroWriteBackLog(); @@ -912,7 +895,7 @@ void Interpreter::lsr(const UDSPInstruction opc) u16 shift; u64 acc = GetLongAcc(rreg); // Lop off the extraneous sign extension our 64-bit fake accum causes - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((opc & 0x3f) == 0) shift = 0; @@ -977,7 +960,7 @@ void Interpreter::lsrn(const UDSPInstruction opc) s16 shift; const u16 accm = static_cast(GetAccMid(1)); u64 acc = GetLongAcc(0); - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((accm & 0x3f) == 0) shift = 0; @@ -1046,7 +1029,7 @@ void Interpreter::lsrnrx(const UDSPInstruction opc) s16 shift; const u16 axh = state.r.ax[sreg].h; u64 acc = GetLongAcc(dreg); - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((axh & 0x3f) == 0) shift = 0; @@ -1121,7 +1104,7 @@ void Interpreter::lsrnr(const UDSPInstruction opc) s16 shift; const u16 
accm = static_cast(GetAccMid(1 - dreg)); u64 acc = GetLongAcc(dreg); - acc &= 0x000000FFFFFFFFFFULL; + acc &= 0x0000'00FF'FFFF'FFFFULL; if ((accm & 0x3f) == 0) shift = 0; diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h b/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h index 0db9dc969d..3e970cc2ac 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h +++ b/Source/Core/Core/DSP/Interpreter/DSPIntCCUtil.h @@ -11,18 +11,19 @@ namespace DSP::Interpreter { -constexpr bool isCarry(u64 val, u64 result) +constexpr bool isCarryAdd(u64 val, u64 result) { return val > result; } -constexpr bool isCarry2(u64 val, u64 result) +constexpr bool isCarrySubtract(u64 val, u64 result) { return val >= result; } constexpr bool isOverflow(s64 val1, s64 val2, s64 res) { + // val1 >= 0 and val2 >= 0 yet res < 0, or val1 < 0 and val2 < 0 yet res >= 0. return ((val1 ^ res) & (val2 ^ res)) < 0; } diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp index a334f99d3e..0816e8deed 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntMultiplier.cpp @@ -117,7 +117,7 @@ void Interpreter::addpaxz(const UDSPInstruction opc) SetLongAcc(dreg, res); res = GetLongAcc(dreg); - UpdateSR64(res, isCarry(oldprod, res), false); + UpdateSR64(res, isCarryAdd(oldprod, res), false); // TODO: Why doesn't this set the overflow bit? 
} //---- diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp index dfa687be08..d0cb74f764 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.cpp @@ -11,6 +11,7 @@ #include "Core/DSP/DSPAnalyzer.h" #include "Core/DSP/DSPCore.h" #include "Core/DSP/DSPTables.h" +#include "Core/DSP/Interpreter/DSPIntCCUtil.h" #include "Core/DSP/Interpreter/DSPIntTables.h" namespace DSP::Interpreter @@ -547,8 +548,16 @@ void Interpreter::UpdateSR16(s16 value, bool carry, bool overflow, bool over_s32 } } +static constexpr bool IsProperlySignExtended(u64 val) +{ + const u64 topbits = val & 0xffff'ff80'0000'0000ULL; + return (topbits == 0) || (0xffff'ff80'0000'0000ULL == topbits); +} + void Interpreter::UpdateSR64(s64 value, bool carry, bool overflow) { + DEBUG_ASSERT(IsProperlySignExtended(value)); + auto& state = m_dsp_core.DSPState(); state.r.sr &= ~SR_CMP_MASK; @@ -579,7 +588,7 @@ void Interpreter::UpdateSR64(s64 value, bool carry, bool overflow) } // 0x10 - if (value != static_cast(value)) + if (isOverS32(value)) { state.r.sr |= SR_OVER_S32; } @@ -591,6 +600,28 @@ void Interpreter::UpdateSR64(s64 value, bool carry, bool overflow) } } +// Updates SR based on a 64-bit value computed by result = val1 + val2. +// Result is a separate parameter that is properly sign-extended, and as such may not equal the +// result of adding val1 and val2 in a 64-bit context. +void Interpreter::UpdateSR64Add(s64 val1, s64 val2, s64 result) +{ + DEBUG_ASSERT(((val1 + val2) & 0xff'ffff'ffffULL) == (result & 0xff'ffff'ffffULL)); + DEBUG_ASSERT(IsProperlySignExtended(val1)); + DEBUG_ASSERT(IsProperlySignExtended(val2)); + UpdateSR64(result, isCarryAdd(val1, result), isOverflow(val1, val2, result)); +} + +// Updates SR based on a 64-bit value computed by result = val1 - val2. 
+// Result is a separate parameter that is properly sign-extended, and as such may not equal the +// result of subtracting val2 from val1 in a 64-bit context. +void Interpreter::UpdateSR64Sub(s64 val1, s64 val2, s64 result) +{ + DEBUG_ASSERT(((val1 - val2) & 0xff'ffff'ffffULL) == (result & 0xff'ffff'ffffULL)); + DEBUG_ASSERT(IsProperlySignExtended(val1)); + DEBUG_ASSERT(IsProperlySignExtended(val2)); + UpdateSR64(result, isCarrySubtract(val1, result), isOverflow(val1, -val2, result)); +} + void Interpreter::UpdateSRLogicZero(bool value) { auto& state = m_dsp_core.DSPState(); diff --git a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h index 119c509f2a..422c9a5f45 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h +++ b/Source/Core/Core/DSP/Interpreter/DSPInterpreter.h @@ -225,6 +225,8 @@ private: void UpdateSR16(s16 value, bool carry = false, bool overflow = false, bool over_s32 = false); void UpdateSR64(s64 value, bool carry = false, bool overflow = false); + void UpdateSR64Add(s64 val1, s64 val2, s64 result); + void UpdateSR64Sub(s64 val1, s64 val2, s64 result); void UpdateSRLogicZero(bool value); u16 OpReadRegister(int reg_); From 3ee605d699bb970156b7f8e92a52d6293cf4da07 Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Thu, 19 Aug 2021 17:12:23 -0700 Subject: [PATCH 10/11] DSPJit: Rework overflow and carry handling --- Source/Core/Core/DSP/Jit/x64/DSPEmitter.h | 12 +- .../Core/DSP/Jit/x64/DSPJitArithmetic.cpp | 690 +++++++++--------- Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp | 55 +- .../Core/DSP/Jit/x64/DSPJitMultiplier.cpp | 5 +- Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp | 10 +- 5 files changed, 390 insertions(+), 382 deletions(-) diff --git a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h index ee74def463..05d52abe51 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h +++ b/Source/Core/Core/DSP/Jit/x64/DSPEmitter.h @@ -228,6 +228,7 @@ private: void
get_long_prod(Gen::X64Reg long_prod = Gen::RAX); void get_long_prod_round_prodl(Gen::X64Reg long_prod = Gen::RAX); void set_long_prod(); + void dsp_convert_long_acc(Gen::X64Reg long_acc); // s64 -> s40 void round_long_acc(Gen::X64Reg long_acc = Gen::EAX); void set_long_acc(int _reg, Gen::X64Reg acc = Gen::EAX); void get_acc_h(int _reg, Gen::X64Reg acc = Gen::EAX, bool sign = true); @@ -246,7 +247,16 @@ private: // CC helpers void Update_SR_Register64(Gen::X64Reg val = Gen::EAX, Gen::X64Reg scratch = Gen::EDX); - void Update_SR_Register64_Carry(Gen::X64Reg val, Gen::X64Reg carry_ovfl, bool carry_eq = false); + void UpdateSR64AddSub(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, Gen::X64Reg scratch, + bool subtract); + void UpdateSR64Add(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, Gen::X64Reg scratch) + { + UpdateSR64AddSub(val1, val2, result, scratch, false); + } + void UpdateSR64Sub(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, Gen::X64Reg scratch) + { + UpdateSR64AddSub(val1, val2, result, scratch, true); + } void Update_SR_Register16(Gen::X64Reg val = Gen::EAX); void Update_SR_Register16_OverS32(Gen::X64Reg val = Gen::EAX); diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp index 633b714124..e651bfb84f 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp @@ -64,18 +64,19 @@ void DSPEmitter::andcf(const UDSPInstruction opc) if (FlagsNeeded()) { const u8 reg = (opc >> 8) & 0x1; - // u16 imm = dsp_fetch_code(); + // const u16 imm = m_dsp_core.DSPState().FetchInstruction(); const u16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); - // u16 val = dsp_get_acc_m(reg); - get_acc_m(reg); - // Update_SR_LZ(((val & imm) == imm) ? 
true : false); - // if ((val & imm) == imm) - // g_dsp.r.sr |= SR_LOGIC_ZERO; - // else - // g_dsp.r.sr &= ~SR_LOGIC_ZERO; + // const u16 val = GetAccMid(reg); + X64Reg val = RAX; + get_acc_m(reg, val); + // UpdateSRLogicZero((val & imm) == imm); + // if ((val & imm) == imm) + // g_dsp.r.sr |= SR_LOGIC_ZERO; + // else + // g_dsp.r.sr &= ~SR_LOGIC_ZERO; const OpArg sr_reg = m_gpr.GetReg(DSP_REG_SR); - AND(16, R(RAX), Imm16(imm)); - CMP(16, R(RAX), Imm16(imm)); + AND(16, R(val), Imm16(imm)); + CMP(16, R(val), Imm16(imm)); FixupBranch notLogicZero = J_CC(CC_NE); OR(16, sr_reg, Imm16(SR_LOGIC_ZERO)); FixupBranch exit = J(); @@ -99,17 +100,18 @@ void DSPEmitter::andf(const UDSPInstruction opc) if (FlagsNeeded()) { const u8 reg = (opc >> 8) & 0x1; - // u16 imm = dsp_fetch_code(); + // const u16 imm = m_dsp_core.DSPState().FetchInstruction(); const u16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); - // u16 val = dsp_get_acc_m(reg); - get_acc_m(reg); - // Update_SR_LZ(((val & imm) == 0) ? 
true : false); - // if ((val & imm) == 0) - // g_dsp.r.sr |= SR_LOGIC_ZERO; - // else - // g_dsp.r.sr &= ~SR_LOGIC_ZERO; + // const u16 val = GetAccMid(reg); + X64Reg val = RAX; + get_acc_m(reg, val); + // UpdateSRLogicZero((val & imm) == 0); + // if ((val & imm) == 0) + // g_dsp.r.sr |= SR_LOGIC_ZERO; + // else + // g_dsp.r.sr &= ~SR_LOGIC_ZERO; const OpArg sr_reg = m_gpr.GetReg(DSP_REG_SR); - TEST(16, R(RAX), Imm16(imm)); + TEST(16, R(val), Imm16(imm)); FixupBranch notLogicZero = J_CC(CC_NE); OR(16, sr_reg, Imm16(SR_LOGIC_ZERO)); FixupBranch exit = J(); @@ -167,18 +169,21 @@ void DSPEmitter::cmp(const UDSPInstruction opc) { if (FlagsNeeded()) { + // const s64 acc0 = GetLongAcc(0); + X64Reg acc0 = RAX; + get_long_acc(0, acc0); + // const s64 acc1 = GetLongAcc(1); + X64Reg acc1 = RDX; + get_long_acc(1, acc1); + // s64 res = dsp_convert_long_acc(acc0 - acc1); + X64Reg res = RCX; + MOV(64, R(res), R(acc0)); + SUB(64, R(res), R(acc1)); + dsp_convert_long_acc(RCX); + + // UpdateSR64Sub(acc0, acc1, res); X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc0 = dsp_get_long_acc(0); - get_long_acc(0, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 acc1 = dsp_get_long_acc(1); - get_long_acc(1, RDX); - // s64 res = dsp_convert_long_acc(acc0 - acc1); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(acc0, res), isOverflow(acc0, -acc1, res)); // CF -> - // influence on ABS/0xa100 - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + UpdateSR64Sub(acc0, acc1, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -195,19 +200,22 @@ void DSPEmitter::cmpar(const UDSPInstruction opc) u8 rreg = ((opc >> 12) & 0x1); u8 sreg = (opc >> 11) & 0x1; + // const s64 acc = GetLongAcc(sreg); + X64Reg acc = RAX; + get_long_acc(sreg, acc); + // s64 ax = GetAXHigh(rreg); + X64Reg ax = RDX; + get_ax_h(rreg, ax); + // ax <<= 16; + SHL(64, R(ax), Imm8(16)); + // const s64 res = dsp_convert_long_acc(acc - ax); + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(ax)); + 
dsp_convert_long_acc(res); + // UpdateSR64Sub(acc, ax, res); X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 sr = dsp_get_long_acc(sreg); - get_long_acc(sreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 rr = (s16)g_dsp.r.axh[rreg]; - get_ax_h(rreg, RDX); - // rr <<= 16; - SHL(64, R(RDX), Imm8(16)); - // s64 res = dsp_convert_long_acc(sr - rr); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(sr, res), isOverflow(sr, -rr, res)); - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + UpdateSR64Sub(acc, ax, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -224,19 +232,24 @@ void DSPEmitter::cmpi(const UDSPInstruction opc) if (FlagsNeeded()) { const u8 reg = (opc >> 8) & 0x1; - const X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 val = dsp_get_long_acc(reg); - get_long_acc(reg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 imm = (s64)(s16)dsp_fetch_code() << 16; // Immediate is considered to be at M level in - // the 40-bit accumulator. - const u16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); - MOV(64, R(RDX), Imm64((s64)(s16)imm << 16)); - // s64 res = dsp_convert_long_acc(val - imm); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(val, res), isOverflow(val, -imm, res)); - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + // const s64 val = GetLongAcc(reg); + X64Reg val = RAX; + get_long_acc(reg, val); + // Immediate is considered to be at M level in the 40-bit accumulator. 
+ // s64 imm = static_cast(state.FetchInstruction()); + // imm <<= 16; + X64Reg imm_reg = RDX; + s64 imm = static_cast(m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1)); + imm <<= 16; + MOV(64, R(imm_reg), Imm64(imm)); + // const s64 res = dsp_convert_long_acc(val - imm); + X64Reg res = RCX; + MOV(64, R(res), R(val)); + SUB(64, R(res), R(imm_reg)); + dsp_convert_long_acc(res); + // UpdateSR64Sub(val, imm, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(val, imm_reg, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -253,18 +266,23 @@ void DSPEmitter::cmpis(const UDSPInstruction opc) if (FlagsNeeded()) { u8 areg = (opc >> 8) & 0x1; - // s64 acc = dsp_get_long_acc(areg); + // const s64 acc = GetLongAcc(areg); + X64Reg acc = RAX; + get_long_acc(areg, acc); + // s64 imm = static_cast(opc); + // imm <<= 16; + X64Reg imm_reg = RDX; + s64 imm = static_cast(opc); + imm <<= 16; + MOV(64, R(imm_reg), Imm64(imm)); + // const s64 res = dsp_convert_long_acc(acc - imm); + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(imm_reg)); + dsp_convert_long_acc(res); + // UpdateSR64Sub(acc, imm, res); X64Reg tmp1 = m_gpr.GetFreeXReg(); - get_long_acc(areg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 val = (s8)opc; - // val <<= 16; - MOV(64, R(RDX), Imm64((s64)(s8)opc << 16)); - // s64 res = dsp_convert_long_acc(acc - val); - SUB(64, R(RAX), R(RDX)); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -val, res)); - NEG(64, R(RDX)); - Update_SR_Register64_Carry(EAX, tmp1, true); + UpdateSR64Sub(acc, imm_reg, res, tmp1); m_gpr.PutXReg(tmp1); } } @@ -521,29 +539,27 @@ void DSPEmitter::addr(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - // s64 acc = dsp_get_long_acc(dreg); - X64Reg tmp1 = m_gpr.GetFreeXReg(); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 ax = (s16)g_dsp.r[sreg]; - dsp_op_read_reg(sreg, RDX, RegisterExtension::Sign); - // ax <<= 16; - SHL(64, R(RDX), Imm8(16)); - // s64 
res = acc + ax; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // s64 ax = ...; + X64Reg ax = RDX; + dsp_op_read_reg(sreg, ax, RegisterExtension::Sign); + // ax <<= 16; + SHL(64, R(ax), Imm8(16)); + // const s64 res = acc + ax; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, ax)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, ax, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, ax, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDAX $acD, $axS @@ -556,28 +572,25 @@ void DSPEmitter::addax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 ax = dsp_get_long_acx(sreg); - get_long_acx(sreg, RDX); - // s64 res = acc + ax; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, ax, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 ax = GetLongACX(sreg); + X64Reg ax = RDX; + get_long_acx(sreg, ax); + // const s64 res = acc + ax; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, ax)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, ax, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, ax, res, tmp1); + 
m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADD $acD, $ac(1-D) @@ -589,28 +602,25 @@ void DSPEmitter::add(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc0 = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 acc1 = dsp_get_long_acc(1 - dreg); - get_long_acc(1 - dreg, RDX); - // s64 res = acc0 + acc1; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc0, res), isOverflow(acc0, acc1, res)); + // const s64 acc0 = GetLongAcc(dreg); + X64Reg acc0 = RAX; + get_long_acc(dreg, acc0); + // const s64 acc1 = GetLongAcc(1 - dreg); + X64Reg acc1 = RDX; + get_long_acc(1 - dreg, acc1); + // const s64 res = acc0 + acc1; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc0, acc1)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc0, acc1, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc0, acc1, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDP $acD @@ -622,28 +632,25 @@ void DSPEmitter::addp(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 prod = dsp_get_long_prod(); - get_long_prod(RDX); - // s64 res = acc + prod; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, prod, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 prod = GetLongProduct(); + X64Reg prod = RDX; + get_long_prod(prod); + 
// const s64 res = acc + prod; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, prod)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, prod, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, prod, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDAXL $acD, $axS.l @@ -657,29 +664,26 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) u8 sreg = (opc >> 9) & 0x1; u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // u64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // u16 acx = (u16)dsp_get_ax_l(sreg); - get_ax_l(sreg, RDX); - MOVZX(64, 16, RDX, R(RDX)); - // u64 res = acc + acx; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, (s64)res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64((s64)res, isCarry(acc, res), isOverflow((s64)acc, (s64)acx, (s64)res)); + // const u64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const u16 acx = static_cast(GetAXLow(sreg)); + X64Reg acx = RDX; + get_ax_l(sreg, acx); + MOVZX(64, 16, acx, R(acx)); + // const u64 res = acc + acx; + X64Reg res = RCX; + LEA(64, res, MRegSum(acc, acx)); + // SetLongAcc(dreg, static_cast(res)); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, acx, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, acx, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDI $amR, #I @@ -691,30 +695,30 @@ void DSPEmitter::addaxl(const UDSPInstruction opc) void DSPEmitter::addi(const UDSPInstruction opc) { u8 areg = (opc >> 8) & 0x1; - 
X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(areg); - get_long_acc(areg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 imm = (s16)dsp_fetch_code(); - const s16 imm = m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1); + // const s64 acc = GetLongAcc(areg); + X64Reg acc = RAX; + get_long_acc(areg, acc); + // s64 imm = static_cast(state.FetchInstruction()); // imm <<= 16; - MOV(64, R(RDX), Imm32(imm << 16)); - // s64 res = acc + imm; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(areg, res); - // res = dsp_get_long_acc(areg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + s64 imm = static_cast(m_dsp_core.DSPState().ReadIMEM(m_compile_pc + 1)); + imm <<= 16; + // const s64 res = acc + imm; + X64Reg res = RCX; + // Can safely use LEA as we are using a 16-bit sign-extended immediate shifted left by 16, which + // fits in a signed 32-bit immediate + LEA(64, res, MDisp(acc, static_cast(imm))); + // SetLongAcc(areg, res); + set_long_acc(areg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(areg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, imm, GetLongAcc(areg)); + get_long_acc(areg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(imm)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(areg, RAX); - } - m_gpr.PutXReg(tmp1); } // ADDIS $acD, #I @@ -726,30 +730,28 @@ void DSPEmitter::addis(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 imm = (s8)(u8)opc; - // imm <<= 16; - s32 imm = static_cast(opc) << 24 >> 8; - MOV(64, R(RDX), Imm32(imm)); - // s64 res = acc + imm; - ADD(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, imm, res)); + 
// const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // s64 imm = static_cast(opc); + // imm <<= 16; + s64 imm = static_cast(opc); + imm <<= 16; + // const s64 res = acc + imm; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, static_cast(imm))); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, imm, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(imm)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // INCM $acsD @@ -761,26 +763,24 @@ void DSPEmitter::incm(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; s64 subtract = 0x10000; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc + sub; - LEA(64, RAX, MDisp(tmp1, subtract)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, subtract, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = acc + sub; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, static_cast(subtract))); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm32((u32)subtract)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, sub, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(subtract)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg); - } - m_gpr.PutXReg(tmp1); } // INC $acD @@ -791,26 +791,24 @@ void DSPEmitter::incm(const UDSPInstruction 
opc) void DSPEmitter::inc(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc + 1; - LEA(64, RAX, MDisp(tmp1, 1)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(acc, res), isOverflow(acc, 1, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = acc + 1; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, 1)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm64(1)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // UpdateSR64Add(acc, 1, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(1)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Add(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg); - } - m_gpr.PutXReg(tmp1); } //---- @@ -825,31 +823,28 @@ void DSPEmitter::subr(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = ((opc >> 9) & 0x3) + DSP_REG_AXL0; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 ax = (s16)g_dsp.r[sreg]; - dsp_op_read_reg(sreg, RDX, RegisterExtension::Sign); - // ax <<= 16; - SHL(64, R(RDX), Imm8(16)); - // s64 res = acc - ax; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -ax, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // s64 ax = ...; + X64Reg ax = RDX; + dsp_op_read_reg(sreg, ax, RegisterExtension::Sign); + // ax <<= 16; + SHL(64, R(ax), Imm8(16)); + // const s64 res = acc - ax; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(ax)); + // 
SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, ax, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, ax, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // SUBAX $acD, $axS @@ -862,29 +857,26 @@ void DSPEmitter::subax(const UDSPInstruction opc) u8 dreg = (opc >> 8) & 0x1; u8 sreg = (opc >> 9) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 acx = dsp_get_long_acx(sreg); - get_long_acx(sreg, RDX); - // s64 res = acc - acx; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -acx, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 acx = GetLongACX(sreg); + X64Reg acx = RDX; + get_long_acx(sreg, acx); + // const s64 res = acc - acx; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(acx)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, acx, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, acx, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // SUB $acD, $ac(1-D) @@ -895,29 +887,26 @@ void DSPEmitter::subax(const UDSPInstruction opc) void DSPEmitter::sub(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc1 = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); 
- // s64 acc2 = dsp_get_long_acc(1 - dreg); - get_long_acc(1 - dreg, RDX); - // s64 res = acc1 - acc2; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc1, res), isOverflow(acc1, -acc2, res)); + // const s64 acc1 = GetLongAcc(dreg); + X64Reg acc1 = RAX; + get_long_acc(dreg, acc1); + // const s64 acc2 = GetLongAcc(1 - dreg); + X64Reg acc2 = RDX; + get_long_acc(1 - dreg, acc2); + // const s64 res = acc1 - acc2; + X64Reg res = RCX; + MOV(64, R(res), R(acc1)); + SUB(64, R(res), R(acc2)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc1, acc2, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc1, acc2, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // SUBP $acD @@ -928,29 +917,26 @@ void DSPEmitter::sub(const UDSPInstruction opc) void DSPEmitter::subp(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - MOV(64, R(RAX), R(tmp1)); - // s64 prod = dsp_get_long_prod(); - get_long_prod(RDX); - // s64 res = acc - prod; - SUB(64, R(RAX), R(RDX)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -prod, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 prod = GetLongProduct(); + X64Reg prod = RDX; + get_long_prod(prod); + // const s64 res = acc - prod; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + SUB(64, R(res), R(prod)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - NEG(64, R(RDX)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, 
RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, prod, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, prod, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // DECM $acsD @@ -962,26 +948,24 @@ void DSPEmitter::decm(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x01; s64 subtract = 0x10000; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc - sub; - LEA(64, RAX, MDisp(tmp1, -subtract)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -subtract, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = acc - sub; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, -subtract)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm64(-subtract)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, sub, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(imm_reg), Imm64(subtract)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg, RAX); - } - m_gpr.PutXReg(tmp1); } // DEC $acD @@ -992,26 +976,24 @@ void DSPEmitter::decm(const UDSPInstruction opc) void DSPEmitter::dec(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x01; - X64Reg tmp1 = m_gpr.GetFreeXReg(); - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg, tmp1); - // s64 res = acc - 1; - LEA(64, RAX, MDisp(tmp1, -1)); - // dsp_set_long_acc(dreg, res); - // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry2(acc, res), isOverflow(acc, -1, res)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + 
get_long_acc(dreg, acc); + // const s64 res = acc - 1; + X64Reg res = RCX; + LEA(64, res, MDisp(acc, -1)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - MOV(64, R(RDX), Imm64(-1)); - MOV(64, R(RCX), R(RAX)); - set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1, true); + // UpdateSR64Sub(acc, 1, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + MOV(64, R(RDX), Imm64(1)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(acc, imm_reg, res, tmp1); + m_gpr.PutXReg(tmp1); } - else - { - set_long_acc(dreg); - } - m_gpr.PutXReg(tmp1); } //---- diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp index 58676e8ef2..a891d0c483 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitCCUtil.cpp @@ -65,45 +65,52 @@ void DSPEmitter::Update_SR_Register64(Gen::X64Reg val, Gen::X64Reg scratch) Update_SR_Register(val, scratch); } -// In: (val): s64 _Value -// In: (carry_ovfl): 1 = carry, 2 = overflow -// Clobbers RDX -void DSPEmitter::Update_SR_Register64_Carry(X64Reg val, X64Reg carry_ovfl, bool carry_eq) +// Updates SR based on a 64-bit value computed by result = val1 + val2 or result = val1 - val2 +// Clobbers scratch +void DSPEmitter::UpdateSR64AddSub(Gen::X64Reg val1, Gen::X64Reg val2, Gen::X64Reg result, + Gen::X64Reg scratch, bool subtract) { const OpArg sr_reg = m_gpr.GetReg(DSP_REG_SR); - // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; + // g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK; AND(16, sr_reg, Imm16(~SR_CMP_MASK)); - CMP(64, R(carry_ovfl), R(val)); + CMP(64, R(val1), R(result)); + // x86 ZF set if val1 == result + // x86 CF set if val1 < result + // Note that x86 uses a different definition of carry than the DSP // 0x01 - // g_dsp.r[DSP_REG_SR] |= SR_CARRY; - // Carry = (acc>res) - // Carry2 = (acc>=res) - FixupBranch noCarry = J_CC(carry_eq ? 
CC_B : CC_BE); + // g_dsp.r[DSP_REG_SR] |= SR_CARRY; + // isCarryAdd = (val1 > result) => skip setting if (val1 <= result) => jump if ZF or CF => use JBE + // isCarrySubtract = (val1 >= result) => skip setting if (val1 < result) => jump if CF => use JB + FixupBranch noCarry = J_CC(subtract ? CC_B : CC_BE); OR(16, sr_reg, Imm16(SR_CARRY)); SetJumpTarget(noCarry); // 0x02 and 0x80 - // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; - // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; - // Overflow = ((acc ^ res) & (ax ^ res)) < 0 - XOR(64, R(carry_ovfl), R(val)); - XOR(64, R(RDX), R(val)); - TEST(64, R(carry_ovfl), R(RDX)); + // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW; + // g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW_STICKY; + // Overflow (add) = ((val1 ^ res) & (val2 ^ res)) < 0 + // Overflow (sub) = ((val1 ^ res) & (-val2 ^ res)) < 0 + MOV(64, R(scratch), R(val1)); + XOR(64, R(scratch), R(result)); + + if (subtract) + NEG(64, R(val2)); + XOR(64, R(result), R(val2)); + + TEST(64, R(scratch), R(result)); // Test scratch & value FixupBranch noOverflow = J_CC(CC_GE); OR(16, sr_reg, Imm16(SR_OVERFLOW | SR_OVERFLOW_STICKY)); SetJumpTarget(noOverflow); + // Restore result and val2 -- TODO: does this really matter?
+ XOR(64, R(result), R(val2)); + if (subtract) + NEG(64, R(val2)); + m_gpr.PutReg(DSP_REG_SR); - if (carry_eq) - { - Update_SR_Register(); - } - else - { - Update_SR_Register(val); - } + Update_SR_Register(result, scratch); } // In: RAX: s64 _Value diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp index efbd59d164..7fba07f974 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitMultiplier.cpp @@ -259,13 +259,14 @@ void DSPEmitter::addpaxz(const UDSPInstruction opc) // s64 oldprod = dsp_get_long_prod(); // dsp_set_long_acc(dreg, res); // res = dsp_get_long_acc(dreg); - // Update_SR_Register64(res, isCarry(oldprod, res), false); + // Update_SR_Register64(res, isCarryAdd(oldprod, res), false); if (FlagsNeeded()) { get_long_prod(RDX); MOV(64, R(RCX), R(RAX)); set_long_acc(dreg, RCX); - Update_SR_Register64_Carry(EAX, tmp1); + // TODO: Why does this not set the overflow bit? (And thus, why can't it use UpdateSR64Add?) 
+ Update_SR_Register64(EAX, tmp1); } else { diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp index 4957947b40..bbf97b98c7 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitUtil.cpp @@ -690,7 +690,15 @@ void DSPEmitter::set_long_prod() m_gpr.PutReg(DSP_REG_PROD_64, true); } -// Returns s64 in RAX +// s64 -> s40 in long_acc +void DSPEmitter::dsp_convert_long_acc(Gen::X64Reg long_acc) +{ + // return ((long_acc << (64 - 40)) >> (64 - 40)) + SHL(64, R(long_acc), Imm8(64 - 40)); // sign extend + SAR(64, R(long_acc), Imm8(64 - 40)); +} + +// Returns s64 in long_acc void DSPEmitter::round_long_acc(X64Reg long_acc) { // if (prod & 0x10000) prod = (prod + 0x8000) & ~0xffff; From 779cd479bc7ddcaaa3826f54146ee6aef22310ee Mon Sep 17 00:00:00 2001 From: Pokechu22 Date: Wed, 18 Aug 2021 16:28:08 -0700 Subject: [PATCH 11/11] DSPLLE: NEG can set the carry and overflow flags --- .../Core/DSP/Interpreter/DSPIntArithmetic.cpp | 15 ++++++--- .../Core/DSP/Jit/x64/DSPJitArithmetic.cpp | 31 +++++++++++++------ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp index 4cfa8d446e..c4319db554 100644 --- a/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp +++ b/Source/Core/Core/DSP/Interpreter/DSPIntArithmetic.cpp @@ -704,18 +704,23 @@ void Interpreter::dec(const UDSPInstruction opc) // 0111 110d xxxx xxxx // Negate accumulator $acD. // -// flags out: --xx xx00 +// flags out: x-xx xxxx +// +// The carry flag is set only if $acD was zero. +// The overflow flag is set only if $acD was 0x8000000000 (the minimum value), +// as -INT_MIN is INT_MIN in two's complement. In both of these cases, +// the value of $acD after the operation is the same as it was before. 
void Interpreter::neg(const UDSPInstruction opc) { const u8 dreg = (opc >> 8) & 0x1; - s64 acc = GetLongAcc(dreg); - acc = 0 - acc; + const s64 acc = GetLongAcc(dreg); + const s64 res = 0 - acc; ZeroWriteBackLog(); - SetLongAcc(dreg, acc); - UpdateSR64(GetLongAcc(dreg)); + SetLongAcc(dreg, res); + UpdateSR64Sub(0, acc, GetLongAcc(dreg)); } // ABS $acD diff --git a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp index e651bfb84f..1a9270cab1 100644 --- a/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp +++ b/Source/Core/Core/DSP/Jit/x64/DSPJitArithmetic.cpp @@ -1002,20 +1002,33 @@ void DSPEmitter::dec(const UDSPInstruction opc) // 0111 110d xxxx xxxx // Negate accumulator $acD. // -// flags out: --xx xx00 +// flags out: x-xx xxxx +// +// The carry flag is set only if $acD was zero. +// The overflow flag is set only if $acD was 0x8000000000 (the minimum value), +// as -INT_MIN is INT_MIN in two's complement. In both of these cases, +// the value of $acD after the operation is the same as it was before. void DSPEmitter::neg(const UDSPInstruction opc) { u8 dreg = (opc >> 8) & 0x1; - // s64 acc = dsp_get_long_acc(dreg); - get_long_acc(dreg); - // acc = 0 - acc; - NEG(64, R(RAX)); - // dsp_set_long_acc(dreg, acc); - set_long_acc(dreg); - // Update_SR_Register64(dsp_get_long_acc(dreg)); + // const s64 acc = GetLongAcc(dreg); + X64Reg acc = RAX; + get_long_acc(dreg, acc); + // const s64 res = 0 - acc; + X64Reg res = RCX; + MOV(64, R(res), R(acc)); + NEG(64, R(res)); + // SetLongAcc(dreg, res); + set_long_acc(dreg, res); if (FlagsNeeded()) { - Update_SR_Register64(); + // UpdateSR64Sub(0, acc, GetLongAcc(dreg)); + get_long_acc(dreg, res); + X64Reg imm_reg = RDX; + XOR(64, R(imm_reg), R(imm_reg)); + X64Reg tmp1 = m_gpr.GetFreeXReg(); + UpdateSR64Sub(imm_reg, acc, res, tmp1); + m_gpr.PutXReg(tmp1); } }