From 65e57a823044dbff2ad1c8983ee89882518b9775 Mon Sep 17 00:00:00 2001 From: TellowKrinkle Date: Wed, 23 Sep 2020 04:29:30 -0500 Subject: [PATCH] iR5900: Use 64-bit math on x86-64 --- common/emitter/implement/movs.h | 4 + common/emitter/instructions.h | 26 +++ common/emitter/movs.cpp | 24 +-- common/emitter/x86emitter.cpp | 11 ++ pcsx2/x86/iCOP0.cpp | 14 +- pcsx2/x86/iR5900.h | 1 + pcsx2/x86/ix86-32/iR5900-32.cpp | 25 ++- pcsx2/x86/ix86-32/iR5900Arit.cpp | 179 +++++++++++++++++- pcsx2/x86/ix86-32/iR5900AritImm.cpp | 70 ++++++- pcsx2/x86/ix86-32/iR5900Branch.cpp | 25 +++ pcsx2/x86/ix86-32/iR5900LoadStore.cpp | 20 ++ pcsx2/x86/ix86-32/iR5900Move.cpp | 91 ++++----- pcsx2/x86/ix86-32/iR5900MultDiv.cpp | 54 +++++- pcsx2/x86/ix86-32/iR5900Shift.cpp | 102 ++++++++-- pcsx2/x86/microVU_Macro.inl | 7 + tests/ctest/x86emitter/codegen_tests_main.cpp | 3 + 16 files changed, 552 insertions(+), 104 deletions(-) diff --git a/common/emitter/implement/movs.h b/common/emitter/implement/movs.h index 0198f96c53..82f5b78114 100644 --- a/common/emitter/implement/movs.h +++ b/common/emitter/implement/movs.h @@ -131,6 +131,10 @@ namespace x86Emitter void operator()(const xRegister16or32or64& to, const xIndirect8& sibsrc) const; void operator()(const xRegister32or64& to, const xRegister16& from) const; void operator()(const xRegister32or64& to, const xIndirect16& sibsrc) const; +#ifdef __M_X86_64 + void operator()(const xRegister64& to, const xRegister32& from) const; + void operator()(const xRegister64& to, const xIndirect32& sibsrc) const; +#endif //void operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const; //void operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const; diff --git a/common/emitter/instructions.h b/common/emitter/instructions.h index 28196ed813..be0e577b2e 100644 --- a/common/emitter/instructions.h +++ b/common/emitter/instructions.h @@ -168,6 +168,7 @@ namespace x86Emitter extern void xCWD(); extern void xCDQ(); extern void 
xCWDE(); + extern void xCDQE(); extern void xLAHF(); extern void xSAHF(); @@ -216,6 +217,31 @@ namespace x86Emitter /// On x86-64, resolves to either `mov dst, (sptr)addr` or `lea dst, [addr]` depending on the distance from RIP void xLoadFarAddr(const xAddressReg& dst, void* addr); + ////////////////////////////////////////////////////////////////////////////////////////// + /// Helper function to write a 64-bit constant to memory + /// May use `tmp` on x86-64 + void xWriteImm64ToMem(u64* addr, const xAddressReg& tmp, u64 imm); + +#ifdef __M_X86_64 + ////////////////////////////////////////////////////////////////////////////////////////// + /// Helper function to run operations with large immediates + /// If the immediate fits in 32 bits, runs op(dst, imm) + /// Otherwise, loads imm into tmpRegister and then runs op(dst, tmpRegister) + template <typename Op, typename Dst> + void xImm64Op(const Op& op, const Dst& dst, const xRegister64& tmpRegister, s64 imm) + { + if (imm == (s32)imm) + { + op(dst, imm); + } + else + { + xMOV64(tmpRegister, imm); + op(dst, tmpRegister); + } + } +#endif + ////////////////////////////////////////////////////////////////////////////////////////// // JMP / Jcc Instructions! diff --git a/common/emitter/movs.cpp b/common/emitter/movs.cpp index 447996afe8..a2338e4ef3 100644 --- a/common/emitter/movs.cpp +++ b/common/emitter/movs.cpp @@ -241,18 +241,20 @@ namespace x86Emitter xOpWrite0F(SignExtend ? 
0xbf : 0xb7, to, sibsrc); } -#if 0 -void xImpl_MovExtend::operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const -{ - EbpAssert(); - _DoI_helpermess( *this, to, src ); -} +#ifdef __M_X86_64 + void xImpl_MovExtend::operator()(const xRegister64& to, const xRegister32& from) const + { + EbpAssert(); + pxAssertMsg(SignExtend, "Use mov for 64-bit movzx"); + xOpWrite(0, 0x63, to, from); + } -void xImpl_MovExtend::operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const -{ - EbpAssert(); - _DoI_helpermess( *this, to, src ); -} + void xImpl_MovExtend::operator()(const xRegister64& to, const xIndirect32& sibsrc) const + { + EbpAssert(); + pxAssertMsg(SignExtend, "Use mov for 64-bit movzx"); + xOpWrite(0, 0x63, to, sibsrc); + } #endif const xImpl_MovExtend xMOVSX = {true}; diff --git a/common/emitter/x86emitter.cpp b/common/emitter/x86emitter.cpp index 6398d8a024..d6e3405595 100644 --- a/common/emitter/x86emitter.cpp +++ b/common/emitter/x86emitter.cpp @@ -1156,6 +1156,7 @@ const xRegister32 __fi void xCWD() { xWrite8(0x98); } __fi void xCDQ() { xWrite8(0x99); } __fi void xCWDE() { xWrite8(0x98); } + __fi void xCDQE() { xWrite16(0x9848); } __fi void xLAHF() { xWrite8(0x9f); } __fi void xSAHF() { xWrite8(0x9e); } @@ -1368,4 +1369,14 @@ const xRegister32 #endif } + void xWriteImm64ToMem(u64* addr, const xAddressReg& tmp, u64 imm) + { +#ifdef __M_X86_64 + xImm64Op(xMOV, ptr64[addr], tmp, imm); +#else + xMOV(ptr32[(u32*)addr], (u32)(imm & 0xFFFFFFFF)); + xMOV(ptr32[(u32*)addr + 1], (u32)(imm >> 32)); +#endif + } + } // End namespace x86Emitter diff --git a/pcsx2/x86/iCOP0.cpp b/pcsx2/x86/iCOP0.cpp index 913eb5ce3d..bdf015b633 100644 --- a/pcsx2/x86/iCOP0.cpp +++ b/pcsx2/x86/iCOP0.cpp @@ -158,10 +158,7 @@ void recMFC0() return; _deleteEEreg(_Rt_, 0); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); - - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); + eeSignExtendTo(_Rt_); return; } @@ -187,10 +184,7 @@ void recMFC0() xMOV(eax, 
ptr[&cpuRegs.PERF.n.pcr1]); } _deleteEEreg(_Rt_, 0); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); - - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); + eeSignExtendTo(_Rt_); return; } @@ -202,9 +196,7 @@ void recMFC0() _eeOnWriteReg(_Rt_, 1); _deleteEEreg(_Rt_, 0); xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]); - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); + eeSignExtendTo(_Rt_); } void recMTC0() diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 65eb1df0ee..49732cc65a 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -117,6 +117,7 @@ u32* _eeGetConstReg(int reg); void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr); void _eeMoveGPRtoM(uptr to, int fromgpr); void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr); +void _signExtendToMem(void* mem); void eeSignExtendTo(int gpr, bool onlyupper = false); void _eeFlushAllUnused(); diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 7d35c9b504..b3e8c04344 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -218,12 +218,29 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr) } } +void _signExtendToMem(void* mem) +{ +#ifdef __M_X86_64 + xCDQE(); + xMOV(ptr64[mem], rax); +#else + xCDQ(); + xMOV(ptr32[mem], eax); + xMOV(ptr32[(void*)((sptr)mem + 4)], edx); +#endif +} + void eeSignExtendTo(int gpr, bool onlyupper) { - xCDQ(); - if (!onlyupper) - xMOV(ptr32[&cpuRegs.GPR.r[gpr].UL[0]], eax); - xMOV(ptr32[&cpuRegs.GPR.r[gpr].UL[1]], edx); + if (onlyupper) + { + xCDQ(); + xMOV(ptr32[&cpuRegs.GPR.r[gpr].UL[1]], edx); + } + else + { + _signExtendToMem(&cpuRegs.GPR.r[gpr].UD[0]); + } } int _flushXMMunused() diff --git a/pcsx2/x86/ix86-32/iR5900Arit.cpp b/pcsx2/x86/ix86-32/iR5900Arit.cpp index 3e5a05abbc..2793f3880c 100644 --- a/pcsx2/x86/ix86-32/iR5900Arit.cpp +++ b/pcsx2/x86/ix86-32/iR5900Arit.cpp @@ -117,10 +117,31 @@ void recDADD_constv(int info, int creg, u32 vreg) GPR_reg64 cval = 
g_cpuConstRegs[creg]; +#ifdef __M_X86_64 if (_Rd_ == vreg) { - if (!cval.SD[0]) // no-op - return; + if (!cval.SD[0]) + return; // no-op + xImm64Op(xADD, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, cval.SD[0]); + } + else + { + if (cval.SD[0]) + { + xMOV64(rax, cval.SD[0]); + xADD(rax, ptr64[&cpuRegs.GPR.r[vreg].SD[0]]); + } + else + { + xMOV(rax, ptr64[&cpuRegs.GPR.r[vreg].SD[0]]); + } + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax); + } +#else + if (_Rd_ == vreg) + { + if (!cval.SD[0]) + return; // no-op xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], cval.SL[0]); xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], cval.SL[1]); } @@ -136,6 +157,7 @@ void recDADD_constv(int info, int creg, u32 vreg) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); } +#endif } void recDADD_consts(int info) @@ -156,6 +178,31 @@ void recDADD_(int info) if (_Rd_ == _Rt_) rs = _Rt_, rt = _Rs_; +#ifdef __M_X86_64 + if (_Rd_ == _Rs_ && _Rs_ == _Rt_) + { + xSHL(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 1); + return; + } + + xMOV(rax, ptr64[&cpuRegs.GPR.r[rt].SD[0]]); + + if (_Rd_ == rs) + { + xADD(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax); + return; + } + else if (rs == rt) + { + xADD(rax, rax); + } + else + { + xADD(rax, ptr32[&cpuRegs.GPR.r[rs].SD[0]]); + } + + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax); +#else xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].SL[0]]); if (_Rd_ == _Rs_ && _Rs_ == _Rt_) @@ -186,6 +233,7 @@ void recDADD_(int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); +#endif } EERECOMPILE_CODE0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT); @@ -232,8 +280,12 @@ void recSUB_(int info) if (_Rs_ == _Rt_) { +#ifdef __M_X86_64 + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0); +#else xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0); +#endif return; } @@ -262,6 +314,20 @@ void recDSUB_consts(int info) GPR_reg64 sval = g_cpuConstRegs[_Rs_]; +#ifdef __M_X86_64 + if (!sval.SD[0] && _Rd_ == 
_Rt_) + { + xNEG(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]]); + return; + } + else + { + xMOV64(rax, sval.SD[0]); + } + + xSUB(rax, ptr32[&cpuRegs.GPR.r[_Rt_].SD[0]]); + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax); +#else if (!sval.SD[0] && _Rd_ == _Rt_) { /* To understand this 64-bit negate, consider that a negate in 2's complement @@ -285,6 +351,7 @@ void recDSUB_consts(int info) xSBB(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); +#endif } void recDSUB_constt(int info) @@ -293,6 +360,21 @@ void recDSUB_constt(int info) GPR_reg64 tval = g_cpuConstRegs[_Rt_]; +#ifdef __M_X86_64 + if (_Rd_ == _Rs_) + { + xImm64Op(xSUB, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, tval.SD[0]); + } + else + { + xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rs_].SD[0]]); + if (tval.SD[0]) + { + xImm64Op(xSUB, rax, rdx, tval.SD[0]); + } + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax); + } +#else if (_Rd_ == _Rs_) { xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], tval.SL[0]); @@ -310,12 +392,30 @@ void recDSUB_constt(int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); } +#endif } void recDSUB_(int info) { pxAssert(!(info & PROCESS_EE_XMM)); +#ifdef __M_X86_64 + if (_Rs_ == _Rt_) + { + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0); + } + else if (_Rd_ == _Rs_) + { + xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]); + xSUB(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax); + } + else + { + xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rs_].SD[0]]); + xSUB(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]); + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax); + } +#else if (_Rs_ == _Rt_) { xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0); @@ -337,6 +437,7 @@ void recDSUB_(int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx); } +#endif } EERECOMPILE_CODE0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); @@ -392,7 +493,34 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, 
u32 vreg) } GPR_reg64 cval = g_cpuConstRegs[creg]; - +#ifdef __M_X86_64 + if (hasFixed && cval.SD[0] == fixedInput) + { + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], fixedOutput); + } + else if (_Rd_ == vreg) + { + if (cval.SD[0] != identityInput) + xImm64Op(xOP, ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax, cval.UD[0]); + if (op == LogicalOp::NOR) + xNOT(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]]); + } + else + { + if (cval.SD[0] != identityInput) + { + xMOV64(rax, cval.SD[0]); + xOP(rax, ptr32[&cpuRegs.GPR.r[vreg].UD[0]]); + } + else + { + xMOV(rax, ptr32[&cpuRegs.GPR.r[vreg].UD[0]]); + } + if (op == LogicalOp::NOR) + xNOT(rax); + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); + } +#else for (int i = 0; i < 2; i++) { if (hasFixed && cval.SL[i] == (s32)fixedInput) @@ -416,6 +544,7 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); } } +#endif } static void recLogicalOp(LogicalOp op, int info) @@ -433,6 +562,31 @@ static void recLogicalOp(LogicalOp op, int info) if (_Rd_ == _Rt_) rs = _Rt_, rt = _Rs_; +#ifdef __M_X86_64 + if (op == LogicalOp::XOR && rs == rt) + { + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], 0); + } + else if (_Rd_ == rs) + { + if (rs != rt) + { + xMOV(rax, ptr64[&cpuRegs.GPR.r[rt].UD[0]]); + xOP(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); + } + if (op == LogicalOp::NOR) + xNOT(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]]); + } + else + { + xMOV(rax, ptr64[&cpuRegs.GPR.r[rs].UD[0]]); + if (rs != rt) + xOP(rax, ptr64[&cpuRegs.GPR.r[rt].UD[0]]); + if (op == LogicalOp::NOR) + xNOT(rax); + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); + } +#else for (int i = 0; i < 2; i++) { if (op == LogicalOp::XOR && rs == rt) @@ -459,6 +613,7 @@ static void recLogicalOp(LogicalOp op, int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax); } } +#endif } //// AND @@ -565,6 +720,14 @@ void recSLTs_const(int info, int sign, int st) GPR_reg64 cval = g_cpuConstRegs[st ? _Rt_ : _Rs_]; +#ifdef __M_X86_64 + const xImpl_Set& SET = st ? 
(sign ? xSETL : xSETB) : (sign ? xSETG : xSETA); + + xXOR(eax, eax); + xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UD[0]], rdx, cval.UD[0]); + SET(al); + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else xMOV(eax, 1); xCMP(ptr32[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UL[1]], cval.UL[1]); @@ -582,12 +745,21 @@ void recSLTs_const(int info, int sign, int st) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0); +#endif } void recSLTs_(int info, int sign) { pxAssert(!(info & PROCESS_EE_XMM)); +#ifdef __M_X86_64 + const xImpl_Set& SET = sign ? xSETL : xSETB; + xXOR(eax, eax); + xMOV(rdx, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]); + xCMP(rdx, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]); + SET(al); + xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else xMOV(eax, 1); xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]]); @@ -607,6 +779,7 @@ void recSLTs_(int info, int sign) xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0); +#endif } void recSLT_consts(int info) diff --git a/pcsx2/x86/ix86-32/iR5900AritImm.cpp b/pcsx2/x86/ix86-32/iR5900AritImm.cpp index 871e01df4d..a8f553ccac 100644 --- a/pcsx2/x86/ix86-32/iR5900AritImm.cpp +++ b/pcsx2/x86/ix86-32/iR5900AritImm.cpp @@ -71,9 +71,7 @@ void recADDI_(int info) if (_Imm_ != 0) xADD(eax, _Imm_); - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); + eeSignExtendTo(_Rt_); } } @@ -95,6 +93,23 @@ void recDADDI_(int info) { pxAssert(!(info & PROCESS_EE_XMM)); +#ifdef __M_X86_64 + if (_Rt_ == _Rs_) + { + xADD(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _Imm_); + } + else + { + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); + + if (_Imm_ != 0) + { + xADD(rax, _Imm_); + } + + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); + } +#else if (_Rt_ == _Rs_) { xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_); @@ -116,6 +131,7 @@ void recDADDI_(int info) xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); } +#endif } EERECOMPILE_CODEX(eeRecompileCode1, DADDI); @@ 
-137,6 +153,12 @@ extern u32 s_sltone; void recSLTIU_(int info) { +#ifdef __M_X86_64 + xXOR(eax, eax); + xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_); + xSETB(al); + xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); +#else xMOV(eax, 1); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff); @@ -154,6 +176,7 @@ void recSLTIU_(int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); +#endif } EERECOMPILE_CODEX(eeRecompileCode1, SLTIU); @@ -167,6 +190,12 @@ void recSLTI_const() void recSLTI_(int info) { // test silent hill if modding +#ifdef __M_X86_64 + xXOR(eax, eax); + xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_); + xSETL(al); + xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); +#else xMOV(eax, 1); xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff); @@ -184,6 +213,7 @@ void recSLTI_(int info) xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); +#endif } EERECOMPILE_CODEX(eeRecompileCode1, SLTI); @@ -209,6 +239,39 @@ static void recLogicalOpI(int info, LogicalOp op) : op == LogicalOp::XOR ? 
xXOR : bad; pxAssert(&xOP != &bad); +#ifdef __M_X86_64 + if (_ImmU_ != 0) + { + if (_Rt_ == _Rs_) + { + if (op == LogicalOp::AND) + xOP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _ImmU_); + else + xOP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _ImmU_); + } + else + { + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); + xOP(rax, _ImmU_); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); + } + } + else + { + if (op == LogicalOp::AND) + { + xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0); + } + else + { + if (_Rt_ != _Rs_) + { + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); + xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); + } + } + } +#else if (_ImmU_ != 0) { if (_Rt_ == _Rs_) @@ -251,6 +314,7 @@ static void recLogicalOpI(int info, LogicalOp op) } } } +#endif } void recANDI_(int info) diff --git a/pcsx2/x86/ix86-32/iR5900Branch.cpp b/pcsx2/x86/ix86-32/iR5900Branch.cpp index 424d6aeaec..ed3f3b9cc4 100644 --- a/pcsx2/x86/ix86-32/iR5900Branch.cpp +++ b/pcsx2/x86/ix86-32/iR5900Branch.cpp @@ -147,6 +147,30 @@ void recSetBranchEQ(int info, int bne, int process) _eeFlushAllUnused(); +#ifdef __M_X86_64 + if (process & PROCESS_CONSTS) + { + xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax, g_cpuConstRegs[_Rs_].UD[0]); + } + else if (process & PROCESS_CONSTT) + { + xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], rax, g_cpuConstRegs[_Rt_].UD[0]); + } + else + { + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]); + xCMP(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]); + } + + if (bne) + { + j32Ptr[1] = JE32(0); + } + else + { + j32Ptr[0] = j32Ptr[1] = JNE32(0); + } +#else if (bne) { if (process & PROCESS_CONSTS) @@ -208,6 +232,7 @@ void recSetBranchEQ(int info, int bne, int process) j32Ptr[1] = JNE32(0); } } +#endif } _clearNeededXMMregs(); diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp index 64c95d2999..520f3b83fd 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.cpp +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.cpp @@ -179,6 +179,13 @@ void recLoad32(u32 bits, bool sign) if (_Rt_) { 
+#if __M_X86_64 + // EAX holds the loaded value, so sign extend as needed: + if (sign) + xCDQE(); + + xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); +#else // EAX holds the loaded value, so sign extend as needed: if (sign) xCDQ(); @@ -188,6 +195,7 @@ void recLoad32(u32 bits, bool sign) xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); else xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); +#endif } } @@ -283,6 +291,17 @@ void recLWL() xMOV(ecx, calleeSavedReg1d); xMOV(edx, 0xffffff); xSHR(edx, cl); +# ifdef __M_X86_64 + xAND(edx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]); + + // OR in bytes loaded + xNEG(ecx); + xADD(ecx, 24); + xSHL(eax, cl); + xOR(eax, edx); + + eeSignExtendTo(_Rt_); +# else xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx); // OR in bytes loaded @@ -294,6 +313,7 @@ void recLWL() // eax will always have the sign bit xCDQ(); xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); +# endif #else iFlushCall(FLUSH_INTERPRETER); _deleteEEreg(_Rs_, 1); diff --git a/pcsx2/x86/ix86-32/iR5900Move.cpp b/pcsx2/x86/ix86-32/iR5900Move.cpp index fa7ffe0e25..b1fd6c26a5 100644 --- a/pcsx2/x86/ix86-32/iR5900Move.cpp +++ b/pcsx2/x86/ix86-32/iR5900Move.cpp @@ -50,6 +50,29 @@ REC_FUNC_DEL(MOVN, _Rd_); #else +static void xCopy64(u64* dst, u64* src) +{ +#ifdef __M_X86_64 + xMOV(rax, ptr64[src]); + xMOV(ptr64[dst], rax); +#else + xMOV(eax, ptr32[(u32*)src]); + xMOV(edx, ptr32[(u32*)src + 1]); + xMOV(ptr32[(u32*)dst], eax); + xMOV(ptr32[(u32*)dst + 1], edx); +#endif +} + +static void xCMPToZero64(u64* mem) +{ +#ifdef __M_X86_64 + xCMP(ptr64[mem], 0); +#else + xMOV(eax, ptr32[(u32*)mem]); + xOR(eax, ptr32[(u32*)mem + 1]); +#endif +} + /********************************************************* * Load higher 16 bits of the first word in GPR with imm * * Format: OP rt, immediate * @@ -83,9 +106,7 @@ void recLUI() else { xMOV(eax, (s32)(cpuRegs.code << 16)); - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx); + eeSignExtendTo(_Rt_); } 
EE::Profiler.EmitOp(eeOpcode::LUI); @@ -138,10 +159,7 @@ void recMFHILO(int hi) else { _deleteEEreg(_Rd_, 0); - xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[0] : (uptr)&cpuRegs.LO.UL[0])]); - xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[1] : (uptr)&cpuRegs.LO.UL[1])]); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[0] : &cpuRegs.LO.UD[0]); } } } @@ -189,17 +207,13 @@ void recMTHILO(int hi) { if (GPR_IS_CONST1(_Rs_)) { - xMOV(ptr32[(u32*)(addrhilo)], g_cpuConstRegs[_Rs_].UL[0]); - xMOV(ptr32[(u32*)(addrhilo + 4)], g_cpuConstRegs[_Rs_].UL[1]); + xWriteImm64ToMem((u64*)addrhilo, rax, g_cpuConstRegs[_Rs_].UD[0]); } else { _eeMoveGPRtoR(ecx, _Rs_); _flushEEreg(_Rs_); - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - xMOV(ptr[(void*)(addrhilo)], eax); - xMOV(ptr[(void*)(addrhilo + 4)], edx); + xCopy64((u64*)addrhilo, &cpuRegs.GPR.r[_Rs_].UD[0]); } } } @@ -275,10 +289,7 @@ void recMFHILO1(int hi) else { _deleteEEreg(_Rd_, 0); - xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[2] : (uptr)&cpuRegs.LO.UL[2])]); - xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[3] : (uptr)&cpuRegs.LO.UL[3])]); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? 
&cpuRegs.HI.UD[1] : &cpuRegs.LO.UD[1]); } } } @@ -316,16 +327,12 @@ void recMTHILO1(int hi) { if (GPR_IS_CONST1(_Rs_)) { - xMOV(ptr32[(u32*)(addrhilo + 8)], g_cpuConstRegs[_Rs_].UL[0]); - xMOV(ptr32[(u32*)(addrhilo + 12)], g_cpuConstRegs[_Rs_].UL[1]); + xWriteImm64ToMem((u64*)(addrhilo + 8), rax, g_cpuConstRegs[_Rs_].UD[0]); } else { _flushEEreg(_Rs_); - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - xMOV(ptr[(void*)(addrhilo + 8)], eax); - xMOV(ptr[(void*)(addrhilo + 12)], edx); + xCopy64((u64*)(addrhilo + 8), &cpuRegs.GPR.r[_Rs_].UD[0]); } } } @@ -363,34 +370,25 @@ void recMOVZtemp_const() void recMOVZtemp_consts(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]); j8Ptr[0] = JNZ8(0); - xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); - xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); + xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]); x86SetJ8(j8Ptr[0]); } void recMOVZtemp_constt(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]); } void recMOVZtemp_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]); j8Ptr[0] = JNZ8(0); - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]); x86SetJ8(j8Ptr[0]); } @@ -421,34 +419,25 @@ void recMOVNtemp_const() void recMOVNtemp_consts(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - xOR(eax, 
ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]); j8Ptr[0] = JZ8(0); - xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]); - xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]); + xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]); x86SetJ8(j8Ptr[0]); } void recMOVNtemp_constt(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]); } void recMOVNtemp_(int info) { - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); - xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); + xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]); j8Ptr[0] = JZ8(0); - xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); - xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]); x86SetJ8(j8Ptr[0]); } diff --git a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp index 6e744916c5..8102025e57 100644 --- a/pcsx2/x86/ix86-32/iR5900MultDiv.cpp +++ b/pcsx2/x86/ix86-32/iR5900MultDiv.cpp @@ -59,8 +59,13 @@ void recWritebackHILO(int info, int writed, int upper) uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0]; u8 testlive = upper ? 
EEINST_LIVE2 : EEINST_LIVE0; +#ifdef __M_X86_64 + if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive) + xMOVSX(rcx, edx); +#else if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive) xMOV(ecx, edx); +#endif if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive) { @@ -79,9 +84,7 @@ void recWritebackHILO(int info, int writed, int upper) reglo = -1; } - xCDQ(); - xMOV(ptr[(void*)(loaddr)], eax); - xMOV(ptr[(void*)(loaddr + 4)], edx); + _signExtendToMem((void*)loaddr); savedlo = 1; } @@ -106,10 +109,16 @@ void recWritebackHILO(int info, int writed, int upper) { _deleteEEreg(_Rd_, 0); +#ifdef __M_X86_64 + if (!savedlo) + xCDQE(); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else if (!savedlo) xCDQ(); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); +#endif } } @@ -129,9 +138,13 @@ void recWritebackHILO(int info, int writed, int upper) reghi = -1; } +#ifdef __M_X86_64 + xMOV(ptr[(void*)(hiaddr)], rcx); +#else xMOV(ptr[(void*)(hiaddr)], ecx); xSAR(ecx, 31); xMOV(ptr[(void*)(hiaddr + 4)], ecx); +#endif } } @@ -156,8 +169,7 @@ void recWritebackConstHILO(u64 res, int writed, int upper) } else { - xMOV(ptr32[(u32*)(loaddr)], res & 0xffffffff); - xMOV(ptr32[(u32*)(loaddr + 4)], (res & 0x80000000) ? 0xffffffff : 0); + xWriteImm64ToMem((u64*)loaddr, rax, (s64)(s32)(res & 0xffffffff)); } } @@ -177,8 +189,7 @@ void recWritebackConstHILO(u64 res, int writed, int upper) else { _deleteEEreg(XMMGPR_HI, 0); - xMOV(ptr32[(u32*)(hiaddr)], res >> 32); - xMOV(ptr32[(u32*)(hiaddr + 4)], (res >> 63) ? 
0xffffffff : 0); + xWriteImm64ToMem((u64*)hiaddr, rax, (s64)res >> 32); } } @@ -524,6 +535,20 @@ EERECOMPILE_CODE0(DIVU1, XMMINFO_READS | XMMINFO_READT); static void writeBackMAddToHiLoRd(int hiloID) { +#if __M_X86_64 + // eax -> LO, edx -> HI + xCDQE(); + if (_Rd_) + { + _eeOnWriteReg(_Rd_, 1); + _deleteEEreg(_Rd_, 0); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); + } + xMOV(ptr[&cpuRegs.LO.UD[hiloID]], rax); + + xMOVSX(rax, edx); + xMOV(ptr[&cpuRegs.HI.UD[hiloID]], rax); +#else // eax -> LO, ecx -> HI xCDQ(); if (_Rd_) @@ -541,25 +566,36 @@ static void writeBackMAddToHiLoRd(int hiloID) xMOV(eax, ecx); xCDQ(); xMOV(ptr[&cpuRegs.HI.UL[hiloID * 2 + 1]], edx); +#endif } static void addConstantAndWriteBackToHiLoRd(int hiloID, u64 constant) { +#if __M_X86_64 + const xRegister32& ehi = edx; +#else + const xRegister32& ehi = ecx; +#endif _deleteEEreg(XMMGPR_LO, 1); _deleteEEreg(XMMGPR_HI, 1); xMOV(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]); - xMOV(ecx, ptr[&cpuRegs.HI.UL[hiloID * 2]]); + xMOV(ehi, ptr[&cpuRegs.HI.UL[hiloID * 2]]); xADD(eax, (u32)(constant & 0xffffffff)); - xADC(ecx, (u32)(constant >> 32)); + xADC(ehi, (u32)(constant >> 32)); writeBackMAddToHiLoRd(hiloID); } static void addEaxEdxAndWriteBackToHiLoRd(int hiloID) { +#if __M_X86_64 + xADD(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]); + xADC(edx, ptr[&cpuRegs.HI.UL[hiloID * 2]]); +#else xMOV(ecx, edx); xADD(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]); xADC(ecx, ptr[&cpuRegs.HI.UL[hiloID * 2]]); +#endif writeBackMAddToHiLoRd(hiloID); } diff --git a/pcsx2/x86/ix86-32/iR5900Shift.cpp b/pcsx2/x86/ix86-32/iR5900Shift.cpp index 67c056fde4..bb26967c86 100644 --- a/pcsx2/x86/ix86-32/iR5900Shift.cpp +++ b/pcsx2/x86/ix86-32/iR5900Shift.cpp @@ -69,9 +69,7 @@ void recSLLs_(int info, int sa) xSHL(eax, sa); } - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + eeSignExtendTo(_Rd_); } void recSLL_(int info) @@ -95,9 +93,7 @@ void recSRLs_(int info, int sa) if (sa != 0) xSHR(eax, sa); - 
xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + eeSignExtendTo(_Rd_); } void recSRL_(int info) @@ -121,9 +117,7 @@ void recSRAs_(int info, int sa) if (sa != 0) xSAR(eax, sa); - xCDQ(); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); - xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); + eeSignExtendTo(_Rd_); } void recSRA_(int info) @@ -141,9 +135,15 @@ void recDSLL_const() void recDSLLs_(int info, int sa) { - int rtreg, rdreg; pxAssert(!(info & PROCESS_EE_XMM)); +#ifdef __M_X86_64 + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]); + if (sa != 0) + xSHL(rax, sa); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else + int rtreg, rdreg; _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); @@ -159,6 +159,7 @@ void recDSLLs_(int info, int sa) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } void recDSLL_(int info) @@ -176,9 +177,15 @@ void recDSRL_const() void recDSRLs_(int info, int sa) { - int rtreg, rdreg; pxAssert(!(info & PROCESS_EE_XMM)); +#ifdef __M_X86_64 + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]); + if (sa != 0) + xSHR(rax, sa); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else + int rtreg, rdreg; _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); @@ -194,6 +201,7 @@ void recDSRLs_(int info, int sa) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } void recDSRL_(int info) @@ -211,9 +219,15 @@ void recDSRA_const() void recDSRAs_(int info, int sa) { - int rtreg, rdreg, t0reg; pxAssert(!(info & PROCESS_EE_XMM)); +#ifdef __M_X86_64 + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]); + if (sa != 0) + xSAR(rax, sa); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else + int rtreg, rdreg, t0reg; _addNeededGPRtoXMMreg(_Rt_); _addNeededGPRtoXMMreg(_Rd_); 
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); @@ -250,6 +264,7 @@ void recDSRAs_(int info, int sa) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } void recDSRA_(int info) @@ -270,12 +285,17 @@ void recDSLL32s_(int info, int sa) pxAssert(!(info & PROCESS_EE_XMM)); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]); +#ifdef __M_X86_64 + xSHL(rax, sa + 32); + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else if (sa != 0) { xSHL(eax, sa); } xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], 0); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], eax); +#endif } void recDSLL32_(int info) @@ -299,8 +319,12 @@ void recDSRL32s_(int info, int sa) if (sa != 0) xSHR(eax, sa); +#ifdef __M_X86_64 + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +#else xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0); +#endif } void recDSRL32_(int info) @@ -318,6 +342,9 @@ void recDSRA32_const() void recDSRA32s_(int info, int sa) { +#ifdef __M_X86_64 + recDSRAs_(info, sa + 32); +#else pxAssert(!(info & PROCESS_EE_XMM)); xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]); @@ -327,6 +354,7 @@ void recDSRA32s_(int info, int sa) xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax); xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx); +#endif } void recDSRA32_(int info) @@ -362,6 +390,31 @@ static void recShiftV(const xImpl_Group2& shift) eeSignExtendTo(_Rd_); } +#ifdef __M_X86_64 + +static void recDShiftV_constt(const xImpl_Group2& shift) +{ + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + + xMOV64(rax, g_cpuConstRegs[_Rt_].UD[0]); + shift(rax, cl); + + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +} + +static void recDShiftV(const xImpl_Group2& shift) +{ + xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]); + if (_Rs_ != 0) + { + xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]); + shift(rax, cl); + } + xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax); +} + +#else + __aligned16 u32 s_sa[4] = {0x1f, 0, 0x3f, 0}; void recSetShiftV(int info, int* rsreg, int* 
rtreg, int* rdreg, int* rstemp) @@ -402,6 +455,7 @@ void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp) xMOVDZX(xRegisterSSE(*rstemp), eax); *rsreg = *rstemp; } +#endif //// SLLV void recSLLV_const() @@ -489,6 +543,9 @@ void recDSLLV_consts(int info) void recDSLLV_constt(int info) { +#ifdef __M_X86_64 + recDShiftV_constt(xSHL); +#else int rsreg, rdreg, rstemp = -1; recSetConstShiftV(info, &rsreg, &rdreg, &rstemp); xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]); @@ -502,10 +559,14 @@ void recDSLLV_constt(int info) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); //_deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } void recDSLLV_(int info) { +#ifdef __M_X86_64 + recDShiftV(xSHL); +#else int rsreg, rtreg, rdreg, rstemp = -1; recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp); @@ -519,6 +580,7 @@ void recDSLLV_(int info) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } EERECOMPILE_CODE0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); @@ -540,6 +602,9 @@ void recDSRLV_consts(int info) void recDSRLV_constt(int info) { +#ifdef __M_X86_64 + recDShiftV_constt(xSHR); +#else int rsreg, rdreg, rstemp = -1; recSetConstShiftV(info, &rsreg, &rdreg, &rstemp); @@ -554,10 +619,14 @@ void recDSRLV_constt(int info) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); //_deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } void recDSRLV_(int info) { +#ifdef __M_X86_64 + recDShiftV(xSHR); +#else int rsreg, rtreg, rdreg, rstemp = -1; recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp); @@ -571,6 +640,7 @@ void recDSRLV_(int info) xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg)); _deleteGPRtoXMMreg(_Rt_, 3); _deleteGPRtoXMMreg(_Rd_, 3); +#endif } EERECOMPILE_CODE0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); @@ -592,6 +662,9 @@ void recDSRAV_consts(int info) void recDSRAV_constt(int 
info) { +#ifdef __M_X86_64 + recDShiftV_constt(xSAR); +#else int rsreg, rdreg, rstemp = -1, t0reg, t1reg; t0reg = _allocTempXMMreg(XMMT_INT, -1); t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -627,10 +700,14 @@ void recDSRAV_constt(int info) _freeXMMreg(t1reg); if (rstemp != -1) _freeXMMreg(rstemp); +#endif } void recDSRAV_(int info) { +#ifdef __M_X86_64 + recDShiftV(xSAR); +#else int rsreg, rtreg, rdreg, rstemp = -1, t0reg, t1reg; t0reg = _allocTempXMMreg(XMMT_INT, -1); t1reg = _allocTempXMMreg(XMMT_INT, -1); @@ -665,6 +742,7 @@ void recDSRAV_(int info) _freeXMMreg(t1reg); if (rstemp != -1) _freeXMMreg(rstemp); +#endif } EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED); diff --git a/pcsx2/x86/microVU_Macro.inl b/pcsx2/x86/microVU_Macro.inl index a4ac444d90..43645bcd73 100644 --- a/pcsx2/x86/microVU_Macro.inl +++ b/pcsx2/x86/microVU_Macro.inl @@ -393,6 +393,12 @@ static void recCFC2() xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]); // FixMe: Should R-Reg have upper 9 bits 0? 
+#ifdef __M_X86_64 + if (_Rd_ >= 16) + xCDQE(); // Sign Extend + + xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax); +#else xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax); if (_Rd_ >= 16) @@ -403,6 +409,7 @@ static void recCFC2() } else xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); +#endif // FixMe: I think this is needed, but not sure how it works // Update Refraction 20/09/2021: This is needed because Const Prop is broken diff --git a/tests/ctest/x86emitter/codegen_tests_main.cpp b/tests/ctest/x86emitter/codegen_tests_main.cpp index a64c15df91..83c6417276 100644 --- a/tests/ctest/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/x86emitter/codegen_tests_main.cpp @@ -51,6 +51,9 @@ TEST(CodegenTests, MOVTest) CODEGEN_TEST_64(xMOV64(rax, 0x1234567890), "48 b8 90 78 56 34 12 00 00 00"); CODEGEN_TEST_64(xMOV64(r8, 0x1234567890), "49 b8 90 78 56 34 12 00 00 00"); CODEGEN_TEST_64(xMOV(ptr32[base], 0x12), "c7 05 f6 ff ff ff 12 00 00 00"); + CODEGEN_TEST_BOTH(xMOVSX(eax, dx), "0f bf c2"); + CODEGEN_TEST_64(xMOVSX(rax, r8d), "49 63 c0"); + CODEGEN_TEST_64(xMOVSX(rax, ebx), "48 63 c3"); } TEST(CodegenTests, LEATest)