iR5900: Use 64-bit math on x86-64

TellowKrinkle 2020-09-23 04:29:30 -05:00 committed by refractionpcsx2
parent e74ba82093
commit 65e57a8230
16 changed files with 552 additions and 104 deletions

View File

@ -131,6 +131,10 @@ namespace x86Emitter
void operator()(const xRegister16or32or64& to, const xIndirect8& sibsrc) const;
void operator()(const xRegister32or64& to, const xRegister16& from) const;
void operator()(const xRegister32or64& to, const xIndirect16& sibsrc) const;
#ifdef __M_X86_64
void operator()(const xRegister64& to, const xRegister32& from) const;
void operator()(const xRegister64& to, const xIndirect32& sibsrc) const;
#endif
//void operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const;
//void operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const;

View File

@ -168,6 +168,7 @@ namespace x86Emitter
extern void xCWD();
extern void xCDQ();
extern void xCWDE();
extern void xCDQE();
extern void xLAHF();
extern void xSAHF();
@ -216,6 +217,31 @@ namespace x86Emitter
/// On x86-64, resolves to either `mov dst, (sptr)addr` or `lea dst, [addr]` depending on the distance from RIP
void xLoadFarAddr(const xAddressReg& dst, void* addr);
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper function to write a 64-bit constant to memory
/// May use `tmp` on x86-64
void xWriteImm64ToMem(u64* addr, const xAddressReg& tmp, u64 imm);
#ifdef __M_X86_64
//////////////////////////////////////////////////////////////////////////////////////////
/// Helper function to run operations with large immediates
/// If the immediate fits in 32 bits, runs op(dst, imm)
/// Otherwise, loads imm into tmpRegister and then runs op(dst, tmpRegister)
template <typename Op, typename Dst>
void xImm64Op(const Op& op, const Dst& dst, const xRegister64& tmpRegister, s64 imm)
{
if (imm == (s32)imm)
{
op(dst, imm);
}
else
{
xMOV64(tmpRegister, imm);
op(dst, tmpRegister);
}
}
#endif
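For illustration, a minimal sketch of a typical xImm64Op call (the GPR index is hypothetical; the xADD/ptr64/rax forms are the same ones used later in this commit):

// Add a 64-bit constant to a guest GPR slot. rax is only clobbered when the
// constant does not fit in a sign-extended 32-bit immediate.
xImm64Op(xADD, ptr64[&cpuRegs.GPR.r[1].UD[0]], rax, 0x123456789LL);
// imm fits in 32 bits  -> add qword ptr [slot], imm32
// imm does not fit     -> mov rax, imm64; add qword ptr [slot], rax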
//////////////////////////////////////////////////////////////////////////////////////////
// JMP / Jcc Instructions!

View File

@ -241,18 +241,20 @@ namespace x86Emitter
xOpWrite0F(SignExtend ? 0xbf : 0xb7, to, sibsrc);
}
#if 0
void xImpl_MovExtend::operator()( const xRegister32& to, const xDirectOrIndirect16& src ) const
{
EbpAssert();
_DoI_helpermess( *this, to, src );
}
#ifdef __M_X86_64
void xImpl_MovExtend::operator()(const xRegister64& to, const xRegister32& from) const
{
EbpAssert();
pxAssertMsg(SignExtend, "Use mov for 64-bit movzx");
xOpWrite(0, 0x63, to, from);
}
void xImpl_MovExtend::operator()( const xRegister16or32& to, const xDirectOrIndirect8& src ) const
{
EbpAssert();
_DoI_helpermess( *this, to, src );
}
void xImpl_MovExtend::operator()(const xRegister64& to, const xIndirect32& sibsrc) const
{
EbpAssert();
pxAssertMsg(SignExtend, "Use mov for 64-bit movzx");
xOpWrite(0, 0x63, to, sibsrc);
}
#endif
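A brief illustration of what the assert above enforces; the byte pattern is the one checked by the codegen test added at the end of this commit:

xMOVSX(rax, ebx); // movsxd rax, ebx -> 48 63 c3: sign extension needs the new overload
// Zero extension needs no 64-bit movzx overload: on x86-64, any write to a 32-bit
// register already clears the upper half of the full register.
xMOV(eax, ebx);   // mov eax, ebx also zero-extends into rax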
const xImpl_MovExtend xMOVSX = {true};

View File

@ -1156,6 +1156,7 @@ const xRegister32
__fi void xCWD() { xWrite8(0x98); }
__fi void xCDQ() { xWrite8(0x99); }
__fi void xCWDE() { xWrite8(0x98); }
__fi void xCDQE() { xWrite16(0x9848); }
__fi void xLAHF() { xWrite8(0x9f); }
__fi void xSAHF() { xWrite8(0x9e); }
@ -1368,4 +1369,14 @@ const xRegister32
#endif
}
void xWriteImm64ToMem(u64* addr, const xAddressReg& tmp, u64 imm)
{
#ifdef __M_X86_64
xImm64Op(xMOV, ptr64[addr], tmp, imm);
#else
xMOV(ptr32[(u32*)addr], (u32)(imm & 0xFFFFFFFF));
xMOV(ptr32[(u32*)addr + 1], (u32)(imm >> 32));
#endif
}
} // End namespace x86Emitter
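Two standalone checks (plain C++, not emitter code) of the arithmetic these helpers rely on: the imm == (s32)imm shortcut in xImm64Op, and the dword split used by the 32-bit fallback of xWriteImm64ToMem.

#include <cassert>
#include <cstdint>

int main()
{
    // imm == (s32)imm holds exactly when imm survives sign extension from 32 bits,
    // i.e. when it can be encoded as an x86 imm32 on a 64-bit operation.
    int64_t a = -1, b = 0x123456789LL;
    assert(a == (int32_t)a); // fits: a single op with imm32 is enough
    assert(b != (int32_t)b); // does not fit: needs mov tmp, imm64 first

    // The 32-bit fallback stores the constant as two dwords.
    uint64_t imm = 0x1122334455667788ULL;
    uint32_t lo = (uint32_t)(imm & 0xFFFFFFFF);
    uint32_t hi = (uint32_t)(imm >> 32);
    assert((((uint64_t)hi << 32) | lo) == imm);
    return 0;
}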

View File

@ -158,10 +158,7 @@ void recMFC0()
return;
_deleteEEreg(_Rt_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
eeSignExtendTo(_Rt_);
return;
}
@ -187,10 +184,7 @@ void recMFC0()
xMOV(eax, ptr[&cpuRegs.PERF.n.pcr1]);
}
_deleteEEreg(_Rt_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
eeSignExtendTo(_Rt_);
return;
}
@ -202,9 +196,7 @@ void recMFC0()
_eeOnWriteReg(_Rt_, 1);
_deleteEEreg(_Rt_, 0);
xMOV(eax, ptr[&cpuRegs.CP0.r[_Rd_]]);
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
eeSignExtendTo(_Rt_);
}
void recMTC0()

View File

@ -117,6 +117,7 @@ u32* _eeGetConstReg(int reg);
void _eeMoveGPRtoR(const x86Emitter::xRegister32& to, int fromgpr);
void _eeMoveGPRtoM(uptr to, int fromgpr);
void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr);
void _signExtendToMem(void* mem);
void eeSignExtendTo(int gpr, bool onlyupper = false);
void _eeFlushAllUnused();

View File

@ -218,12 +218,29 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
}
}
void _signExtendToMem(void* mem)
{
#ifdef __M_X86_64
xCDQE();
xMOV(ptr64[mem], rax);
#else
xCDQ();
xMOV(ptr32[mem], eax);
xMOV(ptr32[(void*)((sptr)mem + 4)], edx);
#endif
}
void eeSignExtendTo(int gpr, bool onlyupper)
{
xCDQ();
if (!onlyupper)
xMOV(ptr32[&cpuRegs.GPR.r[gpr].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[gpr].UL[1]], edx);
if (onlyupper)
{
xCDQ();
xMOV(ptr32[&cpuRegs.GPR.r[gpr].UL[1]], edx);
}
else
{
_signExtendToMem(&cpuRegs.GPR.r[gpr].UD[0]);
}
}
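A standalone sanity check (plain C++) that the new single-store path of _signExtendToMem writes the same eight bytes as the old cdq + two-dword-store path:

#include <cassert>
#include <cstdint>

int main()
{
    int32_t eax = -12345;                        // value in eax before the helper
    // x86-64 path: cdqe, then one 64-bit store.
    uint64_t qword = (uint64_t)(int64_t)eax;
    // 32-bit path: cdq fills edx with the sign bits, then two 32-bit stores.
    uint32_t lo = (uint32_t)eax;
    uint32_t hi = (eax < 0) ? 0xFFFFFFFFu : 0u;  // what cdq leaves in edx
    assert(qword == (((uint64_t)hi << 32) | lo));
    return 0;
}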
int _flushXMMunused()

View File

@ -117,10 +117,31 @@ void recDADD_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg];
#ifdef __M_X86_64
if (_Rd_ == vreg)
{
if (!cval.SD[0]) // no-op
return;
if (!cval.SD[0])
return; // no-op
xImm64Op(xADD, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, cval.SD[0]);
}
else
{
if (cval.SD[0])
{
xMOV64(rax, cval.SD[0]);
xADD(rax, ptr64[&cpuRegs.GPR.r[vreg].SD[0]]);
}
else
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[vreg].SD[0]]);
}
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
}
#else
if (_Rd_ == vreg)
{
if (!cval.SD[0])
return; // no-op
xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], cval.SL[0]);
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], cval.SL[1]);
}
@ -136,6 +157,7 @@ void recDADD_constv(int info, int creg, u32 vreg)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
#endif
}
void recDADD_consts(int info)
@ -156,6 +178,31 @@ void recDADD_(int info)
if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_;
#ifdef __M_X86_64
if (_Rd_ == _Rs_ && _Rs_ == _Rt_)
{
xSHL(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 1);
return;
}
xMOV(rax, ptr64[&cpuRegs.GPR.r[rt].SD[0]]);
if (_Rd_ == rs)
{
xADD(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
return;
}
else if (rs == rt)
{
xADD(rax, rax);
}
else
{
xADD(rax, ptr64[&cpuRegs.GPR.r[rs].SD[0]]);
}
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
#else
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].SL[0]]);
if (_Rd_ == _Rs_ && _Rs_ == _Rt_)
@ -186,6 +233,7 @@ void recDADD_(int info)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
#endif
}
EERECOMPILE_CODE0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
@ -232,8 +280,12 @@ void recSUB_(int info)
if (_Rs_ == _Rt_)
{
#ifdef __M_X86_64
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0);
#else
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
#endif
return;
}
@ -262,6 +314,20 @@ void recDSUB_consts(int info)
GPR_reg64 sval = g_cpuConstRegs[_Rs_];
#ifdef __M_X86_64
if (!sval.SD[0] && _Rd_ == _Rt_)
{
xNEG(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]]);
return;
}
else
{
xMOV64(rax, sval.SD[0]);
}
xSUB(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
#else
if (!sval.SD[0] && _Rd_ == _Rt_)
{
/* To understand this 64-bit negate, consider that a negate in 2's complement
@ -285,6 +351,7 @@ void recDSUB_consts(int info)
xSBB(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
#endif
}
void recDSUB_constt(int info)
@ -293,6 +360,21 @@ void recDSUB_constt(int info)
GPR_reg64 tval = g_cpuConstRegs[_Rt_];
#ifdef __M_X86_64
if (_Rd_ == _Rs_)
{
xImm64Op(xSUB, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, tval.SD[0]);
}
else
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rs_].SD[0]]);
if (tval.SD[0])
{
xImm64Op(xSUB, rax, rdx, tval.SD[0]);
}
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
}
#else
if (_Rd_ == _Rs_)
{
xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], tval.SL[0]);
@ -310,12 +392,30 @@ void recDSUB_constt(int info)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
#endif
}
void recDSUB_(int info)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
if (_Rs_ == _Rt_)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0);
}
else if (_Rd_ == _Rs_)
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
xSUB(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
}
else
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[_Rs_].SD[0]]);
xSUB(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
}
#else
if (_Rs_ == _Rt_)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0);
@ -337,6 +437,7 @@ void recDSUB_(int info)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
#endif
}
EERECOMPILE_CODE0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
@ -392,7 +493,34 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
}
GPR_reg64 cval = g_cpuConstRegs[creg];
#ifdef __M_X86_64
if (hasFixed && cval.SD[0] == fixedInput)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], fixedOutput);
}
else if (_Rd_ == vreg)
{
if (cval.SD[0] != identityInput)
xImm64Op(xOP, ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax, cval.UD[0]);
if (op == LogicalOp::NOR)
xNOT(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]]);
}
else
{
if (cval.SD[0] != identityInput)
{
xMOV64(rax, cval.SD[0]);
xOP(rax, ptr64[&cpuRegs.GPR.r[vreg].UD[0]]);
}
else
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[vreg].UD[0]]);
}
if (op == LogicalOp::NOR)
xNOT(rax);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
#else
for (int i = 0; i < 2; i++)
{
if (hasFixed && cval.SL[i] == (s32)fixedInput)
@ -416,6 +544,7 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
}
}
#endif
}
static void recLogicalOp(LogicalOp op, int info)
@ -433,6 +562,31 @@ static void recLogicalOp(LogicalOp op, int info)
if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_;
#ifdef __M_X86_64
if (op == LogicalOp::XOR && rs == rt)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], 0);
}
else if (_Rd_ == rs)
{
if (rs != rt)
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[rt].UD[0]]);
xOP(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
if (op == LogicalOp::NOR)
xNOT(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]]);
}
else
{
xMOV(rax, ptr64[&cpuRegs.GPR.r[rs].UD[0]]);
if (rs != rt)
xOP(rax, ptr64[&cpuRegs.GPR.r[rt].UD[0]]);
if (op == LogicalOp::NOR)
xNOT(rax);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
#else
for (int i = 0; i < 2; i++)
{
if (op == LogicalOp::XOR && rs == rt)
@ -459,6 +613,7 @@ static void recLogicalOp(LogicalOp op, int info)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
}
}
#endif
}
//// AND
@ -565,6 +720,14 @@ void recSLTs_const(int info, int sign, int st)
GPR_reg64 cval = g_cpuConstRegs[st ? _Rt_ : _Rs_];
#ifdef __M_X86_64
const xImpl_Set& SET = st ? (sign ? xSETL : xSETB) : (sign ? xSETG : xSETA);
xXOR(eax, eax);
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UD[0]], rdx, cval.UD[0]);
SET(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
xMOV(eax, 1);
xCMP(ptr32[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UL[1]], cval.UL[1]);
@ -582,12 +745,21 @@ void recSLTs_const(int info, int sign, int st)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
#endif
}
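A small standalone check of the operand flip behind that SET selection: when Rs is the constant operand (st == 0), the emitted cmp compares the register operand against the constant, so "rs < rt" must be tested as "rt > cval", hence SETG/SETA instead of SETL/SETB.

#include <cassert>
#include <cstdint>

int main()
{
    int64_t cval = -7, rt = 3;   // hypothetical constant rs and register rt
    assert((cval < rt) == (rt > cval));              // signed case (SETG)
    uint64_t ucval = 0x8000000000000000ULL, urt = 5;
    assert((ucval < urt) == (urt > ucval));          // unsigned case (SETA)
    return 0;
}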
void recSLTs_(int info, int sign)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
const xImpl_Set& SET = sign ? xSETL : xSETB;
xXOR(eax, eax);
xMOV(rdx, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]]);
xCMP(rdx, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
SET(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
xMOV(eax, 1);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]]);
@ -607,6 +779,7 @@ void recSLTs_(int info, int sign)
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
#endif
}
void recSLT_consts(int info)

View File

@ -71,9 +71,7 @@ void recADDI_(int info)
if (_Imm_ != 0)
xADD(eax, _Imm_);
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
eeSignExtendTo(_Rt_);
}
}
@ -95,6 +93,23 @@ void recDADDI_(int info)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
if (_Rt_ == _Rs_)
{
xADD(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _Imm_);
}
else
{
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
if (_Imm_ != 0)
{
xADD(rax, _Imm_);
}
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
}
#else
if (_Rt_ == _Rs_)
{
xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_);
@ -116,6 +131,7 @@ void recDADDI_(int info)
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
#endif
}
EERECOMPILE_CODEX(eeRecompileCode1, DADDI);
@ -137,6 +153,12 @@ extern u32 s_sltone;
void recSLTIU_(int info)
{
#ifdef __M_X86_64
xXOR(eax, eax);
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
xSETB(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
xMOV(eax, 1);
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff);
@ -154,6 +176,7 @@ void recSLTIU_(int info)
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
#endif
}
EERECOMPILE_CODEX(eeRecompileCode1, SLTIU);
@ -167,6 +190,12 @@ void recSLTI_const()
void recSLTI_(int info)
{
// test silent hill if modding
#ifdef __M_X86_64
xXOR(eax, eax);
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
xSETL(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
xMOV(eax, 1);
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff);
@ -184,6 +213,7 @@ void recSLTI_(int info)
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
#endif
}
EERECOMPILE_CODEX(eeRecompileCode1, SLTI);
@ -209,6 +239,39 @@ static void recLogicalOpI(int info, LogicalOp op)
: op == LogicalOp::XOR ? xXOR : bad;
pxAssert(&xOP != &bad);
#ifdef __M_X86_64
if (_ImmU_ != 0)
{
if (_Rt_ == _Rs_)
{
if (op == LogicalOp::AND)
xOP(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _ImmU_);
else
xOP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _ImmU_);
}
else
{
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
xOP(rax, _ImmU_);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
}
}
else
{
if (op == LogicalOp::AND)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], 0);
}
else
{
if (_Rt_ != _Rs_)
{
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
}
}
}
#else
if (_ImmU_ != 0)
{
if (_Rt_ == _Rs_)
@ -251,6 +314,7 @@ static void recLogicalOpI(int info, LogicalOp op)
}
}
}
#endif
}
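The width asymmetry in the in-place case above (AND on the full 64-bit slot, OR/XOR only on the low dword) follows from ANDI/ORI/XORI zero-extending their 16-bit immediate; a standalone check of that reasoning:

#include <cassert>
#include <cstdint>

int main()
{
    uint64_t gpr = 0x1122334455667788ULL;
    uint64_t imm = 0xABCDULL; // zero-extended 16-bit immediate

    // ANDI: the upper 32 bits of the result are always zero, so one 64-bit AND works.
    assert(((gpr & imm) >> 32) == 0);
    // ORI/XORI: the upper 32 bits are unchanged, so only the low dword needs writing.
    assert(((gpr | imm) >> 32) == (gpr >> 32));
    assert(((gpr ^ imm) >> 32) == (gpr >> 32));
    return 0;
}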
void recANDI_(int info)

View File

@ -147,6 +147,30 @@ void recSetBranchEQ(int info, int bne, int process)
_eeFlushAllUnused();
#ifdef __M_X86_64
if (process & PROCESS_CONSTS)
{
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax, g_cpuConstRegs[_Rs_].UD[0]);
}
else if (process & PROCESS_CONSTT)
{
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], rax, g_cpuConstRegs[_Rt_].UD[0]);
}
else
{
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rs_].UD[0]]);
xCMP(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
}
if (bne)
{
j32Ptr[1] = JE32(0);
}
else
{
j32Ptr[0] = j32Ptr[1] = JNE32(0);
}
#else
if (bne)
{
if (process & PROCESS_CONSTS)
@ -208,6 +232,7 @@ void recSetBranchEQ(int info, int bne, int process)
j32Ptr[1] = JNE32(0);
}
}
#endif
}
_clearNeededXMMregs();

View File

@ -179,6 +179,13 @@ void recLoad32(u32 bits, bool sign)
if (_Rt_)
{
#if __M_X86_64
// EAX holds the loaded value, so sign extend as needed:
if (sign)
xCDQE();
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
// EAX holds the loaded value, so sign extend as needed:
if (sign)
xCDQ();
@ -188,6 +195,7 @@ void recLoad32(u32 bits, bool sign)
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
else
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
#endif
}
}
@ -283,6 +291,17 @@ void recLWL()
xMOV(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff);
xSHR(edx, cl);
# ifdef __M_X86_64
xAND(edx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
// OR in bytes loaded
xNEG(ecx);
xADD(ecx, 24);
xSHL(eax, cl);
xOR(eax, edx);
eeSignExtendTo(_Rt_);
# else
xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);
// OR in bytes loaded
@ -294,6 +313,7 @@ void recLWL()
// eax will always have the sign bit
xCDQ();
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
# endif
#else
iFlushCall(FLUSH_INTERPRETER);
_deleteEEreg(_Rs_, 1);

View File

@ -50,6 +50,29 @@ REC_FUNC_DEL(MOVN, _Rd_);
#else
static void xCopy64(u64* dst, u64* src)
{
#ifdef __M_X86_64
xMOV(rax, ptr64[src]);
xMOV(ptr64[dst], rax);
#else
xMOV(eax, ptr32[(u32*)src]);
xMOV(edx, ptr32[(u32*)src + 1]);
xMOV(ptr32[(u32*)dst], eax);
xMOV(ptr32[(u32*)dst + 1], edx);
#endif
}
static void xCMPToZero64(u64* mem)
{
#ifdef __M_X86_64
xCMP(ptr64[mem], 0);
#else
xMOV(eax, ptr32[(u32*)mem]);
xOR(eax, ptr32[(u32*)mem + 1]);
#endif
}
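A standalone check of the 32-bit fallback in xCMPToZero64, which tests a 64-bit value for zero by OR-ing its two halves:

#include <cassert>
#include <cstdint>

int main()
{
    uint64_t values[] = {0, 1, 0x100000000ULL, 0xFFFFFFFFFFFFFFFFULL};
    for (uint64_t v : values)
    {
        uint32_t lo = (uint32_t)v, hi = (uint32_t)(v >> 32);
        assert(((lo | hi) == 0) == (v == 0)); // ZF after "or eax, hi" matches cmp qword, 0
    }
    return 0;
}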
/*********************************************************
* Load higher 16 bits of the first word in GPR with imm *
* Format: OP rt, immediate *
@ -83,9 +106,7 @@ void recLUI()
else
{
xMOV(eax, (s32)(cpuRegs.code << 16));
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
eeSignExtendTo(_Rt_);
}
EE::Profiler.EmitOp(eeOpcode::LUI);
@ -138,10 +159,7 @@ void recMFHILO(int hi)
else
{
_deleteEEreg(_Rd_, 0);
xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[0] : (uptr)&cpuRegs.LO.UL[0])]);
xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[1] : (uptr)&cpuRegs.LO.UL[1])]);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[0] : &cpuRegs.LO.UD[0]);
}
}
}
@ -189,17 +207,13 @@ void recMTHILO(int hi)
{
if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[(u32*)(addrhilo)], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[(u32*)(addrhilo + 4)], g_cpuConstRegs[_Rs_].UL[1]);
xWriteImm64ToMem((u64*)addrhilo, rax, g_cpuConstRegs[_Rs_].UD[0]);
}
else
{
_eeMoveGPRtoR(ecx, _Rs_);
_flushEEreg(_Rs_);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[(void*)(addrhilo)], eax);
xMOV(ptr[(void*)(addrhilo + 4)], edx);
xCopy64((u64*)addrhilo, &cpuRegs.GPR.r[_Rs_].UD[0]);
}
}
}
@ -275,10 +289,7 @@ void recMFHILO1(int hi)
else
{
_deleteEEreg(_Rd_, 0);
xMOV(eax, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[2] : (uptr)&cpuRegs.LO.UL[2])]);
xMOV(edx, ptr[(void*)(hi ? (uptr)&cpuRegs.HI.UL[3] : (uptr)&cpuRegs.LO.UL[3])]);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], hi ? &cpuRegs.HI.UD[1] : &cpuRegs.LO.UD[1]);
}
}
}
@ -316,16 +327,12 @@ void recMTHILO1(int hi)
{
if (GPR_IS_CONST1(_Rs_))
{
xMOV(ptr32[(u32*)(addrhilo + 8)], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[(u32*)(addrhilo + 12)], g_cpuConstRegs[_Rs_].UL[1]);
xWriteImm64ToMem((u64*)(addrhilo + 8), rax, g_cpuConstRegs[_Rs_].UD[0]);
}
else
{
_flushEEreg(_Rs_);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[(void*)(addrhilo + 8)], eax);
xMOV(ptr[(void*)(addrhilo + 12)], edx);
xCopy64((u64*)(addrhilo + 8), &cpuRegs.GPR.r[_Rs_].UD[0]);
}
}
}
@ -363,34 +370,25 @@ void recMOVZtemp_const()
void recMOVZtemp_consts(int info)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
j8Ptr[0] = JNZ8(0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]);
x86SetJ8(j8Ptr[0]);
}
void recMOVZtemp_constt(int info)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
}
void recMOVZtemp_(int info)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
j8Ptr[0] = JNZ8(0);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
x86SetJ8(j8Ptr[0]);
}
@ -421,34 +419,25 @@ void recMOVNtemp_const()
void recMOVNtemp_consts(int info)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
j8Ptr[0] = JZ8(0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
xWriteImm64ToMem(&cpuRegs.GPR.r[_Rd_].UD[0], rax, g_cpuConstRegs[_Rs_].UD[0]);
x86SetJ8(j8Ptr[0]);
}
void recMOVNtemp_constt(int info)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
}
void recMOVNtemp_(int info)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
xOR(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xCMPToZero64(&cpuRegs.GPR.r[_Rt_].UD[0]);
j8Ptr[0] = JZ8(0);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
xCopy64(&cpuRegs.GPR.r[_Rd_].UD[0], &cpuRegs.GPR.r[_Rs_].UD[0]);
x86SetJ8(j8Ptr[0]);
}

View File

@ -59,8 +59,13 @@ void recWritebackHILO(int info, int writed, int upper)
uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
#ifdef __M_X86_64
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
xMOVSX(rcx, edx);
#else
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
xMOV(ecx, edx);
#endif
if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
{
@ -79,9 +84,7 @@ void recWritebackHILO(int info, int writed, int upper)
reglo = -1;
}
xCDQ();
xMOV(ptr[(void*)(loaddr)], eax);
xMOV(ptr[(void*)(loaddr + 4)], edx);
_signExtendToMem((void*)loaddr);
savedlo = 1;
}
@ -106,10 +109,16 @@ void recWritebackHILO(int info, int writed, int upper)
{
_deleteEEreg(_Rd_, 0);
#ifdef __M_X86_64
if (!savedlo)
xCDQE();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
if (!savedlo)
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
#endif
}
}
@ -129,9 +138,13 @@ void recWritebackHILO(int info, int writed, int upper)
reghi = -1;
}
#ifdef __M_X86_64
xMOV(ptr[(void*)(hiaddr)], rcx);
#else
xMOV(ptr[(void*)(hiaddr)], ecx);
xSAR(ecx, 31);
xMOV(ptr[(void*)(hiaddr + 4)], ecx);
#endif
}
}
@ -156,8 +169,7 @@ void recWritebackConstHILO(u64 res, int writed, int upper)
}
else
{
xMOV(ptr32[(u32*)(loaddr)], res & 0xffffffff);
xMOV(ptr32[(u32*)(loaddr + 4)], (res & 0x80000000) ? 0xffffffff : 0);
xWriteImm64ToMem((u64*)loaddr, rax, (s64)(s32)(res & 0xffffffff));
}
}
@ -177,8 +189,7 @@ void recWritebackConstHILO(u64 res, int writed, int upper)
else
{
_deleteEEreg(XMMGPR_HI, 0);
xMOV(ptr32[(u32*)(hiaddr)], res >> 32);
xMOV(ptr32[(u32*)(hiaddr + 4)], (res >> 63) ? 0xffffffff : 0);
xWriteImm64ToMem((u64*)hiaddr, rax, (s64)res >> 32);
}
}
@ -524,6 +535,20 @@ EERECOMPILE_CODE0(DIVU1, XMMINFO_READS | XMMINFO_READT);
static void writeBackMAddToHiLoRd(int hiloID)
{
#if __M_X86_64
// eax -> LO, edx -> HI
xCDQE();
if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
xMOV(ptr[&cpuRegs.LO.UD[hiloID]], rax);
xMOVSX(rax, edx);
xMOV(ptr[&cpuRegs.HI.UD[hiloID]], rax);
#else
// eax -> LO, ecx -> HI
xCDQ();
if (_Rd_)
@ -541,25 +566,36 @@ static void writeBackMAddToHiLoRd(int hiloID)
xMOV(eax, ecx);
xCDQ();
xMOV(ptr[&cpuRegs.HI.UL[hiloID * 2 + 1]], edx);
#endif
}
static void addConstantAndWriteBackToHiLoRd(int hiloID, u64 constant)
{
#if __M_X86_64
const xRegister32& ehi = edx;
#else
const xRegister32& ehi = ecx;
#endif
_deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1);
xMOV(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]);
xMOV(ecx, ptr[&cpuRegs.HI.UL[hiloID * 2]]);
xMOV(ehi, ptr[&cpuRegs.HI.UL[hiloID * 2]]);
xADD(eax, (u32)(constant & 0xffffffff));
xADC(ecx, (u32)(constant >> 32));
xADC(ehi, (u32)(constant >> 32));
writeBackMAddToHiLoRd(hiloID);
}
static void addEaxEdxAndWriteBackToHiLoRd(int hiloID)
{
#if __M_X86_64
xADD(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]);
xADC(edx, ptr[&cpuRegs.HI.UL[hiloID * 2]]);
#else
xMOV(ecx, edx);
xADD(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]);
xADC(ecx, ptr[&cpuRegs.HI.UL[hiloID * 2]]);
#endif
writeBackMAddToHiLoRd(hiloID);
}
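For reference, a standalone check of the LO/HI split performed by writeBackMAddToHiLoRd above: the 64-bit accumulation lives in edx:eax, LO receives the sign-extended low word (cdqe) and HI the sign-extended high word (movsx rax, edx); the variable names below are local to the example.

#include <cassert>
#include <cstdint>

int main()
{
    int64_t acc = -123456789012345LL;           // edx:eax after the multiply-add
    int32_t lo32 = (int32_t)(acc & 0xFFFFFFFF); // eax
    int32_t hi32 = (int32_t)(acc >> 32);        // edx
    int64_t LO = (int64_t)lo32;                 // cdqe
    int64_t HI = (int64_t)hi32;                 // movsx rax, edx
    // The two sign-extended halves still reconstruct the original 64-bit value.
    assert((((uint64_t)(uint32_t)HI << 32) | (uint32_t)LO) == (uint64_t)acc);
    return 0;
}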

View File

@ -69,9 +69,7 @@ void recSLLs_(int info, int sa)
xSHL(eax, sa);
}
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
eeSignExtendTo(_Rd_);
}
void recSLL_(int info)
@ -95,9 +93,7 @@ void recSRLs_(int info, int sa)
if (sa != 0)
xSHR(eax, sa);
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
eeSignExtendTo(_Rd_);
}
void recSRL_(int info)
@ -121,9 +117,7 @@ void recSRAs_(int info, int sa)
if (sa != 0)
xSAR(eax, sa);
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
eeSignExtendTo(_Rd_);
}
void recSRA_(int info)
@ -141,9 +135,15 @@ void recDSLL_const()
void recDSLLs_(int info, int sa)
{
int rtreg, rdreg;
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (sa != 0)
xSHL(rax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
int rtreg, rdreg;
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
@ -159,6 +159,7 @@ void recDSLLs_(int info, int sa)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSLL_(int info)
@ -176,9 +177,15 @@ void recDSRL_const()
void recDSRLs_(int info, int sa)
{
int rtreg, rdreg;
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (sa != 0)
xSHR(rax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
int rtreg, rdreg;
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
@ -194,6 +201,7 @@ void recDSRLs_(int info, int sa)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSRL_(int info)
@ -211,9 +219,15 @@ void recDSRA_const()
void recDSRAs_(int info, int sa)
{
int rtreg, rdreg, t0reg;
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (sa != 0)
xSAR(rax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
int rtreg, rdreg, t0reg;
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
@ -250,6 +264,7 @@ void recDSRAs_(int info, int sa)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSRA_(int info)
@ -270,12 +285,17 @@ void recDSLL32s_(int info, int sa)
pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
#ifdef __M_X86_64
xSHL(rax, sa + 32);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
if (sa != 0)
{
xSHL(eax, sa);
}
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], eax);
#endif
}
void recDSLL32_(int info)
@ -299,8 +319,12 @@ void recDSRL32s_(int info, int sa)
if (sa != 0)
xSHR(eax, sa);
#ifdef __M_X86_64
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
#endif
}
void recDSRL32_(int info)
@ -318,6 +342,9 @@ void recDSRA32_const()
void recDSRA32s_(int info, int sa)
{
#ifdef __M_X86_64
recDSRAs_(info, sa + 32);
#else
pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
@ -327,6 +354,7 @@ void recDSRA32s_(int info, int sa)
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
#endif
}
void recDSRA32_(int info)
@ -362,6 +390,31 @@ static void recShiftV(const xImpl_Group2& shift)
eeSignExtendTo(_Rd_);
}
#ifdef __M_X86_64
static void recDShiftV_constt(const xImpl_Group2& shift)
{
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV64(rax, g_cpuConstRegs[_Rt_].UD[0]);
shift(rax, cl);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
static void recDShiftV(const xImpl_Group2& shift)
{
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (_Rs_ != 0)
{
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
shift(rax, cl);
}
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
#else
__aligned16 u32 s_sa[4] = {0x1f, 0, 0x3f, 0};
void recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp)
@ -402,6 +455,7 @@ void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp)
xMOVDZX(xRegisterSSE(*rstemp), eax);
*rsreg = *rstemp;
}
#endif
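One property the recDShiftV helpers above rely on: 64-bit x86 shifts mask the count in cl to 6 bits, which matches DSLLV/DSRLV/DSRAV taking their shift amount from the low 6 bits of rs. A standalone illustration of that masking:

#include <cassert>
#include <cstdint>

int main()
{
    uint64_t v = 0x8000000000000001ULL;
    unsigned rs = 0x47;                    // low 6 bits = 7, as both ISAs see it
    assert((v << (rs & 63)) == (v << 7));  // shl rax, cl behaves the same way
    return 0;
}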
//// SLLV
void recSLLV_const()
@ -489,6 +543,9 @@ void recDSLLV_consts(int info)
void recDSLLV_constt(int info)
{
#ifdef __M_X86_64
recDShiftV_constt(xSHL);
#else
int rsreg, rdreg, rstemp = -1;
recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
@ -502,10 +559,14 @@ void recDSLLV_constt(int info)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
//_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSLLV_(int info)
{
#ifdef __M_X86_64
recDShiftV(xSHL);
#else
int rsreg, rtreg, rdreg, rstemp = -1;
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
@ -519,6 +580,7 @@ void recDSLLV_(int info)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
EERECOMPILE_CODE0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
@ -540,6 +602,9 @@ void recDSRLV_consts(int info)
void recDSRLV_constt(int info)
{
#ifdef __M_X86_64
recDShiftV_constt(xSHR);
#else
int rsreg, rdreg, rstemp = -1;
recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
@ -554,10 +619,14 @@ void recDSRLV_constt(int info)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
//_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSRLV_(int info)
{
#ifdef __M_X86_64
recDShiftV(xSHR);
#else
int rsreg, rtreg, rdreg, rstemp = -1;
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
@ -571,6 +640,7 @@ void recDSRLV_(int info)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
EERECOMPILE_CODE0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
@ -592,6 +662,9 @@ void recDSRAV_consts(int info)
void recDSRAV_constt(int info)
{
#ifdef __M_X86_64
recDShiftV_constt(xSAR);
#else
int rsreg, rdreg, rstemp = -1, t0reg, t1reg;
t0reg = _allocTempXMMreg(XMMT_INT, -1);
t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -627,10 +700,14 @@ void recDSRAV_constt(int info)
_freeXMMreg(t1reg);
if (rstemp != -1)
_freeXMMreg(rstemp);
#endif
}
void recDSRAV_(int info)
{
#ifdef __M_X86_64
recDShiftV(xSAR);
#else
int rsreg, rtreg, rdreg, rstemp = -1, t0reg, t1reg;
t0reg = _allocTempXMMreg(XMMT_INT, -1);
t1reg = _allocTempXMMreg(XMMT_INT, -1);
@ -665,6 +742,7 @@ void recDSRAV_(int info)
_freeXMMreg(t1reg);
if (rstemp != -1)
_freeXMMreg(rstemp);
#endif
}
EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);

View File

@ -393,6 +393,12 @@ static void recCFC2()
xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]);
// FixMe: Should R-Reg have upper 9 bits 0?
#ifdef __M_X86_64
if (_Rd_ >= 16)
xCDQE(); // Sign Extend
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
if (_Rd_ >= 16)
@ -403,6 +409,7 @@ static void recCFC2()
}
else
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
#endif
// FixMe: I think this is needed, but not sure how it works
// Update Refraction 20/09/2020: This is needed because Const Prop is broken

View File

@ -51,6 +51,9 @@ TEST(CodegenTests, MOVTest)
CODEGEN_TEST_64(xMOV64(rax, 0x1234567890), "48 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_64(xMOV64(r8, 0x1234567890), "49 b8 90 78 56 34 12 00 00 00");
CODEGEN_TEST_64(xMOV(ptr32[base], 0x12), "c7 05 f6 ff ff ff 12 00 00 00");
CODEGEN_TEST_BOTH(xMOVSX(eax, dx), "0f bf c2");
CODEGEN_TEST_64(xMOVSX(rax, r8d), "49 63 c0");
CODEGEN_TEST_64(xMOVSX(rax, ebx), "48 63 c3");
}
TEST(CodegenTests, LEATest)