EE/VU JIT: Remove 32-bit code

This commit is contained in:
refractionpcsx2 2022-03-19 21:38:20 +00:00
parent 42a5cb7ad2
commit ccd86a242c
18 changed files with 9 additions and 857 deletions

View File

@ -239,10 +239,7 @@ void SysLogMachineCaps()
};
Console.WriteLn( Color_StrongBlack, L"x86 Features Detected:" );
Console.Indent().WriteLn(result[0] + (result[1].IsEmpty() ? L"" : (L"\n" + result[1])));
#ifdef __M_X86_64
Console.Indent().WriteLn("Pcsx2 was compiled as 64-bits.");
#endif
Console.Indent().WriteLn(result[0] + (result[1].IsEmpty() ? L"" : (L"\n" + result[1])));
Console.Newline();

View File

@ -18,11 +18,7 @@
#include <cstddef>
#include <cstdint>
#ifdef __M_X86_64
#define G_MAXSIZE G_MAXUINT64
#else
#define G_MAXSIZE G_MAXUINT32
#endif
#define G_MAXUINT64 0xffffffffffffffffUL
#define G_MAXUINT32 ((uint32_t)0xffffffff)

View File

@ -93,11 +93,7 @@ void MainEmuFrame::UpdateStatusBar()
m_statusbar.SetStatusText(CDVD_SourceLabels[enum_cast(g_Conf->CdvdSource)], 2);
#ifdef __M_X86_64
m_statusbar.SetStatusText("x64", 3);
#else
m_statusbar.SetStatusText("x32", 3);
#endif
}
void MainEmuFrame::UpdateCdvdSrcSelection()

View File

@ -48,11 +48,7 @@ _x86regs x86regs[iREGCNT_GPR], s_saveX86regs[iREGCNT_GPR];
alignas(16) u32 xmmBackup[iREGCNT_XMM][4];
#ifdef __M_X86_64
alignas(16) u64 gprBackup[iREGCNT_GPR];
#else
alignas(16) u32 gprBackup[iREGCNT_GPR];
#endif
static int s_xmmchecknext = 0;
@ -84,11 +80,7 @@ void _backupNeededx86()
{
if (x86regs[i].inuse)
{
#ifdef __M_X86_64
xMOV(ptr64[&gprBackup[i]], xRegister64(i));
#else
xMOV(ptr32[&gprBackup[i]], xRegister32(i));
#endif
}
}
}
@ -99,11 +91,7 @@ void _restoreNeededx86()
{
if (x86regs[i].inuse)
{
#ifdef __M_X86_64
xMOV(xRegister64(i), ptr64[&gprBackup[i]]);
#else
xMOV(xRegister32(i), ptr32[&gprBackup[i]]);
#endif
}
}
}

View File

@ -196,11 +196,8 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub)
u8* to_underflow = JB8(0);
xCVTSD2SS(xRegisterSSE(reg), xRegisterSSE(reg)); //simply convert
#ifdef __M_X86_64
u32* end = JMP32(0);
#else
u8* end = JMP8(0);
#endif
x86SetJ8(to_complex);
xUCOMI.SD(xRegisterSSE(absreg), ptr[&s_const.dbl_ps2_overflow]);
@ -209,11 +206,8 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub)
xPSUB.Q(xRegisterSSE(reg), ptr[&s_const.dbl_one_exp]); //lower exponent
xCVTSD2SS(xRegisterSSE(reg), xRegisterSSE(reg)); //convert
xPADD.D(xRegisterSSE(reg), ptr[s_const.one_exp]); //raise exponent
#ifdef __M_X86_64
u32* end2 = JMP32(0);
#else
u8* end2 = JMP8(0);
#endif
x86SetJ8(to_overflow);
xCVTSD2SS(xRegisterSSE(reg), xRegisterSSE(reg));
@ -252,13 +246,9 @@ void ToPS2FPU_Full(int reg, bool flags, int absreg, bool acc, bool addsub)
xCVTSD2SS(xRegisterSSE(reg), xRegisterSSE(reg));
xAND.PS(xRegisterSSE(reg), ptr[s_const.neg]); //flush to zero
#ifdef __M_X86_64
x86SetJ32(end);
x86SetJ32(end2);
#else
x86SetJ8(end);
x86SetJ8(end2);
#endif
x86SetJ8(end3);
if (flags && FPU_FLAGS_UNDERFLOW && addsub)
x86SetJ8(end4);

View File

@ -574,11 +574,7 @@ static void psxRecompileIrxImport()
if (SysTraceActive(IOP.Bios))
{
#ifdef __M_X86_64
xMOV64(arg3reg, (uptr)funcname);
#else
xPUSH((uptr)funcname);
#endif
xMOV64(arg3reg, (uptr)funcname);
xFastCall((void*)irxImportLog_rec, import_table, index);
}

View File

@ -225,14 +225,8 @@ void _eeMoveGPRtoRm(x86IntRegType to, int fromgpr)
// Emits x86 code that sign-extends the 32-bit value currently in EAX to
// 64 bits and stores the result at 'mem'.
// Assumes the emitted code runs with the source value already in EAX.
void _signExtendToMem(void* mem)
{
#ifdef __M_X86_64
// 64-bit host: CDQE sign-extends EAX into RAX, single 64-bit store.
xCDQE();
xMOV(ptr64[mem], rax);
#else
// 32-bit host: CDQ replicates EAX's sign bit into EDX, then store the
// two 32-bit halves separately (low at mem, high at mem+4).
// NOTE: clobbers EDX in the emitted code.
xCDQ();
xMOV(ptr32[mem], eax);
xMOV(ptr32[(void*)((sptr)mem + 4)], edx);
#endif
}
void eeSignExtendTo(int gpr, bool onlyupper)

View File

@ -117,7 +117,6 @@ void recDADD_constv(int info, int creg, u32 vreg)
GPR_reg64 cval = g_cpuConstRegs[creg];
#ifdef __M_X86_64
if (_Rd_ == vreg)
{
if (!cval.SD[0])
@ -137,27 +136,6 @@ void recDADD_constv(int info, int creg, u32 vreg)
}
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
}
#else
if (_Rd_ == vreg)
{
if (!cval.SD[0])
return; // no-op
xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], cval.SL[0]);
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], cval.SL[1]);
}
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[vreg].SL[1]]);
if (cval.SD[0])
{
xADD(eax, cval.SL[0]);
xADC(edx, cval.SL[1]);
}
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
#endif
}
void recDADD_consts(int info)
@ -178,7 +156,6 @@ void recDADD_(int info)
if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_;
#ifdef __M_X86_64
if (_Rd_ == _Rs_ && _Rs_ == _Rt_)
{
xSHL(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 1);
@ -202,38 +179,6 @@ void recDADD_(int info)
}
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax);
#else
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].SL[0]]);
if (_Rd_ == _Rs_ && _Rs_ == _Rt_)
{
xSHLD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], eax, 1);
xSHL(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 1);
return;
}
xMOV(edx, ptr32[&cpuRegs.GPR.r[rt].SL[1]]);
if (_Rd_ == rs)
{
xADD(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
return;
}
else if (rs == rt)
{
xADD(eax, eax);
xADC(edx, edx);
}
else
{
xADD(eax, ptr32[&cpuRegs.GPR.r[rs].SL[0]]);
xADC(edx, ptr32[&cpuRegs.GPR.r[rs].SL[1]]);
}
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
#endif
}
EERECOMPILE_CODE0(DADD, XMMINFO_WRITED | XMMINFO_READS | XMMINFO_READT);
@ -280,12 +225,7 @@ void recSUB_(int info)
if (_Rs_ == _Rt_)
{
#ifdef __M_X86_64
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0);
#else
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
#endif
return;
}
@ -314,7 +254,6 @@ void recDSUB_consts(int info)
GPR_reg64 sval = g_cpuConstRegs[_Rs_];
#ifdef __M_X86_64
if (!sval.SD[0] && _Rd_ == _Rt_)
{
xNEG(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]]);
@ -327,31 +266,6 @@ void recDSUB_consts(int info)
xSUB(rax, ptr32[&cpuRegs.GPR.r[_Rt_].SD[0]]);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
#else
if (!sval.SD[0] && _Rd_ == _Rt_)
{
/* To understand this 64-bit negate, consider that a negate in 2's complement
* is a NOT then an ADD 1. The upper word should only have the NOT stage unless
* the ADD overflows. The ADD only overflows if the lower word is 0.
* Incrementing before a NEG is the same as a NOT and the carry flag is set for
* a non-zero lower word.
*/
xNEG(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]]);
xADC(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
xNEG(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]]);
return;
}
else
{
xMOV(eax, sval.SL[0]);
xMOV(edx, sval.SL[1]);
}
xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
xSBB(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
#endif
}
void recDSUB_constt(int info)
@ -360,7 +274,6 @@ void recDSUB_constt(int info)
GPR_reg64 tval = g_cpuConstRegs[_Rt_];
#ifdef __M_X86_64
if (_Rd_ == _Rs_)
{
xImm64Op(xSUB, ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], rax, tval.SD[0]);
@ -374,32 +287,12 @@ void recDSUB_constt(int info)
}
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
}
#else
if (_Rd_ == _Rs_)
{
xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], tval.SL[0]);
xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], tval.SL[1]);
}
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]);
if (tval.SD[0])
{
xSUB(eax, tval.SL[0]);
xSBB(edx, tval.SL[1]);
}
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
#endif
}
void recDSUB_(int info)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
if (_Rs_ == _Rt_)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SD[0]], 0);
@ -415,29 +308,6 @@ void recDSUB_(int info)
xSUB(rax, ptr64[&cpuRegs.GPR.r[_Rt_].SD[0]]);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].SL[0]], rax);
}
#else
if (_Rs_ == _Rt_)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], 0);
}
else if (_Rd_ == _Rs_)
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]);
xSUB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xSBB(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[_Rs_].SL[0]]);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].SL[1]]);
xSUB(eax, ptr32[&cpuRegs.GPR.r[_Rt_].SL[0]]);
xSBB(edx, ptr32[&cpuRegs.GPR.r[_Rt_].SL[1]]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].SL[1]], edx);
}
#endif
}
EERECOMPILE_CODE0(DSUB, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
@ -496,7 +366,7 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
}
GPR_reg64 cval = g_cpuConstRegs[creg];
#ifdef __M_X86_64
if (hasFixed && cval.SD[0] == fixedInput)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], fixedOutput);
@ -523,31 +393,6 @@ static void recLogicalOp_constv(LogicalOp op, int info, int creg, u32 vreg)
xNOT(rax);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
#else
for (int i = 0; i < 2; i++)
{
if (hasFixed && cval.SL[i] == (s32)fixedInput)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], (s32)fixedOutput);
}
else if (_Rd_ == vreg)
{
if (cval.SL[i] != identityInput)
xOP(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], cval.UL[i]);
if (op == LogicalOp::NOR)
xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]);
}
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[vreg].UL[i]]);
if (cval.SL[i] != identityInput)
xOP(eax, cval.UL[i]);
if (op == LogicalOp::NOR)
xNOT(eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
}
}
#endif
}
static void recLogicalOp(LogicalOp op, int info)
@ -565,7 +410,6 @@ static void recLogicalOp(LogicalOp op, int info)
if (_Rd_ == _Rt_)
rs = _Rt_, rt = _Rs_;
#ifdef __M_X86_64
if (op == LogicalOp::XOR && rs == rt)
{
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], 0);
@ -589,34 +433,6 @@ static void recLogicalOp(LogicalOp op, int info)
xNOT(rax);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
#else
for (int i = 0; i < 2; i++)
{
if (op == LogicalOp::XOR && rs == rt)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], 0);
}
else if (_Rd_ == rs)
{
if (rs != rt)
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
xOP(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
}
if (op == LogicalOp::NOR)
xNOT(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]]);
}
else
{
xMOV(eax, ptr32[&cpuRegs.GPR.r[rs].UL[i]]);
if (rs != rt)
xOP(eax, ptr32[&cpuRegs.GPR.r[rt].UL[i]]);
if (op == LogicalOp::NOR)
xNOT(eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[i]], eax);
}
}
#endif
}
//// AND
@ -723,38 +539,18 @@ void recSLTs_const(int info, int sign, int st)
GPR_reg64 cval = g_cpuConstRegs[st ? _Rt_ : _Rs_];
#ifdef __M_X86_64
const xImpl_Set& SET = st ? (sign ? xSETL : xSETB) : (sign ? xSETG : xSETA);
xXOR(eax, eax);
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UD[0]], rdx, cval.UD[0]);
SET(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
xMOV(eax, 1);
xCMP(ptr32[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UL[1]], cval.UL[1]);
xForwardJump8 pass1(st ? (sign ? Jcc_Less : Jcc_Below) : (sign ? Jcc_Greater : Jcc_Above));
xForwardJump8 fail(st ? (sign ? Jcc_Greater : Jcc_Above) : (sign ? Jcc_Less : Jcc_Below));
{
xCMP(ptr32[&cpuRegs.GPR.r[st ? _Rs_ : _Rt_].UL[0]], cval.UL[0]);
xForwardJump8 pass2(st ? Jcc_Below : Jcc_Above);
fail.SetTarget();
xMOV(eax, 0);
pass2.SetTarget();
}
pass1.SetTarget();
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
#endif
}
void recSLTs_(int info, int sign)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
const xImpl_Set& SET = sign ? xSETL : xSETB;
xXOR(eax, eax);
@ -762,27 +558,6 @@ void recSLTs_(int info, int sign)
xCMP(rdx, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]]);
SET(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
xMOV(eax, 1);
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xCMP(edx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xForwardJump8 pass1(sign ? Jcc_Less : Jcc_Below);
xForwardJump8 fail(sign ? Jcc_Greater : Jcc_Above);
{
xMOV(edx, ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xCMP(edx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
xForwardJump8 pass2(Jcc_Below);
fail.SetTarget();
xMOV(eax, 0);
pass2.SetTarget();
}
pass1.SetTarget();
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
#endif
}
void recSLT_consts(int info)

View File

@ -93,7 +93,6 @@ void recDADDI_(int info)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
if (_Rt_ == _Rs_)
{
xADD(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], _Imm_);
@ -109,29 +108,6 @@ void recDADDI_(int info)
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
}
#else
if (_Rt_ == _Rs_)
{
xADD(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _Imm_);
xADC(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], _Imm_ < 0 ? 0xffffffff : 0);
}
else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
if (_Imm_ != 0)
{
xADD(eax, _Imm_);
xADC(edx, _Imm_ < 0 ? 0xffffffff : 0);
}
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
#endif
}
EERECOMPILE_CODEX(eeRecompileCode1, DADDI);
@ -153,30 +129,10 @@ extern u32 s_sltone;
// Emits code for SLTIU: GPR[Rt] = (GPR[Rs] < imm) ? 1 : 0, using an
// UNSIGNED 64-bit comparison (xSETB / JB = "below").
// The immediate is treated as sign-extended to 64 bits (see the
// _Imm_ >= 0 ? 0 : 0xffffffff high-word compare in the 32-bit path).
void recSLTIU_(int info)
{
#ifdef __M_X86_64
// Zero RAX, compare full 64-bit Rs against the immediate, set AL on
// unsigned less-than, store the 0/1 result to the whole of Rt.
xXOR(eax, eax);
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
xSETB(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
// 32-bit host: emulate the 64-bit unsigned compare in two stages.
// Compare high words first; only when they are equal does the low-word
// compare decide the result.
xMOV(eax, 1); // assume "less than"
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff);
j8Ptr[0] = JB8(0); // high word below -> result stays 1
j8Ptr[2] = JA8(0); // high word above -> result becomes 0
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], (s32)_Imm_);
j8Ptr[1] = JB8(0); // low word below -> result stays 1
x86SetJ8(j8Ptr[2]);
xXOR(eax, eax); // not less-than: result is 0
x86SetJ8(j8Ptr[0]);
x86SetJ8(j8Ptr[1]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); // upper half of Rt is always 0
#endif
}
EERECOMPILE_CODEX(eeRecompileCode1, SLTIU);
@ -190,30 +146,10 @@ void recSLTI_const()
// Emits code for SLTI: GPR[Rt] = (GPR[Rs] < imm) ? 1 : 0, using a
// SIGNED 64-bit comparison (xSETL / JL-JG on the high word).
void recSLTI_(int info)
{
// test silent hill if modding
#ifdef __M_X86_64
// Zero RAX, signed 64-bit compare against the immediate, set AL on
// less-than, store the 0/1 result to the whole of Rt.
xXOR(eax, eax);
xCMP(ptr64[&cpuRegs.GPR.r[_Rs_].UD[0]], _Imm_);
xSETL(al);
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
// 32-bit host: SIGNED compare on the high words (JL/JG), then an
// UNSIGNED compare on the low words when the high words are equal
// (the low word contributes magnitude only).
xMOV(eax, 1); // assume "less than"
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], _Imm_ >= 0 ? 0 : 0xffffffff);
j8Ptr[0] = JL8(0); // high word less -> result stays 1
j8Ptr[2] = JG8(0); // high word greater -> result becomes 0
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], (s32)_Imm_);
j8Ptr[1] = JB8(0); // low word below -> result stays 1
x86SetJ8(j8Ptr[2]);
xXOR(eax, eax); // not less-than: result is 0
x86SetJ8(j8Ptr[0]);
x86SetJ8(j8Ptr[1]);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0); // upper half of Rt is always 0
#endif
}
EERECOMPILE_CODEX(eeRecompileCode1, SLTI);
@ -242,7 +178,6 @@ static void recLogicalOpI(int info, LogicalOp op)
: op == LogicalOp::XOR ? xXOR : bad;
pxAssert(&xOP != &bad);
#ifdef __M_X86_64
if (_ImmU_ != 0)
{
if (_Rt_ == _Rs_)
@ -274,50 +209,6 @@ static void recLogicalOpI(int info, LogicalOp op)
}
}
}
#else
if (_ImmU_ != 0)
{
if (_Rt_ == _Rs_)
{
xOP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], _ImmU_);
}
else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
if (op != LogicalOp::AND)
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xOP(eax, _ImmU_);
if (op != LogicalOp::AND)
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
}
if (op == LogicalOp::AND)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
}
}
else
{
if (op == LogicalOp::AND)
{
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], 0);
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
}
else
{
if (_Rt_ != _Rs_)
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xMOV(edx, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
}
}
#endif
}
void recANDI_(int info)

View File

@ -147,7 +147,6 @@ void recSetBranchEQ(int info, int bne, int process)
_eeFlushAllUnused();
#ifdef __M_X86_64
if (process & PROCESS_CONSTS)
{
xImm64Op(xCMP, ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax, g_cpuConstRegs[_Rs_].UD[0]);
@ -170,69 +169,6 @@ void recSetBranchEQ(int info, int bne, int process)
{
j32Ptr[0] = j32Ptr[1] = JNE32(0);
}
#else
if (bne)
{
if (process & PROCESS_CONSTS)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
j8Ptr[0] = JNE8(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
j32Ptr[1] = JE32(0);
}
else if (process & PROCESS_CONSTT)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]);
j8Ptr[0] = JNE8(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]);
j32Ptr[1] = JE32(0);
}
else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
j8Ptr[0] = JNE8(0);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
j32Ptr[1] = JE32(0);
}
x86SetJ8(j8Ptr[0]);
}
else
{
// beq
if (process & PROCESS_CONSTS)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], g_cpuConstRegs[_Rs_].UL[0]);
j32Ptr[0] = JNE32(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], g_cpuConstRegs[_Rs_].UL[1]);
j32Ptr[1] = JNE32(0);
}
else if (process & PROCESS_CONSTT)
{
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[0]], g_cpuConstRegs[_Rt_].UL[0]);
j32Ptr[0] = JNE32(0);
xCMP(ptr32[&cpuRegs.GPR.r[_Rs_].UL[1]], g_cpuConstRegs[_Rt_].UL[1]);
j32Ptr[1] = JNE32(0);
}
else
{
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
j32Ptr[0] = JNE32(0);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[1]]);
xCMP(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
j32Ptr[1] = JNE32(0);
}
}
#endif
}
_clearNeededXMMregs();

View File

@ -179,23 +179,11 @@ void recLoad32(u32 bits, bool sign)
if (_Rt_)
{
#if __M_X86_64
// EAX holds the loaded value, so sign extend as needed:
if (sign)
xCDQE();
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
// EAX holds the loaded value, so sign extend as needed:
if (sign)
xCDQ();
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
if (sign)
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
else
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
#endif
}
}
@ -291,7 +279,6 @@ void recLWL()
xMOV(ecx, calleeSavedReg1d);
xMOV(edx, 0xffffff);
xSHR(edx, cl);
# ifdef __M_X86_64
xAND(edx, ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]]);
// OR in bytes loaded
@ -301,19 +288,6 @@ void recLWL()
xOR(eax, edx);
eeSignExtendTo(_Rt_);
# else
xAND(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], edx);
// OR in bytes loaded
xNEG(ecx);
xADD(ecx, 24);
xSHL(eax, cl);
xOR(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
// eax will always have the sign bit
xCDQ();
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
# endif
#else
iFlushCall(FLUSH_INTERPRETER);
_deleteEEreg(_Rs_, 1);

View File

@ -52,25 +52,13 @@ REC_FUNC_DEL(MOVN, _Rd_);
// Emits code that copies the 64-bit value at *src to *dst.
// Clobbers RAX (64-bit host) or EAX/EDX (32-bit host) in the emitted code.
static void xCopy64(u64* dst, u64* src)
{
#ifdef __M_X86_64
xMOV(rax, ptr64[src]);
xMOV(ptr64[dst], rax);
#else
// No 64-bit GPRs available: move the two 32-bit halves separately.
xMOV(eax, ptr32[(u32*)src]);
xMOV(edx, ptr32[(u32*)src + 1]);
xMOV(ptr32[(u32*)dst], eax);
xMOV(ptr32[(u32*)dst + 1], edx);
#endif
}
// Emits code that tests whether the 64-bit value at *mem is zero, leaving
// the answer in the host flags (ZF set iff the value is zero).
static void xCMPToZero64(u64* mem)
{
#ifdef __M_X86_64
xCMP(ptr64[mem], 0);
#else
// OR the two 32-bit halves together: result is zero iff both are zero.
// Clobbers EAX in the emitted code.
xMOV(eax, ptr32[(u32*)mem]);
xOR(eax, ptr32[(u32*)mem + 1]);
#endif
}
/*********************************************************

View File

@ -59,13 +59,8 @@ void recWritebackHILO(int info, int writed, int upper)
uptr hiaddr = (uptr)&cpuRegs.HI.UL[upper ? 2 : 0];
u8 testlive = upper ? EEINST_LIVE2 : EEINST_LIVE0;
#ifdef __M_X86_64
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
xMOVSX(rcx, edx);
#else
if (g_pCurInstInfo->regs[XMMGPR_HI] & testlive)
xMOV(ecx, edx);
#endif
if (g_pCurInstInfo->regs[XMMGPR_LO] & testlive)
{
@ -109,16 +104,9 @@ void recWritebackHILO(int info, int writed, int upper)
{
_deleteEEreg(_Rd_, 0);
#ifdef __M_X86_64
if (!savedlo)
xCDQE();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
if (!savedlo)
xCDQ();
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
#endif
}
}
@ -138,13 +126,7 @@ void recWritebackHILO(int info, int writed, int upper)
reghi = -1;
}
#ifdef __M_X86_64
xMOV(ptr[(void*)(hiaddr)], rcx);
#else
xMOV(ptr[(void*)(hiaddr)], ecx);
xSAR(ecx, 31);
xMOV(ptr[(void*)(hiaddr + 4)], ecx);
#endif
}
}
@ -535,7 +517,6 @@ EERECOMPILE_CODE0(DIVU1, XMMINFO_READS | XMMINFO_READT);
static void writeBackMAddToHiLoRd(int hiloID)
{
#if __M_X86_64
// eax -> LO, edx -> HI
xCDQE();
if (_Rd_)
@ -548,34 +529,12 @@ static void writeBackMAddToHiLoRd(int hiloID)
xMOVSX(rax, edx);
xMOV(ptr[&cpuRegs.HI.UD[hiloID]], rax);
#else
// eax -> LO, ecx -> HI
xCDQ();
if (_Rd_)
{
_eeOnWriteReg(_Rd_, 1);
_deleteEEreg(_Rd_, 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
}
xMOV(ptr[&cpuRegs.LO.UL[hiloID * 2]], eax);
xMOV(ptr[&cpuRegs.LO.UL[hiloID * 2 + 1]], edx);
xMOV(ptr[&cpuRegs.HI.UL[hiloID * 2]], ecx);
xMOV(eax, ecx);
xCDQ();
xMOV(ptr[&cpuRegs.HI.UL[hiloID * 2 + 1]], edx);
#endif
}
static void addConstantAndWriteBackToHiLoRd(int hiloID, u64 constant)
{
#if __M_X86_64
const xRegister32& ehi = edx;
#else
const xRegister32& ehi = ecx;
#endif
_deleteEEreg(XMMGPR_LO, 1);
_deleteEEreg(XMMGPR_HI, 1);
@ -588,14 +547,9 @@ static void addConstantAndWriteBackToHiLoRd(int hiloID, u64 constant)
// Emits code that adds the 64-bit pair currently in EDX:EAX to the LO/HI
// pair selected by hiloID (ADD on the low half, ADC on the high half so
// the carry propagates), then delegates to writeBackMAddToHiLoRd() to
// store the result back to LO/HI (and Rd).
static void addEaxEdxAndWriteBackToHiLoRd(int hiloID)
{
#if __M_X86_64
// writeBackMAddToHiLoRd expects: eax -> LO, edx -> HI.
xADD(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]);
xADC(edx, ptr[&cpuRegs.HI.UL[hiloID * 2]]);
#else
// 32-bit path: writeBackMAddToHiLoRd expects eax -> LO, ecx -> HI,
// so accumulate the high half in a copy of EDX.
xMOV(ecx, edx);
xADD(eax, ptr[&cpuRegs.LO.UL[hiloID * 2]]);
xADC(ecx, ptr[&cpuRegs.HI.UL[hiloID * 2]]);
#endif
writeBackMAddToHiLoRd(hiloID);
}

View File

@ -137,29 +137,10 @@ void recDSLLs_(int info, int sa)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (sa != 0)
xSHL(rax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
int rtreg, rdreg;
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
if (rtreg != rdreg)
xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
xPSLL.Q(xRegisterSSE(rdreg), sa);
// flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits
// (better use of register but code will likely be flushed after anyway)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSLL_(int info)
@ -179,29 +160,10 @@ void recDSRLs_(int info, int sa)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (sa != 0)
xSHR(rax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
int rtreg, rdreg;
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
if (rtreg != rdreg)
xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
xPSRL.Q(xRegisterSSE(rdreg), sa);
// flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits
// (better use of register but code will likely be flushed after anyway)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSRL_(int info)
@ -221,50 +183,10 @@ void recDSRAs_(int info, int sa)
{
pxAssert(!(info & PROCESS_EE_XMM));
#ifdef __M_X86_64
xMOV(rax, ptr[&cpuRegs.GPR.r[_Rt_].UD[0]]);
if (sa != 0)
xSAR(rax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
int rtreg, rdreg, t0reg;
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
if (rtreg != rdreg)
xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
if (sa)
{
t0reg = _allocTempXMMreg(XMMT_INT, -1);
xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
// it is a signed shift (but 64 bits operands aren't supported on 32 bits even on SSE)
xPSRA.D(xRegisterSSE(t0reg), sa);
xPSRL.Q(xRegisterSSE(rdreg), sa);
// It can be done in one blend instruction in SSE4.1
// Goal is to move 63:32 of t0reg to 63:32 rdreg
{
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x55);
// take lower dword of rdreg and lower dword of t0reg
xPUNPCK.LDQ(xRegisterSSE(rdreg), xRegisterSSE(t0reg));
}
_freeXMMreg(t0reg);
}
// flush lower 64 bits (as upper is wrong)
// The others possibility could be a read back of the upper 64 bits
// (better use of register but code will likely be flushed after anyway)
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
void recDSRA_(int info)
@ -285,17 +207,8 @@ void recDSLL32s_(int info, int sa)
pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[0]]);
#ifdef __M_X86_64
xSHL(rax, sa + 32);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
if (sa != 0)
{
xSHL(eax, sa);
}
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[0]], 0);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], eax);
#endif
}
void recDSLL32_(int info)
@ -319,12 +232,7 @@ void recDSRL32s_(int info, int sa)
if (sa != 0)
xSHR(eax, sa);
#ifdef __M_X86_64
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
#else
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr32[&cpuRegs.GPR.r[_Rd_].UL[1]], 0);
#endif
}
void recDSRL32_(int info)
@ -342,19 +250,7 @@ void recDSRA32_const()
// Emits code for DSRA32 with constant shift amount 'sa': a 64-bit
// arithmetic shift right of Rt by (sa + 32), result stored to Rd.
void recDSRA32s_(int info, int sa)
{
#ifdef __M_X86_64
// 64-bit host: just reuse the plain DSRA emitter with sa + 32.
recDSRAs_(info, sa + 32);
#else
// 32-bit host: shifting by >= 32 means the result comes entirely from
// Rt's UPPER word; CDQ replicates its sign bit into EDX for Rd's upper
// word.
pxAssert(!(info & PROCESS_EE_XMM));
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rt_].UL[1]]);
xCDQ();
if (sa != 0)
xSAR(eax, sa);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[0]], eax);
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UL[1]], edx);
#endif
}
void recDSRA32_(int info)
@ -390,8 +286,6 @@ static void recShiftV(const xImpl_Group2& shift)
eeSignExtendTo(_Rd_);
}
#ifdef __M_X86_64
static void recDShiftV_constt(const xImpl_Group2& shift)
{
xMOV(ecx, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
@ -413,50 +307,6 @@ static void recDShiftV(const xImpl_Group2& shift)
xMOV(ptr[&cpuRegs.GPR.r[_Rd_].UD[0]], rax);
}
#else
alignas(16) u32 s_sa[4] = {0x1f, 0, 0x3f, 0};
// Shared setup for the variable 64-bit shift emitters (DSLLV/DSRLV/DSRAV):
// - allocates XMM registers for Rt (read) and Rd (write);
// - loads the shift amount from Rs's low word, masked to 0..63, into a
//   temporary XMM register (returned via *rsreg / *rstemp);
// - copies Rt into Rd when they landed in different registers.
// The caller must free *rstemp after emitting the shift.
void recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp)
{
pxAssert(!(info & PROCESS_EE_XMM));
_addNeededGPRtoXMMreg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
*rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
*rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
*rstemp = _allocTempXMMreg(XMMT_INT, -1);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xAND(eax, 0x3f); // shift amount is taken modulo 64
xMOVDZX(xRegisterSSE(*rstemp), eax);
*rsreg = *rstemp;
if (*rtreg != *rdreg)
xMOVDQA(xRegisterSSE(*rdreg), xRegisterSSE(*rtreg));
}
// Variant of recSetShiftV for when Rt holds a known constant: flushes the
// constant back to memory first and allocates only Rd plus the masked
// shift-amount temp — the caller loads Rt from memory itself.
void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp)
{
// Note: flush the constant FIRST.
// 1/ It doesn't work in SSE if done at the end (suspected conflict with
//    _allocGPRtoXMMreg when rt == rd)
// 2/ The CPU has a minimum cycle delay between read/write
_flushConstReg(_Rt_);
_addNeededGPRtoXMMreg(_Rd_);
*rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
*rstemp = _allocTempXMMreg(XMMT_INT, -1);
xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
xAND(eax, 0x3f); // shift amount is taken modulo 64
xMOVDZX(xRegisterSSE(*rstemp), eax);
*rsreg = *rstemp;
}
#endif
//// SLLV
void recSLLV_const()
{
@ -543,44 +393,12 @@ void recDSLLV_consts(int info)
// DSLLV with a constant Rt: 64-bit logical shift LEFT of Rt by (Rs & 63),
// result stored to Rd.
void recDSLLV_constt(int info)
{
#ifdef __M_X86_64
recDShiftV_constt(xSHL);
#else
int rsreg, rdreg, rstemp = -1;
recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if (rstemp != -1)
_freeXMMreg(rstemp);
// Flush only the lower 64 bits (the upper 64 bits of the XMM reg are
// wrong). The alternative would be to read back the upper 64 bits first —
// better register use, but this code is likely flushed right after anyway.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
//_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
// DSLLV: Rd = Rt << (Rs & 63), 64-bit logical shift left.
void recDSLLV_(int info)
{
#ifdef __M_X86_64
recDShiftV(xSHL);
#else
int rsreg, rtreg, rdreg, rstemp = -1;
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if (rstemp != -1)
_freeXMMreg(rstemp);
// Flush only the lower 64 bits (the upper 64 bits of the XMM reg are
// wrong); reading back the upper half instead would use registers better,
// but this code is likely flushed right after anyway.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
EERECOMPILE_CODE0(DSLLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
@ -602,45 +420,12 @@ void recDSRLV_consts(int info)
// DSRLV with a constant Rt: 64-bit logical shift RIGHT of Rt by (Rs & 63),
// result stored to Rd.
void recDSRLV_constt(int info)
{
#ifdef __M_X86_64
recDShiftV_constt(xSHR);
#else
int rsreg, rdreg, rstemp = -1;
recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if (rstemp != -1)
_freeXMMreg(rstemp);
// Flush only the lower 64 bits (the upper 64 bits of the XMM reg are
// wrong); this code is likely flushed right after anyway.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
//_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
// DSRLV: Rd = Rt >> (Rs & 63), 64-bit logical (zero-fill) shift right.
void recDSRLV_(int info)
{
#ifdef __M_X86_64
recDShiftV(xSHR);
#else
int rsreg, rtreg, rdreg, rstemp = -1;
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
if (rstemp != -1)
_freeXMMreg(rstemp);
// Flush only the lower 64 bits (the upper 64 bits of the XMM reg are
// wrong); this code is likely flushed right after anyway.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
#endif
}
EERECOMPILE_CODE0(DSRLV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);
@ -662,87 +447,12 @@ void recDSRAV_consts(int info)
// DSRAV with a constant Rt: 64-bit ARITHMETIC shift right by (Rs & 63).
// SSE2 has no 64-bit arithmetic shift, so the 32-bit path builds one from
// a logical right shift plus a mask of sign bits OR'd in at the top.
void recDSRAV_constt(int info)
{
#ifdef __M_X86_64
recDShiftV_constt(xSAR);
#else
int rsreg, rdreg, rstemp = -1, t0reg, t1reg;
t0reg = _allocTempXMMreg(XMMT_INT, -1);
t1reg = _allocTempXMMreg(XMMT_INT, -1);
recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
// Build the sign mask in t0: compare 0 > value per 32-bit lane, then
// broadcast the lane holding the sign bit (shuffle 0x55) so t0 is
// all-ones if the value is negative, all-zeroes otherwise.
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(rdreg));
xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(rdreg));
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x55);
// t1 = 64 - shift amount: how far the sign mask must be shifted left.
xMOV(eax, 64);
xMOVDZX(xRegisterSSE(t1reg), eax);
xPSUB.D(xRegisterSSE(t1reg), xRegisterSSE(rsreg));
// Logical right shift, then OR the sign bits back in at the top.
xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); // highest bits
xPOR(xRegisterSSE(rdreg), xRegisterSSE(t0reg));
// Flush only the lower 64 bits (the upper 64 bits of the XMM reg are
// wrong); this code is likely flushed right after anyway.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rd_, 3);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
if (rstemp != -1)
_freeXMMreg(rstemp);
#endif
}
// DSRAV: Rd = Rt >> (Rs & 63), 64-bit ARITHMETIC shift right.
// Same sign-mask emulation as recDSRAV_constt, but Rt is not constant so
// recSetShiftV does the register setup (and copies Rt into rdreg).
void recDSRAV_(int info)
{
#ifdef __M_X86_64
recDShiftV(xSAR);
#else
int rsreg, rtreg, rdreg, rstemp = -1, t0reg, t1reg;
t0reg = _allocTempXMMreg(XMMT_INT, -1);
t1reg = _allocTempXMMreg(XMMT_INT, -1);
recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
// Build the sign mask in t0: compare 0 > value per 32-bit lane, then
// broadcast the lane holding the sign bit (shuffle 0x55).
xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(rdreg));
xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(rdreg));
xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x55);
// t1 = 64 - shift amount: how far the sign mask must be shifted left.
xMOV(eax, 64);
xMOVDZX(xRegisterSSE(t1reg), eax);
xPSUB.D(xRegisterSSE(t1reg), xRegisterSSE(rsreg));
// Logical right shift, then OR the sign bits back in at the top.
xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); // highest bits
xPOR(xRegisterSSE(rdreg), xRegisterSSE(t0reg));
// Flush only the lower 64 bits (the upper 64 bits of the XMM reg are
// wrong); this code is likely flushed right after anyway.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);
_deleteGPRtoXMMreg(_Rd_, 3);
_freeXMMreg(t0reg);
_freeXMMreg(t1reg);
if (rstemp != -1)
_freeXMMreg(rstemp);
#endif
}
EERECOMPILE_CODE0(DSRAV, XMMINFO_READS | XMMINFO_READT | XMMINFO_WRITED);

View File

@ -209,11 +209,7 @@ struct microVU
alignas(16) u32 macFlag [4]; // 4 instances of mac flag (used in execution)
alignas(16) u32 clipFlag[4]; // 4 instances of clip flag (used in execution)
alignas(16) u32 xmmCTemp[4]; // Backup used in mVUclamp2()
#ifdef __M_X86_64
alignas(16) u32 xmmBackup[16][4]; // Backup for xmm0~xmm15
#else
alignas(16) u32 xmmBackup[8][4]; // Backup for xmm0~xmm7
#endif
u32 index; // VU Index (VU0 or VU1)
u32 cop2; // VU is in COP2 mode? (No/Yes)

View File

@ -225,11 +225,7 @@ struct microMapXMM
class microRegAlloc
{
protected:
#ifdef __M_X86_64
static const int xmmTotal = 15; // PQ register is reserved
#else
static const int xmmTotal = 7; // PQ register is reserved
#endif
microMapXMM xmmMap[xmmTotal];
int counter; // Current allocation count
int index; // VU0 or VU1

View File

@ -403,23 +403,10 @@ static void recCFC2()
xMOV(eax, ptr32[&vu0Regs.VI[_Rd_].UL]);
// FixMe: Should R-Reg have upper 9 bits 0?
#ifdef __M_X86_64
if (_Rd_ >= 16)
xCDQE(); // Sign Extend
xMOV(ptr64[&cpuRegs.GPR.r[_Rt_].UD[0]], rax);
#else
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[0]], eax);
if (_Rd_ >= 16)
{
_freeX86reg(edx);
xCDQ(); // Sign Extend
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], edx);
}
else
xMOV(ptr32[&cpuRegs.GPR.r[_Rt_].UL[1]], 0);
#endif
// FixMe: I think this is needed, but not sure how it works
// Update Refraction 20/09/2021: This is needed because Const Prop is broken

View File

@ -150,12 +150,7 @@ static const char branchSTR[16][8] = {
#define xmmT5 xmm4 // Used for regAlloc
#define xmmT6 xmm5 // Used for regAlloc
#define xmmT7 xmm6 // Used for regAlloc
#ifdef __M_X86_64
#define xmmPQ xmm15 // Holds the Value and Backup Values of P and Q regs
#else
#define xmmPQ xmm7 // Holds the Value and Backup Values of P and Q regs
#endif
#define gprT1 eax // eax - Temp Reg
#define gprT2 ecx // ecx - Temp Reg
@ -167,17 +162,10 @@ static const char branchSTR[16][8] = {
#define gprT2b cx // Low 16-bit of gprT2 (ecx)
#define gprT3b dx // Low 16-bit of gprT3 (edx)
#ifdef __M_X86_64
#define gprF0 ebx // Status Flag 0
#define gprF1 r12d // Status Flag 1
#define gprF2 r13d // Status Flag 2
#define gprF3 r14d // Status Flag 3
#else
#define gprF0 ebx // Status Flag 0
#define gprF1 ebp // Status Flag 1
#define gprF2 esi // Status Flag 2
#define gprF3 edi // Status Flag 3
#endif
// Function Params
#define mP microVU& mVU, int recPass