mirror of https://github.com/PCSX2/pcsx2.git
EE Rec: Added LDR/LDL
Also fixed slight optimisation bug in SDL
This commit is contained in:
parent
5f58c325ca
commit
862d606514
|
@ -451,12 +451,96 @@ void recSWR()
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
alignas(16) const u32 SHIFT_MASKS[2][4] = {
|
||||||
|
{ 0xffffffff, 0xffffffff, 0x00000000, 0x00000000 },
|
||||||
|
{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
|
||||||
|
};
|
||||||
|
|
||||||
void recLDL()
|
void recLDL()
|
||||||
{
|
{
|
||||||
|
if (!_Rt_)
|
||||||
|
return;
|
||||||
|
|
||||||
|
#ifdef LOADSTORE_RECOMPILE
|
||||||
|
xLEA(arg2reg, ptr128[&dummyValue[0]]);
|
||||||
|
|
||||||
|
if (GPR_IS_CONST1(_Rs_))
|
||||||
|
{
|
||||||
|
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
|
||||||
|
srcadr &= ~0x07;
|
||||||
|
|
||||||
|
vtlb_DynGenRead64_Const(64, srcadr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Load ECX with the source memory address that we're reading from.
|
||||||
|
_eeMoveGPRtoR(arg1regd, _Rs_);
|
||||||
|
if (_Imm_ != 0)
|
||||||
|
xADD(arg1regd, _Imm_);
|
||||||
|
|
||||||
|
xAND(arg1regd, ~0x07);
|
||||||
|
|
||||||
|
iFlushCall(FLUSH_FULLVTLB);
|
||||||
|
|
||||||
|
vtlb_DynGenRead64(64);
|
||||||
|
}
|
||||||
|
|
||||||
|
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
|
||||||
|
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
|
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
|
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
|
|
||||||
|
if (GPR_IS_CONST1(_Rs_))
|
||||||
|
{
|
||||||
|
u32 shiftval = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
|
||||||
|
shiftval &= 0x7;
|
||||||
|
xMOV(eax, shiftval + 1);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_eeMoveGPRtoR(eax, _Rs_);
|
||||||
|
if (_Imm_ != 0)
|
||||||
|
xADD(eax, _Imm_);
|
||||||
|
xAND(eax, 0x7);
|
||||||
|
xADD(eax, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
xCMP(eax, 8);
|
||||||
|
xForwardJE32 skip;
|
||||||
|
//Calculate the shift from top bit to lowest
|
||||||
|
xMOV(edx, 64);
|
||||||
|
xSHL(eax, 3);
|
||||||
|
xSUB(edx, eax);
|
||||||
|
|
||||||
|
xMOVDZX(xRegisterSSE(t1reg), eax);
|
||||||
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&SHIFT_MASKS[0][0]]);
|
||||||
|
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
|
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
|
||||||
|
xMOVDQA(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
|
||||||
|
|
||||||
|
xMOVDZX(xRegisterSSE(t1reg), edx);
|
||||||
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&dummyValue[0]]);
|
||||||
|
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
|
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
|
||||||
|
xForwardJump32 full;
|
||||||
|
skip.SetTarget();
|
||||||
|
|
||||||
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&dummyValue[0]]);
|
||||||
|
full.SetTarget();
|
||||||
|
|
||||||
|
xBLEND.PS(xRegisterSSE(rtreg), xRegisterSSE(t0reg), 0x3);
|
||||||
|
|
||||||
|
_freeXMMreg(t0reg);
|
||||||
|
_freeXMMreg(t1reg);
|
||||||
|
_freeXMMreg(t2reg);
|
||||||
|
|
||||||
|
#else
|
||||||
iFlushCall(FLUSH_INTERPRETER);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
_deleteEEreg(_Rs_, 1);
|
_deleteEEreg(_Rs_, 1);
|
||||||
_deleteEEreg(_Rt_, 1);
|
_deleteEEreg(_Rt_, 1);
|
||||||
recCall(LDL);
|
recCall(LDL);
|
||||||
|
#endif
|
||||||
|
|
||||||
EE::Profiler.EmitOp(eeOpcode::LDL);
|
EE::Profiler.EmitOp(eeOpcode::LDL);
|
||||||
}
|
}
|
||||||
|
@ -464,21 +548,93 @@ void recLDL()
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
void recLDR()
|
void recLDR()
|
||||||
{
|
{
|
||||||
|
if (!_Rt_)
|
||||||
|
return;
|
||||||
|
|
||||||
|
#ifdef LOADSTORE_RECOMPILE
|
||||||
|
xLEA(arg2reg, ptr128[&dummyValue[0]]);
|
||||||
|
|
||||||
|
if (GPR_IS_CONST1(_Rs_))
|
||||||
|
{
|
||||||
|
u32 srcadr = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
|
||||||
|
srcadr &= ~0x07;
|
||||||
|
|
||||||
|
vtlb_DynGenRead64_Const(64, srcadr);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Load ECX with the source memory address that we're reading from.
|
||||||
|
_eeMoveGPRtoR(arg1regd, _Rs_);
|
||||||
|
if (_Imm_ != 0)
|
||||||
|
xADD(arg1regd, _Imm_);
|
||||||
|
|
||||||
|
xAND(arg1regd, ~0x07);
|
||||||
|
|
||||||
|
iFlushCall(FLUSH_FULLVTLB);
|
||||||
|
|
||||||
|
vtlb_DynGenRead64(64);
|
||||||
|
}
|
||||||
|
|
||||||
|
int rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ | MODE_WRITE);
|
||||||
|
int t0reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
|
int t1reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
|
int t2reg = _allocTempXMMreg(XMMT_INT, -1);
|
||||||
|
|
||||||
|
if (GPR_IS_CONST1(_Rs_))
|
||||||
|
{
|
||||||
|
u32 shiftval = g_cpuConstRegs[_Rs_].UL[0] + _Imm_;
|
||||||
|
shiftval &= 0x7;
|
||||||
|
xMOV(eax, shiftval);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
_eeMoveGPRtoR(eax, _Rs_);
|
||||||
|
if (_Imm_ != 0)
|
||||||
|
xADD(eax, _Imm_);
|
||||||
|
xAND(eax, 0x7);
|
||||||
|
}
|
||||||
|
|
||||||
|
xCMP(eax, 0);
|
||||||
|
xForwardJE32 skip;
|
||||||
|
//Calculate the shift from top bit to lowest
|
||||||
|
xMOV(edx, 64);
|
||||||
|
xSHL(eax, 3);
|
||||||
|
xSUB(edx, eax);
|
||||||
|
|
||||||
|
xMOVDZX(xRegisterSSE(t1reg), edx); //64-shift*8
|
||||||
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&SHIFT_MASKS[0][0]]);
|
||||||
|
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
|
xPAND(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
|
||||||
|
xMOVQZX(xRegisterSSE(t2reg), xRegisterSSE(t0reg));
|
||||||
|
|
||||||
|
xMOVDZX(xRegisterSSE(t1reg), eax); //shift*8
|
||||||
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&dummyValue[0]]);
|
||||||
|
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
|
xPOR(xRegisterSSE(t0reg), xRegisterSSE(t2reg));
|
||||||
|
xForwardJump32 full;
|
||||||
|
skip.SetTarget();
|
||||||
|
|
||||||
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&dummyValue[0]]);
|
||||||
|
full.SetTarget();
|
||||||
|
|
||||||
|
xBLEND.PS(xRegisterSSE(rtreg), xRegisterSSE(t0reg), 0x3);
|
||||||
|
|
||||||
|
_freeXMMreg(t0reg);
|
||||||
|
_freeXMMreg(t1reg);
|
||||||
|
_freeXMMreg(t2reg);
|
||||||
|
|
||||||
|
#else
|
||||||
iFlushCall(FLUSH_INTERPRETER);
|
iFlushCall(FLUSH_INTERPRETER);
|
||||||
_deleteEEreg(_Rs_, 1);
|
_deleteEEreg(_Rs_, 1);
|
||||||
_deleteEEreg(_Rt_, 1);
|
_deleteEEreg(_Rt_, 1);
|
||||||
recCall(LDR);
|
recCall(LDR);
|
||||||
|
#endif
|
||||||
|
|
||||||
EE::Profiler.EmitOp(eeOpcode::LDR);
|
EE::Profiler.EmitOp(eeOpcode::LDR);
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
|
|
||||||
alignas(16) const u32 SD_MASK[2][4] = {
|
|
||||||
{ 0xffffffff, 0xffffffff, 0x00000000, 0x00000000 },
|
|
||||||
{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }
|
|
||||||
};
|
|
||||||
|
|
||||||
void recSDL()
|
void recSDL()
|
||||||
{
|
{
|
||||||
#ifdef LOADSTORE_RECOMPILE
|
#ifdef LOADSTORE_RECOMPILE
|
||||||
|
@ -525,7 +681,7 @@ void recSDL()
|
||||||
xADD(eax, 1);
|
xADD(eax, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
xCMP(eax, 1);
|
xCMP(eax, 8);
|
||||||
xForwardJE32 skip;
|
xForwardJE32 skip;
|
||||||
//Calculate the shift from top bit to lowest
|
//Calculate the shift from top bit to lowest
|
||||||
xMOV(edx, 64);
|
xMOV(edx, 64);
|
||||||
|
@ -533,7 +689,7 @@ void recSDL()
|
||||||
xSUB(edx, eax);
|
xSUB(edx, eax);
|
||||||
// Generate mask 128-(shiftx8) xPSRA.W does bit for bit
|
// Generate mask 128-(shiftx8) xPSRA.W does bit for bit
|
||||||
xMOVDZX(xRegisterSSE(t1reg), eax);
|
xMOVDZX(xRegisterSSE(t1reg), eax);
|
||||||
xMOVQZX(xRegisterSSE(t0reg), ptr128[&SD_MASK[0][0]]);
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&SHIFT_MASKS[0][0]]);
|
||||||
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
xMOVQZX(xRegisterSSE(t1reg), ptr128[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
|
xMOVQZX(xRegisterSSE(t1reg), ptr128[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
|
||||||
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
|
@ -634,7 +790,7 @@ void recSDR()
|
||||||
xSUB(edx, eax);
|
xSUB(edx, eax);
|
||||||
// Generate mask 128-(shiftx8) xPSRA.W does bit for bit
|
// Generate mask 128-(shiftx8) xPSRA.W does bit for bit
|
||||||
xMOVDZX(xRegisterSSE(t1reg), edx);
|
xMOVDZX(xRegisterSSE(t1reg), edx);
|
||||||
xMOVQZX(xRegisterSSE(t0reg), ptr128[&SD_MASK[0][0]]);
|
xMOVQZX(xRegisterSSE(t0reg), ptr128[&SHIFT_MASKS[0][0]]);
|
||||||
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
xPSRL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
xMOVQZX(xRegisterSSE(t1reg), ptr128[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
|
xMOVQZX(xRegisterSSE(t1reg), ptr128[&dummyValue[0]]); // This line is super slow, but using MOVDQA/MOVAPS is even slower!
|
||||||
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
xPAND(xRegisterSSE(t0reg), xRegisterSSE(t1reg));
|
||||||
|
|
Loading…
Reference in New Issue