mirror of https://github.com/PCSX2/pcsx2.git
EE: port MMX code to SSE for the shift opcodes

The code needs to be enabled with a define (NO_MMX 1). It was tested with ps2autotest, but it needs more testing, and alignment issues still need to be checked. Overall the code is potentially a little slower with SSE than with MMX: only the 64 LSBs need to be shifted, whereas SSE shifts the full 128-bit register. The current implementation therefore flushes the low 64 bits to memory and drops the whole register; it is unlikely that the next instruction would have been done in SSE anyway. Note: this would be easier on a 64-bit (x64) architecture.
This commit is contained in:
parent 119f6deb24
commit a9a955f8b9
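For context, here is a minimal standalone sketch of the idea in plain SSE2 intrinsics (illustrative only; the commit itself uses the x86Emitter calls shown in the diff below, and the helper name and C signature here are made up). PSLLQ on an XMM register shifts both 64-bit lanes, so after the shift only the low quadword holds the DSLL result, and only that half is written back before the cached register is dropped.

#include <emmintrin.h>   // SSE2 intrinsics
#include <cstdint>

// Illustrative stand-in for one 128-bit EE GPR: DSLL only defines the low
// 64 bits of the destination; the upper lane is garbage after the 128-bit shift.
static void dsll_sse2_sketch(const uint64_t rt[2], uint64_t* rd_lo, int sa)
{
    __m128i v = _mm_loadu_si128(reinterpret_cast<const __m128i*>(rt));
    v = _mm_slli_epi64(v, sa);              // PSLLQ: shifts BOTH 64-bit lanes
    // Store only the low 64 bits (the commit uses xMOVL.PD for this),
    // then the cached register can be dropped without a full write-back.
    _mm_storel_epi64(reinterpret_cast<__m128i*>(rd_lo), v);
}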
@@ -748,6 +748,7 @@ void _deleteACCtoXMMreg(int vu, int flush)
 // Flush is 0: _freeXMMreg. Flush in memory if MODE_WRITE. Clear inuse
 // Flush is 1: Flush in memory. But register is still valid
 // Flush is 2: like 0 ...
+// Flush is 3: drop register content
 void _deleteGPRtoXMMreg(int reg, int flush)
 {
 	int i;
@@ -775,6 +776,10 @@ void _deleteGPRtoXMMreg(int reg, int flush)
 			if( flush == 2 )
 				xmmregs[i].inuse = 0;
 			break;
+
+		case 3:
+			xmmregs[i].inuse = 0;
+			break;
 		}

 		return;
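The added flush mode 3 drops a cached XMM register without writing it back to memory. The shift recompilers below rely on it: they store the only valid half (the low 64 bits) themselves and then discard the register, as in this pattern taken from the hunks that follow:

// Store the valid low 64 bits of rd, then drop both cached registers;
// their upper halves are stale, so a normal 128-bit write-back must be avoided.
xMOVL.PD(ptr64[&cpuRegs.GPR.r[_Rd_].UD[0]], xRegisterSSE(rdreg));
_deleteGPRtoXMMreg(_Rt_, 3);   // mode 3: clear inuse, no memory flush
_deleteGPRtoXMMreg(_Rd_, 3);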
@@ -20,6 +20,8 @@
 #include "R5900OpcodeTables.h"
 #include "iR5900.h"

+#define NO_MMX 0
+
 using namespace x86Emitter;

 namespace R5900 {
@@ -143,6 +145,22 @@ void recDSLLs_(int info, int sa)
 	int rtreg, rdreg;
 	pxAssert( !(info & PROCESS_EE_XMM) );

+#if NO_MMX
+	_addNeededGPRtoXMMreg(_Rt_);
+	_addNeededGPRtoXMMreg(_Rd_);
+	rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
+	rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
+
+	if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
+	xPSLL.Q(xRegisterSSE(rdreg), sa);
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else
 	_addNeededMMXreg(MMX_GPR+_Rt_);
 	_addNeededMMXreg(MMX_GPR+_Rd_);
 	rtreg = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ);
@@ -151,6 +169,7 @@ void recDSLLs_(int info, int sa)

 	if( rtreg != rdreg ) xMOVQ(xRegisterMMX(rdreg), xRegisterMMX(rtreg));
 	xPSLL.Q(xRegisterMMX(rdreg), sa);
+#endif
 }

 void recDSLL_(int info)
@@ -171,6 +190,22 @@ void recDSRLs_(int info, int sa)
 	int rtreg, rdreg;
 	pxAssert( !(info & PROCESS_EE_XMM) );

+#if NO_MMX
+	_addNeededGPRtoXMMreg(_Rt_);
+	_addNeededGPRtoXMMreg(_Rd_);
+	rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
+	rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
+
+	if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
+	xPSRL.Q(xRegisterSSE(rdreg), sa);
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else
 	_addNeededMMXreg(MMX_GPR+_Rt_);
 	_addNeededMMXreg(MMX_GPR+_Rd_);
 	rtreg = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ);
@@ -179,6 +214,7 @@ void recDSRLs_(int info, int sa)

 	if( rtreg != rdreg ) xMOVQ(xRegisterMMX(rdreg), xRegisterMMX(rtreg));
 	xPSRL.Q(xRegisterMMX(rdreg), sa);
+#endif
 }

 void recDSRL_(int info)
@@ -199,6 +235,42 @@ void recDSRAs_(int info, int sa)
 	int rtreg, rdreg, t0reg;
 	pxAssert( !(info & PROCESS_EE_XMM) );

+#if NO_MMX
+	_addNeededGPRtoXMMreg(_Rt_);
+	_addNeededGPRtoXMMreg(_Rd_);
+	rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
+	rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
+
+	if( rtreg != rdreg ) xMOVDQA(xRegisterSSE(rdreg), xRegisterSSE(rtreg));
+
+	if ( sa ) {
+
+		t0reg = _allocTempXMMreg(XMMT_INT, -1);
+
+		xMOVDQA(xRegisterSSE(t0reg), xRegisterSSE(rtreg));
+
+		// it is a signed shift (but 64-bit operands aren't supported, even with SSE)
+		xPSRA.D(xRegisterSSE(t0reg), sa);
+		xPSRL.Q(xRegisterSSE(rdreg), sa);
+
+		// This could be done in one blend instruction with SSE4.1
+		// Goal: move bits 63:32 of t0reg to bits 63:32 of rdreg
+		{
+			xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x55);
+			// take the lower dword of rdreg and the lower dword of t0reg
+			xPUNPCK.LDQ(xRegisterSSE(rdreg), xRegisterSSE(t0reg));
+		}
+
+		_freeXMMreg(t0reg);
+	}
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else
 	_addNeededMMXreg(MMX_GPR+_Rt_);
 	_addNeededMMXreg(MMX_GPR+_Rd_);
 	rtreg = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ);
@@ -221,6 +293,7 @@ void recDSRAs_(int info, int sa)

 		_freeMMXreg(t0reg);
 	}
+#endif
 }

 void recDSRA_(int info)
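The DSRA path above works around the lack of a 64-bit arithmetic shift in SSE2 (there is no PSRAQ): the sign-correct high dword comes from PSRAD, the low dword from PSRLQ, and PSHUFD + PUNPCKLDQ recombine them (a single SSE4.1 blend could do the same, as the comment notes). A rough standalone equivalent in SSE2 intrinsics, assuming a shift amount in [1, 31]; the function name and scalar wrapper are illustrative only:

#include <emmintrin.h>
#include <cstdint>

// 64-bit arithmetic right shift built from SSE2 shifts, sa in [1, 31].
static int64_t dsra_sse2_sketch(const int64_t* rt, int sa)
{
    __m128i v  = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rt));
    __m128i lo = _mm_srli_epi64(v, sa);      // PSRLQ: low dword of the result is correct
    __m128i hi = _mm_srai_epi32(v, sa);      // PSRAD: high dword of the result is correct
    hi = _mm_shuffle_epi32(hi, 0x55);        // PSHUFD 0x55: broadcast that high dword
    __m128i r  = _mm_unpacklo_epi32(lo, hi); // PUNPCKLDQ: dword0 from lo, dword1 from hi
    int64_t out;
    _mm_storel_epi64(reinterpret_cast<__m128i*>(&out), r);
    return out;
}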
@@ -314,10 +387,25 @@ EERECOMPILE_CODEX(eeRecompileCode2, DSRA32);

 __aligned16 u32 s_sa[4] = {0x1f, 0, 0x3f, 0};

-int recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp, int shift64)
+void recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp)
 {
 	pxAssert( !(info & PROCESS_EE_XMM) );

+#if NO_MMX
+	_addNeededGPRtoXMMreg(_Rt_);
+	_addNeededGPRtoXMMreg(_Rd_);
+	*rtreg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ);
+	*rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
+
+	*rstemp = _allocTempXMMreg(XMMT_INT, -1);
+
+	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+	xAND(eax, 0x3f);
+	xMOVDZX(xRegisterSSE(*rstemp), eax);
+	*rsreg = *rstemp;
+
+	if( *rtreg != *rdreg ) xMOVDQA(xRegisterSSE(*rdreg), xRegisterSSE(*rtreg));
+#else
 	_addNeededMMXreg(MMX_GPR+_Rt_);
 	_addNeededMMXreg(MMX_GPR+_Rd_);
 	*rtreg = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ);
@@ -326,27 +414,41 @@ int recSetShiftV(int info, int* rsreg, int* rtreg, int* rdreg, int* rstemp, int

 	*rstemp = _allocMMXreg(-1, MMX_TEMP, 0);
 	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-	xAND(eax, shift64?0x3f:0x1f);
+	xAND(eax, 0x3f);
 	xMOVDZX(xRegisterMMX(*rstemp), eax);
 	*rsreg = *rstemp;

 	if( *rtreg != *rdreg ) xMOVQ(xRegisterMMX(*rdreg), xRegisterMMX(*rtreg));
-	return 1;
+#endif
 }

-void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp, int shift64)
+void recSetConstShiftV(int info, int* rsreg, int* rdreg, int* rstemp)
 {
+#if NO_MMX
+	_addNeededGPRtoXMMreg(_Rd_);
+	*rdreg = _allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE);
+
+	*rstemp = _allocTempXMMreg(XMMT_INT, -1);
+
+	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
+	xAND(eax, 0x3f);
+	xMOVDZX(xRegisterSSE(*rstemp), eax);
+	*rsreg = *rstemp;
+
+	_flushConstReg(_Rt_);
+#else
 	_addNeededMMXreg(MMX_GPR+_Rd_);
 	*rdreg = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE);
 	SetMMXstate();

 	*rstemp = _allocMMXreg(-1, MMX_TEMP, 0);
 	xMOV(eax, ptr[&cpuRegs.GPR.r[_Rs_].UL[0]]);
-	xAND(eax, shift64?0x3f:0x1f);
+	xAND(eax, 0x3f);
 	xMOVDZX(xRegisterMMX(*rstemp), eax);
 	*rsreg = *rstemp;

 	_flushConstReg(_Rt_);
+#endif
 }

 //// SLLV
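recSetShiftV/recSetConstShiftV load the shift amount from rs, mask it to 6 bits and move it into a register, because the variable forms of PSLLQ/PSRLQ take their count from the low 64 bits of an MMX/XMM register rather than from an immediate. A rough intrinsics equivalent of that setup plus a variable DSLLV; helper name and scalar signature are illustrative only:

#include <emmintrin.h>
#include <cstdint>

// Variable 64-bit logical left shift with the count in a register,
// mirroring recSetShiftV + recDSLLV_: mask rs to 0..63, MOVD it into an
// XMM register, then use the register-count form of the shift.
static uint64_t dsllv_sse2_sketch(const uint64_t* rt, uint32_t rs)
{
    __m128i cnt = _mm_cvtsi32_si128(static_cast<int>(rs & 0x3f)); // like xAND(eax, 0x3f); xMOVDZX(...)
    __m128i v   = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rt));
    v = _mm_sll_epi64(v, cnt);                                    // PSLLQ xmm, xmm: count from cnt's low 64 bits
    uint64_t out;
    _mm_storel_epi64(reinterpret_cast<__m128i*>(&out), v);
    return out;
}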
@@ -479,20 +581,45 @@ void recDSLLV_consts(int info)
 void recDSLLV_constt(int info)
 {
 	int rsreg, rdreg, rstemp = -1;
-	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp, 1);
+	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
+#if NO_MMX
+	xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
+	xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
+	if( rstemp != -1 ) _freeXMMreg(rstemp);
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else

 	xMOVQ(xRegisterMMX(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
 	xPSLL.Q(xRegisterMMX(rdreg), xRegisterMMX(rsreg));
 	if( rstemp != -1 ) _freeMMXreg(rstemp);
+#endif
 }

 void recDSLLV_(int info)
 {
 	int rsreg, rtreg, rdreg, rstemp = -1;
-	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp, 1);
+	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);

+#if NO_MMX
+	xPSLL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
+	if( rstemp != -1 ) _freeXMMreg(rstemp);
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else
 	xPSLL.Q(xRegisterMMX(rdreg), xRegisterMMX(rsreg));
 	if( rstemp != -1 ) _freeMMXreg(rstemp);
+#endif
 }

 EERECOMPILE_CODE0(DSLLV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED);
@@ -513,20 +640,45 @@ void recDSRLV_consts(int info)
 void recDSRLV_constt(int info)
 {
 	int rsreg, rdreg, rstemp = -1;
-	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp, 1);
+	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);

+#if NO_MMX
+	xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
+	xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
+	if( rstemp != -1 ) _freeXMMreg(rstemp);
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else
 	xMOVQ(xRegisterMMX(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
 	xPSRL.Q(xRegisterMMX(rdreg), xRegisterMMX(rsreg));
 	if( rstemp != -1 ) _freeMMXreg(rstemp);
+#endif
 }

 void recDSRLV_(int info)
 {
 	int rsreg, rtreg, rdreg, rstemp = -1;
-	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp, 1);
+	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);

+#if NO_MMX
+	xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
+	if( rstemp != -1 ) _freeXMMreg(rstemp);
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+#else
 	xPSRL.Q(xRegisterMMX(rdreg), xRegisterMMX(rsreg));
 	if( rstemp != -1 ) _freeMMXreg(rstemp);
+#endif
 }

 EERECOMPILE_CODE0(DSRLV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED);
@@ -547,10 +699,45 @@ void recDSRAV_consts(int info)
 void recDSRAV_constt(int info)
 {
 	int rsreg, rdreg, rstemp = -1, t0reg, t1reg;
+#if NO_MMX
+	t0reg = _allocTempXMMreg(XMMT_INT, -1);
+	t1reg = _allocTempXMMreg(XMMT_INT, -1);
+
+	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);
+
+	xMOVDQA(xRegisterSSE(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
+	xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
+
+	// calc high bit
+	xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(rdreg));
+	xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(rdreg));
+	xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x55);
+
+	// shift highest bit, 64 - eax
+	xMOV(eax, 64);
+	xMOVDZX(xRegisterSSE(t1reg), eax);
+	xPSUB.D(xRegisterSSE(t1reg), xRegisterSSE(rsreg));
+
+	// right logical shift
+	xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
+	xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); // highest bits
+
+	xPOR(xRegisterSSE(rdreg), xRegisterSSE(t0reg));
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rd_, 3);
+
+	_freeXMMreg(t0reg);
+	_freeXMMreg(t1reg);
+	if( rstemp != -1 ) _freeXMMreg(rstemp);
+#else
 	t0reg = _allocMMXreg(-1, MMX_TEMP, 0);
 	t1reg = _allocMMXreg(-1, MMX_TEMP, 0);

-	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp, 1);
+	recSetConstShiftV(info, &rsreg, &rdreg, &rstemp);

 	xMOVQ(xRegisterMMX(rdreg), ptr[&cpuRegs.GPR.r[_Rt_]]);
 	xPXOR(xRegisterMMX(t0reg), xRegisterMMX(t0reg));
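The DSRAV code above (and the recDSRAV_ hunk that follows) synthesizes a variable 64-bit arithmetic shift from logical shifts: PCMPGTD against zero plus PSHUFD builds an all-ones/all-zeros sign mask from the high dword, the value is shifted right logically by n, the mask is shifted left by 64 - n, and the two are ORed. A rough standalone equivalent in SSE2 intrinsics (helper name and scalar wrapper are illustrative only; n = 0 still works because a register-count shift by 64 yields zero):

#include <emmintrin.h>
#include <cstdint>

// Variable 64-bit arithmetic right shift from logical shifts, as in recDSRAV_.
static int64_t dsrav_sse2_sketch(const int64_t* rt, uint32_t rs)
{
    const int n = static_cast<int>(rs & 0x3f);
    __m128i v    = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(rt));
    __m128i sign = _mm_cmpgt_epi32(_mm_setzero_si128(), v);  // PCMPGTD: per-dword (0 > v) ? ~0 : 0
    sign = _mm_shuffle_epi32(sign, 0x55);                    // keep the mask made from the high dword
    v    = _mm_srl_epi64(v, _mm_cvtsi32_si128(n));           // PSRLQ: logical shift right by n
    sign = _mm_sll_epi64(sign, _mm_cvtsi32_si128(64 - n));   // PSLLQ: sign bits into the top n positions
    v    = _mm_or_si128(v, sign);                            // POR: recombine
    int64_t out;
    _mm_storel_epi64(reinterpret_cast<__m128i*>(&out), v);
    return out;
}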
@@ -574,14 +761,49 @@ void recDSRAV_constt(int info)
 	_freeMMXreg(t0reg);
 	_freeMMXreg(t1reg);
 	if( rstemp != -1 ) _freeMMXreg(rstemp);
+#endif
 }

 void recDSRAV_(int info)
 {
 	int rsreg, rtreg, rdreg, rstemp = -1, t0reg, t1reg;
+#if NO_MMX
+	t0reg = _allocTempXMMreg(XMMT_INT, -1);
+	t1reg = _allocTempXMMreg(XMMT_INT, -1);
+	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);
+
+	xPXOR(xRegisterSSE(t0reg), xRegisterSSE(t0reg));
+
+	// calc high bit
+	xMOVDQA(xRegisterSSE(t1reg), xRegisterSSE(rdreg));
+	xPCMP.GTD(xRegisterSSE(t0reg), xRegisterSSE(rdreg));
+	xPSHUF.D(xRegisterSSE(t0reg), xRegisterSSE(t0reg), 0x55);
+
+	// shift highest bit, 64 - eax
+	xMOV(eax, 64);
+	xMOVDZX(xRegisterSSE(t1reg), eax);
+	xPSUB.D(xRegisterSSE(t1reg), xRegisterSSE(rsreg));
+
+	// right logical shift
+	xPSRL.Q(xRegisterSSE(rdreg), xRegisterSSE(rsreg));
+	xPSLL.Q(xRegisterSSE(t0reg), xRegisterSSE(t1reg)); // highest bits
+
+	xPOR(xRegisterSSE(rdreg), xRegisterSSE(t0reg));
+
+	// flush the lower 64 bits (the upper 64 bits are wrong)
+	// The other possibility would be to read back the upper 64 bits
+	// (better use of the register, but the code will likely be flushed afterwards anyway)
+	xMOVL.PD(ptr64[&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ]], xRegisterSSE(rdreg));
+	_deleteGPRtoXMMreg(_Rt_, 3);
+	_deleteGPRtoXMMreg(_Rd_, 3);
+
+	_freeXMMreg(t0reg);
+	_freeXMMreg(t1reg);
+	if( rstemp != -1 ) _freeXMMreg(rstemp);
+#else
 	t0reg = _allocMMXreg(-1, MMX_TEMP, 0);
 	t1reg = _allocMMXreg(-1, MMX_TEMP, 0);
-	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp, 1);
+	recSetShiftV(info, &rsreg, &rtreg, &rdreg, &rstemp);

 	xPXOR(xRegisterMMX(t0reg), xRegisterMMX(t0reg));

@@ -604,6 +826,7 @@ void recDSRAV_(int info)
 	_freeMMXreg(t0reg);
 	_freeMMXreg(t1reg);
 	if( rstemp != -1 ) _freeMMXreg(rstemp);
+#endif
 }

 EERECOMPILE_CODE0(DSRAV, XMMINFO_READS|XMMINFO_READT|XMMINFO_WRITED);