Also removed SSE1 checks, see rev281 for explanation :p

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@282 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
ramapcsx2 2008-11-03 15:04:11 +00:00 committed by Gregory Hainaut
parent b70563a9fa
commit 8dc5441547
10 changed files with 218 additions and 303 deletions

View File

@ -828,7 +828,6 @@ void _eeWriteConstMem64(u32 mem, int mmreg)
void _eeWriteConstMem128(u32 mem, int mmreg)
{
assert( cpucaps.hasStreamingSIMDExtensions );
if( IS_MMXREG(mmreg) ) {
SetMMXstate();
MOVQRtoM(mem, mmreg&0xf);

View File

@ -532,12 +532,6 @@ void cpuExecuteBios()
if( CHECK_EEREC ) Config.Options |= PCSX2_COP2REC;
else Config.Options &= ~PCSX2_COP2REC;
#ifndef PCSX2_NORECBUILD
if( !cpucaps.hasStreamingSIMDExtensions ) {
Config.Options &= ~(PCSX2_VU1REC|PCSX2_VU0REC);
}
#endif
// remove frame skipping if GS doesn't support it
switch(CHECK_FRAMELIMIT) {
case PCSX2_FRAMELIMIT_SKIP:

View File

@ -126,7 +126,6 @@ int _getFreeXMMreg()
{
int i, tempi;
u32 bestcount = 0x10000;
if( !cpucaps.hasStreamingSIMDExtensions ) return -1;
for (i=0; i<XMMREGS; i++) {
if (xmmregs[(i+s_xmmchecknext)%XMMREGS].inuse == 0) {
@ -382,7 +381,6 @@ int _allocFPtoXMMreg(int xmmreg, int fpreg, int mode) {
int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
{
int i;
if( !cpucaps.hasStreamingSIMDExtensions ) return -1;
for (i=0; i<XMMREGS; i++) {
if (xmmregs[i].inuse == 0) continue;
@ -499,7 +497,6 @@ int _allocGPRtoXMMreg(int xmmreg, int gprreg, int mode)
int _allocFPACCtoXMMreg(int xmmreg, int mode)
{
int i;
if( !cpucaps.hasStreamingSIMDExtensions ) return -1;
for (i=0; i<XMMREGS; i++) {
if (xmmregs[i].inuse == 0) continue;
@ -905,8 +902,6 @@ u8 _hasFreeXMMreg()
{
int i;
if( !cpucaps.hasStreamingSIMDExtensions ) return 0;
for (i=0; i<XMMREGS; i++) {
if (!xmmregs[i].inuse) return 1;
}
@ -996,9 +991,7 @@ void FreezeXMMRegs_(int save)
//SysPrintf("XMM Already saved\n");
return;
}
// only necessary for nonsse CPUs (very rare)
if( !cpucaps.hasStreamingSIMDExtensions )
return;
g_globalXMMSaved = 1;
#ifdef _MSC_VER

View File

@ -799,56 +799,47 @@ static u32 s_signbit = 0x80000000;
void recCVT_W()
{
if( cpucaps.hasStreamingSIMDExtensions ) {
int t0reg;
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
int t0reg;
int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
if( regs >= 0 ) {
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
_freeXMMreg(t0reg);
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&s_signbit);
SSE_CVTTSS2SI_XMM_to_R32(EAX, regs);
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs);
}
else SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
_deleteFPtoXMMreg(_Fd_, 2);
if( regs >= 0 ) {
t0reg = _allocTempXMMreg(XMMT_FPS, -1);
_freeXMMreg(t0reg);
SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&s_signbit);
SSE_CVTTSS2SI_XMM_to_R32(EAX, regs);
SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs);
}
else SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
_deleteFPtoXMMreg(_Fd_, 2);
MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
AND32ItoR(ECX, 0x7f800000);
CMP32ItoR(ECX, 0x4E800000);
j8Ptr[0] = JLE8(0);
MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]);
AND32ItoR(ECX, 0x7f800000);
CMP32ItoR(ECX, 0x4E800000);
j8Ptr[0] = JLE8(0);
// need to detect if reg is positive
/*if( regs >= 0 ) {
SSE_UCOMISS_XMM_to_XMM(regs, t0reg);
j8Ptr[2] = JB8(0);
}
else {*/
TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000);
j8Ptr[2] = JNZ8(0);
//}
// need to detect if reg is positive
/*if( regs >= 0 ) {
SSE_UCOMISS_XMM_to_XMM(regs, t0reg);
j8Ptr[2] = JB8(0);
}
else {*/
TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000);
j8Ptr[2] = JNZ8(0);
//}
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff);
j8Ptr[1] = JMP8(0);
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff);
j8Ptr[1] = JMP8(0);
x86SetJ8( j8Ptr[2] );
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000);
j8Ptr[1] = JMP8(0);
x86SetJ8( j8Ptr[2] );
MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000);
j8Ptr[1] = JMP8(0);
x86SetJ8( j8Ptr[0] );
x86SetJ8( j8Ptr[0] );
MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX);
MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX);
x86SetJ8( j8Ptr[1] );
}
#ifndef __x86_64__
else {
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code);
iFlushCall(FLUSH_EVERYTHING);
_flushConstRegs();
CALLFunc((uptr)CVT_W);
}
#endif
x86SetJ8( j8Ptr[1] );
}
//------------------------------------------------------------------

View File

@ -844,24 +844,17 @@ CPU_SSE_XMMCACHE_END
_flushCachedRegs();
_deleteEEreg(_Rd_, 0);
if( cpucaps.hasStreamingSIMDExtensions ) {
MMX_ALLOC_TEMP4(
MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] );
MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] );
SSE_PMAXSW_MM_to_MM( t0reg, t1reg );
MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] );
MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] );
SSE_PMAXSW_MM_to_MM( t2reg, t3reg);
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg );
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg);
SetMMXstate();
)
}
else {
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
CALLFunc( (uptr)PMAXH );
}
MMX_ALLOC_TEMP4(
MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] );
MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] );
SSE_PMAXSW_MM_to_MM( t0reg, t1reg );
MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] );
MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] );
SSE_PMAXSW_MM_to_MM( t2reg, t3reg);
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg );
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg);
SetMMXstate();
)
}
////////////////////////////////////////////////////
@ -2001,24 +1994,17 @@ CPU_SSE_XMMCACHE_END
_flushCachedRegs();
_deleteEEreg(_Rd_, 0);
if( cpucaps.hasStreamingSIMDExtensions ) {
MMX_ALLOC_TEMP4(
MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] );
MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] );
MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] );
MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] );
SSE_PMINSW_MM_to_MM( t0reg, t2reg );
SSE_PMINSW_MM_to_MM( t1reg, t3reg );
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg );
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg );
SetMMXstate();
)
}
else {
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
CALLFunc( (uptr)PMINH );
}
MMX_ALLOC_TEMP4(
MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] );
MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] );
MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] );
MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] );
SSE_PMINSW_MM_to_MM( t0reg, t2reg );
SSE_PMINSW_MM_to_MM( t1reg, t3reg );
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg );
MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg );
SetMMXstate();
)
}
////////////////////////////////////////////////////

View File

@ -219,7 +219,6 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode
#define XMMINFO_WRITEACC 0x400
#define CPU_SSE_XMMCACHE_START(xmminfo) \
if (cpucaps.hasStreamingSIMDExtensions) \
{ \
int info = eeRecompileCodeXMM(xmminfo); \

View File

@ -435,7 +435,7 @@ void rpropMMI1(EEINST* prev, EEINST* pinst);
void rpropMMI2(EEINST* prev, EEINST* pinst);
void rpropMMI3(EEINST* prev, EEINST* pinst);
#define EEINST_REALXMM (cpucaps.hasStreamingSIMDExtensions?EEINST_XMM:0)
#define EEINST_REALXMM EEINST_XMM
//SPECIAL, REGIMM, J, JAL, BEQ, BNE, BLEZ, BGTZ,
//ADDI, ADDIU, SLTI, SLTIU, ANDI, ORI, XORI, LUI,

View File

@ -285,7 +285,7 @@ u8 _eeIsLoadStoreCoIssue(u32 firstcode, u32 secondcode)
case 57: // swc1
case 54: // lqc2
case 62: // sqc2
return (secondcode>>26)==(firstcode>>26)&&cpucaps.hasStreamingSIMDExtensions;
return (secondcode>>26)==(firstcode>>26);
}
return 0;
}
@ -1294,153 +1294,144 @@ int eeRecompileCodeXMM(int xmminfo)
void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo)
{
int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1;
int info = PROCESS_EE_XMM;
if( cpucaps.hasStreamingSIMDExtensions ) {
int info = PROCESS_EE_XMM;
if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_);
if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_);
if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_);
if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg();
if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_);
if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_);
if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_);
if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg();
if( xmminfo & XMMINFO_READT ) {
if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ);
else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ);
}
if( xmminfo & XMMINFO_READS ) {
if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) {
mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
}
else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ);
}
if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs);
if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt);
if( xmminfo & XMMINFO_READD ) {
assert( xmminfo & XMMINFO_WRITED );
mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ);
}
if( xmminfo & XMMINFO_READACC ) {
if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) )
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ);
else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ);
}
if( xmminfo & XMMINFO_WRITEACC ) {
// check for last used, if so don't alloc a new XMM reg
int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0);
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc);
if( mmregacc < 0 ) {
if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) {
if( FPUINST_ISLIVE(_Ft_) ) {
_freeXMMreg(mmregt);
info &= ~PROCESS_EE_MODEWRITET;
}
_deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2);
xmmregs[mmregt].inuse = 1;
xmmregs[mmregt].reg = 0;
xmmregs[mmregt].mode = readacc;
xmmregs[mmregt].type = XMMTYPE_FPACC;
mmregacc = mmregt;
}
else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) {
if( FPUINST_ISLIVE(_Fs_) ) {
_freeXMMreg(mmregs);
info &= ~PROCESS_EE_MODEWRITES;
}
_deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2);
xmmregs[mmregs].inuse = 1;
xmmregs[mmregs].reg = 0;
xmmregs[mmregs].mode = readacc;
xmmregs[mmregs].type = XMMTYPE_FPACC;
mmregacc = mmregs;
}
else mmregacc = _allocFPACCtoXMMreg(-1, readacc);
}
xmmregs[mmregacc].mode |= MODE_WRITE;
}
else if( xmminfo & XMMINFO_WRITED ) {
// check for last used, if so don't alloc a new XMM reg
int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0);
if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd);
if( mmregd < 0 ) {
if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) {
if( FPUINST_ISLIVE(_Ft_) ) {
_freeXMMreg(mmregt);
info &= ~PROCESS_EE_MODEWRITET;
}
_deleteMMXreg(MMX_FPU+_Fd_, 2);
xmmregs[mmregt].inuse = 1;
xmmregs[mmregt].reg = _Fd_;
xmmregs[mmregt].mode = readd;
mmregd = mmregt;
}
else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) {
if( FPUINST_ISLIVE(_Fs_) ) {
_freeXMMreg(mmregs);
info &= ~PROCESS_EE_MODEWRITES;
}
_deleteMMXreg(MMX_FPU+_Fd_, 2);
xmmregs[mmregs].inuse = 1;
xmmregs[mmregs].reg = _Fd_;
xmmregs[mmregs].mode = readd;
mmregd = mmregs;
}
else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) {
if( FPUINST_ISLIVE(XMMFPU_ACC) )
_freeXMMreg(mmregacc);
_deleteMMXreg(MMX_FPU+_Fd_, 2);
xmmregs[mmregacc].inuse = 1;
xmmregs[mmregacc].reg = _Fd_;
xmmregs[mmregacc].mode = readd;
xmmregs[mmregacc].type = XMMTYPE_FPREG;
mmregd = mmregacc;
}
else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
}
}
assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 );
if( xmminfo & XMMINFO_WRITED ) {
assert( mmregd >= 0 );
info |= PROCESS_EE_SET_D(mmregd);
}
if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) {
if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC;
else assert( !(xmminfo&XMMINFO_WRITEACC));
}
if( xmminfo & XMMINFO_READS ) {
if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S;
}
if( xmminfo & XMMINFO_READT ) {
if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T;
}
// at least one must be in xmm
if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) {
assert( mmregs >= 0 || mmregt >= 0 );
}
xmmcode(info);
_clearNeededXMMregs();
return;
if( xmminfo & XMMINFO_READT ) {
if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ);
else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ);
}
MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code);
MOV32ItoM((uptr)&cpuRegs.pc, pc);
iFlushCall(FLUSH_EVERYTHING);
CALLFunc((uptr)fpucode);
if( xmminfo & XMMINFO_READS ) {
if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) {
mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ);
}
else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ);
}
if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs);
if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt);
if( xmminfo & XMMINFO_READD ) {
assert( xmminfo & XMMINFO_WRITED );
mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ);
}
if( xmminfo & XMMINFO_READACC ) {
if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) )
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ);
else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ);
}
if( xmminfo & XMMINFO_WRITEACC ) {
// check for last used, if so don't alloc a new XMM reg
int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0);
mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc);
if( mmregacc < 0 ) {
if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) {
if( FPUINST_ISLIVE(_Ft_) ) {
_freeXMMreg(mmregt);
info &= ~PROCESS_EE_MODEWRITET;
}
_deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2);
xmmregs[mmregt].inuse = 1;
xmmregs[mmregt].reg = 0;
xmmregs[mmregt].mode = readacc;
xmmregs[mmregt].type = XMMTYPE_FPACC;
mmregacc = mmregt;
}
else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) {
if( FPUINST_ISLIVE(_Fs_) ) {
_freeXMMreg(mmregs);
info &= ~PROCESS_EE_MODEWRITES;
}
_deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2);
xmmregs[mmregs].inuse = 1;
xmmregs[mmregs].reg = 0;
xmmregs[mmregs].mode = readacc;
xmmregs[mmregs].type = XMMTYPE_FPACC;
mmregacc = mmregs;
}
else mmregacc = _allocFPACCtoXMMreg(-1, readacc);
}
xmmregs[mmregacc].mode |= MODE_WRITE;
}
else if( xmminfo & XMMINFO_WRITED ) {
// check for last used, if so don't alloc a new XMM reg
int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0);
if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd);
if( mmregd < 0 ) {
if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) {
if( FPUINST_ISLIVE(_Ft_) ) {
_freeXMMreg(mmregt);
info &= ~PROCESS_EE_MODEWRITET;
}
_deleteMMXreg(MMX_FPU+_Fd_, 2);
xmmregs[mmregt].inuse = 1;
xmmregs[mmregt].reg = _Fd_;
xmmregs[mmregt].mode = readd;
mmregd = mmregt;
}
else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) {
if( FPUINST_ISLIVE(_Fs_) ) {
_freeXMMreg(mmregs);
info &= ~PROCESS_EE_MODEWRITES;
}
_deleteMMXreg(MMX_FPU+_Fd_, 2);
xmmregs[mmregs].inuse = 1;
xmmregs[mmregs].reg = _Fd_;
xmmregs[mmregs].mode = readd;
mmregd = mmregs;
}
else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) {
if( FPUINST_ISLIVE(XMMFPU_ACC) )
_freeXMMreg(mmregacc);
_deleteMMXreg(MMX_FPU+_Fd_, 2);
xmmregs[mmregacc].inuse = 1;
xmmregs[mmregacc].reg = _Fd_;
xmmregs[mmregacc].mode = readd;
xmmregs[mmregacc].type = XMMTYPE_FPREG;
mmregd = mmregacc;
}
else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd);
}
}
assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 );
if( xmminfo & XMMINFO_WRITED ) {
assert( mmregd >= 0 );
info |= PROCESS_EE_SET_D(mmregd);
}
if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) {
if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC;
else assert( !(xmminfo&XMMINFO_WRITEACC));
}
if( xmminfo & XMMINFO_READS ) {
if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S;
}
if( xmminfo & XMMINFO_READT ) {
if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T;
}
// at least one must be in xmm
if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) {
assert( mmregs >= 0 || mmregt >= 0 );
}
xmmcode(info);
_clearNeededXMMregs();
}
#undef _Ft_
@ -1461,23 +1452,20 @@ void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR)
sseMXCSR &= 0xffff; // clear the upper 16 bits since they shouldn't be set
sseVUMXCSR &= 0xffff;
if( cpucaps.hasStreamingSIMDExtensions ) {
g_sseMXCSR = sseMXCSR;
g_sseVUMXCSR = sseVUMXCSR;
// do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up)
// Update 11/05/08 - Doesnt seem to effect it anymore, for the speed boost, its on :p
//g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III!
// changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo
// so... grandia III wins (you can change individual games with the 'roundmode' patch command)
g_sseMXCSR = sseMXCSR;
g_sseVUMXCSR = sseVUMXCSR;
// do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up)
// Update 11/05/08 - Doesnt seem to effect it anymore, for the speed boost, its on :p
//g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III!
// changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo
// so... grandia III wins (you can change individual games with the 'roundmode' patch command)
#ifdef _MSC_VER
__asm ldmxcsr g_sseMXCSR; // set the new sse control
__asm ldmxcsr g_sseMXCSR; // set the new sse control
#else
__asm__("ldmxcsr %0" : : "m"(g_sseMXCSR) );
__asm__("ldmxcsr %0" : : "m"(g_sseMXCSR) );
#endif
//g_sseVUMXCSR = g_sseMXCSR|0x6000;
}
//g_sseVUMXCSR = g_sseMXCSR|0x6000;
}
#define REC_CACHEMEM 0x01000000
@ -1575,6 +1563,11 @@ int recInit( void )
SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) );
return -1;
}
if ( !( cpucaps.hasStreamingSIMDExtensions ) )
{
SysMessage( _( "Processor doesn't supports SSE, can't run recompiler without that" ) );
return -1;
}
if ( !( cpucaps.hasStreamingSIMD2Extensions ) )
{
SysMessage( _( "Processor doesn't supports SSE2, can't run recompiler without that" ) );
@ -2313,24 +2306,7 @@ void recCOP2( void )
#ifdef CPU_LOG
CPU_LOG( "Recompiling COP2:%s\n", disR5900Fasm( cpuRegs.code, cpuRegs.pc ) );
#endif
if ( !cpucaps.hasStreamingSIMDExtensions ) {
MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code );
MOV32ItoM( (uptr)&cpuRegs.pc, pc );
iFlushCall(FLUSH_EVERYTHING);
g_cpuHasConstReg = 1; // reset all since COP2 can change regs
CALLFunc( (uptr)COP2 );
CMP32ItoM((int)&cpuRegs.pc, pc);
j8Ptr[0] = JE8(0);
ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles);
JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 ));
x86SetJ8(j8Ptr[0]);
}
else
{
recCOP22( );
}
recCOP22( );
}
#endif

View File

@ -1237,7 +1237,7 @@ void recLQ( void )
#ifdef REC_SLOWREAD
_flushConstReg(_Rs_);
#else
if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
// malice hits this
assert( (g_cpuConstRegs[_Rs_].UL[0]+_Imm_) % 16 == 0 );
@ -1278,7 +1278,7 @@ void recLQ( void )
int mmregs;
int t0reg = -1;
if( !cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) )
if( GPR_IS_CONST1( _Rs_ ) )
_flushConstReg(_Rs_);
mmregs = _eePrepareReg(_Rs_);
@ -2100,7 +2100,7 @@ void recStore(int bit, u32 imm, int align)
#ifdef REC_SLOWWRITE
_flushConstReg(_Rs_);
#else
if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
u32 addr = g_cpuConstRegs[_Rs_].UL[0]+imm;
int doclear = 0;
StopPerfCounter();
@ -2190,7 +2190,7 @@ void recStore(int bit, u32 imm, int align)
int dohw;
int mmregs;
if( !cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
_flushConstReg(_Rs_);
}
@ -3429,7 +3429,7 @@ void recLQC2( void )
#ifdef REC_SLOWREAD
_flushConstReg(_Rs_);
#else
if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
assert( (g_cpuConstRegs[_Rs_].UL[0]+_Imm_) % 16 == 0 );
if( _Ft_ ) mmreg = _allocVFtoXMMreg(&VU0, -1, _Ft_, MODE_WRITE);
@ -3443,7 +3443,7 @@ void recLQC2( void )
{
int dohw, mmregs;
if( !cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
_flushConstReg(_Rs_);
}
@ -3574,7 +3574,7 @@ void recSQC2( void )
#ifdef REC_SLOWWRITE
_flushConstReg(_Rs_);
#else
if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
assert( (g_cpuConstRegs[_Rs_].UL[0]+_Imm_)%16 == 0 );
mmreg = _allocVFtoXMMreg(&VU0, -1, _Ft_, MODE_READ)|MEM_XMMTAG;
@ -3586,7 +3586,7 @@ void recSQC2( void )
s8* rawreadptr;
int dohw, mmregs;
if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) {
if( GPR_IS_CONST1( _Rs_ ) ) {
_flushConstReg(_Rs_);
}

View File

@ -33,7 +33,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
/********************/
#define SSEMtoR( code, overb ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( to < XMMREGS ) ; \
RexR(0, to); \
write16( code ); \
@ -41,7 +40,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
write32( MEMADDR(from, 4 + overb) ); \
#define SSERtoM( code, overb ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( from < XMMREGS) ; \
RexR(0, from); \
write16( code ); \
@ -49,7 +47,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
write32( MEMADDR(to, 4 + overb) ); \
#define SSE_SS_MtoR( code, overb ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( to < XMMREGS ) ; \
write8( 0xf3 ); \
RexR(0, to); \
@ -58,7 +55,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
write32( MEMADDR(from, 4 + overb) ); \
#define SSE_SS_RtoM( code, overb ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( from < XMMREGS) ; \
write8( 0xf3 ); \
RexR(0, from); \
@ -67,7 +63,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
write32( MEMADDR(to, 4 + overb) ); \
#define SSERtoR( code ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( to < XMMREGS && from < XMMREGS) ; \
RexRB(0, to, from); \
write16( code ); \
@ -86,7 +81,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
SSERtoR( code );
#define _SSERtoR66( code ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( to < XMMREGS && from < XMMREGS) ; \
write8( 0x66 ); \
RexRB(0, from, to); \
@ -94,7 +88,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0};
ModRM( 3, from, to );
#define SSE_SS_RtoR( code ) \
assert( cpucaps.hasStreamingSIMDExtensions ); \
assert( to < XMMREGS && from < XMMREGS) ; \
write8( 0xf3 ); \
RexRB(0, to, from); \
@ -125,7 +118,6 @@ void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset);
/* movups [r32][r32*scale] to xmm1 */
_inline void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRXB(0, to, from2, from);
write16( 0x100f );
ModRM( 0, to, 0x4 );
@ -135,7 +127,6 @@ _inline void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegTy
/* movups xmm1 to [r32][r32*scale] */
_inline void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRXB(1, to, from2, from);
write16( 0x110f );
ModRM( 0, to, 0x4 );
@ -145,7 +136,6 @@ _inline void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegTy
/* movups [r32] to r32 */
_inline void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, to, from);
write16( 0x100f );
ModRM( 0, to, from );
@ -154,7 +144,6 @@ _inline void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from )
/* movups r32 to [r32] */
_inline void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, from, to);
write16( 0x110f );
ModRM( 0, from, to );
@ -163,7 +152,6 @@ _inline void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from )
/* movlps [r32] to r32 */
_inline void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(1, to, from);
write16( 0x120f );
ModRM( 0, to, from );
@ -171,7 +159,6 @@ _inline void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from )
_inline void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, to, from);
write16( 0x120f );
WriteRmOffsetFrom(to, from, offset);
@ -180,7 +167,6 @@ _inline void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int of
/* movlps r32 to [r32] */
_inline void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, from, to);
write16( 0x130f );
ModRM( 0, from, to );
@ -188,7 +174,6 @@ _inline void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from )
_inline void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, from, to);
write16( 0x130f );
WriteRmOffsetFrom(from, to, offset);
@ -197,7 +182,7 @@ _inline void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int of
/* movaps [r32][r32*scale] to xmm1 */
_inline void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
assert( from != EBP );
RexRXB(0, to, from2, from);
write16( 0x280f );
ModRM( 0, to, 0x4 );
@ -207,7 +192,7 @@ _inline void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegTy
/* movaps xmm1 to [r32][r32*scale] */
_inline void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale )
{
assert( cpucaps.hasStreamingSIMDExtensions && from != EBP );
assert( from != EBP );
RexRXB(0, to, from2, from);
write16( 0x290f );
ModRM( 0, to, 0x4 );
@ -217,7 +202,6 @@ _inline void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegTy
// movaps [r32+offset] to r32
_inline void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, to, from);
write16( 0x280f );
WriteRmOffsetFrom(to, from, offset);
@ -226,7 +210,6 @@ _inline void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int of
// movaps r32 to [r32+offset]
_inline void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, from, to);
write16( 0x290f );
WriteRmOffsetFrom(from, to, offset);
@ -235,7 +218,6 @@ _inline void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int of
// movdqa [r32+offset] to r32
_inline void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
write8(0x66);
RexRB(0, to, from);
write16( 0x6f0f );
@ -245,7 +227,6 @@ _inline void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int o
// movdqa r32 to [r32+offset]
_inline void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
write8(0x66);
RexRB(0, from, to);
write16( 0x7f0f );
@ -263,7 +244,6 @@ _inline void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int of
// movups r32 to [r32+offset]
_inline void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, from, to);
write16( 0x110f );
WriteRmOffsetFrom(from, to, offset);
@ -353,7 +333,6 @@ _inline void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSER
_inline void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, to, from);
write16( 0x120f );
WriteRmOffsetFrom(to, from, offset);
@ -375,7 +354,6 @@ _inline void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSER
_inline void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, to, from);
write16( 0x160f );
WriteRmOffsetFrom(to, from, offset);
@ -383,7 +361,6 @@ _inline void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, i
_inline void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset )
{
assert( cpucaps.hasStreamingSIMDExtensions );
RexRB(0, from, to);
write16(0x170f);
WriteRmOffsetFrom(from, to, offset);