From 8dc5441547c9baf426f3d9d916a3cdf3875548b1 Mon Sep 17 00:00:00 2001 From: ramapcsx2 Date: Mon, 3 Nov 2008 15:04:11 +0000 Subject: [PATCH] Also removed SSE1 checks, see rev281 for explanation :p git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@282 a6443dda-0b58-4228-96e9-037be469359c --- pcsx2/Memory.c | 1 - pcsx2/R5900.c | 6 - pcsx2/x86/iCore.cpp | 9 +- pcsx2/x86/iFPU.c | 75 +++---- pcsx2/x86/iMMI.c | 58 ++--- pcsx2/x86/iR5900.h | 1 - pcsx2/x86/ir5900tables.c | 2 +- pcsx2/x86/ix86-32/iR5900-32.c | 326 +++++++++++++--------------- pcsx2/x86/ix86-32/iR5900LoadStore.c | 16 +- pcsx2/x86/ix86/ix86_sse.c | 27 +-- 10 files changed, 218 insertions(+), 303 deletions(-) diff --git a/pcsx2/Memory.c b/pcsx2/Memory.c index 5c38f7cde3..1d04e77471 100644 --- a/pcsx2/Memory.c +++ b/pcsx2/Memory.c @@ -828,7 +828,6 @@ void _eeWriteConstMem64(u32 mem, int mmreg) void _eeWriteConstMem128(u32 mem, int mmreg) { - assert( cpucaps.hasStreamingSIMDExtensions ); if( IS_MMXREG(mmreg) ) { SetMMXstate(); MOVQRtoM(mem, mmreg&0xf); diff --git a/pcsx2/R5900.c b/pcsx2/R5900.c index 9286dd51b9..1a1b71aa3e 100644 --- a/pcsx2/R5900.c +++ b/pcsx2/R5900.c @@ -532,12 +532,6 @@ void cpuExecuteBios() if( CHECK_EEREC ) Config.Options |= PCSX2_COP2REC; else Config.Options &= ~PCSX2_COP2REC; -#ifndef PCSX2_NORECBUILD - if( !cpucaps.hasStreamingSIMDExtensions ) { - Config.Options &= ~(PCSX2_VU1REC|PCSX2_VU0REC); - } -#endif - // remove frame skipping if GS doesn't support it switch(CHECK_FRAMELIMIT) { case PCSX2_FRAMELIMIT_SKIP: diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index 458b3e1204..265bce9551 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -126,7 +126,6 @@ int _getFreeXMMreg() { int i, tempi; u32 bestcount = 0x10000; - if( !cpucaps.hasStreamingSIMDExtensions ) return -1; for (i=0; i= 0 ) { - t0reg = _allocTempXMMreg(XMMT_FPS, -1); - _freeXMMreg(t0reg); - SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&s_signbit); - SSE_CVTTSS2SI_XMM_to_R32(EAX, regs); - SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs); - } - else SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]); - _deleteFPtoXMMreg(_Fd_, 2); + if( regs >= 0 ) { + t0reg = _allocTempXMMreg(XMMT_FPS, -1); + _freeXMMreg(t0reg); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&s_signbit); + SSE_CVTTSS2SI_XMM_to_R32(EAX, regs); + SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs); + } + else SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]); + + _deleteFPtoXMMreg(_Fd_, 2); - MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]); - AND32ItoR(ECX, 0x7f800000); - CMP32ItoR(ECX, 0x4E800000); - j8Ptr[0] = JLE8(0); + MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]); + AND32ItoR(ECX, 0x7f800000); + CMP32ItoR(ECX, 0x4E800000); + j8Ptr[0] = JLE8(0); - // need to detect if reg is positive - /*if( regs >= 0 ) { - SSE_UCOMISS_XMM_to_XMM(regs, t0reg); - j8Ptr[2] = JB8(0); - } - else {*/ - TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000); - j8Ptr[2] = JNZ8(0); - //} + // need to detect if reg is positive + /*if( regs >= 0 ) { + SSE_UCOMISS_XMM_to_XMM(regs, t0reg); + j8Ptr[2] = JB8(0); + } + else {*/ + TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000); + j8Ptr[2] = JNZ8(0); + //} - MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff); - j8Ptr[1] = JMP8(0); + MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff); + j8Ptr[1] = JMP8(0); - x86SetJ8( j8Ptr[2] ); - MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000); - j8Ptr[1] = JMP8(0); + x86SetJ8( j8Ptr[2] ); + MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000); + j8Ptr[1] = JMP8(0); - x86SetJ8( j8Ptr[0] ); + x86SetJ8( j8Ptr[0] ); - MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX); + MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX); - x86SetJ8( j8Ptr[1] ); - } -#ifndef __x86_64__ - else { - MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); - iFlushCall(FLUSH_EVERYTHING); - _flushConstRegs(); - CALLFunc((uptr)CVT_W); - } -#endif + x86SetJ8( j8Ptr[1] ); } //------------------------------------------------------------------ diff --git a/pcsx2/x86/iMMI.c b/pcsx2/x86/iMMI.c index 61142a15cd..e2bb4543e2 100644 --- a/pcsx2/x86/iMMI.c +++ b/pcsx2/x86/iMMI.c @@ -844,24 +844,17 @@ CPU_SSE_XMMCACHE_END _flushCachedRegs(); _deleteEEreg(_Rd_, 0); - if( cpucaps.hasStreamingSIMDExtensions ) { - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - SSE_PMAXSW_MM_to_MM( t0reg, t1reg ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - SSE_PMAXSW_MM_to_MM( t2reg, t3reg); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg); - SetMMXstate(); - ) - } - else { - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - CALLFunc( (uptr)PMAXH ); - } + MMX_ALLOC_TEMP4( + MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); + MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); + SSE_PMAXSW_MM_to_MM( t0reg, t1reg ); + MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); + MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); + SSE_PMAXSW_MM_to_MM( t2reg, t3reg); + MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); + MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t2reg); + SetMMXstate(); + ) } //////////////////////////////////////////////////// @@ -2001,24 +1994,17 @@ CPU_SSE_XMMCACHE_END _flushCachedRegs(); _deleteEEreg(_Rd_, 0); - if( cpucaps.hasStreamingSIMDExtensions ) { - MMX_ALLOC_TEMP4( - MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); - MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); - MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); - MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); - SSE_PMINSW_MM_to_MM( t0reg, t2reg ); - SSE_PMINSW_MM_to_MM( t1reg, t3reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); - MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); - SetMMXstate(); - ) - } - else { - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - CALLFunc( (uptr)PMINH ); - } + MMX_ALLOC_TEMP4( + MOVQMtoR( t0reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 0 ] ); + MOVQMtoR( t1reg, (uptr)&cpuRegs.GPR.r[ _Rs_ ].UD[ 1 ] ); + MOVQMtoR( t2reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 0 ] ); + MOVQMtoR( t3reg, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UD[ 1 ] ); + SSE_PMINSW_MM_to_MM( t0reg, t2reg ); + SSE_PMINSW_MM_to_MM( t1reg, t3reg ); + MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 0 ], t0reg ); + MOVQRtoM( (uptr)&cpuRegs.GPR.r[ _Rd_ ].UD[ 1 ], t1reg ); + SetMMXstate(); + ) } //////////////////////////////////////////////////// diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h index 2851e6342c..7583243cd0 100644 --- a/pcsx2/x86/iR5900.h +++ b/pcsx2/x86/iR5900.h @@ -219,7 +219,6 @@ void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode #define XMMINFO_WRITEACC 0x400 #define CPU_SSE_XMMCACHE_START(xmminfo) \ - if (cpucaps.hasStreamingSIMDExtensions) \ { \ int info = eeRecompileCodeXMM(xmminfo); \ diff --git a/pcsx2/x86/ir5900tables.c b/pcsx2/x86/ir5900tables.c index 9557bcdfe2..60fa2394da 100644 --- a/pcsx2/x86/ir5900tables.c +++ b/pcsx2/x86/ir5900tables.c @@ -435,7 +435,7 @@ void rpropMMI1(EEINST* prev, EEINST* pinst); void rpropMMI2(EEINST* prev, EEINST* pinst); void rpropMMI3(EEINST* prev, EEINST* pinst); -#define EEINST_REALXMM (cpucaps.hasStreamingSIMDExtensions?EEINST_XMM:0) +#define EEINST_REALXMM EEINST_XMM //SPECIAL, REGIMM, J, JAL, BEQ, BNE, BLEZ, BGTZ, //ADDI, ADDIU, SLTI, SLTIU, ANDI, ORI, XORI, LUI, diff --git a/pcsx2/x86/ix86-32/iR5900-32.c b/pcsx2/x86/ix86-32/iR5900-32.c index 1cc85864cc..fd7b154a43 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.c +++ b/pcsx2/x86/ix86-32/iR5900-32.c @@ -285,7 +285,7 @@ u8 _eeIsLoadStoreCoIssue(u32 firstcode, u32 secondcode) case 57: // swc1 case 54: // lqc2 case 62: // sqc2 - return (secondcode>>26)==(firstcode>>26)&&cpucaps.hasStreamingSIMDExtensions; + return (secondcode>>26)==(firstcode>>26); } return 0; } @@ -1294,153 +1294,144 @@ int eeRecompileCodeXMM(int xmminfo) void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo) { int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1; + int info = PROCESS_EE_XMM; - if( cpucaps.hasStreamingSIMDExtensions ) { - int info = PROCESS_EE_XMM; + if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_); + if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_); + if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_); + if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg(); - if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_); - if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_); - if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_); - if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg(); - - if( xmminfo & XMMINFO_READT ) { - if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); - else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); - } - - if( xmminfo & XMMINFO_READS ) { - if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) { - mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - } - else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); - } - - if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs); - if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt); - - if( xmminfo & XMMINFO_READD ) { - assert( xmminfo & XMMINFO_WRITED ); - mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ); - } - - if( xmminfo & XMMINFO_READACC ) { - if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) ) - mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ); - else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); - } - - if( xmminfo & XMMINFO_WRITEACC ) { - - // check for last used, if so don't alloc a new XMM reg - int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0); - - mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc); - - if( mmregacc < 0 ) { - if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { - if( FPUINST_ISLIVE(_Ft_) ) { - _freeXMMreg(mmregt); - info &= ~PROCESS_EE_MODEWRITET; - } - _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); - xmmregs[mmregt].inuse = 1; - xmmregs[mmregt].reg = 0; - xmmregs[mmregt].mode = readacc; - xmmregs[mmregt].type = XMMTYPE_FPACC; - mmregacc = mmregt; - } - else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { - if( FPUINST_ISLIVE(_Fs_) ) { - _freeXMMreg(mmregs); - info &= ~PROCESS_EE_MODEWRITES; - } - _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); - xmmregs[mmregs].inuse = 1; - xmmregs[mmregs].reg = 0; - xmmregs[mmregs].mode = readacc; - xmmregs[mmregs].type = XMMTYPE_FPACC; - mmregacc = mmregs; - } - else mmregacc = _allocFPACCtoXMMreg(-1, readacc); - } - - xmmregs[mmregacc].mode |= MODE_WRITE; - } - else if( xmminfo & XMMINFO_WRITED ) { - // check for last used, if so don't alloc a new XMM reg - int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); - if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); - else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); - - if( mmregd < 0 ) { - if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { - if( FPUINST_ISLIVE(_Ft_) ) { - _freeXMMreg(mmregt); - info &= ~PROCESS_EE_MODEWRITET; - } - _deleteMMXreg(MMX_FPU+_Fd_, 2); - xmmregs[mmregt].inuse = 1; - xmmregs[mmregt].reg = _Fd_; - xmmregs[mmregt].mode = readd; - mmregd = mmregt; - } - else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { - if( FPUINST_ISLIVE(_Fs_) ) { - _freeXMMreg(mmregs); - info &= ~PROCESS_EE_MODEWRITES; - } - _deleteMMXreg(MMX_FPU+_Fd_, 2); - xmmregs[mmregs].inuse = 1; - xmmregs[mmregs].reg = _Fd_; - xmmregs[mmregs].mode = readd; - mmregd = mmregs; - } - else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) { - if( FPUINST_ISLIVE(XMMFPU_ACC) ) - _freeXMMreg(mmregacc); - _deleteMMXreg(MMX_FPU+_Fd_, 2); - xmmregs[mmregacc].inuse = 1; - xmmregs[mmregacc].reg = _Fd_; - xmmregs[mmregacc].mode = readd; - xmmregs[mmregacc].type = XMMTYPE_FPREG; - mmregd = mmregacc; - } - else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); - } - } - - assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 ); - - if( xmminfo & XMMINFO_WRITED ) { - assert( mmregd >= 0 ); - info |= PROCESS_EE_SET_D(mmregd); - } - if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) { - if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC; - else assert( !(xmminfo&XMMINFO_WRITEACC)); - } - - if( xmminfo & XMMINFO_READS ) { - if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S; - } - if( xmminfo & XMMINFO_READT ) { - if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T; - } - - // at least one must be in xmm - if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) { - assert( mmregs >= 0 || mmregt >= 0 ); - } - - xmmcode(info); - _clearNeededXMMregs(); - return; + if( xmminfo & XMMINFO_READT ) { + if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); + else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); } - MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); - MOV32ItoM((uptr)&cpuRegs.pc, pc); - iFlushCall(FLUSH_EVERYTHING); - CALLFunc((uptr)fpucode); + if( xmminfo & XMMINFO_READS ) { + if( ( !(xmminfo & XMMINFO_READT) || (mmregt >= 0) ) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) { + mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); + } + else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); + } + + if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs); + if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt); + + if( xmminfo & XMMINFO_READD ) { + assert( xmminfo & XMMINFO_WRITED ); + mmregd = _allocFPtoXMMreg(-1, _Fd_, MODE_READ); + } + + if( xmminfo & XMMINFO_READACC ) { + if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) ) + mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ); + else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); + } + + if( xmminfo & XMMINFO_WRITEACC ) { + + // check for last used, if so don't alloc a new XMM reg + int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0); + + mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc); + + if( mmregacc < 0 ) { + if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { + if( FPUINST_ISLIVE(_Ft_) ) { + _freeXMMreg(mmregt); + info &= ~PROCESS_EE_MODEWRITET; + } + _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); + xmmregs[mmregt].inuse = 1; + xmmregs[mmregt].reg = 0; + xmmregs[mmregt].mode = readacc; + xmmregs[mmregt].type = XMMTYPE_FPACC; + mmregacc = mmregt; + } + else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { + if( FPUINST_ISLIVE(_Fs_) ) { + _freeXMMreg(mmregs); + info &= ~PROCESS_EE_MODEWRITES; + } + _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); + xmmregs[mmregs].inuse = 1; + xmmregs[mmregs].reg = 0; + xmmregs[mmregs].mode = readacc; + xmmregs[mmregs].type = XMMTYPE_FPACC; + mmregacc = mmregs; + } + else mmregacc = _allocFPACCtoXMMreg(-1, readacc); + } + + xmmregs[mmregacc].mode |= MODE_WRITE; + } + else if( xmminfo & XMMINFO_WRITED ) { + // check for last used, if so don't alloc a new XMM reg + int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); + if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); + else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); + + if( mmregd < 0 ) { + if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { + if( FPUINST_ISLIVE(_Ft_) ) { + _freeXMMreg(mmregt); + info &= ~PROCESS_EE_MODEWRITET; + } + _deleteMMXreg(MMX_FPU+_Fd_, 2); + xmmregs[mmregt].inuse = 1; + xmmregs[mmregt].reg = _Fd_; + xmmregs[mmregt].mode = readd; + mmregd = mmregt; + } + else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { + if( FPUINST_ISLIVE(_Fs_) ) { + _freeXMMreg(mmregs); + info &= ~PROCESS_EE_MODEWRITES; + } + _deleteMMXreg(MMX_FPU+_Fd_, 2); + xmmregs[mmregs].inuse = 1; + xmmregs[mmregs].reg = _Fd_; + xmmregs[mmregs].mode = readd; + mmregd = mmregs; + } + else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) { + if( FPUINST_ISLIVE(XMMFPU_ACC) ) + _freeXMMreg(mmregacc); + _deleteMMXreg(MMX_FPU+_Fd_, 2); + xmmregs[mmregacc].inuse = 1; + xmmregs[mmregacc].reg = _Fd_; + xmmregs[mmregacc].mode = readd; + xmmregs[mmregacc].type = XMMTYPE_FPREG; + mmregd = mmregacc; + } + else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); + } + } + + assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 ); + + if( xmminfo & XMMINFO_WRITED ) { + assert( mmregd >= 0 ); + info |= PROCESS_EE_SET_D(mmregd); + } + if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) { + if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC; + else assert( !(xmminfo&XMMINFO_WRITEACC)); + } + + if( xmminfo & XMMINFO_READS ) { + if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S; + } + if( xmminfo & XMMINFO_READT ) { + if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T; + } + + // at least one must be in xmm + if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) { + assert( mmregs >= 0 || mmregt >= 0 ); + } + + xmmcode(info); + _clearNeededXMMregs(); } #undef _Ft_ @@ -1461,23 +1452,20 @@ void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) sseMXCSR &= 0xffff; // clear the upper 16 bits since they shouldn't be set sseVUMXCSR &= 0xffff; - if( cpucaps.hasStreamingSIMDExtensions ) { - - g_sseMXCSR = sseMXCSR; - g_sseVUMXCSR = sseVUMXCSR; - // do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up) - // Update 11/05/08 - Doesnt seem to effect it anymore, for the speed boost, its on :p - //g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III! - // changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo - // so... grandia III wins (you can change individual games with the 'roundmode' patch command) + g_sseMXCSR = sseMXCSR; + g_sseVUMXCSR = sseVUMXCSR; + // do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up) + // Update 11/05/08 - Doesnt seem to effect it anymore, for the speed boost, its on :p + //g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III! + // changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo + // so... grandia III wins (you can change individual games with the 'roundmode' patch command) #ifdef _MSC_VER - __asm ldmxcsr g_sseMXCSR; // set the new sse control + __asm ldmxcsr g_sseMXCSR; // set the new sse control #else - __asm__("ldmxcsr %0" : : "m"(g_sseMXCSR) ); + __asm__("ldmxcsr %0" : : "m"(g_sseMXCSR) ); #endif - //g_sseVUMXCSR = g_sseMXCSR|0x6000; - } + //g_sseVUMXCSR = g_sseMXCSR|0x6000; } #define REC_CACHEMEM 0x01000000 @@ -1575,6 +1563,11 @@ int recInit( void ) SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) ); return -1; } + if ( !( cpucaps.hasStreamingSIMDExtensions ) ) + { + SysMessage( _( "Processor doesn't supports SSE, can't run recompiler without that" ) ); + return -1; + } if ( !( cpucaps.hasStreamingSIMD2Extensions ) ) { SysMessage( _( "Processor doesn't supports SSE2, can't run recompiler without that" ) ); @@ -2313,24 +2306,7 @@ void recCOP2( void ) #ifdef CPU_LOG CPU_LOG( "Recompiling COP2:%s\n", disR5900Fasm( cpuRegs.code, cpuRegs.pc ) ); #endif - - if ( !cpucaps.hasStreamingSIMDExtensions ) { - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - iFlushCall(FLUSH_EVERYTHING); - g_cpuHasConstReg = 1; // reset all since COP2 can change regs - CALLFunc( (uptr)COP2 ); - - CMP32ItoM((int)&cpuRegs.pc, pc); - j8Ptr[0] = JE8(0); - ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); - x86SetJ8(j8Ptr[0]); - } - else - { - recCOP22( ); - } + recCOP22( ); } #endif diff --git a/pcsx2/x86/ix86-32/iR5900LoadStore.c b/pcsx2/x86/ix86-32/iR5900LoadStore.c index 2f3c522dd0..925e4a4c9a 100644 --- a/pcsx2/x86/ix86-32/iR5900LoadStore.c +++ b/pcsx2/x86/ix86-32/iR5900LoadStore.c @@ -1237,7 +1237,7 @@ void recLQ( void ) #ifdef REC_SLOWREAD _flushConstReg(_Rs_); #else - if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { // malice hits this assert( (g_cpuConstRegs[_Rs_].UL[0]+_Imm_) % 16 == 0 ); @@ -1278,7 +1278,7 @@ void recLQ( void ) int mmregs; int t0reg = -1; - if( !cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) + if( GPR_IS_CONST1( _Rs_ ) ) _flushConstReg(_Rs_); mmregs = _eePrepareReg(_Rs_); @@ -2100,7 +2100,7 @@ void recStore(int bit, u32 imm, int align) #ifdef REC_SLOWWRITE _flushConstReg(_Rs_); #else - if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { u32 addr = g_cpuConstRegs[_Rs_].UL[0]+imm; int doclear = 0; StopPerfCounter(); @@ -2190,7 +2190,7 @@ void recStore(int bit, u32 imm, int align) int dohw; int mmregs; - if( !cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { _flushConstReg(_Rs_); } @@ -3429,7 +3429,7 @@ void recLQC2( void ) #ifdef REC_SLOWREAD _flushConstReg(_Rs_); #else - if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { assert( (g_cpuConstRegs[_Rs_].UL[0]+_Imm_) % 16 == 0 ); if( _Ft_ ) mmreg = _allocVFtoXMMreg(&VU0, -1, _Ft_, MODE_WRITE); @@ -3443,7 +3443,7 @@ void recLQC2( void ) { int dohw, mmregs; - if( !cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { _flushConstReg(_Rs_); } @@ -3574,7 +3574,7 @@ void recSQC2( void ) #ifdef REC_SLOWWRITE _flushConstReg(_Rs_); #else - if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { assert( (g_cpuConstRegs[_Rs_].UL[0]+_Imm_)%16 == 0 ); mmreg = _allocVFtoXMMreg(&VU0, -1, _Ft_, MODE_READ)|MEM_XMMTAG; @@ -3586,7 +3586,7 @@ void recSQC2( void ) s8* rawreadptr; int dohw, mmregs; - if( cpucaps.hasStreamingSIMDExtensions && GPR_IS_CONST1( _Rs_ ) ) { + if( GPR_IS_CONST1( _Rs_ ) ) { _flushConstReg(_Rs_); } diff --git a/pcsx2/x86/ix86/ix86_sse.c b/pcsx2/x86/ix86/ix86_sse.c index 864c974d22..5722a464f9 100644 --- a/pcsx2/x86/ix86/ix86_sse.c +++ b/pcsx2/x86/ix86/ix86_sse.c @@ -33,7 +33,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; /********************/ #define SSEMtoR( code, overb ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( to < XMMREGS ) ; \ RexR(0, to); \ write16( code ); \ @@ -41,7 +40,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; write32( MEMADDR(from, 4 + overb) ); \ #define SSERtoM( code, overb ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( from < XMMREGS) ; \ RexR(0, from); \ write16( code ); \ @@ -49,7 +47,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; write32( MEMADDR(to, 4 + overb) ); \ #define SSE_SS_MtoR( code, overb ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( to < XMMREGS ) ; \ write8( 0xf3 ); \ RexR(0, to); \ @@ -58,7 +55,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; write32( MEMADDR(from, 4 + overb) ); \ #define SSE_SS_RtoM( code, overb ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( from < XMMREGS) ; \ write8( 0xf3 ); \ RexR(0, from); \ @@ -67,7 +63,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; write32( MEMADDR(to, 4 + overb) ); \ #define SSERtoR( code ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( to < XMMREGS && from < XMMREGS) ; \ RexRB(0, to, from); \ write16( code ); \ @@ -86,7 +81,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; SSERtoR( code ); #define _SSERtoR66( code ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( to < XMMREGS && from < XMMREGS) ; \ write8( 0x66 ); \ RexRB(0, from, to); \ @@ -94,7 +88,6 @@ XMMSSEType g_xmmtypes[XMMREGS] = {0}; ModRM( 3, from, to ); #define SSE_SS_RtoR( code ) \ - assert( cpucaps.hasStreamingSIMDExtensions ); \ assert( to < XMMREGS && from < XMMREGS) ; \ write8( 0xf3 ); \ RexRB(0, to, from); \ @@ -125,7 +118,6 @@ void WriteRmOffsetFrom(x86IntRegType to, x86IntRegType from, int offset); /* movups [r32][r32*scale] to xmm1 */ _inline void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRXB(0, to, from2, from); write16( 0x100f ); ModRM( 0, to, 0x4 ); @@ -135,7 +127,6 @@ _inline void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegTy /* movups xmm1 to [r32][r32*scale] */ _inline void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRXB(1, to, from2, from); write16( 0x110f ); ModRM( 0, to, 0x4 ); @@ -145,7 +136,6 @@ _inline void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegTy /* movups [r32] to r32 */ _inline void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, to, from); write16( 0x100f ); ModRM( 0, to, from ); @@ -154,7 +144,6 @@ _inline void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ) /* movups r32 to [r32] */ _inline void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, from, to); write16( 0x110f ); ModRM( 0, from, to ); @@ -163,7 +152,6 @@ _inline void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ) /* movlps [r32] to r32 */ _inline void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(1, to, from); write16( 0x120f ); ModRM( 0, to, from ); @@ -171,7 +159,6 @@ _inline void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ) _inline void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, to, from); write16( 0x120f ); WriteRmOffsetFrom(to, from, offset); @@ -180,7 +167,6 @@ _inline void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int of /* movaps r32 to [r32] */ _inline void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, from, to); write16( 0x130f ); ModRM( 0, from, to ); @@ -188,7 +174,6 @@ _inline void SSE_MOVLPSRtoRm( x86IntRegType to, x86IntRegType from ) _inline void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, from, to); write16( 0x130f ); WriteRmOffsetFrom(from, to, offset); @@ -197,7 +182,7 @@ _inline void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int of /* movaps [r32][r32*scale] to xmm1 */ _inline void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { - assert( cpucaps.hasStreamingSIMDExtensions && from != EBP ); + assert( from != EBP ); RexRXB(0, to, from2, from); write16( 0x280f ); ModRM( 0, to, 0x4 ); @@ -207,7 +192,7 @@ _inline void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegTy /* movaps xmm1 to [r32][r32*scale] */ _inline void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ) { - assert( cpucaps.hasStreamingSIMDExtensions && from != EBP ); + assert( from != EBP ); RexRXB(0, to, from2, from); write16( 0x290f ); ModRM( 0, to, 0x4 ); @@ -217,7 +202,6 @@ _inline void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegTy // movaps [r32+offset] to r32 _inline void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, to, from); write16( 0x280f ); WriteRmOffsetFrom(to, from, offset); @@ -226,7 +210,6 @@ _inline void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int of // movaps r32 to [r32+offset] _inline void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, from, to); write16( 0x290f ); WriteRmOffsetFrom(from, to, offset); @@ -235,7 +218,6 @@ _inline void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int of // movdqa [r32+offset] to r32 _inline void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); write8(0x66); RexRB(0, to, from); write16( 0x6f0f ); @@ -245,7 +227,6 @@ _inline void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int o // movdqa r32 to [r32+offset] _inline void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); write8(0x66); RexRB(0, from, to); write16( 0x7f0f ); @@ -263,7 +244,6 @@ _inline void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int of // movups r32 to [r32+offset] _inline void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, from, to); write16( 0x110f ); WriteRmOffsetFrom(from, to, offset); @@ -353,7 +333,6 @@ _inline void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSER _inline void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, to, from); write16( 0x120f ); WriteRmOffsetFrom(to, from, offset); @@ -375,7 +354,6 @@ _inline void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ) { SSER _inline void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, to, from); write16( 0x160f ); WriteRmOffsetFrom(to, from, offset); @@ -383,7 +361,6 @@ _inline void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, i _inline void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ) { - assert( cpucaps.hasStreamingSIMDExtensions ); RexRB(0, from, to); write16(0x170f); WriteRmOffsetFrom(from, to, offset);