From 42ce99eb21b75a432f84f11be521ad9bb835f63d Mon Sep 17 00:00:00 2001
From: cottonvibes
Date: Sat, 27 Sep 2008 08:19:46 +0000
Subject: [PATCH] major code clean-up for FPUs: removed a lot of old/useless code and organized stuff better. Trying to clean up pcsx2 a bit...

*note: FPU reg caching is always on now, which means you need an SSE processor to run the FPU recs; if not, it will just run the interpreter code instead (not really a bad thing, the int-code isn't much slower)

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@153 a6443dda-0b58-4228-96e9-037be469359c
---
 pcsx2/x86/iFPU.c              | 2083 +++++++++++++++------------------
 pcsx2/x86/iR5900.h            |    6 +-
 pcsx2/x86/ix86-32/iR5900-32.c |   13 +-
 3 files changed, 923 insertions(+), 1179 deletions(-)

diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c
index c7179464e1..7aaa844503 100644
--- a/pcsx2/x86/iFPU.c
+++ b/pcsx2/x86/iFPU.c
@@ -26,37 +26,12 @@
 #include "iFPU.h"
 #include "stdio.h" //Linux needs this?
 #include "stdlib.h" //Linux needs this?
+//------------------------------------------------------------------
-#define REC_FPUBRANCH(f) \
-	void f(); \
-	void rec##f() { \
-	MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
-	MOV32ItoM((uptr)&cpuRegs.pc, pc); \
-	iFlushCall(FLUSH_EVERYTHING); \
-	CALLFunc((uptr)f); \
-	branch = 2; \
-}
-
-#define REC_FPUFUNC(f) \
-	void f(); \
-	void rec##f() { \
-	MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
-	MOV32ItoM((uptr)&cpuRegs.pc, pc); \
-	iFlushCall(FLUSH_EVERYTHING); \
-	CALLFunc((uptr)f); \
-}
-
-#define REC_FPUOP(f) \
-	MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \
-	MOV32ItoM((uptr)&cpuRegs.pc, pc); \
-	iFlushCall(FLUSH_EVERYTHING); \
-	CALLFunc((uptr)f);
-
-/*********************************************************
-* COP1 opcodes *
-* *
-*********************************************************/
+//------------------------------------------------------------------
+// Helper Macros
+//------------------------------------------------------------------
 #define _Ft_ _Rt_
 #define _Fs_ _Rd_
 #define _Fd_ _Sa_
@@ -78,236 +53,40 @@
 extern PCSX2_ALIGNED16_DECL(u32 g_maxvals[4]);
 static u32 PCSX2_ALIGNED16(s_neg[4]) = { 0x80000000, 0, 0, 0 };
 static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0, 0, 0 };
-////////////////////////////////////////////////////
-void recMFC1(void) {
-	int regt, regs;
-	if ( ! 
_Rt_ ) return; - - _eeOnWriteReg(_Rt_, 1); - - regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - if( regs >= 0 ) { - _deleteGPRtoXMMreg(_Rt_, 2); - -#ifdef __x86_64__ - regt = _allocCheckGPRtoX86(g_pCurInstInfo, _Rt_, MODE_WRITE); - - if( regt >= 0 ) { - - if(EEINST_ISLIVE1(_Rt_)) { - SSE2_MOVD_XMM_to_R(RAX, regs); - // sign extend - CDQE(); - MOV64RtoR(regt, RAX); - } - else { - SSE2_MOVD_XMM_to_R(regt, regs); - EEINST_RESETHASLIVE1(_Rt_); - } - } -#else - regt = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_WRITE); - - if( regt >= 0 ) { - SSE2_MOVDQ2Q_XMM_to_MM(regt, regs); - - if(EEINST_ISLIVE1(_Rt_)) _signExtendGPRtoMMX(regt, _Rt_, 0); - else EEINST_RESETHASLIVE1(_Rt_); - } -#endif - else { - if(EEINST_ISLIVE1(_Rt_)) { - _signExtendXMMtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs, 0); - } - else { - EEINST_RESETHASLIVE1(_Rt_); - SSE_MOVSS_XMM_to_M32((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs); - } - } - } -#ifndef __x86_64__ - else if( (regs = _checkMMXreg(MMX_FPU+_Fs_, MODE_READ)) >= 0 ) { - // convert to mmx reg - mmxregs[regs].reg = MMX_GPR+_Rt_; - mmxregs[regs].mode |= MODE_READ|MODE_WRITE; - _signExtendGPRtoMMX(regs, _Rt_, 0); - } -#endif - else { - regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - - if( regt >= 0 ) { - if( xmmregs[regt].mode & MODE_WRITE ) { - SSE_MOVHPS_XMM_to_M64((uptr)&cpuRegs.GPR.r[_Rt_].UL[2], regt); - } - xmmregs[regt].inuse = 0; - } -#ifdef __x86_64__ - else if( (regt = _allocCheckGPRtoX86(g_pCurInstInfo, _Rt_, MODE_WRITE)) >= 0 ) { - - if(EEINST_ISLIVE1(_Rt_)) { - MOV32MtoR( RAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); - CDQE(); - MOV64RtoR(regt, RAX); - } - else { - MOV32MtoR( regt, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); - EEINST_RESETHASLIVE1(_Rt_); - } - } - else -#endif - { - - _deleteEEreg(_Rt_, 0); - MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); - - if(EEINST_ISLIVE1(_Rt_)) { -#ifdef __x86_64__ - CDQE(); - MOV64RtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX); -#else - CDQ( ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); -#endif - } - else { - EEINST_RESETHASLIVE1(_Rt_); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - } - } - } +#define REC_FPUBRANCH(f) \ + void f(); \ + void rec##f() { \ + MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \ + MOV32ItoM((uptr)&cpuRegs.pc, pc); \ + iFlushCall(FLUSH_EVERYTHING); \ + CALLFunc((uptr)f); \ + branch = 2; \ } -//////////////////////////////////////////////////// -void recCFC1(void) -{ - if ( ! 
_Rt_ ) return; - - _eeOnWriteReg(_Rt_, 1); - - MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] ); - _deleteEEreg(_Rt_, 0); - - if(EEINST_ISLIVE1(_Rt_)) { -#ifdef __x86_64__ - CDQE(); - MOV64RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX ); -#else - CDQ( ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); -#endif - } - else { - EEINST_RESETHASLIVE1(_Rt_); - MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); - } +#define REC_FPUFUNC(f) \ + void f(); \ + void rec##f() { \ + MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); \ + MOV32ItoM((uptr)&cpuRegs.pc, pc); \ + iFlushCall(FLUSH_EVERYTHING); \ + CALLFunc((uptr)f); \ } +//------------------------------------------------------------------ -//////////////////////////////////////////////////// -void recMTC1(void) -{ - if( GPR_IS_CONST1(_Rt_) ) { - _deleteFPtoXMMreg(_Fs_, 0); - MOV32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, g_cpuConstRegs[_Rt_].UL[0]); - } - else { - int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - if( mmreg >= 0 ) { - if( g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE ) { - // transfer the reg directly - _deleteGPRtoXMMreg(_Rt_, 2); - _deleteFPtoXMMreg(_Fs_, 2); - _allocFPtoXMMreg(mmreg, _Fs_, MODE_WRITE); - } - else { - int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - if( mmreg2 >= 0 ) SSE_MOVSS_XMM_to_XMM(mmreg2, mmreg); - else SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); - } - } -#ifndef __x86_64__ - else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) { - - if( cpucaps.hasStreamingSIMD2Extensions ) { - int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - if( mmreg2 >= 0 ) { - SetMMXstate(); - SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg); - } - else { - SetMMXstate(); - MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); - } - } - else { - _deleteFPtoXMMreg(_Fs_, 0); - SetMMXstate(); - MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); - } - } -#endif - else { - int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); - - if( mmreg2 >= 0 ) SSE_MOVSS_M32_to_XMM(mmreg2, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); - else { - MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); - MOV32RtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, EAX); - } - } - } -} - -//////////////////////////////////////////////////// -void recCTC1( void ) -{ - if( GPR_IS_CONST1(_Rt_)) { - MOV32ItoM((uptr)&fpuRegs.fprc[ _Fs_ ], g_cpuConstRegs[_Rt_].UL[0]); - } - else { - int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); - if( mmreg >= 0 ) { - SSEX_MOVD_XMM_to_M32((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); - } -#ifdef __x86_64__ - else if( (mmreg = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ)) >= 0 ) { - MOV32RtoM((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); - } -#else - else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) { - MOVDMMXtoM((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); - SetMMXstate(); - } -#endif - else { - _deleteGPRtoXMMreg(_Rt_, 1); - -#ifdef __x86_64__ - _deleteX86reg(X86TYPE_GPR, _Rt_, 1); -#else - _deleteMMXreg(MMX_GPR+_Rt_, 1); -#endif - MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); - MOV32RtoM( (uptr)&fpuRegs.fprc[ _Fs_ ], EAX ); - } - } -} - -//////////////////////////////////////////////////// -void recCOP1_BC1() -{ - recCP1BC1[_Rt_](); -} +//------------------------------------------------------------------ +// Misc... 
+//------------------------------------------------------------------ //static u32 _mxcsr = 0x7F80; //static u32 _mxcsrs; static u32 fpucw = 0x007f; static u32 fpucws = 0; -//////////////////////////////////////////////////// +void recCOP1_BC1() +{ + recCP1BC1[_Rt_](); +} + void SaveCW(int type) { if (iCWstate & type) return; @@ -321,7 +100,6 @@ void SaveCW(int type) { iCWstate|= type; } -//////////////////////////////////////////////////// void LoadCW( void ) { if (iCWstate == 0) return; @@ -334,11 +112,9 @@ void LoadCW( void ) { iCWstate = 0; } -//////////////////////////////////////////////////// -void recCOP1_S( void ) -{ +void recCOP1_S( void ) { #ifndef __x86_64__ - if( !EE_FPU_REGCACHING || !cpucaps.hasStreamingSIMD2Extensions ) { + if( !cpucaps.hasStreamingSIMD2Extensions ) { // Not sure if this is needed anymore... (cottonvibes) _freeMMXreg(6); _freeMMXreg(7); } @@ -346,340 +122,51 @@ void recCOP1_S( void ) recCP1S[ _Funct_ ]( ); } -//////////////////////////////////////////////////// -void recCOP1_W( void ) -{ -#ifndef __x86_64__ - if( !EE_FPU_REGCACHING ) { - _freeMMXreg(6); - _freeMMXreg(7); - } -#endif +void recCOP1_W( void ) { recCP1W[ _Funct_ ]( ); } +//------------------------------------------------------------------ + +//------------------------------------------------------------------ +// FPU Opcodes! +//------------------------------------------------------------------ #ifndef FPU_RECOMPILE -REC_FPUFUNC(ADD_S); -REC_FPUFUNC(SUB_S); -REC_FPUFUNC(MUL_S); -REC_FPUFUNC(DIV_S); -REC_FPUFUNC(SQRT_S); -REC_FPUFUNC(RSQRT_S); REC_FPUFUNC(ABS_S); -REC_FPUFUNC(MOV_S); -REC_FPUFUNC(NEG_S); +REC_FPUFUNC(ADD_S); REC_FPUFUNC(ADDA_S); -REC_FPUFUNC(SUBA_S); -REC_FPUFUNC(MULA_S); -REC_FPUFUNC(MADD_S); -REC_FPUFUNC(MSUB_S); -REC_FPUFUNC(MADDA_S); -REC_FPUFUNC(MSUBA_S); -REC_FPUFUNC(CVT_S); -REC_FPUFUNC(CVT_W); -REC_FPUFUNC(MIN_S); -REC_FPUFUNC(MAX_S); REC_FPUBRANCH(BC1F); REC_FPUBRANCH(BC1T); REC_FPUBRANCH(BC1FL); REC_FPUBRANCH(BC1TL); REC_FPUFUNC(C_EQ); REC_FPUFUNC(C_F); -REC_FPUFUNC(C_LT); REC_FPUFUNC(C_LE); +REC_FPUFUNC(C_LT); +REC_FPUFUNC(CVT_S); +REC_FPUFUNC(CVT_W); +REC_FPUFUNC(DIV_S); +REC_FPUFUNC(MAX_S); +REC_FPUFUNC(MIN_S); +REC_FPUFUNC(MADD_S); +REC_FPUFUNC(MADDA_S); +REC_FPUFUNC(MOV_S); +REC_FPUFUNC(MSUB_S); +REC_FPUFUNC(MSUBA_S); +REC_FPUFUNC(MUL_S); +REC_FPUFUNC(MULA_S); +REC_FPUFUNC(NEG_S); +REC_FPUFUNC(SUB_S); +REC_FPUFUNC(SUBA_S); +REC_FPUFUNC(SQRT_S); +REC_FPUFUNC(RSQRT_S); -#else - -// define the FPU ops using the x86 FPU. 
x86-64 doesn't use FPU -#ifndef __x86_64__ - -void SetQFromStack(u32 mem) -{ - write16(0xe5d9); - FNSTSWtoAX(); - write8(0x9e); - j8Ptr[0] = JAE8(0); // jnc - - // sign bit is in bit 9 of EAX - FSTP(0); // pop - AND32ItoR(EAX, 0x200); - SHL32ItoR(EAX, 22); - OR32MtoR(EAX, (uptr)&g_maxvals[0]); - MOV32RtoM(mem, EAX); - j8Ptr[1] = JMP8(0); - - x86SetJ8(j8Ptr[0]); - // just pop - FSTP32(mem); - x86SetJ8(j8Ptr[1]); -} - -void recC_EQ_(int info) -{ - SetFPUstate(); - - FLD32( (uptr)&fpuRegs.fpr[_Fs_].f); - FCOMP32( (uptr)&fpuRegs.fpr[_Ft_].f); - FNSTSWtoAX( ); - TEST32ItoR( EAX, 0x00004000 ); - j8Ptr[ 0 ] = JE8( 0 ); - OR32ItoM( (uptr)&fpuRegs.fprc[ 31 ], 0x00800000 ); - j8Ptr[ 1 ] = JMP8( 0 ); - - x86SetJ8( j8Ptr[ 0 ] ); - AND32ItoM( (uptr)&fpuRegs.fprc[ 31 ], ~0x00800000 ); - - x86SetJ8( j8Ptr[ 1 ] ); -} - -void recC_LT_(int info) -{ - SetFPUstate(); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FCOMP32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FNSTSWtoAX( ); - TEST32ItoR( EAX, 0x00000100 ); - j8Ptr[ 0 ] = JE8( 0 ); - OR32ItoM( (uptr)&fpuRegs.fprc[ 31 ], 0x00800000 ); - j8Ptr[ 1 ] = JMP8( 0 ); - - x86SetJ8( j8Ptr[ 0 ] ); - AND32ItoM( (uptr)&fpuRegs.fprc[ 31 ], ~0x00800000 ); - - x86SetJ8( j8Ptr[1] ); -} - -void recC_LE_(int info) -{ - SetFPUstate(); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FCOMP32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FNSTSWtoAX( ); - TEST32ItoR( EAX, 0x00004100 ); - j8Ptr[ 0 ] = JE8( 0 ); - OR32ItoM( (uptr)&fpuRegs.fprc[ 31 ], 0x00800000 ); - j8Ptr[ 1 ] = JMP8( 0 ); - - x86SetJ8( j8Ptr[ 0 ] ); - AND32ItoM( (uptr)&fpuRegs.fprc[ 31 ], ~0x00800000 ); - - x86SetJ8( j8Ptr[ 1 ] ); -} - -void recADD_S_(int info) { - SetFPUstate(); - - SaveCW(1); - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FADD32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recSUB_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FSUB32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recMUL_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FMUL32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recDIV_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FDIV32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - SetQFromStack( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recSQRT_S_(int info) -{ - static u32 tmp; - - SysPrintf("SQRT\n"); - SetFPUstate(); - SaveCW(1); - - MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - AND32ItoR( EAX, 0x7fffffff); - MOV32RtoM((uptr)&tmp, EAX); - FLD32( (uptr)&tmp ); - FSQRT( ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recABS_S_(int info) -{ - MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - AND32ItoR( EAX, 0x7fffffff ); - MOV32RtoM( (uptr)&fpuRegs.fpr[ _Fd_ ].f, EAX ); -} - -void recMOV_S_(int info) -{ - MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); - MOV32RtoM( (uptr)&fpuRegs.fpr[ _Fd_ ].UL, EAX ); -} - -void recNEG_S_(int info) -{ - MOV32MtoR( EAX,(uptr)&fpuRegs.fpr[ _Fs_ ].f ); - XOR32ItoR( EAX, 0x80000000 ); - MOV32RtoM( (uptr)&fpuRegs.fpr[ _Fd_ ].f, EAX ); -} - -void recRSQRT_S_(int info) -{ - static u32 tmp; - - SysPrintf("RSQRT\n"); - SetFPUstate(); - SaveCW(1); - - MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - AND32ItoR( EAX, 0x7fffffff); - MOV32RtoM((uptr)&tmp, EAX); - FLD32( (uptr)&tmp ); - FSQRT( ); - FSTP32( (uptr)&tmp ); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FDIV32( (uptr)&tmp ); - SetQFromStack( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); - -// 
FLD32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); -// FSQRT( ); -// FSTP32( (uptr)&tmp ); -// -// MOV32MtoR( EAX, (uptr)&tmp ); -// OR32RtoR( EAX, EAX ); -// j8Ptr[ 0 ] = JE8( 0 ); -// FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); -// FDIV32( (uptr)&tmp ); -// FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -// x86SetJ8( j8Ptr[ 0 ] ); -} - -void recADDA_S_(int info) { - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FADD32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSTP32( (uptr)&fpuRegs.ACC.f ); -} - -void recSUBA_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FSUB32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSTP32( (uptr)&fpuRegs.ACC.f ); -} - -void recMULA_S_(int info) { - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FMUL32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSTP32( (uptr)&fpuRegs.ACC.f ); -} - -void recMADD_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FMUL32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FADD32( (uptr)&fpuRegs.ACC.f ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recMADDA_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FMUL32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FADD32( (uptr)&fpuRegs.ACC.f ); - FSTP32( (uptr)&fpuRegs.ACC.f ); -} - -void recMSUB_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.ACC.f ); - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FMUL32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSUBP( ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recMSUBA_S_(int info) -{ - SetFPUstate(); - SaveCW(1); - - FLD32( (uptr)&fpuRegs.ACC.f ); - FLD32( (uptr)&fpuRegs.fpr[ _Fs_ ].f ); - FMUL32( (uptr)&fpuRegs.fpr[ _Ft_ ].f ); - FSUBP( ); - FSTP32( (uptr)&fpuRegs.ACC.f ); -} - -void recCVT_S_(int info) -{ - SetFPUstate(); - FILD32( (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); - FSTP32( (uptr)&fpuRegs.fpr[ _Fd_ ].f ); -} - -void recMAX_S_(int info) -{ - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - iFlushCall(FLUSH_NODESTROY); - CALLFunc( (u32)MAX_S ); -} - -void recMIN_S_(int info) { - MOV32ItoM( (uptr)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (uptr)&cpuRegs.pc, pc ); - iFlushCall(FLUSH_NODESTROY); - CALLFunc( (u32)MIN_S ); -} - -#endif - -//////////////////////////////////////////////////// - - -static u32 s_signbit = 0x80000000; -extern int g_VuNanHandling; +#else // FPU_RECOMPILE +//------------------------------------------------------------------ +// Clamp Functions (Converts NaN's and Infinities to Normal Numbers) +//------------------------------------------------------------------ void fpuFloat(regd) { if (CHECK_FPU_OVERFLOW && !CHECK_FPUCLAMPHACK) { // Tekken 5 doesn't like clamping infinities. SSE_MINSS_M32_to_XMM(regd, (uptr)&g_maxvals[0]); // MIN() must be before MAX()! 
So that NaN's become +Maximum @@ -692,7 +179,6 @@ void ClampValues(regd) { } void ClampValues2(regd) { - if (CHECK_FPUCLAMPHACK) { // Fixes Tekken 5 ( Makes NaN equal 0, infinities stay the same ) int t5reg = _allocTempXMMreg(XMMT_FPS, -1); @@ -708,210 +194,34 @@ void ClampValues2(regd) { _freeXMMreg(t5reg); } else fpuFloat(regd); - } +//------------------------------------------------------------------ -//////////////////////////////////////////////////// -void recC_EQ_xmm(int info) -{ - int tempReg; - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { - case PROCESS_EE_S: - SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); - SSE_UCOMISS_M32_to_XMM(EEREC_S, (uptr)&fpuRegs.fpr[_Ft_]); - break; - case PROCESS_EE_T: - SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); - SSE_UCOMISS_M32_to_XMM(EEREC_T, (uptr)&fpuRegs.fpr[_Fs_]); - break; - case (PROCESS_EE_S|PROCESS_EE_T): - SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); - SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); - SSE_UCOMISS_XMM_to_XMM(EEREC_S, EEREC_T); - break; - default: - SysPrintf("recC_EQ_xmm: Default\n"); - tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} - MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); - CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); - - j8Ptr[0] = JZ8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); - - _freeX86reg(tempReg); - return; +//------------------------------------------------------------------ +// ABS XMM +//------------------------------------------------------------------ +void recABS_S_xmm(int info) +{ + if( info & PROCESS_EE_S ) { + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); } + else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - j8Ptr[0] = JZ8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); + //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + + if (CHECK_FPU_OVERFLOW) // Only need to do positive clamp, since EEREC_D is positive + SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]); } -FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); -//REC_FPUFUNC(C_EQ); +FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED|XMMINFO_READS); +//------------------------------------------------------------------ -//////////////////////////////////////////////////// -void recC_F() -{ - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); -} -//REC_FPUFUNC(C_F); -//////////////////////////////////////////////////// -void recC_LT_xmm(int info) -{ - int tempReg; - - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { - case PROCESS_EE_S: - SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); - SSE_UCOMISS_M32_to_XMM(EEREC_S, (uptr)&fpuRegs.fpr[_Ft_]); - break; - case PROCESS_EE_T: - SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); - SSE_UCOMISS_M32_to_XMM(EEREC_T, (uptr)&fpuRegs.fpr[_Fs_]); - j8Ptr[0] = JA8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); - return; - case (PROCESS_EE_S|PROCESS_EE_T): - // Makes NaNs and +Infinity be +maximum; -Infinity stays - // the same, but this is okay for a Compare operation. - // Note: This fixes a crash in Rule of Rose. 
- SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); - SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); - SSE_UCOMISS_XMM_to_XMM(EEREC_S, EEREC_T); - break; - default: - SysPrintf("recC_LT_xmm: Default\n"); - tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} - MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); - CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); - - j8Ptr[0] = JL8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); - - _freeX86reg(tempReg); - return; - } - - j8Ptr[0] = JB8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); -} - -FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); -//REC_FPUFUNC(C_LT); -/* -void recC_LT() -{ - int tempS, tempT, tempReg; - iFlushCall(FLUSH_EVERYTHING); - tempS = _allocTempXMMreg(XMMT_FPS, -1); - tempT = _allocTempXMMreg(XMMT_FPS, -1); - tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} - SysPrintf("Default\n"); - - SSE_MOVSS_M32_to_XMM(tempS, (uptr)&fpuRegs.fpr[_Fs_]); - SSE_MINSS_M32_to_XMM(tempS, (uptr)&g_maxvals[0]); - SSE_MAXSS_M32_to_XMM(tempS, (uptr)&g_minvals[0]); - - SSE_MOVSS_M32_to_XMM(tempT, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MINSS_M32_to_XMM(tempT, (uptr)&g_maxvals[0]); - SSE_MAXSS_M32_to_XMM(tempT, (uptr)&g_minvals[0]); - - SSE_UCOMISS_XMM_to_XMM(tempS, tempT); - //MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); - //CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); - //j8Ptr[0] = JL8(0); - - j8Ptr[0] = JB8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); - - _freeXMMreg(tempS); - _freeXMMreg(tempT); - _freeX86reg(tempReg); -}*/ - -//////////////////////////////////////////////////// -void recC_LE_xmm(int info ) -{ - int tempReg; - - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { - case PROCESS_EE_S: - SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); - SSE_UCOMISS_M32_to_XMM(EEREC_S, (uptr)&fpuRegs.fpr[_Ft_]); - break; - case PROCESS_EE_T: - SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); - SSE_UCOMISS_M32_to_XMM(EEREC_T, (uptr)&fpuRegs.fpr[_Fs_]); - j8Ptr[0] = JAE8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); - return; - case (PROCESS_EE_S|PROCESS_EE_T): - SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); - SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); - SSE_UCOMISS_XMM_to_XMM(EEREC_S, EEREC_T); - break; - default: - SysPrintf("recC_LE_xmm: Default\n"); - tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} - MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); - CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); - - j8Ptr[0] = JLE8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); - - _freeX86reg(tempReg); - return; - } - - j8Ptr[0] = JBE8(0); - AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[0]); - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); - x86SetJ8(j8Ptr[1]); -} - 
-FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); -//REC_FPUFUNC(C_LE); -//////////////////////////////////////////////////// +//------------------------------------------------------------------ +// CommutativeOp XMM (used for ADD, MUL, MAX, and MIN opcodes) +//------------------------------------------------------------------ static void (*recComOpXMM_to_XMM[] )(x86SSERegType, x86SSERegType) = { SSE_ADDSS_XMM_to_XMM, SSE_MULSS_XMM_to_XMM, SSE_MAXSS_XMM_to_XMM, SSE_MINSS_XMM_to_XMM }; @@ -971,7 +281,12 @@ int recCommutativeOp(int info, int regd, int op) _freeXMMreg(t0reg); return regd; } +//------------------------------------------------------------------ + +//------------------------------------------------------------------ +// ADD XMM +//------------------------------------------------------------------ void recADD_S_xmm(int info) { //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags @@ -981,84 +296,424 @@ void recADD_S_xmm(int info) FPURECOMPILE_CONSTCODE(ADD_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); -//////////////////////////////////////////////////// - -void recSUBhelper(int regd, int regt) +void recADDA_S_xmm(int info) { - if (CHECK_FPU_EXTRA_OVERFLOW /*&& !CHECK_FPUCLAMPHACK*/) { fpuFloat(regd); fpuFloat(regt); } - SSE_SUBSS_XMM_to_XMM(regd, regt); + //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + ClampValues(recCommutativeOp(info, EEREC_ACC, 0)); } -void recSUBop(int info, int regd) -{ - int t0reg = _allocTempXMMreg(XMMT_FPS, -1); - //if (t0reg == -1) {SysPrintf("FPU: SUB Allocation Error!\n");} +FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ - //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + +//------------------------------------------------------------------ +// BC1x XMM +//------------------------------------------------------------------ +void recBC1F( void ) { + u32 branchTo = (s32)_Imm_ * 4 + pc; + + _eeFlushAllUnused(); + MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]); + TEST32ItoR(EAX, FPUflagC); + j32Ptr[0] = JNZ32(0); + + SaveBranchState(); + recompileNextInstruction(1); + SetBranchImm(branchTo); + + x86SetJ32(j32Ptr[0]); + + // recopy the next inst + pc -= 4; + LoadBranchState(); + recompileNextInstruction(1); + + SetBranchImm(pc); +} + +void recBC1T( void ) { + u32 branchTo = (s32)_Imm_ * 4 + pc; + + _eeFlushAllUnused(); + MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]); + TEST32ItoR(EAX, FPUflagC); + j32Ptr[0] = JZ32(0); + + SaveBranchState(); + recompileNextInstruction(1); + SetBranchImm(branchTo); + //j32Ptr[1] = JMP32(0); + + x86SetJ32(j32Ptr[0]); + + // recopy the next inst + pc -= 4; + LoadBranchState(); + recompileNextInstruction(1); + + SetBranchImm(pc); + //x86SetJ32(j32Ptr[1]); +} + +void recBC1FL( void ) { + u32 branchTo = _Imm_ * 4 + pc; + + _eeFlushAllUnused(); + MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]); + TEST32ItoR(EAX, FPUflagC); + j32Ptr[0] = JNZ32(0); + + SaveBranchState(); + recompileNextInstruction(1); + SetBranchImm(branchTo); + + x86SetJ32(j32Ptr[0]); + + LoadBranchState(); + SetBranchImm(pc); +} + +void recBC1TL( void ) { + u32 branchTo = _Imm_ * 4 + pc; + + _eeFlushAllUnused(); + MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]); + TEST32ItoR(EAX, FPUflagC); + j32Ptr[0] = JZ32(0); + + SaveBranchState(); + recompileNextInstruction(1); + SetBranchImm(branchTo); + x86SetJ32(j32Ptr[0]); + + LoadBranchState(); + SetBranchImm(pc); +} 
+//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// C.x.S XMM +//------------------------------------------------------------------ +void recC_EQ_xmm(int info) +{ + int tempReg; + + switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + case PROCESS_EE_S: + SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); + SSE_UCOMISS_M32_to_XMM(EEREC_S, (uptr)&fpuRegs.fpr[_Ft_]); + break; + case PROCESS_EE_T: + SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); + SSE_UCOMISS_M32_to_XMM(EEREC_T, (uptr)&fpuRegs.fpr[_Fs_]); + break; + case (PROCESS_EE_S|PROCESS_EE_T): + SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); + SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); + SSE_UCOMISS_XMM_to_XMM(EEREC_S, EEREC_T); + break; + default: + SysPrintf("recC_EQ_xmm: Default\n"); + tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); + if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} + MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); + CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); + + j8Ptr[0] = JZ8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); + + _freeX86reg(tempReg); + return; + } + + j8Ptr[0] = JZ8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); +} + +FPURECOMPILE_CONSTCODE(C_EQ, XMMINFO_READS|XMMINFO_READT); +//REC_FPUFUNC(C_EQ); + +void recC_F() +{ + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); +} +//REC_FPUFUNC(C_F); + +void recC_LE_xmm(int info ) +{ + int tempReg; + + switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + case PROCESS_EE_S: + SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); + SSE_UCOMISS_M32_to_XMM(EEREC_S, (uptr)&fpuRegs.fpr[_Ft_]); + break; + case PROCESS_EE_T: + SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); + SSE_UCOMISS_M32_to_XMM(EEREC_T, (uptr)&fpuRegs.fpr[_Fs_]); + j8Ptr[0] = JAE8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); + return; + case (PROCESS_EE_S|PROCESS_EE_T): + SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); + SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); + SSE_UCOMISS_XMM_to_XMM(EEREC_S, EEREC_T); + break; + default: + SysPrintf("recC_LE_xmm: Default\n"); + tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); + if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} + MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); + CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); + + j8Ptr[0] = JLE8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); + + _freeX86reg(tempReg); + return; + } + + j8Ptr[0] = JBE8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); +} + +FPURECOMPILE_CONSTCODE(C_LE, XMMINFO_READS|XMMINFO_READT); +//REC_FPUFUNC(C_LE); + +void recC_LT_xmm(int info) +{ + int tempReg; switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { case PROCESS_EE_S: - //SysPrintf("FPU: SUB case 1\n"); - if (regd != EEREC_S) SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - recSUBhelper(regd, t0reg); + 
SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); + SSE_UCOMISS_M32_to_XMM(EEREC_S, (uptr)&fpuRegs.fpr[_Ft_]); break; case PROCESS_EE_T: - //SysPrintf("FPU: SUB case 2\n"); - if (regd == EEREC_T) { - SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); - recSUBhelper(regd, t0reg); - } - else { - SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); - recSUBhelper(regd, EEREC_T); - } - break; + SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); + SSE_UCOMISS_M32_to_XMM(EEREC_T, (uptr)&fpuRegs.fpr[_Fs_]); + j8Ptr[0] = JA8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); + return; case (PROCESS_EE_S|PROCESS_EE_T): - //SysPrintf("FPU: SUB case 3\n"); - if (regd == EEREC_T) { - SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - recSUBhelper(regd, t0reg); - } - else { - if (regd != EEREC_S) SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - recSUBhelper(regd, EEREC_T); - } + // Makes NaNs and +Infinity be +maximum; -Infinity stays + // the same, but this is okay for a Compare operation. + // Note: This fixes a crash in Rule of Rose. + SSE_MINSS_M32_to_XMM(EEREC_S, (uptr)&g_maxvals[0]); + SSE_MINSS_M32_to_XMM(EEREC_T, (uptr)&g_maxvals[0]); + SSE_UCOMISS_XMM_to_XMM(EEREC_S, EEREC_T); break; default: - SysPrintf("FPU: SUB case 4\n"); - SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); - recSUBhelper(regd, t0reg); - break; + SysPrintf("recC_LT_xmm: Default\n"); + tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); + if (tempReg == -1) {SysPrintf("FPU: DIV Allocation Error!\n"); tempReg = EAX;} + MOV32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Fs_]); + CMP32MtoR(tempReg, (uptr)&fpuRegs.fpr[_Ft_]); + + j8Ptr[0] = JL8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); + + _freeX86reg(tempReg); + return; } - ClampValues2(regd); - _freeXMMreg(t0reg); + j8Ptr[0] = JB8(0); + AND32ItoM( (uptr)&fpuRegs.fprc[31], ~FPUflagC ); + j8Ptr[1] = JMP8(0); + x86SetJ8(j8Ptr[0]); + OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagC); + x86SetJ8(j8Ptr[1]); } -void recSUB_S_xmm(int info) +FPURECOMPILE_CONSTCODE(C_LT, XMMINFO_READS|XMMINFO_READT); +//REC_FPUFUNC(C_LT); +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// CFC1 / CTC1 XMM +//------------------------------------------------------------------ +void recCFC1(void) { - recSUBop(info, EEREC_D); + if ( ! 
_Rt_ ) return; + + _eeOnWriteReg(_Rt_, 1); + + MOV32MtoR( EAX, (uptr)&fpuRegs.fprc[ _Fs_ ] ); + _deleteEEreg(_Rt_, 0); + + if(EEINST_ISLIVE1(_Rt_)) { +#ifdef __x86_64__ + CDQE(); + MOV64RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX ); +#else + CDQ( ); + MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); +#endif + } + else { + EEINST_RESETHASLIVE1(_Rt_); + MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + } } -FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +void recCTC1( void ) +{ + if( GPR_IS_CONST1(_Rt_)) { + MOV32ItoM((uptr)&fpuRegs.fprc[ _Fs_ ], g_cpuConstRegs[_Rt_].UL[0]); + } + else { + int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); + if( mmreg >= 0 ) { + SSEX_MOVD_XMM_to_M32((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); + } +#ifdef __x86_64__ + else if( (mmreg = _checkX86reg(X86TYPE_GPR, _Rt_, MODE_READ)) >= 0 ) { + MOV32RtoM((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); + } +#else + else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) { + MOVDMMXtoM((uptr)&fpuRegs.fprc[ _Fs_ ], mmreg); + SetMMXstate(); + } +#endif + else { + _deleteGPRtoXMMreg(_Rt_, 1); -//////////////////////////////////////////////////// -void recMUL_S_xmm(int info) -{ - //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags - ClampValues(recCommutativeOp(info, EEREC_D, 1)); +#ifdef __x86_64__ + _deleteX86reg(X86TYPE_GPR, _Rt_, 1); +#else + _deleteMMXreg(MMX_GPR+_Rt_, 1); +#endif + MOV32MtoR( EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ] ); + MOV32RtoM( (uptr)&fpuRegs.fprc[ _Fs_ ], EAX ); + } + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// CVT.x XMM +//------------------------------------------------------------------ +void recCVT_S_xmm(int info) +{ + if( !(info&PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info&PROCESS_EE_MODEWRITES)) ) { + SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + } + else { + if( cpucaps.hasStreamingSIMD2Extensions ) { + SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else { + if( info&PROCESS_EE_MODEWRITES ) { + if( xmmregs[EEREC_S].reg == _Fs_ ) + _deleteFPtoXMMreg(_Fs_, 1); + else { + // force sync + SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[_Fs_], EEREC_S); + } + } + SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + xmmregs[EEREC_D].mode |= MODE_WRITE; // in the case that _Fs_ == _Fd_ + } + } } -FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); -//////////////////////////////////////////////////// +static u32 s_signbit = 0x80000000; -// Sets flags -void recDIVhelper1(int regd, int regt) +void recCVT_W() +{ + if( cpucaps.hasStreamingSIMDExtensions ) { + int t0reg; + int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); + + if( regs >= 0 ) { + t0reg = _allocTempXMMreg(XMMT_FPS, -1); + _freeXMMreg(t0reg); + SSE_MOVSS_M32_to_XMM(t0reg, (u32)&s_signbit); + SSE_CVTTSS2SI_XMM_to_R32(EAX, regs); + SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs); + } + else SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]); + _deleteFPtoXMMreg(_Fd_, 2); + + MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]); + AND32ItoR(ECX, 0x7f800000); + CMP32ItoR(ECX, 0x4E800000); + j8Ptr[0] = JLE8(0); + + // need to detect if reg is positive + /*if( regs >= 0 ) { + SSE_UCOMISS_XMM_to_XMM(regs, t0reg); + j8Ptr[2] = JB8(0); + } + else {*/ + TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ 
], 0x80000000); + j8Ptr[2] = JNZ8(0); + //} + + MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff); + j8Ptr[1] = JMP8(0); + + x86SetJ8( j8Ptr[2] ); + MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000); + j8Ptr[1] = JMP8(0); + + x86SetJ8( j8Ptr[0] ); + + MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX); + + x86SetJ8( j8Ptr[1] ); + } +#ifndef __x86_64__ + else { + MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); + iFlushCall(FLUSH_EVERYTHING); + _flushConstRegs(); + CALLFunc((uptr)CVT_W); + } +#endif +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// DIV XMM +//------------------------------------------------------------------ +void recDIVhelper1(int regd, int regt) // Sets flags { u8 *pjmp1, *pjmp2; u32 *ajmp32, *bjmp32; @@ -1108,8 +763,7 @@ void recDIVhelper1(int regd, int regt) _freeX86reg(tempReg); } -// Doesn't sets flags -void recDIVhelper2(int regd, int regt) +void recDIVhelper2(int regd, int regt) // Doesn't sets flags { if (CHECK_FPU_EXTRA_OVERFLOW) { fpuFloat(regd); fpuFloat(regt); } SSE_DIVSS_XMM_to_XMM(regd, regt); @@ -1169,205 +823,12 @@ void recDIV_S_xmm(int info) } FPURECOMPILE_CONSTCODE(DIV_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ -void recSQRT_S_xmm(int info) -{ - u8* pjmp; - int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error!\n"); tempReg = EAX;} - //SysPrintf("FPU: SQRT\n"); - - if( info & PROCESS_EE_T ) { - if ( EEREC_D != EEREC_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]); - - if (CHECK_FPU_EXTRA_FLAGS) { - AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags - - /*--- Check for negative SQRT ---*/ - SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D); - AND32ItoR(tempReg, 1); //Check sign - pjmp = JZ8(0); //Skip if none are - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive - x86SetJ8(pjmp); - } - else SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive - - if (CHECK_FPU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]);// Only need to do positive clamp, since EEREC_D is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); - if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again since SQRT of a number will always be smaller than the original number, doing it just incase :/ - - _freeX86reg(tempReg); -} - -FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT); - -void recABS_S_xmm(int info) -{ - if( info & PROCESS_EE_S ) { - if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); - //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags - - if (CHECK_FPU_OVERFLOW) // Only need to do positive clamp, since EEREC_D is positive - SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]); -} - -FPURECOMPILE_CONSTCODE(ABS_S, XMMINFO_WRITED|XMMINFO_READS); - -void recMOV_S_xmm(int info) -{ - if( info & PROCESS_EE_S ) { - if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); -} - -FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS); - -void recNEG_S_xmm(int info) { - if( info & 
PROCESS_EE_S ) { - if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - - //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags - SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&s_neg[0]); - ClampValues(EEREC_D); -} - -FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); - -// Preforms the RSQRT function when regd <- Fs and t0reg <- Ft (Sets correct flags) -void recRSQRThelper1(int regd, int t0reg) -{ - u8 *pjmp1, *pjmp2; - u32 *pjmp32; - int t1reg = _allocTempXMMreg(XMMT_FPS, -1); - int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - //if (t1reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");} - if (tempReg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n"); tempReg = EAX;} - - AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags - - /*--- Check for zero ---*/ - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); - SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg); - SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg); - AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set) - pjmp1 = JZ8(0); //Skip if not set - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags - SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative - SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit - SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum - pjmp32 = JMP32(0); - x86SetJ8(pjmp1); - - /*--- Check for negative SQRT ---*/ - SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg); - AND32ItoR(tempReg, 1); //Check sign - pjmp2 = JZ8(0); //Skip if not set - OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags - SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive - x86SetJ8(pjmp2); - - if (CHECK_FPU_EXTRA_OVERFLOW) { - SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive - fpuFloat(regd); - } - - SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); - SSE_DIVSS_XMM_to_XMM(regd, t0reg); - - ClampValues(regd); - x86SetJ32(pjmp32); - - _freeXMMreg(t1reg); - _freeX86reg(tempReg); -} - -// Preforms the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags) -void recRSQRThelper2(int regd, int t0reg) -{ - SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive - if (CHECK_FPU_EXTRA_OVERFLOW) { - SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive - fpuFloat(regd); - } - SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg); - SSE_DIVSS_XMM_to_XMM(regd, t0reg); - ClampValues(regd); -} - -void recRSQRT_S_xmm(int info) -{ - int t0reg = _allocTempXMMreg(XMMT_FPS, -1); - //if (t0reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");} - //SysPrintf("FPU: RSQRT\n"); - - switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { - case PROCESS_EE_S: - //SysPrintf("FPU: RSQRT case 1\n"); - if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); - break; - case PROCESS_EE_T: - //SysPrintf("FPU: RSQRT case 2\n"); - SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); - SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); - break; - case (PROCESS_EE_S|PROCESS_EE_T): - //SysPrintf("FPU: RSQRT case 3\n"); - SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); - if( EEREC_D != EEREC_S ) 
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); - break; - default: - //SysPrintf("FPU: RSQRT case 4\n"); - SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); - SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg); - else recRSQRThelper2(EEREC_D, t0reg); - break; - } - _freeXMMreg(t0reg); -} - -FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); - -void recADDA_S_xmm(int info) -{ - //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags - ClampValues(recCommutativeOp(info, EEREC_ACC, 0)); -} - -FPURECOMPILE_CONSTCODE(ADDA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); - -void recSUBA_S_xmm(int info) -{ - recSUBop(info, EEREC_ACC); -} - -FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); - -void recMULA_S_xmm(int info) -{ - //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags - ClampValues(recCommutativeOp(info, EEREC_ACC, 1)); -} - -FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ +// MADD XMM +//------------------------------------------------------------------ void recMADDtemp(int info, int regd) { int t1reg; @@ -1546,7 +1007,154 @@ void recMADDA_S_xmm(int info) } FPURECOMPILE_CONSTCODE(MADDA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ + +//------------------------------------------------------------------ +// MAX / MIN XMM +//------------------------------------------------------------------ +void recMAX_S_xmm(int info) +{ + //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + recCommutativeOp(info, EEREC_D, 2); +} + +FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); + +void recMIN_S_xmm(int info) +{ + //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + recCommutativeOp(info, EEREC_D, 3); +} + +FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MFC1 XMM +//------------------------------------------------------------------ +void recMFC1(void) { + int regt, regs; + if ( ! 
_Rt_ ) return; + + _eeOnWriteReg(_Rt_, 1); + + regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); + if( regs >= 0 ) { + _deleteGPRtoXMMreg(_Rt_, 2); + +#ifdef __x86_64__ + regt = _allocCheckGPRtoX86(g_pCurInstInfo, _Rt_, MODE_WRITE); + + if( regt >= 0 ) { + + if(EEINST_ISLIVE1(_Rt_)) { + SSE2_MOVD_XMM_to_R(RAX, regs); + // sign extend + CDQE(); + MOV64RtoR(regt, RAX); + } + else { + SSE2_MOVD_XMM_to_R(regt, regs); + EEINST_RESETHASLIVE1(_Rt_); + } + } +#else + regt = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_WRITE); + + if( regt >= 0 ) { + SSE2_MOVDQ2Q_XMM_to_MM(regt, regs); + + if(EEINST_ISLIVE1(_Rt_)) _signExtendGPRtoMMX(regt, _Rt_, 0); + else EEINST_RESETHASLIVE1(_Rt_); + } +#endif + else { + if(EEINST_ISLIVE1(_Rt_)) { + _signExtendXMMtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs, 0); + } + else { + EEINST_RESETHASLIVE1(_Rt_); + SSE_MOVSS_XMM_to_M32((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], regs); + } + } + } +#ifndef __x86_64__ + else if( (regs = _checkMMXreg(MMX_FPU+_Fs_, MODE_READ)) >= 0 ) { + // convert to mmx reg + mmxregs[regs].reg = MMX_GPR+_Rt_; + mmxregs[regs].mode |= MODE_READ|MODE_WRITE; + _signExtendGPRtoMMX(regs, _Rt_, 0); + } +#endif + else { + regt = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); + + if( regt >= 0 ) { + if( xmmregs[regt].mode & MODE_WRITE ) { + SSE_MOVHPS_XMM_to_M64((uptr)&cpuRegs.GPR.r[_Rt_].UL[2], regt); + } + xmmregs[regt].inuse = 0; + } +#ifdef __x86_64__ + else if( (regt = _allocCheckGPRtoX86(g_pCurInstInfo, _Rt_, MODE_WRITE)) >= 0 ) { + + if(EEINST_ISLIVE1(_Rt_)) { + MOV32MtoR( RAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); + CDQE(); + MOV64RtoR(regt, RAX); + } + else { + MOV32MtoR( regt, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); + EEINST_RESETHASLIVE1(_Rt_); + } + } + else +#endif + { + + _deleteEEreg(_Rt_, 0); + MOV32MtoR( EAX, (uptr)&fpuRegs.fpr[ _Fs_ ].UL ); + + if(EEINST_ISLIVE1(_Rt_)) { +#ifdef __x86_64__ + CDQE(); + MOV64RtoM((uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], RAX); +#else + CDQ( ); + MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], EDX ); +#endif + } + else { + EEINST_RESETHASLIVE1(_Rt_); + MOV32RtoM( (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], EAX ); + } + } + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MOV XMM +//------------------------------------------------------------------ +void recMOV_S_xmm(int info) +{ + if( info & PROCESS_EE_S ) { + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); +} + +FPURECOMPILE_CONSTCODE(MOV_S, XMMINFO_WRITED|XMMINFO_READS); +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MSUB XMM +//------------------------------------------------------------------ void recMSUBtemp(int info, int regd) { int t1reg; @@ -1694,188 +1302,325 @@ void recMSUBA_S_xmm(int info) } FPURECOMPILE_CONSTCODE(MSUBA_S, XMMINFO_WRITEACC|XMMINFO_READACC|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ -void recCVT_S_xmm(int info) + +//------------------------------------------------------------------ +// MTC1 XMM +//------------------------------------------------------------------ +void recMTC1(void) { - if( !(info&PROCESS_EE_S) || (EEREC_D != EEREC_S && !(info&PROCESS_EE_MODEWRITES)) ) { - SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + if( 
GPR_IS_CONST1(_Rt_) ) { + _deleteFPtoXMMreg(_Fs_, 0); + MOV32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, g_cpuConstRegs[_Rt_].UL[0]); } else { - if( cpucaps.hasStreamingSIMD2Extensions ) { - SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_D, EEREC_S); + int mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_READ); + if( mmreg >= 0 ) { + if( g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE ) { + // transfer the reg directly + _deleteGPRtoXMMreg(_Rt_, 2); + _deleteFPtoXMMreg(_Fs_, 2); + _allocFPtoXMMreg(mmreg, _Fs_, MODE_WRITE); + } + else { + int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); + if( mmreg2 >= 0 ) SSE_MOVSS_XMM_to_XMM(mmreg2, mmreg); + else SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); + } } - else { - if( info&PROCESS_EE_MODEWRITES ) { - if( xmmregs[EEREC_S].reg == _Fs_ ) - _deleteFPtoXMMreg(_Fs_, 1); +#ifndef __x86_64__ + else if( (mmreg = _checkMMXreg(MMX_GPR+_Rt_, MODE_READ)) >= 0 ) { + + if( cpucaps.hasStreamingSIMD2Extensions ) { + int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); + if( mmreg2 >= 0 ) { + SetMMXstate(); + SSE2_MOVQ2DQ_MM_to_XMM(mmreg2, mmreg); + } else { - // force sync - SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[_Fs_], EEREC_S); + SetMMXstate(); + MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); } } - SSE_CVTSI2SS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - xmmregs[EEREC_D].mode |= MODE_WRITE; // in the case that _Fs_ == _Fd_ + else { + _deleteFPtoXMMreg(_Fs_, 0); + SetMMXstate(); + MOVDMMXtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, mmreg); + } } - } -} - -FPURECOMPILE_CONSTCODE(CVT_S, XMMINFO_WRITED|XMMINFO_READS); - -//////////////////////////////////////////////////// -void recCVT_W() -{ - if( cpucaps.hasStreamingSIMDExtensions ) { - int t0reg; - int regs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - - if( regs >= 0 ) { - t0reg = _allocTempXMMreg(XMMT_FPS, -1); - _freeXMMreg(t0reg); - SSE_MOVSS_M32_to_XMM(t0reg, (u32)&s_signbit); - SSE_CVTTSS2SI_XMM_to_R32(EAX, regs); - SSE_MOVSS_XMM_to_M32((uptr)&fpuRegs.fpr[ _Fs_ ], regs); - } - else SSE_CVTTSS2SI_M32_to_R32(EAX, (uptr)&fpuRegs.fpr[ _Fs_ ]); - _deleteFPtoXMMreg(_Fd_, 2); - - MOV32MtoR(ECX, (uptr)&fpuRegs.fpr[ _Fs_ ]); - AND32ItoR(ECX, 0x7f800000); - CMP32ItoR(ECX, 0x4E800000); - j8Ptr[0] = JLE8(0); - - // need to detect if reg is positive - /*if( regs >= 0 ) { - SSE_UCOMISS_XMM_to_XMM(regs, t0reg); - j8Ptr[2] = JB8(0); - } - else {*/ - TEST32ItoM((uptr)&fpuRegs.fpr[ _Fs_ ], 0x80000000); - j8Ptr[2] = JNZ8(0); - //} - - MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x7fffffff); - j8Ptr[1] = JMP8(0); - - x86SetJ8( j8Ptr[2] ); - MOV32ItoM((uptr)&fpuRegs.fpr[_Fd_], 0x80000000); - j8Ptr[1] = JMP8(0); - - x86SetJ8( j8Ptr[0] ); - - MOV32RtoM((uptr)&fpuRegs.fpr[_Fd_], EAX); - - x86SetJ8( j8Ptr[1] ); - } -#ifndef __x86_64__ - else { - MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code); - iFlushCall(FLUSH_EVERYTHING); - _flushConstRegs(); - CALLFunc((uptr)CVT_W); - } #endif -} + else { + int mmreg2 = _allocCheckFPUtoXMM(g_pCurInstInfo, _Fs_, MODE_WRITE); -void recMAX_S_xmm(int info) -{ + if( mmreg2 >= 0 ) SSE_MOVSS_M32_to_XMM(mmreg2, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); + else { + MOV32MtoR(EAX, (uptr)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ]); + MOV32RtoM((uptr)&fpuRegs.fpr[ _Fs_ ].UL, EAX); + } + } + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MUL XMM +//------------------------------------------------------------------ +void recMUL_S_xmm(int info) +{ //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); 
// Clear O and U flags - recCommutativeOp(info, EEREC_D, 2); + ClampValues(recCommutativeOp(info, EEREC_D, 1)); } -FPURECOMPILE_CONSTCODE(MAX_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MUL_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); -void recMIN_S_xmm(int info) -{ +void recMULA_S_xmm(int info) +{ //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags - recCommutativeOp(info, EEREC_D, 3); + ClampValues(recCommutativeOp(info, EEREC_ACC, 1)); } -FPURECOMPILE_CONSTCODE(MIN_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); +FPURECOMPILE_CONSTCODE(MULA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); +//------------------------------------------------------------------ -//////////////////////////////////////////////////// -void recBC1F( void ) { - u32 branchTo = (s32)_Imm_ * 4 + pc; + +//------------------------------------------------------------------ +// NEG XMM +//------------------------------------------------------------------ +void recNEG_S_xmm(int info) { + if( info & PROCESS_EE_S ) { + if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + + //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&s_neg[0]); + ClampValues(EEREC_D); +} + +FPURECOMPILE_CONSTCODE(NEG_S, XMMINFO_WRITED|XMMINFO_READS); +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SUB XMM +//------------------------------------------------------------------ +void recSUBhelper(int regd, int regt) +{ + if (CHECK_FPU_EXTRA_OVERFLOW /*&& !CHECK_FPUCLAMPHACK*/) { fpuFloat(regd); fpuFloat(regt); } + SSE_SUBSS_XMM_to_XMM(regd, regt); +} + +void recSUBop(int info, int regd) +{ + int t0reg = _allocTempXMMreg(XMMT_FPS, -1); + //if (t0reg == -1) {SysPrintf("FPU: SUB Allocation Error!\n");} + + //AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagO|FPUflagU)); // Clear O and U flags + + switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { + case PROCESS_EE_S: + //SysPrintf("FPU: SUB case 1\n"); + if (regd != EEREC_S) SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + recSUBhelper(regd, t0reg); + break; + case PROCESS_EE_T: + //SysPrintf("FPU: SUB case 2\n"); + if (regd == EEREC_T) { + SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); + SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); + recSUBhelper(regd, t0reg); + } + else { + SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); + recSUBhelper(regd, EEREC_T); + } + break; + case (PROCESS_EE_S|PROCESS_EE_T): + //SysPrintf("FPU: SUB case 3\n"); + if (regd == EEREC_T) { + SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T); + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + recSUBhelper(regd, t0reg); + } + else { + if (regd != EEREC_S) SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + recSUBhelper(regd, EEREC_T); + } + break; + default: + SysPrintf("FPU: SUB case 4\n"); + SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + SSE_MOVSS_M32_to_XMM(regd, (uptr)&fpuRegs.fpr[_Fs_]); + recSUBhelper(regd, t0reg); + break; + } + + ClampValues2(regd); + _freeXMMreg(t0reg); +} + +void recSUB_S_xmm(int info) +{ + recSUBop(info, EEREC_D); +} + +FPURECOMPILE_CONSTCODE(SUB_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT); + + +void recSUBA_S_xmm(int info) +{ + recSUBop(info, EEREC_ACC); +} + +FPURECOMPILE_CONSTCODE(SUBA_S, XMMINFO_WRITEACC|XMMINFO_READS|XMMINFO_READT); 
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// SQRT XMM
+//------------------------------------------------------------------
+void recSQRT_S_xmm(int info)
+{
+    u8* pjmp;
+    int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
+    if (tempReg == -1) {SysPrintf("FPU: SQRT Allocation Error!\n"); tempReg = EAX;}
+    //SysPrintf("FPU: SQRT\n");
+
+    if( info & PROCESS_EE_T ) {
+        if ( EEREC_D != EEREC_T ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_T);
+    }
+    else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Ft_]);
+
+    if (CHECK_FPU_EXTRA_FLAGS) {
+        AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
+
+        /*--- Check for negative SQRT ---*/
+        SSE_MOVMSKPS_XMM_to_R32(tempReg, EEREC_D);
+        AND32ItoR(tempReg, 1); //Check sign
+        pjmp = JZ8(0); //Skip if not set
+        OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
+        SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
+        x86SetJ8(pjmp);
+    }
+    else SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&s_pos[0]); // Make EEREC_D Positive
-    _eeFlushAllUnused();
-    MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]);
-    TEST32ItoR(EAX, FPUflagC);
-    j32Ptr[0] = JNZ32(0);
+    if (CHECK_FPU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since EEREC_D is positive
+    SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D);
+    if (CHECK_FPU_EXTRA_OVERFLOW) ClampValues(EEREC_D); // Shouldn't need to clamp again, since the input was already clamped below the maximum and its square root can't overflow; doing it just in case
-    SaveBranchState();
-    recompileNextInstruction(1);
-    SetBranchImm(branchTo);
-
-    x86SetJ32(j32Ptr[0]);
-
-    // recopy the next inst
-    pc -= 4;
-    LoadBranchState();
-    recompileNextInstruction(1);
-
-    SetBranchImm(pc);
+    _freeX86reg(tempReg);
 }
 
-void recBC1T( void ) {
-    u32 branchTo = (s32)_Imm_ * 4 + pc;
+FPURECOMPILE_CONSTCODE(SQRT_S, XMMINFO_WRITED|XMMINFO_READT);
+//------------------------------------------------------------------
 
-    _eeFlushAllUnused();
-    MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]);
-    TEST32ItoR(EAX, FPUflagC);
-    j32Ptr[0] = JZ32(0);
-    SaveBranchState();
-    recompileNextInstruction(1);
-    SetBranchImm(branchTo);
-    //j32Ptr[1] = JMP32(0);
+
+//------------------------------------------------------------------
+// RSQRT XMM
+//------------------------------------------------------------------
+void recRSQRThelper1(int regd, int t0reg) // Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Sets correct flags)
+{
+    u8 *pjmp1, *pjmp2;
+    u32 *pjmp32;
+    int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
+    int tempReg = _allocX86reg(-1, X86TYPE_TEMP, 0, 0);
+    //if (t1reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");}
+    if (tempReg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n"); tempReg = EAX;}
 
-    x86SetJ32(j32Ptr[0]);
+    AND32ItoM((uptr)&fpuRegs.fprc[31], ~(FPUflagI|FPUflagD)); // Clear I and D flags
 
-    // recopy the next inst
-    pc -= 4;
-    LoadBranchState();
-    recompileNextInstruction(1);
 
+    /*--- Check for zero ---*/
+    SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
+    SSE_CMPEQSS_XMM_to_XMM(t1reg, t0reg);
+    SSE_MOVMSKPS_XMM_to_R32(tempReg, t1reg);
+    AND32ItoR(tempReg, 1); //Check sign (if t0reg == zero, sign will be set)
+    pjmp1 = JZ8(0); //Skip if not set
+    OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagD|FPUflagSD); // Set D and SD flags
+    SSE_XORPS_XMM_to_XMM(regd, t0reg); // Make regd Positive or Negative
+    SSE_ANDPS_M128_to_XMM(regd, (uptr)&s_neg[0]); // Get the sign bit
+    SSE_ORPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]); // regd = +/- Maximum
+    pjmp32 = JMP32(0);
+    x86SetJ8(pjmp1);
-    SetBranchImm(pc);
-    //x86SetJ32(j32Ptr[1]);
 
+    /*--- Check for negative SQRT ---*/
+    SSE_MOVMSKPS_XMM_to_R32(tempReg, t0reg);
+    AND32ItoR(tempReg, 1); //Check sign
+    pjmp2 = JZ8(0); //Skip if not set
+    OR32ItoM((uptr)&fpuRegs.fprc[31], FPUflagI|FPUflagSI); // Set I and SI flags
+    SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
+    x86SetJ8(pjmp2);
+
+    if (CHECK_FPU_EXTRA_OVERFLOW) {
+        SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
+        fpuFloat(regd);
+    }
+
+    SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
+    SSE_DIVSS_XMM_to_XMM(regd, t0reg);
+
+    ClampValues(regd);
+    x86SetJ32(pjmp32);
+
+    _freeXMMreg(t1reg);
+    _freeX86reg(tempReg);
 }
 
-////////////////////////////////////////////////////
-void recBC1FL( void ) {
-    u32 branchTo = _Imm_ * 4 + pc;
-
-    _eeFlushAllUnused();
-    MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]);
-    TEST32ItoR(EAX, FPUflagC);
-    j32Ptr[0] = JNZ32(0);
-
-    SaveBranchState();
-    recompileNextInstruction(1);
-    SetBranchImm(branchTo);
-
-    x86SetJ32(j32Ptr[0]);
-
-    LoadBranchState();
-    SetBranchImm(pc);
+void recRSQRThelper2(int regd, int t0reg) // Performs the RSQRT function when regd <- Fs and t0reg <- Ft (Doesn't set flags)
+{
+    SSE_ANDPS_M128_to_XMM(t0reg, (uptr)&s_pos[0]); // Make t0reg Positive
+    if (CHECK_FPU_EXTRA_OVERFLOW) {
+        SSE_MINSS_M32_to_XMM(t0reg, (uptr)&g_maxvals[0]); // Only need to do positive clamp, since t0reg is positive
+        fpuFloat(regd);
+    }
+    SSE_SQRTSS_XMM_to_XMM(t0reg, t0reg);
+    SSE_DIVSS_XMM_to_XMM(regd, t0reg);
+    ClampValues(regd);
 }
 
-////////////////////////////////////////////////////
-void recBC1TL( void ) {
-    u32 branchTo = _Imm_ * 4 + pc;
+void recRSQRT_S_xmm(int info)
+{
+    int t0reg = _allocTempXMMreg(XMMT_FPS, -1);
+    //if (t0reg == -1) {SysPrintf("FPU: RSQRT Allocation Error!\n");}
+    //SysPrintf("FPU: RSQRT\n");
 
-    _eeFlushAllUnused();
-    MOV32MtoR(EAX, (uptr)&fpuRegs.fprc[31]);
-    TEST32ItoR(EAX, FPUflagC);
-    j32Ptr[0] = JZ32(0);
-
-    SaveBranchState();
-    recompileNextInstruction(1);
-    SetBranchImm(branchTo);
-    x86SetJ32(j32Ptr[0]);
-
-    LoadBranchState();
-    SetBranchImm(pc);
+    switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) {
+        case PROCESS_EE_S:
+            //SysPrintf("FPU: RSQRT case 1\n");
+            if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
+            SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
+            if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
+            else recRSQRThelper2(EEREC_D, t0reg);
+            break;
+        case PROCESS_EE_T:
+            //SysPrintf("FPU: RSQRT case 2\n");
+            SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T);
+            SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
+            if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
+            else recRSQRThelper2(EEREC_D, t0reg);
+            break;
+        case (PROCESS_EE_S|PROCESS_EE_T):
+            //SysPrintf("FPU: RSQRT case 3\n");
+            SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_T);
+            if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
+            if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
+            else recRSQRThelper2(EEREC_D, t0reg);
+            break;
+        default:
+            //SysPrintf("FPU: RSQRT case 4\n");
+            SSE_MOVSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]);
+            SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]);
+            if (CHECK_FPU_EXTRA_FLAGS) recRSQRThelper1(EEREC_D, t0reg);
+            else recRSQRThelper2(EEREC_D, t0reg);
+            break;
+    }
+    _freeXMMreg(t0reg);
 }
 
-#endif
+FPURECOMPILE_CONSTCODE(RSQRT_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);
+
+#endif // FPU_RECOMPILE
 
 #endif // PCSX2_NORECBUILD
diff --git a/pcsx2/x86/iR5900.h b/pcsx2/x86/iR5900.h
index 40f919f90f..f6ddf4a7ba 100644
--- a/pcsx2/x86/iR5900.h
+++ b/pcsx2/x86/iR5900.h
@@ -41,7 +41,7 @@
 #define CP2_RECOMPILE
 
 #define EE_CONST_PROP // rec2 - enables constant propagation (faster)
-#define EE_FPU_REGCACHING 1
+//#define EE_FPU_REGCACHING 1 // Not used anymore, it's always on!
 
 #define PC_GETBLOCK(x) PC_GETBLOCK_(x, recLUT)
 
@@ -243,12 +243,12 @@ void rec##fn(void) \
 #define FPURECOMPILE_CONSTCODE(fn, xmminfo) \
 void rec##fn(void) \
 { \
-	eeFPURecompileCode(rec##fn##_xmm, rec##fn##_, xmminfo); \
+	eeFPURecompileCode(rec##fn##_xmm, fn, xmminfo); \
 }
 #endif
 
 // rd = rs op rt (all regs need to be in xmm)
 int eeRecompileCodeXMM(int xmminfo);
-void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xmminfo);
+void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo);
 
 #endif // __IR5900_H__
diff --git a/pcsx2/x86/ix86-32/iR5900-32.c b/pcsx2/x86/ix86-32/iR5900-32.c
index c434e8dd9d..57acbc57a8 100644
--- a/pcsx2/x86/ix86-32/iR5900-32.c
+++ b/pcsx2/x86/ix86-32/iR5900-32.c
@@ -1290,11 +1290,11 @@ int eeRecompileCodeXMM(int xmminfo)
 #define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0)
 
 // rd = rs op rt
-void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xmminfo)
+void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR fpucode, int xmminfo)
 {
 	int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1;
 
-	if( EE_FPU_REGCACHING && cpucaps.hasStreamingSIMDExtensions ) {
+	if( cpucaps.hasStreamingSIMDExtensions ) {
 		int info = PROCESS_EE_XMM;
 
 		if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_);
@@ -1436,11 +1436,10 @@ void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xm
 		return;
 	}
 
-	if( xmminfo & XMMINFO_READS ) _deleteFPtoXMMreg(_Fs_, 0);
-	if( xmminfo & XMMINFO_READT ) _deleteFPtoXMMreg(_Ft_, 0);
-	if( xmminfo & (XMMINFO_READD|XMMINFO_WRITED) ) _deleteFPtoXMMreg(_Fd_, 0);
-	if( xmminfo & (XMMINFO_READACC|XMMINFO_WRITEACC) ) _deleteFPtoXMMreg(XMMFPU_ACC, 0);
-	fpucode(0);
+	MOV32ItoM((uptr)&cpuRegs.code, cpuRegs.code);
+	MOV32ItoM((uptr)&cpuRegs.pc, pc);
+	iFlushCall(FLUSH_EVERYTHING);
+	CALLFunc((uptr)fpucode);
 }
 
 #undef _Ft_
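For clarity, this is how an FPU opcode is wired up after these changes: FPURECOMPILE_CONSTCODE(fn, xmminfo) now pairs the SSE version rec##fn##_xmm with the plain interpreter function fn (an argument-less R5900FNPTR), and on CPUs without SSE the recompiler simply stores the opcode and PC, flushes everything, and emits a call to that interpreter function, as in the hunk above. The FOO_S names below are placeholders for illustration, not code from this patch:

// Sketch only: hooking up a single-precision FPU op under the new scheme.
void FOO_S();                   // interpreter implementation, R5900FNPTR (takes no arguments)
void recFOO_S_xmm(int info);    // SSE recompiled implementation

// Expands to:  void recFOO_S() { eeFPURecompileCode(recFOO_S_xmm, FOO_S, xmminfo); }
// With SSE available the _xmm version is used; otherwise the fallback path above
// calls FOO_S() after flushing the register caches.
FPURECOMPILE_CONSTCODE(FOO_S, XMMINFO_WRITED|XMMINFO_READS|XMMINFO_READT);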