diff --git a/pcsx2/Docs/ChangeLog.txt b/pcsx2/Docs/ChangeLog.txt index 4c9c3ea7f2..182d60802b 100644 --- a/pcsx2/Docs/ChangeLog.txt +++ b/pcsx2/Docs/ChangeLog.txt @@ -221,7 +221,7 @@ v0.9.1: * 05/08/05:[+] Added faster Cd/DVD reads to the Advanced options (Refraction) [-] The previous DVD/CD change by ref was just a hack, so i commited the old version again - [-] I've commented out RECOMPILE_VUMI_LOADSTORE on iVUmicro.h, instead + [-] I've commented out RECOMPILE_VUMI_LOADSTORE on sVU_Micro.h, instead of commenting it out in iVU1micro.c as ref did on last update, that way is better until i fix the real problem [*] Improved vuDIV for .x to .x cases diff --git a/pcsx2/Misc.cpp b/pcsx2/Misc.cpp index 77992cdc70..d3799e17e2 100644 --- a/pcsx2/Misc.cpp +++ b/pcsx2/Misc.cpp @@ -33,7 +33,7 @@ #include "VUmicro.h" #include "VU.h" #include "iCore.h" -#include "iVUzerorec.h" +#include "sVU_zerorec.h" #include "BaseblockEx.h" // included for devbuild block dumping (which may or may not work anymore?) #include "GS.h" diff --git a/pcsx2/SaveState.cpp b/pcsx2/SaveState.cpp index 569e4e045a..ce42bf2577 100644 --- a/pcsx2/SaveState.cpp +++ b/pcsx2/SaveState.cpp @@ -25,7 +25,7 @@ #include "VUmicro.h" #include "VU.h" #include "iCore.h" -#include "iVUzerorec.h" +#include "sVU_zerorec.h" #include "GS.h" #include "COP0.h" diff --git a/pcsx2/System.cpp b/pcsx2/System.cpp index aa1170d3b2..eff13a3209 100644 --- a/pcsx2/System.cpp +++ b/pcsx2/System.cpp @@ -25,7 +25,7 @@ #include "iR5900.h" #include "R3000A.h" #include "IopMem.h" -#include "iVUzerorec.h" // for SuperVUReset +#include "sVU_zerorec.h" // for SuperVUReset #include "R5900Exceptions.h" diff --git a/pcsx2/VU0micro.cpp b/pcsx2/VU0micro.cpp index 772d8ecda3..1502e2cdb2 100644 --- a/pcsx2/VU0micro.cpp +++ b/pcsx2/VU0micro.cpp @@ -32,7 +32,7 @@ #include "VUflags.h" #include "VUops.h" -#include "iVUzerorec.h" +#include "sVU_zerorec.h" using namespace R5900; diff --git a/pcsx2/VU1micro.cpp b/pcsx2/VU1micro.cpp index dfc3151127..8ebb1b2272 100644 --- a/pcsx2/VU1micro.cpp +++ b/pcsx2/VU1micro.cpp @@ -27,8 +27,8 @@ #include "VU.h" #include "VUops.h" #include "VUmicro.h" -#include "iVUmicro.h" -#include "iVUzerorec.h" +#include "sVU_Micro.h" +#include "sVU_zerorec.h" VURegs* g_pVU1; diff --git a/pcsx2/VUflags.h b/pcsx2/VUflags.h index 4b04a2c012..e0eaad2acf 100644 --- a/pcsx2/VUflags.h +++ b/pcsx2/VUflags.h @@ -16,9 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef __VUFLAGS_H__ -#define __VUFLAGS_H__ - +#pragma once #include "VU.h" void vuUpdateDI(VURegs * VU); @@ -33,7 +31,3 @@ __forceinline void VU_MACz_CLEAR(VURegs * VU); __forceinline void VU_MACw_CLEAR(VURegs * VU); void VU_STAT_UPDATE(VURegs * VU); - -#endif - - diff --git a/pcsx2/VUmicro.h b/pcsx2/VUmicro.h index 0b60109f9d..922d74328a 100644 --- a/pcsx2/VUmicro.h +++ b/pcsx2/VUmicro.h @@ -16,9 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef __VUMICRO_H__ -#define __VUMICRO_H__ - +#pragma once #include "VU.h" struct VUmicroCpu @@ -1315,6 +1313,3 @@ void (*PREFIX##_LOWER_OPCODE[128])(_VURegsNum *VUregsn) = { \ #endif #include "VUops.h" - -#endif - diff --git a/pcsx2/VUmicroMem.cpp b/pcsx2/VUmicroMem.cpp index 59ed7cd653..907d15a8ae 100644 --- a/pcsx2/VUmicroMem.cpp +++ b/pcsx2/VUmicroMem.cpp @@ -18,13 +18,10 @@ #include "PrecompiledHeader.h" -#include - #include "Common.h" #include "R5900.h" #include "VUmicro.h" - -#include "iVUzerorec.h" +#include "sVU_zerorec.h" // The following CpuVU objects are 
value types instead of handles or pointers because they are // modified on the fly to implement VU1 Skip. diff --git a/pcsx2/VUops.h b/pcsx2/VUops.h index 2512002169..c19e021279 100644 --- a/pcsx2/VUops.h +++ b/pcsx2/VUops.h @@ -16,9 +16,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA */ -#ifndef __VU1OPS_H__ -#define __VU1OPS_H__ - +#pragma once #include "VU.h" extern __forceinline u32 VU_MAC_UPDATE( int shift, VURegs * VU, float f); @@ -395,5 +393,3 @@ void _vuRegsEEXP(VURegs * VU, _VURegsNum *VUregsn); void _vuRegsXITOP(VURegs * VU, _VURegsNum *VUregsn); void _vuRegsXGKICK(VURegs * VU, _VURegsNum *VUregsn); void _vuRegsXTOP(VURegs * VU, _VURegsNum *VUregsn); - -#endif diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index f3388e5078..5d2ca8cf13 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -2025,26 +2025,10 @@ RelativePath="..\..\VU0micro.cpp" > - - - - - - - - @@ -2053,14 +2037,6 @@ RelativePath="..\..\VUmicroMem.cpp" > - - - - @@ -2072,34 +2048,6 @@ RelativePath="..\..\x86\iVU1micro.cpp" > - - - - - - - - - - - - - - @@ -2160,6 +2108,66 @@ > + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - #include "Common.h" -#include "R5900OpcodeTables.h" #include "iR5900.h" #include "VUmicro.h" -#include "iVUzerorec.h" +#include "sVU_zerorec.h" #define useMVU1 CHECK_MICROVU1 diff --git a/pcsx2/x86/ix86-32/iR5900-32.cpp b/pcsx2/x86/ix86-32/iR5900-32.cpp index 4a46b80a4c..48212413e8 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.cpp +++ b/pcsx2/x86/ix86-32/iR5900-32.cpp @@ -36,11 +36,11 @@ #include "iMMI.h" #include "iFPU.h" #include "iCOP0.h" -#include "iVUmicro.h" +#include "sVU_Micro.h" #include "VU.h" #include "VUmicro.h" -#include "iVUzerorec.h" +#include "sVU_zerorec.h" #include "vtlb.h" #include "SamplProf.h" diff --git a/pcsx2/x86/ix86-32/iR5900Templates.cpp b/pcsx2/x86/ix86-32/iR5900Templates.cpp index 2149848dfc..b2633bcd6d 100644 --- a/pcsx2/x86/ix86-32/iR5900Templates.cpp +++ b/pcsx2/x86/ix86-32/iR5900Templates.cpp @@ -26,11 +26,11 @@ #include "iMMI.h" #include "iFPU.h" #include "iCOP0.h" -#include "iVUmicro.h" +#include "sVU_Micro.h" #include "VU.h" #include "VUmicro.h" -#include "iVUzerorec.h" +#include "sVU_zerorec.h" #include "vtlb.h" diff --git a/pcsx2/x86/iVUops.h b/pcsx2/x86/sVU_Debug.h similarity index 95% rename from pcsx2/x86/iVUops.h rename to pcsx2/x86/sVU_Debug.h index e369935c19..9c6c023e9a 100644 --- a/pcsx2/x86/iVUops.h +++ b/pcsx2/x86/sVU_Debug.h @@ -1,57 +1,59 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -#define REC_VUOP(VU, f) { \ - _freeXMMregs(/*&VU*/); \ - _freeMMXregs(); \ - SetFPUstate();) \ - MOV32ItoM((uptr)&VU.code, (u32)VU.code); \ - CALLFunc((uptr)VU##MI_##f); \ -} - -#define REC_VUOPs(VU, f) { \ - _freeXMMregs(); \ - _freeMMXregs(); \ - SetFPUstate();) \ - if (VU==&VU1) { \ - MOV32ItoM((uptr)&VU1.code, (u32)VU1.code); \ - CALLFunc((uptr)VU1MI_##f); \ - } \ - else { \ - MOV32ItoM((uptr)&VU0.code, (u32)VU0.code); \ - CALLFunc((uptr)VU0MI_##f); \ - } \ -} - -#define REC_VUOPFLAGS(VU, f) { \ - _freeXMMregs(/*&VU*/); \ - _freeMMXregs(); \ - SetFPUstate(); \ - MOV32ItoM((uptr)&VU.code, (u32)VU.code); \ - CALLFunc((uptr)VU##MI_##f); \ -} - -#define REC_VUBRANCH(VU, f) { \ - _freeXMMregs(/*&VU*/); \ - _freeMMXregs(); \ - SetFPUstate(); \ - MOV32ItoM((uptr)&VU.code, (u32)VU.code); \ - MOV32ItoM((uptr)&VU.VI[REG_TPC].UL, (u32)pc); \ - CALLFunc((uptr)VU##MI_##f); \ - branch = 1; \ -} +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#pragma once + +#define REC_VUOP(VU, f) { \ + _freeXMMregs(/*&VU*/); \ + _freeMMXregs(); \ + SetFPUstate();) \ + MOV32ItoM((uptr)&VU.code, (u32)VU.code); \ + CALLFunc((uptr)VU##MI_##f); \ +} + +#define REC_VUOPs(VU, f) { \ + _freeXMMregs(); \ + _freeMMXregs(); \ + SetFPUstate();) \ + if (VU==&VU1) { \ + MOV32ItoM((uptr)&VU1.code, (u32)VU1.code); \ + CALLFunc((uptr)VU1MI_##f); \ + } \ + else { \ + MOV32ItoM((uptr)&VU0.code, (u32)VU0.code); \ + CALLFunc((uptr)VU0MI_##f); \ + } \ +} + +#define REC_VUOPFLAGS(VU, f) { \ + _freeXMMregs(/*&VU*/); \ + _freeMMXregs(); \ + SetFPUstate(); \ + MOV32ItoM((uptr)&VU.code, (u32)VU.code); \ + CALLFunc((uptr)VU##MI_##f); \ +} + +#define REC_VUBRANCH(VU, f) { \ + _freeXMMregs(/*&VU*/); \ + _freeMMXregs(); \ + SetFPUstate(); \ + MOV32ItoM((uptr)&VU.code, (u32)VU.code); \ + MOV32ItoM((uptr)&VU.VI[REG_TPC].UL, (u32)pc); \ + CALLFunc((uptr)VU##MI_##f); \ + branch = 1; \ +} diff --git a/pcsx2/x86/iVUmicroLower.cpp b/pcsx2/x86/sVU_Lower.cpp similarity index 96% rename from pcsx2/x86/iVUmicroLower.cpp rename to pcsx2/x86/sVU_Lower.cpp index fcae6ca866..26d6e76ee2 100644 --- a/pcsx2/x86/iVUmicroLower.cpp +++ b/pcsx2/x86/sVU_Lower.cpp @@ -1,2009 +1,2009 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "GS.h" -#include "R5900OpcodeTables.h" -#include "iR5900.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCOP0.h" -#include "VUmicro.h" -#include "VUflags.h" -#include "iVUmicro.h" -#include "iVUops.h" -#include "iVUzerorec.h" -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register -#define _It_ (_Ft_ & 15) -#define _Is_ (_Fs_ & 15) -#define _Id_ (_Fd_ & 15) - -#define _X (( VU->code>>24) & 0x1) -#define _Y (( VU->code>>23) & 0x1) -#define _Z (( VU->code>>22) & 0x1) -#define _W (( VU->code>>21) & 0x1) - -#define _XYZW_SS (_X+_Y+_Z+_W==1) - -#define _Fsf_ (( VU->code >> 21) & 0x03) -#define _Ftf_ (( VU->code >> 23) & 0x03) - -#define _Imm11_ (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) -#define _UImm11_ (s32)(VU->code & 0x7ff) - -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] -#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] -#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] - -#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] -#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] -#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] - -#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) - -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] -#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] -#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] -#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] - -#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) - - -static const PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// *VU Lower Instructions!* -// -// Note: * = Checked for errors by cottonvibes -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// DIV* -//------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 DIV_TEMP_XMM[2]); -void recVUMI_DIV(VURegs *VU, int info) -{ - u8 *pjmp, *pjmp1; - u32 *ajmp32, *bjmp32; - - //Console::WriteLn("recVUMI_DIV()"); - AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags - - // FT can be zero here! so we need to check if its zero and set the correct flag. 
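// [annotation, not part of the patch] Status-flag encoding used by the three
// constants here: I (invalid) is bit 4, D (divide-by-zero) is bit 5, and their
// sticky copies IS/DS are bits 10/11. So the 0xFCF mask clears I and D while
// preserving the sticky bits, 0x410 = I|IS (raised for 0/0), and 0x820 = D|DS
// (plain division by zero).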
- SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP - SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32( EAX, EEREC_TEMP); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, (1<<_Ftf_) ); // Grab "Is Zero" bits from the previous calculation - ajmp32 = JZ32(0); // Skip if none are - - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP - SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, (1<<_Fsf_) ); // Grab "Is Zero" bits from the previous calculation - pjmp = JZ8(0); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) - pjmp1 = JMP8(0); - x86SetJ8(pjmp); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(pjmp1); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax - - bjmp32 = JMP32(0); - - x86SetJ32(ajmp32); - - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat5_useEAX(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_))); - vuFloat5_useEAX(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_))); - } - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); - - vuFloat_useEAX(info, EEREC_TEMP, 0x8); - - x86SetJ32(bjmp32); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQRT* -//------------------------------------------------------------------ -void recVUMI_SQRT( VURegs *VU, int info ) -{ - u8* pjmp; - //Console::WriteLn("recVUMI_SQRT()"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); - AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags - - /* Check for negative sqrt */ - SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); - AND32ItoR(EAX, 1); //Check sign - pjmp = JZ8(0); //Skip if none are - OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt - x86SetJ8(pjmp); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // Do a cardinal sqrt - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Clamp infinities (only need to do positive clamp since EEREC_TEMP is positive) - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RSQRT* -//------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 RSQRT_TEMP_XMM[2]); -void recVUMI_RSQRT(VURegs *VU, int info) -{ - u8 *ajmp8, *bjmp8; - u8 *qjmp1, *qjmp2; - int t1reg, t1boolean; - //Console::WriteLn("recVUMI_RSQRT()"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); - AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags - - /* Check for negative divide */ - SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); - AND32ItoR(EAX, 1); //Check sign - ajmp8 = JZ8(0); //Skip if none are - 
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt - x86SetJ8(ajmp8); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // Do a cardinal sqrt - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Clamp Infinities to Fmax - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - t1reg = _vuGetTempXMMreg(info); - if( t1reg < 0 ) { - for (t1reg = 0; ( (t1reg == EEREC_TEMP) || (t1reg == EEREC_S) ); t1reg++) - ; // Makes t1reg not be EEREC_TEMP or EEREC_S. - SSE_MOVAPS_XMM_to_M128( (uptr)&RSQRT_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address - t1boolean = 1; - } - else t1boolean = 0; - - // Ft can still be zero here! so we need to check if its zero and set the correct flag. - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg - SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero - - SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation - ajmp8 = JZ8(0); // Skip if none are - - //check for 0/0 - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear EEREC_TEMP - SSE_CMPEQPS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero - SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation - - AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation - qjmp1 = JZ8(0); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) - qjmp2 = JMP8(0); - x86SetJ8(qjmp1); - OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) - x86SetJ8(qjmp2); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - bjmp8 = JMP8(0); - x86SetJ8(ajmp8); - - _unpackVFSS_xyzw(t1reg, EEREC_S, _Fsf_); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat_useEAX(info, t1reg, 0x8); // Clamp Infinities - SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); - vuFloat_useEAX(info, t1reg, 0x8); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); - - x86SetJ8(bjmp8); - - if (t1boolean) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&RSQRT_TEMP_XMM[0] ); // restore t1reg data - else _freeXMMreg(t1reg); // free t1reg -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _addISIMMtoIT() - Used in IADDI, IADDIU, and ISUBIU instructions -//------------------------------------------------------------------ -void _addISIMMtoIT(VURegs *VU, s16 imm, int info) -{ - int isreg = -1, itreg; - if (_It_ == 0) return; - - if( _Is_ == 0 ) { - itreg = ALLOCVI(_It_, MODE_WRITE); - MOV32ItoR(itreg, imm&0xffff); - return; - } - - ADD_VI_NEEDED(_It_); - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_WRITE); - - if ( _It_ == _Is_ ) { - if (imm != 0 ) ADD16ItoR(itreg, imm); - } - else { - if( imm ) { - LEA32RtoR(itreg, isreg, imm); - MOVZX32R16toR(itreg, itreg); - } - else MOV32RtoR(itreg, isreg); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IADDI -//------------------------------------------------------------------ -void recVUMI_IADDI(VURegs *VU, int info) -{ - s16 imm; - - if ( _It_ == 0 ) return; - 
//Console::WriteLn("recVUMI_IADDI"); - imm = ( VU->code >> 6 ) & 0x1f; - imm = ( imm & 0x10 ? 0xfff0 : 0) | ( imm & 0xf ); - _addISIMMtoIT(VU, imm, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IADDIU -//------------------------------------------------------------------ -void recVUMI_IADDIU(VURegs *VU, int info) -{ - s16 imm; - - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_IADDIU"); - imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff ); - _addISIMMtoIT(VU, imm, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IADD -//------------------------------------------------------------------ -void recVUMI_IADD( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console::WriteLn("recVUMI_IADD"); - if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - XOR32RtoR(idreg, idreg); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - if ( _Is_ == 0 ) - { - if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { - if( idreg != itreg ) MOV32RtoR(idreg, itreg); - } - else MOVZX32M16toR(idreg, VU_VI_ADDR(_It_, 1)); - } - else if ( _It_ == 0 ) - { - if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { - if( idreg != isreg ) MOV32RtoR(idreg, isreg); - } - else MOVZX32M16toR(idreg, VU_VI_ADDR(_Is_, 1)); - } - else { - //ADD_VI_NEEDED(_It_); - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) ADD32RtoR(idreg, itreg); - else if( idreg == itreg ) ADD32RtoR(idreg, isreg); - else LEA32RRtoR(idreg, isreg, itreg); - MOVZX32R16toR(idreg, idreg); // needed since don't know if idreg's upper bits are 0 - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IAND -//------------------------------------------------------------------ -void recVUMI_IAND( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console::WriteLn("recVUMI_IAND"); - if ( ( _Is_ == 0 ) || ( _It_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - XOR32RtoR(idreg, idreg); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) AND16RtoR(idreg, itreg); - else if( idreg == itreg ) AND16RtoR(idreg, isreg); - else { - MOV32RtoR(idreg, itreg); - AND32RtoR(idreg, isreg); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// IOR -//------------------------------------------------------------------ -void recVUMI_IOR( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console::WriteLn("recVUMI_IOR"); - if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - XOR32RtoR(idreg, idreg); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - if ( _Is_ == 0 ) - { - if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { - if( idreg != itreg ) MOV32RtoR(idreg, itreg); - } - else 
MOVZX32M16toR(idreg, VU_VI_ADDR(_It_, 1)); - } - else if ( _It_ == 0 ) - { - if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { - if( idreg != isreg ) MOV32RtoR(idreg, isreg); - } - else MOVZX32M16toR(idreg, VU_VI_ADDR(_Is_, 1)); - } - else - { - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) OR16RtoR(idreg, itreg); - else if( idreg == itreg ) OR16RtoR(idreg, isreg); - else { - MOV32RtoR(idreg, isreg); - OR32RtoR(idreg, itreg); - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ISUB -//------------------------------------------------------------------ -void recVUMI_ISUB( VURegs *VU, int info ) -{ - int idreg, isreg = -1, itreg = -1; - if ( _Id_ == 0 ) return; - //Console::WriteLn("recVUMI_ISUB"); - if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { - idreg = ALLOCVI(_Id_, MODE_WRITE); - XOR32RtoR(idreg, idreg); - return; - } - - ADD_VI_NEEDED(_Is_); - ADD_VI_NEEDED(_It_); - idreg = ALLOCVI(_Id_, MODE_WRITE); - - if ( _Is_ == 0 ) - { - if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { - if( idreg != itreg ) MOV32RtoR(idreg, itreg); - } - else MOVZX32M16toR(idreg, VU_VI_ADDR(_It_, 1)); - NEG16R(idreg); - } - else if ( _It_ == 0 ) - { - if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { - if( idreg != isreg ) MOV32RtoR(idreg, isreg); - } - else MOVZX32M16toR(idreg, VU_VI_ADDR(_Is_, 1)); - } - else - { - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - if( idreg == isreg ) SUB16RtoR(idreg, itreg); - else if( idreg == itreg ) { - SUB16RtoR(idreg, isreg); - NEG16R(idreg); - } - else { - MOV32RtoR(idreg, isreg); - SUB16RtoR(idreg, itreg); - } - } -} -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// ISUBIU -//------------------------------------------------------------------ -void recVUMI_ISUBIU( VURegs *VU, int info ) -{ - s16 imm; - - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_ISUBIU"); - imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff ); - imm = -imm; - _addISIMMtoIT(VU, imm, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MOVE* -//------------------------------------------------------------------ -void recVUMI_MOVE( VURegs *VU, int info ) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console::WriteLn("recVUMI_MOVE"); - if (_X_Y_Z_W == 0x8) SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_S); - else if (_X_Y_Z_W == 0xf) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MFIR* -//------------------------------------------------------------------ -void recVUMI_MFIR( VURegs *VU, int info ) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console::WriteLn("recVUMI_MFIR"); - _deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, 1); - - if( _XYZW_SS ) { - SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Is_, 1)-2); - _vuFlipRegSS(VU, EEREC_T); - SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16); - SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); - _vuFlipRegSS(VU, 
EEREC_T); - } - else if (_X_Y_Z_W != 0xf) { - SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Is_, 1)-2); - SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Is_, 1)-2); - SSE2_PSRAD_I8_to_XMM(EEREC_T, 16); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MTIR* -//------------------------------------------------------------------ -void recVUMI_MTIR( VURegs *VU, int info ) -{ - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_MTIR"); - _deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, 2); - - if( _Fsf_ == 0 ) { - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(_It_, 0), EEREC_S); - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(_It_, 0), EEREC_TEMP); - } - - AND32ItoM(VU_VI_ADDR(_It_, 0), 0xffff); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MR32* -//------------------------------------------------------------------ -void recVUMI_MR32( VURegs *VU, int info ) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console::WriteLn("recVUMI_MR32"); - if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x39); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x39); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _loadEAX() -// -// NOTE: If x86reg < 0, reads directly from offset -//------------------------------------------------------------------ -void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info) -{ - assert( offset < 0x80000000 ); - - if( x86reg >= 0 ) { - switch(_X_Y_Z_W) { - case 3: // ZW - SSE_MOVHPS_Rm_to_XMM(EEREC_T, x86reg, offset+8); - break; - case 6: // YZ - SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0x9c); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); - break; - - case 8: // X - SSE_MOVSS_Rm_to_XMM(EEREC_TEMP, x86reg, offset); - SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); - break; - case 9: // XW - SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0xc9); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); - break; - case 12: // XY - SSE_MOVLPS_Rm_to_XMM(EEREC_T, x86reg, offset); - break; - case 15: - if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_T, x86reg, offset); - else SSE_MOVUPSRmtoR(EEREC_T, x86reg, offset); - break; - default: - if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_TEMP, x86reg, offset); - else SSE_MOVUPSRmtoR(EEREC_TEMP, x86reg, offset); - - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - break; - } - } - else { - switch(_X_Y_Z_W) { - case 3: // ZW - SSE_MOVHPS_M64_to_XMM(EEREC_T, offset+8); - break; - case 6: // YZ - SSE_SHUFPS_M128_to_XMM(EEREC_T, offset, 0x9c); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); - break; - case 8: // X - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, offset); - SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); - break; - case 9: // XW - SSE_SHUFPS_M128_to_XMM(EEREC_T, offset, 0xc9); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); - break; - case 12: // XY - SSE_MOVLPS_M64_to_XMM(EEREC_T, offset); - break; - case 15: - if( VU == &VU1 ) SSE_MOVAPS_M128_to_XMM(EEREC_T, offset); - else 
SSE_MOVUPS_M128_to_XMM(EEREC_T, offset); - break; - default: - if( VU == &VU1 ) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset); - else SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - break; - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// recVUTransformAddr() -//------------------------------------------------------------------ -int recVUTransformAddr(int x86reg, VURegs* VU, int vireg, int imm) -{ - u8* pjmp[2]; - if( x86reg == EAX ) { - if (imm) ADD32ItoR(x86reg, imm); - } - else { - if( imm ) LEA32RtoR(EAX, x86reg, imm); - else MOV32RtoR(EAX, x86reg); - } - - if( VU == &VU1 ) { - AND32ItoR(EAX, 0x3ff); // wrap around - SHL32ItoR(EAX, 4); - } - else { - - // VU0 has a somewhat interesting memory mapping: - // if addr >= 0x4000, reads VU1's VF regs and VI regs - // if addr < 0x4000, wrap around at 0x1000 - - CMP32ItoR(EAX, 0x400); - pjmp[0] = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs - AND32ItoR(EAX, 0x43f); - pjmp[1] = JMP8(0); - x86SetJ8(pjmp[0]); - AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around - x86SetJ8(pjmp[1]); - - SHL32ItoR(EAX, 4); // multiply by 16 (shift left by 4) - } - - return EAX; -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// LQ -//------------------------------------------------------------------ -void recVUMI_LQ(VURegs *VU, int info) -{ - s16 imm; - if ( _Ft_ == 0 ) return; - //Console::WriteLn("recVUMI_LQ"); - imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); - if (_Is_ == 0) { - _loadEAX(VU, -1, (uptr)GET_VU_MEM(VU, (u32)imm*16), info); - } - else { - int isreg = ALLOCVI(_Is_, MODE_READ); - _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, imm), (uptr)VU->Mem, info); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// LQD -//------------------------------------------------------------------ -void recVUMI_LQD( VURegs *VU, int info ) -{ - int isreg; - //Console::WriteLn("recVUMI_LQD"); - if ( _Is_ != 0 ) { - isreg = ALLOCVI(_Is_, MODE_READ|MODE_WRITE); - SUB16ItoR( isreg, 1 ); - } - - if ( _Ft_ == 0 ) return; - - if ( _Is_ == 0 ) _loadEAX(VU, -1, (uptr)VU->Mem, info); - else _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// LQI -//------------------------------------------------------------------ -void recVUMI_LQI(VURegs *VU, int info) -{ - int isreg; - //Console::WriteLn("recVUMI_LQI"); - if ( _Ft_ == 0 ) { - if( _Is_ != 0 ) { - if( (isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_WRITE|MODE_READ)) >= 0 ) { - ADD16ItoR(isreg, 1); - } - else { - ADD16ItoM( VU_VI_ADDR( _Is_, 0 ), 1 ); - } - } - return; - } - - if (_Is_ == 0) { - _loadEAX(VU, -1, (uptr)VU->Mem, info); - } - else { - isreg = ALLOCVI(_Is_, MODE_READ|MODE_WRITE); - _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem, info); - ADD16ItoR( isreg, 1 ); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _saveEAX() -//------------------------------------------------------------------ -void _saveEAX(VURegs *VU, 
int x86reg, uptr offset, int info) -{ - assert( offset < 0x80000000 ); - - if ( _Fs_ == 0 ) { - if ( _XYZW_SS ) { - u32 c = _W ? 0x3f800000 : 0; - if ( x86reg >= 0 ) MOV32ItoRm(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); - else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); - } - else { - - // (this is one of my test cases for the new emitter --air) - using namespace x86Emitter; - xAddressReg thisreg( x86reg ); - - if ( _X ) xMOV(ptr32[thisreg+offset], 0x00000000); - if ( _Y ) xMOV(ptr32[thisreg+offset+4], 0x00000000); - if ( _Z ) xMOV(ptr32[thisreg+offset+8], 0x00000000); - if ( _W ) xMOV(ptr32[thisreg+offset+12], 0x3f800000); - } - return; - } - - switch ( _X_Y_Z_W ) { - case 1: // W - SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x27); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); - else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); - break; - case 2: // Z - SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); - else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); - break; - case 3: // ZW - if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); - else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); - break; - case 4: // Y - SSE2_PSHUFLW_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4e); - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); - else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_TEMP); - break; - case 5: // YW - SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); - SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset+4); - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); - } - else { - SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S); - SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); - } - SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); - break; - case 6: // YZ - SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xc9); - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); - else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_TEMP); - break; - case 7: // YZW - SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x93); //ZYXW - if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); - } - else { - SSE_MOVHPS_XMM_to_M64(offset+4, EEREC_TEMP); - SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); - } - break; - case 8: // X - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); - else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); - break; - case 9: // XW - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); - else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); - - SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xff); //WWWW - - if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); - else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); - - break; - case 10: //XZ - SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); - } - else { - SSE_MOVSS_XMM_to_M32(offset, EEREC_S); - SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); - } - break; - case 11: //XZW - if ( x86reg >= 0 ) { - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); - SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); - } - else { - SSE_MOVSS_XMM_to_M32(offset, EEREC_S); - SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); - } - break; - case 12: // XY - if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); - else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); - break; - case 13: // XYW - 
SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4b); //YXZW - if ( x86reg >= 0 ) { - SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+0); - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); - } - else { - SSE_MOVHPS_XMM_to_M64(offset, EEREC_TEMP); - SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); - } - break; - case 14: // XYZ - SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if ( x86reg >= 0 ) { - SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); - SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); - } - else { - SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); - SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); - } - break; - case 15: // XYZW - if ( VU == &VU1 ) { - if( x86reg >= 0 ) SSE_MOVAPSRtoRm(x86reg, EEREC_S, offset+0); - else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); - } - else { - if( x86reg >= 0 ) SSE_MOVUPSRtoRm(x86reg, EEREC_S, offset+0); - else { - if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S); - else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); - } - } - break; - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQ -//------------------------------------------------------------------ -void recVUMI_SQ(VURegs *VU, int info) -{ - s16 imm; - //Console::WriteLn("recVUMI_SQ"); - imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); - if ( _It_ == 0 ) _saveEAX(VU, -1, (uptr)GET_VU_MEM(VU, (int)imm * 16), info); - else { - int itreg = ALLOCVI(_It_, MODE_READ); - _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, imm), (uptr)VU->Mem, info); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQD -//------------------------------------------------------------------ -void recVUMI_SQD(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_SQD"); - if (_It_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info); - else { - int itreg = ALLOCVI(_It_, MODE_READ|MODE_WRITE); - SUB16ItoR( itreg, 1 ); - _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, 0), (uptr)VU->Mem, info); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SQI -//------------------------------------------------------------------ -void recVUMI_SQI(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_SQI"); - if (_It_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info); - else { - int itreg = ALLOCVI(_It_, MODE_READ|MODE_WRITE); - _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, 0), (uptr)VU->Mem, info); - ADD16ItoR( itreg, 1 ); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ILW -//------------------------------------------------------------------ -void recVUMI_ILW(VURegs *VU, int info) -{ - int itreg; - s16 imm, off; - - if ( ( _It_ == 0 ) || ( _X_Y_Z_W == 0 ) ) return; - //Console::WriteLn("recVUMI_ILW"); - imm = ( VU->code & 0x400) ? 
( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); - if (_X) off = 0; - else if (_Y) off = 4; - else if (_Z) off = 8; - else if (_W) off = 12; - - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_WRITE); - - if ( _Is_ == 0 ) { - MOVZX32M16toR( itreg, (uptr)GET_VU_MEM(VU, (int)imm * 16 + off) ); - } - else { - int isreg = ALLOCVI(_Is_, MODE_READ); - MOV32RmtoR(itreg, recVUTransformAddr(isreg, VU, _Is_, imm), (uptr)VU->Mem + off); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ISW -//------------------------------------------------------------------ -void recVUMI_ISW( VURegs *VU, int info ) -{ - s16 imm; - //Console::WriteLn("recVUMI_ISW"); - imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); - - if (_Is_ == 0) { - uptr off = (uptr)GET_VU_MEM(VU, (int)imm * 16); - int itreg = ALLOCVI(_It_, MODE_READ); - - if (_X) MOV32RtoM(off, itreg); - if (_Y) MOV32RtoM(off+4, itreg); - if (_Z) MOV32RtoM(off+8, itreg); - if (_W) MOV32RtoM(off+12, itreg); - } - else { - int x86reg, isreg, itreg; - - ADD_VI_NEEDED(_It_); - isreg = ALLOCVI(_Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_READ); - - x86reg = recVUTransformAddr(isreg, VU, _Is_, imm); - - if (_X) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+12); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ILWR -//------------------------------------------------------------------ -void recVUMI_ILWR( VURegs *VU, int info ) -{ - int off, itreg; - - if ( ( _It_ == 0 ) || ( _X_Y_Z_W == 0 ) ) return; - //Console::WriteLn("recVUMI_ILWR"); - if (_X) off = 0; - else if (_Y) off = 4; - else if (_Z) off = 8; - else if (_W) off = 12; - - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_WRITE); - - if ( _Is_ == 0 ) { - MOVZX32M16toR( itreg, (uptr)VU->Mem + off ); - } - else { - int isreg = ALLOCVI(_Is_, MODE_READ); - MOVZX32Rm16toR(itreg, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem + off); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ISWR -//------------------------------------------------------------------ -void recVUMI_ISWR( VURegs *VU, int info ) -{ - int itreg; - //Console::WriteLn("recVUMI_ISWR"); - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_READ); - - if (_Is_ == 0) { - if (_X) MOV32RtoM((uptr)VU->Mem, itreg); - if (_Y) MOV32RtoM((uptr)VU->Mem+4, itreg); - if (_Z) MOV32RtoM((uptr)VU->Mem+8, itreg); - if (_W) MOV32RtoM((uptr)VU->Mem+12, itreg); - } - else { - int x86reg; - int isreg = ALLOCVI(_Is_, MODE_READ); - x86reg = recVUTransformAddr(isreg, VU, _Is_, 0); - - if (_X) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem); - if (_Y) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+4); - if (_Z) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+8); - if (_W) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+12); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RINIT* -//------------------------------------------------------------------ -void recVUMI_RINIT(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_RINIT()"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && 
(xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) { - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 2); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)s_mask); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)VU_ONE); - SSE_MOVSS_XMM_to_M32(VU_REGR_ADDR, EEREC_TEMP); - } - else { - int rreg = ALLOCVI(REG_R, MODE_WRITE); - - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - MOV32MtoR( rreg, VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ ); - AND32ItoR( rreg, 0x7fffff ); - OR32ItoR( rreg, 0x7f << 23 ); - - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RGET* -//------------------------------------------------------------------ -void recVUMI_RGET(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_RGET()"); - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - - if (_X_Y_Z_W != 0xf) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_REGR_ADDR); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_T, VU_REGR_ADDR); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RNEXT* -//------------------------------------------------------------------ -void recVUMI_RNEXT( VURegs *VU, int info ) -{ - int rreg, x86temp0, x86temp1; - //Console::WriteLn("recVUMI_RNEXT()"); - - rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ); - - x86temp0 = ALLOCTEMPX86(0); - x86temp1 = ALLOCTEMPX86(0); - - // code from www.project-fao.org - //MOV32MtoR(rreg, VU_REGR_ADDR); - MOV32RtoR(x86temp0, rreg); - SHR32ItoR(x86temp0, 4); - AND32ItoR(x86temp0, 1); - - MOV32RtoR(x86temp1, rreg); - SHR32ItoR(x86temp1, 22); - AND32ItoR(x86temp1, 1); - - SHL32ItoR(rreg, 1); - XOR32RtoR(x86temp0, x86temp1); - XOR32RtoR(rreg, x86temp0); - AND32ItoR(rreg, 0x7fffff); - OR32ItoR(rreg, 0x3f800000); - - _freeX86reg(x86temp0); - _freeX86reg(x86temp1); - - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) { - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - return; - } - - recVUMI_RGET(VU, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// RXOR* -//------------------------------------------------------------------ -void recVUMI_RXOR( VURegs *VU, int info ) -{ - //Console::WriteLn("recVUMI_RXOR()"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) { - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - - SSE_XORPS_M128_to_XMM(EEREC_TEMP, VU_REGR_ADDR); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)s_mask); - SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)s_fones); - SSE_MOVSS_XMM_to_M32(VU_REGR_ADDR, EEREC_TEMP); - } - else { - int rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ); - - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - XOR32MtoR( rreg, VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ ); - AND32ItoR( rreg, 0x7fffff ); - OR32ItoR ( rreg, 0x3f800000 ); - - _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 
1); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// WAITQ -//------------------------------------------------------------------ -void recVUMI_WAITQ( VURegs *VU, int info ) -{ - //Console::WriteLn("recVUMI_WAITQ"); -// if( info & PROCESS_VU_SUPER ) { -// //CALLFunc(waitqfn); -// SuperVUFlush(0, 1); -// } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSAND -//------------------------------------------------------------------ -void recVUMI_FSAND( VURegs *VU, int info ) -{ - int itreg; - u16 imm; - //Console::WriteLn("recVUMI_FSAND"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); - if(_It_ == 0) return; - - itreg = ALLOCVI(_It_, MODE_WRITE); - MOV32MtoR( itreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); - AND32ItoR( itreg, imm ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSEQ -//------------------------------------------------------------------ -void recVUMI_FSEQ( VURegs *VU, int info ) -{ - int itreg; - u16 imm; - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_FSEQ"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); - - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_8BITREG); - - MOVZX32M16toR( EAX, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); - XOR32RtoR(itreg, itreg); - CMP16ItoR(EAX, imm); - SETE8R(itreg); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSOR -//------------------------------------------------------------------ -void recVUMI_FSOR( VURegs *VU, int info ) -{ - int itreg; - u32 imm; - if(_It_ == 0) return; - //Console::WriteLn("recVUMI_FSOR"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); - - itreg = ALLOCVI(_It_, MODE_WRITE); - - MOVZX32M16toR( itreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); - OR32ItoR( itreg, imm ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FSSET -//------------------------------------------------------------------ -void recVUMI_FSSET(VURegs *VU, int info) -{ - u32 writeaddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); - u32 prevaddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); - - u16 imm = 0; - //Console::WriteLn("recVUMI_FSSET"); - imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7FF); - - // keep the low 6 bits ONLY if the upper instruction is an fmac instruction (otherwise rewrite) - metal gear solid 3 - //if( (info & PROCESS_VU_SUPER) && VUREC_FMAC ) { - MOV32MtoR(EAX, prevaddr); - AND32ItoR(EAX, 0x3f); - if ((imm&0xfc0) != 0) OR32ItoR(EAX, imm & 0xFC0); - MOV32RtoM(writeaddr ? writeaddr : prevaddr, EAX); - //} - //else { - // MOV32ItoM(writeaddr ? 
writeaddr : prevaddr, imm&0xfc0); - //} -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FMAND -//------------------------------------------------------------------ -void recVUMI_FMAND( VURegs *VU, int info ) -{ - int isreg, itreg; - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_FMAND"); - isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_WRITE);//|MODE_8BITREG); - - if( isreg >= 0 ) { - if( itreg != isreg ) MOV32RtoR(itreg, isreg); - } - else MOVZX32M16toR(itreg, VU_VI_ADDR(_Is_, 1)); - - AND16MtoR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FMEQ -//------------------------------------------------------------------ -void recVUMI_FMEQ( VURegs *VU, int info ) -{ - int itreg, isreg; - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_FMEQ"); - if( _It_ == _Is_ ) { - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_READ);//|MODE_8BITREG - - CMP16MtoR(itreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); - SETE8R(EAX); - MOVZX32R8toR(itreg, EAX); - } - else { - ADD_VI_NEEDED(_Is_); - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_8BITREG); - isreg = ALLOCVI(_Is_, MODE_READ); - - XOR32RtoR(itreg, itreg); - - CMP16MtoR(isreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); - SETE8R(itreg); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FMOR -//------------------------------------------------------------------ -void recVUMI_FMOR( VURegs *VU, int info ) -{ - int isreg, itreg; - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_FMOR"); - if( _Is_ == 0 ) { - itreg = ALLOCVI(_It_, MODE_WRITE);//|MODE_8BITREG); - MOVZX32M16toR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); - } - else if( _It_ == _Is_ ) { - itreg = ALLOCVI(_It_, MODE_WRITE|MODE_READ);//|MODE_8BITREG); - OR16MtoR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); - } - else { - isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_READ); - itreg = ALLOCVI(_It_, MODE_WRITE); - - MOVZX32M16toR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); - - if( isreg >= 0 ) - OR16RtoR( itreg, isreg ); - else - OR16MtoR( itreg, VU_VI_ADDR(_Is_, 1) ); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCAND -//------------------------------------------------------------------ -void recVUMI_FCAND( VURegs *VU, int info ) -{ - int itreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG); - //Console::WriteLn("recVUMI_FCAND"); - MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1) ); - XOR32RtoR( itreg, itreg ); - AND32ItoR( EAX, VU->code & 0xFFFFFF ); - - SETNZ8R(itreg); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCEQ -//------------------------------------------------------------------ -void recVUMI_FCEQ( VURegs *VU, int info ) -{ - int itreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG); - //Console::WriteLn("recVUMI_FCEQ"); - MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1) ); - AND32ItoR( EAX, 0xffffff ); - XOR32RtoR( itreg, itreg ); - CMP32ItoR( EAX, VU->code&0xffffff ); - - SETE8R(itreg); -} -//------------------------------------------------------------------ - - 
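// [annotation, not part of the patch] For reference, the FCAND/FCEQ blocks
// above reduce to the following scalar logic. A sketch only: the helper names
// are hypothetical, the u32/u16 typedefs are assumed from the usual Pcsx2
// headers, "code" is the 24-bit immediate embedded in the opcode, and the
// result lands in VI[1].

static u16 fcandRef(u32 clipflag, u32 code) {
	return (clipflag & code & 0xffffff) ? 1 : 0;  // SETNZ: any masked bit set?
}
static u16 fceqRef(u32 clipflag, u32 code) {
	return ((clipflag & 0xffffff) == (code & 0xffffff)) ? 1 : 0;  // SETE
}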
-//------------------------------------------------------------------ -// FCOR -//------------------------------------------------------------------ -void recVUMI_FCOR( VURegs *VU, int info ) -{ - int itreg; - //Console::WriteLn("recVUMI_FCOR"); - itreg = ALLOCVI(1, MODE_WRITE); - MOV32MtoR( itreg, VU_VI_ADDR(REG_CLIP_FLAG, 1) ); - OR32ItoR ( itreg, VU->code ); - AND32ItoR( itreg, 0xffffff ); - ADD32ItoR( itreg, 1 ); // If 24 1's will make 25th bit 1, else 0 - SHR32ItoR( itreg, 24 ); // Get the 25th bit (also clears the rest of the garbage in the reg) -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCSET -//------------------------------------------------------------------ -void recVUMI_FCSET( VURegs *VU, int info ) -{ - u32 addr = VU_VI_ADDR(REG_CLIP_FLAG, 0); - //Console::WriteLn("recVUMI_FCSET"); - MOV32ItoM(addr ? addr : VU_VI_ADDR(REG_CLIP_FLAG, 2), VU->code&0xffffff ); - - if( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) - MOV32ItoM( VU_VI_ADDR(REG_CLIP_FLAG, 1), VU->code&0xffffff ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// FCGET -//------------------------------------------------------------------ -void recVUMI_FCGET( VURegs *VU, int info ) -{ - int itreg; - if(_It_ == 0) return; - //Console::WriteLn("recVUMI_FCGET"); - itreg = ALLOCVI(_It_, MODE_WRITE); - - MOV32MtoR(itreg, VU_VI_ADDR(REG_CLIP_FLAG, 1)); - AND32ItoR(itreg, 0x0fff); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// _recbranchAddr() -// -// NOTE: Due to static var dependencies, several SuperVU branch instructions -// are still located in iVUzerorec.cpp. 
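// [annotation, not part of the patch] The "add 1, shift right 24" trick in
// FCOR above, in scalar form (same assumptions as the fcandRef/fceqRef sketch;
// fcorRef is a hypothetical name):

static u32 fcorRef(u32 clipflag, u32 code) {
	u32 t = (clipflag | code) & 0xffffff;  // OR the immediate into the 24-bit flag
	return (t + 1) >> 24;                  // carries into bit 24 only when t == 0xffffff
}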
-//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// MFP* -//------------------------------------------------------------------ -void recVUMI_MFP(VURegs *VU, int info) -{ - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - //Console::WriteLn("recVUMI_MFP"); - if( _XYZW_SS ) { - _vuFlipRegSS(VU, EEREC_T); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 1)); - SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); - _vuFlipRegSS(VU, EEREC_T); - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 1)); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_T, VU_VI_ADDR(REG_P, 1)); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// WAITP -//------------------------------------------------------------------ -static PCSX2_ALIGNED16(float s_tempmem[4]); -void recVUMI_WAITP(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_WAITP"); -// if( info & PROCESS_VU_SUPER ) -// SuperVUFlush(1, 1); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// vuSqSumXYZ()* -// -// NOTE: In all EFU insts, EEREC_D is a temp reg -//------------------------------------------------------------------ -void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2 -{ - //Console::WriteLn("VU: SUMXYZ"); - if( cpucaps.hasStreamingSIMD4Extensions ) - { - SSE_MOVAPS_XMM_to_XMM(regd, regs); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0xf); - SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71); - } - else - { - SSE_MOVAPS_XMM_to_XMM(regtemp, regs); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0xf); - SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2 - - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(regd, regtemp); - SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2 - SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z - } - else { - SSE_MOVSS_XMM_to_XMM(regd, regtemp); - SSE2_PSHUFLW_XMM_to_XMM(regtemp, regtemp, 0x4e); // wzyx -> wzxy - SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 - SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz - SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2 - } - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESADD* -//------------------------------------------------------------------ -void recVUMI_ESADD( VURegs *VU, int info) -{ - //Console::WriteLn("VU: ESADD"); - assert( VU == &VU1 ); - if( EEREC_TEMP == EEREC_D ) { // special code to reset P ( FixMe: don't know if this is still needed! 
(cottonvibes) ) - Console::Notice("ESADD: Resetting P reg!!!\n"); - MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0); - return; - } - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERSADD* -//------------------------------------------------------------------ -void recVUMI_ERSADD( VURegs *VU, int info ) -{ - //Console::WriteLn("VU: ERSADD"); - assert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - // don't use RCPSS (very bad precision) - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ELENG* -//------------------------------------------------------------------ -void recVUMI_ELENG( VURegs *VU, int info ) -{ - //Console::WriteLn("VU: ELENG"); - assert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERLENG* -//------------------------------------------------------------------ -void recVUMI_ERLENG( VURegs *VU, int info ) -{ - //Console::WriteLn("VU: ERLENG"); - assert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive - SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2) - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2) - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EATANxy -//------------------------------------------------------------------ -void recVUMI_EATANxy( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - //Console::WriteLn("recVUMI_EATANxy"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); - FLD32((uptr)&s_tempmem[0]); - FLD32((uptr)&s_tempmem[1]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FLD32((uptr)&VU->VF[_Fs_].UL[0]); - FLD32((uptr)&VU->VF[_Fs_].UL[1]); - } - - FPATAN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - 
-//------------------------------------------------------------------ -// EATANxz -//------------------------------------------------------------------ -void recVUMI_EATANxz( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - //Console::WriteLn("recVUMI_EATANxz"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); - FLD32((uptr)&s_tempmem[0]); - FLD32((uptr)&s_tempmem[2]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FLD32((uptr)&VU->VF[_Fs_].UL[0]); - FLD32((uptr)&VU->VF[_Fs_].UL[2]); - } - FPATAN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESUM* -//------------------------------------------------------------------ -void recVUMI_ESUM( VURegs *VU, int info ) -{ - //Console::WriteLn("VU: ESUM"); - assert( VU == &VU1 ); - - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat_useEAX(info, EEREC_TEMP, 0xf); - SSE3_HADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - SSE3_HADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // z, w, z, w - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // z+x, w+y, z+z, w+w - SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // z+x, z+x, w+y, w+y - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // w+y, w+y, w+y, w+y - SSE_ADDSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // x+y+z+w, w+y, w+y, w+y - } - - vuFloat_useEAX(info, EEREC_TEMP, 8); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERCPR* -//------------------------------------------------------------------ -void recVUMI_ERCPR( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - //Console::WriteLn("VU1: ERCPR"); - - // don't use RCPSS (very bad precision) - switch ( _Fsf_ ) { - case 0: //0001 - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - break; - case 1: //0010 - SSE2_PSHUFLW_XMM_to_XMM(EEREC_S, EEREC_S, 0x4e); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE2_PSHUFLW_XMM_to_XMM(EEREC_S, EEREC_S, 0x4e); - break; - case 2: //0100 - SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6); - break; - case 3: //1000 - SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27); - if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27); - break; - } - - vuFloat_useEAX(info, EEREC_TEMP, 8); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - 
-//------------------------------------------------------------------ -// ESQRT* -//------------------------------------------------------------------ -void recVUMI_ESQRT( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - - //Console::WriteLn("VU1: ESQRT"); - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // abs(x) - if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ERSQRT* -//------------------------------------------------------------------ -void recVUMI_ERSQRT( VURegs *VU, int info ) -{ - int t1reg = _vuGetTempXMMreg(info); - - assert( VU == &VU1 ); - //Console::WriteLn("VU1: ERSQRT"); - - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // abs(x) - SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Clamp Infinities to Fmax - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // SQRT(abs(x)) - - if( t1reg >= 0 ) - { - SSE_MOVSS_M32_to_XMM(t1reg, (uptr)VU_ONE); - SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); - vuFloat_useEAX(info, t1reg, 8); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), t1reg); - _freeXMMreg(t1reg); - } - else - { - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); - SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 0)); - vuFloat_useEAX(info, EEREC_TEMP, 8); - SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ESIN -//------------------------------------------------------------------ -void recVUMI_ESIN( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - - //Console::WriteLn("recVUMI_ESIN"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - switch(_Fsf_) { - case 0: SSE_MOVSS_XMM_to_M32((uptr)s_tempmem, EEREC_S); - case 1: SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); - default: SSE_MOVHPS_XMM_to_M64((uptr)&s_tempmem[2], EEREC_S); - } - FLD32((uptr)&s_tempmem[_Fsf_]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FLD32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); - } - - FSIN(); - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EATAN -//------------------------------------------------------------------ -void recVUMI_EATAN( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - - //Console::WriteLn("recVUMI_EATAN"); - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - switch(_Fsf_) { - case 0: SSE_MOVSS_XMM_to_M32((uptr)s_tempmem, EEREC_S); - case 1: SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); - default: SSE_MOVHPS_XMM_to_M64((uptr)&s_tempmem[2], EEREC_S); - } - FLD32((uptr)&s_tempmem[_Fsf_]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - } - - FLD1(); - FLD32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); - FPATAN(); - 
FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// EEXP -//------------------------------------------------------------------ -void recVUMI_EEXP( VURegs *VU, int info ) -{ - assert( VU == &VU1 ); - //Console::WriteLn("recVUMI_EEXP"); - FLDL2E(); - - if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { - switch(_Fsf_) { - case 0: SSE_MOVSS_XMM_to_M32((uptr)s_tempmem, EEREC_S); - case 1: SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); - default: SSE_MOVHPS_XMM_to_M64((uptr)&s_tempmem[2], EEREC_S); - } - FMUL32((uptr)&s_tempmem[_Fsf_]); - } - else { - if( xmmregs[EEREC_S].mode & MODE_WRITE ) { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); - xmmregs[EEREC_S].mode &= ~MODE_WRITE; - } - - FMUL32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); - } - - // basically do 2^(log_2(e) * val) - FLD(0); - FRNDINT(); - FXCH(1); - FSUB32Rto0(1); - F2XM1(); - FLD1(); - FADD320toR(1); - FSCALE(); - FSTP(1); - - FSTP32(VU_VI_ADDR(REG_P, 0)); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// XITOP -//------------------------------------------------------------------ -void recVUMI_XITOP( VURegs *VU, int info ) -{ - int itreg; - if (_It_ == 0) return; - //Console::WriteLn("recVUMI_XITOP"); - itreg = ALLOCVI(_It_, MODE_WRITE); - MOVZX32M16toR( itreg, (uptr)&VU->vifRegs->itop ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// XTOP -//------------------------------------------------------------------ -void recVUMI_XTOP( VURegs *VU, int info ) -{ - int itreg; - if ( _It_ == 0 ) return; - //Console::WriteLn("recVUMI_XTOP"); - itreg = ALLOCVI(_It_, MODE_WRITE); - MOVZX32M16toR( itreg, (uptr)&VU->vifRegs->top ); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp -//------------------------------------------------------------------ -void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr) -{ - u32 size; - u32* data = (u32*)((u8*)pMem + (addr&0x3fff)); - - // fixme: The gifTagDummy function in the MTGS (called by PrepDataPacket) has a - // hack that aborts the packet if it goes past the end of VU1 memory. - // Chances are this should be a "loops around memory" situation, and the packet - // should be continued starting at addr zero (0). 
- - size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4); - jASSUME( size > 0 ); - - //if( size > 0 ) - { - u8* pmem = mtgsThread->GetDataPacketPtr(); - - /* if((size << 4) > (0x4000-(addr&0x3fff))) - { - //DevCon::Notice("addr + Size = 0x%x, transferring %x then doing %x", params (addr&0x3fff) + (size << 4), (0x4000-(addr&0x3fff)) >> 4, size - ((0x4000-(addr&0x3fff)) >> 4)); - memcpy_aligned(pmem, (u8*)pMem+addr, 0x4000-(addr&0x3fff)); - size -= (0x4000-(addr&0x3fff)) >> 4; - //DevCon::Notice("Size left %x", params size); - pmem += 0x4000-(addr&0x3fff); - memcpy_aligned(pmem, (u8*)pMem, size<<4); - } - else - {*/ - memcpy_aligned(pmem, (u8*)pMem+addr, size<<4); - // } - mtgsThread->SendDataPacket(); - } -} -//------------------------------------------------------------------ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "PrecompiledHeader.h" + +#include "Common.h" +#include "GS.h" +#include "R5900OpcodeTables.h" +#include "iR5900.h" +#include "iMMI.h" +#include "iFPU.h" +#include "iCOP0.h" +#include "VUmicro.h" +#include "VUflags.h" +#include "sVU_Micro.h" +#include "sVU_Debug.h" +#include "sVU_zerorec.h" +//------------------------------------------------------------------ + +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ +#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register +#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register +#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register +#define _It_ (_Ft_ & 15) +#define _Is_ (_Fs_ & 15) +#define _Id_ (_Fd_ & 15) + +#define _X (( VU->code>>24) & 0x1) +#define _Y (( VU->code>>23) & 0x1) +#define _Z (( VU->code>>22) & 0x1) +#define _W (( VU->code>>21) & 0x1) + +#define _XYZW_SS (_X+_Y+_Z+_W==1) + +#define _Fsf_ (( VU->code >> 21) & 0x03) +#define _Ftf_ (( VU->code >> 23) & 0x03) + +#define _Imm11_ (s32)(VU->code & 0x400 ? 
0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) +#define _UImm11_ (s32)(VU->code & 0x7ff) + +#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] +#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] +#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] +#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] + +#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] +#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] +#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] + +#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) + +#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] +#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] +#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] +#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] + +#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) + + +static const PCSX2_ALIGNED16(u32 VU_ONE[4]) = {0x3f800000, 0xffffffff, 0xffffffff, 0xffffffff}; +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// *VU Lower Instructions!* +// +// Note: * = Checked for errors by cottonvibes +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// DIV* +//------------------------------------------------------------------ +PCSX2_ALIGNED16(u64 DIV_TEMP_XMM[2]); +void recVUMI_DIV(VURegs *VU, int info) +{ + u8 *pjmp, *pjmp1; + u32 *ajmp32, *bjmp32; + + //Console::WriteLn("recVUMI_DIV()"); + AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags + + // FT can be zero here! so we need to check if its zero and set the correct flag. + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // Set all F's if each vector is zero + + SSE_MOVMSKPS_XMM_to_R32( EAX, EEREC_TEMP); // Move the sign bits of the previous calculation + + AND32ItoR( EAX, (1<<_Ftf_) ); // Grab "Is Zero" bits from the previous calculation + ajmp32 = JZ32(0); // Skip if none are + + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP + SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // Set all F's if each vector is zero + SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); // Move the sign bits of the previous calculation + + AND32ItoR( EAX, (1<<_Fsf_) ); // Grab "Is Zero" bits from the previous calculation + pjmp = JZ8(0); + OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) + pjmp1 = JMP8(0); + x86SetJ8(pjmp); + OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) + x86SetJ8(pjmp1); + + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + + _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); + + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax + + bjmp32 = JMP32(0); + + x86SetJ32(ajmp32); + + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat5_useEAX(EEREC_S, EEREC_TEMP, (1 << (3-_Fsf_))); + vuFloat5_useEAX(EEREC_T, EEREC_TEMP, (1 << (3-_Ftf_))); + } + + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + + _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + _vuFlipRegSS_xyzw(EEREC_T, _Ftf_); + + vuFloat_useEAX(info, EEREC_TEMP, 0x8); + + x86SetJ32(bjmp32); + + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SQRT* 
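+//
+// SQRT raises the invalid (I) status bit for a negative input but still
+// computes sqrt(|x|) -- the ANDPS against const_clip below is the abs().
+// A rough scalar sketch of the semantics (vuSQRT/status are illustrative
+// names only; the real code writes REG_STATUS_FLAG via VU_VI_ADDR):
+//
+//   float vuSQRT(float ft, u32* status) {
+//       *status &= 0xFCF;                        // clear D/I flags
+//       if (ft < 0) *status |= 0x410;            // invalid flag on negative input
+//       return sqrtf(fminf(fabsf(ft), FLT_MAX)); // clamp infinities, then sqrt
+//   }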
+//------------------------------------------------------------------
+void recVUMI_SQRT( VURegs *VU, int info )
+{
+ u8* pjmp;
+ //Console::WriteLn("recVUMI_SQRT()");
+
+ _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
+ AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
+
+ /* Check for negative sqrt */
+ SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP);
+ AND32ItoR(EAX, 1); //Check sign
+ pjmp = JZ8(0); //Skip if none are
+ OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt
+ x86SetJ8(pjmp);
+
+ SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // Do a cardinal sqrt
+ if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Clamp infinities (only need to do positive clamp since EEREC_TEMP is positive)
+ SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
+ SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// RSQRT*
+//------------------------------------------------------------------
+PCSX2_ALIGNED16(u64 RSQRT_TEMP_XMM[2]);
+void recVUMI_RSQRT(VURegs *VU, int info)
+{
+ u8 *ajmp8, *bjmp8;
+ u8 *qjmp1, *qjmp2;
+ int t1reg, t1boolean;
+ //Console::WriteLn("recVUMI_RSQRT()");
+
+ _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
+ AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
+
+ /* Check for negative divide */
+ SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP);
+ AND32ItoR(EAX, 1); //Check sign
+ ajmp8 = JZ8(0); //Skip if none are
+ OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); // Invalid Flag - Negative number sqrt
+ x86SetJ8(ajmp8);
+
+ SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // Do a cardinal sqrt
+ if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Clamp Infinities to Fmax
+ SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
+
+ t1reg = _vuGetTempXMMreg(info);
+ if( t1reg < 0 ) {
+ for (t1reg = 0; ( (t1reg == EEREC_TEMP) || (t1reg == EEREC_S) ); t1reg++)
+ ; // Makes t1reg not be EEREC_TEMP or EEREC_S.
+ SSE_MOVAPS_XMM_to_M128( (uptr)&RSQRT_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
+ t1boolean = 1;
+ }
+ else t1boolean = 0;
+
+ // Ft can still be zero here! So we need to check if it's zero and set the correct flag.
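+ // (Illustrative) The CMPEQSS/MOVMSKPS pair below is a branchless equality
+ // test: CMPEQSS writes all-ones into the low lane when Ft == 0, and MOVMSKPS
+ // gathers each lane's sign bit into EAX, so bit 0 of EAX holds the result.
+ // Scalar equivalent (status bits per the flag comments below):
+ //   if (ft == 0.0f) status |= (fs == 0.0f) ? 0x410 /* 0/0 */ : 0x820 /* x/0 */;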
+ SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg + SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero + + SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation + + AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation + ajmp8 = JZ8(0); // Skip if none are + + //check for 0/0 + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + + SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear EEREC_TEMP + SSE_CMPEQPS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero + SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation + + AND32ItoR( EAX, 0x01 ); // Grab "Is Zero" bits from the previous calculation + qjmp1 = JZ8(0); + OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0) + qjmp2 = JMP8(0); + x86SetJ8(qjmp1); + OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0) + x86SetJ8(qjmp2); + + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); + bjmp8 = JMP8(0); + x86SetJ8(ajmp8); + + _unpackVFSS_xyzw(t1reg, EEREC_S, _Fsf_); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat_useEAX(info, t1reg, 0x8); // Clamp Infinities + SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); + vuFloat_useEAX(info, t1reg, 0x8); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); + + x86SetJ8(bjmp8); + + if (t1boolean) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&RSQRT_TEMP_XMM[0] ); // restore t1reg data + else _freeXMMreg(t1reg); // free t1reg +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// _addISIMMtoIT() - Used in IADDI, IADDIU, and ISUBIU instructions +//------------------------------------------------------------------ +void _addISIMMtoIT(VURegs *VU, s16 imm, int info) +{ + int isreg = -1, itreg; + if (_It_ == 0) return; + + if( _Is_ == 0 ) { + itreg = ALLOCVI(_It_, MODE_WRITE); + MOV32ItoR(itreg, imm&0xffff); + return; + } + + ADD_VI_NEEDED(_It_); + isreg = ALLOCVI(_Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_WRITE); + + if ( _It_ == _Is_ ) { + if (imm != 0 ) ADD16ItoR(itreg, imm); + } + else { + if( imm ) { + LEA32RtoR(itreg, isreg, imm); + MOVZX32R16toR(itreg, itreg); + } + else MOV32RtoR(itreg, isreg); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// IADDI +//------------------------------------------------------------------ +void recVUMI_IADDI(VURegs *VU, int info) +{ + s16 imm; + + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_IADDI"); + imm = ( VU->code >> 6 ) & 0x1f; + imm = ( imm & 0x10 ? 
0xfff0 : 0) | ( imm & 0xf ); + _addISIMMtoIT(VU, imm, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// IADDIU +//------------------------------------------------------------------ +void recVUMI_IADDIU(VURegs *VU, int info) +{ + s16 imm; + + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_IADDIU"); + imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff ); + _addISIMMtoIT(VU, imm, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// IADD +//------------------------------------------------------------------ +void recVUMI_IADD( VURegs *VU, int info ) +{ + int idreg, isreg = -1, itreg = -1; + if ( _Id_ == 0 ) return; + //Console::WriteLn("recVUMI_IADD"); + if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { + idreg = ALLOCVI(_Id_, MODE_WRITE); + XOR32RtoR(idreg, idreg); + return; + } + + ADD_VI_NEEDED(_Is_); + ADD_VI_NEEDED(_It_); + idreg = ALLOCVI(_Id_, MODE_WRITE); + + if ( _Is_ == 0 ) + { + if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { + if( idreg != itreg ) MOV32RtoR(idreg, itreg); + } + else MOVZX32M16toR(idreg, VU_VI_ADDR(_It_, 1)); + } + else if ( _It_ == 0 ) + { + if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { + if( idreg != isreg ) MOV32RtoR(idreg, isreg); + } + else MOVZX32M16toR(idreg, VU_VI_ADDR(_Is_, 1)); + } + else { + //ADD_VI_NEEDED(_It_); + isreg = ALLOCVI(_Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_READ); + + if( idreg == isreg ) ADD32RtoR(idreg, itreg); + else if( idreg == itreg ) ADD32RtoR(idreg, isreg); + else LEA32RRtoR(idreg, isreg, itreg); + MOVZX32R16toR(idreg, idreg); // needed since don't know if idreg's upper bits are 0 + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// IAND +//------------------------------------------------------------------ +void recVUMI_IAND( VURegs *VU, int info ) +{ + int idreg, isreg = -1, itreg = -1; + if ( _Id_ == 0 ) return; + //Console::WriteLn("recVUMI_IAND"); + if ( ( _Is_ == 0 ) || ( _It_ == 0 ) ) { + idreg = ALLOCVI(_Id_, MODE_WRITE); + XOR32RtoR(idreg, idreg); + return; + } + + ADD_VI_NEEDED(_Is_); + ADD_VI_NEEDED(_It_); + idreg = ALLOCVI(_Id_, MODE_WRITE); + + isreg = ALLOCVI(_Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_READ); + + if( idreg == isreg ) AND16RtoR(idreg, itreg); + else if( idreg == itreg ) AND16RtoR(idreg, isreg); + else { + MOV32RtoR(idreg, itreg); + AND32RtoR(idreg, isreg); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// IOR +//------------------------------------------------------------------ +void recVUMI_IOR( VURegs *VU, int info ) +{ + int idreg, isreg = -1, itreg = -1; + if ( _Id_ == 0 ) return; + //Console::WriteLn("recVUMI_IOR"); + if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { + idreg = ALLOCVI(_Id_, MODE_WRITE); + XOR32RtoR(idreg, idreg); + return; + } + + ADD_VI_NEEDED(_Is_); + ADD_VI_NEEDED(_It_); + idreg = ALLOCVI(_Id_, MODE_WRITE); + + if ( _Is_ == 0 ) + { + if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { + if( idreg != itreg ) MOV32RtoR(idreg, itreg); + } + else MOVZX32M16toR(idreg, VU_VI_ADDR(_It_, 1)); + } + else if ( _It_ == 0 ) + { + if( (isreg = 
_checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { + if( idreg != isreg ) MOV32RtoR(idreg, isreg); + } + else MOVZX32M16toR(idreg, VU_VI_ADDR(_Is_, 1)); + } + else + { + isreg = ALLOCVI(_Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_READ); + + if( idreg == isreg ) OR16RtoR(idreg, itreg); + else if( idreg == itreg ) OR16RtoR(idreg, isreg); + else { + MOV32RtoR(idreg, isreg); + OR32RtoR(idreg, itreg); + } + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ISUB +//------------------------------------------------------------------ +void recVUMI_ISUB( VURegs *VU, int info ) +{ + int idreg, isreg = -1, itreg = -1; + if ( _Id_ == 0 ) return; + //Console::WriteLn("recVUMI_ISUB"); + if ( ( _It_ == 0 ) && ( _Is_ == 0 ) ) { + idreg = ALLOCVI(_Id_, MODE_WRITE); + XOR32RtoR(idreg, idreg); + return; + } + + ADD_VI_NEEDED(_Is_); + ADD_VI_NEEDED(_It_); + idreg = ALLOCVI(_Id_, MODE_WRITE); + + if ( _Is_ == 0 ) + { + if( (itreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, MODE_READ)) >= 0 ) { + if( idreg != itreg ) MOV32RtoR(idreg, itreg); + } + else MOVZX32M16toR(idreg, VU_VI_ADDR(_It_, 1)); + NEG16R(idreg); + } + else if ( _It_ == 0 ) + { + if( (isreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, MODE_READ)) >= 0 ) { + if( idreg != isreg ) MOV32RtoR(idreg, isreg); + } + else MOVZX32M16toR(idreg, VU_VI_ADDR(_Is_, 1)); + } + else + { + isreg = ALLOCVI(_Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_READ); + + if( idreg == isreg ) SUB16RtoR(idreg, itreg); + else if( idreg == itreg ) { + SUB16RtoR(idreg, isreg); + NEG16R(idreg); + } + else { + MOV32RtoR(idreg, isreg); + SUB16RtoR(idreg, itreg); + } + } +} +//------------------------------------------------------------------ + +//------------------------------------------------------------------ +// ISUBIU +//------------------------------------------------------------------ +void recVUMI_ISUBIU( VURegs *VU, int info ) +{ + s16 imm; + + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_ISUBIU"); + imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff ); + imm = -imm; + _addISIMMtoIT(VU, imm, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MOVE* +//------------------------------------------------------------------ +void recVUMI_MOVE( VURegs *VU, int info ) +{ + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; + //Console::WriteLn("recVUMI_MOVE"); + if (_X_Y_Z_W == 0x8) SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_S); + else if (_X_Y_Z_W == 0xf) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MFIR* +//------------------------------------------------------------------ +void recVUMI_MFIR( VURegs *VU, int info ) +{ + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; + //Console::WriteLn("recVUMI_MFIR"); + _deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Is_, 1); + + if( _XYZW_SS ) { + SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Is_, 1)-2); + _vuFlipRegSS(VU, EEREC_T); + SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16); + SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); + _vuFlipRegSS(VU, EEREC_T); + } + else if (_X_Y_Z_W != 0xf) { + SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Is_, 
1)-2); + SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } + else { + SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Is_, 1)-2); + SSE2_PSRAD_I8_to_XMM(EEREC_T, 16); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MTIR* +//------------------------------------------------------------------ +void recVUMI_MTIR( VURegs *VU, int info ) +{ + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_MTIR"); + _deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _It_, 2); + + if( _Fsf_ == 0 ) { + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(_It_, 0), EEREC_S); + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(_It_, 0), EEREC_TEMP); + } + + AND32ItoM(VU_VI_ADDR(_It_, 0), 0xffff); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MR32* +//------------------------------------------------------------------ +void recVUMI_MR32( VURegs *VU, int info ) +{ + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; + //Console::WriteLn("recVUMI_MR32"); + if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x39); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x39); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// _loadEAX() +// +// NOTE: If x86reg < 0, reads directly from offset +//------------------------------------------------------------------ +void _loadEAX(VURegs *VU, int x86reg, uptr offset, int info) +{ + assert( offset < 0x80000000 ); + + if( x86reg >= 0 ) { + switch(_X_Y_Z_W) { + case 3: // ZW + SSE_MOVHPS_Rm_to_XMM(EEREC_T, x86reg, offset+8); + break; + case 6: // YZ + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0x9c); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); + break; + + case 8: // X + SSE_MOVSS_Rm_to_XMM(EEREC_TEMP, x86reg, offset); + SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); + break; + case 9: // XW + SSE_SHUFPS_Rm_to_XMM(EEREC_T, x86reg, offset, 0xc9); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); + break; + case 12: // XY + SSE_MOVLPS_Rm_to_XMM(EEREC_T, x86reg, offset); + break; + case 15: + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_T, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_T, x86reg, offset); + break; + default: + if( VU == &VU1 ) SSE_MOVAPSRmtoR(EEREC_TEMP, x86reg, offset); + else SSE_MOVUPSRmtoR(EEREC_TEMP, x86reg, offset); + + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + break; + } + } + else { + switch(_X_Y_Z_W) { + case 3: // ZW + SSE_MOVHPS_M64_to_XMM(EEREC_T, offset+8); + break; + case 6: // YZ + SSE_SHUFPS_M128_to_XMM(EEREC_T, offset, 0x9c); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78); + break; + case 8: // X + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, offset); + SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); + break; + case 9: // XW + SSE_SHUFPS_M128_to_XMM(EEREC_T, offset, 0xc9); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2); + break; + case 12: // XY + SSE_MOVLPS_M64_to_XMM(EEREC_T, offset); + break; + case 15: + if( VU == &VU1 ) SSE_MOVAPS_M128_to_XMM(EEREC_T, offset); + else SSE_MOVUPS_M128_to_XMM(EEREC_T, offset); + break; + default: + if( VU == &VU1 ) 
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset); + else SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + break; + } + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// recVUTransformAddr() +//------------------------------------------------------------------ +int recVUTransformAddr(int x86reg, VURegs* VU, int vireg, int imm) +{ + u8* pjmp[2]; + if( x86reg == EAX ) { + if (imm) ADD32ItoR(x86reg, imm); + } + else { + if( imm ) LEA32RtoR(EAX, x86reg, imm); + else MOV32RtoR(EAX, x86reg); + } + + if( VU == &VU1 ) { + AND32ItoR(EAX, 0x3ff); // wrap around + SHL32ItoR(EAX, 4); + } + else { + + // VU0 has a somewhat interesting memory mapping: + // if addr >= 0x4000, reads VU1's VF regs and VI regs + // if addr < 0x4000, wrap around at 0x1000 + + CMP32ItoR(EAX, 0x400); + pjmp[0] = JL8(0); // if addr >= 0x4000, reads VU1's VF regs and VI regs + AND32ItoR(EAX, 0x43f); + pjmp[1] = JMP8(0); + x86SetJ8(pjmp[0]); + AND32ItoR(EAX, 0xff); // if addr < 0x4000, wrap around + x86SetJ8(pjmp[1]); + + SHL32ItoR(EAX, 4); // multiply by 16 (shift left by 4) + } + + return EAX; +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// LQ +//------------------------------------------------------------------ +void recVUMI_LQ(VURegs *VU, int info) +{ + s16 imm; + if ( _Ft_ == 0 ) return; + //Console::WriteLn("recVUMI_LQ"); + imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff); + if (_Is_ == 0) { + _loadEAX(VU, -1, (uptr)GET_VU_MEM(VU, (u32)imm*16), info); + } + else { + int isreg = ALLOCVI(_Is_, MODE_READ); + _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, imm), (uptr)VU->Mem, info); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// LQD +//------------------------------------------------------------------ +void recVUMI_LQD( VURegs *VU, int info ) +{ + int isreg; + //Console::WriteLn("recVUMI_LQD"); + if ( _Is_ != 0 ) { + isreg = ALLOCVI(_Is_, MODE_READ|MODE_WRITE); + SUB16ItoR( isreg, 1 ); + } + + if ( _Ft_ == 0 ) return; + + if ( _Is_ == 0 ) _loadEAX(VU, -1, (uptr)VU->Mem, info); + else _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// LQI +//------------------------------------------------------------------ +void recVUMI_LQI(VURegs *VU, int info) +{ + int isreg; + //Console::WriteLn("recVUMI_LQI"); + if ( _Ft_ == 0 ) { + if( _Is_ != 0 ) { + if( (isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_WRITE|MODE_READ)) >= 0 ) { + ADD16ItoR(isreg, 1); + } + else { + ADD16ItoM( VU_VI_ADDR( _Is_, 0 ), 1 ); + } + } + return; + } + + if (_Is_ == 0) { + _loadEAX(VU, -1, (uptr)VU->Mem, info); + } + else { + isreg = ALLOCVI(_Is_, MODE_READ|MODE_WRITE); + _loadEAX(VU, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem, info); + ADD16ItoR( isreg, 1 ); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// _saveEAX() +//------------------------------------------------------------------ +void _saveEAX(VURegs *VU, int x86reg, uptr offset, int info) +{ + assert( offset < 0x80000000 ); + + if ( 
_Fs_ == 0 ) { + if ( _XYZW_SS ) { + u32 c = _W ? 0x3f800000 : 0; + if ( x86reg >= 0 ) MOV32ItoRm(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0)))); + else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c); + } + else { + + // (this is one of my test cases for the new emitter --air) + using namespace x86Emitter; + xAddressReg thisreg( x86reg ); + + if ( _X ) xMOV(ptr32[thisreg+offset], 0x00000000); + if ( _Y ) xMOV(ptr32[thisreg+offset+4], 0x00000000); + if ( _Z ) xMOV(ptr32[thisreg+offset+8], 0x00000000); + if ( _W ) xMOV(ptr32[thisreg+offset+12], 0x3f800000); + } + return; + } + + switch ( _X_Y_Z_W ) { + case 1: // W + SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x27); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); + else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); + break; + case 2: // Z + SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); + else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); + break; + case 3: // ZW + if ( x86reg >= 0 ) SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); + else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); + break; + case 4: // Y + SSE2_PSHUFLW_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4e); + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); + else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_TEMP); + break; + case 5: // YW + SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); + SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if ( x86reg >= 0 ) { + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); + } + else { + SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S); + SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); + } + SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xB1); + break; + case 6: // YZ + SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xc9); + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); + else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_TEMP); + break; + case 7: // YZW + SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x93); //ZYXW + if ( x86reg >= 0 ) { + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+4); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); + } + else { + SSE_MOVHPS_XMM_to_M64(offset+4, EEREC_TEMP); + SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); + } + break; + case 8: // X + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); + break; + case 9: // XW + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + else SSE_MOVSS_XMM_to_M32(offset, EEREC_S); + + SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0xff); //WWWW + + if ( x86reg >= 0 ) SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); + else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); + + break; + case 10: //XZ + SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if ( x86reg >= 0 ) { + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); + } + else { + SSE_MOVSS_XMM_to_M32(offset, EEREC_S); + SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); + } + break; + case 11: //XZW + if ( x86reg >= 0 ) { + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_S, offset); + SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_S, offset+8); + } + else { + SSE_MOVSS_XMM_to_M32(offset, EEREC_S); + SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S); + } + break; + case 12: // XY + if ( x86reg >= 0 ) SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); + else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); + break; + case 13: // XYW + SSE2_PSHUFD_XMM_to_XMM(EEREC_TEMP, EEREC_S, 0x4b); //YXZW + if ( x86reg >= 0 ) { + 
SSE_MOVHPS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+12); + } + else { + SSE_MOVHPS_XMM_to_M64(offset, EEREC_TEMP); + SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP); + } + break; + case 14: // XYZ + SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if ( x86reg >= 0 ) { + SSE_MOVLPS_XMM_to_Rm(x86reg, EEREC_S, offset+0); + SSE_MOVSS_XMM_to_Rm(x86reg, EEREC_TEMP, offset+8); + } + else { + SSE_MOVLPS_XMM_to_M64(offset, EEREC_S); + SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP); + } + break; + case 15: // XYZW + if ( VU == &VU1 ) { + if( x86reg >= 0 ) SSE_MOVAPSRtoRm(x86reg, EEREC_S, offset+0); + else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); + } + else { + if( x86reg >= 0 ) SSE_MOVUPSRtoRm(x86reg, EEREC_S, offset+0); + else { + if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S); + else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S); + } + } + break; + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SQ +//------------------------------------------------------------------ +void recVUMI_SQ(VURegs *VU, int info) +{ + s16 imm; + //Console::WriteLn("recVUMI_SQ"); + imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); + if ( _It_ == 0 ) _saveEAX(VU, -1, (uptr)GET_VU_MEM(VU, (int)imm * 16), info); + else { + int itreg = ALLOCVI(_It_, MODE_READ); + _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, imm), (uptr)VU->Mem, info); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SQD +//------------------------------------------------------------------ +void recVUMI_SQD(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_SQD"); + if (_It_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info); + else { + int itreg = ALLOCVI(_It_, MODE_READ|MODE_WRITE); + SUB16ItoR( itreg, 1 ); + _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, 0), (uptr)VU->Mem, info); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SQI +//------------------------------------------------------------------ +void recVUMI_SQI(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_SQI"); + if (_It_ == 0) _saveEAX(VU, -1, (uptr)VU->Mem, info); + else { + int itreg = ALLOCVI(_It_, MODE_READ|MODE_WRITE); + _saveEAX(VU, recVUTransformAddr(itreg, VU, _It_, 0), (uptr)VU->Mem, info); + ADD16ItoR( itreg, 1 ); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ILW +//------------------------------------------------------------------ +void recVUMI_ILW(VURegs *VU, int info) +{ + int itreg; + s16 imm, off; + + if ( ( _It_ == 0 ) || ( _X_Y_Z_W == 0 ) ) return; + //Console::WriteLn("recVUMI_ILW"); + imm = ( VU->code & 0x400) ? 
( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); + if (_X) off = 0; + else if (_Y) off = 4; + else if (_Z) off = 8; + else if (_W) off = 12; + + ADD_VI_NEEDED(_Is_); + itreg = ALLOCVI(_It_, MODE_WRITE); + + if ( _Is_ == 0 ) { + MOVZX32M16toR( itreg, (uptr)GET_VU_MEM(VU, (int)imm * 16 + off) ); + } + else { + int isreg = ALLOCVI(_Is_, MODE_READ); + MOV32RmtoR(itreg, recVUTransformAddr(isreg, VU, _Is_, imm), (uptr)VU->Mem + off); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ISW +//------------------------------------------------------------------ +void recVUMI_ISW( VURegs *VU, int info ) +{ + s16 imm; + //Console::WriteLn("recVUMI_ISW"); + imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff); + + if (_Is_ == 0) { + uptr off = (uptr)GET_VU_MEM(VU, (int)imm * 16); + int itreg = ALLOCVI(_It_, MODE_READ); + + if (_X) MOV32RtoM(off, itreg); + if (_Y) MOV32RtoM(off+4, itreg); + if (_Z) MOV32RtoM(off+8, itreg); + if (_W) MOV32RtoM(off+12, itreg); + } + else { + int x86reg, isreg, itreg; + + ADD_VI_NEEDED(_It_); + isreg = ALLOCVI(_Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_READ); + + x86reg = recVUTransformAddr(isreg, VU, _Is_, imm); + + if (_X) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+12); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ILWR +//------------------------------------------------------------------ +void recVUMI_ILWR( VURegs *VU, int info ) +{ + int off, itreg; + + if ( ( _It_ == 0 ) || ( _X_Y_Z_W == 0 ) ) return; + //Console::WriteLn("recVUMI_ILWR"); + if (_X) off = 0; + else if (_Y) off = 4; + else if (_Z) off = 8; + else if (_W) off = 12; + + ADD_VI_NEEDED(_Is_); + itreg = ALLOCVI(_It_, MODE_WRITE); + + if ( _Is_ == 0 ) { + MOVZX32M16toR( itreg, (uptr)VU->Mem + off ); + } + else { + int isreg = ALLOCVI(_Is_, MODE_READ); + MOVZX32Rm16toR(itreg, recVUTransformAddr(isreg, VU, _Is_, 0), (uptr)VU->Mem + off); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ISWR +//------------------------------------------------------------------ +void recVUMI_ISWR( VURegs *VU, int info ) +{ + int itreg; + //Console::WriteLn("recVUMI_ISWR"); + ADD_VI_NEEDED(_Is_); + itreg = ALLOCVI(_It_, MODE_READ); + + if (_Is_ == 0) { + if (_X) MOV32RtoM((uptr)VU->Mem, itreg); + if (_Y) MOV32RtoM((uptr)VU->Mem+4, itreg); + if (_Z) MOV32RtoM((uptr)VU->Mem+8, itreg); + if (_W) MOV32RtoM((uptr)VU->Mem+12, itreg); + } + else { + int x86reg; + int isreg = ALLOCVI(_Is_, MODE_READ); + x86reg = recVUTransformAddr(isreg, VU, _Is_, 0); + + if (_X) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem); + if (_Y) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+4); + if (_Z) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+8); + if (_W) MOV32RtoRm(x86reg, itreg, (uptr)VU->Mem+12); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// RINIT* +//------------------------------------------------------------------ +void recVUMI_RINIT(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_RINIT()"); + if( (xmmregs[EEREC_S].mode & MODE_WRITE) && 
(xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) { + _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 2); + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)s_mask); + SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)VU_ONE); + SSE_MOVSS_XMM_to_M32(VU_REGR_ADDR, EEREC_TEMP); + } + else { + int rreg = ALLOCVI(REG_R, MODE_WRITE); + + if( xmmregs[EEREC_S].mode & MODE_WRITE ) { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); + xmmregs[EEREC_S].mode &= ~MODE_WRITE; + } + + MOV32MtoR( rreg, VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ ); + AND32ItoR( rreg, 0x7fffff ); + OR32ItoR( rreg, 0x7f << 23 ); + + _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// RGET* +//------------------------------------------------------------------ +void recVUMI_RGET(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_RGET()"); + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; + + _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); + + if (_X_Y_Z_W != 0xf) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_REGR_ADDR); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_T, VU_REGR_ADDR); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// RNEXT* +//------------------------------------------------------------------ +void recVUMI_RNEXT( VURegs *VU, int info ) +{ + int rreg, x86temp0, x86temp1; + //Console::WriteLn("recVUMI_RNEXT()"); + + rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ); + + x86temp0 = ALLOCTEMPX86(0); + x86temp1 = ALLOCTEMPX86(0); + + // code from www.project-fao.org + //MOV32MtoR(rreg, VU_REGR_ADDR); + MOV32RtoR(x86temp0, rreg); + SHR32ItoR(x86temp0, 4); + AND32ItoR(x86temp0, 1); + + MOV32RtoR(x86temp1, rreg); + SHR32ItoR(x86temp1, 22); + AND32ItoR(x86temp1, 1); + + SHL32ItoR(rreg, 1); + XOR32RtoR(x86temp0, x86temp1); + XOR32RtoR(rreg, x86temp0); + AND32ItoR(rreg, 0x7fffff); + OR32ItoR(rreg, 0x3f800000); + + _freeX86reg(x86temp0); + _freeX86reg(x86temp1); + + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) { + _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); + return; + } + + recVUMI_RGET(VU, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// RXOR* +//------------------------------------------------------------------ +void recVUMI_RXOR( VURegs *VU, int info ) +{ + //Console::WriteLn("recVUMI_RXOR()"); + if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) { + _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1); + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + + SSE_XORPS_M128_to_XMM(EEREC_TEMP, VU_REGR_ADDR); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)s_mask); + SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)s_fones); + SSE_MOVSS_XMM_to_M32(VU_REGR_ADDR, EEREC_TEMP); + } + else { + int rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ); + + if( xmmregs[EEREC_S].mode & MODE_WRITE ) { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); + xmmregs[EEREC_S].mode &= ~MODE_WRITE; + } + + XOR32MtoR( rreg, VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ ); + AND32ItoR( rreg, 0x7fffff ); + OR32ItoR ( rreg, 0x3f800000 ); + + _deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 
1); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// WAITQ +//------------------------------------------------------------------ +void recVUMI_WAITQ( VURegs *VU, int info ) +{ + //Console::WriteLn("recVUMI_WAITQ"); +// if( info & PROCESS_VU_SUPER ) { +// //CALLFunc(waitqfn); +// SuperVUFlush(0, 1); +// } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FSAND +//------------------------------------------------------------------ +void recVUMI_FSAND( VURegs *VU, int info ) +{ + int itreg; + u16 imm; + //Console::WriteLn("recVUMI_FSAND"); + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); + if(_It_ == 0) return; + + itreg = ALLOCVI(_It_, MODE_WRITE); + MOV32MtoR( itreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); + AND32ItoR( itreg, imm ); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FSEQ +//------------------------------------------------------------------ +void recVUMI_FSEQ( VURegs *VU, int info ) +{ + int itreg; + u16 imm; + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_FSEQ"); + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); + + itreg = ALLOCVI(_It_, MODE_WRITE|MODE_8BITREG); + + MOVZX32M16toR( EAX, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); + XOR32RtoR(itreg, itreg); + CMP16ItoR(EAX, imm); + SETE8R(itreg); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FSOR +//------------------------------------------------------------------ +void recVUMI_FSOR( VURegs *VU, int info ) +{ + int itreg; + u32 imm; + if(_It_ == 0) return; + //Console::WriteLn("recVUMI_FSOR"); + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff); + + itreg = ALLOCVI(_It_, MODE_WRITE); + + MOVZX32M16toR( itreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) ); + OR32ItoR( itreg, imm ); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FSSET +//------------------------------------------------------------------ +void recVUMI_FSSET(VURegs *VU, int info) +{ + u32 writeaddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); + u32 prevaddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); + + u16 imm = 0; + //Console::WriteLn("recVUMI_FSSET"); + imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7FF); + + // keep the low 6 bits ONLY if the upper instruction is an fmac instruction (otherwise rewrite) - metal gear solid 3 + //if( (info & PROCESS_VU_SUPER) && VUREC_FMAC ) { + MOV32MtoR(EAX, prevaddr); + AND32ItoR(EAX, 0x3f); + if ((imm&0xfc0) != 0) OR32ItoR(EAX, imm & 0xFC0); + MOV32RtoM(writeaddr ? writeaddr : prevaddr, EAX); + //} + //else { + // MOV32ItoM(writeaddr ? 
writeaddr : prevaddr, imm&0xfc0); + //} +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FMAND +//------------------------------------------------------------------ +void recVUMI_FMAND( VURegs *VU, int info ) +{ + int isreg, itreg; + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_FMAND"); + isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_WRITE);//|MODE_8BITREG); + + if( isreg >= 0 ) { + if( itreg != isreg ) MOV32RtoR(itreg, isreg); + } + else MOVZX32M16toR(itreg, VU_VI_ADDR(_Is_, 1)); + + AND16MtoR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FMEQ +//------------------------------------------------------------------ +void recVUMI_FMEQ( VURegs *VU, int info ) +{ + int itreg, isreg; + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_FMEQ"); + if( _It_ == _Is_ ) { + itreg = ALLOCVI(_It_, MODE_WRITE|MODE_READ);//|MODE_8BITREG + + CMP16MtoR(itreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + SETE8R(EAX); + MOVZX32R8toR(itreg, EAX); + } + else { + ADD_VI_NEEDED(_Is_); + itreg = ALLOCVI(_It_, MODE_WRITE|MODE_8BITREG); + isreg = ALLOCVI(_Is_, MODE_READ); + + XOR32RtoR(itreg, itreg); + + CMP16MtoR(isreg, VU_VI_ADDR(REG_MAC_FLAG, 1)); + SETE8R(itreg); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FMOR +//------------------------------------------------------------------ +void recVUMI_FMOR( VURegs *VU, int info ) +{ + int isreg, itreg; + if ( _It_ == 0 ) return; + //Console::WriteLn("recVUMI_FMOR"); + if( _Is_ == 0 ) { + itreg = ALLOCVI(_It_, MODE_WRITE);//|MODE_8BITREG); + MOVZX32M16toR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); + } + else if( _It_ == _Is_ ) { + itreg = ALLOCVI(_It_, MODE_WRITE|MODE_READ);//|MODE_8BITREG); + OR16MtoR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); + } + else { + isreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Is_, MODE_READ); + itreg = ALLOCVI(_It_, MODE_WRITE); + + MOVZX32M16toR( itreg, VU_VI_ADDR(REG_MAC_FLAG, 1) ); + + if( isreg >= 0 ) + OR16RtoR( itreg, isreg ); + else + OR16MtoR( itreg, VU_VI_ADDR(_Is_, 1) ); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FCAND +//------------------------------------------------------------------ +void recVUMI_FCAND( VURegs *VU, int info ) +{ + int itreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG); + //Console::WriteLn("recVUMI_FCAND"); + MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1) ); + XOR32RtoR( itreg, itreg ); + AND32ItoR( EAX, VU->code & 0xFFFFFF ); + + SETNZ8R(itreg); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// FCEQ +//------------------------------------------------------------------ +void recVUMI_FCEQ( VURegs *VU, int info ) +{ + int itreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG); + //Console::WriteLn("recVUMI_FCEQ"); + MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1) ); + AND32ItoR( EAX, 0xffffff ); + XOR32RtoR( itreg, itreg ); + CMP32ItoR( EAX, VU->code&0xffffff ); + + SETE8R(itreg); +} +//------------------------------------------------------------------ + + 
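+//------------------------------------------------------------------
+// FCOR below tests whether all 24 clip-flag bits are set via a carry
+// trick: OR the flags with the immediate, mask to 24 bits, and add 1;
+// the add carries into bit 24 only if all 24 low bits were 1, so a
+// shift right by 24 leaves exactly that 0/1 result. A C equivalent of
+// the emitted sequence (illustrative sketch only):
+//
+//   u32 fcor(u32 clipFlag, u32 imm24) {
+//       return (((clipFlag | imm24) & 0xffffff) + 1) >> 24;
+//   }
+//------------------------------------------------------------------
+
+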
+//------------------------------------------------------------------
+// FCOR
+//------------------------------------------------------------------
+void recVUMI_FCOR( VURegs *VU, int info )
+{
+	int itreg;
+	//Console::WriteLn("recVUMI_FCOR");
+	itreg = ALLOCVI(1, MODE_WRITE);
+	MOV32MtoR( itreg, VU_VI_ADDR(REG_CLIP_FLAG, 1) );
+	OR32ItoR ( itreg, VU->code );
+	AND32ItoR( itreg, 0xffffff );
+	ADD32ItoR( itreg, 1 ); // if all 24 low bits are set, the add carries into bit 24; otherwise bit 24 stays 0
+	SHR32ItoR( itreg, 24 ); // get bit 24 (also clears the rest of the garbage in the reg)
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// FCSET
+//------------------------------------------------------------------
+void recVUMI_FCSET( VURegs *VU, int info )
+{
+	u32 addr = VU_VI_ADDR(REG_CLIP_FLAG, 0);
+	//Console::WriteLn("recVUMI_FCSET");
+	MOV32ItoM(addr ? addr : VU_VI_ADDR(REG_CLIP_FLAG, 2), VU->code&0xffffff );
+
+	if( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) )
+		MOV32ItoM( VU_VI_ADDR(REG_CLIP_FLAG, 1), VU->code&0xffffff );
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// FCGET
+//------------------------------------------------------------------
+void recVUMI_FCGET( VURegs *VU, int info )
+{
+	int itreg;
+	if(_It_ == 0) return;
+	//Console::WriteLn("recVUMI_FCGET");
+	itreg = ALLOCVI(_It_, MODE_WRITE);
+
+	MOV32MtoR(itreg, VU_VI_ADDR(REG_CLIP_FLAG, 1));
+	AND32ItoR(itreg, 0x0fff);
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// _recbranchAddr()
+//
+// NOTE: Due to static var dependencies, several SuperVU branch instructions
+// are still located in sVU_zerorec.cpp.
+//------------------------------------------------------------------ + +//------------------------------------------------------------------ +// MFP* +//------------------------------------------------------------------ +void recVUMI_MFP(VURegs *VU, int info) +{ + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; + //Console::WriteLn("recVUMI_MFP"); + if( _XYZW_SS ) { + _vuFlipRegSS(VU, EEREC_T); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 1)); + SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP); + _vuFlipRegSS(VU, EEREC_T); + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 1)); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_T, VU_VI_ADDR(REG_P, 1)); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// WAITP +//------------------------------------------------------------------ +static PCSX2_ALIGNED16(float s_tempmem[4]); +void recVUMI_WAITP(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_WAITP"); +// if( info & PROCESS_VU_SUPER ) +// SuperVUFlush(1, 1); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// vuSqSumXYZ()* +// +// NOTE: In all EFU insts, EEREC_D is a temp reg +//------------------------------------------------------------------ +void vuSqSumXYZ(int regd, int regs, int regtemp) // regd.x = x ^ 2 + y ^ 2 + z ^ 2 +{ + //Console::WriteLn("VU: SUMXYZ"); + if( cpucaps.hasStreamingSIMD4Extensions ) + { + SSE_MOVAPS_XMM_to_XMM(regd, regs); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat2(regd, regtemp, 0xf); + SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71); + } + else + { + SSE_MOVAPS_XMM_to_XMM(regtemp, regs); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat2(regtemp, regd, 0xf); + SSE_MULPS_XMM_to_XMM(regtemp, regtemp); // xyzw ^ 2 + + if( cpucaps.hasStreamingSIMD3Extensions ) { + SSE3_HADDPS_XMM_to_XMM(regd, regtemp); + SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x ^ 2 + y ^ 2 + z ^ 2 + SSE_MOVHLPS_XMM_to_XMM(regd, regd); // regd.x = regd.z + } + else { + SSE_MOVSS_XMM_to_XMM(regd, regtemp); + SSE2_PSHUFLW_XMM_to_XMM(regtemp, regtemp, 0x4e); // wzyx -> wzxy + SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); // wzxy -> wxyz + SSE_ADDSS_XMM_to_XMM(regd, regtemp); // x ^ 2 + y ^ 2 + z ^ 2 + } + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ESADD* +//------------------------------------------------------------------ +void recVUMI_ESADD( VURegs *VU, int info) +{ + //Console::WriteLn("VU: ESADD"); + assert( VU == &VU1 ); + if( EEREC_TEMP == EEREC_D ) { // special code to reset P ( FixMe: don't know if this is still needed! 
(cottonvibes) ) + Console::Notice("ESADD: Resetting P reg!!!\n"); + MOV32ItoM(VU_VI_ADDR(REG_P, 0), 0); + return; + } + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ERSADD* +//------------------------------------------------------------------ +void recVUMI_ERSADD( VURegs *VU, int info ) +{ + //Console::WriteLn("VU: ERSADD"); + assert( VU == &VU1 ); + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); + // don't use RCPSS (very bad precision) + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ELENG* +//------------------------------------------------------------------ +void recVUMI_ELENG( VURegs *VU, int info ) +{ + //Console::WriteLn("VU: ELENG"); + assert( VU == &VU1 ); + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive + SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_D); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ERLENG* +//------------------------------------------------------------------ +void recVUMI_ERLENG( VURegs *VU, int info ) +{ + //Console::WriteLn("VU: ERLENG"); + assert( VU == &VU1 ); + vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_D, (uptr)g_maxvals); // Only need to do positive clamp since (x ^ 2 + y ^ 2 + z ^ 2) is positive + SSE_SQRTSS_XMM_to_XMM(EEREC_D, EEREC_D); // regd <- sqrt(x^2 + y^2 + z^2) + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // temp = 1 / sqrt(x^2 + y^2 + z^2) + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// EATANxy +//------------------------------------------------------------------ +void recVUMI_EATANxy( VURegs *VU, int info ) +{ + assert( VU == &VU1 ); + //Console::WriteLn("recVUMI_EATANxy"); + if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { + SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); + FLD32((uptr)&s_tempmem[0]); + FLD32((uptr)&s_tempmem[1]); + } + else { + if( xmmregs[EEREC_S].mode & MODE_WRITE ) { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); + xmmregs[EEREC_S].mode &= ~MODE_WRITE; + } + + FLD32((uptr)&VU->VF[_Fs_].UL[0]); + FLD32((uptr)&VU->VF[_Fs_].UL[1]); + } + + FPATAN(); + FSTP32(VU_VI_ADDR(REG_P, 0)); +} +//------------------------------------------------------------------ + + 
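+// x87 note for the EATAN* family: FPATAN computes atan(ST(1)/ST(0)) and pops,
+// so the first FLD above supplies the numerator and the second the denominator.
+// The MODE_WRITE|MODE_NOFLUSH path spills only the needed components to the
+// 16-byte-aligned s_tempmem scratch instead of flushing the whole XMM register
+// back to VU->VF.
+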
+//------------------------------------------------------------------ +// EATANxz +//------------------------------------------------------------------ +void recVUMI_EATANxz( VURegs *VU, int info ) +{ + assert( VU == &VU1 ); + //Console::WriteLn("recVUMI_EATANxz"); + if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { + SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); + FLD32((uptr)&s_tempmem[0]); + FLD32((uptr)&s_tempmem[2]); + } + else { + if( xmmregs[EEREC_S].mode & MODE_WRITE ) { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); + xmmregs[EEREC_S].mode &= ~MODE_WRITE; + } + + FLD32((uptr)&VU->VF[_Fs_].UL[0]); + FLD32((uptr)&VU->VF[_Fs_].UL[2]); + } + FPATAN(); + FSTP32(VU_VI_ADDR(REG_P, 0)); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ESUM* +//------------------------------------------------------------------ +void recVUMI_ESUM( VURegs *VU, int info ) +{ + //Console::WriteLn("VU: ESUM"); + assert( VU == &VU1 ); + + if( cpucaps.hasStreamingSIMD3Extensions ) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat_useEAX(info, EEREC_TEMP, 0xf); + SSE3_HADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + SSE3_HADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + } + else { + SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // z, w, z, w + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // z+x, w+y, z+z, w+w + SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // z+x, z+x, w+y, w+y + SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // w+y, w+y, w+y, w+y + SSE_ADDSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); // x+y+z+w, w+y, w+y, w+y + } + + vuFloat_useEAX(info, EEREC_TEMP, 8); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ERCPR* +//------------------------------------------------------------------ +void recVUMI_ERCPR( VURegs *VU, int info ) +{ + assert( VU == &VU1 ); + //Console::WriteLn("VU1: ERCPR"); + + // don't use RCPSS (very bad precision) + switch ( _Fsf_ ) { + case 0: //0001 + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + break; + case 1: //0010 + SSE2_PSHUFLW_XMM_to_XMM(EEREC_S, EEREC_S, 0x4e); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE2_PSHUFLW_XMM_to_XMM(EEREC_S, EEREC_S, 0x4e); + break; + case 2: //0100 + SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc6); + break; + case 3: //1000 + SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27); + if (CHECK_VU_EXTRA_OVERFLOW) vuFloat5_useEAX(EEREC_S, EEREC_TEMP, 8); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); // temp <- 1 + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27); + break; + } + + vuFloat_useEAX(info, EEREC_TEMP, 8); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); +} +//------------------------------------------------------------------ + + 
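+// ERCPR above pays for a true DIVSS against the 1.0f constant (VU_ONE) instead
+// of RCPSS: RCPSS is only a reciprocal approximation (roughly 12 bits of
+// precision), while DIVSS gives a full-precision result. Each case shuffles the
+// selected field into the low slot, divides, then applies the same shuffle
+// again to restore EEREC_S; the shuffle controls used here (0x4e, 0xc6, 0x27)
+// are all self-inverse permutations.
+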
+//------------------------------------------------------------------ +// ESQRT* +//------------------------------------------------------------------ +void recVUMI_ESQRT( VURegs *VU, int info ) +{ + assert( VU == &VU1 ); + + //Console::WriteLn("VU1: ESQRT"); + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // abs(x) + if (CHECK_VU_OVERFLOW) SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Only need to do positive clamp + SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ERSQRT* +//------------------------------------------------------------------ +void recVUMI_ERSQRT( VURegs *VU, int info ) +{ + int t1reg = _vuGetTempXMMreg(info); + + assert( VU == &VU1 ); + //Console::WriteLn("VU1: ERSQRT"); + + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)const_clip); // abs(x) + SSE_MINSS_M32_to_XMM(EEREC_TEMP, (uptr)g_maxvals); // Clamp Infinities to Fmax + SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // SQRT(abs(x)) + + if( t1reg >= 0 ) + { + SSE_MOVSS_M32_to_XMM(t1reg, (uptr)VU_ONE); + SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); + vuFloat_useEAX(info, t1reg, 8); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), t1reg); + _freeXMMreg(t1reg); + } + else + { + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)VU_ONE); + SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 0)); + vuFloat_useEAX(info, EEREC_TEMP, 8); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ESIN +//------------------------------------------------------------------ +void recVUMI_ESIN( VURegs *VU, int info ) +{ + assert( VU == &VU1 ); + + //Console::WriteLn("recVUMI_ESIN"); + if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { + switch(_Fsf_) { + case 0: SSE_MOVSS_XMM_to_M32((uptr)s_tempmem, EEREC_S); + case 1: SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); + default: SSE_MOVHPS_XMM_to_M64((uptr)&s_tempmem[2], EEREC_S); + } + FLD32((uptr)&s_tempmem[_Fsf_]); + } + else { + if( xmmregs[EEREC_S].mode & MODE_WRITE ) { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); + xmmregs[EEREC_S].mode &= ~MODE_WRITE; + } + + FLD32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); + } + + FSIN(); + FSTP32(VU_VI_ADDR(REG_P, 0)); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// EATAN +//------------------------------------------------------------------ +void recVUMI_EATAN( VURegs *VU, int info ) +{ + assert( VU == &VU1 ); + + //Console::WriteLn("recVUMI_EATAN"); + if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) { + switch(_Fsf_) { + case 0: SSE_MOVSS_XMM_to_M32((uptr)s_tempmem, EEREC_S); + case 1: SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S); + default: SSE_MOVHPS_XMM_to_M64((uptr)&s_tempmem[2], EEREC_S); + } + FLD32((uptr)&s_tempmem[_Fsf_]); + } + else { + if( xmmregs[EEREC_S].mode & MODE_WRITE ) { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S); + xmmregs[EEREC_S].mode &= ~MODE_WRITE; + } + } + + FLD1(); + FLD32((uptr)&VU->VF[_Fs_].UL[_Fsf_]); + FPATAN(); + 
FSTP32(VU_VI_ADDR(REG_P, 0));
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// EEXP
+//------------------------------------------------------------------
+void recVUMI_EEXP( VURegs *VU, int info )
+{
+	assert( VU == &VU1 );
+	//Console::WriteLn("recVUMI_EEXP");
+	FLDL2E();
+
+	if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) {
+		switch(_Fsf_) {
+			// note: the missing breaks (as in ESIN/EATAN above) are harmless;
+			// later cases just store extra elements, and s_tempmem[_Fsf_] is
+			// always written before it is read below.
+			case 0: SSE_MOVSS_XMM_to_M32((uptr)s_tempmem, EEREC_S);
+			case 1: SSE_MOVLPS_XMM_to_M64((uptr)s_tempmem, EEREC_S);
+			default: SSE_MOVHPS_XMM_to_M64((uptr)&s_tempmem[2], EEREC_S);
+		}
+		FMUL32((uptr)&s_tempmem[_Fsf_]);
+	}
+	else {
+		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
+			SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[_Fs_], EEREC_S);
+			xmmregs[EEREC_S].mode &= ~MODE_WRITE;
+		}
+
+		FMUL32((uptr)&VU->VF[_Fs_].UL[_Fsf_]);
+	}
+
+	// basically do 2^(log_2(e) * val): split the product into integer and
+	// fractional parts, compute 2^frac with F2XM1 (which returns 2^x - 1 and
+	// is only valid on [-1,1]), add 1, then FSCALE by 2^int and pop the
+	// leftover integer part.
+	FLD(0);
+	FRNDINT();
+	FXCH(1);
+	FSUB32Rto0(1);
+	F2XM1();
+	FLD1();
+	FADD320toR(1);
+	FSCALE();
+	FSTP(1);
+
+	FSTP32(VU_VI_ADDR(REG_P, 0));
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// XITOP
+//------------------------------------------------------------------
+void recVUMI_XITOP( VURegs *VU, int info )
+{
+	int itreg;
+	if (_It_ == 0) return;
+	//Console::WriteLn("recVUMI_XITOP");
+	itreg = ALLOCVI(_It_, MODE_WRITE);
+	MOVZX32M16toR( itreg, (uptr)&VU->vifRegs->itop );
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// XTOP
+//------------------------------------------------------------------
+void recVUMI_XTOP( VURegs *VU, int info )
+{
+	int itreg;
+	if ( _It_ == 0 ) return;
+	//Console::WriteLn("recVUMI_XTOP");
+	itreg = ALLOCVI(_It_, MODE_WRITE);
+	MOVZX32M16toR( itreg, (uptr)&VU->vifRegs->top );
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// VU1XGKICK_MTGSTransfer() - Called by sVU_zerorec.cpp
+//------------------------------------------------------------------
+void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
+{
+	u32 size;
+	u32* data = (u32*)((u8*)pMem + (addr&0x3fff));
+
+	// fixme: The gifTagDummy function in the MTGS (called by PrepDataPacket) has a
+	// hack that aborts the packet if it goes past the end of VU1 memory.
+	// Chances are this should be a "loops around memory" situation, and the packet
+	// should be continued starting at addr zero (0).
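+	//
+	// Size math: addr&0x3fff wraps the address into VU1's 16KB data memory, so
+	// (0x4000-(addr&0x3fff))>>4 is the number of 16-byte quadwords between addr
+	// and the top of that memory, i.e. the most this kick can send without wrapping.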
+ + size = mtgsThread->PrepDataPacket( GIF_PATH_1, data, (0x4000-(addr&0x3fff)) >> 4); + jASSUME( size > 0 ); + + //if( size > 0 ) + { + u8* pmem = mtgsThread->GetDataPacketPtr(); + + /* if((size << 4) > (0x4000-(addr&0x3fff))) + { + //DevCon::Notice("addr + Size = 0x%x, transferring %x then doing %x", params (addr&0x3fff) + (size << 4), (0x4000-(addr&0x3fff)) >> 4, size - ((0x4000-(addr&0x3fff)) >> 4)); + memcpy_aligned(pmem, (u8*)pMem+addr, 0x4000-(addr&0x3fff)); + size -= (0x4000-(addr&0x3fff)) >> 4; + //DevCon::Notice("Size left %x", params size); + pmem += 0x4000-(addr&0x3fff); + memcpy_aligned(pmem, (u8*)pMem, size<<4); + } + else + {*/ + memcpy_aligned(pmem, (u8*)pMem+addr, size<<4); + // } + mtgsThread->SendDataPacket(); + } +} +//------------------------------------------------------------------ diff --git a/pcsx2/x86/iVUmicro.cpp b/pcsx2/x86/sVU_Micro.cpp similarity index 96% rename from pcsx2/x86/iVUmicro.cpp rename to pcsx2/x86/sVU_Micro.cpp index 7738a52da9..89ddd307e2 100644 --- a/pcsx2/x86/iVUmicro.cpp +++ b/pcsx2/x86/sVU_Micro.cpp @@ -1,1740 +1,1740 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "GS.h" -#include "R5900OpcodeTables.h" -#include "iR5900.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCOP0.h" -#include "VUmicro.h" -#include "VUflags.h" -#include "iVUmicro.h" -#include "iVUops.h" -#include "iVUzerorec.h" - -#ifdef _WIN32 -#pragma warning(disable:4244) -#pragma warning(disable:4761) -#endif -//------------------------------------------------------------------ - -// fixme - VUmicro should really use its own static vars for pc and branch. -// Sharing with the EE's copies of pc and branch is not cool! (air) - -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register -#define _It_ (_Ft_ & 15) -#define _Is_ (_Fs_ & 15) -#define _Id_ (_Fd_ & 15) - -#define _X (( VU->code>>24) & 0x1) -#define _Y (( VU->code>>23) & 0x1) -#define _Z (( VU->code>>22) & 0x1) -#define _W (( VU->code>>21) & 0x1) - -#define _XYZW_SS (_X+_Y+_Z+_W==1) - -#define _Fsf_ (( VU->code >> 21) & 0x03) -#define _Ftf_ (( VU->code >> 23) & 0x03) - -#define _Imm11_ (s32)(VU->code & 0x400 ? 
0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) -#define _UImm11_ (s32)(VU->code & 0x7ff) - -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] -#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] -#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] - -#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] -#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] -#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] - -#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) - -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] -#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] -#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] -#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] - -#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Global Variables -//------------------------------------------------------------------ -int vucycle; - -PCSX2_ALIGNED16(const float s_fones[8]) = {1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f}; -PCSX2_ALIGNED16(const u32 s_mask[4]) = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; -PCSX2_ALIGNED16(const u32 s_expmask[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -PCSX2_ALIGNED16(const u32 g_minvals[4]) = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff}; -PCSX2_ALIGNED16(const u32 g_maxvals[4]) = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff}; -PCSX2_ALIGNED16(const u32 const_clip[8]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff, - 0x80000000, 0x80000000, 0x80000000, 0x80000000}; -PCSX2_ALIGNED(64, const u32 g_ones[4]) = {0x00000001, 0x00000001, 0x00000001, 0x00000001}; -PCSX2_ALIGNED16(const u32 g_minvals_XYZW[16][4]) = -{ - { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 - { 0xffffffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //0001 - { 0xffffffff, 0xffffffff, 0xff7fffff, 0xffffffff }, //0010 - { 0xffffffff, 0xffffffff, 0xff7fffff, 0xff7fffff }, //0011 - { 0xffffffff, 0xff7fffff, 0xffffffff, 0xffffffff }, //0100 - { 0xffffffff, 0xff7fffff, 0xffffffff, 0xff7fffff }, //0101 - { 0xffffffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //0110 - { 0xffffffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //0111 - { 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000 - { 0xff7fffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //1001 - { 0xff7fffff, 0xffffffff, 0xff7fffff, 0xffffffff }, //1010 - { 0xff7fffff, 0xffffffff, 0xff7fffff, 0xff7fffff }, //1011 - { 0xff7fffff, 0xff7fffff, 0xffffffff, 0xffffffff }, //1100 - { 0xff7fffff, 0xff7fffff, 0xffffffff, 0xff7fffff }, //1101 - { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //1110 - { 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111 -}; -PCSX2_ALIGNED16(const u32 g_maxvals_XYZW[16][4])= -{ - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0000 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //0001 - { 0x7fffffff, 0x7fffffff, 0x7f7fffff, 0x7fffffff }, //0010 - { 0x7fffffff, 0x7fffffff, 0x7f7fffff, 0x7f7fffff }, //0011 - { 0x7fffffff, 0x7f7fffff, 0x7fffffff, 0x7fffffff }, //0100 - { 0x7fffffff, 0x7f7fffff, 0x7fffffff, 0x7f7fffff }, //0101 - { 0x7fffffff, 0x7f7fffff, 0x7f7fffff, 0x7fffffff }, //0110 - { 0x7fffffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //0111 - { 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000 - { 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //1001 - { 0x7f7fffff, 0x7fffffff, 0x7f7fffff, 0x7fffffff }, //1010 - { 0x7f7fffff, 0x7fffffff, 0x7f7fffff, 0x7f7fffff }, //1011 - { 0x7f7fffff, 0x7f7fffff, 0x7fffffff, 0x7fffffff }, //1100 - { 
0x7f7fffff, 0x7f7fffff, 0x7fffffff, 0x7f7fffff }, //1101 - { 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7fffffff }, //1110 - { 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111 -}; -//------------------------------------------------------------------ - -//------------------------------------------------------------------ -// VU Pipeline/Test Stalls/Analyzing Functions -//------------------------------------------------------------------ -void _recvuFMACflush(VURegs * VU, bool intermediate) { - int i; - - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; - - if( intermediate ) { - if ((vucycle - VU->fmac[i].sCycle) > VU->fmac[i].Cycle) { -// VUM_LOG("flushing FMAC pipe[%d]", i); - VU->fmac[i].enable = 0; - } - } - else { - if ((vucycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) { -// VUM_LOG("flushing FMAC pipe[%d]", i); - VU->fmac[i].enable = 0; - } - } - } -} - -void _recvuFDIVflush(VURegs * VU, bool intermediate) { - if (VU->fdiv.enable == 0) return; - - if( intermediate ) { - if ((vucycle - VU->fdiv.sCycle) > VU->fdiv.Cycle) { -// Console::WriteLn("flushing FDIV pipe"); - VU->fdiv.enable = 0; - } - } - else { - if ((vucycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) { -// Console::WriteLn("flushing FDIV pipe"); - VU->fdiv.enable = 0; - } - } -} - -void _recvuEFUflush(VURegs * VU, bool intermediate) { - if (VU->efu.enable == 0) return; - - if( intermediate ) { - if ((vucycle - VU->efu.sCycle) > VU->efu.Cycle) { -// Console::WriteLn("flushing FDIV pipe"); - VU->efu.enable = 0; - } - } - else { - if ((vucycle - VU->efu.sCycle) >= VU->efu.Cycle) { -// Console::WriteLn("flushing FDIV pipe"); - VU->efu.enable = 0; - } - } -} - -void _recvuIALUflush(VURegs * VU, bool intermediate) { - int i; - - for (i=0; i<8; i++) { - if (VU->ialu[i].enable == 0) continue; - - if( intermediate ) { - if ((vucycle - VU->ialu[i].sCycle) > VU->ialu[i].Cycle) { -// VUM_LOG("flushing IALU pipe[%d]", i); - VU->ialu[i].enable = 0; - } - } - else { - if ((vucycle - VU->ialu[i].sCycle) >= VU->ialu[i].Cycle) { -// VUM_LOG("flushing IALU pipe[%d]", i); - VU->ialu[i].enable = 0; - } - } - } -} - -void _recvuTestPipes(VURegs * VU, bool intermediate) { // intermediate = true if called by upper FMAC stall detection - _recvuFMACflush(VU, intermediate); - _recvuFDIVflush(VU, intermediate); - _recvuEFUflush(VU, intermediate); - _recvuIALUflush(VU, intermediate); -} - -void _recvuFMACTestStall(VURegs * VU, int reg, int xyzw) { - int cycle; - int i; - u32 mask = 0; - - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 0) continue; - if (VU->fmac[i].reg == reg && (VU->fmac[i].xyzw & xyzw)) break; - } - - if (i == 8) return; - - // do a perchannel delay - // old code -// cycle = VU->fmac[i].Cycle - (vucycle - VU->fmac[i].sCycle); - - // new code - mask = 4; // w -// if( VU->fmac[i].xyzw & 1 ) mask = 4; // w -// else if( VU->fmac[i].xyzw & 2 ) mask = 3; // z -// else if( VU->fmac[i].xyzw & 4 ) mask = 2; // y -// else { -// assert(VU->fmac[i].xyzw & 8 ); -// mask = 1; // x -// } - -// mask = 0; -// if( VU->fmac[i].xyzw & 1 ) mask++; // w -// else if( VU->fmac[i].xyzw & 2 ) mask++; // z -// else if( VU->fmac[i].xyzw & 4 ) mask++; // y -// else if( VU->fmac[i].xyzw & 8 ) mask++; // x - - assert( (int)VU->fmac[i].sCycle < (int)vucycle ); - cycle = 0; - if( vucycle - VU->fmac[i].sCycle < mask ) - cycle = mask - (vucycle - VU->fmac[i].sCycle); - - VU->fmac[i].enable = 0; - vucycle+= cycle; - _recvuTestPipes(VU, true); // for lower instructions -} - -void _recvuIALUTestStall(VURegs * VU, int reg) { - int cycle; - int i; - u32 
latency; - - for (i=0; i<8; i++) { - if (VU->ialu[i].enable == 0) continue; - if (VU->ialu[i].reg == reg) break; - } - - if (i == 8) return; - - latency = VU->ialu[i].Cycle + 1; - cycle = 0; - if( vucycle - VU->ialu[i].sCycle < latency ) - cycle = latency - (vucycle - VU->ialu[i].sCycle); - - VU->ialu[i].enable = 0; - vucycle+= cycle; - _recvuTestPipes(VU, true); -} - -void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) { - int i; - - /* find a free fmac pipe */ - for (i=0; i<8; i++) { - if (VU->fmac[i].enable == 1) continue; - break; - } - - if (i==8) Console::Error("*PCSX2*: error , out of fmacs"); -// VUM_LOG("adding FMAC pipe[%d]; reg %d", i, reg); - - VU->fmac[i].enable = 1; - VU->fmac[i].sCycle = vucycle; - VU->fmac[i].Cycle = 3; - VU->fmac[i].xyzw = xyzw; - VU->fmac[i].reg = reg; -} - -void _recvuFDIVAdd(VURegs * VU, int cycles) { -// Console::WriteLn("adding FDIV pipe"); - VU->fdiv.enable = 1; - VU->fdiv.sCycle = vucycle; - VU->fdiv.Cycle = cycles; -} - -void _recvuEFUAdd(VURegs * VU, int cycles) { -// Console::WriteLn("adding EFU pipe"); - VU->efu.enable = 1; - VU->efu.sCycle = vucycle; - VU->efu.Cycle = cycles; -} - -void _recvuIALUAdd(VURegs * VU, int reg, int cycles) { - int i; - - /* find a free ialu pipe */ - for (i=0; i<8; i++) { - if (VU->ialu[i].enable == 1) continue; - break; - } - - if (i==8) Console::Error("*PCSX2*: error , out of ialus"); - - VU->ialu[i].enable = 1; - VU->ialu[i].sCycle = vucycle; - VU->ialu[i].Cycle = cycles; - VU->ialu[i].reg = reg; -} - -void _recvuTestIALUStalls(VURegs * VU, _VURegsNum *VUregsn) { - - int VIread0 = 0, VIread1 = 0; // max 2 integer registers are read simulataneously - int i; - - for(i=0;i<16;i++) { // find used integer(vi00-vi15) registers - if( (VUregsn->VIread >> i) & 1 ) { - if( VIread0 ) VIread1 = i; - else VIread0 = i; - } - } - - if( VIread0 ) _recvuIALUTestStall(VU, VIread0); - if( VIread1 ) _recvuIALUTestStall(VU, VIread1); -} - -void _recvuAddIALUStalls(VURegs * VU, _VURegsNum *VUregsn) { - if (VUregsn->VIwrite && VUregsn->cycles) { - int VIWrite0 = 0; - int i; - - for(i=0;i<16;i++) { // find used(vi00-vi15) registers - if( (VUregsn->VIwrite >> i) & 1 ) { - VIWrite0 = i; - } - } - if( VIWrite0 ) _recvuIALUAdd(VU, VIWrite0, VUregsn->cycles); - } -} - -void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn, bool upper) { - - if( VUregsn->VFread0 && (VUregsn->VFread0 == VUregsn->VFread1) ) { - _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw|VUregsn->VFr1xyzw); - } - else { - if (VUregsn->VFread0) _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw); - if (VUregsn->VFread1) _recvuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw); - } - - if( !upper && VUregsn->VIread ) _recvuTestIALUStalls(VU, VUregsn); // for lower instructions which read integer reg -} - -void _recvuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) { - - if (VUregsn->VFwrite) _recvuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw); - else if (VUregsn->VIwrite & (1 << REG_CLIP_FLAG)) _recvuFMACAdd(VU, -REG_CLIP_FLAG, 0); // REG_CLIP_FLAG pipe - else _recvuFMACAdd(VU, 0, 0); // cause no data dependency with fp registers -} - -void _recvuFlushFDIV(VURegs * VU) { - int cycle; - - if (VU->fdiv.enable == 0) return; - - cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 or 12) -// Console::WriteLn("waiting FDIV pipe %d", params cycle); - VU->fdiv.enable = 0; - vucycle+= cycle; -} - -void _recvuFlushEFU(VURegs * VU) { - int cycle; - - if (VU->efu.enable == 0) return; - - cycle = 
VU->efu.Cycle - (vucycle - VU->efu.sCycle); -// Console::WriteLn("waiting FDIV pipe %d", params cycle); - VU->efu.enable = 0; - vucycle+= cycle; -} - -void _recvuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { - _recvuTestFMACStalls(VU,VUregsn, false); - _recvuFlushFDIV(VU); -} - -void _recvuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { - _recvuTestFMACStalls(VU,VUregsn, false); - _recvuFlushEFU(VU); -} - -void _recvuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) { -// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - if (VUregsn->VIwrite & (1 << REG_Q)) { - _recvuFDIVAdd(VU, VUregsn->cycles); - } -} - -void _recvuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) { -// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn); - if (VUregsn->VIwrite & (1 << REG_P)) { - _recvuEFUAdd(VU, VUregsn->cycles); - } -} - -void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, true); break; - } -} - -void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, false); break; - case VUPIPE_FDIV: _recvuTestFDIVStalls(VU, VUregsn); break; - case VUPIPE_EFU: _recvuTestEFUStalls(VU, VUregsn); break; - case VUPIPE_IALU: _recvuTestIALUStalls(VU, VUregsn); break; - case VUPIPE_BRANCH: _recvuTestIALUStalls(VU, VUregsn); break; - } -} - -void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break; - } -} - -void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) { - switch (VUregsn->pipe) { - case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break; - case VUPIPE_FDIV: _recvuAddFDIVStalls(VU, VUregsn); break; - case VUPIPE_EFU: _recvuAddEFUStalls(VU, VUregsn); break; - case VUPIPE_IALU: _recvuAddIALUStalls(VU, VUregsn); break; // note: only ILW and ILWR cause stall in IALU pipe - } -} - -void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs) -{ - _VURegsNum* lregs; - _VURegsNum* uregs; - int *ptr; - - lregs = pCodeRegs; - uregs = pCodeRegs+1; - - ptr = (int*)&VU->Micro[pc]; - pc += 8; - - if (ptr[1] & 0x40000000) { // EOP - branch |= 8; - } - - VU->code = ptr[1]; - if (VU == &VU1) VU1regs_UPPER_OPCODE[VU->code & 0x3f](uregs); - else VU0regs_UPPER_OPCODE[VU->code & 0x3f](uregs); - - _recvuTestUpperStalls(VU, uregs); - switch(VU->code & 0x3f) { - case 0x10: case 0x11: case 0x12: case 0x13: - case 0x14: case 0x15: case 0x16: case 0x17: - case 0x1d: case 0x1f: - case 0x2b: case 0x2f: - break; - - case 0x3c: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - case 0x3d: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: case 0x7: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - case 0x3e: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - case 0x3f: - switch ((VU->code >> 6) & 0x1f) { - case 0x4: case 0x5: case 0x7: case 0xb: - break; - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - break; - - default: - info->statusflag = 4; - info->macflag = 4; - break; - } - - if (uregs->VIread & (1 << REG_Q)) { info->q |= 2; } - if (uregs->VIread & (1 << REG_P)) { info->p |= 2; assert( VU == &VU1 ); } - - // check upper flags - if (ptr[1] & 0x80000000) { // I flag - info->cycle = vucycle; 
- memzero_obj(*lregs); - } - else { - - VU->code = ptr[0]; - if (VU == &VU1) VU1regs_LOWER_OPCODE[VU->code >> 25](lregs); - else VU0regs_LOWER_OPCODE[VU->code >> 25](lregs); - - _recvuTestLowerStalls(VU, lregs); - info->cycle = vucycle; - - if (lregs->pipe == VUPIPE_BRANCH) { - branch |= 1; - } - - if (lregs->VIwrite & (1 << REG_Q)) { - info->q |= 4; - info->cycles = lregs->cycles; - info->pqinst = (VU->code&2)>>1; // rsqrt is 2 - } - else if (lregs->pipe == VUPIPE_FDIV) { - info->q |= 8|1; - info->pqinst = 0; - } - - if (lregs->VIwrite & (1 << REG_P)) { - assert( VU == &VU1 ); - info->p |= 4; - info->cycles = lregs->cycles; - - switch( VU->code & 0xff ) { - case 0xfd: info->pqinst = 0; break; //eatan - case 0x7c: info->pqinst = 0; break; //eatanxy - case 0x7d: info->pqinst = 0; break; //eatanzy - case 0xfe: info->pqinst = 1; break; //eexp - case 0xfc: info->pqinst = 2; break; //esin - case 0x3f: info->pqinst = 3; break; //erleng - case 0x3e: info->pqinst = 4; break; //eleng - case 0x3d: info->pqinst = 4; break; //ersadd - case 0xbd: info->pqinst = 4; break; //ersqrt - case 0xbe: info->pqinst = 5; break; //ercpr - case 0xbc: info->pqinst = 5; break; //esqrt - case 0x7e: info->pqinst = 5; break; //esum - case 0x3c: info->pqinst = 6; break; //esadd - default: assert(0); - } - } - else if (lregs->pipe == VUPIPE_EFU) { - info->p |= 8|1; - } - - if (lregs->VIread & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_READ; - if (lregs->VIread & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_READ; - - if (lregs->VIwrite & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_WRITE; - if (lregs->VIwrite & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_WRITE; - - if (lregs->VIread & (1 << REG_Q)) { info->q |= 2; } - if (lregs->VIread & (1 << REG_P)) { info->p |= 2; assert( VU == &VU1 ); } - - _recvuAddLowerStalls(VU, lregs); - } - - _recvuAddUpperStalls(VU, uregs); - _recvuTestPipes(VU, false); - - vucycle++; -} - -int eeVURecompileCode(VURegs *VU, _VURegsNum* regs) -{ - int info = 0; - int vfread0=-1, vfread1 = -1, vfwrite = -1, vfacc = -1, vftemp=-1; - - assert( regs != NULL ); - - if( regs->VFread0 ) _addNeededVFtoXMMreg(regs->VFread0); - if( regs->VFread1 ) _addNeededVFtoXMMreg(regs->VFread1); - if( regs->VFwrite ) _addNeededVFtoXMMreg(regs->VFwrite); - if( regs->VIread & (1<VIread & (1<VFread0 ) vfread0 = _allocVFtoXMMreg(VU, -1, regs->VFread0, MODE_READ); - else if( regs->VIread & (1<VFread1 ) vfread1 = _allocVFtoXMMreg(VU, -1, regs->VFread1, MODE_READ); - else if( (regs->VIread & (1<VFr1xyzw != 0xff) vfread1 = _allocVFtoXMMreg(VU, -1, 0, MODE_READ); - - if( regs->VIread & (1<VIwrite&(1<VIwrite & (1<VFwxyzw != 0xf?MODE_READ:0)); - } - - if( regs->VFwrite ) { - assert( !(regs->VIwrite&(1<VFwrite, MODE_WRITE|(regs->VFwxyzw != 0xf?MODE_READ:0)); - } - - if( vfacc>= 0 ) info |= PROCESS_EE_SET_ACC(vfacc); - if( vfwrite >= 0 ) { - if( regs->VFwrite == _Ft_ && vfread1 < 0 ) { - info |= PROCESS_EE_SET_T(vfwrite); - } - else { - assert( regs->VFwrite == _Fd_ ); - info |= PROCESS_EE_SET_D(vfwrite); - } - } - - if( vfread0 >= 0 ) info |= PROCESS_EE_SET_S(vfread0); - if( vfread1 >= 0 ) info |= PROCESS_EE_SET_T(vfread1); - - vftemp = _allocTempXMMreg(XMMT_FPS, -1); - info |= PROCESS_VU_SET_TEMP(vftemp); - - if( regs->VIwrite & (1 << REG_CLIP_FLAG) ) { - // CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC - int t1reg = _allocTempXMMreg(XMMT_FPS, -1); - int t2reg = _allocTempXMMreg(XMMT_FPS, -1); - - info |= PROCESS_EE_SET_D(t1reg); - info |= PROCESS_EE_SET_ACC(t2reg); - - _freeXMMreg(t1reg); // don't 
need - _freeXMMreg(t2reg); // don't need - } - else if( regs->VIwrite & (1<VI[reg].UL; - - if( read != 1 ) { - if( reg == REG_MAC_FLAG ) return (uptr)&VU->macflag; - if( reg == REG_CLIP_FLAG ) return (uptr)&VU->clipflag; - if( reg == REG_STATUS_FLAG ) return (uptr)&VU->statusflag; - if( reg == REG_Q ) return (uptr)&VU->q; - if( reg == REG_P ) return (uptr)&VU->p; - } - - return (uptr)&VU->VI[reg].UL; -} - -// gets a temp reg that is not EEREC_TEMP -int _vuGetTempXMMreg(int info) -{ - int t1reg = -1; - - if( _hasFreeXMMreg() ) { - t1reg = _allocTempXMMreg(XMMT_FPS, -1); - - if( t1reg == EEREC_TEMP ) { - if( _hasFreeXMMreg() ) { - int t = _allocTempXMMreg(XMMT_FPS, -1); - _freeXMMreg(t1reg); - t1reg = t; - } - else { - _freeXMMreg(t1reg); - t1reg = -1; - } - } - } - - return t1reg; -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Misc VU Reg Flipping/Merging Functions -//------------------------------------------------------------------ -void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw) -{ - switch (xyzw) { - case 0: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x00); break; - case 1: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x55); break; - case 2: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xaa); break; - case 3: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xff); break; - } -} - -void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw) -{ - switch (xyzw) { - case 0: SSE_MOVSS_XMM_to_XMM(dstreg, srcreg); break; - case 1: if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); - else SSE2_PSHUFLW_XMM_to_XMM(dstreg, srcreg, 0xee); - break; - case 2: SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); break; - case 3: if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); - else { SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); SSE2_PSHUFLW_XMM_to_XMM(dstreg, dstreg, 0xee); } - break; - } -} - -void _vuFlipRegSS(VURegs * VU, int reg) -{ - assert( _XYZW_SS ); - if( _Y ) SSE2_PSHUFLW_XMM_to_XMM(reg, reg, 0x4e); - else if( _Z ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc6); - else if( _W ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x27); -} - -void _vuFlipRegSS_xyzw(int reg, int xyzw) -{ - switch ( xyzw ) { - case 1: SSE2_PSHUFLW_XMM_to_XMM(reg, reg, 0x4e); break; - case 2: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc6); break; - case 3: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x27); break; - } -} - -void _vuMoveSS(VURegs * VU, int dstreg, int srcreg) -{ - assert( _XYZW_SS ); - if( _Y ) _unpackVFSS_xyzw(dstreg, srcreg, 1); - else if( _Z ) _unpackVFSS_xyzw(dstreg, srcreg, 2); - else if( _W ) _unpackVFSS_xyzw(dstreg, srcreg, 3); - else _unpackVFSS_xyzw(dstreg, srcreg, 0); -} - -// 1 - src, 0 - dest wzyx -void VU_MERGE0(int dest, int src) { // 0000s -} -void VU_MERGE1(int dest, int src) { // 1000 - SSE_MOVHLPS_XMM_to_XMM(src, dest); - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4); -} -void VU_MERGE1b(int dest, int src) { // 1000s - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); -} -void VU_MERGE2(int dest, int src) { // 0100 - SSE_MOVHLPS_XMM_to_XMM(src, dest); - SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64); -} -void VU_MERGE2b(int dest, int src) { // 0100s - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 
0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); -} -void VU_MERGE3(int dest, int src) { // 1100s - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); -} -void VU_MERGE4(int dest, int src) { // 0010 - SSE_MOVSS_XMM_to_XMM(src, dest); - SSE2_MOVSD_XMM_to_XMM(dest, src); -} -void VU_MERGE4b(int dest, int src) { // 0010s - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); -} -void VU_MERGE5(int dest, int src) { // 1010 - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8); -} -void VU_MERGE5b(int dest, int src) { // 1010s - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); -} -void VU_MERGE6(int dest, int src) { // 0110 - SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78); -} -void VU_MERGE6b(int dest, int src) { // 0110s - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); -} -void VU_MERGE7(int dest, int src) { // 1110 - SSE_MOVSS_XMM_to_XMM(src, dest); - SSE_MOVAPS_XMM_to_XMM(dest, src); -} -void VU_MERGE7b(int dest, int src) { // 1110s - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); -} -void VU_MERGE8(int dest, int src) { // 0001s - SSE_MOVSS_XMM_to_XMM(dest, src); -} -void VU_MERGE9(int dest, int src) { // 1001 - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2); -} -void VU_MERGE9b(int dest, int src) { // 1001s - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); -} -void VU_MERGE10(int dest, int src) { // 0101 - SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72); -} -void VU_MERGE10b(int dest, int src) { // 0101s - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); -} -void VU_MERGE11(int dest, int src) { // 1101s - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); -} -void VU_MERGE12(int dest, int src) { // 0011 - SSE2_MOVSD_XMM_to_XMM(dest, src); -} -void VU_MERGE13(int dest, int src) { // 1011 - SSE_MOVHLPS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, dest, 0x64); - SSE_MOVAPS_XMM_to_XMM(dest, src); -} -void VU_MERGE13b(int dest, int src) { // 1011s - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - 
SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0x27); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); -} -void VU_MERGE14(int dest, int src) { // 0111 - SSE_MOVHLPS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, dest, 0xc4); - SSE_MOVAPS_XMM_to_XMM(dest, src); -} -void VU_MERGE14b(int dest, int src) { // 0111s - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); - SSE_MOVSS_XMM_to_XMM(dest, src); - SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6); - SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6); -} -void VU_MERGE15(int dest, int src) { // 1111s - SSE_MOVAPS_XMM_to_XMM(dest, src); -} - -typedef void (*VUMERGEFN)(int dest, int src); - -static VUMERGEFN s_VuMerge[16] = { - VU_MERGE0, VU_MERGE1, VU_MERGE2, VU_MERGE3, - VU_MERGE4, VU_MERGE5, VU_MERGE6, VU_MERGE7, - VU_MERGE8, VU_MERGE9, VU_MERGE10, VU_MERGE11, - VU_MERGE12, VU_MERGE13, VU_MERGE14, VU_MERGE15 }; - -static VUMERGEFN s_VuMerge2[16] = { - VU_MERGE0, VU_MERGE1b, VU_MERGE2b, VU_MERGE3, - VU_MERGE4b, VU_MERGE5b, VU_MERGE6b, VU_MERGE7b, - VU_MERGE8, VU_MERGE9b, VU_MERGE10b, VU_MERGE11, - VU_MERGE12, VU_MERGE13b, VU_MERGE14b, VU_MERGE15 }; - -// Modifies the Source Reg! -void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw) { - xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) { - if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) { - xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); - SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw); - } - else s_VuMerge[xyzw](dest, src); - } -} -// Doesn't Modify the Source Reg! 
(ToDo: s_VuMerge2() has room for optimization) -void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw) { - xyzw &= 0xf; - if ( (dest != src) && (xyzw != 0) ) { - if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) { - xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3); - SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw); - } - else s_VuMerge2[xyzw](dest, src); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Misc VU Reg Clamping/Overflow Functions -//------------------------------------------------------------------ -#define CLAMP_NORMAL_SSE4(n) \ - SSE_MOVAPS_XMM_to_XMM(regTemp, regd);\ - SSE4_PMINUD_M128_to_XMM(regd, (uptr)&g_minvals_XYZW[n][0]);\ - SSE2_PSUBD_XMM_to_XMM(regTemp, regd);\ - SSE2_PCMPGTD_M128_to_XMM(regTemp, (uptr)&g_ones[0]);\ - SSE4_PMINSD_M128_to_XMM(regd, (uptr)&g_maxvals_XYZW[n][0]);\ - SSE2_PSLLD_I8_to_XMM(regTemp, 31);\ - SSE_XORPS_XMM_to_XMM(regd, regTemp); - -#define CLAMP_SIGN_SSE4(n) \ - SSE4_PMINSD_M128_to_XMM(regd, (uptr)&g_maxvals_XYZW[n][0]);\ - SSE4_PMINUD_M128_to_XMM(regd, (uptr)&g_minvals_XYZW[n][0]); - -void vFloat0(int regd, int regTemp) { } //0000 -void vFloat1(int regd, int regTemp) { //1000 - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); -} -void vFloat1c(int regd, int regTemp) { //1000 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(1); - } - else { - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_ORPS_XMM_to_XMM(regd, regTemp); - } -} -void vFloat2(int regd, int regTemp) { //0100 - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); -} -void vFloat2c(int regd, int regTemp) { //0100 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(2); - } - else { - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_ORPS_XMM_to_XMM(regd, regTemp); - } -} -void vFloat3(int regd, int regTemp) { //1100 - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36); -} -void vFloat3b(int regd, int regTemp) { //1100 //regTemp is Modified - SSE2_MOVSD_XMM_to_XMM(regTemp, regd); - SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); - SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); - SSE2_MOVSD_XMM_to_XMM(regd, regTemp); -} -void vFloat3c(int regd, int regTemp) { //1100 - if ( cpucaps.hasStreamingSIMD4Extensions ) { - CLAMP_SIGN_SSE4(3); - } - else { - SSE_MOVAPS_XMM_to_XMM(regTemp, regd); - SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); - SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); - 
SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat4(int regd, int regTemp) { //0010
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-}
-void vFloat4c(int regd, int regTemp) { //0010
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(4);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat5(int regd, int regTemp) { //1010
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
-}
-void vFloat5b(int regd, int regTemp) { //1010 //regTemp is Modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_NORMAL_SSE4(5);
-	}
-	else {
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
-	}
-}
-void vFloat5c(int regd, int regTemp) { //1010
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(5);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat6(int regd, int regTemp) { //0110
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
-}
-void vFloat6b(int regd, int regTemp) { //0110 //regTemp is Modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_NORMAL_SSE4(6);
-	}
-	else {
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
-	}
-}
-void vFloat6c(int regd, int regTemp) { //0110
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(6);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat7(int regd, int regTemp) { //1110
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
-}
-void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
-	SSE2_MOVD_XMM_to_R(EAX, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	if ( cpucaps.hasStreamingSIMD4Extensions )
-		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
-	else {
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
-		SHR32ItoR(EAX, 16);
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
-	}
-}
-void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
-	SSE_MOVSS_XMM_to_XMM(regTemp, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	SSE_MOVSS_XMM_to_XMM(regd, regTemp);
-}
-void vFloat7c(int regd, int regTemp) { //1110
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(7);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat7c_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(7);
-	}
-	else {
-		SSE2_MOVD_XMM_to_R(EAX, regd);
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-		SSE2_MOVD_R_to_XMM(regTemp, EAX);
-		SSE_MOVSS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat8(int regd, int regTemp) { //0001
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-}
-void vFloat8c(int regd, int regTemp) { //0001
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(8);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat9(int regd, int regTemp) { //1001
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-}
-void vFloat9b(int regd, int regTemp) { //1001 //regTemp is Modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_NORMAL_SSE4(9);
-	}
-	else {
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	}
-}
-void vFloat9c(int regd, int regTemp) { //1001
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(9);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat10(int regd, int regTemp) { //0101
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-}
-void vFloat10b(int regd, int regTemp) { //0101 //regTemp is Modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_NORMAL_SSE4(10);
-	}
-	else {
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	}
-}
-void vFloat10c(int regd, int regTemp) { //0101
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(10);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat11(int regd, int regTemp) { //1101
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
-}
-void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE2_MOVD_XMM_to_R(EAX, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	if ( cpucaps.hasStreamingSIMD4Extensions )
-		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
-	else {
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
-		SHR32ItoR(EAX, 16);
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
-	}
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-}
-void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
-	SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	SSE_MOVSS_XMM_to_XMM(regTemp, regd);
-	SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
-}
-void vFloat11c(int regd, int regTemp) { //1101
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(11);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat11c_useEAX(int regd, int regTemp) { //1101 // EAX is modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(11);
-	}
-	else {
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE2_MOVD_XMM_to_R(EAX, regd);
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-		SSE2_MOVD_R_to_XMM(regTemp, EAX);
-		SSE_MOVSS_XMM_to_XMM(regd, regTemp);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	}
-}
-void vFloat12(int regd, int regTemp) { //0011
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-}
-void vFloat12b(int regd, int regTemp) { //0011 //regTemp is Modified
-	SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	SSE2_PUNPCKLQDQ_XMM_to_XMM(regd, regTemp);
-}
-void vFloat12c(int regd, int regTemp) { //0011
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(12);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat13(int regd, int regTemp) { //1011
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
-}
-void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	SSE2_MOVD_XMM_to_R(EAX, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	if ( cpucaps.hasStreamingSIMD4Extensions )
-		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
-	else {
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
-		SHR32ItoR(EAX, 16);
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
-	}
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-}
-void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
-	SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
-	SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0x64);
-}
-void vFloat13c(int regd, int regTemp) { //1011
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(13);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat13c_useEAX(int regd, int regTemp) { //1011 // EAX is modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(13);
-	}
-	else {
-		SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0xc6);
-		SSE2_MOVD_XMM_to_R(EAX, regd);
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-		SSE2_MOVD_R_to_XMM(regTemp, EAX);
-		SSE_MOVSS_XMM_to_XMM(regd, regTemp);
-		SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0xc6);
-	}
-}
-void vFloat14(int regd, int regTemp) { //0111
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
-}
-void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-	SSE2_MOVD_XMM_to_R(EAX, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	if ( cpucaps.hasStreamingSIMD4Extensions )
-		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
-	else {
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
-		SHR32ItoR(EAX, 16);
-		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
-	}
-	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
-}
-void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified
-	SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-	SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
-	SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0xc4);
-}
-void vFloat14c(int regd, int regTemp) { //0111
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(14);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
-		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
-		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-void vFloat14c_useEAX(int regd, int regTemp) { //0111 // EAX is modified
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(14);
-	}
-	else {
-		SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x27);
-		SSE2_MOVD_XMM_to_R(EAX, regd);
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-		SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-		SSE2_MOVD_R_to_XMM(regTemp, EAX);
-		SSE_MOVSS_XMM_to_XMM(regd, regTemp);
-		SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x27);
-	}
-}
-void vFloat15(int regd, int regTemp) { //1111
-	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
-	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
-}
-void vFloat15c(int regd, int regTemp) { //1111
-	if ( cpucaps.hasStreamingSIMD4Extensions ) {
-		CLAMP_SIGN_SSE4(15);
-	}
-	else {
-		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
-		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
-		SSE_MINPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]);
-		SSE_MAXPS_M128_to_XMM(regd, (uptr)&g_minvals[0]);
-		SSE_ORPS_XMM_to_XMM(regd, regTemp);
-	}
-}
-
-vFloat vFloats1[16] = { //regTemp is not modified
-	vFloat0, vFloat1, vFloat2, vFloat3,
-	vFloat4, vFloat5, vFloat6, vFloat7,
-	vFloat8, vFloat9, vFloat10, vFloat11,
-	vFloat12, vFloat13, vFloat14, vFloat15 };
-
-vFloat vFloats1_useEAX[16] = { //regTemp is not modified but EAX is used
-	vFloat0, vFloat1, vFloat2, vFloat3,
-	vFloat4, vFloat5, vFloat6, vFloat7_useEAX,
-	vFloat8, vFloat9, vFloat10, vFloat11_useEAX,
-	vFloat12, vFloat13_useEAX, vFloat14_useEAX, vFloat15 };
-
-vFloat vFloats2[16] = { //regTemp is modified
-	vFloat0, vFloat1, vFloat2, vFloat3b,
-	vFloat4, vFloat5b, vFloat6b, vFloat7b,
-	vFloat8, vFloat9b, vFloat10b, vFloat11b,
-	vFloat12b, vFloat13b, vFloat14b, vFloat15 };
-
-vFloat vFloats4[16] = { //regTemp is modified
-	vFloat0, vFloat1c, vFloat2c, vFloat3c,
-	vFloat4c, vFloat5c, vFloat6c, vFloat7c,
-	vFloat8c, vFloat9c, vFloat10c, vFloat11c,
-	vFloat12c, vFloat13c, vFloat14c, vFloat15c };
-
-vFloat vFloats4_useEAX[16] = { //regTemp is modified and EAX is used
-	vFloat0, vFloat1c, vFloat2c, vFloat3c,
-	vFloat4c, vFloat5c, vFloat6c, vFloat7c_useEAX,
-	vFloat8c, vFloat9c, vFloat10c, vFloat11c_useEAX,
-	vFloat12c, vFloat13c_useEAX, vFloat14c_useEAX, vFloat15c };
-
-//------------------------------------------------------------------
-// Clamping Functions (wrapper for vFloat* functions)
-// vuFloat  : "normal" clamping
-// vuFloat_useEAX : "normal" clamping (faster but EAX is modified)
-// vuFloat2 : "normal" clamping (fastest but regTemp is modified)
-// vuFloat3 : "preserve sign" clamping for pointer
-// vuFloat4 : "preserve sign" clamping (regTemp is modified; *FASTEST* on SSE4 CPUs)
-// vuFloat4_useEAX : "preserve sign" clamping (faster but regTemp and EAX are modified)
-// vuFloat5 : wrapper function for vuFloat2 and vuFloat4
-// vuFloat5_useEAX : wrapper function for vuFloat2 and vuFloat4_useEAX
-// vuFloatExtra : for debugging
-//
-// Notice 1: vuFloat*_useEAX may be slower on AMD CPUs, which have independent execution pipeline for
-//           vector and scalar instructions (need checks)
-// Notice 2: recVUMI_MUL_xyzw_toD and recVUMI_MADD_xyzw_toD use vFloats directly!
-//------------------------------------------------------------------
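For reference, the sign-preserving (*c) variants above all compute the same scalar operation on each selected lane; a minimal sketch, assuming IEEE-754 single precision and the codebase's u32 typedef (clamp_preserve_sign is an illustrative name, not part of the source):

	#include <cstring>

	static float clamp_preserve_sign(float v)
	{
		u32 bits;
		memcpy(&bits, &v, sizeof(bits));
		const u32 sign = bits & 0x80000000;    // what ANDPS against const_clip[4] saves
		if ((bits & 0x7fffffff) > 0x7f7fffff)  // +/-Inf or +/-NaN (exponent all ones)
			bits = sign | 0x7f7fffff;          // clamp to fMax, restoring the saved sign
		memcpy(&v, &bits, sizeof(v));
		return v;
	}

The non-c variants differ only in that a NaN always comes out as +fMax, since MINSS drops the sign before the OR-back step even exists there.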
-
-// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (doesn't use any temp regs)
-void vuFloat( int info, int regd, int XYZW) {
-	if( CHECK_VU_OVERFLOW ) {
-		/*if ( (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) {
-			int t1reg = _vuGetTempXMMreg(info);
-			if (t1reg >= 0) {
-				vuFloat2( regd, t1reg, XYZW );
-				_freeXMMreg( t1reg );
-				return;
-			}
-		}*/
-		//vuFloatExtra(regd, XYZW);
-		vFloats1[XYZW](regd, regd);
-	}
-}
-
-// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses EAX as a temp register; faster but **destroys EAX**)
-void vuFloat_useEAX( int info, int regd, int XYZW) {
-	if( CHECK_VU_OVERFLOW ) {
-		vFloats1_useEAX[XYZW](regd, regd);
-	}
-}
-
-// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses a temp reg)
-void vuFloat2(int regd, int regTemp, int XYZW) {
-	if( CHECK_VU_OVERFLOW ) {
-		//vuFloatExtra(regd, XYZW);
-		vFloats2[XYZW](regd, regTemp);
-	}
-}
-
-// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg)
-void vuFloat4(int regd, int regTemp, int XYZW) {
-	if( CHECK_VU_OVERFLOW ) {
-		vFloats4[XYZW](regd, regTemp);
-	}
-}
-
-// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg, and uses EAX as a temp register; faster but **destroys EAX**)
-void vuFloat4_useEAX(int regd, int regTemp, int XYZW) {
-	if( CHECK_VU_OVERFLOW ) {
-		vFloats4_useEAX[XYZW](regd, regTemp);
-	}
-}
-
-// Uses vuFloat4 or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting
-void vuFloat5(int regd, int regTemp, int XYZW) {
-	if (CHECK_VU_SIGN_OVERFLOW) {
-		vuFloat4(regd, regTemp, XYZW);
-	}
-	else vuFloat2(regd, regTemp, XYZW);
-}
-
-// Uses vuFloat4_useEAX or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting (uses EAX as a temp register; faster but **destroys EAX**)
-void vuFloat5_useEAX(int regd, int regTemp, int XYZW) {
-	if (CHECK_VU_SIGN_OVERFLOW) {
-		vuFloat4_useEAX(regd, regTemp, XYZW);
-	}
-	else vuFloat2(regd, regTemp, XYZW);
-}
-
-// Clamps +/-infs to +/-fMax, and +/-NaNs to +/-fMax
-void vuFloat3(uptr x86ptr) {
-	u8* pjmp;
-
-	if( CHECK_VU_OVERFLOW ) {
-		CMP32ItoM(x86ptr, 0x7f800000 );
-		pjmp = JL8(0); // Signed Comparison
-		MOV32ItoM(x86ptr, 0x7f7fffff );
-		x86SetJ8(pjmp);
-
-		CMP32ItoM(x86ptr, 0xff800000 );
-		pjmp = JB8(0); // Unsigned Comparison
-		MOV32ItoM(x86ptr, 0xff7fffff );
-		x86SetJ8(pjmp);
-	}
-}
-
-PCSX2_ALIGNED16(u64 vuFloatData[2]);
-PCSX2_ALIGNED16(u64 vuFloatData2[2]);
-// Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging
-void vuFloatExtra( int regd, int XYZW) {
-	int t1reg = (regd == 0) ? (regd + 1) : (regd - 1);
-	int t2reg = (regd <= 1) ? (regd + 2) : (regd - 2);
-	SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg );
-	SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg );
-
-	SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
-	SSE_CMPORDPS_XMM_to_XMM(t1reg, regd);
-	SSE_MOVAPS_XMM_to_XMM(t2reg, regd);
-	SSE_ANDPS_XMM_to_XMM(t2reg, t1reg);
-	VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW);
-
-	SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData );
-	SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 );
-}
-
-static PCSX2_ALIGNED16(u32 tempRegX[]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
-
-// Called by testWhenOverflow() function
-void testPrintOverflow() {
-	tempRegX[0] &= 0xff800000;
-	tempRegX[1] &= 0xff800000;
-	tempRegX[2] &= 0xff800000;
-	tempRegX[3] &= 0xff800000;
-	if ( (tempRegX[0] == 0x7f800000) || (tempRegX[1] == 0x7f800000) || (tempRegX[2] == 0x7f800000) || (tempRegX[3] == 0x7f800000) )
-		Console::Notice( "VU OVERFLOW!: Changing to +Fmax!!!!!!!!!!!!" );
-	if ( (tempRegX[0] == 0xff800000) || (tempRegX[1] == 0xff800000) || (tempRegX[2] == 0xff800000) || (tempRegX[3] == 0xff800000) )
-		Console::Notice( "VU OVERFLOW!: Changing to -Fmax!!!!!!!!!!!!" );
-}
-
-// Outputs to the console when overflow has occurred.
-void testWhenOverflow(int info, int regd, int t0reg) {
-	SSE_MOVAPS_XMM_to_M128((uptr)tempRegX, regd);
-	CALLFunc((uptr)testPrintOverflow);
-}
+/*  Pcsx2 - Pc Ps2 Emulator
+ *  Copyright (C) 2002-2009  Pcsx2 Team
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include "PrecompiledHeader.h"
+
+#include "Common.h"
+#include "GS.h"
+#include "R5900OpcodeTables.h"
+#include "iR5900.h"
+#include "iMMI.h"
+#include "iFPU.h"
+#include "iCOP0.h"
+#include "VUmicro.h"
+#include "VUflags.h"
+#include "sVU_Micro.h"
+#include "sVU_Debug.h"
+#include "sVU_zerorec.h"
+
+#ifdef _WIN32
+#pragma warning(disable:4244)
+#pragma warning(disable:4761)
+#endif
+//------------------------------------------------------------------
+
+// fixme - VUmicro should really use its own static vars for pc and branch.
+// Sharing with the EE's copies of pc and branch is not cool! (air)
+
+//------------------------------------------------------------------
+// Helper Macros
+//------------------------------------------------------------------
+#define _Ft_ (( VU->code >> 16) & 0x1F)  // The rt part of the instruction register
+#define _Fs_ (( VU->code >> 11) & 0x1F)  // The rd part of the instruction register
+#define _Fd_ (( VU->code >>  6) & 0x1F)  // The sa part of the instruction register
+#define _It_ (_Ft_ & 15)
+#define _Is_ (_Fs_ & 15)
+#define _Id_ (_Fd_ & 15)
+
+#define _X (( VU->code>>24) & 0x1)
+#define _Y (( VU->code>>23) & 0x1)
+#define _Z (( VU->code>>22) & 0x1)
+#define _W (( VU->code>>21) & 0x1)
+
+#define _XYZW_SS (_X+_Y+_Z+_W==1)
+
+#define _Fsf_ (( VU->code >> 21) & 0x03)
+#define _Ftf_ (( VU->code >> 23) & 0x03)
+
+#define _Imm11_ 	(s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff)
+#define _UImm11_	(s32)(VU->code & 0x7ff)
+
+#define VU_VFx_ADDR(x)  (uptr)&VU->VF[x].UL[0]
+#define VU_VFy_ADDR(x)  (uptr)&VU->VF[x].UL[1]
+#define VU_VFz_ADDR(x)  (uptr)&VU->VF[x].UL[2]
+#define VU_VFw_ADDR(x)  (uptr)&VU->VF[x].UL[3]
+
+#define VU_REGR_ADDR    (uptr)&VU->VI[REG_R]
+#define VU_REGQ_ADDR    (uptr)&VU->VI[REG_Q]
+#define VU_REGMAC_ADDR  (uptr)&VU->VI[REG_MAC_FLAG]
+
+#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info)
+
+#define VU_ACCx_ADDR    (uptr)&VU->ACC.UL[0]
+#define VU_ACCy_ADDR    (uptr)&VU->ACC.UL[1]
+#define VU_ACCz_ADDR    (uptr)&VU->ACC.UL[2]
+#define VU_ACCw_ADDR    (uptr)&VU->ACC.UL[3]
+
+#define _X_Y_Z_W  ((( VU->code >> 21 ) & 0xF ) )
+//------------------------------------------------------------------
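The _Imm11_ macro sign-extends the 11-bit immediate held in bits 10:0 of the lower instruction word; a standalone sketch with worked values (signExtend11 is an illustrative name only):

	static s32 signExtend11(u32 code)
	{
		// Bit 10 (0x400) is the sign bit of the 11-bit field.
		return (code & 0x400) ? (s32)(0xfffffc00 | (code & 0x3ff)) : (s32)(code & 0x3ff);
	}
	// signExtend11(0x3ff) == 1023, signExtend11(0x400) == -1024, signExtend11(0x7ff) == -1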
+
+
+//------------------------------------------------------------------
+// Global Variables
+//------------------------------------------------------------------
+int vucycle;
+
+PCSX2_ALIGNED16(const float s_fones[8])	= {1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f};
+PCSX2_ALIGNED16(const u32 s_mask[4])	= {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff};
+PCSX2_ALIGNED16(const u32 s_expmask[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};
+PCSX2_ALIGNED16(const u32 g_minvals[4])	= {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
+PCSX2_ALIGNED16(const u32 g_maxvals[4])	= {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
+PCSX2_ALIGNED16(const u32 const_clip[8]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff,
+											0x80000000, 0x80000000, 0x80000000, 0x80000000};
+PCSX2_ALIGNED(64, const u32 g_ones[4])	= {0x00000001, 0x00000001, 0x00000001, 0x00000001};
+PCSX2_ALIGNED16(const u32 g_minvals_XYZW[16][4]) =
+{
+	{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000
+	{ 0xffffffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //0001
+	{ 0xffffffff, 0xffffffff, 0xff7fffff, 0xffffffff }, //0010
+	{ 0xffffffff, 0xffffffff, 0xff7fffff, 0xff7fffff }, //0011
+	{ 0xffffffff, 0xff7fffff, 0xffffffff, 0xffffffff }, //0100
+	{ 0xffffffff, 0xff7fffff, 0xffffffff, 0xff7fffff }, //0101
+	{ 0xffffffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //0110
+	{ 0xffffffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //0111
+	{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000
+	{ 0xff7fffff, 0xffffffff, 0xffffffff, 0xff7fffff }, //1001
+	{ 0xff7fffff, 0xffffffff, 0xff7fffff, 0xffffffff }, //1010
+	{ 0xff7fffff, 0xffffffff, 0xff7fffff, 0xff7fffff }, //1011
+	{ 0xff7fffff, 0xff7fffff, 0xffffffff, 0xffffffff }, //1100
+	{ 0xff7fffff, 0xff7fffff, 0xffffffff, 0xff7fffff }, //1101
+	{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xffffffff }, //1110
+	{ 0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff }, //1111
+};
+PCSX2_ALIGNED16(const u32 g_maxvals_XYZW[16][4])=
+{
+	{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0000
+	{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //0001
+	{ 0x7fffffff, 0x7fffffff, 0x7f7fffff, 0x7fffffff }, //0010
+	{ 0x7fffffff, 0x7fffffff, 0x7f7fffff, 0x7f7fffff }, //0011
+	{ 0x7fffffff, 0x7f7fffff, 0x7fffffff, 0x7fffffff }, //0100
+	{ 0x7fffffff, 0x7f7fffff, 0x7fffffff, 0x7f7fffff }, //0101
+	{ 0x7fffffff, 0x7f7fffff, 0x7f7fffff, 0x7fffffff }, //0110
+	{ 0x7fffffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //0111
+	{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1000
+	{ 0x7f7fffff, 0x7fffffff, 0x7fffffff, 0x7f7fffff }, //1001
+	{ 0x7f7fffff, 0x7fffffff, 0x7f7fffff, 0x7fffffff }, //1010
+	{ 0x7f7fffff, 0x7fffffff, 0x7f7fffff, 0x7f7fffff }, //1011
+	{ 0x7f7fffff, 0x7f7fffff, 0x7fffffff, 0x7fffffff }, //1100
+	{ 0x7f7fffff, 0x7f7fffff, 0x7fffffff, 0x7f7fffff }, //1101
+	{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7fffffff }, //1110
+	{ 0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff }, //1111
+};
+//------------------------------------------------------------------
+
+//------------------------------------------------------------------
+// VU Pipeline/Test Stalls/Analyzing Functions
+//------------------------------------------------------------------
+void _recvuFMACflush(VURegs * VU, bool intermediate) {
+	int i;
+
+	for (i=0; i<8; i++) {
+		if (VU->fmac[i].enable == 0) continue;
+
+		if( intermediate ) {
+			if ((vucycle - VU->fmac[i].sCycle) > VU->fmac[i].Cycle) {
+//				VUM_LOG("flushing FMAC pipe[%d]", i);
+				VU->fmac[i].enable = 0;
+			}
+		}
+		else {
+			if ((vucycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) {
+//				VUM_LOG("flushing FMAC pipe[%d]", i);
+				VU->fmac[i].enable = 0;
+			}
+		}
+	}
+}
+
+void _recvuFDIVflush(VURegs * VU, bool intermediate) {
+	if (VU->fdiv.enable == 0) return;
+
+	if( intermediate ) {
+		if ((vucycle - VU->fdiv.sCycle) > VU->fdiv.Cycle) {
+//			Console::WriteLn("flushing FDIV pipe");
+			VU->fdiv.enable = 0;
+		}
+	}
+	else {
+		if ((vucycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) {
+//			Console::WriteLn("flushing FDIV pipe");
+			VU->fdiv.enable = 0;
+		}
+	}
+}
+
+void _recvuEFUflush(VURegs * VU, bool intermediate) {
+	if (VU->efu.enable == 0) return;
+
+	if( intermediate ) {
+		if ((vucycle - VU->efu.sCycle) > VU->efu.Cycle) {
+//			Console::WriteLn("flushing FDIV pipe");
+			VU->efu.enable = 0;
+		}
+	}
+	else {
+		if ((vucycle - VU->efu.sCycle) >= VU->efu.Cycle) {
+//			Console::WriteLn("flushing FDIV pipe");
+			VU->efu.enable = 0;
+		}
+	}
+}
+
+void _recvuIALUflush(VURegs * VU, bool intermediate) {
+	int i;
+
+	for (i=0; i<8; i++) {
+		if (VU->ialu[i].enable == 0) continue;
+
+		if( intermediate ) {
+			if ((vucycle - VU->ialu[i].sCycle) > VU->ialu[i].Cycle) {
+//				VUM_LOG("flushing IALU pipe[%d]", i);
+				VU->ialu[i].enable = 0;
+			}
+		}
+		else {
+			if ((vucycle - VU->ialu[i].sCycle) >= VU->ialu[i].Cycle) {
+//				VUM_LOG("flushing IALU pipe[%d]", i);
+				VU->ialu[i].enable = 0;
+			}
+		}
+	}
+}
+
+void _recvuTestPipes(VURegs * VU, bool intermediate) { // intermediate = true if called by upper FMAC stall detection
+	_recvuFMACflush(VU, intermediate);
+	_recvuFDIVflush(VU, intermediate);
+	_recvuEFUflush(VU, intermediate);
+	_recvuIALUflush(VU, intermediate);
+}
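The flush helpers above retire a pipeline slot once enough VU cycles have elapsed; the intermediate flag (set while checking the upper instruction of the same pair) keeps a slot busy one cycle longer by using '>' instead of '>='. With illustrative numbers, not taken from the diff, a result registered at sCycle = 10 with Cycle = 3 behaves as follows:

	// vucycle = 12: 12 - 10 = 2, 2 <  3      -> still busy in both modes
	// vucycle = 13: 13 - 10 = 3, 3 >= 3 only -> flushed normally, busy if intermediate
	// vucycle = 14: 14 - 10 = 4, 4 >  3      -> flushed in both modes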
+void _recvuFMACTestStall(VURegs * VU, int reg, int xyzw) {
+	int cycle;
+	int i;
+	u32 mask = 0;
+
+	for (i=0; i<8; i++) {
+		if (VU->fmac[i].enable == 0) continue;
+		if (VU->fmac[i].reg == reg && (VU->fmac[i].xyzw & xyzw)) break;
+	}
+
+	if (i == 8) return;
+
+	// do a perchannel delay
+	// old code
+//	cycle = VU->fmac[i].Cycle - (vucycle - VU->fmac[i].sCycle);
+
+	// new code
+	mask = 4; // w
+//	if( VU->fmac[i].xyzw & 1 ) mask = 4; // w
+//	else if( VU->fmac[i].xyzw & 2 ) mask = 3; // z
+//	else if( VU->fmac[i].xyzw & 4 ) mask = 2; // y
+//	else {
+//		assert(VU->fmac[i].xyzw & 8 );
+//		mask = 1; // x
+//	}
+
+//	mask = 0;
+//	if( VU->fmac[i].xyzw & 1 ) mask++; // w
+//	else if( VU->fmac[i].xyzw & 2 ) mask++; // z
+//	else if( VU->fmac[i].xyzw & 4 ) mask++; // y
+//	else if( VU->fmac[i].xyzw & 8 ) mask++; // x
+
+	assert( (int)VU->fmac[i].sCycle < (int)vucycle );
+	cycle = 0;
+	if( vucycle - VU->fmac[i].sCycle < mask )
+		cycle = mask - (vucycle - VU->fmac[i].sCycle);
+
+	VU->fmac[i].enable = 0;
+	vucycle+= cycle;
+	_recvuTestPipes(VU, true); // for lower instructions
+}
+
+void _recvuIALUTestStall(VURegs * VU, int reg) {
+	int cycle;
+	int i;
+	u32 latency;
+
+	for (i=0; i<8; i++) {
+		if (VU->ialu[i].enable == 0) continue;
+		if (VU->ialu[i].reg == reg) break;
+	}
+
+	if (i == 8) return;
+
+	latency = VU->ialu[i].Cycle + 1;
+	cycle = 0;
+	if( vucycle - VU->ialu[i].sCycle < latency )
+		cycle = latency - (vucycle - VU->ialu[i].sCycle);
+
+	VU->ialu[i].enable = 0;
+	vucycle+= cycle;
+	_recvuTestPipes(VU, true);
+}
+
+void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) {
+	int i;
+
+	/* find a free fmac pipe */
+	for (i=0; i<8; i++) {
+		if (VU->fmac[i].enable == 1) continue;
+		break;
+	}
+
+	if (i==8) Console::Error("*PCSX2*: error , out of fmacs");
+//	VUM_LOG("adding FMAC pipe[%d]; reg %d", i, reg);
+
+	VU->fmac[i].enable = 1;
+	VU->fmac[i].sCycle = vucycle;
+	VU->fmac[i].Cycle = 3;
+	VU->fmac[i].xyzw = xyzw;
+	VU->fmac[i].reg = reg;
+}
+
+void _recvuFDIVAdd(VURegs * VU, int cycles) {
+//	Console::WriteLn("adding FDIV pipe");
+	VU->fdiv.enable = 1;
+	VU->fdiv.sCycle = vucycle;
+	VU->fdiv.Cycle  = cycles;
+}
+
+void _recvuEFUAdd(VURegs * VU, int cycles) {
+//	Console::WriteLn("adding EFU pipe");
+	VU->efu.enable = 1;
+	VU->efu.sCycle = vucycle;
+	VU->efu.Cycle  = cycles;
+}
+
+void _recvuIALUAdd(VURegs * VU, int reg, int cycles) {
+	int i;
+
+	/* find a free ialu pipe */
+	for (i=0; i<8; i++) {
+		if (VU->ialu[i].enable == 1) continue;
+		break;
+	}
+
+	if (i==8) Console::Error("*PCSX2*: error , out of ialus");
+
+	VU->ialu[i].enable = 1;
+	VU->ialu[i].sCycle = vucycle;
+	VU->ialu[i].Cycle = cycles;
+	VU->ialu[i].reg = reg;
+}
+
+void _recvuTestIALUStalls(VURegs * VU, _VURegsNum *VUregsn) {
+
+	int VIread0 = 0, VIread1 = 0; // max 2 integer registers are read simultaneously
+	int i;
+
+	for(i=0;i<16;i++) { // find used integer(vi00-vi15) registers
+		if( (VUregsn->VIread >> i) & 1 ) {
+			if( VIread0 ) VIread1 = i;
+			else VIread0 = i;
+		}
+	}
+
+	if( VIread0 ) _recvuIALUTestStall(VU, VIread0);
+	if( VIread1 ) _recvuIALUTestStall(VU, VIread1);
+}
+
+void _recvuAddIALUStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	if (VUregsn->VIwrite && VUregsn->cycles) {
+		int VIWrite0 = 0;
+		int i;
+
+		for(i=0;i<16;i++) { // find used(vi00-vi15) registers
+			if( (VUregsn->VIwrite >> i) & 1 ) {
+				VIWrite0 = i;
+			}
+		}
+		if( VIWrite0 ) _recvuIALUAdd(VU, VIWrite0, VUregsn->cycles);
+	}
+}
+
+void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn, bool upper) {
+
+	if( VUregsn->VFread0 && (VUregsn->VFread0 == VUregsn->VFread1) ) {
+		_recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw|VUregsn->VFr1xyzw);
+	}
+	else {
+		if (VUregsn->VFread0) _recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw);
+		if (VUregsn->VFread1) _recvuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw);
+	}
+
+	if( !upper && VUregsn->VIread ) _recvuTestIALUStalls(VU, VUregsn); // for lower instructions which read integer reg
+}
+
+void _recvuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) {
+
+	if (VUregsn->VFwrite) _recvuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw);
+	else if (VUregsn->VIwrite & (1 << REG_CLIP_FLAG)) _recvuFMACAdd(VU, -REG_CLIP_FLAG, 0); // REG_CLIP_FLAG pipe
+	else _recvuFMACAdd(VU, 0, 0); // cause no data dependency with fp registers
+}
+
+void _recvuFlushFDIV(VURegs * VU) {
+	int cycle;
+
+	if (VU->fdiv.enable == 0) return;
+
+	cycle = VU->fdiv.Cycle + 1 - (vucycle - VU->fdiv.sCycle); //VU->fdiv.Cycle contains the latency minus 1 (6 or 12)
+//	Console::WriteLn("waiting FDIV pipe %d", params cycle);
+	VU->fdiv.enable = 0;
+	vucycle+= cycle;
+}
+
+void _recvuFlushEFU(VURegs * VU) {
+	int cycle;
+
+	if (VU->efu.enable == 0) return;
+
+	cycle = VU->efu.Cycle - (vucycle - VU->efu.sCycle);
+//	Console::WriteLn("waiting FDIV pipe %d", params cycle);
+	VU->efu.enable = 0;
+	vucycle+= cycle;
+}
+
+void _recvuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	_recvuTestFMACStalls(VU,VUregsn, false);
+	_recvuFlushFDIV(VU);
+}
+
+void _recvuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	_recvuTestFMACStalls(VU,VUregsn, false);
+	_recvuFlushEFU(VU);
+}
+
+void _recvuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) {
+//	_vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
+	if (VUregsn->VIwrite & (1 << REG_Q)) {
+		_recvuFDIVAdd(VU, VUregsn->cycles);
+	}
+}
+
+void _recvuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) {
+//	_vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
+	if (VUregsn->VIwrite & (1 << REG_P)) {
+		_recvuEFUAdd(VU, VUregsn->cycles);
+	}
+}
+
+void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	switch (VUregsn->pipe) {
+		case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, true); break;
+	}
+}
+
+void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	switch (VUregsn->pipe) {
+		case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn, false); break;
+		case VUPIPE_FDIV: _recvuTestFDIVStalls(VU, VUregsn); break;
+		case VUPIPE_EFU: _recvuTestEFUStalls(VU, VUregsn); break;
+		case VUPIPE_IALU: _recvuTestIALUStalls(VU, VUregsn); break;
+		case VUPIPE_BRANCH: _recvuTestIALUStalls(VU, VUregsn); break;
+	}
+}
+
+void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	switch (VUregsn->pipe) {
+		case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break;
+	}
+}
+
+void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
+	switch (VUregsn->pipe) {
+		case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break;
+		case VUPIPE_FDIV: _recvuAddFDIVStalls(VU, VUregsn); break;
+		case VUPIPE_EFU: _recvuAddEFUStalls(VU, VUregsn); break;
+		case VUPIPE_IALU: _recvuAddIALUStalls(VU, VUregsn); break; // note: only ILW and ILWR cause stall in IALU pipe
+	}
+}
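As the comment in _recvuFlushFDIV notes, fdiv.Cycle stores the result latency minus one, so the wait below works out to the full latency; presumably the two cases are the 7-cycle DIV/SQRT (stored as 6) and the 13-cycle RSQRT (stored as 12). An illustrative trace, not from the diff:

	// Issued at sCycle = 20 with Cycle = 6 (a DIV), flushed at vucycle = 22:
	//   cycle = 6 + 1 - (22 - 20) = 5 more cycles to stall before Q is usable.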
+
+void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs)
+{
+	_VURegsNum* lregs;
+	_VURegsNum* uregs;
+	int *ptr;
+
+	lregs = pCodeRegs;
+	uregs = pCodeRegs+1;
+
+	ptr = (int*)&VU->Micro[pc];
+	pc += 8;
+
+	if (ptr[1] & 0x40000000) { // EOP
+		branch |= 8;
+	}
+
+	VU->code = ptr[1];
+	if (VU == &VU1) VU1regs_UPPER_OPCODE[VU->code & 0x3f](uregs);
+	else VU0regs_UPPER_OPCODE[VU->code & 0x3f](uregs);
+
+	_recvuTestUpperStalls(VU, uregs);
+	switch(VU->code & 0x3f) {
+		case 0x10: case 0x11: case 0x12: case 0x13:
+		case 0x14: case 0x15: case 0x16: case 0x17:
+		case 0x1d: case 0x1f:
+		case 0x2b: case 0x2f:
+			break;
+
+		case 0x3c:
+			switch ((VU->code >> 6) & 0x1f) {
+				case 0x4: case 0x5:
+					break;
+				default:
+					info->statusflag = 4;
+					info->macflag = 4;
+					break;
+			}
+			break;
+		case 0x3d:
+			switch ((VU->code >> 6) & 0x1f) {
+				case 0x4: case 0x5: case 0x7:
+					break;
+				default:
+					info->statusflag = 4;
+					info->macflag = 4;
+					break;
+			}
+			break;
+		case 0x3e:
+			switch ((VU->code >> 6) & 0x1f) {
+				case 0x4: case 0x5:
+					break;
+				default:
+					info->statusflag = 4;
+					info->macflag = 4;
+					break;
+			}
+			break;
+		case 0x3f:
+			switch ((VU->code >> 6) & 0x1f) {
+				case 0x4: case 0x5: case 0x7: case 0xb:
+					break;
+				default:
+					info->statusflag = 4;
+					info->macflag = 4;
+					break;
+			}
+			break;
+
+		default:
+			info->statusflag = 4;
+			info->macflag = 4;
+			break;
+	}
+
+	if (uregs->VIread & (1 << REG_Q)) { info->q |= 2; }
+	if (uregs->VIread & (1 << REG_P)) { info->p |= 2; assert( VU == &VU1 ); }
+
+	// check upper flags
+	if (ptr[1] & 0x80000000) { // I flag
+		info->cycle = vucycle;
+		memzero_obj(*lregs);
+	}
+	else {
+
+		VU->code = ptr[0];
+		if (VU == &VU1) VU1regs_LOWER_OPCODE[VU->code >> 25](lregs);
+		else VU0regs_LOWER_OPCODE[VU->code >> 25](lregs);
+
+		_recvuTestLowerStalls(VU, lregs);
+		info->cycle = vucycle;
+
+		if (lregs->pipe == VUPIPE_BRANCH) {
+			branch |= 1;
+		}
+
+		if (lregs->VIwrite & (1 << REG_Q)) {
+			info->q |= 4;
+			info->cycles = lregs->cycles;
+			info->pqinst = (VU->code&2)>>1; // rsqrt is 2
+		}
+		else if (lregs->pipe == VUPIPE_FDIV) {
+			info->q |= 8|1;
+			info->pqinst = 0;
+		}
+
+		if (lregs->VIwrite & (1 << REG_P)) {
+			assert( VU == &VU1 );
+			info->p |= 4;
+			info->cycles = lregs->cycles;
+
+			switch( VU->code & 0xff ) {
+				case 0xfd: info->pqinst = 0; break; //eatan
+				case 0x7c: info->pqinst = 0; break; //eatanxy
+				case 0x7d: info->pqinst = 0; break; //eatanzy
+				case 0xfe: info->pqinst = 1; break; //eexp
+				case 0xfc: info->pqinst = 2; break; //esin
+				case 0x3f: info->pqinst = 3; break; //erleng
+				case 0x3e: info->pqinst = 4; break; //eleng
+				case 0x3d: info->pqinst = 4; break; //ersadd
+				case 0xbd: info->pqinst = 4; break; //ersqrt
+				case 0xbe: info->pqinst = 5; break; //ercpr
+				case 0xbc: info->pqinst = 5; break; //esqrt
+				case 0x7e: info->pqinst = 5; break; //esum
+				case 0x3c: info->pqinst = 6; break; //esadd
+				default: assert(0);
+			}
+		}
+		else if (lregs->pipe == VUPIPE_EFU) {
+			info->p |= 8|1;
+		}
+
+		if (lregs->VIread & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_READ;
+		if (lregs->VIread & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_READ;
+
+		if (lregs->VIwrite & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_WRITE;
+		if (lregs->VIwrite & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_WRITE;
+
+		if (lregs->VIread & (1 << REG_Q)) { info->q |= 2; }
+		if (lregs->VIread & (1 << REG_P)) { info->p |= 2; assert( VU == &VU1 ); }
+
+		_recvuAddLowerStalls(VU, lregs);
+	}
+
+	_recvuAddUpperStalls(VU, uregs);
+	_recvuTestPipes(VU, false);
+
+	vucycle++;
+}
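SuperVUAnalyzeOp keys off two control bits in the upper instruction word; per the VU microinstruction format, these are (stated here as background, not taken from the diff itself):

	// bit 31 (0x80000000) = I : lower slot holds a 32-bit immediate, so no lower op is decoded
	// bit 30 (0x40000000) = E : end of microprogram, flagged as EOP (branch |= 8) above
	// bits 29..27         = M, D, T flags (not consulted by this analyzer)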
+
+int eeVURecompileCode(VURegs *VU, _VURegsNum* regs)
+{
+	int info = 0;
+	int vfread0=-1, vfread1 = -1, vfwrite = -1, vfacc = -1, vftemp=-1;
+
+	assert( regs != NULL );
+
+	if( regs->VFread0 ) _addNeededVFtoXMMreg(regs->VFread0);
+	if( regs->VFread1 ) _addNeededVFtoXMMreg(regs->VFread1);
+	if( regs->VFwrite ) _addNeededVFtoXMMreg(regs->VFwrite);
+	if( regs->VIread & (1<<REG_ACC_FLAG) ) _addNeededACCtoXMMreg();
+	if( regs->VIread & (1<<REG_VF0_FLAG) ) _addNeededVFtoXMMreg(0);
+
+	if( regs->VFread0 ) vfread0 = _allocVFtoXMMreg(VU, -1, regs->VFread0, MODE_READ);
+	else if( regs->VIread & (1<<REG_VF0_FLAG) ) vfread0 = _allocVFtoXMMreg(VU, -1, 0, MODE_READ);
+	if( regs->VFread1 ) vfread1 = _allocVFtoXMMreg(VU, -1, regs->VFread1, MODE_READ);
+	else if( (regs->VIread & (1<<REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff) vfread1 = _allocVFtoXMMreg(VU, -1, 0, MODE_READ);
+
+	if( regs->VIread & (1<<REG_ACC_FLAG) ) {
+		vfacc = _allocACCtoXMMreg(VU, -1, ((regs->VIwrite&(1<<REG_ACC_FLAG))?MODE_WRITE:0)|MODE_READ);
+	}
+	else if( regs->VIwrite & (1<<REG_ACC_FLAG) ) {
+		vfacc = _allocACCtoXMMreg(VU, -1, MODE_WRITE|(regs->VFwxyzw != 0xf?MODE_READ:0));
+	}
+
+	if( regs->VFwrite ) {
+		assert( !(regs->VIwrite&(1<<REG_ACC_FLAG)) );
+		vfwrite = _allocVFtoXMMreg(VU, -1, regs->VFwrite, MODE_WRITE|(regs->VFwxyzw != 0xf?MODE_READ:0));
+	}
+
+	if( vfacc>= 0 ) info |= PROCESS_EE_SET_ACC(vfacc);
+	if( vfwrite >= 0 ) {
+		if( regs->VFwrite == _Ft_ && vfread1 < 0 ) {
+			info |= PROCESS_EE_SET_T(vfwrite);
+		}
+		else {
+			assert( regs->VFwrite == _Fd_ );
+			info |= PROCESS_EE_SET_D(vfwrite);
+		}
+	}
+
+	if( vfread0 >= 0 ) info |= PROCESS_EE_SET_S(vfread0);
+	if( vfread1 >= 0 ) info |= PROCESS_EE_SET_T(vfread1);
+
+	vftemp = _allocTempXMMreg(XMMT_FPS, -1);
+	info |= PROCESS_VU_SET_TEMP(vftemp);
+
+	if( regs->VIwrite & (1 << REG_CLIP_FLAG) ) {
+		// CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC
+		int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
+		int t2reg = _allocTempXMMreg(XMMT_FPS, -1);
+
+		info |= PROCESS_EE_SET_D(t1reg);
+		info |= PROCESS_EE_SET_ACC(t2reg);
+
+		_freeXMMreg(t1reg); // don't need
+		_freeXMMreg(t2reg); // don't need
+	}
+	else if( regs->VIwrite & (1<<REG_P) ) {
+		// EFU inst, need an extra temp register, put it in EEREC_D
+		int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
+
+		info |= PROCESS_EE_SET_D(t1reg);
+
+		_freeXMMreg(t1reg); // don't need
+	}
+
+	return info;
+}
+
+// returns the correct VI addr
+uptr GetVIAddr(VURegs * VU, int reg, int read, int info)
+{
+	if( info & PROCESS_VU_SUPER ) return SuperVUGetVIAddr(reg, read);
+	if( info & PROCESS_VU_COP2 ) return (uptr)&VU->VI[reg].UL;
+
+	if( read != 1 ) {
+		if( reg == REG_MAC_FLAG ) return (uptr)&VU->macflag;
+		if( reg == REG_CLIP_FLAG ) return (uptr)&VU->clipflag;
+		if( reg == REG_STATUS_FLAG ) return (uptr)&VU->statusflag;
+		if( reg == REG_Q ) return (uptr)&VU->q;
+		if( reg == REG_P ) return (uptr)&VU->p;
+	}
+
+	return (uptr)&VU->VI[reg].UL;
+}
+
+// gets a temp reg that is not EEREC_TEMP
+int _vuGetTempXMMreg(int info)
+{
+	int t1reg = -1;
+
+	if( _hasFreeXMMreg() ) {
+		t1reg = _allocTempXMMreg(XMMT_FPS, -1);
+
+		if( t1reg == EEREC_TEMP ) {
+			if( _hasFreeXMMreg() ) {
+				int t = _allocTempXMMreg(XMMT_FPS, -1);
+				_freeXMMreg(t1reg);
+				t1reg = t;
+			}
+			else {
+				_freeXMMreg(t1reg);
+				t1reg = -1;
+			}
+		}
+	}
+
+	return t1reg;
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// Misc VU Reg Flipping/Merging Functions
+//------------------------------------------------------------------
+void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw)
+{
+	switch (xyzw) {
+		case 0: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x00); break;
+		case 1: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x55); break;
+		case 2: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xaa); break;
+		case 3: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xff); break;
+	}
+}
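The four PSHUFD immediates in _unpackVF_xyzw broadcast a single lane to all four: each two-bit field of the immediate selects a source lane, so repeating the same field value replicates that lane.

	// 0x00 = 00 00 00 00 -> x,x,x,x     0xaa = 10 10 10 10 -> z,z,z,z
	// 0x55 = 01 01 01 01 -> y,y,y,y     0xff = 11 11 11 11 -> w,w,w,w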
+
+void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw)
+{
+	switch (xyzw) {
+		case 0: SSE_MOVSS_XMM_to_XMM(dstreg, srcreg); break;
+		case 1: if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0));
+				else SSE2_PSHUFLW_XMM_to_XMM(dstreg, srcreg, 0xee);
+				break;
+		case 2: SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); break;
+		case 3: if ( cpucaps.hasStreamingSIMD4Extensions ) SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0));
+				else { SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); SSE2_PSHUFLW_XMM_to_XMM(dstreg, dstreg, 0xee); }
+				break;
+	}
+}
+
+void _vuFlipRegSS(VURegs * VU, int reg)
+{
+	assert( _XYZW_SS );
+	if( _Y ) SSE2_PSHUFLW_XMM_to_XMM(reg, reg, 0x4e);
+	else if( _Z ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc6);
+	else if( _W ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x27);
+}
+
+void _vuFlipRegSS_xyzw(int reg, int xyzw)
+{
+	switch ( xyzw ) {
+		case 1: SSE2_PSHUFLW_XMM_to_XMM(reg, reg, 0x4e); break;
+		case 2: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc6); break;
+		case 3: SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x27); break;
+	}
+}
+
+void _vuMoveSS(VURegs * VU, int dstreg, int srcreg)
+{
+	assert( _XYZW_SS );
+	if( _Y ) _unpackVFSS_xyzw(dstreg, srcreg, 1);
+	else if( _Z ) _unpackVFSS_xyzw(dstreg, srcreg, 2);
+	else if( _W ) _unpackVFSS_xyzw(dstreg, srcreg, 3);
+	else _unpackVFSS_xyzw(dstreg, srcreg, 0);
+}
+
+// 1 - src, 0 - dest				wzyx
+void VU_MERGE0(int dest, int src) { // 0000s
+}
+void VU_MERGE1(int dest, int src) { // 1000
+	SSE_MOVHLPS_XMM_to_XMM(src, dest);
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4);
+}
+void VU_MERGE1b(int dest, int src) { // 1000s
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+}
+void VU_MERGE2(int dest, int src) { // 0100
+	SSE_MOVHLPS_XMM_to_XMM(src, dest);
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64);
+}
+void VU_MERGE2b(int dest, int src) { // 0100s
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+}
+void VU_MERGE3(int dest, int src) { // 1100s
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
+}
+void VU_MERGE4(int dest, int src) { // 0010
+	SSE_MOVSS_XMM_to_XMM(src, dest);
+	SSE2_MOVSD_XMM_to_XMM(dest, src);
+}
+void VU_MERGE4b(int dest, int src) { // 0010s
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+}
+void VU_MERGE5(int dest, int src) { // 1010
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8);
+}
+void VU_MERGE5b(int dest, int src) { // 1010s
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+}
+void VU_MERGE6(int dest, int src) { // 0110
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78);
+}
+void VU_MERGE6b(int dest, int src) { // 0110s
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+}
+void VU_MERGE7(int dest, int src) { // 1110
+	SSE_MOVSS_XMM_to_XMM(src, dest);
+	SSE_MOVAPS_XMM_to_XMM(dest, src);
+}
+void VU_MERGE7b(int dest, int src) { // 1110s
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+}
+void VU_MERGE8(int dest, int src) { // 0001s
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+}
+void VU_MERGE9(int dest, int src) { // 1001
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2);
+}
+void VU_MERGE9b(int dest, int src) { // 1001s
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+}
+void VU_MERGE10(int dest, int src) { // 0101
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72);
+}
+void VU_MERGE10b(int dest, int src) { // 0101s
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+}
+void VU_MERGE11(int dest, int src) { // 1101s
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
+}
+void VU_MERGE12(int dest, int src) { // 0011
+	SSE2_MOVSD_XMM_to_XMM(dest, src);
+}
+void VU_MERGE13(int dest, int src) { // 1011
+	SSE_MOVHLPS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, dest, 0x64);
+	SSE_MOVAPS_XMM_to_XMM(dest, src);
+}
+void VU_MERGE13b(int dest, int src) { // 1011s
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x27);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+}
+void VU_MERGE14(int dest, int src) { // 0111
+	SSE_MOVHLPS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, dest, 0xc4);
+	SSE_MOVAPS_XMM_to_XMM(dest, src);
+}
+void VU_MERGE14b(int dest, int src) { // 0111s
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xE1);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+	SSE_MOVSS_XMM_to_XMM(dest, src);
+	SSE_SHUFPS_XMM_to_XMM(src, src, 0xC6);
+	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xC6);
+}
+void VU_MERGE15(int dest, int src) { // 1111s
+	SSE_MOVAPS_XMM_to_XMM(dest, src);
+}
+
+typedef void (*VUMERGEFN)(int dest, int src);
+
+static VUMERGEFN s_VuMerge[16] = {
+	VU_MERGE0, VU_MERGE1, VU_MERGE2, VU_MERGE3,
+	VU_MERGE4, VU_MERGE5, VU_MERGE6, VU_MERGE7,
+	VU_MERGE8, VU_MERGE9, VU_MERGE10, VU_MERGE11,
+	VU_MERGE12, VU_MERGE13, VU_MERGE14, VU_MERGE15 };
+
+static VUMERGEFN s_VuMerge2[16] = {
+	VU_MERGE0, VU_MERGE1b, VU_MERGE2b, VU_MERGE3,
+	VU_MERGE4b, VU_MERGE5b, VU_MERGE6b, VU_MERGE7b,
+	VU_MERGE8, VU_MERGE9b, VU_MERGE10b, VU_MERGE11,
+	VU_MERGE12, VU_MERGE13b, VU_MERGE14b, VU_MERGE15 };
+
+// Modifies the Source Reg!
+void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw) {
+	xyzw &= 0xf;
+	if ( (dest != src) && (xyzw != 0) ) {
+		if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) {
+			xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
+			SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw);
+		}
+		else s_VuMerge[xyzw](dest, src);
+	}
+}
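The VU write mask is x-in-bit-3 order while the BLENDPS immediate is element-0-in-bit-0 order, so VU_MERGE_REGS_CUSTOM bit-reverses the nibble before blending; a few sample values:

	// xyzw = 0x8 (x only)  -> imm = 0x1
	// xyzw = 0x6 (y and z) -> imm = 0x6 (palindrome, unchanged)
	// xyzw = 0x9 (x and w) -> imm = 0x9 (palindrome, unchanged)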
+// Doesn't Modify the Source Reg! (ToDo: s_VuMerge2() has room for optimization)
+void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw) {
+	xyzw &= 0xf;
+	if ( (dest != src) && (xyzw != 0) ) {
+		if ( cpucaps.hasStreamingSIMD4Extensions && (xyzw != 0x8) && (xyzw != 0xf) ) {
+			xyzw = ((xyzw & 1) << 3) | ((xyzw & 2) << 1) | ((xyzw & 4) >> 1) | ((xyzw & 8) >> 3);
+			SSE4_BLENDPS_XMM_to_XMM(dest, src, xyzw);
+		}
+		else s_VuMerge2[xyzw](dest, src);
+	}
+}
+//------------------------------------------------------------------
+
+
+//------------------------------------------------------------------
+// Misc VU Reg Clamping/Overflow Functions
+//------------------------------------------------------------------
+#define CLAMP_NORMAL_SSE4(n) \
+	SSE_MOVAPS_XMM_to_XMM(regTemp, regd);\
+	SSE4_PMINUD_M128_to_XMM(regd, (uptr)&g_minvals_XYZW[n][0]);\
+	SSE2_PSUBD_XMM_to_XMM(regTemp, regd);\
+	SSE2_PCMPGTD_M128_to_XMM(regTemp, (uptr)&g_ones[0]);\
+	SSE4_PMINSD_M128_to_XMM(regd, (uptr)&g_maxvals_XYZW[n][0]);\
+	SSE2_PSLLD_I8_to_XMM(regTemp, 31);\
+	SSE_XORPS_XMM_to_XMM(regd, regTemp);
+
+#define CLAMP_SIGN_SSE4(n) \
+	SSE4_PMINSD_M128_to_XMM(regd, (uptr)&g_maxvals_XYZW[n][0]);\
+	SSE4_PMINUD_M128_to_XMM(regd, (uptr)&g_minvals_XYZW[n][0]);
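CLAMP_SIGN_SSE4 works because IEEE-754 bit patterns order like integers: for non-negative patterns, signed-int order matches float order, so PMINSD against 0x7f7fffff pulls +Inf/+NaN (0x7f800000..0x7fffffff) down to +fMax, and viewed as unsigned, the -Inf/-NaN patterns (0xff800000..0xffffffff) are the largest of all, so PMINUD against 0xff7fffff pulls them to -fMax. Lanes that must pass through unchanged use 0x7fffffff/0xffffffff in the tables above, which no pattern can exceed. A scalar sketch of the two-min clamp on raw bits (illustrative only):

	// s32 s = (s32)bits;  if (s > 0x7f7fffff)  bits = 0x7f7fffff;   // PMINSD step
	// u32 u = bits;       if (u > 0xff7fffffu) bits = 0xff7fffffu;  // PMINUD step
	// Finite values and unselected lanes fall through both comparisons untouched.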
+
+void vFloat0(int regd, int regTemp) { } //0000
+void vFloat1(int regd, int regTemp) { //1000
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+}
+void vFloat1c(int regd, int regTemp) { //1000
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(1);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat2(int regd, int regTemp) { //0100
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+}
+void vFloat2c(int regd, int regTemp) { //0100
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(2);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat3(int regd, int regTemp) { //1100
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
+}
+void vFloat3b(int regd, int regTemp) { //1100 //regTemp is Modified
+	SSE2_MOVSD_XMM_to_XMM(regTemp, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
+}
+void vFloat3c(int regd, int regTemp) { //1100
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(3);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat4(int regd, int regTemp) { //0010
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+}
+void vFloat4c(int regd, int regTemp) { //0010
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(4);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat5(int regd, int regTemp) { //1010
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
+}
+void vFloat5b(int regd, int regTemp) { //1010 //regTemp is Modified
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_NORMAL_SSE4(5);
+	}
+	else {
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
+	}
+}
+void vFloat5c(int regd, int regTemp) { //1010
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(5);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat6(int regd, int regTemp) { //0110
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
+}
+void vFloat6b(int regd, int regTemp) { //0110 //regTemp is Modified
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_NORMAL_SSE4(6);
+	}
+	else {
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
+	}
+}
+void vFloat6c(int regd, int regTemp) { //0110
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(6);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat7(int regd, int regTemp) { //1110
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
+}
+void vFloat7_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
+	SSE2_MOVD_XMM_to_R(EAX, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	if ( cpucaps.hasStreamingSIMD4Extensions )
+		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
+	else {
+		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
+		SHR32ItoR(EAX, 16);
+		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
+	}
+}
+void vFloat7b(int regd, int regTemp) { //1110 //regTemp is Modified
+	SSE_MOVSS_XMM_to_XMM(regTemp, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	SSE_MOVSS_XMM_to_XMM(regd, regTemp);
+}
+void vFloat7c(int regd, int regTemp) { //1110
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(7);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x39);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat7c_useEAX(int regd, int regTemp) { //1110 //EAX is Modified
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(7);
+	}
+	else {
+		SSE2_MOVD_XMM_to_R(EAX, regd);
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+		SSE2_MOVD_R_to_XMM(regTemp, EAX);
+		SSE_MOVSS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat8(int regd, int regTemp) { //0001
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+}
+void vFloat8c(int regd, int regTemp) { //0001
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(8);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat9(int regd, int regTemp) { //1001
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+}
+void vFloat9b(int regd, int regTemp) { //1001 //regTemp is Modified
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_NORMAL_SSE4(9);
+	}
+	else {
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	}
+}
+void vFloat9c(int regd, int regTemp) { //1001
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(9);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat10(int regd, int regTemp) { //0101
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+}
+void vFloat10b(int regd, int regTemp) { //0101 //regTemp is Modified
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_NORMAL_SSE4(10);
+	}
+	else {
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	}
+}
+void vFloat10c(int regd, int regTemp) { //0101
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(10);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat11(int regd, int regTemp) { //1101
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
+}
+void vFloat11_useEAX(int regd, int regTemp) { //1101 //EAX is Modified
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE2_MOVD_XMM_to_R(EAX, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	if ( cpucaps.hasStreamingSIMD4Extensions )
+		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
+	else {
+		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
+		SHR32ItoR(EAX, 16);
+		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
+	}
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+}
+void vFloat11b(int regd, int regTemp) { //1101 //regTemp is Modified
+	SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	SSE_MOVSS_XMM_to_XMM(regTemp, regd);
+	SSE2_MOVSD_XMM_to_XMM(regd, regTemp);
+}
+void vFloat11c(int regd, int regTemp) { //1101
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(11);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x36);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat11c_useEAX(int regd, int regTemp) { //1101 // EAX is modified
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(11);
+	}
+	else {
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE2_MOVD_XMM_to_R(EAX, regd);
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+		SSE2_MOVD_R_to_XMM(regTemp, EAX);
+		SSE_MOVSS_XMM_to_XMM(regd, regTemp);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	}
+}
+void vFloat12(int regd, int regTemp) { //0011
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+}
+void vFloat12b(int regd, int regTemp) { //0011 //regTemp is Modified
+	SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	SSE2_PUNPCKLQDQ_XMM_to_XMM(regd, regTemp);
+}
+void vFloat12c(int regd, int regTemp) { //0011
+	if ( cpucaps.hasStreamingSIMD4Extensions ) {
+		CLAMP_SIGN_SSE4(12);
+	}
+	else {
+		SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+		SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+		SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+		SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+		SSE_ORPS_XMM_to_XMM(regd, regTemp);
+	}
+}
+void vFloat13(int regd, int regTemp) { //1011
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
+	SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals);
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d);
+}
+void vFloat13_useEAX(int regd, int regTemp) { //1011 // EAX is modified
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+	SSE2_MOVD_XMM_to_R(EAX, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	if ( cpucaps.hasStreamingSIMD4Extensions )
+		SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00);
+	else {
+		SSE_PINSRW_R32_to_XMM(regd, EAX, 0);
+		SHR32ItoR(EAX, 16);
+		SSE_PINSRW_R32_to_XMM(regd, EAX, 1);
+	}
+	SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6);
+}
+void vFloat13b(int regd, int regTemp) { //1011 //regTemp is Modified
+	SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+	SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+	SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+	SSE_MOVHLPS_XMM_to_XMM(regTemp, regd);
+ SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0x64); +} +void vFloat13c(int regd, int regTemp) { //1011 + if ( cpucaps.hasStreamingSIMD4Extensions ) { + CLAMP_SIGN_SSE4(13); + } + else { + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x2d); + SSE_ORPS_XMM_to_XMM(regd, regTemp); + } +} +void vFloat13c_useEAX(int regd, int regTemp) { //1011 // EAX is modified + if ( cpucaps.hasStreamingSIMD4Extensions ) { + CLAMP_SIGN_SSE4(13); + } + else { + SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0xc6); + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + SSE_ORPS_XMM_to_XMM(regd, regTemp); + SSE2_MOVD_R_to_XMM(regTemp, EAX); + SSE_MOVSS_XMM_to_XMM(regd, regTemp); + SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0xc6); + } +} +void vFloat14(int regd, int regTemp) { //0111 + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9); +} +void vFloat14_useEAX(int regd, int regTemp) { //0111 // EAX is modified + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + if ( cpucaps.hasStreamingSIMD4Extensions ) + SSE4_PINSRD_R32_to_XMM(regd, EAX, 0x00); + else { + SSE_PINSRW_R32_to_XMM(regd, EAX, 0); + SHR32ItoR(EAX, 16); + SSE_PINSRW_R32_to_XMM(regd, EAX, 1); + } + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27); +} +void vFloat14b(int regd, int regTemp) { //0111 //regTemp is Modified + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals); + SSE_MOVHLPS_XMM_to_XMM(regTemp, regd); + SSE_SHUFPS_XMM_to_XMM(regd, regTemp, 0xc4); +} +void vFloat14c(int regd, int regTemp) { //0111 + if ( cpucaps.hasStreamingSIMD4Extensions ) { + CLAMP_SIGN_SSE4(14); + } + else { + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE2_PSHUFLW_XMM_to_XMM(regd, regd, 0x4e); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc6); + SSE_MINSS_M32_to_XMM(regd, (uptr)g_maxvals); + SSE_MAXSS_M32_to_XMM(regd, (uptr)g_minvals); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0xc9); + SSE_ORPS_XMM_to_XMM(regd, regTemp); + } +} +void vFloat14c_useEAX(int regd, int regTemp) { //0111 // EAX is modified + if ( cpucaps.hasStreamingSIMD4Extensions ) { + CLAMP_SIGN_SSE4(14); + } + else { + SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x27); + SSE2_MOVD_XMM_to_R(EAX, regd); + SSE_MOVAPS_XMM_to_XMM(regTemp, regd); + 
SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+ SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+ SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+ SSE_ORPS_XMM_to_XMM(regd, regTemp);
+ SSE2_MOVD_R_to_XMM(regTemp, EAX);
+ SSE_MOVSS_XMM_to_XMM(regd, regTemp);
+ SSE2_PSHUFD_XMM_to_XMM(regd, regd, 0x27);
+ }
+}
+void vFloat15(int regd, int regTemp) { //1111
+ SSE_MINPS_M128_to_XMM(regd, (uptr)g_maxvals);
+ SSE_MAXPS_M128_to_XMM(regd, (uptr)g_minvals);
+}
+void vFloat15c(int regd, int regTemp) { //1111
+ if ( cpucaps.hasStreamingSIMD4Extensions ) {
+ CLAMP_SIGN_SSE4(15);
+ }
+ else {
+ SSE_MOVAPS_XMM_to_XMM(regTemp, regd);
+ SSE_ANDPS_M128_to_XMM(regTemp, (uptr)&const_clip[4]);
+ SSE_MINPS_M128_to_XMM(regd, (uptr)&g_maxvals[0]);
+ SSE_MAXPS_M128_to_XMM(regd, (uptr)&g_minvals[0]);
+ SSE_ORPS_XMM_to_XMM(regd, regTemp);
+ }
+}
+
+vFloat vFloats1[16] = { //regTemp is not modified
+ vFloat0, vFloat1, vFloat2, vFloat3,
+ vFloat4, vFloat5, vFloat6, vFloat7,
+ vFloat8, vFloat9, vFloat10, vFloat11,
+ vFloat12, vFloat13, vFloat14, vFloat15 };
+
+vFloat vFloats1_useEAX[16] = { //regTemp is not modified but EAX is used
+ vFloat0, vFloat1, vFloat2, vFloat3,
+ vFloat4, vFloat5, vFloat6, vFloat7_useEAX,
+ vFloat8, vFloat9, vFloat10, vFloat11_useEAX,
+ vFloat12, vFloat13_useEAX, vFloat14_useEAX, vFloat15 };
+
+vFloat vFloats2[16] = { //regTemp is modified
+ vFloat0, vFloat1, vFloat2, vFloat3b,
+ vFloat4, vFloat5b, vFloat6b, vFloat7b,
+ vFloat8, vFloat9b, vFloat10b, vFloat11b,
+ vFloat12b, vFloat13b, vFloat14b, vFloat15 };
+
+vFloat vFloats4[16] = { //regTemp is modified
+ vFloat0, vFloat1c, vFloat2c, vFloat3c,
+ vFloat4c, vFloat5c, vFloat6c, vFloat7c,
+ vFloat8c, vFloat9c, vFloat10c, vFloat11c,
+ vFloat12c, vFloat13c, vFloat14c, vFloat15c };
+
+vFloat vFloats4_useEAX[16] = { //regTemp is modified and EAX is used
+ vFloat0, vFloat1c, vFloat2c, vFloat3c,
+ vFloat4c, vFloat5c, vFloat6c, vFloat7c_useEAX,
+ vFloat8c, vFloat9c, vFloat10c, vFloat11c_useEAX,
+ vFloat12c, vFloat13c_useEAX, vFloat14c_useEAX, vFloat15c };
+
+//------------------------------------------------------------------
+// Clamping Functions (wrapper for vFloat* functions)
+// vuFloat : "normal" clamping
+// vuFloat_useEAX : "normal" clamping (faster but EAX is modified)
+// vuFloat2 : "normal" clamping (fastest but regTemp is modified)
+// vuFloat3 : "preserve sign" clamping for pointer
+// vuFloat4 : "preserve sign" clamping (regTemp is modified; *FASTEST* on SSE4 CPUs)
+// vuFloat4_useEAX : "preserve sign" clamping (faster but regTemp and EAX are modified)
+// vuFloat5 : wrapper function for vuFloat2 and vuFloat4
+// vuFloat5_useEAX : wrapper function for vuFloat2 and vuFloat4_useEAX
+// vuFloatExtra : for debugging
+//
+// Notice 1: vuFloat*_useEAX may be slower on AMD CPUs, which have independent execution pipelines for
+// vector and scalar instructions (needs verification)
+// Notice 2: recVUMI_MUL_xyzw_toD and recVUMI_MADD_xyzw_toD use vFloats directly!
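For reference, the per-component effect of the two clamping modes catalogued above can be modeled in a few lines of plain C. The sketch below is illustrative only and assumes IEEE-754 single precision; vu_clamp_scalar is a hypothetical name, not a function in this patch. "Normal" clamping sends NaN to +fMax and +/-Inf to +/-fMax (what the MINSS/MAXSS pairs against g_maxvals/g_minvals compute), while "preserve sign" clamping also keeps a NaN's sign bit (the const_clip AND/OR trick used by the *c variants):

    #include <stdint.h>
    #include <string.h>

    static float vu_clamp_scalar(float f, int preserve_sign)
    {
        uint32_t bits, sign;
        memcpy(&bits, &f, sizeof bits);               /* inspect the raw IEEE-754 bits */
        sign = bits & 0x80000000u;                    /* save the sign bit */
        if ((bits & 0x7fffffffu) > 0x7f800000u)       /* NaN: exponent all ones, mantissa != 0 */
            bits = preserve_sign ? (0x7f7fffffu | sign) : 0x7f7fffffu;
        else if ((bits & 0x7fffffffu) == 0x7f800000u) /* +/-Inf */
            bits = 0x7f7fffffu | sign;                /* clamp to +/-fMax */
        memcpy(&f, &bits, sizeof f);
        return f;
    }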
+//------------------------------------------------------------------
+
+// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (doesn't use any temp regs)
+void vuFloat( int info, int regd, int XYZW) {
+ if( CHECK_VU_OVERFLOW ) {
+ /*if ( (XYZW != 0) && (XYZW != 8) && (XYZW != 0xF) ) {
+ int t1reg = _vuGetTempXMMreg(info);
+ if (t1reg >= 0) {
+ vuFloat2( regd, t1reg, XYZW );
+ _freeXMMreg( t1reg );
+ return;
+ }
+ }*/
+ //vuFloatExtra(regd, XYZW);
+ vFloats1[XYZW](regd, regd);
+ }
+}
+
+// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses EAX as a temp register; faster but **destroys EAX**)
+void vuFloat_useEAX( int info, int regd, int XYZW) {
+ if( CHECK_VU_OVERFLOW ) {
+ vFloats1_useEAX[XYZW](regd, regd);
+ }
+}
+
+// Clamps +/-NaN to +fMax and +/-Inf to +/-fMax (uses a temp reg)
+void vuFloat2(int regd, int regTemp, int XYZW) {
+ if( CHECK_VU_OVERFLOW ) {
+ //vuFloatExtra(regd, XYZW);
+ vFloats2[XYZW](regd, regTemp);
+ }
+}
+
+// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg)
+void vuFloat4(int regd, int regTemp, int XYZW) {
+ if( CHECK_VU_OVERFLOW ) {
+ vFloats4[XYZW](regd, regTemp);
+ }
+}
+
+// Clamps +/-NaN and +/-Inf to +/-fMax (uses a temp reg, and uses EAX as a temp register; faster but **destroys EAX**)
+void vuFloat4_useEAX(int regd, int regTemp, int XYZW) {
+ if( CHECK_VU_OVERFLOW ) {
+ vFloats4_useEAX[XYZW](regd, regTemp);
+ }
+}
+
+// Uses vuFloat4 or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting
+void vuFloat5(int regd, int regTemp, int XYZW) {
+ if (CHECK_VU_SIGN_OVERFLOW) {
+ vuFloat4(regd, regTemp, XYZW);
+ }
+ else vuFloat2(regd, regTemp, XYZW);
+}
+
+// Uses vuFloat4_useEAX or vuFloat2 depending on the CHECK_VU_SIGN_OVERFLOW setting (uses EAX as a temp register; faster but **destroys EAX**)
+void vuFloat5_useEAX(int regd, int regTemp, int XYZW) {
+ if (CHECK_VU_SIGN_OVERFLOW) {
+ vuFloat4_useEAX(regd, regTemp, XYZW);
+ }
+ else vuFloat2(regd, regTemp, XYZW);
+}
+
+// Clamps +/-infs to +/-fMax, and +/-NaNs to +/-fMax
+void vuFloat3(uptr x86ptr) {
+ u8* pjmp;
+
+ if( CHECK_VU_OVERFLOW ) {
+ CMP32ItoM(x86ptr, 0x7f800000 );
+ pjmp = JL8(0); // Signed Comparison
+ MOV32ItoM(x86ptr, 0x7f7fffff );
+ x86SetJ8(pjmp);
+
+ CMP32ItoM(x86ptr, 0xff800000 );
+ pjmp = JB8(0); // Unsigned Comparison
+ MOV32ItoM(x86ptr, 0xff7fffff );
+ x86SetJ8(pjmp);
+ }
+}
+
+PCSX2_ALIGNED16(u64 vuFloatData[2]);
+PCSX2_ALIGNED16(u64 vuFloatData2[2]);
+// Makes NaN == 0, Infinities stay the same; Very Slow - Use only for debugging
+void vuFloatExtra( int regd, int XYZW) {
+ int t1reg = (regd == 0) ? (regd + 1) : (regd - 1);
+ int t2reg = (regd <= 1) ? (regd + 2) : (regd - 2);
+ SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData, t1reg );
+ SSE_MOVAPS_XMM_to_M128( (uptr)vuFloatData2, t2reg );
+
+ SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
+ SSE_CMPORDPS_XMM_to_XMM(t1reg, regd);
+ SSE_MOVAPS_XMM_to_XMM(t2reg, regd);
+ SSE_ANDPS_XMM_to_XMM(t2reg, t1reg);
+ VU_MERGE_REGS_CUSTOM(regd, t2reg, XYZW);
+
+ SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)vuFloatData );
+ SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)vuFloatData2 );
+}
+
+static PCSX2_ALIGNED16(u32 tempRegX[]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
+
+// Called by testWhenOverflow() function
+void testPrintOverflow() {
+ tempRegX[0] &= 0xff800000;
+ tempRegX[1] &= 0xff800000;
+ tempRegX[2] &= 0xff800000;
+ tempRegX[3] &= 0xff800000;
+ if ( (tempRegX[0] == 0x7f800000) || (tempRegX[1] == 0x7f800000) || (tempRegX[2] == 0x7f800000) || (tempRegX[3] == 0x7f800000) )
+ Console::Notice( "VU OVERFLOW!: Changing to +Fmax!!!!!!!!!!!!" );
+ if ( (tempRegX[0] == 0xff800000) || (tempRegX[1] == 0xff800000) || (tempRegX[2] == 0xff800000) || (tempRegX[3] == 0xff800000) )
+ Console::Notice( "VU OVERFLOW!: Changing to -Fmax!!!!!!!!!!!!" );
+}
+
+// Outputs to the console when overflow has occurred.
+void testWhenOverflow(int info, int regd, int t0reg) {
+ SSE_MOVAPS_XMM_to_M128((uptr)tempRegX, regd);
+ CALLFunc((uptr)testPrintOverflow);
+}
diff --git a/pcsx2/x86/iVUmicro.h b/pcsx2/x86/sVU_Micro.h
similarity index 97%
rename from pcsx2/x86/iVUmicro.h
rename to pcsx2/x86/sVU_Micro.h
index 777a7dc063..214a1c425c 100644
--- a/pcsx2/x86/iVUmicro.h
+++ b/pcsx2/x86/sVU_Micro.h
@@ -1,288 +1,286 @@
-/* Pcsx2 - Pc Ps2 Emulator
- * Copyright (C) 2002-2003 Pcsx2 Team
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- */
-
-#ifndef __IVUMICRO_H__
-#define __IVUMICRO_H__
-
-#include "VUmicro.h"
-
-extern u32 vudump;
-
-#define VU0_MEMSIZE 0x1000
-#define VU1_MEMSIZE 0x4000
-
-void recResetVU0();
-void recExecuteVU0Block();
-void recClearVU0( u32 Addr, u32 Size );
-
-void recVU1Init();
-void recVU1Shutdown();
-void recResetVU1();
-void recExecuteVU1Block();
-void recClearVU1( u32 Addr, u32 Size );
-
-
-u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr
-void recUpdateFlags(VURegs * VU, int reg, int info);
-
-void _recvuTestPipes(VURegs * VU);
-void _recvuFlushFDIV(VURegs * VU);
-void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
-void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
-void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn);
-void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn);
-
-#define VUOP_READ 2
-#define VUOP_WRITE 4
-
-// save on mem
-struct _vuopinfo {
- int cycle;
- int cycles;
- u8 statusflag;
- u8 macflag;
- u8 clipflag;
- u8 dummy;
- u8 q;
- u8 p;
- u16 pqinst; // bit of instruction specifying index (srec only)
-};
-
-void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
-int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
-void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
-
-extern int vucycle;
-typedef void (*vFloat)(int regd, int regTemp);
-extern vFloat vFloats1[16];
-extern vFloat vFloats1_useEAX[16];
-extern vFloat vFloats2[16];
-extern vFloat vFloats4[16];
-extern vFloat vFloats4_useEAX[16];
-PCSX2_ALIGNED16_EXTERN(const float s_fones[8]);
-PCSX2_ALIGNED16_EXTERN(const u32 s_mask[4]);
-PCSX2_ALIGNED16_EXTERN(const u32 s_expmask[4]);
-PCSX2_ALIGNED16_EXTERN(const u32 g_minvals[4]);
-PCSX2_ALIGNED16_EXTERN(const u32 g_maxvals[4]);
-PCSX2_ALIGNED16_EXTERN(const u32 const_clip[8]);
-
-u32 GetVIAddr(VURegs * VU, int reg, int read, int info);
-int _vuGetTempXMMreg(int info);
-void vuFloat(int info, int regd, int XYZW);
-void vuFloat_useEAX(int regd, int regTemp, int XYZW);
-void
vuFloat2(int regd, int regTemp, int XYZW); -void vuFloat3(uptr x86ptr); -void vuFloat4(int regd, int regTemp, int XYZW); -void vuFloat4_useEAX(int regd, int regTemp, int XYZW); -void vuFloat5(int regd, int regTemp, int XYZW); -void vuFloat5_useEAX(int regd, int regTemp, int XYZW); -void _vuFlipRegSS(VURegs * VU, int reg); -void _vuFlipRegSS_xyzw(int reg, int xyzw); -void _vuMoveSS(VURegs * VU, int dstreg, int srcreg); -void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw); -void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw); -void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw); -void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw); -#define VU_MERGE_REGS(dest, src) { \ - VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); \ -} - -// use for allocating vi regs -#define ALLOCTEMPX86(mode) _allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode) -#define ALLOCVI(vi, mode) _allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), vi, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode) -#define ADD_VI_NEEDED(vi) _addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi); - -#define SWAP(x, y) *(u32*)&y ^= *(u32*)&x ^= *(u32*)&y ^= *(u32*)&x; - -/***************************************** - VU Micromode Upper instructions -*****************************************/ - -void recVUMI_ABS(VURegs *vuRegs, int info); -void recVUMI_ADD(VURegs *vuRegs, int info); -void recVUMI_ADDi(VURegs *vuRegs, int info); -void recVUMI_ADDq(VURegs *vuRegs, int info); -void recVUMI_ADDx(VURegs *vuRegs, int info); -void recVUMI_ADDy(VURegs *vuRegs, int info); -void recVUMI_ADDz(VURegs *vuRegs, int info); -void recVUMI_ADDw(VURegs *vuRegs, int info); -void recVUMI_ADDA(VURegs *vuRegs, int info); -void recVUMI_ADDAi(VURegs *vuRegs, int info); -void recVUMI_ADDAq(VURegs *vuRegs, int info); -void recVUMI_ADDAx(VURegs *vuRegs, int info); -void recVUMI_ADDAy(VURegs *vuRegs, int info); -void recVUMI_ADDAz(VURegs *vuRegs, int info); -void recVUMI_ADDAw(VURegs *vuRegs, int info); -void recVUMI_SUB(VURegs *vuRegs, int info); -void recVUMI_SUBi(VURegs *vuRegs, int info); -void recVUMI_SUBq(VURegs *vuRegs, int info); -void recVUMI_SUBx(VURegs *vuRegs, int info); -void recVUMI_SUBy(VURegs *vuRegs, int info); -void recVUMI_SUBz(VURegs *vuRegs, int info); -void recVUMI_SUBw(VURegs *vuRegs, int info); -void recVUMI_SUBA(VURegs *vuRegs, int info); -void recVUMI_SUBAi(VURegs *vuRegs, int info); -void recVUMI_SUBAq(VURegs *vuRegs, int info); -void recVUMI_SUBAx(VURegs *vuRegs, int info); -void recVUMI_SUBAy(VURegs *vuRegs, int info); -void recVUMI_SUBAz(VURegs *vuRegs, int info); -void recVUMI_SUBAw(VURegs *vuRegs, int info); -void recVUMI_MUL(VURegs *vuRegs, int info); -void recVUMI_MULi(VURegs *vuRegs, int info); -void recVUMI_MULq(VURegs *vuRegs, int info); -void recVUMI_MULx(VURegs *vuRegs, int info); -void recVUMI_MULy(VURegs *vuRegs, int info); -void recVUMI_MULz(VURegs *vuRegs, int info); -void recVUMI_MULw(VURegs *vuRegs, int info); -void recVUMI_MULA(VURegs *vuRegs, int info); -void recVUMI_MULAi(VURegs *vuRegs, int info); -void recVUMI_MULAq(VURegs *vuRegs, int info); -void recVUMI_MULAx(VURegs *vuRegs, int info); -void recVUMI_MULAy(VURegs *vuRegs, int info); -void recVUMI_MULAz(VURegs *vuRegs, int info); -void recVUMI_MULAw(VURegs *vuRegs, int info); -void recVUMI_MADD(VURegs *vuRegs, int info); -void recVUMI_MADDi(VURegs *vuRegs, int info); -void recVUMI_MADDq(VURegs *vuRegs, int info); -void recVUMI_MADDx(VURegs *vuRegs, int info); -void recVUMI_MADDy(VURegs *vuRegs, int info); -void recVUMI_MADDz(VURegs 
*vuRegs, int info); -void recVUMI_MADDw(VURegs *vuRegs, int info); -void recVUMI_MADDA(VURegs *vuRegs, int info); -void recVUMI_MADDAi(VURegs *vuRegs, int info); -void recVUMI_MADDAq(VURegs *vuRegs, int info); -void recVUMI_MADDAx(VURegs *vuRegs, int info); -void recVUMI_MADDAy(VURegs *vuRegs, int info); -void recVUMI_MADDAz(VURegs *vuRegs, int info); -void recVUMI_MADDAw(VURegs *vuRegs, int info); -void recVUMI_MSUB(VURegs *vuRegs, int info); -void recVUMI_MSUBi(VURegs *vuRegs, int info); -void recVUMI_MSUBq(VURegs *vuRegs, int info); -void recVUMI_MSUBx(VURegs *vuRegs, int info); -void recVUMI_MSUBy(VURegs *vuRegs, int info); -void recVUMI_MSUBz(VURegs *vuRegs, int info); -void recVUMI_MSUBw(VURegs *vuRegs, int info); -void recVUMI_MSUBA(VURegs *vuRegs, int info); -void recVUMI_MSUBAi(VURegs *vuRegs, int info); -void recVUMI_MSUBAq(VURegs *vuRegs, int info); -void recVUMI_MSUBAx(VURegs *vuRegs, int info); -void recVUMI_MSUBAy(VURegs *vuRegs, int info); -void recVUMI_MSUBAz(VURegs *vuRegs, int info); -void recVUMI_MSUBAw(VURegs *vuRegs, int info); -void recVUMI_MAX(VURegs *vuRegs, int info); -void recVUMI_MAXi(VURegs *vuRegs, int info); -void recVUMI_MAXx(VURegs *vuRegs, int info); -void recVUMI_MAXy(VURegs *vuRegs, int info); -void recVUMI_MAXz(VURegs *vuRegs, int info); -void recVUMI_MAXw(VURegs *vuRegs, int info); -void recVUMI_MINI(VURegs *vuRegs, int info); -void recVUMI_MINIi(VURegs *vuRegs, int info); -void recVUMI_MINIx(VURegs *vuRegs, int info); -void recVUMI_MINIy(VURegs *vuRegs, int info); -void recVUMI_MINIz(VURegs *vuRegs, int info); -void recVUMI_MINIw(VURegs *vuRegs, int info); -void recVUMI_OPMULA(VURegs *vuRegs, int info); -void recVUMI_OPMSUB(VURegs *vuRegs, int info); -void recVUMI_NOP(VURegs *vuRegs, int info); -void recVUMI_FTOI0(VURegs *vuRegs, int info); -void recVUMI_FTOI4(VURegs *vuRegs, int info); -void recVUMI_FTOI12(VURegs *vuRegs, int info); -void recVUMI_FTOI15(VURegs *vuRegs, int info); -void recVUMI_ITOF0(VURegs *vuRegs, int info); -void recVUMI_ITOF4(VURegs *vuRegs, int info); -void recVUMI_ITOF12(VURegs *vuRegs, int info); -void recVUMI_ITOF15(VURegs *vuRegs, int info); -void recVUMI_CLIP(VURegs *vuRegs, int info); - -/***************************************** - VU Micromode Lower instructions -*****************************************/ - -void recVUMI_DIV(VURegs *vuRegs, int info); -void recVUMI_SQRT(VURegs *vuRegs, int info); -void recVUMI_RSQRT(VURegs *vuRegs, int info); -void recVUMI_IADD(VURegs *vuRegs, int info); -void recVUMI_IADDI(VURegs *vuRegs, int info); -void recVUMI_IADDIU(VURegs *vuRegs, int info); -void recVUMI_IAND(VURegs *vuRegs, int info); -void recVUMI_IOR(VURegs *vuRegs, int info); -void recVUMI_ISUB(VURegs *vuRegs, int info); -void recVUMI_ISUBIU(VURegs *vuRegs, int info); -void recVUMI_MOVE(VURegs *vuRegs, int info); -void recVUMI_MFIR(VURegs *vuRegs, int info); -void recVUMI_MTIR(VURegs *vuRegs, int info); -void recVUMI_MR32(VURegs *vuRegs, int info); -void recVUMI_LQ(VURegs *vuRegs, int info); -void recVUMI_LQD(VURegs *vuRegs, int info); -void recVUMI_LQI(VURegs *vuRegs, int info); -void recVUMI_SQ(VURegs *vuRegs, int info); -void recVUMI_SQD(VURegs *vuRegs, int info); -void recVUMI_SQI(VURegs *vuRegs, int info); -void recVUMI_ILW(VURegs *vuRegs, int info); -void recVUMI_ISW(VURegs *vuRegs, int info); -void recVUMI_ILWR(VURegs *vuRegs, int info); -void recVUMI_ISWR(VURegs *vuRegs, int info); -void recVUMI_LOI(VURegs *vuRegs, int info); -void recVUMI_RINIT(VURegs *vuRegs, int info); -void recVUMI_RGET(VURegs *vuRegs, int info); 
-void recVUMI_RNEXT(VURegs *vuRegs, int info); -void recVUMI_RXOR(VURegs *vuRegs, int info); -void recVUMI_WAITQ(VURegs *vuRegs, int info); -void recVUMI_FSAND(VURegs *vuRegs, int info); -void recVUMI_FSEQ(VURegs *vuRegs, int info); -void recVUMI_FSOR(VURegs *vuRegs, int info); -void recVUMI_FSSET(VURegs *vuRegs, int info); -void recVUMI_FMAND(VURegs *vuRegs, int info); -void recVUMI_FMEQ(VURegs *vuRegs, int info); -void recVUMI_FMOR(VURegs *vuRegs, int info); -void recVUMI_FCAND(VURegs *vuRegs, int info); -void recVUMI_FCEQ(VURegs *vuRegs, int info); -void recVUMI_FCOR(VURegs *vuRegs, int info); -void recVUMI_FCSET(VURegs *vuRegs, int info); -void recVUMI_FCGET(VURegs *vuRegs, int info); -void recVUMI_IBEQ(VURegs *vuRegs, int info); -void recVUMI_IBGEZ(VURegs *vuRegs, int info); -void recVUMI_IBGTZ(VURegs *vuRegs, int info); -void recVUMI_IBLTZ(VURegs *vuRegs, int info); -void recVUMI_IBLEZ(VURegs *vuRegs, int info); -void recVUMI_IBNE(VURegs *vuRegs, int info); -void recVUMI_B(VURegs *vuRegs, int info); -void recVUMI_BAL(VURegs *vuRegs, int info); -void recVUMI_JR(VURegs *vuRegs, int info); -void recVUMI_JALR(VURegs *vuRegs, int info); -void recVUMI_MFP(VURegs *vuRegs, int info); -void recVUMI_WAITP(VURegs *vuRegs, int info); -void recVUMI_ESADD(VURegs *vuRegs, int info); -void recVUMI_ERSADD(VURegs *vuRegs, int info); -void recVUMI_ELENG(VURegs *vuRegs, int info); -void recVUMI_ERLENG(VURegs *vuRegs, int info); -void recVUMI_EATANxy(VURegs *vuRegs, int info); -void recVUMI_EATANxz(VURegs *vuRegs, int info); -void recVUMI_ESUM(VURegs *vuRegs, int info); -void recVUMI_ERCPR(VURegs *vuRegs, int info); -void recVUMI_ESQRT(VURegs *vuRegs, int info); -void recVUMI_ERSQRT(VURegs *vuRegs, int info); -void recVUMI_ESIN(VURegs *vuRegs, int info); -void recVUMI_EATAN(VURegs *vuRegs, int info); -void recVUMI_EEXP(VURegs *vuRegs, int info); -void recVUMI_XGKICK(VURegs *vuRegs, int info); -void recVUMI_XTOP(VURegs *vuRegs, int info); -void recVUMI_XITOP(VURegs *vuRegs, int info); -void recVUMI_XTOP( VURegs *VU , int info); - -#endif /* __IVUMICRO_H__ */ +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2003 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#pragma once + +#include "VUmicro.h" + +extern u32 vudump; + +#define VU0_MEMSIZE 0x1000 +#define VU1_MEMSIZE 0x4000 + +void recResetVU0(); +void recExecuteVU0Block(); +void recClearVU0( u32 Addr, u32 Size ); + +void recVU1Init(); +void recVU1Shutdown(); +void recResetVU1(); +void recExecuteVU1Block(); +void recClearVU1( u32 Addr, u32 Size ); + + +u32 GetVIAddr(VURegs * VU, int reg, int read, int info); // returns the correct VI addr +void recUpdateFlags(VURegs * VU, int reg, int info); + +void _recvuTestPipes(VURegs * VU); +void _recvuFlushFDIV(VURegs * VU); +void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn); +void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn); +void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn); +void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn); + +#define VUOP_READ 2 +#define VUOP_WRITE 4 + +// save on mem +struct _vuopinfo { + int cycle; + int cycles; + u8 statusflag; + u8 macflag; + u8 clipflag; + u8 dummy; + u8 q; + u8 p; + u16 pqinst; // bit of instruction specifying index (srec only) +}; + +void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs); +int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices +void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK + +extern int vucycle; +typedef void (*vFloat)(int regd, int regTemp); +extern vFloat vFloats1[16]; +extern vFloat vFloats1_useEAX[16]; +extern vFloat vFloats2[16]; +extern vFloat vFloats4[16]; +extern vFloat vFloats4_useEAX[16]; +PCSX2_ALIGNED16_EXTERN(const float s_fones[8]); +PCSX2_ALIGNED16_EXTERN(const u32 s_mask[4]); +PCSX2_ALIGNED16_EXTERN(const u32 s_expmask[4]); +PCSX2_ALIGNED16_EXTERN(const u32 g_minvals[4]); +PCSX2_ALIGNED16_EXTERN(const u32 g_maxvals[4]); +PCSX2_ALIGNED16_EXTERN(const u32 const_clip[8]); + +u32 GetVIAddr(VURegs * VU, int reg, int read, int info); +int _vuGetTempXMMreg(int info); +void vuFloat(int info, int regd, int XYZW); +void vuFloat_useEAX(int regd, int regTemp, int XYZW); +void vuFloat2(int regd, int regTemp, int XYZW); +void vuFloat3(uptr x86ptr); +void vuFloat4(int regd, int regTemp, int XYZW); +void vuFloat4_useEAX(int regd, int regTemp, int XYZW); +void vuFloat5(int regd, int regTemp, int XYZW); +void vuFloat5_useEAX(int regd, int regTemp, int XYZW); +void _vuFlipRegSS(VURegs * VU, int reg); +void _vuFlipRegSS_xyzw(int reg, int xyzw); +void _vuMoveSS(VURegs * VU, int dstreg, int srcreg); +void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw); +void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw); +void VU_MERGE_REGS_CUSTOM(int dest, int src, int xyzw); +void VU_MERGE_REGS_SAFE(int dest, int src, int xyzw); +#define VU_MERGE_REGS(dest, src) { \ + VU_MERGE_REGS_CUSTOM(dest, src, _X_Y_Z_W); \ +} + +// use for allocating vi regs +#define ALLOCTEMPX86(mode) _allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode) +#define ALLOCVI(vi, mode) _allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), vi, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode) +#define ADD_VI_NEEDED(vi) _addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi); + +#define SWAP(x, y) *(u32*)&y ^= *(u32*)&x ^= *(u32*)&y ^= *(u32*)&x; + +/***************************************** + VU Micromode Upper instructions 
+*****************************************/ + +void recVUMI_ABS(VURegs *vuRegs, int info); +void recVUMI_ADD(VURegs *vuRegs, int info); +void recVUMI_ADDi(VURegs *vuRegs, int info); +void recVUMI_ADDq(VURegs *vuRegs, int info); +void recVUMI_ADDx(VURegs *vuRegs, int info); +void recVUMI_ADDy(VURegs *vuRegs, int info); +void recVUMI_ADDz(VURegs *vuRegs, int info); +void recVUMI_ADDw(VURegs *vuRegs, int info); +void recVUMI_ADDA(VURegs *vuRegs, int info); +void recVUMI_ADDAi(VURegs *vuRegs, int info); +void recVUMI_ADDAq(VURegs *vuRegs, int info); +void recVUMI_ADDAx(VURegs *vuRegs, int info); +void recVUMI_ADDAy(VURegs *vuRegs, int info); +void recVUMI_ADDAz(VURegs *vuRegs, int info); +void recVUMI_ADDAw(VURegs *vuRegs, int info); +void recVUMI_SUB(VURegs *vuRegs, int info); +void recVUMI_SUBi(VURegs *vuRegs, int info); +void recVUMI_SUBq(VURegs *vuRegs, int info); +void recVUMI_SUBx(VURegs *vuRegs, int info); +void recVUMI_SUBy(VURegs *vuRegs, int info); +void recVUMI_SUBz(VURegs *vuRegs, int info); +void recVUMI_SUBw(VURegs *vuRegs, int info); +void recVUMI_SUBA(VURegs *vuRegs, int info); +void recVUMI_SUBAi(VURegs *vuRegs, int info); +void recVUMI_SUBAq(VURegs *vuRegs, int info); +void recVUMI_SUBAx(VURegs *vuRegs, int info); +void recVUMI_SUBAy(VURegs *vuRegs, int info); +void recVUMI_SUBAz(VURegs *vuRegs, int info); +void recVUMI_SUBAw(VURegs *vuRegs, int info); +void recVUMI_MUL(VURegs *vuRegs, int info); +void recVUMI_MULi(VURegs *vuRegs, int info); +void recVUMI_MULq(VURegs *vuRegs, int info); +void recVUMI_MULx(VURegs *vuRegs, int info); +void recVUMI_MULy(VURegs *vuRegs, int info); +void recVUMI_MULz(VURegs *vuRegs, int info); +void recVUMI_MULw(VURegs *vuRegs, int info); +void recVUMI_MULA(VURegs *vuRegs, int info); +void recVUMI_MULAi(VURegs *vuRegs, int info); +void recVUMI_MULAq(VURegs *vuRegs, int info); +void recVUMI_MULAx(VURegs *vuRegs, int info); +void recVUMI_MULAy(VURegs *vuRegs, int info); +void recVUMI_MULAz(VURegs *vuRegs, int info); +void recVUMI_MULAw(VURegs *vuRegs, int info); +void recVUMI_MADD(VURegs *vuRegs, int info); +void recVUMI_MADDi(VURegs *vuRegs, int info); +void recVUMI_MADDq(VURegs *vuRegs, int info); +void recVUMI_MADDx(VURegs *vuRegs, int info); +void recVUMI_MADDy(VURegs *vuRegs, int info); +void recVUMI_MADDz(VURegs *vuRegs, int info); +void recVUMI_MADDw(VURegs *vuRegs, int info); +void recVUMI_MADDA(VURegs *vuRegs, int info); +void recVUMI_MADDAi(VURegs *vuRegs, int info); +void recVUMI_MADDAq(VURegs *vuRegs, int info); +void recVUMI_MADDAx(VURegs *vuRegs, int info); +void recVUMI_MADDAy(VURegs *vuRegs, int info); +void recVUMI_MADDAz(VURegs *vuRegs, int info); +void recVUMI_MADDAw(VURegs *vuRegs, int info); +void recVUMI_MSUB(VURegs *vuRegs, int info); +void recVUMI_MSUBi(VURegs *vuRegs, int info); +void recVUMI_MSUBq(VURegs *vuRegs, int info); +void recVUMI_MSUBx(VURegs *vuRegs, int info); +void recVUMI_MSUBy(VURegs *vuRegs, int info); +void recVUMI_MSUBz(VURegs *vuRegs, int info); +void recVUMI_MSUBw(VURegs *vuRegs, int info); +void recVUMI_MSUBA(VURegs *vuRegs, int info); +void recVUMI_MSUBAi(VURegs *vuRegs, int info); +void recVUMI_MSUBAq(VURegs *vuRegs, int info); +void recVUMI_MSUBAx(VURegs *vuRegs, int info); +void recVUMI_MSUBAy(VURegs *vuRegs, int info); +void recVUMI_MSUBAz(VURegs *vuRegs, int info); +void recVUMI_MSUBAw(VURegs *vuRegs, int info); +void recVUMI_MAX(VURegs *vuRegs, int info); +void recVUMI_MAXi(VURegs *vuRegs, int info); +void recVUMI_MAXx(VURegs *vuRegs, int info); +void recVUMI_MAXy(VURegs *vuRegs, int info); +void 
recVUMI_MAXz(VURegs *vuRegs, int info); +void recVUMI_MAXw(VURegs *vuRegs, int info); +void recVUMI_MINI(VURegs *vuRegs, int info); +void recVUMI_MINIi(VURegs *vuRegs, int info); +void recVUMI_MINIx(VURegs *vuRegs, int info); +void recVUMI_MINIy(VURegs *vuRegs, int info); +void recVUMI_MINIz(VURegs *vuRegs, int info); +void recVUMI_MINIw(VURegs *vuRegs, int info); +void recVUMI_OPMULA(VURegs *vuRegs, int info); +void recVUMI_OPMSUB(VURegs *vuRegs, int info); +void recVUMI_NOP(VURegs *vuRegs, int info); +void recVUMI_FTOI0(VURegs *vuRegs, int info); +void recVUMI_FTOI4(VURegs *vuRegs, int info); +void recVUMI_FTOI12(VURegs *vuRegs, int info); +void recVUMI_FTOI15(VURegs *vuRegs, int info); +void recVUMI_ITOF0(VURegs *vuRegs, int info); +void recVUMI_ITOF4(VURegs *vuRegs, int info); +void recVUMI_ITOF12(VURegs *vuRegs, int info); +void recVUMI_ITOF15(VURegs *vuRegs, int info); +void recVUMI_CLIP(VURegs *vuRegs, int info); + +/***************************************** + VU Micromode Lower instructions +*****************************************/ + +void recVUMI_DIV(VURegs *vuRegs, int info); +void recVUMI_SQRT(VURegs *vuRegs, int info); +void recVUMI_RSQRT(VURegs *vuRegs, int info); +void recVUMI_IADD(VURegs *vuRegs, int info); +void recVUMI_IADDI(VURegs *vuRegs, int info); +void recVUMI_IADDIU(VURegs *vuRegs, int info); +void recVUMI_IAND(VURegs *vuRegs, int info); +void recVUMI_IOR(VURegs *vuRegs, int info); +void recVUMI_ISUB(VURegs *vuRegs, int info); +void recVUMI_ISUBIU(VURegs *vuRegs, int info); +void recVUMI_MOVE(VURegs *vuRegs, int info); +void recVUMI_MFIR(VURegs *vuRegs, int info); +void recVUMI_MTIR(VURegs *vuRegs, int info); +void recVUMI_MR32(VURegs *vuRegs, int info); +void recVUMI_LQ(VURegs *vuRegs, int info); +void recVUMI_LQD(VURegs *vuRegs, int info); +void recVUMI_LQI(VURegs *vuRegs, int info); +void recVUMI_SQ(VURegs *vuRegs, int info); +void recVUMI_SQD(VURegs *vuRegs, int info); +void recVUMI_SQI(VURegs *vuRegs, int info); +void recVUMI_ILW(VURegs *vuRegs, int info); +void recVUMI_ISW(VURegs *vuRegs, int info); +void recVUMI_ILWR(VURegs *vuRegs, int info); +void recVUMI_ISWR(VURegs *vuRegs, int info); +void recVUMI_LOI(VURegs *vuRegs, int info); +void recVUMI_RINIT(VURegs *vuRegs, int info); +void recVUMI_RGET(VURegs *vuRegs, int info); +void recVUMI_RNEXT(VURegs *vuRegs, int info); +void recVUMI_RXOR(VURegs *vuRegs, int info); +void recVUMI_WAITQ(VURegs *vuRegs, int info); +void recVUMI_FSAND(VURegs *vuRegs, int info); +void recVUMI_FSEQ(VURegs *vuRegs, int info); +void recVUMI_FSOR(VURegs *vuRegs, int info); +void recVUMI_FSSET(VURegs *vuRegs, int info); +void recVUMI_FMAND(VURegs *vuRegs, int info); +void recVUMI_FMEQ(VURegs *vuRegs, int info); +void recVUMI_FMOR(VURegs *vuRegs, int info); +void recVUMI_FCAND(VURegs *vuRegs, int info); +void recVUMI_FCEQ(VURegs *vuRegs, int info); +void recVUMI_FCOR(VURegs *vuRegs, int info); +void recVUMI_FCSET(VURegs *vuRegs, int info); +void recVUMI_FCGET(VURegs *vuRegs, int info); +void recVUMI_IBEQ(VURegs *vuRegs, int info); +void recVUMI_IBGEZ(VURegs *vuRegs, int info); +void recVUMI_IBGTZ(VURegs *vuRegs, int info); +void recVUMI_IBLTZ(VURegs *vuRegs, int info); +void recVUMI_IBLEZ(VURegs *vuRegs, int info); +void recVUMI_IBNE(VURegs *vuRegs, int info); +void recVUMI_B(VURegs *vuRegs, int info); +void recVUMI_BAL(VURegs *vuRegs, int info); +void recVUMI_JR(VURegs *vuRegs, int info); +void recVUMI_JALR(VURegs *vuRegs, int info); +void recVUMI_MFP(VURegs *vuRegs, int info); +void recVUMI_WAITP(VURegs *vuRegs, int info); +void 
recVUMI_ESADD(VURegs *vuRegs, int info); +void recVUMI_ERSADD(VURegs *vuRegs, int info); +void recVUMI_ELENG(VURegs *vuRegs, int info); +void recVUMI_ERLENG(VURegs *vuRegs, int info); +void recVUMI_EATANxy(VURegs *vuRegs, int info); +void recVUMI_EATANxz(VURegs *vuRegs, int info); +void recVUMI_ESUM(VURegs *vuRegs, int info); +void recVUMI_ERCPR(VURegs *vuRegs, int info); +void recVUMI_ESQRT(VURegs *vuRegs, int info); +void recVUMI_ERSQRT(VURegs *vuRegs, int info); +void recVUMI_ESIN(VURegs *vuRegs, int info); +void recVUMI_EATAN(VURegs *vuRegs, int info); +void recVUMI_EEXP(VURegs *vuRegs, int info); +void recVUMI_XGKICK(VURegs *vuRegs, int info); +void recVUMI_XTOP(VURegs *vuRegs, int info); +void recVUMI_XITOP(VURegs *vuRegs, int info); +void recVUMI_XTOP( VURegs *VU , int info); + diff --git a/pcsx2/x86/iVUmicroUpper.cpp b/pcsx2/x86/sVU_Upper.cpp similarity index 97% rename from pcsx2/x86/iVUmicroUpper.cpp rename to pcsx2/x86/sVU_Upper.cpp index 61393c43fe..6e06ccdf08 100644 --- a/pcsx2/x86/iVUmicroUpper.cpp +++ b/pcsx2/x86/sVU_Upper.cpp @@ -1,3070 +1,3070 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include "PrecompiledHeader.h" - -#include "Common.h" -#include "GS.h" -#include "R5900OpcodeTables.h" -#include "iR5900.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCOP0.h" -#include "VUmicro.h" -#include "VUflags.h" -#include "iVUmicro.h" -#include "iVUops.h" -#include "iVUzerorec.h" -//------------------------------------------------------------------ -#define MINMAXFIX 1 -//------------------------------------------------------------------ -// Helper Macros -//------------------------------------------------------------------ -#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register - -#define _X (( VU->code>>24) & 0x1) -#define _Y (( VU->code>>23) & 0x1) -#define _Z (( VU->code>>22) & 0x1) -#define _W (( VU->code>>21) & 0x1) - -#define _XYZW_SS (_X+_Y+_Z+_W==1) - -#define _Fsf_ (( VU->code >> 21) & 0x03) -#define _Ftf_ (( VU->code >> 23) & 0x03) - -#define _Imm11_ (s32)(VU->code & 0x400 ? 
0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) -#define _UImm11_ (s32)(VU->code & 0x7ff) - -#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] -#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] -#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] -#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] - -#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] -#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] -#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] - -#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) - -#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] -#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] -#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] -#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] - -#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// Global Variables -//------------------------------------------------------------------ -static const PCSX2_ALIGNED16(int SSEmovMask[ 16 ][ 4 ]) = -{ - { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, - { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }, - { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 }, - { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 }, - { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }, - { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }, - { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 }, - { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 }, - { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }, - { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF } -}; - -static const PCSX2_ALIGNED16(u32 const_abs_table[16][4]) = -{ - { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 - { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //0001 - { 0xffffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //0010 - { 0xffffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //0011 - { 0xffffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //0100 - { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //0101 - { 0xffffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //0110 - { 0xffffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0111 - { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000 - { 0x7fffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //1001 - { 0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //1010 - { 0x7fffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //1011 - { 0x7fffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //1100 - { 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //1101 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //1110 - { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1111 -}; - -static const PCSX2_ALIGNED16(float recMult_float_to_int4[4]) = { 16.0, 16.0, 16.0, 16.0 }; -static const PCSX2_ALIGNED16(float recMult_float_to_int12[4]) = { 4096.0, 4096.0, 4096.0, 4096.0 }; -static const PCSX2_ALIGNED16(float recMult_float_to_int15[4]) = { 32768.0, 32768.0, 32768.0, 32768.0 }; - -static const PCSX2_ALIGNED16(float recMult_int_to_float4[4]) = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; -static const PCSX2_ALIGNED16(float recMult_int_to_float12[4]) = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; -static const PCSX2_ALIGNED16(float recMult_int_to_float15[4]) = { 0.000030517578125, 
0.000030517578125, 0.000030517578125, 0.000030517578125 }; - -static const PCSX2_ALIGNED16(u32 VU_Underflow_Mask1[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -static const PCSX2_ALIGNED16(u32 VU_Underflow_Mask2[4]) = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; -static const PCSX2_ALIGNED16(u32 VU_Zero_Mask[4]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; -static const PCSX2_ALIGNED16(u32 VU_Zero_Helper_Mask[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; -static const PCSX2_ALIGNED16(u32 VU_Signed_Zero_Mask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; -static const PCSX2_ALIGNED16(u32 VU_Pos_Infinity[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; -static const PCSX2_ALIGNED16(u32 VU_Neg_Infinity[4]) = {0xff800000, 0xff800000, 0xff800000, 0xff800000}; -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// recUpdateFlags() - Computes the flags for the Upper Opcodes -// -// Note: Computes under/overflow flags if CHECK_VU_EXTRA_FLAGS is 1 -//------------------------------------------------------------------ -PCSX2_ALIGNED16(u64 TEMPXMMData[2]); -void recUpdateFlags(VURegs * VU, int reg, int info) -{ - static u8 *pjmp, *pjmp2; - static u32 *pjmp32; - static u32 macaddr, stataddr, prevstataddr; - static int x86macflag, x86statflag, x86temp; - static int t1reg, t1regBoolean; - static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; - - if( !(info & PROCESS_VU_UPDATEFLAGS) ) { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (reg != EEREC_TEMP) vuFloat2(reg, EEREC_TEMP, _X_Y_Z_W); - else vuFloat_useEAX(info, reg, _X_Y_Z_W); - } - return; - } - - //Console::WriteLn ("recUpdateFlags"); - - macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0); - stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address - prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address - - if( stataddr == 0 ) stataddr = prevstataddr; - if( macaddr == 0 ) { - Console::WriteLn( "VU ALLOCATION WARNING: Using Mac Flag Previous Address!" ); - macaddr = VU_VI_ADDR(REG_MAC_FLAG, 2); - } - - x86macflag = ALLOCTEMPX86(0); - x86statflag = ALLOCTEMPX86(0); - - if (reg == EEREC_TEMP) { - t1reg = _vuGetTempXMMreg(info); - if (t1reg < 0) { - //Console::WriteLn( "VU ALLOCATION ERROR: Temp reg can't be allocated!!!!" ); - t1reg = (reg == 0) ? 
1 : 0; // Make t1reg != reg
- SSE_MOVAPS_XMM_to_M128( (uptr)TEMPXMMData, t1reg ); // Backup data to temp address
- t1regBoolean = 1;
- }
- else t1regBoolean = 0;
- }
- else {
- t1reg = EEREC_TEMP;
- t1regBoolean = 2;
- }
-
- SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
- MOV32MtoR(x86statflag, prevstataddr); // Load the previous status into x86statflag
- AND16ItoR(x86statflag, 0xff0); // Keep Sticky and D/I flags
-
-
- if (CHECK_VU_EXTRA_FLAGS) { // Checks all flags
-
- x86temp = ALLOCTEMPX86(0);
-
- //-------------------------Check for Overflow flags------------------------------
-
- //SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
- //SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
-
- //SSE_MOVAPS_XMM_to_XMM(t1reg, reg);
- //SSE_MINPS_M128_to_XMM(t1reg, (uptr)g_maxvals);
- //SSE_MAXPS_M128_to_XMM(t1reg, (uptr)g_minvals);
- //SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // If they're not equal, then overflow has occurred
-
- SSE_MOVAPS_XMM_to_XMM(t1reg, reg);
- SSE_ANDPS_M128_to_XMM(t1reg, (uptr)VU_Zero_Helper_Mask);
- SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)VU_Pos_Infinity); // If infinity, then overflow has occurred (NaNs don't report as overflow)
-
- SSE_MOVMSKPS_XMM_to_R32(x86macflag, t1reg); // Move the sign bits of the previous calculation
-
- AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
- pjmp = JZ8(0); // Skip if none are
- OR16ItoR(x86statflag, 0x208); // OS, O flags
- SHL16ItoR(x86macflag, 12);
- if (_XYZW_SS) pjmp32 = JMP32(0); // Skip Underflow Check
- x86SetJ8(pjmp);
-
- //-------------------------Check for Underflow flags------------------------------
-
- SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
-
- SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
- SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF
-
- SSE_ANDPS_XMM_to_XMM(t1reg, reg);
- SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
- SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantissa) then set Vector to 0xFFFFFFFF
-
- SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
-
- AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
- pjmp = JZ8(0); // Skip if none are
- OR16ItoR(x86statflag, 0x104); // US, U flags
- SHL16ItoR(EAX, 8);
- OR32RtoR(x86macflag, EAX);
- x86SetJ8(pjmp);
-
- //-------------------------Optional Code: Denormals Are Zero------------------------------
- if (CHECK_VU_UNDERFLOW) { // Sets underflow/denormals to zero
- SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg (t1reg = denormals are positive zero)
- VU_MERGE_REGS_SAFE(t1reg, reg, (15 - flipMask[_X_Y_Z_W])); // Send t1reg the vectors that shouldn't be modified (since reg was flipped, we need a mask to get the unmodified vectors)
- // Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
- SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // Only keep the sign bit for each vector
- SSE_ORPS_XMM_to_XMM(reg, t1reg); // Denormals are Signed Zero, and unmodified vectors stay the same!
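As an aside, the "Denormals Are Zero" step above amounts to the following scalar operation per component. This is a hedged sketch in plain C, assuming IEEE-754 single precision (vu_flush_denormal is a hypothetical name, not part of these sources): a value whose exponent field is all zero (VU_Underflow_Mask1) but whose mantissa is nonzero (VU_Underflow_Mask2) collapses to zero while keeping only its sign bit (VU_Signed_Zero_Mask):

    #include <stdint.h>
    #include <string.h>

    static float vu_flush_denormal(float f)
    {
        uint32_t bits;
        memcpy(&bits, &f, sizeof bits);
        if ((bits & 0x7f800000u) == 0 && (bits & 0x007fffffu) != 0) /* denormal input */
            bits &= 0x80000000u;                                    /* flush to signed zero */
        memcpy(&f, &bits, sizeof f);
        return f;
    }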
- }
-
- if (_XYZW_SS) x86SetJ32(pjmp32); // If we skipped the Underflow Flag Checking (when we had an Overflow), return here
-
- vuFloat2(reg, t1reg, flipMask[_X_Y_Z_W]); // Clamp overflowed vectors that were modified (remember reg's vectors have been flipped, so have to use a flipmask)
-
- //-------------------------Check for Signed flags------------------------------
-
- // The following code makes sure the Signed Bit isn't set with Negative Zero
- SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
- SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero
- SSE_MOVMSKPS_XMM_to_R32(x86temp, t1reg); // Used for Zero Flag Calculation
- SSE_ANDNPS_XMM_to_XMM(t1reg, reg);
-
- SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the t1reg
-
- AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
- pjmp = JZ8(0); // Skip if none are
- OR16ItoR(x86statflag, 0x82); // SS, S flags
- SHL16ItoR(EAX, 4);
- OR32RtoR(x86macflag, EAX);
- if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
- x86SetJ8(pjmp);
-
- //-------------------------Check for Zero flags------------------------------
-
- AND16ItoR(x86temp, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
- pjmp = JZ8(0); // Skip if none are
- OR16ItoR(x86statflag, 0x41); // ZS, Z flags
- OR32RtoR(x86macflag, x86temp);
- x86SetJ8(pjmp);
-
- _freeX86reg(x86temp);
- }
- else { // Only Checks for Sign and Zero Flags
-
- vuFloat2(reg, t1reg, flipMask[_X_Y_Z_W]); // Clamp overflowed vectors that were modified (remember reg's vectors have been flipped, so have to use a flipmask)
-
- //-------------------------Check for Signed flags------------------------------
-
- // The following code makes sure the Signed Bit isn't set with Negative Zero
- SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
- SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero
- SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Used for Zero Flag Calculation
- SSE_ANDNPS_XMM_to_XMM(t1reg, reg);
-
- SSE_MOVMSKPS_XMM_to_R32(x86macflag, t1reg); // Move the sign bits of the t1reg
-
- AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
- pjmp = JZ8(0); // Skip if none are
- OR16ItoR(x86statflag, 0x82); // SS, S flags
- SHL16ItoR(x86macflag, 4);
- if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
- x86SetJ8(pjmp);
-
- //-------------------------Check for Zero flags------------------------------
-
- AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
- pjmp = JZ8(0); // Skip if none are
- OR16ItoR(x86statflag, 0x41); // ZS, Z flags
- OR32RtoR(x86macflag, EAX);
- x86SetJ8(pjmp);
- }
- //-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------
-
- if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
-
- if (t1regBoolean == 2) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip back reg to wzyx (have to do this because reg != EEREC_TEMP)
- else if (t1regBoolean == 1) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)TEMPXMMData ); // Restore data from temp address
- else _freeXMMreg(t1reg); // Free temp reg
-
- MOV16RtoM(macaddr, x86macflag);
- MOV16RtoM(stataddr, x86statflag);
-
- _freeX86reg(x86macflag);
- _freeX86reg(x86statflag);
-}
-//------------------------------------------------------------------
-
-
-//------------------------------------------------------------------
-// Custom VU ADD/SUB routines
by Nneeve -// -// Note: See FPU_ADD_SUB() for more info on what this is doing. -//------------------------------------------------------------------ -static PCSX2_ALIGNED16(u32 VU_addsuband[2][4]); -static PCSX2_ALIGNED16(u32 VU_addsub_reg[2][4]); - -static u32 tempECX; - -void VU_ADD_SUB(u32 regd, u32 regt, int is_sub, int info) -{ - u8 *localptr[4][8]; - - MOV32RtoM((uptr)&tempECX, ECX); - - int temp1 = ECX; //receives regd - int temp2 = ALLOCTEMPX86(0); - - if (temp2 == ECX) - { - temp2 = ALLOCTEMPX86(0); - _freeX86reg(ECX); - } - - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); - - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsuband[0][0], regd); - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsuband[1][0], regd); - SSE_MOVAPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); - - SSE2_PSLLD_I8_to_XMM(regd, 1); - SSE2_PSLLD_I8_to_XMM(regt, 1); - - SSE2_PSRLD_I8_to_XMM(regd, 24); - SSE2_PSRLD_I8_to_XMM(regt, 24); - - SSE2_PSUBD_XMM_to_XMM(regd, regt); - -#define PERFORM(i) \ - \ - SSE_PEXTRW_XMM_to_R32(temp1, regd, i*2); \ - MOVSX32R16toR(temp1, temp1); \ - CMP32ItoR(temp1, 25);\ - localptr[i][0] = JGE8(0);\ - CMP32ItoR(temp1, 0);\ - localptr[i][1] = JG8(0);\ - localptr[i][2] = JE8(0);\ - CMP32ItoR(temp1, -25);\ - localptr[i][3] = JLE8(0);\ - \ - NEG32R(temp1); \ - DEC32R(temp1);\ - MOV32ItoR(temp2, 0xffffffff); \ - SHL32CLtoR(temp2); \ - MOV32RtoM((uptr)&VU_addsuband[0][i], temp2);\ - localptr[i][4] = JMP8(0);\ - \ - x86SetJ8(localptr[i][0]);\ - MOV32ItoM((uptr)&VU_addsuband[1][i], 0x80000000);\ - localptr[i][5] = JMP8(0);\ - \ - x86SetJ8(localptr[i][1]);\ - DEC32R(temp1);\ - MOV32ItoR(temp2, 0xffffffff);\ - SHL32CLtoR(temp2); \ - MOV32RtoM((uptr)&VU_addsuband[1][i], temp2);\ - localptr[i][6] = JMP8(0);\ - \ - x86SetJ8(localptr[i][3]);\ - MOV32ItoM((uptr)&VU_addsuband[0][i], 0x80000000);\ - localptr[i][7] = JMP8(0);\ - \ - x86SetJ8(localptr[i][2]);\ - \ - x86SetJ8(localptr[i][4]);\ - x86SetJ8(localptr[i][5]);\ - x86SetJ8(localptr[i][6]);\ - x86SetJ8(localptr[i][7]); - - PERFORM(0); - PERFORM(1); - PERFORM(2); - PERFORM(3); -#undef PERFORM - - SSE_MOVAPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); - SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); - - SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsuband[0][0]); - SSE_ANDPS_M128_to_XMM(regt, (uptr)&VU_addsuband[1][0]); - - if (is_sub) SSE_SUBPS_XMM_to_XMM(regd, regt); - else SSE_ADDPS_XMM_to_XMM(regd, regt); - - SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); - - _freeX86reg(temp2); - - MOV32MtoR(ECX, (uptr)&tempECX); -} - -void VU_ADD_SUB_SS(u32 regd, u32 regt, int is_sub, int is_mem, int info) -{ - u8 *localptr[8]; - u32 addrt = regt; //for case is_mem - - MOV32RtoM((uptr)&tempECX, ECX); - - int temp1 = ECX; //receives regd - int temp2 = ALLOCTEMPX86(0); - - if (temp2 == ECX) - { - temp2 = ALLOCTEMPX86(0); - _freeX86reg(ECX); - } - - SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); - if (!is_mem) SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); - - SSE2_MOVD_XMM_to_R(temp1, regd); - SHR32ItoR(temp1, 23); - - if (is_mem) { - MOV32MtoR(temp2, addrt); - MOV32RtoM((uptr)&VU_addsub_reg[1][0], temp2); - SHR32ItoR(temp2, 23); - } - else { - SSE2_MOVD_XMM_to_R(temp2, regt); - SHR32ItoR(temp2, 23); - } - - AND32ItoR(temp1, 0xff); - AND32ItoR(temp2, 0xff); - - SUB32RtoR(temp1, temp2); //temp1 = exponent difference - - CMP32ItoR(temp1, 25); - localptr[0] = JGE8(0); - CMP32ItoR(temp1, 0); - localptr[1] = JG8(0); - localptr[2] = 
JE8(0); - CMP32ItoR(temp1, -25); - localptr[3] = JLE8(0); - - NEG32R(temp1); - DEC32R(temp1); - MOV32ItoR(temp2, 0xffffffff); - SHL32CLtoR(temp2); - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - if (is_mem) { - SSE_PINSRW_R32_to_XMM(regd, temp2, 0); - SHR32ItoR(temp2, 16); - SSE_PINSRW_R32_to_XMM(regd, temp2, 1); - } - else { - SSE2_MOVD_R_to_XMM(regt, temp2); - SSE_MOVSS_XMM_to_XMM(regd, regt); - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - } - localptr[4] = JMP8(0); - - x86SetJ8(localptr[0]); - MOV32ItoR(temp2, 0x80000000); - if (is_mem) - AND32RtoM((uptr)&VU_addsub_reg[1][0], temp2); - else { - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - SSE2_MOVD_R_to_XMM(regd, temp2); - SSE_MOVSS_XMM_to_XMM(regt, regd); - } - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - localptr[5] = JMP8(0); - - x86SetJ8(localptr[1]); - DEC32R(temp1); - MOV32ItoR(temp2, 0xffffffff); - SHL32CLtoR(temp2); - if (is_mem) - AND32RtoM((uptr)&VU_addsub_reg[1][0], temp2); - else { - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - SSE2_MOVD_R_to_XMM(regd, temp2); - SSE_MOVSS_XMM_to_XMM(regt, regd); - } - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - localptr[6] = JMP8(0); - - x86SetJ8(localptr[3]); - MOV32ItoR(temp2, 0x80000000); - SSE2_PCMPEQB_XMM_to_XMM(regd, regd); - if (is_mem) { - SSE_PINSRW_R32_to_XMM(regd, temp2, 0); - SHR32ItoR(temp2, 16); - SSE_PINSRW_R32_to_XMM(regd, temp2, 1); - } - else { - SSE2_MOVD_R_to_XMM(regt, temp2); - SSE_MOVSS_XMM_to_XMM(regd, regt); - SSE2_PCMPEQB_XMM_to_XMM(regt, regt); - } - localptr[7] = JMP8(0); - - x86SetJ8(localptr[2]); - x86SetJ8(localptr[4]); - x86SetJ8(localptr[5]); - x86SetJ8(localptr[6]); - x86SetJ8(localptr[7]); - - if (is_mem) - { - SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask - - if (is_sub) SSE_SUBSS_M32_to_XMM(regd, (uptr)&VU_addsub_reg[1][0]); - else SSE_ADDSS_M32_to_XMM(regd, (uptr)&VU_addsub_reg[1][0]); - } - else - { - SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask - SSE_ANDPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); //regt contains mask - - if (is_sub) SSE_SUBSS_XMM_to_XMM(regd, regt); - else SSE_ADDSS_XMM_to_XMM(regd, regt); - - SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); - } - - _freeX86reg(temp2); - - MOV32MtoR(ECX, (uptr)&tempECX); -} - -void SSE_ADDPS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB(regd, regt, 0, info); - } - else SSE_ADDPS_XMM_to_XMM(regd, regt); -} -void SSE_SUBPS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB(regd, regt, 1, info); - } - else SSE_SUBPS_XMM_to_XMM(regd, regt); -} -void SSE_ADDSS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 0, 0, info); - } - else SSE_ADDSS_XMM_to_XMM(regd, regt); -} -void SSE_SUBSS_XMM_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 1, 0, info); - } - else SSE_SUBSS_XMM_to_XMM(regd, regt); -} -void SSE_ADDSS_M32_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 0, 1, info); - } - else SSE_ADDSS_M32_to_XMM(regd, regt); -} -void SSE_SUBSS_M32_to_XMM_custom(int info, int regd, int regt) { - if (CHECK_VUADDSUBHACK) { - VU_ADD_SUB_SS(regd, regt, 1, 1, info); - } - else SSE_SUBSS_M32_to_XMM(regd, regt); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// *VU Upper Instructions!* -// -// Note: * = Checked for errors by 
cottonvibes -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ABS* -//------------------------------------------------------------------ -void recVUMI_ABS(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_ABS()"); - if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; - - if ((_X_Y_Z_W == 0x8) || (_X_Y_Z_W == 0xf)) { - VU_MERGE_REGS(EEREC_T, EEREC_S); - SSE_ANDPS_M128_to_XMM(EEREC_T, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] ); - } - else { // Use a temp reg because VU_MERGE_REGS() modifies source reg! - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] ); - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ADD*, ADD_iq*, ADD_xyzw* -//------------------------------------------------------------------ -PCSX2_ALIGNED16(float s_two[4]) = {0,0,0,2}; -void recVUMI_ADD(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_ADD()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; // Don't do anything and just clear flags - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - - if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2 - if ( _X_Y_Z_W != 0xf ) { - SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_two); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_two); - } - else { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add - if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); - else if (EEREC_D == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { // All xyzw being modified (xyzw == 1111) - if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T); - else if (EEREC_D == EEREC_T) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info) -{ - //Console::WriteLn("recVUMI_ADD_iq()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if ( _XYZW_SS ) { - if ( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_ADDSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); // have to flip over EEREC_D for computing flags! 
- } - else if ( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - SSE_ADDSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if ( _X ) { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_ADDSS_M32_to_XMM_custom(info, EEREC_D, addr); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_ADDPS_XMM_to_XMM_custom(info, EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - } - } - else { - if ( (_X_Y_Z_W != 0xf) || (EEREC_D == EEREC_S) || (EEREC_D == EEREC_TEMP) ) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - } - - if (_X_Y_Z_W != 0xf) { - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if ( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); - else if ( EEREC_D == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - else { - SSE_MOVSS_M32_to_XMM(EEREC_D, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn("recVUMI_ADD_xyzw()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if ( _Ft_ == 0 && xyzw < 3 ) { // just move since adding zero - if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); } - else if ( _X_Y_Z_W != 0xf ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else if ( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP) ) { - if ( xyzw == 0 ) { - if ( EEREC_D == EEREC_T ) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - else if( _Fs_ == 0 && !_W ) { // just move - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if ( _X_Y_Z_W != 0xf ) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); } - else if( EEREC_D == EEREC_S ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); } - else { _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_ADDi(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_ADDq(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_ADDx(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 0, info); } -void recVUMI_ADDy(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 1, info); } -void recVUMI_ADDz(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 2, info); } -void recVUMI_ADDw(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - 
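// ------------------------------------------------------------------
// [Editor's sketch -- not part of the PCSX2 source] The VU_ADD_SUB hack above
// emulates the VU's reduced-precision adder: the operand whose exponent is
// smaller has its low (expDiff - 1) mantissa bits zeroed before the add, and is
// reduced to just its sign (+/-0) once the exponent gap reaches 25. A minimal
// scalar model of that masking, assuming a plain host-float add for the final
// step (helper names here are illustrative only):

#include <cstdint>
#include <cstring>

static float maskSmallerOperand(float f, int expDiff) // expDiff = ownExp - otherExp
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    if (expDiff <= -25)
        bits &= 0x80000000u;                    // gap too large: keep only the sign (+/-0)
    else if (expDiff < 0)
        bits &= 0xffffffffu << (-expDiff - 1);  // zero the low mantissa bits
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}

static float vuAddModel(float a, float b)
{
    uint32_t ua, ub;
    std::memcpy(&ua, &a, sizeof(ua));
    std::memcpy(&ub, &b, sizeof(ub));
    const int expA = (ua >> 23) & 0xff, expB = (ub >> 23) & 0xff;
    return maskSmallerOperand(a, expA - expB) + maskSmallerOperand(b, expB - expA);
}
// ------------------------------------------------------------------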
-//------------------------------------------------------------------ -// ADDA*, ADDA_iq*, ADDA_xyzw* -//------------------------------------------------------------------ -void recVUMI_ADDA(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_ADDA()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _X_Y_Z_W == 8 ) { - if (EEREC_ACC == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); // Can this case happen? (cottonvibes) - else if (EEREC_ACC == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_S); // Can this case happen? - else { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T); // Can this case happen? - else if( EEREC_ACC == EEREC_T ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S); // Can this case happen? - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T); - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_ADDA_iq(VURegs *VU, uptr addr, int info) -{ - //Console::WriteLn("recVUMI_ADDA_iq()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _XYZW_SS ) { - assert( EEREC_ACC != EEREC_TEMP ); - if( EEREC_ACC == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_ACC); - SSE_ADDSS_M32_to_XMM(EEREC_ACC, addr); - _vuFlipRegSS(VU, EEREC_ACC); - } - else { - if( _X ) { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_ADDSS_M32_to_XMM(EEREC_ACC, addr); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - } - } - else { - if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - } - - if (_X_Y_Z_W != 0xf) { - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - else { - SSE_MOVSS_M32_to_XMM(EEREC_ACC, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC, 0x00); - SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn("recVUMI_ADDA_xyzw()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if( _X_Y_Z_W == 8 ) { - assert( EEREC_ACC != EEREC_T ); - if( xyzw == 0 ) { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - if( _Fs_ == 0 ) { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - else { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - } - } - else { - if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - 
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - else { - _unpackVF_xyzw(EEREC_ACC, EEREC_T, xyzw); - SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_ADDAi(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_ADDAq(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_ADDAx(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 0, info); } -void recVUMI_ADDAy(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 1, info); } -void recVUMI_ADDAz(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 2, info); } -void recVUMI_ADDAw(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SUB*, SUB_iq*, SUB_xyzw* -//------------------------------------------------------------------ -void recVUMI_SUB(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_SUB()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - - if( EEREC_S == EEREC_T ) { - if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&SSEmovMask[15-_X_Y_Z_W][0]); - else SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D); - } - else if( _X_Y_Z_W == 8 ) { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - if (EEREC_D == EEREC_S) { - if (_Ft_) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - else if (EEREC_D == EEREC_T) { - if (_Ft_) { - SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - if (_Ft_) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else { - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if( ( _Ft_ > 0 ) || _W ) SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if (EEREC_D == EEREC_S) SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T); - else if (EEREC_D == EEREC_T) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info) -{ - //Console::WriteLn("recVUMI_SUB_iq()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - - if( _XYZW_SS ) { - if( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_SUBSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - } - else if( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - SSE_SUBSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_D); - } - else { - 
if( _X ) { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_SUBSS_M32_to_XMM(EEREC_D, addr); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - SSE_SUBSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - _vuFlipRegSS(VU, EEREC_D); - } - } - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); - SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); - - VU_MERGE_REGS(EEREC_D, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - } - else { - if( EEREC_D == EEREC_TEMP ) { - SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn("recVUMI_SUB_xyzw()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if ( _X_Y_Z_W == 8 ) { - if ( (xyzw == 0) && (_Ft_ == _Fs_) ) { - SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&SSEmovMask[7][0]); - } - else if ( EEREC_D == EEREC_TEMP ) { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - if ( (_Ft_ > 0) || (xyzw == 3) ) { - _vuFlipRegSS_xyzw(EEREC_T, xyzw); - SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); - _vuFlipRegSS_xyzw(EEREC_T, xyzw); - } - } - else { - if ( (_Ft_ > 0) || (xyzw == 3) ) { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - } - } - else { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); - SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); - - VU_MERGE_REGS(EEREC_D, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - } - else { - if( EEREC_D == EEREC_TEMP ) { - SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_SUBi(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_SUBq(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_SUBx(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 0, info); } -void recVUMI_SUBy(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 1, info); } -void recVUMI_SUBz(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 2, info); } -void recVUMI_SUBw(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// SUBA*, SUBA_iq, SUBA_xyzw 
-//------------------------------------------------------------------ -void recVUMI_SUBA(VURegs *VU, int info) -{ - //Console::WriteLn("recVUMI_SUBA()"); - if ( _X_Y_Z_W == 0 ) goto flagUpdate; - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - - if( EEREC_S == EEREC_T ) { - if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_ACC, (uptr)&SSEmovMask[15-_X_Y_Z_W][0]); - else SSE_XORPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC); - } - else if( _X_Y_Z_W == 8 ) { - if (EEREC_ACC == EEREC_S) SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T); - else if (EEREC_ACC == EEREC_T) { - SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - else { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T); - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - else { - if( EEREC_ACC == EEREC_S ) SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T); - else if( EEREC_ACC == EEREC_T ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T); - } - } -flagUpdate: - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_SUBA_iq(VURegs *VU, uptr addr, int info) -{ - //Console::WriteLn ("recVUMI_SUBA_iq"); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _XYZW_SS ) { - if( EEREC_ACC == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_ACC); - SSE_SUBSS_M32_to_XMM(EEREC_ACC, addr); - _vuFlipRegSS(VU, EEREC_ACC); - } - else { - if( _X ) { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBSS_M32_to_XMM(EEREC_ACC, addr); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_ACC); - SSE_SUBSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - _vuFlipRegSS(VU, EEREC_ACC); - } - } - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); - SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); - - VU_MERGE_REGS(EEREC_ACC, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - } - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn ("recVUMI_SUBA_xyzw"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - - if( _X_Y_Z_W == 8 ) { - if( xyzw == 0 ) { - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T); - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - } - else { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - 
- if( t1reg >= 0 ) { - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); - SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); - - VU_MERGE_REGS(EEREC_ACC, t1reg); - _freeXMMreg(t1reg); - } - else { - // negate - SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); - } - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); - SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); - } - } - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_SUBAi(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_SUBAq(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_SUBAx(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 0, info); } -void recVUMI_SUBAy(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 1, info); } -void recVUMI_SUBAz(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 2, info); } -void recVUMI_SUBAw(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MUL -//------------------------------------------------------------------ -void recVUMI_MUL_toD(VURegs *VU, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MUL_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - } - - if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? EEREC_T : EEREC_S); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) { - _vuFlipRegSS(VU, EEREC_D); - SSE_MULSS_XMM_to_XMM(EEREC_D, EEREC_D); - _vuFlipRegSS(VU, EEREC_D); - } - else if( _X_Y_Z_W == 8 ) { - if (regd == EEREC_S) SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - else if (regd == EEREC_T) SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - else if (regd == EEREC_T) SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - else { - SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); - SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - } - } -} - -void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MUL_iq_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _XYZW_SS ) { - if( regd == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_M32_to_XMM(regd, addr); - _vuFlipRegSS(VU, EEREC_S); - _vuFlipRegSS(VU, regd); - } - else if( regd == EEREC_S ) { - _vuFlipRegSS(VU, regd); - SSE_MULSS_M32_to_XMM(regd, addr); - _vuFlipRegSS(VU, regd); - } - else { - if( _X ) { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_M32_to_XMM(regd, addr); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - } - } - else { - if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - } - - if (_X_Y_Z_W != 0xf) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - 
VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - else if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - else { - SSE_MOVSS_M32_to_XMM(regd, addr); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00); - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - } - } - } -} - -void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MUL_xyzw_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - } - if (_Fs_) { // This is needed for a lot of games, so always clamp this operand - if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless of whether CHECK_VU_OVERFLOW is set - else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless of whether CHECK_VU_OVERFLOW is set - } - if( _Ft_ == 0 ) { - if( xyzw < 3 ) { - if (_X_Y_Z_W != 0xf) { - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else SSE_XORPS_XMM_to_XMM(regd, regd); - } - else { - assert(xyzw==3); - if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); - } - } - else if( _X_Y_Z_W == 8 ) { - if( regd == EEREC_TEMP ) { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - } - else { - if( xyzw == 0 ) { - if( regd == EEREC_T ) { - SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - } - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP); - } - } - } - else { - if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if (_X_Y_Z_W != 0xf) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - else if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - else { - _unpackVF_xyzw(regd, EEREC_T, xyzw); - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - } - } - } -} - -void recVUMI_MUL(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MUL"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MUL_toD(VU, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MUL_iq(VURegs *VU, int addr, int info) -{ - //Console::WriteLn ("recVUMI_MUL_iq"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MUL_iq_toD(VU, addr, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); - // spacefisherman needs overflow checking on MULi.z -} - -void recVUMI_MUL_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn ("recVUMI_MUL_xyzw"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MULi(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MULq(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MULx(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 0, info); } -void recVUMI_MULy(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 1, info); } -void recVUMI_MULz(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 2, info); } -void recVUMI_MULw(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - 
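// ------------------------------------------------------------------
// [Editor's sketch -- not part of the PCSX2 source] The vuFloat*/vFloats* calls
// sprinkled through these recompiler functions all come down to the same
// per-lane idea: replace Inf/NaN with the largest finite float of the same sign
// so that the SSE arithmetic that follows never sees a NaN. A hedged scalar
// model, assuming that sign-preserving saturation is all the emitted clamp does
// (lane masking via _X_Y_Z_W is omitted; the helper name is illustrative only):

#include <cstdint>
#include <cstring>

static float clampVuOperand(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    if ((bits & 0x7f800000u) == 0x7f800000u)        // exponent all ones: Inf or NaN
        bits = (bits & 0x80000000u) | 0x7f7fffffu;  // +/-FLT_MAX, sign preserved
    std::memcpy(&f, &bits, sizeof(f));
    return f;
}
// ------------------------------------------------------------------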
-//------------------------------------------------------------------ -// MULA -//------------------------------------------------------------------ -void recVUMI_MULA( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MULA"); - recVUMI_MUL_toD(VU, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MULA_iq(VURegs *VU, int addr, int info) -{ - //Console::WriteLn ("recVUMI_MULA_iq"); - recVUMI_MUL_iq_toD(VU, addr, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MULA_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn ("recVUMI_MULA_xyzw"); - recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MULAi(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MULAq(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MULAx(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 0, info); } -void recVUMI_MULAy(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 1, info); } -void recVUMI_MULAz(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 2, info); } -void recVUMI_MULAw(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MADD -//------------------------------------------------------------------ -void recVUMI_MADD_toD(VURegs *VU, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MADD_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - - - if( _X_Y_Z_W == 8 ) { - if( regd == EEREC_ACC ) { - SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if (regd == EEREC_T) { - SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - else if (regd == EEREC_S) { - SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_ACC ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if (regd == EEREC_T) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - else if (regd == EEREC_S) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - else { - SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); - 
SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - } -} - -void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MADD_iq_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - vuFloat3(addr); - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - - if( _X_Y_Z_W == 8 ) { - if( _Fs_ == 0 ) { - // do nothing if regd == ACC (ACCx <= ACCx + 0.0 * *addr) - if( regd != EEREC_ACC ) { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); - } - return; - } - - if( regd == EEREC_ACC ) { - assert( EEREC_TEMP < iREGCNT_XMM ); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if( regd == EEREC_S ) { - SSE_MULSS_M32_to_XMM(regd, addr); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); - SSE_MULSS_M32_to_XMM(regd, addr); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - } - else { - if( _Fs_ == 0 ) { - if( regd == EEREC_ACC ) { // ACCxyz is unchanged, ACCw <= ACCw + *addr - if( _W ) { // if _W is zero, do nothing - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); // { *addr, 0, 0, 0 } - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x27); // { 0, 0, 0, *addr } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); // { ACCx, ACCy, ACCz, ACCw + *addr } - } - } - else { // DESTxyz <= ACCxyz, DESTw <= ACCw + *addr - if( _W ) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); // { *addr, 0, 0, 0 } - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x27); // { 0, 0, 0, *addr } - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); // { ACCx, ACCy, ACCz, ACCw + *addr } - } - else SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - - return; - } - - if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - } - - if (_X_Y_Z_W != 0xf) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_ACC ) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if( regd == EEREC_S ) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - else if( regd == EEREC_TEMP ) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - else { - SSE_MOVSS_M32_to_XMM(regd, addr); - SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00); - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - } - } -} - -void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MADD_xyzw_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Ft_) 
vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - if (_Fs_) { // This is needed for a lot of games, so always clamp this operand - if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless of whether CHECK_VU_OVERFLOW is set - else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless of whether CHECK_VU_OVERFLOW is set - } - if( _Ft_ == 0 ) { - - if( xyzw == 3 ) { - // just add - if( _X_Y_Z_W == 8 ) { - if( regd == EEREC_S ) SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); - SSE_ADDSS_XMM_to_XMM(regd, EEREC_S); - } - } - else { - if( _X_Y_Z_W != 0xf ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_S ) SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - else { - SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_ADDPS_XMM_to_XMM(regd, EEREC_S); - } - } - } - } - else { - // just move acc to regd - if( _X_Y_Z_W != 0xf ) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); - } - - return; - } - - if( _X_Y_Z_W == 8 ) { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - - if( regd == EEREC_ACC ) { - SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if( regd == EEREC_S ) { - SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - else if( regd == EEREC_TEMP ) { - SSE_MULSS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); - } - else { - SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); - SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } - SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); - } - } - else { - if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - } - - if (_X_Y_Z_W != 0xf) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - - VU_MERGE_REGS(regd, EEREC_TEMP); - } - else { - if( regd == EEREC_ACC ) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if( regd == EEREC_S ) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - else if( regd == EEREC_TEMP ) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - else { - _unpackVF_xyzw(regd, EEREC_T, xyzw); - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); - } - } - } -} - -void recVUMI_MADD(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MADD"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MADD_toD(VU, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void 
recVUMI_MADD_iq(VURegs *VU, int addr, int info) -{ - //Console::WriteLn ("recVUMI_MADD_iq"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MADD_iq_toD(VU, addr, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MADD_xyzw(VURegs *VU, int xyzw, int info) -{ - //Console::WriteLn ("recVUMI_MADD_xyzw"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MADD_xyzw_toD(VU, xyzw, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); - // super bust-a-move arrows needs overflow clamping -} - -void recVUMI_MADDi(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MADDq(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MADDx(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 0, info); } -void recVUMI_MADDy(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 1, info); } -void recVUMI_MADDz(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 2, info); } -void recVUMI_MADDw(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MADDA -//------------------------------------------------------------------ -void recVUMI_MADDA( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MADDA"); - recVUMI_MADD_toD(VU, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAi( VURegs *VU , int info) -{ - //Console::WriteLn ("recVUMI_MADDAi"); - recVUMI_MADD_iq_toD( VU, VU_VI_ADDR(REG_I, 1), EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAq( VURegs *VU , int info) -{ - //Console::WriteLn ("recVUMI_MADDAq "); - recVUMI_MADD_iq_toD( VU, VU_REGQ_ADDR, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAx( VURegs *VU , int info) -{ - //Console::WriteLn ("recVUMI_MADDAx"); - recVUMI_MADD_xyzw_toD(VU, 0, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAy( VURegs *VU , int info) -{ - //Console::WriteLn ("recVUMI_MADDAy"); - recVUMI_MADD_xyzw_toD(VU, 1, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAz( VURegs *VU , int info) -{ - //Console::WriteLn ("recVUMI_MADDAz"); - recVUMI_MADD_xyzw_toD(VU, 2, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MADDAw( VURegs *VU , int info) -{ - //Console::WriteLn ("recVUMI_MADDAw"); - recVUMI_MADD_xyzw_toD(VU, 3, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MSUB -//------------------------------------------------------------------ -void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MSUB_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - - if( t1reg >= 0 ) { - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC); - SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); - - VU_MERGE_REGS(regd, t1reg); - _freeXMMreg(t1reg); - } - else { - SSE_XORPS_M128_to_XMM(EEREC_TEMP, 
(uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - } - else { - if( regd == EEREC_S ) { - assert( regd != EEREC_ACC ); - SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); - } - else if( regd == EEREC_T ) { - assert( regd != EEREC_ACC ); - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); - } - else if( regd == EEREC_TEMP ) { - SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); - SSE_MULPS_XMM_to_XMM(regd, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); - } - } -} - -void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) -{ - //Console::WriteLn ("recVUMI_MSUB_temp_toD"); - - if (_X_Y_Z_W != 0xf) { - int t1reg = _vuGetTempXMMreg(info); - - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - - if( t1reg >= 0 ) { - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC); - SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); - - if ( regd != EEREC_TEMP ) { VU_MERGE_REGS(regd, t1reg); } - else SSE_MOVAPS_XMM_to_XMM(regd, t1reg); - - _freeXMMreg(t1reg); - } - else { - SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - VU_MERGE_REGS(regd, EEREC_TEMP); - } - } - else { - if( regd == EEREC_ACC ) { - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); - } - else if( regd == EEREC_S ) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); - } - else if( regd == EEREC_TEMP ) { - SSE_MULPS_XMM_to_XMM(regd, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); - } - else { - SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } - SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); - } - } -} - -void recVUMI_MSUB_iq_toD(VURegs *VU, int regd, int addr, int info) -{ - //Console::WriteLn ("recVUMI_MSUB_iq_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - vuFloat3(addr); - } - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - recVUMI_MSUB_temp_toD(VU, regd, info); -} - -void recVUMI_MSUB_xyzw_toD(VURegs *VU, int regd, int xyzw, int info) -{ - //Console::WriteLn ("recVUMI_MSUB_xyzw_toD"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); - if (_Ft_) vuFloat5_useEAX( EEREC_T, 
EEREC_TEMP, 1 << (3 - xyzw)); - vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); - } - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - recVUMI_MSUB_temp_toD(VU, regd, info); -} - -void recVUMI_MSUB(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MSUB"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_toD(VU, EEREC_D, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUB_iq(VURegs *VU, int addr, int info) -{ - //Console::WriteLn ("recVUMI_MSUB_iq"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_iq_toD(VU, EEREC_D, addr, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBi(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MSUBq(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_REGQ_ADDR, info); } -void recVUMI_MSUBx(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MSUBx"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 0, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBy(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MSUBy"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 1, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBz(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MSUBz"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 2, info); - recUpdateFlags(VU, EEREC_D, info); -} - -void recVUMI_MSUBw(VURegs *VU, int info) -{ - //Console::WriteLn ("recVUMI_MSUBw"); - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 3, info); - recUpdateFlags(VU, EEREC_D, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MSUBA -//------------------------------------------------------------------ -void recVUMI_MSUBA( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBA"); - recVUMI_MSUB_toD(VU, EEREC_ACC, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAi( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBAi "); - recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_VI_ADDR(REG_I, 1), info ); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAq( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBAq"); - recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_REGQ_ADDR, info ); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAx( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBAx"); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 0, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAy( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBAy"); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 1, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAz( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBAz "); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 2, info); - recUpdateFlags(VU, EEREC_ACC, info); -} - -void recVUMI_MSUBAw( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_MSUBAw"); - recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 3, info); - recUpdateFlags(VU, EEREC_ACC, info); -} -//------------------------------------------------------------------ - - -static const u32 PCSX2_ALIGNED16(special_mask[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}; -static const u32 PCSX2_ALIGNED16(special_mask2[4]) = {0, 0x40000000, 0, 0x40000000}; - -u32 PCSX2_ALIGNED16(temp_loc[4]); -u32 
PCSX2_ALIGNED16(temp_loc2[4]); - -//MAX/MINI are non-arithmetic operations that implicitly support numbers with the EXP field being 0 ("denormals"). -// -//Because of that, they are sometimes used for integer moves and (positive!) integer max/min, relying on integers that -//represent denormals not being flushed to 0. -// -//This implementation therefore performs a non-arithmetic comparison that supports "denormals" and "infs/nans". -//There might be an easier way to do it, but here MAX/MIN is performed with PMAXPD/PMINPD. -//Fake double-precision numbers are constructed by copying the sign of the original number, clearing the upper 32 bits, -//setting the 62nd bit to 1 (so the double-precision number is always "normalized"), and keeping the lower 32 bits -//the same as the original number. - -void MINMAXlogical(VURegs *VU, int info, int min, int mode, uptr addr = 0, int xyzw = 0) -//mode 0 = normal, mode 1 = iq, mode 2 = xyzw -{ - int t1regbool = 0; - int t1reg = _vuGetTempXMMreg(info); - if (t1reg < 0) - { - t1regbool = 1; - for (t1reg = 0; ( (t1reg == EEREC_D) || (t1reg == EEREC_S) || (mode != 1 && t1reg == EEREC_T) - || (t1reg == EEREC_TEMP) ); t1reg++); // Find unused reg (For first temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)temp_loc, t1reg); // Backup t1reg XMM reg - } - int t2regbool = -1; - int t2reg = EEREC_TEMP; - if (EEREC_TEMP == EEREC_D || EEREC_TEMP == EEREC_S || (mode != 1 && EEREC_TEMP == EEREC_T)) - { - t2regbool = 0; - t2reg = _vuGetTempXMMreg(info); - if (t2reg < 0) - { - t2regbool = 1; - for (t2reg = 0; ( (t2reg == EEREC_D) || (t2reg == EEREC_S) || (mode != 1 && t2reg == EEREC_T) || - (t2reg == t1reg) || (t2reg == EEREC_TEMP) ); t2reg++); // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)temp_loc2, t2reg); // Backup t2reg XMM reg - } - } - - if (_X || _Y) - { - SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, 0x50); - SSE2_PAND_M128_to_XMM(t1reg, (uptr)special_mask); - SSE2_POR_M128_to_XMM(t1reg, (uptr)special_mask2); - if (mode == 0) - SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, 0x50); - else if (mode == 1) - { - SSE2_MOVD_M32_to_XMM(t2reg, addr); - SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0x00); - } - else if (mode == 2) - _unpackVF_xyzw(t2reg, EEREC_T, xyzw); - SSE2_PAND_M128_to_XMM(t2reg, (uptr)special_mask); - SSE2_POR_M128_to_XMM(t2reg, (uptr)special_mask2); - if (min) - SSE2_MINPD_XMM_to_XMM(t1reg, t2reg); - else - SSE2_MAXPD_XMM_to_XMM(t1reg, t2reg); - SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0x88); - VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, 0xc & _X_Y_Z_W); - } - - if (_Z || _W) - { - SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, 0xfa); - SSE2_PAND_M128_to_XMM(t1reg, (uptr)special_mask); - SSE2_POR_M128_to_XMM(t1reg, (uptr)special_mask2); - if (mode == 0) - SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, 0xfa); - else if (mode == 1) - { - SSE2_MOVD_M32_to_XMM(t2reg, addr); - SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0x00); - } - else if (mode == 2) - _unpackVF_xyzw(t2reg, EEREC_T, xyzw); - SSE2_PAND_M128_to_XMM(t2reg, (uptr)special_mask); - SSE2_POR_M128_to_XMM(t2reg, (uptr)special_mask2); - if (min) - SSE2_MINPD_XMM_to_XMM(t1reg, t2reg); - else - SSE2_MAXPD_XMM_to_XMM(t1reg, t2reg); - SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0x88); - VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, 0x3 & _X_Y_Z_W); - } - - if (t1regbool == 0) - _freeXMMreg(t1reg); - else if (t1regbool == 1) - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)temp_loc); // Restore t1reg XMM reg - if (t2regbool == 0) - _freeXMMreg(t2reg); - else if (t2regbool == 1) - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)temp_loc2); // Restore t2reg XMM reg -} - 
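// ------------------------------------------------------------------
// [Editor's sketch -- not part of the PCSX2 source] A scalar model of the
// fake-double construction described in the comment above MINMAXlogical(): the
// float's sign lands in bit 63, bit 62 is forced to 1 so the double is always
// normalized, and the untouched float bits occupy the low 32 bits. Comparing
// these doubles preserves the float ordering even for denormal inputs, which a
// single-precision MAXPS/MINPS would effectively flush to zero. Helper names
// are illustrative only:

#include <cstdint>
#include <cstring>

static double fakeDouble(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const uint64_t key = (uint64_t)(bits & 0x80000000u) << 32 // float sign -> bit 63
                       | 0x4000000000000000ull                // bit 62 = 1: always normalized
                       | bits;                                // low 32 bits = original float
    double d;
    std::memcpy(&d, &key, sizeof(d));
    return d;
}

static float vuMaxModel(float a, float b) // the MIN variant compares with <= instead
{
    return fakeDouble(a) >= fakeDouble(b) ? a : b;
}
// ------------------------------------------------------------------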
-//------------------------------------------------------------------ -// MAX -//------------------------------------------------------------------ - -void recVUMI_MAX(VURegs *VU, int info) -{ - if ( _Fd_ == 0 ) return; - //Console::WriteLn ("recVUMI_MAX"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 0, 0); - else - { - - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - - if( _X_Y_Z_W == 8 ) { - if (EEREC_D == EEREC_S) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); - else if (EEREC_D == EEREC_T) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if( EEREC_D == EEREC_S ) SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T); - else if( EEREC_D == EEREC_T ) SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - } -} - -void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info) -{ - if ( _Fd_ == 0 ) return; - //Console::WriteLn ("recVUMI_MAX_iq"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 0, 1, addr); - else - { - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - vuFloat3(addr); - - if( _XYZW_SS ) { - if( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MAXSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_S); - - // have to flip over EEREC_D if computing flags! - //if( (info & PROCESS_VU_UPDATEFLAGS) ) - _vuFlipRegSS(VU, EEREC_D); - } - else if( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - SSE_MAXSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if( _X ) { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MAXSS_M32_to_XMM(EEREC_D, addr); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - SSE_MAXSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - _vuFlipRegSS(VU, EEREC_D); - } - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if(EEREC_D == EEREC_S) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_D, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00); - SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - } - } -} - -void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info) -{ - if ( _Fd_ == 0 ) return; - //Console::WriteLn ("recVUMI_MAX_xyzw"); - - if (_Fs_ == 0 && _Ft_ == 0) - { - if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { - if( xyzw < 3 ) { - SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)s_fones); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - else if (_X_Y_Z_W != 0xf) { - if( xyzw < 3 ) { - if( _X_Y_Z_W & 1 ) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[0]); // w included, so insert the whole reg - else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // w not included, can zero out - } - else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, 
(uptr)s_fones); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - //If VF0.w isnt chosen as the constant, then its going to be MAX( 0, VF0 ), so the result is VF0 - if( xyzw < 3 ) { SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)&VU->VF[0].UL[0]); } - else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_fones); - } - return; - } - - if (MINMAXFIX) - MINMAXlogical(VU, info, 0, 2, 0, xyzw); - else - { - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - - if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { - if( xyzw == 0 ) { - if( EEREC_D == EEREC_S ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); - else if( EEREC_D == EEREC_T ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - else if (_X_Y_Z_W != 0xf) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if (EEREC_D == EEREC_S) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else { - _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); - SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - } - } -} - -void recVUMI_MAXi(VURegs *VU, int info) { recVUMI_MAX_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MAXx(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 0, info); } -void recVUMI_MAXy(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 1, info); } -void recVUMI_MAXz(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 2, info); } -void recVUMI_MAXw(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// MINI -//------------------------------------------------------------------ -void recVUMI_MINI(VURegs *VU, int info) -{ - if ( _Fd_ == 0 ) return; - //Console::WriteLn ("recVUMI_MINI"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 1, 0); - else - { - - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); - - if( _X_Y_Z_W == 8 ) { - if (EEREC_D == EEREC_S) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); - else if (EEREC_D == EEREC_T) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if( EEREC_D == EEREC_S ) { - //ClampUnordered(EEREC_T, EEREC_TEMP, 0); // need for GT4 vu0rec - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T); - } - else if( EEREC_D == EEREC_T ) { - //ClampUnordered(EEREC_S, EEREC_TEMP, 0); // need for GT4 vu0rec - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - } -} - -void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info) -{ - if ( _Fd_ == 0 ) return; - //Console::WriteLn ("recVUMI_MINI_iq"); - - if (MINMAXFIX) - MINMAXlogical(VU, info, 1, 1, addr); - else - { - - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - vuFloat3(addr); - - if( _XYZW_SS ) 
{ - if( EEREC_D == EEREC_TEMP ) { - _vuFlipRegSS(VU, EEREC_S); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MINSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_S); - - // have to flip over EEREC_D if computing flags! - //if( (info & PROCESS_VU_UPDATEFLAGS) ) - _vuFlipRegSS(VU, EEREC_D); - } - else if( EEREC_D == EEREC_S ) { - _vuFlipRegSS(VU, EEREC_D); - SSE_MINSS_M32_to_XMM(EEREC_D, addr); - _vuFlipRegSS(VU, EEREC_D); - } - else { - if( _X ) { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MINSS_M32_to_XMM(EEREC_D, addr); - } - else { - _vuMoveSS(VU, EEREC_TEMP, EEREC_S); - _vuFlipRegSS(VU, EEREC_D); - SSE_MINSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - _vuFlipRegSS(VU, EEREC_D); - } - } - } - else if (_X_Y_Z_W != 0xf) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if(EEREC_D == EEREC_S) { - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else { - SSE_MOVSS_M32_to_XMM(EEREC_D, addr); - SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00); - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - } - } -} - -void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info) -{ - if ( _Fd_ == 0 ) return; - //Console::WriteLn ("recVUMI_MINI_xyzw"); - - if (_Fs_ == 0 && _Ft_ == 0) - { - if( _X_Y_Z_W == 0xf ) - { - //If VF0.w is the constant, the result will match VF0, else its all 0's - if(xyzw == 3) SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)&VU->VF[0].UL[0]); - else SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D); - } - else - { - //If VF0.w is the constant, the result will match VF0, else its all 0's - if(xyzw == 3) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[0]); - else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - return; - } - if (MINMAXFIX) - MINMAXlogical(VU, info, 1, 2, 0, xyzw); - else - { - if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping - if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); - - if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { - if( xyzw == 0 ) { - if( EEREC_D == EEREC_S ) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); - else if( EEREC_D == EEREC_T ) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S); - else { - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); - } - } - else { - _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); - SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - } - else if (_X_Y_Z_W != 0xf) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - VU_MERGE_REGS(EEREC_D, EEREC_TEMP); - } - else { - if (EEREC_D == EEREC_S) { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - } - else { - _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); - SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); - } - } - } -} - -void recVUMI_MINIi(VURegs *VU, int info) { recVUMI_MINI_iq(VU, VU_VI_ADDR(REG_I, 1), info); } -void recVUMI_MINIx(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 0, info); } -void recVUMI_MINIy(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 1, info); } -void recVUMI_MINIz(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 2, info); } -void recVUMI_MINIw(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 3, info); } -//------------------------------------------------------------------ - - 
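
All of the MAX/MINI paths above funnel partial writes through VU_MERGE_REGS / VU_MERGE_REGS_CUSTOM: the result is computed into a temp register, then only the fields named by the dest mask land in the destination. A compact sketch using compiler intrinsics (an illustration of the pattern, not the emitter wrappers; the lane/bit mapping mirrors the SSEmovMask table that appears later in this diff):

#include <xmmintrin.h>
#include <cstdint>

// Merge 'newVal' into 'oldVal' under a 4-bit VU dest mask: x = bit 3 ... w = bit 0,
// with x living in SSE lane 0, as in the recompiler's mask tables.
static __m128 mergeXYZW(__m128 oldVal, __m128 newVal, int xyzw)
{
    alignas(16) uint32_t m[4];
    for (int i = 0; i < 4; i++)
        m[i] = (xyzw & (8 >> i)) ? 0xffffffffu : 0u;   // lane i selected by bit (3 - i)
    const __m128 mask = _mm_load_ps(reinterpret_cast<const float*>(m));
    // dest = (new & mask) | (old & ~mask)
    return _mm_or_ps(_mm_and_ps(newVal, mask), _mm_andnot_ps(mask, oldVal));
}

This is why the _X_Y_Z_W != 0xf branches in recVUMI_MAX and friends compute MAXPS into EEREC_TEMP first: the merge destroys the source operand, so the result must live in a scratch register before the selective copy.
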
-//------------------------------------------------------------------ -// OPMULA -//------------------------------------------------------------------ -void recVUMI_OPMULA( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_OPMULA"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); - } - - SSE_MOVAPS_XMM_to_XMM( EEREC_TEMP, EEREC_S ); - SSE_SHUFPS_XMM_to_XMM( EEREC_T, EEREC_T, 0xD2 ); // EEREC_T = WYXZ - SSE_SHUFPS_XMM_to_XMM( EEREC_TEMP, EEREC_TEMP, 0xC9 ); // EEREC_TEMP = WXZY - SSE_MULPS_XMM_to_XMM( EEREC_TEMP, EEREC_T ); - - VU_MERGE_REGS_CUSTOM(EEREC_ACC, EEREC_TEMP, 14); - - // revert EEREC_T - if( EEREC_T != EEREC_ACC ) - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9); - - recUpdateFlags(VU, EEREC_ACC, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// OPMSUB -//------------------------------------------------------------------ -void recVUMI_OPMSUB( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_OPMSUB"); - if (CHECK_VU_EXTRA_OVERFLOW) { - if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); - if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); - } - - if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xD2); // EEREC_T = WYXZ - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0xC9); // EEREC_TEMP = WXZY - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - - // negate and add - SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); - SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); - VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14); - - // revert EEREC_T - if( EEREC_T != EEREC_D ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9); - - recUpdateFlags(VU, EEREC_D, info); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// NOP -//------------------------------------------------------------------ -void recVUMI_NOP( VURegs *VU, int info ) -{ - //Console::WriteLn ("recVUMI_NOP"); -} -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// recVUMI_FTOI_Saturate() - Saturates result from FTOI Instructions -//------------------------------------------------------------------ -static const PCSX2_ALIGNED16(int rec_const_0x8000000[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; - -void recVUMI_FTOI_Saturate(int rec_s, int rec_t, int rec_tmp1, int rec_tmp2) -{ - //Console::WriteLn ("recVUMI_FTOI_Saturate"); - //Duplicate the xor'd sign bit to the whole value - //FFFF FFFF for positive, 0 for negative - SSE_MOVAPS_XMM_to_XMM(rec_tmp1, rec_s); - SSE2_PXOR_M128_to_XMM(rec_tmp1, (uptr)&const_clip[4]); - SSE2_PSRAD_I8_to_XMM(rec_tmp1, 31); - - //Create mask: 0 where !=8000 0000 - SSE_MOVAPS_XMM_to_XMM(rec_tmp2, rec_t); - SSE2_PCMPEQD_M128_to_XMM(rec_tmp2, (uptr)&const_clip[4]); - - //AND the mask w/ the edit values - SSE_ANDPS_XMM_to_XMM(rec_tmp1, rec_tmp2); - - //if v==8000 0000 && positive -> 8000 0000 + FFFF FFFF -> 7FFF FFFF - //if v==8000 0000 && negative -> 8000 0000 + 0 -> 8000 0000 - //if v!=8000 0000 -> v+0 (masked from the and) - - //Add the values as needed - SSE2_PADDD_XMM_to_XMM(rec_t, rec_tmp1); -} -//------------------------------------------------------------------ - - 
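
The OPMULA/OPMSUB pair above is the VU's cross-product building block: the two SHUFPS immediates rotate the operands so one MULPS produces the three partial products (0xC9 turns (x,y,z,w) into (y,z,x,w); 0xD2 turns it into (z,x,y,w)). A scalar model of what the emitted sequence computes:

struct Vec4 { float x, y, z, w; };

static void opmula(Vec4& acc, const Vec4& fs, const Vec4& ft)
{
    acc.x = fs.y * ft.z;   // the merge mask 14 (xyz) leaves acc.w untouched
    acc.y = fs.z * ft.x;
    acc.z = fs.x * ft.y;
}

static void opmsub(Vec4& fd, const Vec4& acc, const Vec4& fs, const Vec4& ft)
{
    // There is no fused multiply-subtract, so the emitted code negates the product
    // with XORPS against a sign mask and ADDPS's it onto ACC; the result is the same.
    fd.x = acc.x - fs.y * ft.z;
    fd.y = acc.y - fs.z * ft.x;
    fd.z = acc.z - fs.x * ft.y;
}

// Usage: VU microcode computes d = a x b as "OPMULA a,b" followed by "OPMSUB b,a":
//   opmula(acc, a, b);    // acc = (a.y*b.z, a.z*b.x, a.x*b.y)
//   opmsub(d, acc, b, a); // d   = (a.y*b.z - a.z*b.y, ...)

recVUMI_FTOI_Saturate() directly above fixes up CVTTPS2DQ, which returns 0x80000000 for any out-of-range input. A scalar model of the mask-and-add fixup (illustrative; the real code works on whole vectors):

#include <cstdint>
#include <cstring>

static int32_t ftoiSaturate(float src, int32_t converted)
{
    uint32_t sbits;
    std::memcpy(&sbits, &src, sizeof(sbits));
    // Positive input that produced the "integer indefinite" pattern gets
    // 0x80000000 + 0xFFFFFFFF = 0x7FFFFFFF; negative overflow is already INT_MIN.
    if (static_cast<uint32_t>(converted) == 0x80000000u && !(sbits >> 31))
        return 0x7fffffff;
    return converted;
}
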
-//------------------------------------------------------------------ -// FTOI 0/4/12/15 -//------------------------------------------------------------------ -static PCSX2_ALIGNED16(float FTIO_Temp1[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -static PCSX2_ALIGNED16(float FTIO_Temp2[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; -void recVUMI_FTOI0(VURegs *VU, int info) -{ - int t1reg, t2reg; // Temp XMM regs - - if ( _Ft_ == 0 ) return; - - //Console::WriteLn ("recVUMI_FTOI0"); - - if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg - } - - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - if (EEREC_T != EEREC_S) { - SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); - vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg - } - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) - ; // Find unused reg (For second temp reg) - 
SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg - } - - SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_TEMP); - } - } -} - -void recVUMI_FTOIX(VURegs *VU, int addr, int info) -{ - int t1reg, t2reg; // Temp XMM regs - - if ( _Ft_ == 0 ) return; - - //Console::WriteLn ("recVUMI_FTOIX"); - if (_X_Y_Z_W != 0xf) { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg - } - - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - } - else { - if (EEREC_T != EEREC_S) { - SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); - SSE_MULPS_M128_to_XMM(EEREC_T, addr); - vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; 
( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg - } - } - else { - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); - vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) - SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - - t1reg = _vuGetTempXMMreg(info); - - if( t1reg >= 0 ) { // If theres a temp XMM reg available - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg - _freeXMMreg(t1reg); // Free temp reg - } - else { // No temp reg available - for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) - ; // Find unused reg (For first temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg - - for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) - ; // Find unused reg (For second temp reg) - SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg - - recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result - - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg - SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg - } - - SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_TEMP); - } - } -} - -void recVUMI_FTOI4( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int4[0], info); } -void recVUMI_FTOI12( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int12[0], info); } -void recVUMI_FTOI15( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int15[0], info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// ITOF 0/4/12/15 -//------------------------------------------------------------------ -void recVUMI_ITOF0( VURegs *VU, int info ) -{ - if ( _Ft_ == 0 ) return; - - //Console::WriteLn ("recVUMI_ITOF0"); - if (_X_Y_Z_W != 0xf) { - SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - xmmregs[EEREC_T].mode |= MODE_WRITE; - } - else { - SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); - vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities - } -} - -void recVUMI_ITOFX(VURegs *VU, int addr, int info) -{ - if ( _Ft_ == 0 ) return; - - //Console::WriteLn ("recVUMI_ITOFX"); - if (_X_Y_Z_W != 0xf) { - SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); - vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities - VU_MERGE_REGS(EEREC_T, EEREC_TEMP); - xmmregs[EEREC_T].mode |= MODE_WRITE; - } - else { - SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); - SSE_MULPS_M128_to_XMM(EEREC_T, addr); - 
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities - } -} - -void recVUMI_ITOF4( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float4[0], info); } -void recVUMI_ITOF12( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float12[0], info); } -void recVUMI_ITOF15( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float15[0], info); } -//------------------------------------------------------------------ - - -//------------------------------------------------------------------ -// CLIP -//------------------------------------------------------------------ -void recVUMI_CLIP(VURegs *VU, int info) -{ - int t1reg = EEREC_D; - int t2reg = EEREC_ACC; - int x86temp1, x86temp2; - - u32 clipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 0); - u32 prevclipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 2); - - if( clipaddr == 0 ) { // battle star has a clip right before fcset - Console::WriteLn("skipping vu clip"); - return; - } - - //Flush the clip flag before processing, incase of double clip commands (GoW) - - if( prevclipaddr != (uptr)&VU->VI[REG_CLIP_FLAG] ) { - MOV32MtoR(EAX, prevclipaddr); - MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX); - } - - assert( clipaddr != 0 ); - assert( t1reg != t2reg && t1reg != EEREC_TEMP && t2reg != EEREC_TEMP ); - - x86temp1 = ALLOCTEMPX86(MODE_8BITREG); - x86temp2 = ALLOCTEMPX86(MODE_8BITREG); - - //if ( (x86temp1 == 0) || (x86temp2 == 0) ) Console::Error("VU CLIP Allocation Error: EAX being allocated!"); - - _freeXMMreg(t1reg); // These should have been freed at allocation in eeVURecompileCode() - _freeXMMreg(t2reg); // but if they've been used since then, then free them. (just doing this incase :p (cottonvibes)) - - if( _Ft_ == 0 ) { - SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&s_fones[0]); // all 1s - SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)&s_fones[4]); - } - else { - _unpackVF_xyzw(EEREC_TEMP, EEREC_T, 3); - SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[0]); - SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_TEMP); - SSE_ORPS_M128_to_XMM(t1reg, (uptr)&const_clip[4]); - } - - MOV32MtoR(EAX, prevclipaddr); - - SSE_CMPNLEPS_XMM_to_XMM(t1reg, EEREC_S); //-w, -z, -y, -x - SSE_CMPLTPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); //+w, +z, +y, +x - - SHL32ItoR(EAX, 6); - - SSE_MOVAPS_XMM_to_XMM(t2reg, EEREC_TEMP); //t2 = +w, +z, +y, +x - SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, t1reg); //EEREC_TEMP = -y,+y,-x,+x - SSE_UNPCKHPS_XMM_to_XMM(t2reg, t1reg); //t2reg = -w,+w,-z,+z - SSE_MOVMSKPS_XMM_to_R32(x86temp2, EEREC_TEMP); // -y,+y,-x,+x - SSE_MOVMSKPS_XMM_to_R32(x86temp1, t2reg); // -w,+w,-z,+z - - AND8ItoR(x86temp1, 0x3); - SHL8ItoR(x86temp1, 4); - OR8RtoR(EAX, x86temp1); - AND8ItoR(x86temp2, 0xf); - OR8RtoR(EAX, x86temp2); - AND32ItoR(EAX, 0xffffff); - - MOV32RtoM(clipaddr, EAX); - - if (( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) ) //Instantly update the flag if its called from elsewhere (unlikely, but ok) - MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX); - - _freeX86reg(x86temp1); - _freeX86reg(x86temp2); -} +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "PrecompiledHeader.h" + +#include "Common.h" +#include "GS.h" +#include "R5900OpcodeTables.h" +#include "iR5900.h" +#include "iMMI.h" +#include "iFPU.h" +#include "iCOP0.h" +#include "VUmicro.h" +#include "VUflags.h" +#include "sVU_Micro.h" +#include "sVU_Debug.h" +#include "sVU_zerorec.h" +//------------------------------------------------------------------ +#define MINMAXFIX 1 +//------------------------------------------------------------------ +// Helper Macros +//------------------------------------------------------------------ +#define _Ft_ (( VU->code >> 16) & 0x1F) // The rt part of the instruction register +#define _Fs_ (( VU->code >> 11) & 0x1F) // The rd part of the instruction register +#define _Fd_ (( VU->code >> 6) & 0x1F) // The sa part of the instruction register + +#define _X (( VU->code>>24) & 0x1) +#define _Y (( VU->code>>23) & 0x1) +#define _Z (( VU->code>>22) & 0x1) +#define _W (( VU->code>>21) & 0x1) + +#define _XYZW_SS (_X+_Y+_Z+_W==1) + +#define _Fsf_ (( VU->code >> 21) & 0x03) +#define _Ftf_ (( VU->code >> 23) & 0x03) + +#define _Imm11_ (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) +#define _UImm11_ (s32)(VU->code & 0x7ff) + +#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0] +#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1] +#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2] +#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3] + +#define VU_REGR_ADDR (uptr)&VU->VI[REG_R] +#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q] +#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG] + +#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info) + +#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0] +#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1] +#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2] +#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3] + +#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) ) +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// Global Variables +//------------------------------------------------------------------ +static const PCSX2_ALIGNED16(int SSEmovMask[ 16 ][ 4 ]) = +{ + { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }, + { 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF }, + { 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 }, + { 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 }, + { 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }, + { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }, + { 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 }, + { 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF }, + { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 }, + { 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 }, + { 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF }, + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 }, + { 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF } +}; + +static const PCSX2_ALIGNED16(u32 const_abs_table[16][4]) = +{ + { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //0000 + { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //0001 + { 0xffffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //0010 + { 0xffffffff, 0xffffffff, 0x7fffffff, 
0x7fffffff }, //0011 + { 0xffffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //0100 + { 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //0101 + { 0xffffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //0110 + { 0xffffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //0111 + { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }, //1000 + { 0x7fffffff, 0xffffffff, 0xffffffff, 0x7fffffff }, //1001 + { 0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff }, //1010 + { 0x7fffffff, 0xffffffff, 0x7fffffff, 0x7fffffff }, //1011 + { 0x7fffffff, 0x7fffffff, 0xffffffff, 0xffffffff }, //1100 + { 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff }, //1101 + { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0xffffffff }, //1110 + { 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff }, //1111 +}; + +static const PCSX2_ALIGNED16(float recMult_float_to_int4[4]) = { 16.0, 16.0, 16.0, 16.0 }; +static const PCSX2_ALIGNED16(float recMult_float_to_int12[4]) = { 4096.0, 4096.0, 4096.0, 4096.0 }; +static const PCSX2_ALIGNED16(float recMult_float_to_int15[4]) = { 32768.0, 32768.0, 32768.0, 32768.0 }; + +static const PCSX2_ALIGNED16(float recMult_int_to_float4[4]) = { 0.0625f, 0.0625f, 0.0625f, 0.0625f }; +static const PCSX2_ALIGNED16(float recMult_int_to_float12[4]) = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 }; +static const PCSX2_ALIGNED16(float recMult_int_to_float15[4]) = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 }; + +static const PCSX2_ALIGNED16(u32 VU_Underflow_Mask1[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; +static const PCSX2_ALIGNED16(u32 VU_Underflow_Mask2[4]) = {0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff}; +static const PCSX2_ALIGNED16(u32 VU_Zero_Mask[4]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000}; +static const PCSX2_ALIGNED16(u32 VU_Zero_Helper_Mask[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff}; +static const PCSX2_ALIGNED16(u32 VU_Signed_Zero_Mask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000}; +static const PCSX2_ALIGNED16(u32 VU_Pos_Infinity[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000}; +static const PCSX2_ALIGNED16(u32 VU_Neg_Infinity[4]) = {0xff800000, 0xff800000, 0xff800000, 0xff800000}; +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// recUpdateFlags() - Computes the flags for the Upper Opcodes +// +// Note: Computes under/overflow flags if CHECK_VU_EXTRA_FLAGS is 1 +//------------------------------------------------------------------ +PCSX2_ALIGNED16(u64 TEMPXMMData[2]); +void recUpdateFlags(VURegs * VU, int reg, int info) +{ + static u8 *pjmp, *pjmp2; + static u32 *pjmp32; + static u32 macaddr, stataddr, prevstataddr; + static int x86macflag, x86statflag, x86temp; + static int t1reg, t1regBoolean; + static const int flipMask[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15}; + + if( !(info & PROCESS_VU_UPDATEFLAGS) ) { + if (CHECK_VU_EXTRA_OVERFLOW) { + if (reg != EEREC_TEMP) vuFloat2(reg, EEREC_TEMP, _X_Y_Z_W); + else vuFloat_useEAX(info, reg, _X_Y_Z_W); + } + return; + } + + //Console::WriteLn ("recUpdateFlags"); + + macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0); + stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address + prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address + + if( stataddr == 0 ) stataddr = prevstataddr; + if( macaddr == 0 ) { + Console::WriteLn( "VU ALLOCATION WARNING: Using Mac Flag Previous Address!" 
); + macaddr = VU_VI_ADDR(REG_MAC_FLAG, 2); + } + + x86macflag = ALLOCTEMPX86(0); + x86statflag = ALLOCTEMPX86(0); + + if (reg == EEREC_TEMP) { + t1reg = _vuGetTempXMMreg(info); + if (t1reg < 0) { + //Console::WriteLn( "VU ALLOCATION ERROR: Temp reg can't be allocated!!!!" ); + t1reg = (reg == 0) ? 1 : 0; // Make t1reg != reg + SSE_MOVAPS_XMM_to_M128( (uptr)TEMPXMMData, t1reg ); // Backup data to temp address + t1regBoolean = 1; + } + else t1regBoolean = 0; + } + else { + t1reg = EEREC_TEMP; + t1regBoolean = 2; + } + + SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw + MOV32MtoR(x86statflag, prevstataddr); // Load the previous status in to x86statflag + AND16ItoR(x86statflag, 0xff0); // Keep Sticky and D/I flags + + + if (CHECK_VU_EXTRA_FLAGS) { // Checks all flags + + x86temp = ALLOCTEMPX86(0); + + //-------------------------Check for Overflow flags------------------------------ + + //SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg + //SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF + + //SSE_MOVAPS_XMM_to_XMM(t1reg, reg); + //SSE_MINPS_M128_to_XMM(t1reg, (uptr)g_maxvals); + //SSE_MAXPS_M128_to_XMM(t1reg, (uptr)g_minvals); + //SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // If they're not equal, then overflow has occured + + SSE_MOVAPS_XMM_to_XMM(t1reg, reg); + SSE_ANDPS_M128_to_XMM(t1reg, (uptr)VU_Zero_Helper_Mask); + SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)VU_Pos_Infinity); // If infinity, then overflow has occured (NaN's don't report as overflow) + + SSE_MOVMSKPS_XMM_to_R32(x86macflag, t1reg); // Move the sign bits of the previous calculation + + AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified) + pjmp = JZ8(0); // Skip if none are + OR16ItoR(x86statflag, 0x208); // OS, O flags + SHL16ItoR(x86macflag, 12); + if (_XYZW_SS) pjmp32 = JMP32(0); // Skip Underflow Check + x86SetJ8(pjmp); + + //-------------------------Check for Underflow flags------------------------------ + + SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg + + SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]); + SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF + + SSE_ANDPS_XMM_to_XMM(t1reg, reg); + SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]); + SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF + + SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation + + AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR16ItoR(x86statflag, 0x104); // US, U flags + SHL16ItoR(EAX, 8); + OR32RtoR(x86macflag, EAX); + x86SetJ8(pjmp); + + //-------------------------Optional Code: Denormals Are Zero------------------------------ + if (CHECK_VU_UNDERFLOW) { // Sets underflow/denormals to zero + SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg (t1reg = denormals are positive zero) + VU_MERGE_REGS_SAFE(t1reg, reg, (15 - flipMask[_X_Y_Z_W])); // Send t1reg the vectors that shouldn't be modified (since reg was flipped, we need a mask to get the unmodified vectors) + // Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account + SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // Only keep the sign bit for each vector + SSE_ORPS_XMM_to_XMM(reg, t1reg); // Denormals 
are Signed Zero, and unmodified vectors stay the same! + } + + if (_XYZW_SS) x86SetJ32(pjmp32); // If we skipped the Underflow Flag Checking (when we had an Overflow), return here + + vuFloat2(reg, t1reg, flipMask[_X_Y_Z_W]); // Clamp overflowed vectors that were modified (remember reg's vectors have been flipped, so have to use a flipmask) + + //-------------------------Check for Signed flags------------------------------ + + // The following code makes sure the Signed Bit isn't set with Negative Zero + SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg + SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero + SSE_MOVMSKPS_XMM_to_R32(x86temp, t1reg); // Used for Zero Flag Calculation + SSE_ANDNPS_XMM_to_XMM(t1reg, reg); + + SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the t1reg + + AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR16ItoR(x86statflag, 0x82); // SS, S flags + SHL16ItoR(EAX, 4); + OR32RtoR(x86macflag, EAX); + if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking + x86SetJ8(pjmp); + + //-------------------------Check for Zero flags------------------------------ + + AND16ItoR(x86temp, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR16ItoR(x86statflag, 0x41); // ZS, Z flags + OR32RtoR(x86macflag, x86temp); + x86SetJ8(pjmp); + + _freeX86reg(x86temp); + } + else { // Only Checks for Sign and Zero Flags + + vuFloat2(reg, t1reg, flipMask[_X_Y_Z_W]); // Clamp overflowed vectors that were modified (remember reg's vectors have been flipped, so have to use a flipmask) + + //-------------------------Check for Signed flags------------------------------ + + // The following code makes sure the Signed Bit isn't set with Negative Zero + SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg + SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero + SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Used for Zero Flag Calculation + SSE_ANDNPS_XMM_to_XMM(t1reg, reg); + + SSE_MOVMSKPS_XMM_to_R32(x86macflag, t1reg); // Move the sign bits of the t1reg + + AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR16ItoR(x86statflag, 0x82); // SS, S flags + SHL16ItoR(x86macflag, 4); + if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking + x86SetJ8(pjmp); + + //-------------------------Check for Zero flags------------------------------ + + AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation + pjmp = JZ8(0); // Skip if none are + OR16ItoR(x86statflag, 0x41); // ZS, Z flags + OR32RtoR(x86macflag, EAX); + x86SetJ8(pjmp); + } + //-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------ + + if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here + + if (t1regBoolean == 2) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip back reg to wzyx (have to do this because reg != EEREC_TEMP) + else if (t1regBoolean == 1) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)TEMPXMMData ); // Restore data from temo address + else _freeXMMreg(t1reg); // Free temp reg + + MOV16RtoM(macaddr, x86macflag); + MOV16RtoM(stataddr, x86statflag); + + _freeX86reg(x86macflag); + _freeX86reg(x86statflag); +} +//------------------------------------------------------------------ + + 
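
The bit layout recUpdateFlags() packs can be read off its constants. A scalar model follows (helper names hypothetical): the MAC flag holds one nibble per condition — zero in bits 0-3, sign in 4-7, underflow in 8-11, overflow in 12-15, with x at bit 3 of each nibble and w at bit 0, matching _X_Y_Z_W — and the status flag keeps its sticky/D/I bits (0xff0) while ORing in the new condition bits.

#include <cstdint>
#include <cstring>

static uint32_t packMacFlags(const float res[4], int xyzwMask)
{
    uint32_t mac = 0;
    for (int i = 0; i < 4; i++) {                    // i: 0 = x ... 3 = w
        const int b = 3 - i;
        if (!(xyzwMask & (1 << b))) continue;        // only fields being written
        uint32_t bits;
        std::memcpy(&bits, &res[i], sizeof(bits));
        const bool zero      = (bits & 0x7fffffff) == 0;           // +0 or -0
        const bool sign      = (bits >> 31) && !zero;              // negative zero must not set S
        const bool overflow  = (bits & 0x7fffffff) == 0x7f800000;  // matches +/-Inf only; NaNs don't report
        const bool underflow = (bits & 0x7f800000) == 0 &&         // zero exponent,
                               (bits & 0x007fffff) != 0;           // nonzero mantissa (denormal)
        if (zero)      mac |= 1u << b;
        if (sign)      mac |= 1u << (4 + b);
        if (underflow) mac |= 1u << (8 + b);
        if (overflow)  mac |= 1u << (12 + b);
    }
    return mac;
}

static uint32_t updateStatus(uint32_t prevStat, uint32_t mac)
{
    uint32_t stat = prevStat & 0xff0;  // keep sticky and D/I flags, as the emitted AND does
    if (mac & 0x000f) stat |= 0x41;    // Z, ZS
    if (mac & 0x00f0) stat |= 0x82;    // S, SS
    if (mac & 0x0f00) stat |= 0x104;   // U, US
    if (mac & 0xf000) stat |= 0x208;   // O, OS
    return stat;
}
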
+//------------------------------------------------------------------ +// Custom VU ADD/SUB routines by Nneeve +// +// Note: See FPU_ADD_SUB() for more info on what this is doing. +//------------------------------------------------------------------ +static PCSX2_ALIGNED16(u32 VU_addsuband[2][4]); +static PCSX2_ALIGNED16(u32 VU_addsub_reg[2][4]); + +static u32 tempECX; + +void VU_ADD_SUB(u32 regd, u32 regt, int is_sub, int info) +{ + u8 *localptr[4][8]; + + MOV32RtoM((uptr)&tempECX, ECX); + + int temp1 = ECX; //receives regd + int temp2 = ALLOCTEMPX86(0); + + if (temp2 == ECX) + { + temp2 = ALLOCTEMPX86(0); + _freeX86reg(ECX); + } + + SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); + SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); + + SSE2_PCMPEQB_XMM_to_XMM(regd, regd); + SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsuband[0][0], regd); + SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsuband[1][0], regd); + SSE_MOVAPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); + + SSE2_PSLLD_I8_to_XMM(regd, 1); + SSE2_PSLLD_I8_to_XMM(regt, 1); + + SSE2_PSRLD_I8_to_XMM(regd, 24); + SSE2_PSRLD_I8_to_XMM(regt, 24); + + SSE2_PSUBD_XMM_to_XMM(regd, regt); + +#define PERFORM(i) \ + \ + SSE_PEXTRW_XMM_to_R32(temp1, regd, i*2); \ + MOVSX32R16toR(temp1, temp1); \ + CMP32ItoR(temp1, 25);\ + localptr[i][0] = JGE8(0);\ + CMP32ItoR(temp1, 0);\ + localptr[i][1] = JG8(0);\ + localptr[i][2] = JE8(0);\ + CMP32ItoR(temp1, -25);\ + localptr[i][3] = JLE8(0);\ + \ + NEG32R(temp1); \ + DEC32R(temp1);\ + MOV32ItoR(temp2, 0xffffffff); \ + SHL32CLtoR(temp2); \ + MOV32RtoM((uptr)&VU_addsuband[0][i], temp2);\ + localptr[i][4] = JMP8(0);\ + \ + x86SetJ8(localptr[i][0]);\ + MOV32ItoM((uptr)&VU_addsuband[1][i], 0x80000000);\ + localptr[i][5] = JMP8(0);\ + \ + x86SetJ8(localptr[i][1]);\ + DEC32R(temp1);\ + MOV32ItoR(temp2, 0xffffffff);\ + SHL32CLtoR(temp2); \ + MOV32RtoM((uptr)&VU_addsuband[1][i], temp2);\ + localptr[i][6] = JMP8(0);\ + \ + x86SetJ8(localptr[i][3]);\ + MOV32ItoM((uptr)&VU_addsuband[0][i], 0x80000000);\ + localptr[i][7] = JMP8(0);\ + \ + x86SetJ8(localptr[i][2]);\ + \ + x86SetJ8(localptr[i][4]);\ + x86SetJ8(localptr[i][5]);\ + x86SetJ8(localptr[i][6]);\ + x86SetJ8(localptr[i][7]); + + PERFORM(0); + PERFORM(1); + PERFORM(2); + PERFORM(3); +#undef PERFORM + + SSE_MOVAPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); + SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); + + SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsuband[0][0]); + SSE_ANDPS_M128_to_XMM(regt, (uptr)&VU_addsuband[1][0]); + + if (is_sub) SSE_SUBPS_XMM_to_XMM(regd, regt); + else SSE_ADDPS_XMM_to_XMM(regd, regt); + + SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); + + _freeX86reg(temp2); + + MOV32MtoR(ECX, (uptr)&tempECX); +} + +void VU_ADD_SUB_SS(u32 regd, u32 regt, int is_sub, int is_mem, int info) +{ + u8 *localptr[8]; + u32 addrt = regt; //for case is_mem + + MOV32RtoM((uptr)&tempECX, ECX); + + int temp1 = ECX; //receives regd + int temp2 = ALLOCTEMPX86(0); + + if (temp2 == ECX) + { + temp2 = ALLOCTEMPX86(0); + _freeX86reg(ECX); + } + + SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[0][0], regd); + if (!is_mem) SSE_MOVAPS_XMM_to_M128((uptr)&VU_addsub_reg[1][0], regt); + + SSE2_MOVD_XMM_to_R(temp1, regd); + SHR32ItoR(temp1, 23); + + if (is_mem) { + MOV32MtoR(temp2, addrt); + MOV32RtoM((uptr)&VU_addsub_reg[1][0], temp2); + SHR32ItoR(temp2, 23); + } + else { + SSE2_MOVD_XMM_to_R(temp2, regt); + SHR32ItoR(temp2, 23); + } + + AND32ItoR(temp1, 0xff); + AND32ItoR(temp2, 0xff); + + SUB32RtoR(temp1, temp2); //temp1 = exponent difference + + 
CMP32ItoR(temp1, 25); + localptr[0] = JGE8(0); + CMP32ItoR(temp1, 0); + localptr[1] = JG8(0); + localptr[2] = JE8(0); + CMP32ItoR(temp1, -25); + localptr[3] = JLE8(0); + + NEG32R(temp1); + DEC32R(temp1); + MOV32ItoR(temp2, 0xffffffff); + SHL32CLtoR(temp2); + SSE2_PCMPEQB_XMM_to_XMM(regd, regd); + if (is_mem) { + SSE_PINSRW_R32_to_XMM(regd, temp2, 0); + SHR32ItoR(temp2, 16); + SSE_PINSRW_R32_to_XMM(regd, temp2, 1); + } + else { + SSE2_MOVD_R_to_XMM(regt, temp2); + SSE_MOVSS_XMM_to_XMM(regd, regt); + SSE2_PCMPEQB_XMM_to_XMM(regt, regt); + } + localptr[4] = JMP8(0); + + x86SetJ8(localptr[0]); + MOV32ItoR(temp2, 0x80000000); + if (is_mem) + AND32RtoM((uptr)&VU_addsub_reg[1][0], temp2); + else { + SSE2_PCMPEQB_XMM_to_XMM(regt, regt); + SSE2_MOVD_R_to_XMM(regd, temp2); + SSE_MOVSS_XMM_to_XMM(regt, regd); + } + SSE2_PCMPEQB_XMM_to_XMM(regd, regd); + localptr[5] = JMP8(0); + + x86SetJ8(localptr[1]); + DEC32R(temp1); + MOV32ItoR(temp2, 0xffffffff); + SHL32CLtoR(temp2); + if (is_mem) + AND32RtoM((uptr)&VU_addsub_reg[1][0], temp2); + else { + SSE2_PCMPEQB_XMM_to_XMM(regt, regt); + SSE2_MOVD_R_to_XMM(regd, temp2); + SSE_MOVSS_XMM_to_XMM(regt, regd); + } + SSE2_PCMPEQB_XMM_to_XMM(regd, regd); + localptr[6] = JMP8(0); + + x86SetJ8(localptr[3]); + MOV32ItoR(temp2, 0x80000000); + SSE2_PCMPEQB_XMM_to_XMM(regd, regd); + if (is_mem) { + SSE_PINSRW_R32_to_XMM(regd, temp2, 0); + SHR32ItoR(temp2, 16); + SSE_PINSRW_R32_to_XMM(regd, temp2, 1); + } + else { + SSE2_MOVD_R_to_XMM(regt, temp2); + SSE_MOVSS_XMM_to_XMM(regd, regt); + SSE2_PCMPEQB_XMM_to_XMM(regt, regt); + } + localptr[7] = JMP8(0); + + x86SetJ8(localptr[2]); + x86SetJ8(localptr[4]); + x86SetJ8(localptr[5]); + x86SetJ8(localptr[6]); + x86SetJ8(localptr[7]); + + if (is_mem) + { + SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask + + if (is_sub) SSE_SUBSS_M32_to_XMM(regd, (uptr)&VU_addsub_reg[1][0]); + else SSE_ADDSS_M32_to_XMM(regd, (uptr)&VU_addsub_reg[1][0]); + } + else + { + SSE_ANDPS_M128_to_XMM(regd, (uptr)&VU_addsub_reg[0][0]); //regd contains mask + SSE_ANDPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); //regt contains mask + + if (is_sub) SSE_SUBSS_XMM_to_XMM(regd, regt); + else SSE_ADDSS_XMM_to_XMM(regd, regt); + + SSE_MOVAPS_M128_to_XMM(regt, (uptr)&VU_addsub_reg[1][0]); + } + + _freeX86reg(temp2); + + MOV32MtoR(ECX, (uptr)&tempECX); +} + +void SSE_ADDPS_XMM_to_XMM_custom(int info, int regd, int regt) { + if (CHECK_VUADDSUBHACK) { + VU_ADD_SUB(regd, regt, 0, info); + } + else SSE_ADDPS_XMM_to_XMM(regd, regt); +} +void SSE_SUBPS_XMM_to_XMM_custom(int info, int regd, int regt) { + if (CHECK_VUADDSUBHACK) { + VU_ADD_SUB(regd, regt, 1, info); + } + else SSE_SUBPS_XMM_to_XMM(regd, regt); +} +void SSE_ADDSS_XMM_to_XMM_custom(int info, int regd, int regt) { + if (CHECK_VUADDSUBHACK) { + VU_ADD_SUB_SS(regd, regt, 0, 0, info); + } + else SSE_ADDSS_XMM_to_XMM(regd, regt); +} +void SSE_SUBSS_XMM_to_XMM_custom(int info, int regd, int regt) { + if (CHECK_VUADDSUBHACK) { + VU_ADD_SUB_SS(regd, regt, 1, 0, info); + } + else SSE_SUBSS_XMM_to_XMM(regd, regt); +} +void SSE_ADDSS_M32_to_XMM_custom(int info, int regd, int regt) { + if (CHECK_VUADDSUBHACK) { + VU_ADD_SUB_SS(regd, regt, 0, 1, info); + } + else SSE_ADDSS_M32_to_XMM(regd, regt); +} +void SSE_SUBSS_M32_to_XMM_custom(int info, int regd, int regt) { + if (CHECK_VUADDSUBHACK) { + VU_ADD_SUB_SS(regd, regt, 1, 1, info); + } + else SSE_SUBSS_M32_to_XMM(regd, regt); +} +//------------------------------------------------------------------ + + 
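
The intent behind VU_ADD_SUB is easier to follow in scalar form. The PS2's FMAC has no guard or rounding bits: when two exponents differ, the mantissa bits of the smaller operand that are shifted out are simply lost. The hack reproduces that by masking those bits away before the IEEE add/sub. A minimal sketch mirroring the emitted masks (including their gap-minus-one shift count and the 25-or-more cutoff that reduces the smaller operand to its sign bit):

#include <cstdint>
#include <cstring>

static float vuAddSub(float a, float b, bool isSub)
{
    uint32_t ia, ib;
    std::memcpy(&ia, &a, sizeof(ia));
    std::memcpy(&ib, &b, sizeof(ib));

    const int diff = int((ia >> 23) & 0xff) - int((ib >> 23) & 0xff);
    auto truncateSmaller = [](uint32_t& v, int gap) {  // gap = how much larger the other exponent is
        if (gap >= 25)     v &= 0x80000000u;                // operand contributes nothing but its sign
        else if (gap > 0)  v &= 0xffffffffu << (gap - 1);   // drop the bits the VU would shift out
    };
    truncateSmaller(ib, diff);    // a has the bigger exponent: truncate b
    truncateSmaller(ia, -diff);   // b has the bigger exponent: truncate a

    float fa, fb;
    std::memcpy(&fa, &ia, sizeof(fa));
    std::memcpy(&fb, &ib, sizeof(fb));
    return isSub ? fa - fb : fa + fb;
}

The ECX save/restore bracketing the emitted version exists because the variable shift SHL32CLtoR requires its count in CL, so temp1 is pinned to ECX for the duration of the routine.
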
+//------------------------------------------------------------------ +// *VU Upper Instructions!* +// +// Note: * = Checked for errors by cottonvibes +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ABS* +//------------------------------------------------------------------ +void recVUMI_ABS(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_ABS()"); + if ( (_Ft_ == 0) || (_X_Y_Z_W == 0) ) return; + + if ((_X_Y_Z_W == 0x8) || (_X_Y_Z_W == 0xf)) { + VU_MERGE_REGS(EEREC_T, EEREC_S); + SSE_ANDPS_M128_to_XMM(EEREC_T, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] ); + } + else { // Use a temp reg because VU_MERGE_REGS() modifies source reg! + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_abs_table[ _X_Y_Z_W ][ 0 ] ); + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ADD*, ADD_iq*, ADD_xyzw* +//------------------------------------------------------------------ +PCSX2_ALIGNED16(float s_two[4]) = {0,0,0,2}; +void recVUMI_ADD(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_ADD()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; // Don't do anything and just clear flags + if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); + + if ( _Fs_ == 0 && _Ft_ == 0 ) { // if adding VF00 with VF00, then the result is always 0,0,0,2 + if ( _X_Y_Z_W != 0xf ) { + SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)s_two); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_two); + } + else { + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + } + if( _X_Y_Z_W == 8 ) { // If only adding x, then we can do a Scalar Add + if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); + else if (EEREC_D == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else if (_X_Y_Z_W != 0xf) { // If xyzw != 1111, then we have to use a temp reg + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { // All xyzw being modified (xyzw == 1111) + if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T); + else if (EEREC_D == EEREC_T) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_ADD_iq(VURegs *VU, uptr addr, int info) +{ + //Console::WriteLn("recVUMI_ADD_iq()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat3(addr); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + } + + if ( _XYZW_SS ) { + if ( EEREC_D == EEREC_TEMP ) { + _vuFlipRegSS(VU, EEREC_S); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_ADDSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_S); + _vuFlipRegSS(VU, EEREC_D); // have to flip over EEREC_D for computing flags! 
+ } + else if ( EEREC_D == EEREC_S ) { + _vuFlipRegSS(VU, EEREC_D); + SSE_ADDSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_D); + } + else { + if ( _X ) { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_ADDSS_M32_to_XMM_custom(info, EEREC_D, addr); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_ADDPS_XMM_to_XMM_custom(info, EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + } + } + else { + if ( (_X_Y_Z_W != 0xf) || (EEREC_D == EEREC_S) || (EEREC_D == EEREC_TEMP) ) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + } + + if (_X_Y_Z_W != 0xf) { + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if ( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); + else if ( EEREC_D == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + else { + SSE_MOVSS_M32_to_XMM(EEREC_D, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00); + SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn("recVUMI_ADD_xyzw()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + } + + if ( _Ft_ == 0 && xyzw < 3 ) { // just move since adding zero + if ( _X_Y_Z_W == 0x8 ) { VU_MERGE_REGS(EEREC_D, EEREC_S); } + else if ( _X_Y_Z_W != 0xf ) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else if ( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP) ) { + if ( xyzw == 0 ) { + if ( EEREC_D == EEREC_T ) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + } + else if( _Fs_ == 0 && !_W ) { // just move + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if ( _X_Y_Z_W != 0xf ) { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if( EEREC_D == EEREC_TEMP ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); } + else if( EEREC_D == EEREC_S ) { _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); } + else { _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_ADDi(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_ADDq(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_ADDx(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 0, info); } +void recVUMI_ADDy(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 1, info); } +void recVUMI_ADDz(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 2, info); } +void recVUMI_ADDw(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + 
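
The iq forms above lean on the _vuFlipRegSS trick: SSE scalar ops only touch lane 0, so an operation on a single .y/.z/.w field swaps that field into lane 0, applies the SS op, then undoes the swap. An illustrative intrinsics model (the exact shuffle immediates here are an assumption; any self-inverse swap works):

#include <xmmintrin.h>

static __m128 swapWithX(__m128 r, int field)   // field: 0 = x ... 3 = w
{
    switch (field) {
        case 1:  return _mm_shuffle_ps(r, r, 0xE1);  // swap lanes 0 and 1 (x <-> y)
        case 2:  return _mm_shuffle_ps(r, r, 0xC6);  // swap lanes 0 and 2 (x <-> z)
        case 3:  return _mm_shuffle_ps(r, r, 0x27);  // swap lanes 0 and 3 (x <-> w)
        default: return r;                           // field 0 is already in place
    }
}

static __m128 addIqToField(__m128 reg, float iq, int field)
{
    __m128 r = swapWithX(reg, field);     // bring the target field to lane 0
    r = _mm_add_ss(r, _mm_set_ss(iq));    // ADDSS touches only lane 0
    return swapWithX(r, field);           // the swap is its own inverse
}

This is why the flags comment above notes that EEREC_D must be flipped back before recUpdateFlags(): the flag logic expects the register in its natural field order.
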
+//------------------------------------------------------------------ +// ADDA*, ADDA_iq*, ADDA_xyzw* +//------------------------------------------------------------------ +void recVUMI_ADDA(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_ADDA()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + } + + if( _X_Y_Z_W == 8 ) { + if (EEREC_ACC == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); // Can this case happen? (cottonvibes) + else if (EEREC_ACC == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_S); // Can this case happen? + else { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + else { + if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T); // Can this case happen? + else if( EEREC_ACC == EEREC_T ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S); // Can this case happen? + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T); + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_ADDA_iq(VURegs *VU, uptr addr, int info) +{ + //Console::WriteLn("recVUMI_ADDA_iq()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat3(addr); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + } + + if( _XYZW_SS ) { + assert( EEREC_ACC != EEREC_TEMP ); + if( EEREC_ACC == EEREC_S ) { + _vuFlipRegSS(VU, EEREC_ACC); + SSE_ADDSS_M32_to_XMM(EEREC_ACC, addr); + _vuFlipRegSS(VU, EEREC_ACC); + } + else { + if( _X ) { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_ADDSS_M32_to_XMM(EEREC_ACC, addr); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + } + } + else { + if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + } + + if (_X_Y_Z_W != 0xf) { + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + else { + if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + else { + SSE_MOVSS_M32_to_XMM(EEREC_ACC, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC, 0x00); + SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn("recVUMI_ADDA_xyzw()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + } + + if( _X_Y_Z_W == 8 ) { + assert( EEREC_ACC != EEREC_T ); + if( xyzw == 0 ) { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T); + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + if( _Fs_ == 0 ) { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + else { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + } + } + else { + if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + + if (_X_Y_Z_W != 0xf) { + 
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + else { + if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + else { + _unpackVF_xyzw(EEREC_ACC, EEREC_T, xyzw); + SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_ADDAi(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_ADDAq(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_ADDAx(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 0, info); } +void recVUMI_ADDAy(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 1, info); } +void recVUMI_ADDAz(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 2, info); } +void recVUMI_ADDAw(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SUB*, SUB_iq*, SUB_xyzw* +//------------------------------------------------------------------ +void recVUMI_SUB(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_SUB()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); + + if( EEREC_S == EEREC_T ) { + if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&SSEmovMask[15-_X_Y_Z_W][0]); + else SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D); + } + else if( _X_Y_Z_W == 8 ) { + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + } + if (EEREC_D == EEREC_S) { + if (_Ft_) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + else if (EEREC_D == EEREC_T) { + if (_Ft_) { + SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + if (_Ft_) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else { + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + } + if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if( ( _Ft_ > 0 ) || _W ) SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if (EEREC_D == EEREC_S) SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T); + else if (EEREC_D == EEREC_T) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_SUB_iq(VURegs *VU, uptr addr, int info) +{ + //Console::WriteLn("recVUMI_SUB_iq()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat3(addr); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + } + if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); + + if( _XYZW_SS ) { + if( EEREC_D == EEREC_TEMP ) { + _vuFlipRegSS(VU, EEREC_S); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SUBSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_S); + _vuFlipRegSS(VU, EEREC_D); + } + else if( EEREC_D == EEREC_S ) { + _vuFlipRegSS(VU, EEREC_D); + SSE_SUBSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_D); + } + else { + 
if( _X ) { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SUBSS_M32_to_XMM(EEREC_D, addr); + } + else { + _vuMoveSS(VU, EEREC_TEMP, EEREC_S); + _vuFlipRegSS(VU, EEREC_D); + SSE_SUBSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + _vuFlipRegSS(VU, EEREC_D); + } + } + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + + if (_X_Y_Z_W != 0xf) { + int t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); + SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); + + VU_MERGE_REGS(EEREC_D, t1reg); + _freeXMMreg(t1reg); + } + else { + // negate + SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + } + else { + if( EEREC_D == EEREC_TEMP ) { + SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn("recVUMI_SUB_xyzw()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if ( !_Fd_ ) info = (info & ~PROCESS_EE_SET_D(0xf)) | PROCESS_EE_SET_D(EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + } + + if ( _X_Y_Z_W == 8 ) { + if ( (xyzw == 0) && (_Ft_ == _Fs_) ) { + SSE_ANDPS_M128_to_XMM(EEREC_D, (uptr)&SSEmovMask[7][0]); + } + else if ( EEREC_D == EEREC_TEMP ) { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + if ( (_Ft_ > 0) || (xyzw == 3) ) { + _vuFlipRegSS_xyzw(EEREC_T, xyzw); + SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T); + _vuFlipRegSS_xyzw(EEREC_T, xyzw); + } + } + else { + if ( (_Ft_ > 0) || (xyzw == 3) ) { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + } + } + else { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + + if (_X_Y_Z_W != 0xf) { + int t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); + SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); + + VU_MERGE_REGS(EEREC_D, t1reg); + _freeXMMreg(t1reg); + } + else { + // negate + SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + } + else { + if( EEREC_D == EEREC_TEMP ) { + SSE_XORPS_M128_to_XMM(EEREC_D, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_SUBi(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_SUBq(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_SUBx(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 0, info); } +void recVUMI_SUBy(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 1, info); } +void recVUMI_SUBz(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 2, info); } +void recVUMI_SUBw(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// SUBA*, SUBA_iq, SUBA_xyzw 
+//------------------------------------------------------------------ +void recVUMI_SUBA(VURegs *VU, int info) +{ + //Console::WriteLn("recVUMI_SUBA()"); + if ( _X_Y_Z_W == 0 ) goto flagUpdate; + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + } + + if( EEREC_S == EEREC_T ) { + if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_ACC, (uptr)&SSEmovMask[15-_X_Y_Z_W][0]); + else SSE_XORPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC); + } + else if( _X_Y_Z_W == 8 ) { + if (EEREC_ACC == EEREC_S) SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T); + else if (EEREC_ACC == EEREC_T) { + SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + else { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T); + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + else { + if( EEREC_ACC == EEREC_S ) SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T); + else if( EEREC_ACC == EEREC_T ) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T); + } + } +flagUpdate: + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_SUBA_iq(VURegs *VU, uptr addr, int info) +{ + //Console::WriteLn ("recVUMI_SUBA_iq"); + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat3(addr); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + } + + if( _XYZW_SS ) { + if( EEREC_ACC == EEREC_S ) { + _vuFlipRegSS(VU, EEREC_ACC); + SSE_SUBSS_M32_to_XMM(EEREC_ACC, addr); + _vuFlipRegSS(VU, EEREC_ACC); + } + else { + if( _X ) { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBSS_M32_to_XMM(EEREC_ACC, addr); + } + else { + _vuMoveSS(VU, EEREC_TEMP, EEREC_S); + _vuFlipRegSS(VU, EEREC_ACC); + SSE_SUBSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + _vuFlipRegSS(VU, EEREC_ACC); + } + } + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + + if (_X_Y_Z_W != 0xf) { + int t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); + SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); + + VU_MERGE_REGS(EEREC_ACC, t1reg); + _freeXMMreg(t1reg); + } + else { + // negate + SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + } + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn ("recVUMI_SUBA_xyzw"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + } + + if( _X_Y_Z_W == 8 ) { + if( xyzw == 0 ) { + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T); + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + } + else { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + + if (_X_Y_Z_W != 0xf) { + int t1reg = _vuGetTempXMMreg(info); + 
+ if( t1reg >= 0 ) { + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S); + SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); + + VU_MERGE_REGS(EEREC_ACC, t1reg); + _freeXMMreg(t1reg); + } + else { + // negate + SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP); + } + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S); + SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP); + } + } + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_SUBAi(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_SUBAq(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_SUBAx(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 0, info); } +void recVUMI_SUBAy(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 1, info); } +void recVUMI_SUBAz(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 2, info); } +void recVUMI_SUBAw(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MUL +//------------------------------------------------------------------ +void recVUMI_MUL_toD(VURegs *VU, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MUL_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + } + + if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? EEREC_T : EEREC_S); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) { + _vuFlipRegSS(VU, EEREC_D); + SSE_MULSS_XMM_to_XMM(EEREC_D, EEREC_D); + _vuFlipRegSS(VU, EEREC_D); + } + else if( _X_Y_Z_W == 8 ) { + if (regd == EEREC_S) SSE_MULSS_XMM_to_XMM(regd, EEREC_T); + else if (regd == EEREC_T) SSE_MULSS_XMM_to_XMM(regd, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_XMM_to_XMM(regd, EEREC_T); + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_T); + else if (regd == EEREC_T) SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + else { + SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); + SSE_MULPS_XMM_to_XMM(regd, EEREC_T); + } + } +} + +void recVUMI_MUL_iq_toD(VURegs *VU, uptr addr, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MUL_iq_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat3(addr); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + } + + if( _XYZW_SS ) { + if( regd == EEREC_TEMP ) { + _vuFlipRegSS(VU, EEREC_S); + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_M32_to_XMM(regd, addr); + _vuFlipRegSS(VU, EEREC_S); + _vuFlipRegSS(VU, regd); + } + else if( regd == EEREC_S ) { + _vuFlipRegSS(VU, regd); + SSE_MULSS_M32_to_XMM(regd, addr); + _vuFlipRegSS(VU, regd); + } + else { + if( _X ) { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_M32_to_XMM(regd, addr); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + } + } + else { + if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + } + + if (_X_Y_Z_W != 0xf) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + 
VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + else if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); + else { + SSE_MOVSS_M32_to_XMM(regd, addr); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00); + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + } + } + } +} + +void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MUL_xyzw_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + } + if (_Fs_) { // This is needed for alot of games; so always clamp this operand + if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set + else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set + } + if( _Ft_ == 0 ) { + if( xyzw < 3 ) { + if (_X_Y_Z_W != 0xf) { + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else SSE_XORPS_XMM_to_XMM(regd, regd); + } + else { + assert(xyzw==3); + if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); + } + } + else if( _X_Y_Z_W == 8 ) { + if( regd == EEREC_TEMP ) { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MULSS_XMM_to_XMM(regd, EEREC_S); + } + else { + if( xyzw == 0 ) { + if( regd == EEREC_T ) { + SSE_MULSS_XMM_to_XMM(regd, EEREC_S); + } + else { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_XMM_to_XMM(regd, EEREC_T); + } + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP); + } + } + } + else { + if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + + if (_X_Y_Z_W != 0xf) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + else if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); + else { + _unpackVF_xyzw(regd, EEREC_T, xyzw); + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + } + } + } +} + +void recVUMI_MUL(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MUL"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MUL_toD(VU, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MUL_iq(VURegs *VU, int addr, int info) +{ + //Console::WriteLn ("recVUMI_MUL_iq"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MUL_iq_toD(VU, addr, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); + // spacefisherman needs overflow checking on MULi.z +} + +void recVUMI_MUL_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn ("recVUMI_MUL_xyzw"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MULi(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_MULq(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_MULx(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 0, info); } +void recVUMI_MULy(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 1, info); } +void recVUMI_MULz(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 2, info); } +void recVUMI_MULw(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + 
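
A pattern recurs in every upper-pipeline handler above and below: a dest mask of 8 (x only) takes the scalar SS path, 0xf (all fields) computes in place, and anything else computes the full result into EEREC_TEMP and blends only the selected fields into the destination via VU_MERGE_REGS. A minimal scalar sketch of that blend, assuming the x=8, y=4, z=2, w=1 bit mapping implied by the ( 1 << (3 - xyzw) ) expressions used here (VF_t and vu_merge are illustrative names, not PCSX2 code):

    struct VF_t { float f[4]; };            // f[0]=x .. f[3]=w

    static VF_t vu_merge(VF_t dest, const VF_t& result, int xyzw_mask)
    {
        for (int i = 0; i < 4; ++i)
            if (xyzw_mask & (8 >> i))       // bit 3 selects x ... bit 0 selects w
                dest.f[i] = result.f[i];    // selected fields take the new value
        return dest;                        // unselected fields keep their old value
    }

The real VU_MERGE_REGS emits SSE moves and shuffles to get the same effect without a loop.
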
+//------------------------------------------------------------------ +// MULA +//------------------------------------------------------------------ +void recVUMI_MULA( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MULA"); + recVUMI_MUL_toD(VU, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MULA_iq(VURegs *VU, int addr, int info) +{ + //Console::WriteLn ("recVUMI_MULA_iq"); + recVUMI_MUL_iq_toD(VU, addr, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MULA_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn ("recVUMI_MULA_xyzw"); + recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MULAi(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_MULAq(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_MULAx(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 0, info); } +void recVUMI_MULAy(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 1, info); } +void recVUMI_MULAz(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 2, info); } +void recVUMI_MULAw(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MADD +//------------------------------------------------------------------ +void recVUMI_MADD_toD(VURegs *VU, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MADD_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); + } + + + if( _X_Y_Z_W == 8 ) { + if( regd == EEREC_ACC ) { + SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if (regd == EEREC_T) { + SSE_MULSS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + else if (regd == EEREC_S) { + SSE_MULSS_XMM_to_XMM(regd, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + else { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_XMM_to_XMM(regd, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if( regd == EEREC_ACC ) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if (regd == EEREC_T) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + else if (regd == EEREC_S) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + else { + SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); + 
SSE_MULPS_XMM_to_XMM(regd, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + } +} + +void recVUMI_MADD_iq_toD(VURegs *VU, uptr addr, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MADD_iq_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + vuFloat3(addr); + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); + } + + if( _X_Y_Z_W == 8 ) { + if( _Fs_ == 0 ) { + // do nothing if regd == ACC (ACCx <= ACCx + 0.0 * *addr) + if( regd != EEREC_ACC ) { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); + } + return; + } + + if( regd == EEREC_ACC ) { + assert( EEREC_TEMP < iREGCNT_XMM ); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if( regd == EEREC_S ) { + SSE_MULSS_M32_to_XMM(regd, addr); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + else { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_S); + SSE_MULSS_M32_to_XMM(regd, addr); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + } + else { + if( _Fs_ == 0 ) { + if( regd == EEREC_ACC ) { // ACCxyz is unchanged, ACCw <= ACCw + *addr + if( _W ) { // if _W is zero, do nothing + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); // { *addr, 0, 0, 0 } + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x27); // { 0, 0, 0, *addr } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); // { ACCx, ACCy, ACCz, ACCw + *addr } + } + } + else { // DESTxyz <= ACCxyz, DESTw <= ACCw + *addr + if( _W ) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); // { *addr, 0, 0, 0 } + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x27); // { 0, 0, 0, *addr } + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); // { ACCx, ACCy, ACCz, ACCw + *addr } + } + else SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + + return; + } + + if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + } + + if (_X_Y_Z_W != 0xf) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if( regd == EEREC_ACC ) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if( regd == EEREC_S ) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + else if( regd == EEREC_TEMP ) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + else { + SSE_MOVSS_M32_to_XMM(regd, addr); + SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00); + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + } + } +} + +void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MADD_xyzw_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Ft_) 
vuFloat5_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); + } + if (_Fs_) { // This is needed for alot of games; so always clamp this operand + if (CHECK_VU_SIGN_OVERFLOW) vFloats4_useEAX[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set + else vFloats2[_X_Y_Z_W]( EEREC_S, EEREC_TEMP ); // Always clamp EEREC_S, regardless if CHECK_VU_OVERFLOW is set + } + if( _Ft_ == 0 ) { + + if( xyzw == 3 ) { + // just add + if( _X_Y_Z_W == 8 ) { + if( regd == EEREC_S ) SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + else { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); + SSE_ADDSS_XMM_to_XMM(regd, EEREC_S); + } + } + else { + if( _X_Y_Z_W != 0xf ) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if( regd == EEREC_S ) SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + else { + SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_ADDPS_XMM_to_XMM(regd, EEREC_S); + } + } + } + } + else { + // just move acc to regd + if( _X_Y_Z_W != 0xf ) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); + } + + return; + } + + if( _X_Y_Z_W == 8 ) { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + + if( regd == EEREC_ACC ) { + SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if( regd == EEREC_S ) { + SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + else if( regd == EEREC_TEMP ) { + SSE_MULSS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC); + } + else { + SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC); + SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, 8); } + SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP); + } + } + else { + if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + } + + if (_X_Y_Z_W != 0xf) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + + VU_MERGE_REGS(regd, EEREC_TEMP); + } + else { + if( regd == EEREC_ACC ) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if( regd == EEREC_S ) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + else if( regd == EEREC_TEMP ) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + else { + _unpackVF_xyzw(regd, EEREC_T, xyzw); + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC); + } + } + } +} + +void recVUMI_MADD(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MADD"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MADD_toD(VU, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void 
recVUMI_MADD_iq(VURegs *VU, int addr, int info) +{ + //Console::WriteLn ("recVUMI_MADD_iq"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MADD_iq_toD(VU, addr, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MADD_xyzw(VURegs *VU, int xyzw, int info) +{ + //Console::WriteLn ("recVUMI_MADD_xyzw"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MADD_xyzw_toD(VU, xyzw, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); + // super bust-a-move arrows needs overflow clamping +} + +void recVUMI_MADDi(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_MADDq(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_MADDx(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 0, info); } +void recVUMI_MADDy(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 1, info); } +void recVUMI_MADDz(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 2, info); } +void recVUMI_MADDw(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MADDA +//------------------------------------------------------------------ +void recVUMI_MADDA( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MADDA"); + recVUMI_MADD_toD(VU, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MADDAi( VURegs *VU , int info) +{ + //Console::WriteLn ("recVUMI_MADDAi"); + recVUMI_MADD_iq_toD( VU, VU_VI_ADDR(REG_I, 1), EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MADDAq( VURegs *VU , int info) +{ + //Console::WriteLn ("recVUMI_MADDAq "); + recVUMI_MADD_iq_toD( VU, VU_REGQ_ADDR, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MADDAx( VURegs *VU , int info) +{ + //Console::WriteLn ("recVUMI_MADDAx"); + recVUMI_MADD_xyzw_toD(VU, 0, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MADDAy( VURegs *VU , int info) +{ + //Console::WriteLn ("recVUMI_MADDAy"); + recVUMI_MADD_xyzw_toD(VU, 1, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MADDAz( VURegs *VU , int info) +{ + //Console::WriteLn ("recVUMI_MADDAz"); + recVUMI_MADD_xyzw_toD(VU, 2, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MADDAw( VURegs *VU , int info) +{ + //Console::WriteLn ("recVUMI_MADDAw"); + recVUMI_MADD_xyzw_toD(VU, 3, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MSUB +//------------------------------------------------------------------ +void recVUMI_MSUB_toD(VURegs *VU, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MSUB_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); + } + + if (_X_Y_Z_W != 0xf) { + int t1reg = _vuGetTempXMMreg(info); + + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + + if( t1reg >= 0 ) { + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC); + SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); + + VU_MERGE_REGS(regd, t1reg); + _freeXMMreg(t1reg); + } + else { + SSE_XORPS_M128_to_XMM(EEREC_TEMP, 
(uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + } + else { + if( regd == EEREC_S ) { + assert( regd != EEREC_ACC ); + SSE_MULPS_XMM_to_XMM(regd, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); + } + else if( regd == EEREC_T ) { + assert( regd != EEREC_ACC ); + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); + } + else if( regd == EEREC_TEMP ) { + SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S); + SSE_MULPS_XMM_to_XMM(regd, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); + } + } +} + +void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info) +{ + //Console::WriteLn ("recVUMI_MSUB_temp_toD"); + + if (_X_Y_Z_W != 0xf) { + int t1reg = _vuGetTempXMMreg(info); + + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + + if( t1reg >= 0 ) { + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC); + SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP); + + if ( regd != EEREC_TEMP ) { VU_MERGE_REGS(regd, t1reg); } + else SSE_MOVAPS_XMM_to_XMM(regd, t1reg); + + _freeXMMreg(t1reg); + } + else { + SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + VU_MERGE_REGS(regd, EEREC_TEMP); + } + } + else { + if( regd == EEREC_ACC ) { + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); + } + else if( regd == EEREC_S ) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); + } + else if( regd == EEREC_TEMP ) { + SSE_MULPS_XMM_to_XMM(regd, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, regd, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_XORPS_M128_to_XMM(regd, (uptr)&const_clip[4]); + } + else { + SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + if (CHECK_VU_EXTRA_OVERFLOW) { vuFloat_useEAX( info, EEREC_TEMP, _X_Y_Z_W ); } + SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP); + } + } +} + +void recVUMI_MSUB_iq_toD(VURegs *VU, int regd, int addr, int info) +{ + //Console::WriteLn ("recVUMI_MSUB_iq_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); + vuFloat3(addr); + } + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + recVUMI_MSUB_temp_toD(VU, regd, info); +} + +void recVUMI_MSUB_xyzw_toD(VURegs *VU, int regd, int xyzw, int info) +{ + //Console::WriteLn ("recVUMI_MSUB_xyzw_toD"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); + if (_Ft_) vuFloat5_useEAX( EEREC_T, 
EEREC_TEMP, 1 << (3 - xyzw)); + vuFloat5_useEAX( EEREC_ACC, EEREC_TEMP, _X_Y_Z_W ); + } + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + recVUMI_MSUB_temp_toD(VU, regd, info); +} + +void recVUMI_MSUB(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MSUB"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MSUB_toD(VU, EEREC_D, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MSUB_iq(VURegs *VU, int addr, int info) +{ + //Console::WriteLn ("recVUMI_MSUB_iq"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MSUB_iq_toD(VU, EEREC_D, addr, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MSUBi(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_MSUBq(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_REGQ_ADDR, info); } +void recVUMI_MSUBx(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MSUBx"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 0, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MSUBy(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MSUBy"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 1, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MSUBz(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MSUBz"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 2, info); + recUpdateFlags(VU, EEREC_D, info); +} + +void recVUMI_MSUBw(VURegs *VU, int info) +{ + //Console::WriteLn ("recVUMI_MSUBw"); + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 3, info); + recUpdateFlags(VU, EEREC_D, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MSUBA +//------------------------------------------------------------------ +void recVUMI_MSUBA( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBA"); + recVUMI_MSUB_toD(VU, EEREC_ACC, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MSUBAi( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBAi "); + recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_VI_ADDR(REG_I, 1), info ); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MSUBAq( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBAq"); + recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_REGQ_ADDR, info ); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MSUBAx( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBAx"); + recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 0, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MSUBAy( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBAy"); + recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 1, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MSUBAz( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBAz "); + recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 2, info); + recUpdateFlags(VU, EEREC_ACC, info); +} + +void recVUMI_MSUBAw( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_MSUBAw"); + recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 3, info); + recUpdateFlags(VU, EEREC_ACC, info); +} +//------------------------------------------------------------------ + + +static const u32 PCSX2_ALIGNED16(special_mask[4]) = {0xffffffff, 0x80000000, 0xffffffff, 0x80000000}; +static const u32 PCSX2_ALIGNED16(special_mask2[4]) = {0, 0x40000000, 0, 0x40000000}; + +u32 PCSX2_ALIGNED16(temp_loc[4]); +u32 
PCSX2_ALIGNED16(temp_loc2[4]);
+
+//MAX/MINI are non-arithmetic operations that implicitly support numbers with the EXP field being 0 ("denormals").
+//
+//As such, they are sometimes used for integer moves and (positive!) integer max/min, relying on the fact that
+//integers that happen to encode denormals will not be flushed to 0.
+//
+//Accordingly, this implementation performs a non-arithmetic operation that supports "denormals" and "infs/nans".
+//There might be an easier way to do it, but here MAX/MIN is performed with PMAXPD/PMINPD.
+//Fake double-precision numbers are constructed by copying the sign of the original number, clearing the upper 32 bits,
+//setting the 62nd bit to 1 (to ensure the double-precision number is "normalized") and keeping the lower 32 bits
+//the same as the original number.
+
+void MINMAXlogical(VURegs *VU, int info, int min, int mode, uptr addr = 0, int xyzw = 0)
+//mode 0 = normal, mode 1 = iq, mode 2 = xyzw
+{
+	int t1regbool = 0;
+	int t1reg = _vuGetTempXMMreg(info);
+	if (t1reg < 0)
+	{
+		t1regbool = 1;
+		for (t1reg = 0; ( (t1reg == EEREC_D) || (t1reg == EEREC_S) || (mode != 1 && t1reg == EEREC_T)
+			|| (t1reg == EEREC_TEMP) ); t1reg++); // Find unused reg (For first temp reg)
+		SSE_MOVAPS_XMM_to_M128((uptr)temp_loc, t1reg); // Backup t1reg XMM reg
+	}
+	int t2regbool = -1;
+	int t2reg = EEREC_TEMP;
+	if (EEREC_TEMP == EEREC_D || EEREC_TEMP == EEREC_S || (mode != 1 && EEREC_TEMP == EEREC_T))
+	{
+		t2regbool = 0;
+		t2reg = _vuGetTempXMMreg(info);
+		if (t2reg < 0)
+		{
+			t2regbool = 1;
+			for (t2reg = 0; ( (t2reg == EEREC_D) || (t2reg == EEREC_S) || (mode != 1 && t2reg == EEREC_T) ||
+				(t2reg == t1reg) || (t2reg == EEREC_TEMP) ); t2reg++); // Find unused reg (For second temp reg)
+			SSE_MOVAPS_XMM_to_M128((uptr)temp_loc2, t2reg); // Backup t2reg XMM reg
+		}
+	}
+
+	if (_X || _Y)
+	{
+		SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, 0x50);
+		SSE2_PAND_M128_to_XMM(t1reg, (uptr)special_mask);
+		SSE2_POR_M128_to_XMM(t1reg, (uptr)special_mask2);
+		if (mode == 0)
+			SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, 0x50);
+		else if (mode == 1)
+		{
+			SSE2_MOVD_M32_to_XMM(t2reg, addr);
+			SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0x00);
+		}
+		else if (mode == 2)
+			_unpackVF_xyzw(t2reg, EEREC_T, xyzw);
+		SSE2_PAND_M128_to_XMM(t2reg, (uptr)special_mask);
+		SSE2_POR_M128_to_XMM(t2reg, (uptr)special_mask2);
+		if (min)
+			SSE2_MINPD_XMM_to_XMM(t1reg, t2reg);
+		else
+			SSE2_MAXPD_XMM_to_XMM(t1reg, t2reg);
+		SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0x88);
+		VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, 0xc & _X_Y_Z_W);
+	}
+
+	if (_Z || _W)
+	{
+		SSE2_PSHUFD_XMM_to_XMM(t1reg, EEREC_S, 0xfa);
+		SSE2_PAND_M128_to_XMM(t1reg, (uptr)special_mask);
+		SSE2_POR_M128_to_XMM(t1reg, (uptr)special_mask2);
+		if (mode == 0)
+			SSE2_PSHUFD_XMM_to_XMM(t2reg, EEREC_T, 0xfa);
+		else if (mode == 1)
+		{
+			SSE2_MOVD_M32_to_XMM(t2reg, addr);
+			SSE2_PSHUFD_XMM_to_XMM(t2reg, t2reg, 0x00);
+		}
+		else if (mode == 2)
+			_unpackVF_xyzw(t2reg, EEREC_T, xyzw);
+		SSE2_PAND_M128_to_XMM(t2reg, (uptr)special_mask);
+		SSE2_POR_M128_to_XMM(t2reg, (uptr)special_mask2);
+		if (min)
+			SSE2_MINPD_XMM_to_XMM(t1reg, t2reg);
+		else
+			SSE2_MAXPD_XMM_to_XMM(t1reg, t2reg);
+		SSE2_PSHUFD_XMM_to_XMM(t1reg, t1reg, 0x88);
+		VU_MERGE_REGS_CUSTOM(EEREC_D, t1reg, 0x3 & _X_Y_Z_W);
+	}
+
+	if (t1regbool == 0)
+		_freeXMMreg(t1reg);
+	else if (t1regbool == 1)
+		SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)temp_loc); // Restore t1reg XMM reg
+	if (t2regbool == 0)
+		_freeXMMreg(t2reg);
+	else if (t2regbool == 1)
+		SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)temp_loc2); // Restore t2reg XMM reg
+}
+
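
The shuffles above are easier to follow as a per-lane formula. After PSHUFD (0x50 for the x/y pair, 0xfa for z/w), PAND with special_mask and POR with special_mask2, each 64-bit lane holds: bit 63 = the original float's sign, bit 62 = 1 (so the double is normalized), bits 31..0 = the original float's raw bits. A scalar sketch of that construction (illustrative only, not PCSX2 code):

    static u64 fake_double_bits(u32 float_bits)
    {
        u64 sign = (u64)(float_bits & 0x80000000u) << 32;  // float sign -> bit 63
        return sign | 0x4000000000000000ULL | float_bits;  // set bit 62, keep float in low 32 bits
    }

Comparing two such doubles with PMINPD/PMAXPD orders the original floats correctly, denormals included, which is the point of the detour through double precision.
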
+//------------------------------------------------------------------ +// MAX +//------------------------------------------------------------------ + +void recVUMI_MAX(VURegs *VU, int info) +{ + if ( _Fd_ == 0 ) return; + //Console::WriteLn ("recVUMI_MAX"); + + if (MINMAXFIX) + MINMAXlogical(VU, info, 0, 0); + else + { + + if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping + if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + + if( _X_Y_Z_W == 8 ) { + if (EEREC_D == EEREC_S) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); + else if (EEREC_D == EEREC_T) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if( EEREC_D == EEREC_S ) SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T); + else if( EEREC_D == EEREC_T ) SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + } +} + +void recVUMI_MAX_iq(VURegs *VU, uptr addr, int info) +{ + if ( _Fd_ == 0 ) return; + //Console::WriteLn ("recVUMI_MAX_iq"); + + if (MINMAXFIX) + MINMAXlogical(VU, info, 0, 1, addr); + else + { + if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping + vuFloat3(addr); + + if( _XYZW_SS ) { + if( EEREC_D == EEREC_TEMP ) { + _vuFlipRegSS(VU, EEREC_S); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MAXSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_S); + + // have to flip over EEREC_D if computing flags! + //if( (info & PROCESS_VU_UPDATEFLAGS) ) + _vuFlipRegSS(VU, EEREC_D); + } + else if( EEREC_D == EEREC_S ) { + _vuFlipRegSS(VU, EEREC_D); + SSE_MAXSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_D); + } + else { + if( _X ) { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MAXSS_M32_to_XMM(EEREC_D, addr); + } + else { + _vuMoveSS(VU, EEREC_TEMP, EEREC_S); + _vuFlipRegSS(VU, EEREC_D); + SSE_MAXSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + _vuFlipRegSS(VU, EEREC_D); + } + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if(EEREC_D == EEREC_S) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_D, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00); + SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + } + } +} + +void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info) +{ + if ( _Fd_ == 0 ) return; + //Console::WriteLn ("recVUMI_MAX_xyzw"); + + if (_Fs_ == 0 && _Ft_ == 0) + { + if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { + if( xyzw < 3 ) { + SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)s_fones); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + } + else if (_X_Y_Z_W != 0xf) { + if( xyzw < 3 ) { + if( _X_Y_Z_W & 1 ) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[0]); // w included, so insert the whole reg + else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // w not included, can zero out + } + else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, 
(uptr)s_fones); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + //If VF0.w isnt chosen as the constant, then its going to be MAX( 0, VF0 ), so the result is VF0 + if( xyzw < 3 ) { SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)&VU->VF[0].UL[0]); } + else SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)s_fones); + } + return; + } + + if (MINMAXFIX) + MINMAXlogical(VU, info, 0, 2, 0, xyzw); + else + { + if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping + if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + + if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { + if( xyzw == 0 ) { + if( EEREC_D == EEREC_S ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); + else if( EEREC_D == EEREC_T ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + } + else if (_X_Y_Z_W != 0xf) { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if (EEREC_D == EEREC_S) { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else { + _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); + SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + } + } +} + +void recVUMI_MAXi(VURegs *VU, int info) { recVUMI_MAX_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_MAXx(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 0, info); } +void recVUMI_MAXy(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 1, info); } +void recVUMI_MAXz(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 2, info); } +void recVUMI_MAXw(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// MINI +//------------------------------------------------------------------ +void recVUMI_MINI(VURegs *VU, int info) +{ + if ( _Fd_ == 0 ) return; + //Console::WriteLn ("recVUMI_MINI"); + + if (MINMAXFIX) + MINMAXlogical(VU, info, 1, 0); + else + { + + if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping + if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, _X_Y_Z_W ); + + if( _X_Y_Z_W == 8 ) { + if (EEREC_D == EEREC_S) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); + else if (EEREC_D == EEREC_T) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if( EEREC_D == EEREC_S ) { + //ClampUnordered(EEREC_T, EEREC_TEMP, 0); // need for GT4 vu0rec + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T); + } + else if( EEREC_D == EEREC_T ) { + //ClampUnordered(EEREC_S, EEREC_TEMP, 0); // need for GT4 vu0rec + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + } +} + +void recVUMI_MINI_iq(VURegs *VU, uptr addr, int info) +{ + if ( _Fd_ == 0 ) return; + //Console::WriteLn ("recVUMI_MINI_iq"); + + if (MINMAXFIX) + MINMAXlogical(VU, info, 1, 1, addr); + else + { + + if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping + vuFloat3(addr); + + if( _XYZW_SS ) 
{ + if( EEREC_D == EEREC_TEMP ) { + _vuFlipRegSS(VU, EEREC_S); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MINSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_S); + + // have to flip over EEREC_D if computing flags! + //if( (info & PROCESS_VU_UPDATEFLAGS) ) + _vuFlipRegSS(VU, EEREC_D); + } + else if( EEREC_D == EEREC_S ) { + _vuFlipRegSS(VU, EEREC_D); + SSE_MINSS_M32_to_XMM(EEREC_D, addr); + _vuFlipRegSS(VU, EEREC_D); + } + else { + if( _X ) { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MINSS_M32_to_XMM(EEREC_D, addr); + } + else { + _vuMoveSS(VU, EEREC_TEMP, EEREC_S); + _vuFlipRegSS(VU, EEREC_D); + SSE_MINSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + _vuFlipRegSS(VU, EEREC_D); + } + } + } + else if (_X_Y_Z_W != 0xf) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if(EEREC_D == EEREC_S) { + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00); + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else { + SSE_MOVSS_M32_to_XMM(EEREC_D, addr); + SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00); + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + } + } +} + +void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info) +{ + if ( _Fd_ == 0 ) return; + //Console::WriteLn ("recVUMI_MINI_xyzw"); + + if (_Fs_ == 0 && _Ft_ == 0) + { + if( _X_Y_Z_W == 0xf ) + { + //If VF0.w is the constant, the result will match VF0, else its all 0's + if(xyzw == 3) SSE_MOVAPS_M128_to_XMM(EEREC_D, (uptr)&VU->VF[0].UL[0]); + else SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D); + } + else + { + //If VF0.w is the constant, the result will match VF0, else its all 0's + if(xyzw == 3) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[0]); + else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + return; + } + if (MINMAXFIX) + MINMAXlogical(VU, info, 1, 2, 0, xyzw); + else + { + if (_Fs_) vuFloat4_useEAX( EEREC_S, EEREC_TEMP, _X_Y_Z_W ); // Always do Preserved Sign Clamping + if (_Ft_) vuFloat4_useEAX( EEREC_T, EEREC_TEMP, ( 1 << (3 - xyzw) ) ); + + if( _X_Y_Z_W == 8 && (EEREC_D != EEREC_TEMP)) { + if( xyzw == 0 ) { + if( EEREC_D == EEREC_S ) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); + else if( EEREC_D == EEREC_T ) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S); + else { + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T); + } + } + else { + _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S); + SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + } + else if (_X_Y_Z_W != 0xf) { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + VU_MERGE_REGS(EEREC_D, EEREC_TEMP); + } + else { + if (EEREC_D == EEREC_S) { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw); + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } + else { + _unpackVF_xyzw(EEREC_D, EEREC_T, xyzw); + SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S); + } + } + } +} + +void recVUMI_MINIi(VURegs *VU, int info) { recVUMI_MINI_iq(VU, VU_VI_ADDR(REG_I, 1), info); } +void recVUMI_MINIx(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 0, info); } +void recVUMI_MINIy(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 1, info); } +void recVUMI_MINIz(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 2, info); } +void recVUMI_MINIw(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 3, info); } +//------------------------------------------------------------------ + + 
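
All of the _xyzw variants above rely on _unpackVF_xyzw to broadcast one component of the source register across all four lanes before the packed op (the _iq paths do the same thing inline with SSE_SHUFPS and immediate 0x00). A hypothetical scalar model of that broadcast, reusing VF_t from the earlier sketch:

    static VF_t broadcast_xyzw(const VF_t& src, int c)  // c: 0=x .. 3=w
    {
        VF_t out;
        for (int i = 0; i < 4; ++i)
            out.f[i] = src.f[c];  // every lane receives the selected component
        return out;
    }
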
+//------------------------------------------------------------------ +// OPMULA +//------------------------------------------------------------------ +void recVUMI_OPMULA( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_OPMULA"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); + } + + SSE_MOVAPS_XMM_to_XMM( EEREC_TEMP, EEREC_S ); + SSE_SHUFPS_XMM_to_XMM( EEREC_T, EEREC_T, 0xD2 ); // EEREC_T = WYXZ + SSE_SHUFPS_XMM_to_XMM( EEREC_TEMP, EEREC_TEMP, 0xC9 ); // EEREC_TEMP = WXZY + SSE_MULPS_XMM_to_XMM( EEREC_TEMP, EEREC_T ); + + VU_MERGE_REGS_CUSTOM(EEREC_ACC, EEREC_TEMP, 14); + + // revert EEREC_T + if( EEREC_T != EEREC_ACC ) + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9); + + recUpdateFlags(VU, EEREC_ACC, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// OPMSUB +//------------------------------------------------------------------ +void recVUMI_OPMSUB( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_OPMSUB"); + if (CHECK_VU_EXTRA_OVERFLOW) { + if (_Fs_) vuFloat5_useEAX( EEREC_S, EEREC_TEMP, 0xE); + if (_Ft_) vuFloat5_useEAX( EEREC_T, EEREC_TEMP, 0xE); + } + + if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP); + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xD2); // EEREC_T = WYXZ + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0xC9); // EEREC_TEMP = WXZY + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + + // negate and add + SSE_XORPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[4]); + SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC); + VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14); + + // revert EEREC_T + if( EEREC_T != EEREC_D ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9); + + recUpdateFlags(VU, EEREC_D, info); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// NOP +//------------------------------------------------------------------ +void recVUMI_NOP( VURegs *VU, int info ) +{ + //Console::WriteLn ("recVUMI_NOP"); +} +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// recVUMI_FTOI_Saturate() - Saturates result from FTOI Instructions +//------------------------------------------------------------------ +static const PCSX2_ALIGNED16(int rec_const_0x8000000[4]) = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + +void recVUMI_FTOI_Saturate(int rec_s, int rec_t, int rec_tmp1, int rec_tmp2) +{ + //Console::WriteLn ("recVUMI_FTOI_Saturate"); + //Duplicate the xor'd sign bit to the whole value + //FFFF FFFF for positive, 0 for negative + SSE_MOVAPS_XMM_to_XMM(rec_tmp1, rec_s); + SSE2_PXOR_M128_to_XMM(rec_tmp1, (uptr)&const_clip[4]); + SSE2_PSRAD_I8_to_XMM(rec_tmp1, 31); + + //Create mask: 0 where !=8000 0000 + SSE_MOVAPS_XMM_to_XMM(rec_tmp2, rec_t); + SSE2_PCMPEQD_M128_to_XMM(rec_tmp2, (uptr)&const_clip[4]); + + //AND the mask w/ the edit values + SSE_ANDPS_XMM_to_XMM(rec_tmp1, rec_tmp2); + + //if v==8000 0000 && positive -> 8000 0000 + FFFF FFFF -> 7FFF FFFF + //if v==8000 0000 && negative -> 8000 0000 + 0 -> 8000 0000 + //if v!=8000 0000 -> v+0 (masked from the and) + + //Add the values as needed + SSE2_PADDD_XMM_to_XMM(rec_t, rec_tmp1); +} +//------------------------------------------------------------------ + + 
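
Stripped of the SIMD plumbing, the saturation rule implemented above is small. CVTTPS2DQ returns 0x80000000 ("integer indefinite") for any out-of-range input, which is already the right answer for negative overflow; only positive overflow needs patching to 0x7FFFFFFF, and adding the all-ones mask does exactly that. A scalar sketch (illustrative only; the vector code tests the float's sign bit rather than comparing against zero):

    static s32 ftoi_saturate(float src, s32 converted)
    {
        if ((u32)converted == 0x80000000u && src > 0.0f)
            return 0x7FFFFFFF;  // positive overflow saturates to INT_MAX
        return converted;       // negative overflow and in-range values pass through
    }
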
+//------------------------------------------------------------------ +// FTOI 0/4/12/15 +//------------------------------------------------------------------ +static PCSX2_ALIGNED16(float FTIO_Temp1[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; +static PCSX2_ALIGNED16(float FTIO_Temp2[4]) = { 0x00000000, 0x00000000, 0x00000000, 0x00000000 }; +void recVUMI_FTOI0(VURegs *VU, int info) +{ + int t1reg, t2reg; // Temp XMM regs + + if ( _Ft_ == 0 ) return; + + //Console::WriteLn ("recVUMI_FTOI0"); + + if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { // If theres a temp XMM reg available + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg + _freeXMMreg(t1reg); // Free temp reg + } + else { // No temp reg available + for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) + ; // Find unused reg (For first temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg + + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg + } + + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } + else { + if (EEREC_T != EEREC_S) { + SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); + vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T); + + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { // If theres a temp XMM reg available + recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result + _freeXMMreg(t1reg); // Free temp reg + } + else { // No temp reg available + for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) + ; // Find unused reg + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg + } + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { // If theres a temp XMM reg available + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) + ; // Find unused reg (For second temp reg) + 
SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg + _freeXMMreg(t1reg); // Free temp reg + } + else { // No temp reg available + for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) + ; // Find unused reg (For first temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg + + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg + } + + SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_TEMP); + } + } +} + +void recVUMI_FTOIX(VURegs *VU, int addr, int info) +{ + int t1reg, t2reg; // Temp XMM regs + + if ( _Ft_ == 0 ) return; + + //Console::WriteLn ("recVUMI_FTOIX"); + if (_X_Y_Z_W != 0xf) { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { // If theres a temp XMM reg available + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg + _freeXMMreg(t1reg); // Free temp reg + } + else { // No temp reg available + for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) + ; // Find unused reg (For first temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg + + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg + } + + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + } + else { + if (EEREC_T != EEREC_S) { + SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S); + SSE_MULPS_M128_to_XMM(EEREC_T, addr); + vuFloat_useEAX( info, EEREC_T, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T); + + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { // If theres a temp XMM reg available + recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result + _freeXMMreg(t1reg); // Free temp reg + } + else { // No temp reg available + for (t1reg = 0; 
( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) + ; // Find unused reg + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_T, EEREC_TEMP, t1reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg + } + } + else { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); + vuFloat_useEAX( info, EEREC_TEMP, 0xf ); // Clamp Infs and NaNs to pos/neg fmax (NaNs always to positive fmax) + SSE2_CVTTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) { // If theres a temp XMM reg available + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg)); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t2reg); // Backup XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp1); // Restore XMM reg + _freeXMMreg(t1reg); // Free temp reg + } + else { // No temp reg available + for (t1reg = 0; ( (t1reg == EEREC_S) || (t1reg == EEREC_T) || (t1reg == EEREC_TEMP) ); t1reg++) + ; // Find unused reg (For first temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp1, t1reg); // Backup t1reg XMM reg + + for (t2reg = 0; ( (t2reg == EEREC_S) || (t2reg == EEREC_T) || (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++) + ; // Find unused reg (For second temp reg) + SSE_MOVAPS_XMM_to_M128((uptr)FTIO_Temp2, t2reg); // Backup t2reg XMM reg + + recVUMI_FTOI_Saturate(EEREC_S, EEREC_TEMP, t1reg, t2reg); // Saturate if Float->Int conversion returned illegal result + + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)FTIO_Temp1); // Restore t1reg XMM reg + SSE_MOVAPS_M128_to_XMM(t2reg, (uptr)FTIO_Temp2); // Restore t2reg XMM reg + } + + SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_TEMP); + } + } +} + +void recVUMI_FTOI4( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int4[0], info); } +void recVUMI_FTOI12( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int12[0], info); } +void recVUMI_FTOI15( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (uptr)&recMult_float_to_int15[0], info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// ITOF 0/4/12/15 +//------------------------------------------------------------------ +void recVUMI_ITOF0( VURegs *VU, int info ) +{ + if ( _Ft_ == 0 ) return; + + //Console::WriteLn ("recVUMI_ITOF0"); + if (_X_Y_Z_W != 0xf) { + SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + xmmregs[EEREC_T].mode |= MODE_WRITE; + } + else { + SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); + vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities + } +} + +void recVUMI_ITOFX(VURegs *VU, int addr, int info) +{ + if ( _Ft_ == 0 ) return; + + //Console::WriteLn ("recVUMI_ITOFX"); + if (_X_Y_Z_W != 0xf) { + SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr); + vuFloat_useEAX( info, EEREC_TEMP, 15); // Clamp infinities + VU_MERGE_REGS(EEREC_T, EEREC_TEMP); + xmmregs[EEREC_T].mode |= MODE_WRITE; + } + else { + SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S); + SSE_MULPS_M128_to_XMM(EEREC_T, addr); + 
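// The Saturate calls threaded through every path here compensate for an SSE2
// quirk: CVTTPS2DQ converts any out-of-range lane to the "integer indefinite"
// value 0x80000000, while FTOI, as modeled by this recompiler, clamps positive
// overflow to 0x7fffffff. A minimal scalar sketch of the per-lane rule
// (ftoi_saturate is an illustrative name, not from this file; NaNs never get
// this far because vuFloat_useEAX clamps them to +/-fmax beforehand):
//
//     s32 ftoi_saturate(float f)
//     {
//         if (f >= 2147483648.0f) return 0x7fffffff;       // positive overflow
//         if (f < -2147483648.0f) return (s32)0x80000000;  // negative overflow
//         return (s32)f;                                   // in range: truncate toward zero
//     }
//
// The SSE version only needs to patch lanes that converted to 0x80000000 from
// a non-negative source, which is why these paths hunt down one or two spare
// XMM registers and spill them to FTIO_Temp1/FTIO_Temp2 when none are free.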
vuFloat2(EEREC_T, EEREC_TEMP, 15); // Clamp infinities + } +} + +void recVUMI_ITOF4( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float4[0], info); } +void recVUMI_ITOF12( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float12[0], info); } +void recVUMI_ITOF15( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (uptr)&recMult_int_to_float15[0], info); } +//------------------------------------------------------------------ + + +//------------------------------------------------------------------ +// CLIP +//------------------------------------------------------------------ +void recVUMI_CLIP(VURegs *VU, int info) +{ + int t1reg = EEREC_D; + int t2reg = EEREC_ACC; + int x86temp1, x86temp2; + + u32 clipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 0); + u32 prevclipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 2); + + if( clipaddr == 0 ) { // battle star has a clip right before fcset + Console::WriteLn("skipping vu clip"); + return; + } + + //Flush the clip flag before processing, incase of double clip commands (GoW) + + if( prevclipaddr != (uptr)&VU->VI[REG_CLIP_FLAG] ) { + MOV32MtoR(EAX, prevclipaddr); + MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX); + } + + assert( clipaddr != 0 ); + assert( t1reg != t2reg && t1reg != EEREC_TEMP && t2reg != EEREC_TEMP ); + + x86temp1 = ALLOCTEMPX86(MODE_8BITREG); + x86temp2 = ALLOCTEMPX86(MODE_8BITREG); + + //if ( (x86temp1 == 0) || (x86temp2 == 0) ) Console::Error("VU CLIP Allocation Error: EAX being allocated!"); + + _freeXMMreg(t1reg); // These should have been freed at allocation in eeVURecompileCode() + _freeXMMreg(t2reg); // but if they've been used since then, then free them. (just doing this incase :p (cottonvibes)) + + if( _Ft_ == 0 ) { + SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (uptr)&s_fones[0]); // all 1s + SSE_MOVAPS_M128_to_XMM(t1reg, (uptr)&s_fones[4]); + } + else { + _unpackVF_xyzw(EEREC_TEMP, EEREC_T, 3); + SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&const_clip[0]); + SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_TEMP); + SSE_ORPS_M128_to_XMM(t1reg, (uptr)&const_clip[4]); + } + + MOV32MtoR(EAX, prevclipaddr); + + SSE_CMPNLEPS_XMM_to_XMM(t1reg, EEREC_S); //-w, -z, -y, -x + SSE_CMPLTPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); //+w, +z, +y, +x + + SHL32ItoR(EAX, 6); + + SSE_MOVAPS_XMM_to_XMM(t2reg, EEREC_TEMP); //t2 = +w, +z, +y, +x + SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, t1reg); //EEREC_TEMP = -y,+y,-x,+x + SSE_UNPCKHPS_XMM_to_XMM(t2reg, t1reg); //t2reg = -w,+w,-z,+z + SSE_MOVMSKPS_XMM_to_R32(x86temp2, EEREC_TEMP); // -y,+y,-x,+x + SSE_MOVMSKPS_XMM_to_R32(x86temp1, t2reg); // -w,+w,-z,+z + + AND8ItoR(x86temp1, 0x3); + SHL8ItoR(x86temp1, 4); + OR8RtoR(EAX, x86temp1); + AND8ItoR(x86temp2, 0xf); + OR8RtoR(EAX, x86temp2); + AND32ItoR(EAX, 0xffffff); + + MOV32RtoM(clipaddr, EAX); + + if (( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) ) //Instantly update the flag if its called from elsewhere (unlikely, but ok) + MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX); + + _freeX86reg(x86temp1); + _freeX86reg(x86temp2); +} diff --git a/pcsx2/x86/iVUzerorec.cpp b/pcsx2/x86/sVU_zerorec.cpp similarity index 96% rename from pcsx2/x86/iVUzerorec.cpp rename to pcsx2/x86/sVU_zerorec.cpp index 9b3942d71e..c02a7366fd 100644 --- a/pcsx2/x86/iVUzerorec.cpp +++ b/pcsx2/x86/sVU_zerorec.cpp @@ -1,4682 +1,4682 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - 
* (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// Super VU recompiler - author: zerofrog(@gmail.com) - -#include "PrecompiledHeader.h" - -#include -#include -#include -#include -#include - -#ifndef _WIN32 -#include -#endif - -#include "Common.h" - -#include "GS.h" -#include "R5900.h" -#include "VU.h" -#include "iR5900.h" - -#include "iVUzerorec.h" -#include "SamplProf.h" -#include "NakedAsm.h" - -using namespace std; - -// temporary externs -extern void iDumpVU0Registers(); -extern void iDumpVU1Registers(); - -// SuperVURec optimization options, uncomment only for debugging purposes -#define SUPERVU_CACHING // vu programs are saved and queried via memcompare (should be no reason to disable this) -#define SUPERVU_WRITEBACKS // don't flush the writebacks after every block -#define SUPERVU_X86CACHING // use x86reg caching (faster) (not really. rather lots slower :p (rama) ) -#define SUPERVU_VIBRANCHDELAY // when integers are modified right before a branch that uses the integer, - // the old integer value is used in the branch, fixes kh2 - -#define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on... - -// registers won't be flushed at block boundaries (faster) (nothing noticable speed-wise, causes SPS in Ratchet and clank (Nneeve) ) -#ifndef _DEBUG -//#define SUPERVU_INTERCACHING -#endif - -#define SUPERVU_CHECKCONDITION 0 // has to be 0!! - -#define VU_EXESIZE 0x00800000 - -#define _Imm11_ (s32)( (vucode & 0x400) ? (0xfffffc00 | (vucode & 0x3ff)) : (vucode & 0x3ff) ) -#define _UImm11_ (s32)(vucode & 0x7ff) - -#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register -#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register -#define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register -#define _It_ (_Ft_ & 15) -#define _Is_ (_Fs_ & 15) -#define _Id_ (_Fd_ & 15) - -static const u32 QWaitTimes[] = { 6, 12 }; -static const u32 PWaitTimes[] = { 53, 43, 28, 23, 17, 11, 10 }; - -static u32 s_vuInfo; // info passed into rec insts - -static const u32 s_MemSize[2] = {VU0_MEMSIZE, VU1_MEMSIZE}; -static u8* s_recVUMem = NULL, *s_recVUPtr = NULL; - -// tables which are defined at the bottom of this massive file. 
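A note on the immediate decoding above: _Imm11_ sign-extends the 11-bit branch field of the lower opcode, and _recbranchAddr further down shifts it left by 3 because every VU instruction slot is a 64-bit pair. A self-contained sketch of the same arithmetic (signExtend11 and branchTarget are illustrative names, written with <cstdint> types in place of this file's u32/s32):

    #include <cstdint>

    // Two's-complement sign extension of the 11-bit field, as _Imm11_ does.
    static inline int32_t signExtend11(uint32_t vucode)
    {
        uint32_t imm = vucode & 0x7ff;   // low 11 bits of the lower opcode
        if (imm & 0x400)                 // bit 10 is the sign bit
            imm |= 0xfffff800;           // propagate it through bit 31
        return (int32_t)imm;
    }

    // Branch target: pc + imm * 8 bytes, wrapped to the VU's micro memory
    // (4KB for VU0, 16KB for VU1), matching _recbranchAddr's masking.
    static inline int32_t branchTarget(int32_t pc, uint32_t vucode, uint32_t memSize)
    {
        return (pc + (signExtend11(vucode) << 3)) & (int32_t)(memSize - 1);
    }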
-extern void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info); -extern void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info); - -#define INST_Q_READ 0x0001 // flush Q -#define INST_P_READ 0x0002 // flush P -#define INST_BRANCH_DELAY 0x0004 -#define INST_CLIP_WRITE 0x0040 // inst writes CLIP in the future -#define INST_STATUS_WRITE 0x0080 -#define INST_MAC_WRITE 0x0100 -#define INST_Q_WRITE 0x0200 -#define INST_CACHE_VI 0x0400 // write old vi value to s_VIBranchDelay - -// Let's tempt fate by defining two different constants with almost identical names -#define INST_DUMMY_ 0x8000 -#define INST_DUMMY 0x83c0 - -#define VFFREE_INVALID0 0x80000000 // (vffree[i]&0xf) is invalid - -//#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it)) - -#ifdef _DEBUG -u32 s_vucount = 0; - -static u32 g_vu1lastrec = 0, skipparent = -1; -static u32 s_svulast = 0, s_vufnheader; -static u32 badaddrs[][2] = {0, 0xffff}; -#endif - -union VURecRegs -{ - struct - { - u16 reg; - u16 type; - }; - u32 id; -}; - -#define SUPERVU_XGKICKDELAY 1 // yes this is needed as default (wipeout) - -class VuBaseBlock; - -struct VuFunctionHeader -{ - struct RANGE - { - RANGE() : pmem(NULL) {} - - u16 start, size; - void* pmem; // all the mem - }; - - VuFunctionHeader() : pprogfunc(NULL), startpc(0xffffffff) {} - ~VuFunctionHeader() - { - for (vector::iterator it = ranges.begin(); it != ranges.end(); ++it) - { - free(it->pmem); - } - } - - // returns true if the checksum for the current mem is the same as this fn - bool IsSame(void* pmem); - - u32 startpc; - void* pprogfunc; - - vector ranges; -}; - -struct VuBlockHeader -{ - VuBaseBlock* pblock; - u32 delay; -}; - -// one vu inst (lower and upper) -class VuInstruction -{ - public: - VuInstruction() - { - memzero_obj(*this); - nParentPc = -1; - vicached = -1; - } - - int nParentPc; // used for syncing with flag writes, -1 for no parent - - _vuopinfo info; - - _VURegsNum regs[2]; // [0] - lower, [1] - upper - u32 livevars[2]; // live variables right before this inst, [0] - inst, [1] - float - u32 addvars[2]; // live variables to add - u32 usedvars[2]; // set if var is used in the future including vars used in this inst - u32 keepvars[2]; - u16 pqcycles; // the number of cycles to stall if function writes to the regs - u16 type; // INST_ - - u32 pClipWrite, pMACWrite, pStatusWrite; // addrs to write the flags - u32 vffree[2]; - s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2]; - s8 vfflush[2]; // extra flush regs - s8 vicached; // if >= 0, then use the cached integer s_VIBranchDelay - VuInstruction *pPrevInst; - - int SetCachedRegs(int upper, u32 vuxyz); - void Recompile(list::iterator& itinst, u32 vuxyz); -}; - -enum BlockType -{ - BLOCKTYPE_EOP = 0x01, // at least one of the children of the block contains eop (or the block itself) - BLOCKTYPE_FUNCTION = 0x02, - BLOCKTYPE_HASEOP = 0x04, // last inst of block is an eop - BLOCKTYPE_MACFLAGS = 0x08, - BLOCKTYPE_ANALYZED = 0x40, - BLOCKTYPE_IGNORE = 0x80, // special for recursive fns - BLOCKTYPE_ANALYZEDPARENT = 0x100 -}; - -// base block used when recompiling -class VuBaseBlock -{ - public: - typedef list LISTBLOCKS; - - VuBaseBlock(); - - // returns true if the leads to a EOP (ALL VU blocks must ret true) - void AssignVFRegs(); - void AssignVIRegs(int parent); - - list::iterator GetInstIterAtPc(int instpc); - void GetInstsAtPc(int instpc, list& listinsts); - - void Recompile(); - - u16 type; // BLOCKTYPE_ - u16 id; - u16 startpc; - u16 endpc; // first inst not in block - void* pcode; // x86 code pointer - void* pendcode; // 
end of the x86 code pointer - int cycles; - list insts; - list parents; - LISTBLOCKS blocks; // blocks branches to - u32* pChildJumps[4]; // addrs that need to be filled with the children's start addrs - // if highest bit is set, addr needs to be relational - u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only - u32 vuxy; // corresponding bit is set if reg's xyz channels are used only - - _xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM]; - int nStartx86, nEndx86; // indices into s_vecRegArray - - int allocX86Regs; - int prevFlagsOutOfBlock; -}; - -struct WRITEBACK -{ - WRITEBACK() : nParentPc(0), cycle(0) //, pStatusWrite(NULL), pMACWrite(NULL) - { - viwrite[0] = viwrite[1] = 0; - viread[0] = viread[1] = 0; - } - - void InitInst(VuInstruction* pinst, int cycle) const - { - u32 write = viwrite[0] | viwrite[1]; - pinst->type = ((write & (1 << REG_CLIP_FLAG)) ? INST_CLIP_WRITE : 0) | - ((write & (1 << REG_MAC_FLAG)) ? INST_MAC_WRITE : 0) | - ((write & (1 << REG_STATUS_FLAG)) ? INST_STATUS_WRITE : 0) | - ((write & (1 << REG_Q)) ? INST_Q_WRITE : 0); - pinst->nParentPc = nParentPc; - pinst->info.cycle = cycle; - for (int i = 0; i < 2; ++i) - { - pinst->regs[i].VIwrite = viwrite[i]; - pinst->regs[i].VIread = viread[i]; - } - } - - static int SortWritebacks(const WRITEBACK& w1, const WRITEBACK& w2) - { - return w1.cycle < w2.cycle; - } - - int nParentPc; - int cycle; - u32 viwrite[2]; - u32 viread[2]; -}; - -struct VUPIPELINES -{ - fmacPipe fmac[8]; - fdivPipe fdiv; - efuPipe efu; - ialuPipe ialu[8]; - list< WRITEBACK > listWritebacks; -}; - -VuBaseBlock::VuBaseBlock() -{ - type = 0; - endpc = 0; - cycles = 0; - pcode = NULL; - id = 0; - memzero_obj(pChildJumps); - memzero_obj(startregs); - memzero_obj(endregs); - allocX86Regs = nStartx86 = nEndx86 = -1; - prevFlagsOutOfBlock = 0; -} - -#define SUPERVU_STACKSIZE 0x1000 - -static list s_listVUHeaders[2]; -static list* s_plistCachedHeaders[2] = {NULL, NULL}; -static VuFunctionHeader** recVUHeaders[2] = {NULL, NULL}; -static VuBlockHeader* recVUBlocks[2] = {NULL, NULL}; -static u8* recVUStack = NULL, *recVUStackPtr = NULL; -static vector<_x86regs> s_vecRegArray(128); - -static VURegs* VU = NULL; -static list s_listBlocks; -static u32 s_vu = 0; -static u32 s_UnconditionalDelay = 0; // 1 if there are two sequential branches and the last is unconditional -static u32 g_nLastBlockExecuted = 0; - -static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex); -static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes); -static void SuperVUInitLiveness(VuBaseBlock* pblock); -static void SuperVULivenessAnalysis(); -static void SuperVUEliminateDeadCode(); -static void SuperVUAssignRegs(); - -//void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg); -#define SuperVUFreeXMMreg 0&& -void SuperVUFreeXMMregs(u32* livevars); - -static u32* SuperVUStaticAlloc(u32 size); -static void SuperVURecompile(); - -// allocate VU resources -void SuperVUAlloc(int vuindex) -{ - // The old -1 crap has been depreciated on this function. Please - // specify either 0 or 1, thanks. - jASSUME(vuindex >= 0); - - // upper 4 bits must be zero! - if (s_recVUMem == NULL) - { - // upper 4 bits must be zero! - // Changed "first try base" to 0xf1e0000, since 0x0c000000 liked to fail a lot. 
(cottonvibes) - s_recVUMem = SysMmapEx(0xf1e0000, VU_EXESIZE, 0x10000000, "SuperVUAlloc"); - - if (s_recVUMem == NULL) - { - throw Exception::OutOfMemory( - fmt_string("SuperVU Error > failed to allocate recompiler memory (addr: 0x%x)", (u32)s_recVUMem) - ); - } - - ProfilerRegisterSource("VURec", s_recVUMem, VU_EXESIZE); - - if (recVUStack == NULL) recVUStack = new u8[SUPERVU_STACKSIZE * 4]; - } - - if (vuindex >= 0) - { - jASSUME(s_recVUMem != NULL); - - if (recVUHeaders[vuindex] == NULL) - recVUHeaders[vuindex] = new VuFunctionHeader* [s_MemSize[vuindex] / 8]; - if (recVUBlocks[vuindex] == NULL) - recVUBlocks[vuindex] = new VuBlockHeader[s_MemSize[vuindex] / 8]; - if (s_plistCachedHeaders[vuindex] == NULL) - s_plistCachedHeaders[vuindex] = new list[s_MemSize[vuindex] / 8]; - } -} - -void DestroyCachedHeaders(int vuindex, int j) -{ - list::iterator it = s_plistCachedHeaders[vuindex][j].begin(); - - while (it != s_plistCachedHeaders[vuindex][j].end()) - { - delete *it; - it++; - } - - s_plistCachedHeaders[vuindex][j].clear(); -} - -void DestroyVUHeaders(int vuindex) -{ - list::iterator it = s_listVUHeaders[vuindex].begin(); - - while (it != s_listVUHeaders[vuindex].end()) - { - delete *it; - it++; - } - - s_listVUHeaders[vuindex].clear(); -} - -// destroy VU resources -void SuperVUDestroy(int vuindex) -{ - list::iterator it; - - if (vuindex < 0) - { - SuperVUDestroy(0); - SuperVUDestroy(1); - ProfilerTerminateSource("VURec"); - SafeSysMunmap(s_recVUMem, VU_EXESIZE); - safe_delete_array(recVUStack); - } - else - { - safe_delete_array(recVUHeaders[vuindex]); - safe_delete_array(recVUBlocks[vuindex]); - - if (s_plistCachedHeaders[vuindex] != NULL) - { - for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j) - { - DestroyCachedHeaders(vuindex, j); - } - safe_delete_array(s_plistCachedHeaders[vuindex]); - } - DestroyVUHeaders(vuindex); - } -} - -// reset VU -void SuperVUReset(int vuindex) -{ -#ifdef _DEBUG - s_vucount = 0; -#endif - - if (s_recVUMem == NULL) - return; - - //jASSUME( s_recVUMem != NULL ); - - if (vuindex < 0) - { - DbgCon::Status("SuperVU reset > Resetting recompiler memory and structures."); - - // Does this cause problems on VU recompiler resets? It could, if the VU works like - // the EE used to, and actually tries to re-enter the recBlock after issuing a clear. 
(air) - - //memset_8<0xcd, VU_EXESIZE>(s_recVUMem); - memzero_ptr(recVUStack); - - s_recVUPtr = s_recVUMem; - } - else - { - DbgCon::Status("SuperVU reset [VU%d] > Resetting the recs and junk", params vuindex); - list::iterator it; - if (recVUHeaders[vuindex]) memset(recVUHeaders[vuindex], 0, sizeof(VuFunctionHeader*) * (s_MemSize[vuindex] / 8)); - if (recVUBlocks[vuindex]) memset(recVUBlocks[vuindex], 0, sizeof(VuBlockHeader) * (s_MemSize[vuindex] / 8)); - - if (s_plistCachedHeaders[vuindex] != NULL) - { - for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j) - { - DestroyCachedHeaders(vuindex, j); - } - } - DestroyVUHeaders(vuindex); - } -} - -// clear the block and any joining blocks -void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex) -{ - vector::iterator itrange; - list::iterator it = s_listVUHeaders[vuindex].begin(); - u32 endpc = startpc + (size + (8 - (size & 7))); // Adding this code to ensure size is always a multiple of 8, it can be simplified to startpc+size if size is always a multiple of 8 (cottonvibes) - while (it != s_listVUHeaders[vuindex].end()) - { - - // for every fn, check if it has code in the range - for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++) - { - if (startpc < (u32)itrange->start + itrange->size && itrange->start < endpc) - break; - } - - if (itrange != (*it)->ranges.end()) - { - recVUHeaders[vuindex][(*it)->startpc/8] = NULL; -#ifdef SUPERVU_CACHING - list* plist = &s_plistCachedHeaders[vuindex][(*it)->startpc/8]; - plist->push_back(*it); - if (plist->size() > 30) - { - // list is too big, delete - //Console::Notice("Performance warning: deleting cached VU program!"); - delete plist->front(); - plist->pop_front(); - } - it = s_listVUHeaders[vuindex].erase(it); -#else - delete *it; - it = s_listVUHeaders[vuindex].erase(it); -#endif - } - else ++it; - } -} - -static VuFunctionHeader* s_pFnHeader = NULL; -static VuBaseBlock* s_pCurBlock = NULL; -static VuInstruction* s_pCurInst = NULL; -static u32 s_StatusRead = 0, s_MACRead = 0, s_ClipRead = 0; // read addrs -static u32 s_PrevStatusWrite = 0, s_PrevMACWrite = 0, s_PrevClipWrite = 0, s_PrevIWrite = 0; -static u32 s_WriteToReadQ = 0; - -static u32 s_VIBranchDelay = 0; //Value of register to use in a vi branch delayed situation - - -u32 s_TotalVUCycles; // total cycles since start of program execution - - -u32 SuperVUGetVIAddr(int reg, int read) -{ - assert(s_pCurInst != NULL); - - switch (reg) - { - case REG_STATUS_FLAG: - { - u32 addr = (read == 2) ? s_PrevStatusWrite : (read ? s_StatusRead : s_pCurInst->pStatusWrite); - assert(!read || addr != 0); - return addr; - } - case REG_MAC_FLAG: - { - u32 addr = (read == 2) ? s_PrevMACWrite : (read ? s_MACRead : s_pCurInst->pMACWrite); - return addr; - } - case REG_CLIP_FLAG: - { - u32 addr = (read == 2) ? s_PrevClipWrite : (read ? s_ClipRead : s_pCurInst->pClipWrite); - assert(!read || addr != 0); - return addr; - } - case REG_Q: - return (read || s_WriteToReadQ) ? (uptr)&VU->VI[REG_Q] : (uptr)&VU->q; - case REG_P: - return read ? 
(uptr)&VU->VI[REG_P] : (uptr)&VU->p; - case REG_I: - return s_PrevIWrite; - } - -#ifdef SUPERVU_VIBRANCHDELAY - if ((read != 0) && (s_pCurInst->regs[0].pipe == VUPIPE_BRANCH) && (s_pCurInst->vicached >= 0) && (s_pCurInst->vicached == reg)) - { - return (uptr)&s_VIBranchDelay; // test for branch delays - } -#endif - - return (uptr)&VU->VI[reg]; -} - -void SuperVUDumpBlock(list& blocks, int vuindex) -{ - FILE *f; - char str[256]; - u32 *mem; - u32 i; - - Path::CreateDirectory("dumps"); - string filename(Path::Combine("dumps", fmt_string("svu%cdump%.4X.txt", s_vu ? '0' : '1', s_pFnHeader->startpc))); - //Console::WriteLn( "dump1 %x => %s", params s_pFnHeader->startpc, filename ); - - f = fopen(filename.c_str(), "w"); - - fprintf(f, "Format: upper_inst lower_inst\ntype f:vf_live_vars vf_used_vars i:vi_live_vars vi_used_vars inst_cycle pq_inst\n"); - fprintf(f, "Type: %.2x - qread, %.2x - pread, %.2x - clip_write, %.2x - status_write\n" - "%.2x - mac_write, %.2x -qflush\n", - INST_Q_READ, INST_P_READ, INST_CLIP_WRITE, INST_STATUS_WRITE, INST_MAC_WRITE, INST_Q_WRITE); - fprintf(f, "XMM: Upper: read0 read1 write acc temp; Lower: read0 read1 write acc temp\n\n"); - - list::iterator itblock; - list::iterator itinst; - VuBaseBlock::LISTBLOCKS::iterator itchild; - - for(itblock = blocks.begin(); itblock != blocks.end(); itblock++) - { - fprintf(f, "block:%c %x-%x; children: ", ((*itblock)->type&BLOCKTYPE_HASEOP) ? '*' : ' ', - (*itblock)->startpc, (*itblock)->endpc - 8); - - for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) - { - fprintf(f, "%x ", (*itchild)->startpc); - } - fprintf(f, "; vuxyz = %x, vuxy = %x\n", (*itblock)->vuxyz&(*itblock)->insts.front().usedvars[1], - (*itblock)->vuxy&(*itblock)->insts.front().usedvars[1]); - - itinst = (*itblock)->insts.begin(); - i = (*itblock)->startpc; - while (itinst != (*itblock)->insts.end()) - { - assert(i <= (*itblock)->endpc); - if (itinst->type & INST_DUMMY) - { - if (itinst->nParentPc >= 0 && !(itinst->type&INST_DUMMY_)) - { - // search for the parent - fprintf(f, "writeback 0x%x (%x)\n", itinst->type, itinst->nParentPc); - } - } - else - { - mem = (u32*) & VU->Micro[i]; - char* pstr = disVU1MicroUF(mem[1], i + 4); - fprintf(f, "%.4x: %-40s", i, pstr); - if (mem[1] & 0x80000000) fprintf(f, " I=%f(%.8x)\n", *(float*)mem, mem[0]); - else fprintf(f, "%s\n", disVU1MicroLF(mem[0], i)); - i += 8; - } - - ++itinst; - } - - fprintf(f, "\n"); - - _x86regs* pregs; - if ((*itblock)->nStartx86 >= 0 || (*itblock)->nEndx86 >= 0) - { - fprintf(f, "X86: AX CX DX BX SP BP SI DI\n"); - } - - if ((*itblock)->nStartx86 >= 0) - { - pregs = &s_vecRegArray[(*itblock)->nStartx86]; - fprintf(f, "STR: "); - for (i = 0; i < iREGCNT_GPR; ++i) - { - if (pregs[i].inuse) - fprintf(f, "%.2d ", pregs[i].reg); - else - fprintf(f, "-1 "); - } - fprintf(f, "\n"); - } - - if ((*itblock)->nEndx86 >= 0) - { - fprintf(f, "END: "); - pregs = &s_vecRegArray[(*itblock)->nEndx86]; - for (i = 0; i < iREGCNT_GPR; ++i) - { - if (pregs[i].inuse) - fprintf(f, "%.2d ", pregs[i].reg); - else - fprintf(f, "-1 "); - } - fprintf(f, "\n"); - } - - itinst = (*itblock)->insts.begin(); - for (i = (*itblock)->startpc; i < (*itblock)->endpc; ++itinst) - { - - if (itinst->type & INST_DUMMY) - { - } - else - { - sprintf(str, "%.4x:%x f:%.8x_%.8x", i, itinst->type, itinst->livevars[1], itinst->usedvars[1]); - fprintf(f, "%-46s i:%.8x_%.8x c:%d pq:%d\n", str, - itinst->livevars[0], itinst->usedvars[0], (int)itinst->info.cycle, (int)itinst->pqcycles); - - sprintf(str, "XMM 
r0:%d r1:%d w:%d a:%d t:%x;", - itinst->vfread0[1], itinst->vfread1[1], itinst->vfwrite[1], itinst->vfacc[1], itinst->vffree[1]); - fprintf(f, "%-46s r0:%d r1:%d w:%d a:%d t:%x\n", str, - itinst->vfread0[0], itinst->vfread1[0], itinst->vfwrite[0], itinst->vfacc[0], itinst->vffree[0]); - i += 8; - } - } - -#ifdef __LINUX__ - // dump the asm - if ((*itblock)->pcode != NULL) - { - char command[255]; - FILE* fasm = fopen("mydump1", "wb"); - //Console::WriteLn("writing: %x, %x", params (*itblock)->startpc, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode); - fwrite((*itblock)->pcode, 1, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode, fasm); - fclose(fasm); - sprintf(command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 > tempdump"); - system(command); - fasm = fopen("tempdump", "r"); - // read all of it and write it to f - fseek(fasm, 0, SEEK_END); - vector vbuffer(ftell(fasm)); - fseek(fasm, 0, SEEK_SET); - fread(&vbuffer[0], vbuffer.size(), 1, fasm); - - fprintf(f, "\n\n"); - fwrite(&vbuffer[0], vbuffer.size(), 1, f); - fclose(fasm); - } -#endif - - fprintf(f, "\n---------------\n"); - } - - fclose(f); -} - -// uncomment to count svu exec time -//#define SUPERVU_COUNT - -// Private methods -void* SuperVUGetProgram(u32 startpc, int vuindex) -{ - assert(startpc < s_MemSize[vuindex]); - assert((startpc % 8) == 0); - assert(recVUHeaders[vuindex] != NULL); - VuFunctionHeader** pheader = &recVUHeaders[vuindex][startpc/8]; - - if (*pheader == NULL) - { -#ifdef _DEBUG -// if( vuindex ) VU1.VI[REG_TPC].UL = startpc; -// else VU0.VI[REG_TPC].UL = startpc; -// __Log("VU: %x\n", startpc); -// iDumpVU1Registers(); -// vudump |= 2; -#endif - - // measure run time - //QueryPerformanceCounter(&svubase); - -#ifdef SUPERVU_CACHING - void* pmem = (vuindex & 1) ? VU1.Micro : VU0.Micro; - // check if program exists in cache - list::iterator it; - for(it = s_plistCachedHeaders[vuindex][startpc/8].begin(); it != s_plistCachedHeaders[vuindex][startpc/8].end(); it++) - { - if ((*it)->IsSame(pmem)) - { - // found, transfer to regular lists - void* pfn = (*it)->pprogfunc; - recVUHeaders[vuindex][startpc/8] = *it; - s_listVUHeaders[vuindex].push_back(*it); - s_plistCachedHeaders[vuindex][startpc/8].erase(it); - return pfn; - } - } -#endif - - *pheader = SuperVURecompileProgram(startpc, vuindex); - - if (*pheader == NULL) - { - assert(s_TotalVUCycles > 0); - if (vuindex) - VU1.VI[REG_TPC].UL = startpc; - else - VU0.VI[REG_TPC].UL = startpc; - - return (void*)SuperVUEndProgram; - } - - //QueryPerformanceCounter(&svufinal); - //svutime += (u32)(svufinal.QuadPart-svubase.QuadPart); - - assert((*pheader)->pprogfunc != NULL); - } - //else assert( (*pheader)->IsSame((vuindex&1) ? 
VU1.Micro : VU0.Micro) ); - - assert((*pheader)->startpc == startpc); - - return (*pheader)->pprogfunc; -} - -bool VuFunctionHeader::IsSame(void* pmem) -{ -#ifdef SUPERVU_CACHING - vector::iterator it; - for(it = ranges.begin(); it != ranges.end(); it++) - { - if (memcmp_mmx((u8*)pmem + it->start, it->pmem, it->size)) - return false; - } -#endif - return true; -} - -list::iterator VuBaseBlock::GetInstIterAtPc(int instpc) -{ - assert(instpc >= 0); - - u32 curpc = startpc; - list::iterator it; - for (it = insts.begin(); it != insts.end(); ++it) - { - if (it->type & INST_DUMMY) continue; - if (curpc == instpc) break; - curpc += 8; - } - - if (it != insts.end()) return it; - - assert(0); - return insts.begin(); -} - -void VuBaseBlock::GetInstsAtPc(int instpc, list& listinsts) -{ - assert(instpc >= 0); - - listinsts.clear(); - - u32 curpc = startpc; - list::iterator it; - for (it = insts.begin(); it != insts.end(); ++it) - { - if (it->type & INST_DUMMY) continue; - if (curpc == instpc) break; - curpc += 8; - } - - if (it != insts.end()) - { - listinsts.push_back(&(*it)); - return; - } - - // look for the pc in other blocks - for (list::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock) - { - if (*itblock == this) continue; - - if (instpc >= (*itblock)->startpc && instpc < (*itblock)->endpc) - { - listinsts.push_back(&(*(*itblock)->GetInstIterAtPc(instpc))); - } - } - - assert(listinsts.size() > 0); -} - -static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) -{ - assert(vuindex < 2); - assert(s_recVUPtr != NULL); - //Console::WriteLn("svu%c rec: %x", params '0'+vuindex, startpc); - - // if recPtr reached the mem limit reset whole mem - if (((uptr)s_recVUPtr - (uptr)s_recVUMem) >= VU_EXESIZE - 0x40000) - { - //Console::WriteLn("SuperVU reset mem"); - SuperVUReset(0); - SuperVUReset(1); - SuperVUReset(-1); - if (s_TotalVUCycles > 0) - { - // already executing, so return NULL - return NULL; - } - } - - list::iterator itblock; - - s_vu = vuindex; - VU = s_vu ? 
&VU1 : &VU0; - s_pFnHeader = new VuFunctionHeader(); - s_listVUHeaders[vuindex].push_back(s_pFnHeader); - s_pFnHeader->startpc = startpc; - - memset(recVUBlocks[s_vu], 0, sizeof(VuBlockHeader) * (s_MemSize[s_vu] / 8)); - - // analyze the global graph - s_listBlocks.clear(); - VUPIPELINES pipes; - memzero_obj(pipes.fmac); - memzero_obj(pipes.fdiv); - memzero_obj(pipes.efu); - memzero_obj(pipes.ialu); - SuperVUBuildBlocks(NULL, startpc, pipes); - - // fill parents - VuBaseBlock::LISTBLOCKS::iterator itchild; - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) - { - (*itchild)->parents.push_back(*itblock); - } - - //(*itblock)->type &= ~(BLOCKTYPE_IGNORE|BLOCKTYPE_ANALYZED); - } - - assert(s_listBlocks.front()->startpc == startpc); - s_listBlocks.front()->type |= BLOCKTYPE_FUNCTION; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - SuperVUInitLiveness(*itblock); - } - - SuperVULivenessAnalysis(); - SuperVUEliminateDeadCode(); - SuperVUAssignRegs(); - -#ifdef _DEBUG - if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu); -#endif - - // code generation - x86SetPtr(s_recVUPtr); - branch = 0; - - SuperVURecompile(); - - s_recVUPtr = x86Ptr; - - // set the function's range - VuFunctionHeader::RANGE r; - s_pFnHeader->ranges.reserve(s_listBlocks.size()); - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - r.start = (*itblock)->startpc; - r.size = (*itblock)->endpc - (*itblock)->startpc; -#ifdef SUPERVU_CACHING - //memxor_mmx(r.checksum, &VU->Micro[r.start], r.size); - r.pmem = malloc(r.size); - memcpy_fast(r.pmem, &VU->Micro[r.start], r.size); -#endif - s_pFnHeader->ranges.push_back(r); - } - -#if defined(_DEBUG) && defined(__LINUX__) - // dump at the end to capture the actual code - if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu); -#endif - - // destroy - for (list::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock) - { - delete *itblock; - } - s_listBlocks.clear(); - - assert(s_recVUPtr < s_recVUMem + VU_EXESIZE); - - return s_pFnHeader; -} - -static int _recbranchAddr(u32 vucode) -{ - s32 bpc = pc + (_Imm11_ << 3); - /* - if ( bpc < 0 ) { - Console::WriteLn("zerorec branch warning: bpc < 0 ( %x ); Using unsigned imm11", params bpc); - bpc = pc + (_UImm11_ << 3); - }*/ - bpc &= (s_MemSize[s_vu] - 1); - - return bpc; -} - -// return inst that flushes everything -static VuInstruction SuperVUFlushInst() -{ - VuInstruction inst; - // don't need to raed q/p - inst.type = INST_DUMMY_;//|INST_Q_READ|INST_P_READ; - return inst; -} - -void SuperVUAddWritebacks(VuBaseBlock* pblock, const list& listWritebacks) -{ -#ifdef SUPERVU_WRITEBACKS - // regardless of repetition, add the pipes (for selfloops) - list::const_iterator itwriteback = listWritebacks.begin(); - list::iterator itinst = pblock->insts.begin(), itinst2; - - while (itwriteback != listWritebacks.end()) - { - if (itinst != pblock->insts.end() && (itinst->info.cycle < itwriteback->cycle || (itinst->type&INST_DUMMY))) - { - ++itinst; - continue; - } - - itinst2 = pblock->insts.insert(itinst, VuInstruction()); - itwriteback->InitInst(&(*itinst2), vucycle); - ++itwriteback; - } -#endif -} - -#ifdef SUPERVU_VIBRANCHDELAY -static VuInstruction* getDelayInst(VuInstruction* pInst) -{ - // check for the N cycle branch delay - // example of 2 cycles delay 
(monster house) : - // sqi vi05 - // sqi vi05 - // ibeq vi05, vi03 - // The ibeq should read the vi05 before the first sqi - - //more info: - - // iaddiu vi01, 0, 1 - // ibeq vi01, 0 <- reads vi01 before the iaddiu - - // iaddiu vi01, 0, 1 - // iaddiu vi01, vi01, 1 - // iaddiu vi01, vi01, 1 - // ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1) - - // ilw vi02, addr - // iaddiu vi01, 0, 1 - // ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls - - int delay = 1; - VuInstruction* pDelayInst = NULL; - VuInstruction* pTargetInst = pInst->pPrevInst; - while (1) - { - if (pTargetInst != NULL - && pTargetInst->info.cycle + delay == pInst->info.cycle - && (pTargetInst->regs[0].pipe == VUPIPE_IALU || pTargetInst->regs[0].pipe == VUPIPE_FMAC) - && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) - && (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff)) - && !(pTargetInst->regs[0].VIread&((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_CLIP_FLAG)))) - { - pDelayInst = pTargetInst; - pTargetInst = pTargetInst->pPrevInst; - delay++; - if (delay == 5) //maximum delay is 4 (length of the pipeline) - { - DevCon::WriteLn("supervu: cycle branch delay maximum (4) is reached"); - break; - } - } - else break; - } - if (delay > 1) DevCon::WriteLn("supervu: %d cycle branch delay detected: %x %x", params delay - 1, pc, s_pFnHeader->startpc); - return pDelayInst; -} -#endif - -static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes) -{ - // check if block already exists - //Console::WriteLn("startpc %x", params startpc); - startpc &= (s_vu ? 
0x3fff : 0xfff); - VuBlockHeader* pbh = &recVUBlocks[s_vu][startpc/8]; - - if (pbh->pblock != NULL) - { - - VuBaseBlock* pblock = pbh->pblock; - list::iterator itinst; - - if (pblock->startpc == startpc) - { - SuperVUAddWritebacks(pblock, pipes.listWritebacks); - return pblock; - } - - // have to divide the blocks, pnewblock is first block - assert(startpc > pblock->startpc); - assert(startpc < pblock->endpc); - - u32 dummyinst = (startpc - pblock->startpc) >> 3; - - // count inst non-dummy insts - itinst = pblock->insts.begin(); - int cycleoff = 0; - - while (dummyinst > 0) - { - if (itinst->type & INST_DUMMY) - ++itinst; - else - { - cycleoff = itinst->info.cycle; - ++itinst; - --dummyinst; - } - } - - // NOTE: still leaves insts with their writebacks in different blocks - while (itinst->type & INST_DUMMY) - ++itinst; - - // the difference in cycles between dummy insts (naruto utlimate ninja) - int cyclediff = 0; - if (parent == pblock) - cyclediff = itinst->info.cycle - cycleoff; - cycleoff = itinst->info.cycle; - - // new block - VuBaseBlock* pnewblock = new VuBaseBlock(); - s_listBlocks.push_back(pnewblock); - - pnewblock->startpc = startpc; - pnewblock->endpc = pblock->endpc; - pnewblock->cycles = pblock->cycles - cycleoff + cyclediff; - - pnewblock->blocks.splice(pnewblock->blocks.end(), pblock->blocks); - pnewblock->insts.splice(pnewblock->insts.end(), pblock->insts, itinst, pblock->insts.end()); - pnewblock->type = pblock->type; - - // any writebacks in the next 3 cycles also belong to original block -// for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); ) { -// if( (itinst->type & INST_DUMMY) && itinst->nParentPc >= 0 && itinst->nParentPc < (int)startpc ) { -// -// if( !(itinst->type & INST_Q_WRITE) ) -// pblock->insts.push_back(*itinst); -// itinst = pnewblock->insts.erase(itinst); -// continue; -// } -// -// ++itinst; -// } - - pbh = &recVUBlocks[s_vu][startpc/8]; - for (u32 inst = startpc; inst < pblock->endpc; inst += 8) - { - if (pbh->pblock == pblock) - pbh->pblock = pnewblock; - ++pbh; - } - - for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); itinst++) - { - itinst->info.cycle -= cycleoff; - } - - SuperVUAddWritebacks(pnewblock, pipes.listWritebacks); - - // old block - pblock->blocks.push_back(pnewblock); - pblock->endpc = startpc; - pblock->cycles = cycleoff; - pblock->type &= BLOCKTYPE_MACFLAGS; - //pblock->insts.push_back(SuperVUFlushInst()); //don't need - - return pnewblock; - } - - VuBaseBlock* pblock = new VuBaseBlock(); - s_listBlocks.push_back(pblock); - - int i = 0; - branch = 0; - pc = startpc; - pblock->startpc = startpc; - - // clear stalls (might be a prob) - memcpy(VU->fmac, pipes.fmac, sizeof(pipes.fmac)); - memcpy(&VU->fdiv, &pipes.fdiv, sizeof(pipes.fdiv)); - memcpy(&VU->efu, &pipes.efu, sizeof(pipes.efu)); - memcpy(VU->ialu, pipes.ialu, sizeof(pipes.ialu)); -// memset(VU->fmac, 0, sizeof(VU->fmac)); -// memset(&VU->fdiv, 0, sizeof(VU->fdiv)); -// memset(&VU->efu, 0, sizeof(VU->efu)); - - vucycle = 0; - - u8 macflags = 0; - - list< WRITEBACK > listWritebacks; - list< WRITEBACK >::iterator itwriteback; - list::iterator itinst; - u32 hasSecondBranch = 0; - u32 needFullStatusFlag = 0; - -#ifdef SUPERVU_WRITEBACKS - listWritebacks = pipes.listWritebacks; -#endif - - // first analysis pass for status flags - while (1) - { - u32* ptr = (u32*) & VU->Micro[pc]; - pc += 8; - int prevbranch = branch; - - if (ptr[1] & 0x40000000) - branch = 1; - - if (!(ptr[1] & 0x80000000)) // not I - { - switch (ptr[0] >> 25) - { - 
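// For readers decoding this scan: each VU instruction slot is a 64-bit pair,
// with ptr[0] the lower (integer/branch/load-store) word and ptr[1] the upper
// (FMAC) word. Bit 31 of the upper word is the I flag (the lower word is then
// a raw float immediate, hence the "not I" guard), and bit 30 is the E flag
// (end of program), which the pass treats like a branch so the block
// terminates. A small decoder sketch restating those assumptions:
//
//     struct VUInstPair { u32 lower, upper; };                          // one 8-byte slot
//     bool hasEndBit(VUInstPair i)       { return (i.upper & 0x40000000) != 0; } // E flag
//     bool lowerIsFloatImm(VUInstPair i) { return (i.upper & 0x80000000) != 0; } // I flag
//     u32  lowerOpcode(VUInstPair i)     { return i.lower >> 25; }      // 7-bit major opcode
//
// The cases below cover the jump and conditional-branch opcodes, plus FSAND,
// whose sticky-flag reads (high immediate bits set) force full status-flag
// tracking via needFullStatusFlag.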
case 0x24: // jr - case 0x25: // jalr - case 0x20: // B - case 0x21: // BAL - case 0x28: // IBEQ - case 0x2f: // IBGEZ - case 0x2d: // IBGTZ - case 0x2e: // IBLEZ - case 0x2c: // IBLTZ - case 0x29: // IBNE - branch = 1; - break; - - case 0x14: // fseq - case 0x17: // fsor - //needFullStatusFlag = 2; - break; - - case 0x16: // fsand - if ((ptr[0]&0xc0)) - { - // sometimes full sticky bits are needed (simple series 2000 - oane chapara) - //Console::WriteLn("needSticky: %x-%x", params s_pFnHeader->startpc, startpc); - needFullStatusFlag = 2; - } - break; - } - } - - if (prevbranch) - break; - - if (pc >= s_MemSize[s_vu]) - { - Console::Error("inf vu0 prog %x", params startpc); - break; - } - } - - // second full pass - pc = startpc; - branch = 0; - VuInstruction* pprevinst = NULL, *pinst = NULL; - - while (1) - { - - if (pc == s_MemSize[s_vu]) - { - branch |= 8; - break; - } - - if (!branch && pbh->pblock != NULL) - { - pblock->blocks.push_back(pbh->pblock); - break; - } - - int prevbranch = branch; - - if (!prevbranch) - { - pbh->pblock = pblock; - } - else assert(prevbranch || pbh->pblock == NULL); - - pblock->insts.push_back(VuInstruction()); - - pprevinst = pinst; - pinst = &pblock->insts.back(); - pinst->pPrevInst = pprevinst; - SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs); - -#ifdef SUPERVU_VIBRANCHDELAY - if (pinst->regs[0].pipe == VUPIPE_BRANCH && pblock->insts.size() > 1) - { - - VuInstruction* pdelayinst = getDelayInst(pinst); - if (pdelayinst) - { - pdelayinst->type |= INST_CACHE_VI; - - // find the correct register - u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread; - for (int i = 0; i < 16; ++i) - { - if (mask & (1 << i)) - { - pdelayinst->vicached = i; - break; - } - } - - pinst->vicached = pdelayinst->vicached; - } - } -#endif - - if (prevbranch) - { - if (pinst->regs[0].pipe == VUPIPE_BRANCH) - hasSecondBranch = 1; - pinst->type |= INST_BRANCH_DELAY; - } - - // check write back - for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end();) - { - if (pinst->info.cycle >= itwriteback->cycle) - { - itinst = pblock->insts.insert(--pblock->insts.end(), VuInstruction()); - itwriteback->InitInst(&(*itinst), pinst->info.cycle); - itwriteback = listWritebacks.erase(itwriteback); - } - else ++itwriteback; - } - - // add new writebacks - WRITEBACK w; - const u32 allflags = (1 << REG_CLIP_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG); - for (int j = 0; j < 2; ++j) w.viwrite[j] = pinst->regs[j].VIwrite & allflags; - - if (pinst->info.macflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_MAC_FLAG); - if (pinst->info.statusflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_STATUS_FLAG); - - if ((pinst->info.macflag | pinst->info.statusflag) & VUOP_READ) - macflags = 1; - if (pinst->regs[0].VIread & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG))) - macflags = 1; - -// if( pinst->regs[1].pipe == VUPIPE_FMAC && (pinst->regs[1].VFwrite==0&&!(pinst->regs[1].VIwrite&(1<regs[0].VIread |= (1<VIwrite |= lregs->VIwrite & (1<info.statusflag&VUOP_WRITE) && !(pinst->regs[0].VIwrite&(1 << REG_STATUS_FLAG))) && needFullStatusFlag) - { - // don't read if first inst - if (needFullStatusFlag == 1) - w.viread[1] |= (1 << REG_STATUS_FLAG); - else --needFullStatusFlag; - } - - for (int j = 0; j < 2; ++j) - { - w.viread[j] |= pinst->regs[j].VIread & allflags; - - if ((pinst->regs[j].VIread&(1 << REG_STATUS_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_STATUS_FLAG))) - { - // don't need the read anymore - pinst->regs[j].VIread &= ~(1 << REG_STATUS_FLAG); - } - if 
((pinst->regs[j].VIread&(1 << REG_MAC_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_MAC_FLAG))) - { - // don't need the read anymore - pinst->regs[j].VIread &= ~(1 << REG_MAC_FLAG); - } - - pinst->regs[j].VIwrite &= ~allflags; - } - - if (pinst->info.macflag & VUOP_READ) w.viread[1] |= 1 << REG_MAC_FLAG; - if (pinst->info.statusflag & VUOP_READ) w.viread[1] |= 1 << REG_STATUS_FLAG; - - w.nParentPc = pc - 8; - w.cycle = pinst->info.cycle + 4; - listWritebacks.push_back(w); - } - - if (pinst->info.q&VUOP_READ) pinst->type |= INST_Q_READ; - if (pinst->info.p&VUOP_READ) pinst->type |= INST_P_READ; - - if (pinst->info.q&VUOP_WRITE) - { - pinst->pqcycles = QWaitTimes[pinst->info.pqinst] + 1; - - memset(&w, 0, sizeof(w)); - w.nParentPc = pc - 8; - w.cycle = pinst->info.cycle + pinst->pqcycles; - w.viwrite[0] = 1 << REG_Q; - listWritebacks.push_back(w); - } - if (pinst->info.p&VUOP_WRITE) - pinst->pqcycles = PWaitTimes[pinst->info.pqinst] + 1; - - if (prevbranch) - { - break; - } - - // make sure there is always a branch - // sensible soccer overflows on vu0, so increase the limit... - if ((s_vu == 1 && i >= 0x799) || (s_vu == 0 && i >= 0x201)) - { - Console::Error("VuRec base block doesn't terminate!"); - assert(0); - break; - } - - i++; - pbh++; - } - - if (macflags) - pblock->type |= BLOCKTYPE_MACFLAGS; - - pblock->endpc = pc; - u32 lastpc = pc; - - pblock->cycles = vucycle; - -#ifdef SUPERVU_WRITEBACKS - if (!branch || (branch&8)) -#endif - { - // flush writebacks - if (listWritebacks.size() > 0) - { - listWritebacks.sort(WRITEBACK::SortWritebacks); - for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) - { - if (itwriteback->viwrite[0] & (1 << REG_Q)) - { - // ignore all Q writebacks - continue; - } - - pblock->insts.push_back(VuInstruction()); - itwriteback->InitInst(&pblock->insts.back(), vucycle); - } - - listWritebacks.clear(); - } - } - - if (!branch) return pblock; - - if (branch & 8) - { - // what if also a jump? 
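// (Even if the final slot pairs the E bit with a jump, the block is closed
// here as end-of-program; the writeback flush just above is what guarantees
// nothing is lost. Flag results commit several cycles after issue, since each
// WRITEBACK was queued with cycle = info.cycle + 4, so a block exit has to
// drain whatever is still in flight. A distilled sketch of that flush rule,
// using the WRITEBACK helpers defined earlier in this file:
//
//     listWritebacks.sort(WRITEBACK::SortWritebacks);    // commit in cycle order
//     for (const WRITEBACK& w : listWritebacks)
//     {
//         if (w.viwrite[0] & (1 << REG_Q)) continue;     // Q writebacks are ignored here
//         pblock->insts.push_back(VuInstruction());
//         w.InitInst(&pblock->insts.back(), vucycle);    // materialize as a dummy inst
//     }
//
// On a plain branch, by contrast, writebacks young enough to outlive the
// block are forwarded through newpipes.listWritebacks instead of flushed.)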
- pblock->type |= BLOCKTYPE_EOP | BLOCKTYPE_HASEOP; - - // add an instruction to flush p and q (if written) - pblock->insts.push_back(SuperVUFlushInst()); - return pblock; - } - - // it is a (cond) branch or a jump - u32 vucode = *(u32*)(VU->Micro + lastpc - 16); - int bpc = _recbranchAddr(vucode) - 8; - - VUPIPELINES newpipes; - memcpy(newpipes.fmac, VU->fmac, sizeof(newpipes.fmac)); - memcpy(&newpipes.fdiv, &VU->fdiv, sizeof(newpipes.fdiv)); - memcpy(&newpipes.efu, &VU->efu, sizeof(newpipes.efu)); - memcpy(newpipes.ialu, VU->ialu, sizeof(newpipes.ialu)); - - for (i = 0; i < 8; ++i) newpipes.fmac[i].sCycle -= vucycle; - newpipes.fdiv.sCycle -= vucycle; - newpipes.efu.sCycle -= vucycle; - for (i = 0; i < 8; ++i) newpipes.ialu[i].sCycle -= vucycle; - - if (listWritebacks.size() > 0) - { - // flush all when jumping, send down the pipe when in branching - bool bFlushWritebacks = (vucode >> 25) == 0x24 || (vucode >> 25) == 0x25;//||(vucode>>25)==0x20||(vucode>>25)==0x21; - - listWritebacks.sort(WRITEBACK::SortWritebacks); - for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) - { - if (itwriteback->viwrite[0] & (1 << REG_Q)) - { - // ignore all Q writebacks - continue; - } - - if (itwriteback->cycle < vucycle || bFlushWritebacks) - { - pblock->insts.push_back(VuInstruction()); - itwriteback->InitInst(&pblock->insts.back(), vucycle); - } - else - { - newpipes.listWritebacks.push_back(*itwriteback); - newpipes.listWritebacks.back().cycle -= vucycle; - } - } - } - - if (newpipes.listWritebacks.size() > 0) // other blocks might read the mac flags - pblock->type |= BLOCKTYPE_MACFLAGS; - - u32 firstbranch = vucode >> 25; - switch (firstbranch) - { - case 0x24: // jr - pblock->type |= BLOCKTYPE_EOP; // jump out of procedure, since not returning, set EOP - pblock->insts.push_back(SuperVUFlushInst()); - firstbranch = 0xff; //Non-Conditional Jump - break; - - case 0x25: // jalr - { - // linking, so will return to procedure - pblock->insts.push_back(SuperVUFlushInst()); - - VuBaseBlock* pjumpblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - assert(pblock != NULL); - - pblock->blocks.push_back(pjumpblock); - firstbranch = 0xff; //Non-Conditional Jump - break; - } - case 0x20: // B - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - assert(pblock != NULL); - - pblock->blocks.push_back(pbranchblock); - firstbranch = 0xff; //Non-Conditional Jump - break; - } - case 0x21: // BAL - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - assert(pblock != NULL); - pblock->blocks.push_back(pbranchblock); - firstbranch = 0xff; //Non-Conditional Jump - break; - } - case 0x28: // IBEQ - case 0x2f: // IBGEZ - case 0x2d: // IBGTZ - case 0x2e: // IBLEZ - case 0x2c: // IBLTZ - case 0x29: // IBNE - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - assert(pblock != NULL); - pblock->blocks.push_back(pbranchblock); - - // if has a second branch that is B or BAL, skip this - u32 secondbranch = (*(u32*)(VU->Micro + lastpc - 8)) >> 25; - if (!hasSecondBranch || (secondbranch != 0x21 && secondbranch != 0x20)) - { - pbranchblock = 
SuperVUBuildBlocks(pblock, lastpc, newpipes); - - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - } - - break; - } - default: - assert(pblock->blocks.size() == 1); - break; - } - - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - -#ifdef SUPERVU_VIBRANCHDELAY -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -///// NOTE! This could still be a hack for KH2/GoW, but until we know how it properly works, this will do for now./// -///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// - - if (hasSecondBranch && firstbranch != 0xff) //check the previous jump was conditional and there is a second branch - { -#else - if (hasSecondBranch) - { -#endif - - u32 vucode = *(u32*)(VU->Micro + lastpc - 8); - pc = lastpc; - int bpc = _recbranchAddr(vucode); - - switch (vucode >> 25) - { - case 0x24: // jr - Console::Error("svurec bad jr jump!"); - assert(0); - break; - - case 0x25: // jalr - { - Console::Error("svurec bad jalr jump!"); - assert(0); - break; - } - case 0x20: // B - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - - pblock->blocks.push_back(pbranchblock); - break; - } - case 0x21: // BAL - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // replace instead of pushing a new block - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - break; - } - case 0x28: // IBEQ - case 0x2f: // IBGEZ - case 0x2d: // IBGTZ - case 0x2e: // IBLEZ - case 0x2c: // IBLTZ - case 0x29: // IBNE - { - VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); - - // update pblock since could have changed - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - - // only add the block if the previous branch doesn't include the next instruction (ie, if a direct jump) - if (firstbranch == 0x24 || firstbranch == 0x25 || firstbranch == 0x20 || firstbranch == 0x21) - { - pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); - - pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; - pblock->blocks.push_back(pbranchblock); - } - - break; - } - - jNO_DEFAULT; - } - } - - return recVUBlocks[s_vu][startpc/8].pblock; -} - -static void SuperVUInitLiveness(VuBaseBlock* pblock) -{ - list::iterator itinst, itnext; - - assert(pblock->insts.size() > 0); - - for (itinst = pblock->insts.begin(); itinst != pblock->insts.end(); ++itinst) - { - - if (itinst->type & INST_DUMMY_) - { - itinst->addvars[0] = itinst->addvars[1] = 0xffffffff; - itinst->livevars[0] = itinst->livevars[1] = 0xffffffff; - itinst->keepvars[0] = itinst->keepvars[1] = 0xffffffff; - itinst->usedvars[0] = itinst->usedvars[1] = 0; - } - else - { - itinst->addvars[0] = itinst->regs[0].VIread | itinst->regs[1].VIread; - itinst->addvars[1] = (itinst->regs[0].VFread0 ? (1 << itinst->regs[0].VFread0) : 0) | - (itinst->regs[0].VFread1 ? (1 << itinst->regs[0].VFread1) : 0) | - (itinst->regs[1].VFread0 ? (1 << itinst->regs[1].VFread0) : 0) | - (itinst->regs[1].VFread1 ? 
(1 << itinst->regs[1].VFread1) : 0); - - // vf0 is not handled by VFread - if (!itinst->regs[0].VFread0 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1; - if (!itinst->regs[1].VFread0 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1; - if (!itinst->regs[0].VFread1 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[0].VFr1xyzw != 0xff) itinst->addvars[1] |= 1; - if (!itinst->regs[1].VFread1 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[1].VFr1xyzw != 0xff) itinst->addvars[1] |= 1; - - - u32 vfwrite = 0; - if (itinst->regs[0].VFwrite != 0) - { - if (itinst->regs[0].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[0].VFwrite; - else vfwrite |= 1 << itinst->regs[0].VFwrite; - } - if (itinst->regs[1].VFwrite != 0) - { - if (itinst->regs[1].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[1].VFwrite; - else vfwrite |= 1 << itinst->regs[1].VFwrite; - } - if ((itinst->regs[1].VIwrite & (1 << REG_ACC_FLAG)) && itinst->regs[1].VFwxyzw != 0xf) - itinst->addvars[1] |= 1 << REG_ACC_FLAG; - - u32 viwrite = (itinst->regs[0].VIwrite | itinst->regs[1].VIwrite); - - itinst->usedvars[0] = itinst->addvars[0] | viwrite; - itinst->usedvars[1] = itinst->addvars[1] | vfwrite; - -// itinst->addvars[0] &= ~viwrite; -// itinst->addvars[1] &= ~vfwrite; - itinst->keepvars[0] = ~viwrite; - itinst->keepvars[1] = ~vfwrite; - } - } - - itinst = --pblock->insts.end(); - while (itinst != pblock->insts.begin()) - { - itnext = itinst; - --itnext; - - itnext->usedvars[0] |= itinst->usedvars[0]; - itnext->usedvars[1] |= itinst->usedvars[1]; - - itinst = itnext; - } -} - -u32 COMPUTE_LIVE(u32 R, u32 K, u32 L) -{ - u32 live = R | ((L) & (K)); - // special process mac and status flags - // only propagate liveness if doesn't write to the flag - if (!(L&(1 << REG_STATUS_FLAG)) && !(K&(1 << REG_STATUS_FLAG))) - live &= ~(1 << REG_STATUS_FLAG); - if (!(L&(1 << REG_MAC_FLAG)) && !(K&(1 << REG_MAC_FLAG))) - live &= ~(1 << REG_MAC_FLAG); - return live;//|(1<::reverse_iterator itblock; - list::iterator itinst, itnext; - VuBaseBlock::LISTBLOCKS::iterator itchild; - - u32 livevars[2]; - - do - { - changed = FALSE; - for (itblock = s_listBlocks.rbegin(); itblock != s_listBlocks.rend(); ++itblock) - { - - u32 newlive; - VuBaseBlock* pb = *itblock; - - // the last inst relies on the neighbor's insts - itinst = --pb->insts.end(); - - if (pb->blocks.size() > 0) - { - livevars[0] = 0; - livevars[1] = 0; - for (itchild = pb->blocks.begin(); itchild != pb->blocks.end(); ++itchild) - { - VuInstruction& front = (*itchild)->insts.front(); - livevars[0] |= front.livevars[0]; - livevars[1] |= front.livevars[1]; - } - - newlive = COMPUTE_LIVE(itinst->addvars[0], itinst->keepvars[0], livevars[0]); - - // should propagate status flags whose parent insts are not in this block -// if( itinst->nParentPc >= 0 && (itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) ) -// newlive |= livevars[0]&((1<livevars[0] != newlive) - { - changed = TRUE; - itinst->livevars[0] = newlive; - } - - newlive = COMPUTE_LIVE(itinst->addvars[1], itinst->keepvars[1], livevars[1]); - if (itinst->livevars[1] != newlive) - { - changed = TRUE; - itinst->livevars[1] = newlive; - } - } - - while (itinst != pb->insts.begin()) - { - - itnext = itinst; - --itnext; - - newlive = COMPUTE_LIVE(itnext->addvars[0], itnext->keepvars[0], itinst->livevars[0]); - - // should propagate status flags whose parent insts are not in this block -// if( itnext->nParentPc >= 0 && (itnext->type & 
(INST_STATUS_WRITE|INST_MAC_WRITE)) && !(itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) ) -// newlive |= itinst->livevars[0]&((1<livevars[0] != newlive) - { - changed = TRUE; - itnext->livevars[0] = newlive; - itnext->livevars[1] = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]); - } - else - { - newlive = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]); - if (itnext->livevars[1] != newlive) - { - changed = TRUE; - itnext->livevars[1] = newlive; - } - } - - itinst = itnext; - } - -// if( (livevars[0] | itinst->livevars[0]) != itinst->livevars[0] ) { -// changed = TRUE; -// itinst->livevars[0] |= livevars[0]; -// } -// if( (livevars[1] | itinst->livevars[1]) != itinst->livevars[1] ) { -// changed = TRUE; -// itinst->livevars[1] |= livevars[1]; -// } -// -// while( itinst != pb->insts.begin() ) { -// -// itnext = itinst; --itnext; -// if( (itnext->livevars[0] | (itinst->livevars[0] & itnext->keepvars[0])) != itnext->livevars[0] ) { -// changed = TRUE; -// itnext->livevars[0] |= itinst->livevars[0] & itnext->keepvars[0]; -// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1]; -// } -// else if( (itnext->livevars[1] | (itinst->livevars[1] & itnext->keepvars[1])) != itnext->livevars[1] ) { -// changed = TRUE; -// itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1]; -// } -// -// itinst = itnext; -// } - } - - } - while (changed); -} - -static void SuperVUEliminateDeadCode() -{ - list::iterator itblock; - VuBaseBlock::LISTBLOCKS::iterator itchild; - list::iterator itinst, itnext; - list listParents; - list::iterator itparent; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - -#ifdef _DEBUG - u32 startpc = (*itblock)->startpc; - u32 curpc = startpc; -#endif - - itnext = (*itblock)->insts.begin(); - itinst = itnext++; - while (itnext != (*itblock)->insts.end()) - { - if (itinst->type & (INST_CLIP_WRITE | INST_MAC_WRITE | INST_STATUS_WRITE)) - { - u32 live0 = itnext->livevars[0]; - if (itinst->nParentPc >= 0 && itnext->nParentPc >= 0 && itinst->nParentPc != itnext->nParentPc) // superman returns - { - // take the live vars from the next next inst - list::iterator itnextnext = itnext; - ++itnextnext; - if (itnextnext != (*itblock)->insts.end()) - { - live0 = itnextnext->livevars[0]; - } - } - - itinst->regs[0].VIwrite &= live0; - itinst->regs[1].VIwrite &= live0; - - u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite; - - (*itblock)->GetInstsAtPc(itinst->nParentPc, listParents); - int removetype = 0; - - for(itparent = listParents.begin(); itparent != listParents.end(); itparent++) - { - VuInstruction* parent = *itparent; - - if (viwrite & (1 << REG_CLIP_FLAG)) - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG)); - } - else - removetype |= INST_CLIP_WRITE; - - if (parent->info.macflag && (itinst->type & INST_MAC_WRITE)) - { - if (!(viwrite&(1 << REG_MAC_FLAG))) - { - //parent->info.macflag = 0; - // parent->regs[0].VIwrite &= ~(1<regs[1].VIwrite &= ~(1<regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG))); -#endif - // if VUPIPE_FMAC and destination is vf00, probably need to keep the mac flag - if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG)))) - { - parent->regs[0].VIwrite |= ((1 << REG_MAC_FLAG)); - parent->regs[1].VIwrite |= ((1 << 
- } - else - removetype |= INST_MAC_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG)); - } - } - else removetype |= INST_MAC_WRITE; - - if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE)) - { - if (!(viwrite&(1 << REG_STATUS_FLAG))) - { - //parent->info.statusflag = 0; - // parent->regs[0].VIwrite &= ~(1<<REG_STATUS_FLAG); - // parent->regs[1].VIwrite &= ~(1<<REG_STATUS_FLAG); -#ifndef SUPERVU_WRITEBACKS - assert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG))); -#endif - if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG)))) - { - parent->regs[0].VIwrite |= ((1 << REG_STATUS_FLAG)); - parent->regs[1].VIwrite |= ((1 << REG_STATUS_FLAG)); - } - else - removetype |= INST_STATUS_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG)); - } - } - else removetype |= INST_STATUS_WRITE; - } - - itinst->type &= ~removetype; - if (itinst->type == 0) - { - itnext = (*itblock)->insts.erase(itinst); - itinst = itnext++; - continue; - } - } -#ifdef _DEBUG - else - { - curpc += 8; - } -#endif - itinst = itnext; - ++itnext; - } - - if (itinst->type & INST_DUMMY) - { - // last inst with the children - u32 mask = 0; - for (itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); ++itchild) - { - mask |= (*itchild)->insts.front().livevars[0]; - } - itinst->regs[0].VIwrite &= mask; - itinst->regs[1].VIwrite &= mask; - u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite; - - if (itinst->nParentPc >= 0) - { - - (*itblock)->GetInstsAtPc(itinst->nParentPc, listParents); - int removetype = 0; - - for(itparent = listParents.begin(); itparent != listParents.end(); itparent++) - { - VuInstruction* parent = *itparent; - - if (viwrite & (1 << REG_CLIP_FLAG)) - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG)); - } - else removetype |= INST_CLIP_WRITE; - - if (parent->info.macflag && (itinst->type & INST_MAC_WRITE)) - { - if (!(viwrite&(1 << REG_MAC_FLAG))) - { - //parent->info.macflag = 0; -#ifndef SUPERVU_WRITEBACKS - assert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG))); -#endif - removetype |= INST_MAC_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG)); - } - } - else removetype |= INST_MAC_WRITE; - - if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE)) - { - if (!(viwrite&(1 << REG_STATUS_FLAG))) - { - //parent->info.statusflag = 0; -#ifndef SUPERVU_WRITEBACKS - assert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG))); -#endif - removetype |= INST_STATUS_WRITE; - } - else - { - parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG)); - parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG)); - } - } - else removetype |= INST_STATUS_WRITE; - } - - itinst->type &= ~removetype; - if (itinst->type == 0) - { - (*itblock)->insts.erase(itinst); - } - } - } - } -}
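The pass above never invents liveness; it only narrows each instruction's VIwrite mask against what the next instruction still reads, pushes surviving flag writes back to the parent instruction, and erases an instruction once every INST_*_WRITE reason for keeping it is gone. A reduced sketch of that erase step over one block, where SketchInst is a stand-in type and not the recompiler's VuInstruction:

#include <cstdint>
#include <list>

struct SketchInst
{
    uint32_t type;    // INST_*_WRITE bits still justifying the inst
    uint32_t viwrite; // flag bits the inst writes
};

static void EraseDeadFlagWrites(std::list<SketchInst>& insts, uint32_t live)
{
    for (std::list<SketchInst>::iterator it = insts.begin(); it != insts.end();)
    {
        it->viwrite &= live;                // keep only writes something consumes
        if (it->viwrite == 0) it->type = 0; // no reason left to keep it
        if (it->type == 0)
            it = insts.erase(it);
        else
            ++it;
    }
}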
- -void VuBaseBlock::AssignVFRegs() -{ - int i; - VuBaseBlock::LISTBLOCKS::iterator itchild; - list<VuBaseBlock*>::iterator itblock; - list<VuInstruction>::iterator itinst, itnext, itinst2; - - // init the start regs - if (type & BLOCKTYPE_ANALYZED) return; // nothing changed - memcpy(xmmregs, startregs, sizeof(xmmregs)); - - if (type & BLOCKTYPE_ANALYZED) - { - // check if changed - for (i = 0; i < iREGCNT_XMM; ++i) - { - if (xmmregs[i].inuse != startregs[i].inuse) - break; - if (xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type)) - break; - } - - if (i == iREGCNT_XMM) return; // nothing changed - } - - u8* oldX86 = x86Ptr; - - for(itinst = insts.begin(); itinst != insts.end(); itinst++) - { - - if (itinst->type & INST_DUMMY) continue; - - // reserve, go from upper to lower - int lastwrite = -1; - - for (i = 1; i >= 0; --i) - { - _VURegsNum* regs = itinst->regs + i; - - - // redo the counters so that the proper regs are released - for (int j = 0; j < iREGCNT_XMM; ++j) - { - if (xmmregs[j].inuse) - { - if (xmmregs[j].type == XMMTYPE_VFREG) - { - int count = 0; - itinst2 = itinst; - - if (i) - { - if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg) - { - itinst2 = insts.end(); - break; - } - else - { - ++count; - ++itinst2; - } - } - - while (itinst2 != insts.end()) - { - if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg || - itinst2->regs[1].VFread0 == xmmregs[j].reg || itinst2->regs[1].VFread1 == xmmregs[j].reg || itinst2->regs[1].VFwrite == xmmregs[j].reg) - break; - - ++count; - ++itinst2; - } - xmmregs[j].counter = 1000 - count; - } - else - { - assert(xmmregs[j].type == XMMTYPE_ACC); - - int count = 0; - itinst2 = itinst; - - if (i) ++itinst2; // acc isn't used in lower insts - - while (itinst2 != insts.end()) - { - assert(!((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite) & (1 << REG_ACC_FLAG))); - - if ((itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) & (1 << REG_ACC_FLAG)) - break; - - ++count; - ++itinst2; - } - - xmmregs[j].counter = 1000 - count; - } - } - } - - if (regs->VFread0) _addNeededVFtoXMMreg(regs->VFread0); - if (regs->VFread1) _addNeededVFtoXMMreg(regs->VFread1); - if (regs->VFwrite) _addNeededVFtoXMMreg(regs->VFwrite); - if (regs->VIread & (1 << REG_ACC_FLAG)) _addNeededACCtoXMMreg(); - if (regs->VIread & (1 << REG_VF0_FLAG)) _addNeededVFtoXMMreg(0); - - // alloc - itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1; - itinst->vfflush[i] = -1; - - if (regs->VFread0) - itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0); - else if (regs->VIread & (1 << REG_VF0_FLAG)) - itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0); - - if (regs->VFread1) - itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0); - else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff) - itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0); - - if (regs->VIread & (1 << REG_ACC_FLAG)) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0); - - int reusereg = -1; // 0 - VFwrite, 1 - VFAcc - - if (regs->VFwrite) - { - assert(!(regs->VIwrite&(1 << REG_ACC_FLAG))); - - if (regs->VFwxyzw == 0xf) - { - itinst->vfwrite[i] = _checkXMMreg(XMMTYPE_VFREG, regs->VFwrite, 0); - if (itinst->vfwrite[i] < 0) reusereg = 0; - } - else - { - itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0); - } - } - else if (regs->VIwrite & (1 << REG_ACC_FLAG)) - { - - if (regs->VFwxyzw == 0xf) - { - itinst->vfacc[i] = _checkXMMreg(XMMTYPE_ACC, 0, 0); - if (itinst->vfacc[i] <
0) reusereg = 1; - } - else - { - itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0); - } - - if (reusereg >= 0) - { - // reuse - itnext = itinst; - itnext++; - - u8 type = reusereg ? XMMTYPE_ACC : XMMTYPE_VFREG; - u8 reg = reusereg ? 0 : regs->VFwrite; - - if (itinst->vfacc[i] >= 0 && lastwrite != itinst->vfacc[i] && - (itnext == insts.end() || ((regs->VIread&(1 << REG_ACC_FLAG)) && (!(itnext->usedvars[0]&(1 << REG_ACC_FLAG)) || !(itnext->livevars[0]&(1 << REG_ACC_FLAG)))))) - { - - assert(reusereg == 0); - if (itnext == insts.end() || (itnext->livevars[0]&(1 << REG_ACC_FLAG))) _freeXMMreg(itinst->vfacc[i]); - xmmregs[itinst->vfacc[i]].inuse = 1; - xmmregs[itinst->vfacc[i]].reg = reg; - xmmregs[itinst->vfacc[i]].type = type; - xmmregs[itinst->vfacc[i]].mode = 0; - itinst->vfwrite[i] = itinst->vfacc[i]; - } - else if (itinst->vfread0[i] >= 0 && lastwrite != itinst->vfread0[i] && - (itnext == insts.end() || (regs->VFread0 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread0)) || !(itnext->livevars[1]&(1 << regs->VFread0)))))) - { - - if (itnext == insts.end() || (itnext->livevars[1]&(1 << regs->VFread0))) _freeXMMreg(itinst->vfread0[i]); - - xmmregs[itinst->vfread0[i]].inuse = 1; - xmmregs[itinst->vfread0[i]].reg = reg; - xmmregs[itinst->vfread0[i]].type = type; - xmmregs[itinst->vfread0[i]].mode = 0; - - if (reusereg) - itinst->vfacc[i] = itinst->vfread0[i]; - else - itinst->vfwrite[i] = itinst->vfread0[i]; - } - else if (itinst->vfread1[i] >= 0 && lastwrite != itinst->vfread1[i] && - (itnext == insts.end() || (regs->VFread1 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread1)) || !(itnext->livevars[1]&(1 << regs->VFread1)))))) - { - - if (itnext == insts.end() || (itnext->livevars[1]&(1 << regs->VFread1))) _freeXMMreg(itinst->vfread1[i]); - - xmmregs[itinst->vfread1[i]].inuse = 1; - xmmregs[itinst->vfread1[i]].reg = reg; - xmmregs[itinst->vfread1[i]].type = type; - xmmregs[itinst->vfread1[i]].mode = 0; - if (reusereg) - itinst->vfacc[i] = itinst->vfread1[i]; - else - itinst->vfwrite[i] = itinst->vfread1[i]; - } - else - { - if (reusereg) - itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0); - else - itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0); - } - } - - if (itinst->vfwrite[i] >= 0) lastwrite = itinst->vfwrite[i]; - else if (itinst->vfacc[i] >= 0) lastwrite = itinst->vfacc[i]; - - // always alloc at least 1 temp reg - int free0 = (i || regs->VFwrite || regs->VFread0 || regs->VFread1 || (regs->VIwrite & (1 << REG_ACC_FLAG)) || (regs->VIread & (1 << REG_VF0_FLAG))) - ? _allocTempXMMreg(XMMT_FPS, -1) : -1; - int free1 = 0, free2 = 0; - - if (i == 0 && itinst->vfwrite[1] >= 0 && (itinst->vfread0[0] == itinst->vfwrite[1] || itinst->vfread1[0] == itinst->vfwrite[1])) - { - itinst->vfflush[i] = _allocTempXMMreg(XMMT_FPS, -1); - } - - if (i == 1 && (regs->VIwrite & (1 << REG_CLIP_FLAG))) - { - // CLIP inst, need two extra regs - if (free0 < 0) free0 = _allocTempXMMreg(XMMT_FPS, -1); - - free1 = _allocTempXMMreg(XMMT_FPS, -1); - free2 = _allocTempXMMreg(XMMT_FPS, -1); - _freeXMMreg(free1); - _freeXMMreg(free2); - } - else if (regs->VIwrite & (1 << REG_P)) - { - // EFU inst, need extra reg - free1 = _allocTempXMMreg(XMMT_FPS, -1); - if (free0 == -1) free0 = free1; - _freeXMMreg(free1); - } - - if (itinst->vfflush[i] >= 0) _freeXMMreg(itinst->vfflush[i]); - if (free0 >= 0) _freeXMMreg(free0); - - itinst->vffree[i] = (free0 & 0xf) | (free1 << 8) | (free2 << 16); - if (free0 == -1) itinst->vffree[i] |= VFFREE_INVALID0; - - _clearNeededXMMregs(); - } - } - - assert(x86Ptr == oldX86); - u32 analyzechildren = !(type & BLOCKTYPE_ANALYZED); - type |= BLOCKTYPE_ANALYZED; - - //memset(endregs, 0, sizeof(endregs)); - - if (analyzechildren) - { - for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) - { - (*itchild)->AssignVFRegs(); - } - } -}
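The counter fixups at the top of the allocation loop above implement a furthest-next-use policy: each cached VF/ACC register gets counter = 1000 - (distance to its next use), so when the allocator needs a slot it evicts the register whose next use is farthest away, i.e. the one with the smallest counter. In isolation, with SketchXmm standing in for the xmmregs entries:

#include <vector>

struct SketchXmm { bool inuse; int reg; int counter; };

// Pick an eviction victim the way the counters above rank registers:
// a free slot wins outright, otherwise the smallest counter loses
// (smallest counter = next use farthest in the future).
static int PickVictim(const std::vector<SketchXmm>& regs)
{
    int victim = -1;
    for (size_t i = 0; i < regs.size(); ++i)
    {
        if (!regs[i].inuse) return (int)i;
        if (victim < 0 || regs[i].counter < regs[victim].counter)
            victim = (int)i;
    }
    return victim;
}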
- -struct MARKOVBLANKET -{ - list<VuBaseBlock*> parents; - list<VuBaseBlock*> children; -}; - -static MARKOVBLANKET s_markov; - -void VuBaseBlock::AssignVIRegs(int parent) -{ - const int maxregs = 6; - - if (parent) - { - if ((type&BLOCKTYPE_ANALYZEDPARENT)) - return; - - type |= BLOCKTYPE_ANALYZEDPARENT; - s_markov.parents.push_back(this); - for (LISTBLOCKS::iterator it = blocks.begin(); it != blocks.end(); ++it) - { - (*it)->AssignVIRegs(0); - } - return; - } - - if ((type&BLOCKTYPE_ANALYZED)) - return; - - // child - assert(allocX86Regs == -1); - allocX86Regs = s_vecRegArray.size(); - s_vecRegArray.resize(allocX86Regs + iREGCNT_GPR); - - _x86regs* pregs = &s_vecRegArray[allocX86Regs]; - memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR); - - assert(parents.size() > 0); - - list<VuBaseBlock*>::iterator itparent; - u32 usedvars = insts.front().usedvars[0]; - u32 livevars = insts.front().livevars[0]; - - if (parents.size() > 0) - { - u32 usedvars2 = 0xffffffff; - - for(itparent = parents.begin(); itparent != parents.end(); itparent++) - { - usedvars2 &= (*itparent)->insts.front().usedvars[0]; - } - - usedvars |= usedvars2; - } - - usedvars &= livevars; - - // currently order doesn't matter - int num = 0; - - if (usedvars) - { - for (int i = 1; i < 16; ++i) - { - if (usedvars & (1 << i)) - { - pregs[num].inuse = 1; - pregs[num].reg = i; - - livevars &= ~(1 << i); - - if (++num >= maxregs) break; - } - } - } - - if (num < maxregs) - { - livevars &= ~usedvars; - livevars &= insts.back().usedvars[0]; - - if (livevars) - { - for (int i = 1; i < 16; ++i) - { - if (livevars & (1 << i)) - { - pregs[num].inuse = 1; - pregs[num].reg = i; - - if (++num >= maxregs) break; - } - } - } - } - - s_markov.children.push_back(this); - type |= BLOCKTYPE_ANALYZED; - - for(itparent = parents.begin(); itparent != parents.end(); itparent++) - { - (*itparent)->AssignVIRegs(1); - } -} - -static void SuperVUAssignRegs() -{ - list<VuBaseBlock*>::iterator itblock, itblock2; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - (*itblock)->type &= ~BLOCKTYPE_ANALYZED; - } - s_listBlocks.front()->AssignVFRegs(); - - // VI assignments, find markov blanket for each node in the graph - // then allocate regs based on the commonly used ones -#ifdef SUPERVU_X86CACHING - for(itblock =
s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - (*itblock)->type &= ~(BLOCKTYPE_ANALYZED | BLOCKTYPE_ANALYZEDPARENT); - } - s_vecRegArray.resize(0); - u8 usedregs[16]; - - // note: first block always has to start with no alloc regs - bool bfirst = true; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - - if (!((*itblock)->type & BLOCKTYPE_ANALYZED)) - { - - if ((*itblock)->parents.size() == 0) - { - (*itblock)->type |= BLOCKTYPE_ANALYZED; - bfirst = false; - continue; - } - - s_markov.children.clear(); - s_markov.parents.clear(); - (*itblock)->AssignVIRegs(0); - - // assign the regs - int regid = s_vecRegArray.size(); - s_vecRegArray.resize(regid + iREGCNT_GPR); - - _x86regs* mergedx86 = &s_vecRegArray[regid]; - memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR); - - if (!bfirst) - { - *(u32*)usedregs = *((u32*)usedregs + 1) = *((u32*)usedregs + 2) = *((u32*)usedregs + 3) = 0; - - for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++) - { - assert((*itblock2)->allocX86Regs >= 0); - _x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs]; - for (int i = 0; i < iREGCNT_GPR; ++i) - { - if (pregs[i].inuse && pregs[i].reg < 16) - { - //assert( pregs[i].reg < 16); - usedregs[pregs[i].reg]++; - } - } - } - - int num = 1; - for (int i = 0; i < 16; ++i) - { - if (usedregs[i] == s_markov.children.size()) - { - // use - mergedx86[num].inuse = 1; - mergedx86[num].reg = i; - mergedx86[num].type = (s_vu ? X86TYPE_VU1 : 0) | X86TYPE_VI; - mergedx86[num].mode = MODE_READ; - if (++num >= iREGCNT_GPR) - break; - if (num == ESP) - ++num; - } - } - - for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++) - { - assert((*itblock2)->nStartx86 == -1); - (*itblock2)->nStartx86 = regid; - } - - for(itblock2 = s_markov.parents.begin(); itblock2 != s_markov.parents.end(); itblock2++) - { - assert((*itblock2)->nEndx86 == -1); - (*itblock2)->nEndx86 = regid; - } - } - - bfirst = false; - } - } -#endif -} - -////////////////// -// Recompilation -////////////////// - -// cycles in which the last Q,P regs were finished (written to VU->VI[]) -// the write occurs before the instruction is executed at that cycle -// compare with s_TotalVUCycles -// if less than 0, already flushed -int s_writeQ, s_writeP; - -// declare the saved registers -uptr s_vu1esp, s_callstack;//, s_vu1esp -uptr s_vu1ebp, s_vuebx, s_vuedi, s_vu1esi; - -static int s_recWriteQ, s_recWriteP; // wait times during recompilation -static int s_needFlush; // first bit - Q, second bit - P, third bit - Q has been written, fourth bit - P has been written - -static int s_JumpX86; -static int s_ScheduleXGKICK = 0, s_XGKICKReg = -1; - -void recVUMI_XGKICK_(VURegs *VU); - -void SuperVUCleanupProgram(u32 startpc, int vuindex) -{ -#ifdef SUPERVU_COUNT - QueryPerformanceCounter(&svufinal); - svutime += (u32)(svufinal.QuadPart - svubase.QuadPart); -#endif - - assert(s_vu1esp == 0); - - VU = vuindex ? 
&VU1 : &VU0; - VU->cycle += s_TotalVUCycles; - - //VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand - if (s_TotalVUCycles < 3000) - cpuRegs.cycle += s_TotalVUCycles * Config.Hacks.VUCycleSteal; - else - cpuRegs.cycle += 3000 * Config.Hacks.VUCycleSteal; - - if ((int)s_writeQ > 0) VU->VI[REG_Q] = VU->q; - if ((int)s_writeP > 0) - { - assert(VU == &VU1); - VU1.VI[REG_P] = VU1.p; // only VU1 - } - - //memset(recVUStack, 0, SUPERVU_STACKSIZE * 4); - - // Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2; - // not doing this because it's slow and not needed (rama) - // _initXMMregs(); - // _initMMXregs(); - // _initX86regs(); -} - -#if defined(_MSC_VER) - -// entry point of all vu programs from emulator calls -__declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex) -{ - __asm - { - mov eax, dword ptr [esp] - mov s_TotalVUCycles, 0 // necessary to be here! - add esp, 4 - mov s_callstack, eax - call SuperVUGetProgram - - // save cpu state - mov s_vu1ebp, ebp - mov s_vu1esi, esi // have to save even in Release - mov s_vuedi, edi // have to save even in Release - mov s_vuebx, ebx - } -#ifdef _DEBUG - __asm - { - mov s_vu1esp, esp - } -#endif - - __asm - { - //stmxcsr s_ssecsr - ldmxcsr g_sseVUMXCSR - - // init vars - mov s_writeQ, 0xffffffff - mov s_writeP, 0xffffffff - - jmp eax - } -} - -// exit point of all vu programs -__declspec(naked) static void SuperVUEndProgram() -{ - __asm - { - // restore cpu state - ldmxcsr g_sseMXCSR - - mov ebp, s_vu1ebp - mov esi, s_vu1esi - mov edi, s_vuedi - mov ebx, s_vuebx - } - -#ifdef _DEBUG - __asm - { - sub s_vu1esp, esp - } -#endif - - __asm - { - call SuperVUCleanupProgram - jmp s_callstack // so returns correctly - } -} - -#endif - -// Flushes P/Q regs -void SuperVUFlush(int p, int wait) -{ - u8* pjmp[3]; - if (!(s_needFlush&(1 << p))) return; - - int recwait = p ? s_recWriteP : s_recWriteQ; - if (!wait && s_pCurInst->info.cycle < recwait) return; - - if (recwait == 0) - { - // write didn't happen this block - MOV32MtoR(EAX, p ? (uptr)&s_writeP : (uptr)&s_writeQ); - OR32RtoR(EAX, EAX); - pjmp[0] = JS8(0); - - if (s_pCurInst->info.cycle) SUB32ItoR(EAX, s_pCurInst->info.cycle); - - // if writeQ <= total+offset - if (!wait) // only write back if time is up - { - CMP32MtoR(EAX, (uptr)&s_TotalVUCycles); - pjmp[1] = JG8(0); - } - else - { - // add (writeQ-total-offset) to s_TotalVUCycles - // necessary? - CMP32MtoR(EAX, (uptr)&s_TotalVUCycles); - pjmp[2] = JLE8(0); - MOV32RtoM((uptr)&s_TotalVUCycles, EAX); - x86SetJ8(pjmp[2]); - } - } - else if (wait && s_pCurInst->info.cycle < recwait) - { - ADD32ItoM((uptr)&s_TotalVUCycles, recwait); - } - - MOV32MtoR(EAX, SuperVUGetVIAddr(p ? REG_P : REG_Q, 0)); - MOV32ItoM(p ? (uptr)&s_writeP : (uptr)&s_writeQ, 0x80000000); - MOV32RtoM(SuperVUGetVIAddr(p ? 
REG_P : REG_Q, 1), EAX); - - if (recwait == 0) - { - if (!wait) x86SetJ8(pjmp[1]); - x86SetJ8(pjmp[0]); - } - - if (wait || (!p && recwait == 0 && s_pCurInst->info.cycle >= 12) || (!p && recwait > 0 && s_pCurInst->info.cycle >= recwait)) - s_needFlush &= ~(1 << p); -} - -// executed only once per program -static u32* SuperVUStaticAlloc(u32 size) -{ - assert(recVUStackPtr + size <= recVUStack + SUPERVU_STACKSIZE); - // always zero - if (size == 4) *(u32*)recVUStackPtr = 0; - else memset(recVUStackPtr, 0, size); - recVUStackPtr += size; - return (u32*)(recVUStackPtr - size); -} - -static void SuperVURecompile() -{ - // save cpu state - recVUStackPtr = recVUStack; - - _initXMMregs(); - - list<VuBaseBlock*>::iterator itblock; - - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - (*itblock)->type &= ~BLOCKTYPE_ANALYZED; - } - - s_listBlocks.front()->Recompile(); - - // make sure everything compiled - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - assert(((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->pcode != NULL); - } - - // link all blocks - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - VuBaseBlock::LISTBLOCKS::iterator itchild; - - assert((*itblock)->blocks.size() <= ArraySize((*itblock)->pChildJumps)); - - int i = 0; - for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) - { - - if ((u32)(uptr)(*itblock)->pChildJumps[i] == 0xffffffff) - continue; - - if ((*itblock)->pChildJumps[i] == NULL) - { - VuBaseBlock* pchild = *itchild; - - if (pchild->type & BLOCKTYPE_HASEOP) - { - assert(pchild->blocks.size() == 0); - - AND32ItoM((uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu ? ~0x100 : ~0x001); // E flag - AND32ItoM((uptr)&VU->vifRegs->stat, ~0x4); - - MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc); - JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5)); - } - // only other case is when there are two branches - else - { - assert((*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH); - } - - continue; - } - - if ((u32)(uptr)(*itblock)->pChildJumps[i] & 0x80000000) - { - // relative - assert((uptr)(*itblock)->pChildJumps[i] <= 0xffffffff); - (*itblock)->pChildJumps[i] = (u32*)((uptr)(*itblock)->pChildJumps[i] & 0x7fffffff); - *(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode - ((uptr)(*itblock)->pChildJumps[i] + 4); - } - else - { - *(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode; - } - - ++i; - } - } - - s_pFnHeader->pprogfunc = s_listBlocks.front()->pcode; -}
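When the linker above patches a child jump slot tagged with the 0x80000000 bit, it is filling in a rel32 operand: the stored displacement must be the target address minus the address just past the 4-byte field, which is exactly the pcode minus (slot + 4) line. The two patch cases in isolation, with illustrative helper names that are not part of the recompiler:

#include <cstdint>

// rel32: the CPU resolves the jump as (address after the operand) + disp.
static void PatchRel32(uint32_t* slot, uintptr_t target)
{
    *slot = (uint32_t)(target - ((uintptr_t)slot + 4));
}

// absolute: the untagged case just stores the raw code pointer.
static void PatchAbs32(uint32_t* slot, uintptr_t target)
{
    *slot = (uint32_t)target;
}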
- -// debug - - -u32 s_saveecx, s_saveedx, s_saveebx, s_saveesi, s_saveedi, s_saveebp; -u32 g_curdebugvu; - -//float vuDouble(u32 f); - -#if defined(_MSC_VER) -__declspec(naked) static void svudispfn() -{ - __asm - { - mov g_curdebugvu, eax - mov s_saveecx, ecx - mov s_saveedx, edx - mov s_saveebx, ebx - mov s_saveesi, esi - mov s_saveedi, edi - mov s_saveebp, ebp - } -#else - -void svudispfntemp() -{ -#endif - -#ifdef _DEBUG - static u32 i; - - if (((vudump&8) && g_curdebugvu) || ((vudump&0x80) && !g_curdebugvu)) //&& g_vu1lastrec != g_vu1last ) { - { - - if (skipparent != g_vu1lastrec) - { - for (i = 0; i < ArraySize(badaddrs); ++i) - { - if (s_svulast == badaddrs[i][1] && g_vu1lastrec == badaddrs[i][0]) - break; - } - - if (i == ArraySize(badaddrs)) - { - //static int curesp; - //__asm mov curesp, esp - //Console::WriteLn("tVU: %x %x %x", s_svulast, s_vucount, s_vufnheader); - if (g_curdebugvu) iDumpVU1Registers(); - else iDumpVU0Registers(); - s_vucount++; - } - } - - g_vu1lastrec = s_svulast; - } -#endif - -#if defined(_MSC_VER) - __asm - { - mov ecx, s_saveecx - mov edx, s_saveedx - mov ebx, s_saveebx - mov esi, s_saveesi - mov edi, s_saveedi - mov ebp, s_saveebp - ret - } -#endif -} - -// frees all regs taking into account the livevars -void SuperVUFreeXMMregs(u32* livevars) -{ - for (int i = 0; i < iREGCNT_XMM; ++i) - { - if (xmmregs[i].inuse) - { - // same reg - if ((xmmregs[i].mode & MODE_WRITE)) - { - -#ifdef SUPERVU_INTERCACHING - if (xmmregs[i].type == XMMTYPE_VFREG) - { - if (!(livevars[1] & (1 << xmmregs[i].reg))) continue; - } - else if (xmmregs[i].type == XMMTYPE_ACC) - { - if (!(livevars[0] & (1 << REG_ACC_FLAG))) continue; - } -#endif - - if (xmmregs[i].mode & MODE_VUXYZ) - { - // ALWAYS update - u32 addr = xmmregs[i].type == XMMTYPE_VFREG ? (uptr) & VU->VF[xmmregs[i].reg] : (uptr) & VU->ACC; - - if (xmmregs[i].mode & MODE_VUZ) - { - SSE_MOVHPS_XMM_to_M64(addr, (x86SSERegType)i); - SSE_SHUFPS_M128_to_XMM((x86SSERegType)i, addr, 0xc4); - } - else - { - SSE_MOVHPS_M64_to_XMM((x86SSERegType)i, addr + 8); - } - - xmmregs[i].mode &= ~MODE_VUXYZ; - } - - _freeXMMreg(i); - } - } - } - - //_freeXMMregs(); -} - -void SuperVUTestVU0Condition(u32 incstack) -{ - if (s_vu && !SUPERVU_CHECKCONDITION) return; // vu0 only - - CMP32ItoM((uptr)&s_TotalVUCycles, 512); // sometimes games spin on vu0, so be careful with this value - // woody hangs if too high - - if (incstack) - { - u8* ptr = JB8(0); - - ADD32ItoR(ESP, incstack); - //CALLFunc((u32)timeout); - JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5)); - - x86SetJ8(ptr); - } - else JAE32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 6)); -}
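SuperVUTestVU0Condition emits a guard, not a call: a CMP against the 512-cycle budget followed by either a JAE straight to SuperVUEndProgram or a JB around an ESP fixup plus JMP. At runtime the generated code behaves like this plain C++ model, where the two stubs stand in for the emitted stack fixup and exit jump:

#include <cstdint>

static void UnwindStack(uint32_t bytes) { (void)bytes; } // models ADD ESP, incstack
static void ExitProgram() {}                             // models JMP SuperVUEndProgram

static void GuardSemantics(uint32_t totalVUCycles, uint32_t incstack)
{
    if (totalVUCycles >= 512) // the budget the CMP32ItoM compares against
    {
        if (incstack) UnwindStack(incstack);
        ExitProgram();
    }
}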
- -void VuBaseBlock::Recompile() -{ - if (type & BLOCKTYPE_ANALYZED) return; - - x86Align(16); - pcode = x86Ptr; - -#ifdef _DEBUG - MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc); - MOV32ItoM((uptr)&VU->VI[REG_TPC], startpc); - MOV32ItoM((uptr)&s_svulast, startpc); - - list<VuBaseBlock*>::iterator itparent; - for (itparent = parents.begin(); itparent != parents.end(); ++itparent) - { - if ((*itparent)->blocks.size() == 1 && (*itparent)->blocks.front()->startpc == startpc && - ((*itparent)->insts.size() < 2 || (----(*itparent)->insts.end())->regs[0].pipe != VUPIPE_BRANCH)) - { - MOV32ItoM((uptr)&skipparent, (*itparent)->startpc); - break; - } - } - - if (itparent == parents.end()) MOV32ItoM((uptr)&skipparent, -1); - - MOV32ItoR(EAX, s_vu); - CALLFunc((uptr)svudispfn); -#endif - - s_pCurBlock = this; - s_needFlush = 3; - pc = startpc; - branch = 0; - s_recWriteQ = s_recWriteP = 0; - s_XGKICKReg = -1; - s_ScheduleXGKICK = 0; - - s_ClipRead = s_PrevClipWrite = (uptr) & VU->VI[REG_CLIP_FLAG]; - s_StatusRead = s_PrevStatusWrite = (uptr) & VU->VI[REG_STATUS_FLAG]; - s_MACRead = s_PrevMACWrite = (uptr) & VU->VI[REG_MAC_FLAG]; - s_PrevIWrite = (uptr) & VU->VI[REG_I]; - s_JumpX86 = 0; - s_UnconditionalDelay = 0; - - memcpy(xmmregs, startregs, sizeof(xmmregs)); -#ifdef SUPERVU_X86CACHING - if (nStartx86 >= 0) - memcpy(x86regs, &s_vecRegArray[nStartx86], sizeof(x86regs)); - else - _initX86regs(); -#else - _initX86regs(); -#endif - - list<VuInstruction>::iterator itinst; - for(itinst = insts.begin(); itinst != insts.end(); itinst++) - { - s_pCurInst = &(*itinst); - if (s_JumpX86 > 0) - { - if (!x86regs[s_JumpX86].inuse) - { - // load - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_READ); - } - x86regs[s_JumpX86].needed = 1; - } - - if (s_ScheduleXGKICK && s_XGKICKReg > 0) - { - assert(x86regs[s_XGKICKReg].inuse); - x86regs[s_XGKICKReg].needed = 1; - } - itinst->Recompile(itinst, vuxyz); - - if (s_ScheduleXGKICK > 0) - { - if (s_ScheduleXGKICK-- == 1) - { - recVUMI_XGKICK_(VU); - } - } - } - assert(pc == endpc); - assert(s_ScheduleXGKICK == 0); - - // flush flags - if (s_PrevClipWrite != (uptr)&VU->VI[REG_CLIP_FLAG]) - { - MOV32MtoR(EAX, s_PrevClipWrite); - MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX); - } - if (s_PrevStatusWrite != (uptr)&VU->VI[REG_STATUS_FLAG]) - { - MOV32MtoR(EAX, s_PrevStatusWrite); - MOV32RtoM((uptr)&VU->VI[REG_STATUS_FLAG], EAX); - } - if (s_PrevMACWrite != (uptr)&VU->VI[REG_MAC_FLAG]) - { - MOV32MtoR(EAX, s_PrevMACWrite); - MOV32RtoM((uptr)&VU->VI[REG_MAC_FLAG], EAX); - } -// if( s_StatusRead != (uptr)&VU->VI[REG_STATUS_FLAG] ) { -// // only lower 8 bits valid! -// MOVZX32M8toR(EAX, s_StatusRead); -// MOV32RtoM((uptr)&VU->VI[REG_STATUS_FLAG], EAX); -// } -// if( s_MACRead != (uptr)&VU->VI[REG_MAC_FLAG] ) { -// // only lower 8 bits valid! -// MOVZX32M8toR(EAX, s_MACRead); -// MOV32RtoM((uptr)&VU->VI[REG_MAC_FLAG], EAX); -// } - if (s_PrevIWrite != (uptr)&VU->VI[REG_I]) - { - MOV32ItoM((uptr)&VU->VI[REG_I], *(u32*)s_PrevIWrite); // never changes - } - - ADD32ItoM((uptr)&s_TotalVUCycles, cycles); - - // compute branches, jumps, eop - if (type & BLOCKTYPE_HASEOP) - { - // end - _freeXMMregs(); - _freeX86regs(); - AND32ItoM((uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu ? ~0x100 : ~0x001); // E flag - AND32ItoM((uptr)&VU->vifRegs->stat, ~0x4); - - if (!branch) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc); - - JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5)); - } - else - { - - u32 livevars[2] = {0}; - - list<VuInstruction>::iterator lastinst = GetInstIterAtPc(endpc - 8); - lastinst++; - - if (lastinst != insts.end()) - { - livevars[0] = lastinst->livevars[0]; - livevars[1] = lastinst->livevars[1]; - } - else - { - // take from children - if (blocks.size() > 0) - { - LISTBLOCKS::iterator itchild; - for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) - { - livevars[0] |= (*itchild)->insts.front().livevars[0]; - livevars[1] |= (*itchild)->insts.front().livevars[1]; - } - } - else - { - livevars[0] = ~0; - livevars[1] = ~0; - } - } - - SuperVUFreeXMMregs(livevars); - - // get rid of any writes, otherwise _freeX86regs will write - x86regs[s_JumpX86].mode &= ~MODE_WRITE; - - if (branch == 1) - { - if (!x86regs[s_JumpX86].inuse) - { - assert(x86regs[s_JumpX86].type == X86TYPE_VUJUMP); - s_JumpX86 = 0xffffffff; // notify to jump from g_recWriteback - } - } - - // align VI regs -#ifdef SUPERVU_X86CACHING - if (nEndx86 >= 0) - { - _x86regs* endx86 = &s_vecRegArray[nEndx86]; - for (int i = 0; i < iREGCNT_GPR; ++i) - { - if (endx86[i].inuse) - { - - if (s_JumpX86 == i && x86regs[s_JumpX86].inuse) - { - x86regs[s_JumpX86].inuse = 0; - x86regs[EAX].inuse = 1; - MOV32RtoR(EAX, s_JumpX86); - s_JumpX86 = EAX; - } - - if (x86regs[i].inuse) - { - if (x86regs[i].type == endx86[i].type && x86regs[i].reg == endx86[i].reg) - { - _freeX86reg(i); - // will continue to use it - continue; - } - -#ifdef SUPERVU_INTERCACHING - if (x86regs[i].type == (X86TYPE_VI | (s_vu ?
X86TYPE_VU1 : 0))) - { - if (livevars[0] & (1 << x86regs[i].reg)) - _freeX86reg(i); - else - x86regs[i].inuse = 0; - } - else -#endif - { - _freeX86reg(i); - } - } - - // realloc - _allocX86reg(i, endx86[i].type, endx86[i].reg, MODE_READ); - if (x86regs[i].mode & MODE_WRITE) - { - _freeX86reg(i); - x86regs[i].inuse = 1; - } - } - else _freeX86reg(i); - } - } - else _freeX86regs(); -#else - _freeX86regs(); -#endif - - // store the last block executed - MOV32ItoM((uptr)&g_nLastBlockExecuted, s_pCurBlock->startpc); - - switch (branch) - { - case 1: // branch, esi has new prog - - SuperVUTestVU0Condition(0); - - if (s_JumpX86 == 0xffffffff) - JMP32M((uptr)&g_recWriteback); - else - JMPR(s_JumpX86); - - break; - case 4: // jalr - pChildJumps[0] = (u32*)0xffffffff; - // fall through - - case 0x10: // jump, esi has new vupc - { - _freeXMMregs(); - _freeX86regs(); - - SuperVUTestVU0Condition(8); - - // already onto stack - CALLFunc((uptr)SuperVUGetProgram); - ADD32ItoR(ESP, 8); - JMPR(EAX); - break; - } - - case 0x13: // jr with uncon branch, uncond branch takes precendence (dropship) - { -// s32 delta = (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) << 3; -// ADD32ItoRmOffset(ESP, delta, 0); - - ADD32ItoR(ESP, 8); // restore - pChildJumps[0] = (u32*)((uptr)JMP32(0) | 0x80000000); - break; - } - case 0: - case 3: // unconditional branch - pChildJumps[s_UnconditionalDelay] = (u32*)((uptr)JMP32(0) | 0x80000000); - break; - - default: - DevCon::Error("Bad branch %x\n", params branch); - assert(0); - break; - } - } - - pendcode = x86Ptr; - type |= BLOCKTYPE_ANALYZED; - - LISTBLOCKS::iterator itchild; - for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) - { - (*itchild)->Recompile(); - } -} - -#define GET_VUXYZMODE(reg) 0//((vuxyz&(1<<(reg)))?MODE_VUXYZ:0) - -int VuInstruction::SetCachedRegs(int upper, u32 vuxyz) -{ - if (vfread0[upper] >= 0) - { - SuperVUFreeXMMreg(vfread0[upper], XMMTYPE_VFREG, regs[upper].VFread0); - _allocVFtoXMMreg(VU, vfread0[upper], regs[upper].VFread0, MODE_READ | GET_VUXYZMODE(regs[upper].VFread0)); - } - if (vfread1[upper] >= 0) - { - SuperVUFreeXMMreg(vfread1[upper], XMMTYPE_VFREG, regs[upper].VFread1); - _allocVFtoXMMreg(VU, vfread1[upper], regs[upper].VFread1, MODE_READ | GET_VUXYZMODE(regs[upper].VFread1)); - } - if (vfacc[upper] >= 0 && (regs[upper].VIread&(1 << REG_ACC_FLAG))) - { - SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0); - _allocACCtoXMMreg(VU, vfacc[upper], MODE_READ); - } - if (vfwrite[upper] >= 0) - { - assert(regs[upper].VFwrite > 0); - SuperVUFreeXMMreg(vfwrite[upper], XMMTYPE_VFREG, regs[upper].VFwrite); - _allocVFtoXMMreg(VU, vfwrite[upper], regs[upper].VFwrite, - MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? MODE_READ : 0) | GET_VUXYZMODE(regs[upper].VFwrite)); - } - if (vfacc[upper] >= 0 && (regs[upper].VIwrite&(1 << REG_ACC_FLAG))) - { - SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0); - _allocACCtoXMMreg(VU, vfacc[upper], MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? 
MODE_READ : 0)); - } - - int info = PROCESS_VU_SUPER; - if (vfread0[upper] >= 0) info |= PROCESS_EE_SET_S(vfread0[upper]); - if (vfread1[upper] >= 0) info |= PROCESS_EE_SET_T(vfread1[upper]); - if (vfacc[upper] >= 0) info |= PROCESS_VU_SET_ACC(vfacc[upper]); - if (vfwrite[upper] >= 0) - { - if (regs[upper].VFwrite == _Ft_ && vfread1[upper] < 0) - { - info |= PROCESS_EE_SET_T(vfwrite[upper]); - } - else - { - assert(regs[upper].VFwrite == _Fd_); - info |= PROCESS_EE_SET_D(vfwrite[upper]); - } - } - - if (!(vffree[upper]&VFFREE_INVALID0)) - { - SuperVUFreeXMMreg(vffree[upper]&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, vffree[upper]&0xf); - } - info |= PROCESS_VU_SET_TEMP(vffree[upper] & 0xf); - - if (vfflush[upper] >= 0) - { - SuperVUFreeXMMreg(vfflush[upper], XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, vfflush[upper]); - } - - if (upper && (regs[upper].VIwrite & (1 << REG_CLIP_FLAG))) - { - // CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC - assert(vfwrite[upper] == -1); - SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf); - info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf); - - SuperVUFreeXMMreg((vffree[upper] >> 16)&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 16)&0xf); - info |= PROCESS_EE_SET_ACC((vffree[upper] >> 16) & 0xf); - - _freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore - _freeXMMreg((vffree[upper] >> 16)&0xf); // don't need anymore - } - else if (regs[upper].VIwrite & (1 << REG_P)) - { - SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0); - _allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf); - info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf); - _freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore - } - - if (vfflush[upper] >= 0) _freeXMMreg(vfflush[upper]); - if (!(vffree[upper]&VFFREE_INVALID0)) - _freeXMMreg(vffree[upper]&0xf); // don't need anymore - - if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG))) - info |= PROCESS_VU_UPDATEFLAGS; - - return info; -} - -void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz) -{ - //static PCSX2_ALIGNED16(VECTOR _VF); - //static PCSX2_ALIGNED16(VECTOR _VFc); - u32 *ptr; - u8* pjmp; - int vfregstore = 0; - - assert(s_pCurInst == this); - s_WriteToReadQ = 0; - - ptr = (u32*) & VU->Micro[ pc ]; - - if (type & INST_Q_READ) - SuperVUFlush(0, (ptr[0] == 0x800003bf) || !!(regs[0].VIwrite & (1 << REG_Q))); - if (type & INST_P_READ) - SuperVUFlush(1, (ptr[0] == 0x800007bf) || !!(regs[0].VIwrite & (1 << REG_P))); - - if (type & INST_DUMMY) - { - - // find nParentPc - VuInstruction* pparentinst = NULL; - - // if true, will check if parent block was executed before getting the results of the flags (superman returns) - int nParentCheckForExecution = -1; - -// int badaddrs[] = { -// 0x60,0x68,0x70,0x60,0x68,0x70,0x88,0x90,0x98,0x98,0xa8,0xb8,0x88,0x90, -// 0x4a8,0x4a8,0x398,0x3a0,0x3a8,0xa0 -// }; - -#ifdef SUPERVU_PROPAGATEFLAGS - if (nParentPc != -1 && (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc)) - { - -// if( !s_vu ) { -// for(int j = 0; j < ARRAYSIZE(badaddrs); ++j) { -// if( badaddrs[j] == nParentPc ) -// goto NoParent; -// } -// } - - list<VuBaseBlock*>::iterator itblock; - for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) - { - if (nParentPc >= (*itblock)->startpc && nParentPc < (*itblock)->endpc) - { - pparentinst = &(*(*itblock)->GetInstIterAtPc(nParentPc)); - //if( !s_vu ) SysPrintf("%x ", nParentPc);
- if (find(s_pCurBlock->parents.begin(), s_pCurBlock->parents.end(), *itblock) != s_pCurBlock->parents.end()) - nParentCheckForExecution = (*itblock)->startpc; - break; - } - } - - assert(pparentinst != NULL); - } -#endif - - if (type & INST_CLIP_WRITE) - { - if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) - { - - if (!CHECK_VUCLIPFLAGHACK && pparentinst != NULL) - { - - if (nParentCheckForExecution >= 0) - { - if (pparentinst->pClipWrite == 0) - pparentinst->pClipWrite = (uptr)SuperVUStaticAlloc(4); - - if (s_ClipRead == 0) - s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG]; - - CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); - u8* jptr = JNE8(0); - CMP32ItoM((uptr)&s_ClipRead, (uptr)&VU->VI[REG_CLIP_FLAG]); - u8* jptr2 = JE8(0); - MOV32MtoR(EAX, pparentinst->pClipWrite); - MOV32RtoM(s_ClipRead, EAX); - x86SetJ8(jptr); - x86SetJ8(jptr2); - } - } - else s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG]; - } - else - { - s_ClipRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pClipWrite; - if (s_ClipRead == 0) Console::WriteLn("super ClipRead allocation error!"); - } - } - - // before modifying, check if they will ever be read - if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS) - { - - u8 outofblock = 0; - if (type & INST_STATUS_WRITE) - { - - if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) - { - - // reading from out of this block, so already flushed to mem - if (pparentinst != NULL) //&& pparentinst->pStatusWrite != NULL ) { - { - - // might not have processed it yet, so reserve a mem loc - if (pparentinst->pStatusWrite == 0) - { - pparentinst->pStatusWrite = (uptr)SuperVUStaticAlloc(4); - //MOV32ItoM(pparentinst->pStatusWrite, 0); - } - -// if( s_pCurBlock->prevFlagsOutOfBlock && s_StatusRead != NULL ) { -// // or instead since don't now which parent we came from -// MOV32MtoR(EAX, pparentinst->pStatusWrite); -// OR32RtoM(s_StatusRead, EAX); -// MOV32ItoM(pparentinst->pStatusWrite, 0); -// } - - if (nParentCheckForExecution >= 0) - { - - // don't now which parent we came from, so have to check -// uptr tempstatus = (uptr)SuperVUStaticAlloc(4); -// if( s_StatusRead != NULL ) -// MOV32MtoR(EAX, s_StatusRead); -// else -// MOV32MtoR(EAX, (uptr)&VU->VI[REG_STATUS_FLAG]); -// s_StatusRead = tempstatus; - - if (s_StatusRead == 0) - s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG]; - - CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); - u8* jptr = JNE8(0); - MOV32MtoR(EAX, pparentinst->pStatusWrite); - MOV32ItoM(pparentinst->pStatusWrite, 0); - MOV32RtoM(s_StatusRead, EAX); - x86SetJ8(jptr); - } - else - { - uptr tempstatus = (uptr)SuperVUStaticAlloc(4); - MOV32MtoR(EAX, pparentinst->pStatusWrite); - MOV32RtoM(tempstatus, EAX); - MOV32ItoM(pparentinst->pStatusWrite, 0); - s_StatusRead = tempstatus; - } - - outofblock = 2; - } - else - s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG]; - } - else - { - s_StatusRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pStatusWrite; - if (s_StatusRead == 0) Console::WriteLn("super StatusRead allocation error!"); -// if( pc >= (u32)s_pCurBlock->endpc-8 ) { -// // towards the end, so variable might be leaded to another block (silent hill 4) -// uptr tempstatus = (uptr)SuperVUStaticAlloc(4); -// MOV32MtoR(EAX, s_StatusRead); -// MOV32RtoM(tempstatus, EAX); -// MOV32ItoM(s_StatusRead, 0); -// s_StatusRead = tempstatus; -// } - } - } - if (type & INST_MAC_WRITE) - { - - if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) - { - // reading from out of this block, so already flushed to mem - - if (pparentinst != 
NULL) //&& pparentinst->pMACWrite != NULL ) { - { - // necessary for (katamari) - // towards the end, so variable might be leaked to another block (silent hill 4) - - // might not have processed it yet, so reserve a mem loc - if (pparentinst->pMACWrite == 0) - { - pparentinst->pMACWrite = (uptr)SuperVUStaticAlloc(4); - //MOV32ItoM(pparentinst->pMACWrite, 0); - } - -// if( s_pCurBlock->prevFlagsOutOfBlock && s_MACRead != NULL ) { -// // or instead since don't now which parent we came from -// MOV32MtoR(EAX, pparentinst->pMACWrite); -// OR32RtoM(s_MACRead, EAX); -// MOV32ItoM(pparentinst->pMACWrite, 0); -// } - if (nParentCheckForExecution >= 0) - { - - // don't now which parent we came from, so have to check -// uptr tempmac = (uptr)SuperVUStaticAlloc(4); -// if( s_MACRead != NULL ) -// MOV32MtoR(EAX, s_MACRead); -// else -// MOV32MtoR(EAX, (uptr)&VU->VI[REG_MAC_FLAG]); -// s_MACRead = tempmac; - - if (s_MACRead == 0) s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG]; - - CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); - u8* jptr = JNE8(0); - MOV32MtoR(EAX, pparentinst->pMACWrite); - MOV32ItoM(pparentinst->pMACWrite, 0); - MOV32RtoM(s_MACRead, EAX); - x86SetJ8(jptr); - } - else - { - uptr tempMAC = (uptr)SuperVUStaticAlloc(4); - MOV32MtoR(EAX, pparentinst->pMACWrite); - MOV32RtoM(tempMAC, EAX); - MOV32ItoM(pparentinst->pMACWrite, 0); - s_MACRead = tempMAC; - } - - outofblock = 2; - } - else - s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG]; - -// if( pc >= (u32)s_pCurBlock->endpc-8 ) { -// // towards the end, so variable might be leaked to another block (silent hill 4) -// uptr tempMAC = (uptr)SuperVUStaticAlloc(4); -// MOV32MtoR(EAX, s_MACRead); -// MOV32RtoM(tempMAC, EAX); -// MOV32ItoM(s_MACRead, 0); -// s_MACRead = tempMAC; -// } - } - else - { - s_MACRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pMACWrite; - } - } - - s_pCurBlock->prevFlagsOutOfBlock = outofblock; - } - else if (pparentinst != NULL) - { - // make sure to reset the mac and status flags! 
(katamari) - if (pparentinst->pStatusWrite) - MOV32ItoM(pparentinst->pStatusWrite, 0); - if (pparentinst->pMACWrite) - MOV32ItoM(pparentinst->pMACWrite, 0); - } - - assert(s_ClipRead != 0); - assert(s_MACRead != 0); - assert(s_StatusRead != 0); - - return; - } - - s_pCurBlock->prevFlagsOutOfBlock = 0; - -#ifdef _DEBUG - MOV32ItoR(EAX, pc); -#endif - - assert(!(type & (INST_CLIP_WRITE | INST_STATUS_WRITE | INST_MAC_WRITE))); - pc += 8; - - list<VuInstruction>::const_iterator itinst2; - - if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG))) - { - if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS) - { - if (pMACWrite == 0) - { - pMACWrite = (uptr)SuperVUStaticAlloc(4); - //MOV32ItoM(pMACWrite, 0); - } - if (pStatusWrite == 0) - { - pStatusWrite = (uptr)SuperVUStaticAlloc(4); - //MOV32ItoM(pStatusWrite, 0); - } - } - else - { - assert(s_StatusRead == (uptr)&VU->VI[REG_STATUS_FLAG]); - assert(s_MACRead == (uptr)&VU->VI[REG_MAC_FLAG]); - pMACWrite = s_MACRead; - pStatusWrite = s_StatusRead; - } - } - - if ((pClipWrite == 0) && ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG))) - { - pClipWrite = (uptr)SuperVUStaticAlloc(4); - //MOV32ItoM(pClipWrite, 0); - } - -#ifdef SUPERVU_X86CACHING - // redo the counters so that the proper regs are released - for (int j = 0; j < iREGCNT_GPR; ++j) - { - if (x86regs[j].inuse && X86_ISVI(x86regs[j].type)) - { - int count = 0; - itinst2 = itinst; - - while (itinst2 != s_pCurBlock->insts.end()) - { - if ((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite | itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) && (1 << x86regs[j].reg)) - break; - - ++count; - ++itinst2; - } - - x86regs[j].counter = 1000 - count; - } - } -#endif - - if (s_vu == 0 && (ptr[1] & 0x20000000)) // M flag - { - OR8ItoM((uptr)&VU->flags, VUFLAG_MFLAGSET); - } - if (ptr[1] & 0x10000000) // D flag - { - TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x400 : 0x004); - u8* ptr = JZ8(0); - OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x200 : 0x002); - _callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? INTC_VU1 : INTC_VU0); - x86SetJ8(ptr); - } - if (ptr[1] & 0x08000000) // T flag - { - TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x800 : 0x008); - u8* ptr = JZ8(0); - OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x400 : 0x004); - _callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ?
INTC_VU1 : INTC_VU0); - x86SetJ8(ptr); - } - - // check upper flags - if (ptr[1] & 0x80000000) // I flag - { - - assert(!(regs[0].VIwrite & ((1 << REG_Q) | (1 << REG_P)))); - - VU->code = ptr[1]; - s_vuInfo = SetCachedRegs(1, vuxyz); - if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; - if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; - - recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo); - - s_PrevIWrite = (uptr)ptr; - _clearNeededXMMregs(); - _clearNeededX86regs(); - } - else - { - if (regs[0].VIwrite & (1 << REG_Q)) - { - - // search for all the insts between this inst and writeback - itinst2 = itinst; - ++itinst2; - u32 cacheq = (itinst2 == s_pCurBlock->insts.end()); - u32* codeptr2 = ptr + 2; - - while (itinst2 != s_pCurBlock->insts.end()) - { - if (!(itinst2->type & INST_DUMMY) && ((itinst2->regs[0].VIwrite&(1 << REG_Q)) || codeptr2[0] == 0x800003bf)) // waitq, or fdiv inst - { - break; - } - if ((itinst2->type & INST_Q_WRITE) && itinst2->nParentPc == pc - 8) - { - break; - } - if (itinst2->type & INST_Q_READ) - { - cacheq = 1; - break; - } - if (itinst2->type & INST_DUMMY) - { - ++itinst2; - continue; - } - codeptr2 += 2; - ++itinst2; - } - - if (itinst2 == s_pCurBlock->insts.end()) - cacheq = 1; - - int x86temp = -1; - if (cacheq) - x86temp = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - - // new is written so flush old - // if type & INST_Q_READ, already flushed - if (!(type & INST_Q_READ) && s_recWriteQ == 0) MOV32MtoR(EAX, (uptr)&s_writeQ); - - if (cacheq) - MOV32MtoR(x86temp, (uptr)&s_TotalVUCycles); - - if (!(type & INST_Q_READ)) - { - if (s_recWriteQ == 0) - { - OR32RtoR(EAX, EAX); - pjmp = JS8(0); - MOV32MtoR(EAX, SuperVUGetVIAddr(REG_Q, 0)); - MOV32RtoM(SuperVUGetVIAddr(REG_Q, 1), EAX); - x86SetJ8(pjmp); - } - else if (s_needFlush & 1) - { - MOV32MtoR(EAX, SuperVUGetVIAddr(REG_Q, 0)); - MOV32RtoM(SuperVUGetVIAddr(REG_Q, 1), EAX); - s_needFlush &= ~1; - } - } - - // write new Q - if (cacheq) - { - assert(s_pCurInst->pqcycles > 1); - ADD32ItoR(x86temp, s_pCurInst->info.cycle + s_pCurInst->pqcycles); - MOV32RtoM((uptr)&s_writeQ, x86temp); - s_needFlush |= 1; - } - else - { - // won't be writing back - s_WriteToReadQ = 1; - s_needFlush &= ~1; - MOV32ItoM((uptr)&s_writeQ, 0x80000001); - } - - s_recWriteQ = s_pCurInst->info.cycle + s_pCurInst->pqcycles; - - if (x86temp >= 0) - _freeX86reg(x86temp); - } - - if (regs[0].VIwrite & (1 << REG_P)) - { - int x86temp = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); - - // new is written so flush old - if (!(type & INST_P_READ) && s_recWriteP == 0) - MOV32MtoR(EAX, (uptr)&s_writeP); - MOV32MtoR(x86temp, (uptr)&s_TotalVUCycles); - - if (!(type & INST_P_READ)) - { - if (s_recWriteP == 0) - { - OR32RtoR(EAX, EAX); - pjmp = JS8(0); - MOV32MtoR(EAX, SuperVUGetVIAddr(REG_P, 0)); - MOV32RtoM(SuperVUGetVIAddr(REG_P, 1), EAX); - x86SetJ8(pjmp); - } - else if (s_needFlush & 2) - { - MOV32MtoR(EAX, SuperVUGetVIAddr(REG_P, 0)); - MOV32RtoM(SuperVUGetVIAddr(REG_P, 1), EAX); - s_needFlush &= ~2; - } - } - - // write new P - assert(s_pCurInst->pqcycles > 1); - ADD32ItoR(x86temp, s_pCurInst->info.cycle + s_pCurInst->pqcycles); - MOV32RtoM((uptr)&s_writeP, x86temp); - s_needFlush |= 2; - - s_recWriteP = s_pCurInst->info.cycle + s_pCurInst->pqcycles; - - _freeX86reg(x86temp); - } - - // waitq - if (ptr[0] == 0x800003bf) SuperVUFlush(0, 1); - // waitp - if (ptr[0] == 0x800007bf) SuperVUFlush(1, 1); - -#ifdef PCSX2_DEVBUILD - if (regs[1].VIread & regs[0].VIwrite & ~((1 << REG_Q) | (1 << REG_P) | (1 << REG_VF0_FLAG) | (1 << REG_ACC_FLAG))) - { - 
Console::Notice("*PCSX2*: Warning, VI write to the same reg %x in both lower/upper cycle %x", params regs[1].VIread & regs[0].VIwrite, s_pCurBlock->startpc); - } -#endif - - u32 modewrite = 0; - if (vfwrite[1] >= 0 && xmmregs[vfwrite[1]].inuse && xmmregs[vfwrite[1]].type == XMMTYPE_VFREG && xmmregs[vfwrite[1]].reg == regs[1].VFwrite) - modewrite = xmmregs[vfwrite[1]].mode & MODE_WRITE; - - VU->code = ptr[1]; - s_vuInfo = SetCachedRegs(1, vuxyz); - - if (vfwrite[1] >= 0) - { - assert(regs[1].VFwrite > 0); - - if (vfwrite[0] == vfwrite[1]) - { - //Console::WriteLn("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle %x", params s_pCurBlock->startpc); - } - - if (vfread0[0] == vfwrite[1] || vfread1[0] == vfwrite[1]) - { - assert(regs[0].VFread0 == regs[1].VFwrite || regs[0].VFread1 == regs[1].VFwrite); - assert(vfflush[0] >= 0); - if (modewrite) - { - SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[regs[1].VFwrite], (x86SSERegType)vfwrite[1]); - } - vfregstore = 1; - } - } - - if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; - if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; - - recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo); - _clearNeededXMMregs(); - _clearNeededX86regs(); - - // necessary because status can be set by both upper and lower - if (regs[1].VIwrite & (1 << REG_STATUS_FLAG)) - { - assert(pStatusWrite != 0); - s_PrevStatusWrite = pStatusWrite; - } - - VU->code = ptr[0]; - s_vuInfo = SetCachedRegs(0, vuxyz); - - if (vfregstore) - { - // load - SSE_MOVAPS_M128_to_XMM(vfflush[0], (uptr)&VU->VF[regs[1].VFwrite]); - - assert(xmmregs[vfwrite[1]].mode & MODE_WRITE); - - // replace with vfflush - if (_Fs_ == regs[1].VFwrite) - { - s_vuInfo &= ~PROCESS_EE_SET_S(0xf); - s_vuInfo |= PROCESS_EE_SET_S(vfflush[0]); - } - if (_Ft_ == regs[1].VFwrite) - { - s_vuInfo &= ~PROCESS_EE_SET_T(0xf); - s_vuInfo |= PROCESS_EE_SET_T(vfflush[0]); - } - - xmmregs[vfflush[0]].mode |= MODE_NOFLUSH | MODE_WRITE; // so that lower inst doesn't flush - } - - // notify vuinsts that upper inst is a fmac - if (regs[1].pipe == VUPIPE_FMAC) - s_vuInfo |= PROCESS_VU_SET_FMAC(); - - if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; - if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; - -#ifdef SUPERVU_VIBRANCHDELAY - if (type & INST_CACHE_VI) - { - assert(vicached >= 0); - int cachedreg = _allocX86reg(-1, X86TYPE_VI | (s_vu ? 
X86TYPE_VU1 : 0), vicached, MODE_READ); - MOV32RtoM((uptr)&s_VIBranchDelay, cachedreg); - } -#endif - - // check if inst before branch and the write is the same as the read in the branch (wipeout) -// int oldreg=0; -// if( pc == s_pCurBlock->endpc-16 ) { -// itinst2 = itinst; ++itinst2; -// if( itinst2->regs[0].pipe == VUPIPE_BRANCH && (itinst->regs[0].VIwrite&itinst2->regs[0].VIread) ) { -// -// CALLFunc((u32)branchfn); -// assert( itinst->regs[0].VIwrite & 0xffff ); -// Console::WriteLn("vi write before branch"); -// for(s_CacheVIReg = 0; s_CacheVIReg < 16; ++s_CacheVIReg) { -// if( itinst->regs[0].VIwrite & (1<<s_CacheVIReg) ) -// break; -// } -// -// s_CacheVIX86 = _allocX86reg(-1, X86TYPE_VITEMP, s_CacheVIReg, MODE_WRITE); -// MOV32MtoR(s_CacheVIX86, SuperVUGetVIAddr(s_CacheVIReg, 1)); -// } -// } -// else if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) { -// assert( s_CacheVIX86 > 0 && x86regs[s_CacheVIX86].inuse && x86regs[s_CacheVIX86].reg == s_CacheVIReg && x86regs[s_CacheVIX86].type == X86TYPE_VITEMP ); -// -// oldreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), s_CacheVIReg, MODE_READ); -// x86regs[s_CacheVIX86].needed = 1; -// assert( x86regs[oldreg].mode & MODE_WRITE ); -// -// x86regs[s_CacheVIX86].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0); -// x86regs[oldreg].type = X86TYPE_VITEMP; -// } - - recVU_LOWER_OPCODE[ VU->code >> 25 ](VU, s_vuInfo); - -// if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) { -// // revert -// x86regs[s_CacheVIX86].inuse = 0; -// x86regs[oldreg].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0); -// } - - _clearNeededXMMregs(); - _clearNeededX86regs(); - } - - // clip is always written so ok - if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG)) - { - assert(pClipWrite != 0); - s_PrevClipWrite = pClipWrite; - } - - if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_STATUS_FLAG)) - { - assert(pStatusWrite != 0); - s_PrevStatusWrite = pStatusWrite; - } - - if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_MAC_FLAG)) - { - assert(pMACWrite != 0); - s_PrevMACWrite = pMACWrite; - } -} - -/////////////////////////////////// -// Super VU Recompilation Tables // -/////////////////////////////////// - -void recVUMI_BranchHandle() -{ - int bpc = _recbranchAddr(VU->code); - int curjump = 0; - - if (s_pCurInst->type & INST_BRANCH_DELAY) - { - assert((branch&0x17) != 0x10 && (branch&0x17) != 4); // no jump handling for now - - if ((branch & 0x7) == 3) - { - // previous was a direct jump - curjump = 1; - } - else if (branch & 1) curjump = 2; - } - - assert(s_JumpX86 > 0); - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); - MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR - s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr - 1; - - if (!(s_pCurInst->type & INST_BRANCH_DELAY)) - { - j8Ptr[1] = JMP8(0); - x86SetJ8(j8Ptr[ 0 ]); - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc + 8); - MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR - s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr - 1; - - x86SetJ8(j8Ptr[ 1 ]); - } - else - x86SetJ8(j8Ptr[ 0 ]); - - branch |= 1; -} - -// supervu specific insts -void recVUMI_IBQ_prep() -{ - int isreg, itreg; - - if (_Is_ == 0) - { -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _It_) - { - itreg = -1; - } - else -#endif - { - itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ?
X86TYPE_VU1 : 0), _It_, MODE_READ); - } - - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - - if (itreg >= 0) - { - CMP16ItoR(itreg, 0); - } - else CMP16ItoM(SuperVUGetVIAddr(_It_, 1), 0); - } - else if (_It_ == 0) - { -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - - if (isreg >= 0) - { - CMP16ItoR(isreg, 0); - } - else CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0); - - } - else - { - _addNeededX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _It_) - { - itreg = -1; - - if (isreg <= 0) - { - // allocate fsreg - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = _allocX86reg(-1, X86TYPE_TEMP, 0, MODE_READ | MODE_WRITE); - MOV32MtoR(isreg, SuperVUGetVIAddr(_Is_, 1)); - } - else - isreg = _allocX86reg(-1, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - } - else -#endif - { - itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_, MODE_READ); - } - - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - - if (isreg >= 0) - { - if (itreg >= 0) - { - CMP16RtoR(isreg, itreg); - } - else CMP16MtoR(isreg, SuperVUGetVIAddr(_It_, 1)); - } - else if (itreg >= 0) - { - CMP16MtoR(itreg, SuperVUGetVIAddr(_Is_, 1)); - } - else - { - isreg = _allocX86reg(-1, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - CMP16MtoR(isreg, SuperVUGetVIAddr(_It_, 1)); - } - } -} - -void recVUMI_IBEQ(VURegs* vuu, s32 info) -{ - recVUMI_IBQ_prep(); - j8Ptr[ 0 ] = JNE8(0); - recVUMI_BranchHandle(); -} - -void recVUMI_IBGEZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - TEST16RtoR(isreg, isreg); - j8Ptr[ 0 ] = JS8(0); - } - else - { - CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); - j8Ptr[ 0 ] = JL8(0); - } - - recVUMI_BranchHandle(); -} - -void recVUMI_IBGTZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - CMP16ItoR(isreg, 0); - j8Ptr[ 0 ] = JLE8(0); - } - else - { - CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); - j8Ptr[ 0 ] = JLE8(0); - } - recVUMI_BranchHandle(); -} - -void recVUMI_IBLEZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? 
X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - CMP16ItoR(isreg, 0); - j8Ptr[ 0 ] = JG8(0); - } - else - { - CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); - j8Ptr[ 0 ] = JG8(0); - } - recVUMI_BranchHandle(); -} - -void recVUMI_IBLTZ(VURegs* vuu, s32 info) -{ - int isreg; - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - -#ifdef SUPERVU_VIBRANCHDELAY - if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) - { - isreg = -1; - } - else -#endif - { - isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - } - - if (isreg >= 0) - { - TEST16RtoR(isreg, isreg); - j8Ptr[ 0 ] = JNS8(0); - } - else - { - CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); - j8Ptr[ 0 ] = JGE8(0); - } - recVUMI_BranchHandle(); -} - -void recVUMI_IBNE(VURegs* vuu, s32 info) -{ - recVUMI_IBQ_prep(); - j8Ptr[ 0 ] = JE8(0); - recVUMI_BranchHandle(); -} - -void recVUMI_B(VURegs* vuu, s32 info) -{ - // supervu will take care of the rest - int bpc = _recbranchAddr(VU->code); - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); - - // loops to self, so check condition - if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION)) - { - SuperVUTestVU0Condition(0); - } - - if (s_pCurBlock->blocks.size() > 1) - { - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - MOV32ItoR(s_JumpX86, 1); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1; - s_UnconditionalDelay = 1; - } - - branch |= 3; -} - -void recVUMI_BAL(VURegs* vuu, s32 info) -{ - int bpc = _recbranchAddr(VU->code); - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) - MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); - - // loops to self, so check condition - if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION)) - { - SuperVUTestVU0Condition(0); - } - - if (_It_) - { - _deleteX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_, 2); - MOV16ItoM(SuperVUGetVIAddr(_It_, 0), (pc + 8) >> 3); - } - - if (s_pCurBlock->blocks.size() > 1) - { - s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); - MOV32ItoR(s_JumpX86, 1); - s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1; - s_UnconditionalDelay = 1; - } - - branch |= 3; -} - -void recVUMI_JR(VURegs* vuu, s32 info) -{ - int isreg = _allocX86reg(-1, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - LEA32RStoR(EAX, isreg, 3); - - //Mask the address to something valid - if (vuu == &VU0) - AND32ItoR(EAX, 0xfff); - else - AND32ItoR(EAX, 0x3fff); - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX); - - if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP)) - { - PUSH32I(s_vu); - PUSH32R(EAX); - } - branch |= 0x10; // 0x08 is reserved -} - -void recVUMI_JALR(VURegs* vuu, s32 info) -{ - _addNeededX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_); - - int isreg = _allocX86reg(-1, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - LEA32RStoR(EAX, isreg, 3); - - //Mask the address to something valid - if (vuu == &VU0) - AND32ItoR(EAX, 0xfff); - else - AND32ItoR(EAX, 0x3fff); - - if (_It_) - { - _deleteX86reg(X86TYPE_VI | (s_vu ? 
X86TYPE_VU1 : 0), _It_, 2); - MOV16ItoM(SuperVUGetVIAddr(_It_, 0), (pc + 8) >> 3); - } - - if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX); - - if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP)) - { - PUSH32I(s_vu); - PUSH32R(EAX); - } - - branch |= 4; -} - -#ifdef PCSX2_DEVBUILD -void vu1xgkick(u32* pMem, u32 addr) -{ - assert(addr < 0x4000); -#ifdef SUPERVU_COUNT - StopSVUCounter(); -#endif - - GSGIFTRANSFER1(pMem, addr); - -#ifdef SUPERVU_COUNT - StartSVUCounter(); -#endif -} -#endif - -void recVUMI_XGKICK_(VURegs *VU) -{ - assert(s_XGKICKReg > 0 && x86regs[s_XGKICKReg].inuse && x86regs[s_XGKICKReg].type == X86TYPE_VITEMP); - - x86regs[s_XGKICKReg].inuse = 0; // so free doesn't flush - _freeX86regs(); - _freeXMMregs(); - - PUSH32R(s_XGKICKReg); - PUSH32I((uptr)VU->Mem); - - //CALLFunc((u32)countfn); - - if (mtgsThread != NULL) - { - CALLFunc((uptr)VU1XGKICK_MTGSTransfer); - ADD32ItoR(ESP, 8); - } - else - { -#ifdef PCSX2_DEVBUILD - CALLFunc((uptr)vu1xgkick); - ADD32ItoR(ESP, 8); -#else - CALLFunc((uptr)GSgifTransfer1); -#endif - } - psHu32(GIF_STAT) &= ~(GIF_STAT_APATH1 | GIF_STAT_OPH); //Probably should be in the recompilation but im a rec nab :( (Refraction) - s_ScheduleXGKICK = 0; -} - -void recVUMI_XGKICK(VURegs *VU, int info) -{ - if (s_ScheduleXGKICK) - { - // second xgkick, so launch the first - recVUMI_XGKICK_(VU); - } - - int isreg = _allocX86reg(X86ARG2, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); - _freeX86reg(isreg); // flush - x86regs[isreg].inuse = 1; - x86regs[isreg].type = X86TYPE_VITEMP; - x86regs[isreg].needed = 1; - x86regs[isreg].mode = MODE_WRITE | MODE_READ; - SHL32ItoR(isreg, 4); - AND32ItoR(isreg, 0x3fff); - s_XGKICKReg = isreg; - psHu32(GIF_STAT) |= (GIF_STAT_APATH1 | GIF_STAT_OPH); //Probably should be in the recompilation but im a rec nab :( (Refraction) - - - if (!SUPERVU_XGKICKDELAY || pc == s_pCurBlock->endpc) - { - recVUMI_XGKICK_(VU); - } - else - { - // Erementar Gerad hack. - // Corrects the color of some graphics on a game that has somewhat scrambled graphics either way, and only works with ZeroGS. Not even ZZOgl. 
:) - if (g_VUGameFixes & VUFIX_XGKICKDELAY2) - s_ScheduleXGKICK = min((u32)4, (s_pCurBlock->endpc - pc) / 8); - else - s_ScheduleXGKICK = 2; - } -} - -void recVU_UPPER_FD_00(VURegs* VU, s32 info); -void recVU_UPPER_FD_01(VURegs* VU, s32 info); -void recVU_UPPER_FD_10(VURegs* VU, s32 info); -void recVU_UPPER_FD_11(VURegs* VU, s32 info); -void recVULowerOP(VURegs* VU, s32 info); -void recVULowerOP_T3_00(VURegs* VU, s32 info); -void recVULowerOP_T3_01(VURegs* VU, s32 info); -void recVULowerOP_T3_10(VURegs* VU, s32 info); -void recVULowerOP_T3_11(VURegs* VU, s32 info); -void recVUunknown(VURegs* VU, s32 info); - -void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info) = -{ - recVUMI_LQ , recVUMI_SQ , recVUunknown , recVUunknown, - recVUMI_ILW , recVUMI_ISW , recVUunknown , recVUunknown, - recVUMI_IADDIU, recVUMI_ISUBIU, recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_FCEQ , recVUMI_FCSET , recVUMI_FCAND, recVUMI_FCOR, /* 0x10 */ - recVUMI_FSEQ , recVUMI_FSSET , recVUMI_FSAND, recVUMI_FSOR, - recVUMI_FMEQ , recVUunknown , recVUMI_FMAND, recVUMI_FMOR, - recVUMI_FCGET , recVUunknown , recVUunknown , recVUunknown, - recVUMI_B , recVUMI_BAL , recVUunknown , recVUunknown, /* 0x20 */ - recVUMI_JR , recVUMI_JALR , recVUunknown , recVUunknown, - recVUMI_IBEQ , recVUMI_IBNE , recVUunknown , recVUunknown, - recVUMI_IBLTZ , recVUMI_IBGTZ , recVUMI_IBLEZ, recVUMI_IBGEZ, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVULowerOP , recVUunknown , recVUunknown , recVUunknown, /* 0x40*/ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x50 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x60 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x70 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, -}; - -void (*recVULowerOP_T3_00_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_MOVE , recVUMI_LQI , recVUMI_DIV , recVUMI_MTIR, - recVUMI_RNEXT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUMI_MFP , recVUMI_XTOP , recVUMI_XGKICK, - recVUMI_ESADD , recVUMI_EATANxy, recVUMI_ESQRT, recVUMI_ESIN, -}; - -void (*recVULowerOP_T3_01_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_MR32 , 
recVUMI_SQI , recVUMI_SQRT , recVUMI_MFIR, - recVUMI_RGET , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUMI_XITOP, recVUunknown, - recVUMI_ERSADD, recVUMI_EATANxz, recVUMI_ERSQRT, recVUMI_EATAN, -}; - -void (*recVULowerOP_T3_10_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUMI_LQD , recVUMI_RSQRT, recVUMI_ILWR, - recVUMI_RINIT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_ELENG , recVUMI_ESUM , recVUMI_ERCPR, recVUMI_EEXP, -}; - -void (*recVULowerOP_T3_11_OPCODE[32])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUMI_SQD , recVUMI_WAITQ, recVUMI_ISWR, - recVUMI_RXOR , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_ERLENG, recVUunknown , recVUMI_WAITP, recVUunknown, -}; - -void (*recVULowerOP_OPCODE[64])(VURegs* VU, s32 info) = -{ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x20 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUMI_IADD , recVUMI_ISUB , recVUMI_IADDI, recVUunknown, /* 0x30 */ - recVUMI_IAND , recVUMI_IOR , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVULowerOP_T3_00, recVULowerOP_T3_01, recVULowerOP_T3_10, recVULowerOP_T3_11, -}; - -void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info) = -{ - recVUMI_ADDx , recVUMI_ADDy , recVUMI_ADDz , recVUMI_ADDw, - recVUMI_SUBx , recVUMI_SUBy , recVUMI_SUBz , recVUMI_SUBw, - recVUMI_MADDx , recVUMI_MADDy , recVUMI_MADDz , recVUMI_MADDw, - recVUMI_MSUBx , recVUMI_MSUBy , recVUMI_MSUBz , recVUMI_MSUBw, - recVUMI_MAXx , recVUMI_MAXy , recVUMI_MAXz , recVUMI_MAXw, /* 0x10 */ - recVUMI_MINIx , recVUMI_MINIy , recVUMI_MINIz , recVUMI_MINIw, - recVUMI_MULx , recVUMI_MULy , recVUMI_MULz , recVUMI_MULw, - recVUMI_MULq , recVUMI_MAXi , recVUMI_MULi , recVUMI_MINIi, - recVUMI_ADDq , recVUMI_MADDq , recVUMI_ADDi , recVUMI_MADDi, /* 0x20 */ - recVUMI_SUBq , recVUMI_MSUBq , recVUMI_SUBi , recVUMI_MSUBi, - recVUMI_ADD , recVUMI_MADD , recVUMI_MUL , recVUMI_MAX, - recVUMI_SUB , recVUMI_MSUB , recVUMI_OPMSUB, recVUMI_MINI, - recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */ - recVUunknown , recVUunknown , recVUunknown , recVUunknown, - recVUunknown , recVUunknown , recVUunknown , 
recVUunknown, - recVU_UPPER_FD_00, recVU_UPPER_FD_01, recVU_UPPER_FD_10, recVU_UPPER_FD_11, -}; - -void (*recVU_UPPER_FD_00_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAx, recVUMI_SUBAx , recVUMI_MADDAx, recVUMI_MSUBAx, - recVUMI_ITOF0, recVUMI_FTOI0, recVUMI_MULAx , recVUMI_MULAq , - recVUMI_ADDAq, recVUMI_SUBAq, recVUMI_ADDA , recVUMI_SUBA , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void (*recVU_UPPER_FD_01_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAy , recVUMI_SUBAy , recVUMI_MADDAy, recVUMI_MSUBAy, - recVUMI_ITOF4 , recVUMI_FTOI4 , recVUMI_MULAy , recVUMI_ABS , - recVUMI_MADDAq, recVUMI_MSUBAq, recVUMI_MADDA , recVUMI_MSUBA , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void (*recVU_UPPER_FD_10_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAz , recVUMI_SUBAz , recVUMI_MADDAz, recVUMI_MSUBAz, - recVUMI_ITOF12, recVUMI_FTOI12, recVUMI_MULAz , recVUMI_MULAi , - recVUMI_ADDAi, recVUMI_SUBAi , recVUMI_MULA , recVUMI_OPMULA, - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void (*recVU_UPPER_FD_11_TABLE[32])(VURegs* VU, s32 info) = -{ - recVUMI_ADDAw , recVUMI_SUBAw , recVUMI_MADDAw, recVUMI_MSUBAw, - recVUMI_ITOF15, recVUMI_FTOI15, recVUMI_MULAw , recVUMI_CLIP , - recVUMI_MADDAi, recVUMI_MSUBAi, recVUunknown , recVUMI_NOP , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , - recVUunknown , recVUunknown , recVUunknown , recVUunknown , -}; - -void recVU_UPPER_FD_00(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_00_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVU_UPPER_FD_01(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_01_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVU_UPPER_FD_10(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_10_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVU_UPPER_FD_11(VURegs* VU, s32 info) -{ - recVU_UPPER_FD_11_TABLE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP(VURegs* VU, s32 info) -{ - recVULowerOP_OPCODE[ VU->code & 0x3f ](VU, info); -} - -void recVULowerOP_T3_00(VURegs* VU, s32 info) -{ - recVULowerOP_T3_00_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP_T3_01(VURegs* VU, s32 info) -{ - recVULowerOP_T3_01_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP_T3_10(VURegs* VU, s32 info) -{ - recVULowerOP_T3_10_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVULowerOP_T3_11(VURegs* VU, s32 info) -{ - recVULowerOP_T3_11_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); -} - -void recVUunknown(VURegs* VU, s32 info) -{ - Console::Notice("Unknown SVU micromode opcode called"); -} +/* Pcsx2 - 
Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +// Super VU recompiler - author: zerofrog(@gmail.com) + +#include "PrecompiledHeader.h" + +#include +#include +#include +#include +#include + +#ifndef _WIN32 +#include +#endif + +#include "Common.h" + +#include "GS.h" +#include "R5900.h" +#include "VU.h" +#include "iR5900.h" + +#include "sVU_zerorec.h" +#include "SamplProf.h" +#include "NakedAsm.h" + +using namespace std; + +// temporary externs +extern void iDumpVU0Registers(); +extern void iDumpVU1Registers(); + +// SuperVURec optimization options, uncomment only for debugging purposes +#define SUPERVU_CACHING // vu programs are saved and queried via memcompare (should be no reason to disable this) +#define SUPERVU_WRITEBACKS // don't flush the writebacks after every block +#define SUPERVU_X86CACHING // use x86reg caching (faster) (not really. rather lots slower :p (rama) ) +#define SUPERVU_VIBRANCHDELAY // when integers are modified right before a branch that uses the integer, + // the old integer value is used in the branch, fixes kh2 + +#define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on... + +// registers won't be flushed at block boundaries (faster) (nothing noticeable speed-wise, causes SPS in Ratchet and Clank (Nneeve) ) +#ifndef _DEBUG +//#define SUPERVU_INTERCACHING +#endif + +#define SUPERVU_CHECKCONDITION 0 // has to be 0!! + +#define VU_EXESIZE 0x00800000 + +#define _Imm11_ (s32)( (vucode & 0x400) ? (0xfffffc00 | (vucode & 0x3ff)) : (vucode & 0x3ff) ) +#define _UImm11_ (s32)(vucode & 0x7ff) + +#define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register +#define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register +#define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register +#define _It_ (_Ft_ & 15) +#define _Is_ (_Fs_ & 15) +#define _Id_ (_Fd_ & 15) + +static const u32 QWaitTimes[] = { 6, 12 }; +static const u32 PWaitTimes[] = { 53, 43, 28, 23, 17, 11, 10 }; + +static u32 s_vuInfo; // info passed into rec insts + +static const u32 s_MemSize[2] = {VU0_MEMSIZE, VU1_MEMSIZE}; +static u8* s_recVUMem = NULL, *s_recVUPtr = NULL; + +// tables which are defined at the bottom of this massive file.
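+// A quick sketch of how the macros above take a VU opcode apart (illustrative values only, not taken from any real program): each instruction slot is a 64-bit pair, lower word first in memory (mem[0]) and upper word second (mem[1]), which is why pc always steps by 8. _Ft_/_Fs_/_Fd_ extract the 5-bit register fields from bits 16-20, 11-15 and 6-10 of the 32-bit word, and _It_/_Is_/_Id_ mask them down to 4 bits because there are only 16 VI registers. _Imm11_ sign-extends the 11-bit branch offset, measured in instruction pairs: a raw field of 0x7ff becomes -1, so _recbranchAddr() further down computes pc + (-1 << 3), i.e. one 8-byte pair backwards.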
+extern void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info); +extern void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info); + +#define INST_Q_READ 0x0001 // flush Q +#define INST_P_READ 0x0002 // flush P +#define INST_BRANCH_DELAY 0x0004 +#define INST_CLIP_WRITE 0x0040 // inst writes CLIP in the future +#define INST_STATUS_WRITE 0x0080 +#define INST_MAC_WRITE 0x0100 +#define INST_Q_WRITE 0x0200 +#define INST_CACHE_VI 0x0400 // write old vi value to s_VIBranchDelay + +// Let's tempt fate by defining two different constants with almost identical names +#define INST_DUMMY_ 0x8000 +#define INST_DUMMY 0x83c0 + +#define VFFREE_INVALID0 0x80000000 // (vffree[i]&0xf) is invalid + +//#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it)) + +#ifdef _DEBUG +u32 s_vucount = 0; + +static u32 g_vu1lastrec = 0, skipparent = -1; +static u32 s_svulast = 0, s_vufnheader; +static u32 badaddrs[][2] = {0, 0xffff}; +#endif + +union VURecRegs +{ + struct + { + u16 reg; + u16 type; + }; + u32 id; +}; + +#define SUPERVU_XGKICKDELAY 1 // yes this is needed as default (wipeout) + +class VuBaseBlock; + +struct VuFunctionHeader +{ + struct RANGE + { + RANGE() : pmem(NULL) {} + + u16 start, size; + void* pmem; // all the mem + }; + + VuFunctionHeader() : pprogfunc(NULL), startpc(0xffffffff) {} + ~VuFunctionHeader() + { + for (vector<RANGE>::iterator it = ranges.begin(); it != ranges.end(); ++it) + { + free(it->pmem); + } + } + + // returns true if the checksum for the current mem is the same as this fn + bool IsSame(void* pmem); + + u32 startpc; + void* pprogfunc; + + vector<RANGE> ranges; +}; + +struct VuBlockHeader +{ + VuBaseBlock* pblock; + u32 delay; +}; + +// one vu inst (lower and upper) +class VuInstruction +{ + public: + VuInstruction() + { + memzero_obj(*this); + nParentPc = -1; + vicached = -1; + } + + int nParentPc; // used for syncing with flag writes, -1 for no parent + + _vuopinfo info; + + _VURegsNum regs[2]; // [0] - lower, [1] - upper + u32 livevars[2]; // live variables right before this inst, [0] - inst, [1] - float + u32 addvars[2]; // live variables to add + u32 usedvars[2]; // set if var is used in the future including vars used in this inst + u32 keepvars[2]; + u16 pqcycles; // the number of cycles to stall if function writes to the regs + u16 type; // INST_ + + u32 pClipWrite, pMACWrite, pStatusWrite; // addrs to write the flags + u32 vffree[2]; + s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2]; + s8 vfflush[2]; // extra flush regs + s8 vicached; // if >= 0, then use the cached integer s_VIBranchDelay + VuInstruction *pPrevInst; + + int SetCachedRegs(int upper, u32 vuxyz); + void Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz); +}; + +enum BlockType +{ + BLOCKTYPE_EOP = 0x01, // at least one of the children of the block contains eop (or the block itself) + BLOCKTYPE_FUNCTION = 0x02, + BLOCKTYPE_HASEOP = 0x04, // last inst of block is an eop + BLOCKTYPE_MACFLAGS = 0x08, + BLOCKTYPE_ANALYZED = 0x40, + BLOCKTYPE_IGNORE = 0x80, // special for recursive fns + BLOCKTYPE_ANALYZEDPARENT = 0x100 +}; + +// base block used when recompiling +class VuBaseBlock +{ + public: + typedef list<VuBaseBlock*> LISTBLOCKS; + + VuBaseBlock(); + + // returns true if this leads to an EOP (ALL VU blocks must ret true) + void AssignVFRegs(); + void AssignVIRegs(int parent); + + list<VuInstruction>::iterator GetInstIterAtPc(int instpc); + void GetInstsAtPc(int instpc, list<VuInstruction*>& listinsts); + + void Recompile(); + + u16 type; // BLOCKTYPE_ + u16 id; + u16 startpc; + u16 endpc; // first inst not in block + void* pcode; // x86 code pointer + void* pendcode; //
end of the x86 code pointer + int cycles; + list<VuInstruction> insts; + list<VuBaseBlock*> parents; + LISTBLOCKS blocks; // blocks branches to + u32* pChildJumps[4]; // addrs that need to be filled with the children's start addrs + // if highest bit is set, addr needs to be relational + u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only + u32 vuxy; // corresponding bit is set if reg's xy channels are used only + + _xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM]; + int nStartx86, nEndx86; // indices into s_vecRegArray + + int allocX86Regs; + int prevFlagsOutOfBlock; +}; + +struct WRITEBACK +{ + WRITEBACK() : nParentPc(0), cycle(0) //, pStatusWrite(NULL), pMACWrite(NULL) + { + viwrite[0] = viwrite[1] = 0; + viread[0] = viread[1] = 0; + } + + void InitInst(VuInstruction* pinst, int cycle) const + { + u32 write = viwrite[0] | viwrite[1]; + pinst->type = ((write & (1 << REG_CLIP_FLAG)) ? INST_CLIP_WRITE : 0) | + ((write & (1 << REG_MAC_FLAG)) ? INST_MAC_WRITE : 0) | + ((write & (1 << REG_STATUS_FLAG)) ? INST_STATUS_WRITE : 0) | + ((write & (1 << REG_Q)) ? INST_Q_WRITE : 0); + pinst->nParentPc = nParentPc; + pinst->info.cycle = cycle; + for (int i = 0; i < 2; ++i) + { + pinst->regs[i].VIwrite = viwrite[i]; + pinst->regs[i].VIread = viread[i]; + } + } + + static int SortWritebacks(const WRITEBACK& w1, const WRITEBACK& w2) + { + return w1.cycle < w2.cycle; + } + + int nParentPc; + int cycle; + u32 viwrite[2]; + u32 viread[2]; +}; + +struct VUPIPELINES +{ + fmacPipe fmac[8]; + fdivPipe fdiv; + efuPipe efu; + ialuPipe ialu[8]; + list< WRITEBACK > listWritebacks; +}; + +VuBaseBlock::VuBaseBlock() +{ + type = 0; + endpc = 0; + cycles = 0; + pcode = NULL; + id = 0; + memzero_obj(pChildJumps); + memzero_obj(startregs); + memzero_obj(endregs); + allocX86Regs = nStartx86 = nEndx86 = -1; + prevFlagsOutOfBlock = 0; +} + +#define SUPERVU_STACKSIZE 0x1000 + +static list<VuFunctionHeader*> s_listVUHeaders[2]; +static list<VuFunctionHeader*>* s_plistCachedHeaders[2] = {NULL, NULL}; +static VuFunctionHeader** recVUHeaders[2] = {NULL, NULL}; +static VuBlockHeader* recVUBlocks[2] = {NULL, NULL}; +static u8* recVUStack = NULL, *recVUStackPtr = NULL; +static vector<_x86regs> s_vecRegArray(128); + +static VURegs* VU = NULL; +static list<VuBaseBlock*> s_listBlocks; +static u32 s_vu = 0; +static u32 s_UnconditionalDelay = 0; // 1 if there are two sequential branches and the last is unconditional +static u32 g_nLastBlockExecuted = 0; + +static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex); +static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes); +static void SuperVUInitLiveness(VuBaseBlock* pblock); +static void SuperVULivenessAnalysis(); +static void SuperVUEliminateDeadCode(); +static void SuperVUAssignRegs(); + +//void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg); +#define SuperVUFreeXMMreg 0&& +void SuperVUFreeXMMregs(u32* livevars); + +static u32* SuperVUStaticAlloc(u32 size); +static void SuperVURecompile(); + +// allocate VU resources +void SuperVUAlloc(int vuindex) +{ + // The old -1 crap has been deprecated on this function. Please + // specify either 0 or 1, thanks. + jASSUME(vuindex >= 0); + + if (s_recVUMem == NULL) + { + // upper 4 bits must be zero! + // Changed "first try base" to 0xf1e0000, since 0x0c000000 liked to fail a lot.
(cottonvibes) + s_recVUMem = SysMmapEx(0xf1e0000, VU_EXESIZE, 0x10000000, "SuperVUAlloc"); + + if (s_recVUMem == NULL) + { + throw Exception::OutOfMemory( + fmt_string("SuperVU Error > failed to allocate recompiler memory (addr: 0x%x)", (u32)s_recVUMem) + ); + } + + ProfilerRegisterSource("VURec", s_recVUMem, VU_EXESIZE); + + if (recVUStack == NULL) recVUStack = new u8[SUPERVU_STACKSIZE * 4]; + } + + if (vuindex >= 0) + { + jASSUME(s_recVUMem != NULL); + + if (recVUHeaders[vuindex] == NULL) + recVUHeaders[vuindex] = new VuFunctionHeader* [s_MemSize[vuindex] / 8]; + if (recVUBlocks[vuindex] == NULL) + recVUBlocks[vuindex] = new VuBlockHeader[s_MemSize[vuindex] / 8]; + if (s_plistCachedHeaders[vuindex] == NULL) + s_plistCachedHeaders[vuindex] = new list[s_MemSize[vuindex] / 8]; + } +} + +void DestroyCachedHeaders(int vuindex, int j) +{ + list::iterator it = s_plistCachedHeaders[vuindex][j].begin(); + + while (it != s_plistCachedHeaders[vuindex][j].end()) + { + delete *it; + it++; + } + + s_plistCachedHeaders[vuindex][j].clear(); +} + +void DestroyVUHeaders(int vuindex) +{ + list::iterator it = s_listVUHeaders[vuindex].begin(); + + while (it != s_listVUHeaders[vuindex].end()) + { + delete *it; + it++; + } + + s_listVUHeaders[vuindex].clear(); +} + +// destroy VU resources +void SuperVUDestroy(int vuindex) +{ + list::iterator it; + + if (vuindex < 0) + { + SuperVUDestroy(0); + SuperVUDestroy(1); + ProfilerTerminateSource("VURec"); + SafeSysMunmap(s_recVUMem, VU_EXESIZE); + safe_delete_array(recVUStack); + } + else + { + safe_delete_array(recVUHeaders[vuindex]); + safe_delete_array(recVUBlocks[vuindex]); + + if (s_plistCachedHeaders[vuindex] != NULL) + { + for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j) + { + DestroyCachedHeaders(vuindex, j); + } + safe_delete_array(s_plistCachedHeaders[vuindex]); + } + DestroyVUHeaders(vuindex); + } +} + +// reset VU +void SuperVUReset(int vuindex) +{ +#ifdef _DEBUG + s_vucount = 0; +#endif + + if (s_recVUMem == NULL) + return; + + //jASSUME( s_recVUMem != NULL ); + + if (vuindex < 0) + { + DbgCon::Status("SuperVU reset > Resetting recompiler memory and structures."); + + // Does this cause problems on VU recompiler resets? It could, if the VU works like + // the EE used to, and actually tries to re-enter the recBlock after issuing a clear. 
(air) + + //memset_8<0xcd, VU_EXESIZE>(s_recVUMem); + memzero_ptr(recVUStack); + + s_recVUPtr = s_recVUMem; + } + else + { + DbgCon::Status("SuperVU reset [VU%d] > Resetting the recs and junk", params vuindex); + list::iterator it; + if (recVUHeaders[vuindex]) memset(recVUHeaders[vuindex], 0, sizeof(VuFunctionHeader*) * (s_MemSize[vuindex] / 8)); + if (recVUBlocks[vuindex]) memset(recVUBlocks[vuindex], 0, sizeof(VuBlockHeader) * (s_MemSize[vuindex] / 8)); + + if (s_plistCachedHeaders[vuindex] != NULL) + { + for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j) + { + DestroyCachedHeaders(vuindex, j); + } + } + DestroyVUHeaders(vuindex); + } +} + +// clear the block and any joining blocks +void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex) +{ + vector::iterator itrange; + list::iterator it = s_listVUHeaders[vuindex].begin(); + u32 endpc = startpc + (size + (8 - (size & 7))); // Adding this code to ensure size is always a multiple of 8, it can be simplified to startpc+size if size is always a multiple of 8 (cottonvibes) + while (it != s_listVUHeaders[vuindex].end()) + { + + // for every fn, check if it has code in the range + for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++) + { + if (startpc < (u32)itrange->start + itrange->size && itrange->start < endpc) + break; + } + + if (itrange != (*it)->ranges.end()) + { + recVUHeaders[vuindex][(*it)->startpc/8] = NULL; +#ifdef SUPERVU_CACHING + list* plist = &s_plistCachedHeaders[vuindex][(*it)->startpc/8]; + plist->push_back(*it); + if (plist->size() > 30) + { + // list is too big, delete + //Console::Notice("Performance warning: deleting cached VU program!"); + delete plist->front(); + plist->pop_front(); + } + it = s_listVUHeaders[vuindex].erase(it); +#else + delete *it; + it = s_listVUHeaders[vuindex].erase(it); +#endif + } + else ++it; + } +} + +static VuFunctionHeader* s_pFnHeader = NULL; +static VuBaseBlock* s_pCurBlock = NULL; +static VuInstruction* s_pCurInst = NULL; +static u32 s_StatusRead = 0, s_MACRead = 0, s_ClipRead = 0; // read addrs +static u32 s_PrevStatusWrite = 0, s_PrevMACWrite = 0, s_PrevClipWrite = 0, s_PrevIWrite = 0; +static u32 s_WriteToReadQ = 0; + +static u32 s_VIBranchDelay = 0; //Value of register to use in a vi branch delayed situation + + +u32 s_TotalVUCycles; // total cycles since start of program execution + + +u32 SuperVUGetVIAddr(int reg, int read) +{ + assert(s_pCurInst != NULL); + + switch (reg) + { + case REG_STATUS_FLAG: + { + u32 addr = (read == 2) ? s_PrevStatusWrite : (read ? s_StatusRead : s_pCurInst->pStatusWrite); + assert(!read || addr != 0); + return addr; + } + case REG_MAC_FLAG: + { + u32 addr = (read == 2) ? s_PrevMACWrite : (read ? s_MACRead : s_pCurInst->pMACWrite); + return addr; + } + case REG_CLIP_FLAG: + { + u32 addr = (read == 2) ? s_PrevClipWrite : (read ? s_ClipRead : s_pCurInst->pClipWrite); + assert(!read || addr != 0); + return addr; + } + case REG_Q: + return (read || s_WriteToReadQ) ? (uptr)&VU->VI[REG_Q] : (uptr)&VU->q; + case REG_P: + return read ? 
(uptr)&VU->VI[REG_P] : (uptr)&VU->p; + case REG_I: + return s_PrevIWrite; + } + +#ifdef SUPERVU_VIBRANCHDELAY + if ((read != 0) && (s_pCurInst->regs[0].pipe == VUPIPE_BRANCH) && (s_pCurInst->vicached >= 0) && (s_pCurInst->vicached == reg)) + { + return (uptr)&s_VIBranchDelay; // test for branch delays + } +#endif + + return (uptr)&VU->VI[reg]; +} + +void SuperVUDumpBlock(list& blocks, int vuindex) +{ + FILE *f; + char str[256]; + u32 *mem; + u32 i; + + Path::CreateDirectory("dumps"); + string filename(Path::Combine("dumps", fmt_string("svu%cdump%.4X.txt", s_vu ? '0' : '1', s_pFnHeader->startpc))); + //Console::WriteLn( "dump1 %x => %s", params s_pFnHeader->startpc, filename ); + + f = fopen(filename.c_str(), "w"); + + fprintf(f, "Format: upper_inst lower_inst\ntype f:vf_live_vars vf_used_vars i:vi_live_vars vi_used_vars inst_cycle pq_inst\n"); + fprintf(f, "Type: %.2x - qread, %.2x - pread, %.2x - clip_write, %.2x - status_write\n" + "%.2x - mac_write, %.2x -qflush\n", + INST_Q_READ, INST_P_READ, INST_CLIP_WRITE, INST_STATUS_WRITE, INST_MAC_WRITE, INST_Q_WRITE); + fprintf(f, "XMM: Upper: read0 read1 write acc temp; Lower: read0 read1 write acc temp\n\n"); + + list::iterator itblock; + list::iterator itinst; + VuBaseBlock::LISTBLOCKS::iterator itchild; + + for(itblock = blocks.begin(); itblock != blocks.end(); itblock++) + { + fprintf(f, "block:%c %x-%x; children: ", ((*itblock)->type&BLOCKTYPE_HASEOP) ? '*' : ' ', + (*itblock)->startpc, (*itblock)->endpc - 8); + + for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) + { + fprintf(f, "%x ", (*itchild)->startpc); + } + fprintf(f, "; vuxyz = %x, vuxy = %x\n", (*itblock)->vuxyz&(*itblock)->insts.front().usedvars[1], + (*itblock)->vuxy&(*itblock)->insts.front().usedvars[1]); + + itinst = (*itblock)->insts.begin(); + i = (*itblock)->startpc; + while (itinst != (*itblock)->insts.end()) + { + assert(i <= (*itblock)->endpc); + if (itinst->type & INST_DUMMY) + { + if (itinst->nParentPc >= 0 && !(itinst->type&INST_DUMMY_)) + { + // search for the parent + fprintf(f, "writeback 0x%x (%x)\n", itinst->type, itinst->nParentPc); + } + } + else + { + mem = (u32*) & VU->Micro[i]; + char* pstr = disVU1MicroUF(mem[1], i + 4); + fprintf(f, "%.4x: %-40s", i, pstr); + if (mem[1] & 0x80000000) fprintf(f, " I=%f(%.8x)\n", *(float*)mem, mem[0]); + else fprintf(f, "%s\n", disVU1MicroLF(mem[0], i)); + i += 8; + } + + ++itinst; + } + + fprintf(f, "\n"); + + _x86regs* pregs; + if ((*itblock)->nStartx86 >= 0 || (*itblock)->nEndx86 >= 0) + { + fprintf(f, "X86: AX CX DX BX SP BP SI DI\n"); + } + + if ((*itblock)->nStartx86 >= 0) + { + pregs = &s_vecRegArray[(*itblock)->nStartx86]; + fprintf(f, "STR: "); + for (i = 0; i < iREGCNT_GPR; ++i) + { + if (pregs[i].inuse) + fprintf(f, "%.2d ", pregs[i].reg); + else + fprintf(f, "-1 "); + } + fprintf(f, "\n"); + } + + if ((*itblock)->nEndx86 >= 0) + { + fprintf(f, "END: "); + pregs = &s_vecRegArray[(*itblock)->nEndx86]; + for (i = 0; i < iREGCNT_GPR; ++i) + { + if (pregs[i].inuse) + fprintf(f, "%.2d ", pregs[i].reg); + else + fprintf(f, "-1 "); + } + fprintf(f, "\n"); + } + + itinst = (*itblock)->insts.begin(); + for (i = (*itblock)->startpc; i < (*itblock)->endpc; ++itinst) + { + + if (itinst->type & INST_DUMMY) + { + } + else + { + sprintf(str, "%.4x:%x f:%.8x_%.8x", i, itinst->type, itinst->livevars[1], itinst->usedvars[1]); + fprintf(f, "%-46s i:%.8x_%.8x c:%d pq:%d\n", str, + itinst->livevars[0], itinst->usedvars[0], (int)itinst->info.cycle, (int)itinst->pqcycles); + + sprintf(str, "XMM 
r0:%d r1:%d w:%d a:%d t:%x;", + itinst->vfread0[1], itinst->vfread1[1], itinst->vfwrite[1], itinst->vfacc[1], itinst->vffree[1]); + fprintf(f, "%-46s r0:%d r1:%d w:%d a:%d t:%x\n", str, + itinst->vfread0[0], itinst->vfread1[0], itinst->vfwrite[0], itinst->vfacc[0], itinst->vffree[0]); + i += 8; + } + } + +#ifdef __LINUX__ + // dump the asm + if ((*itblock)->pcode != NULL) + { + char command[255]; + FILE* fasm = fopen("mydump1", "wb"); + //Console::WriteLn("writing: %x, %x", params (*itblock)->startpc, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode); + fwrite((*itblock)->pcode, 1, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode, fasm); + fclose(fasm); + sprintf(command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 > tempdump"); + system(command); + fasm = fopen("tempdump", "r"); + // read all of it and write it to f + fseek(fasm, 0, SEEK_END); + vector vbuffer(ftell(fasm)); + fseek(fasm, 0, SEEK_SET); + fread(&vbuffer[0], vbuffer.size(), 1, fasm); + + fprintf(f, "\n\n"); + fwrite(&vbuffer[0], vbuffer.size(), 1, f); + fclose(fasm); + } +#endif + + fprintf(f, "\n---------------\n"); + } + + fclose(f); +} + +// uncomment to count svu exec time +//#define SUPERVU_COUNT + +// Private methods +void* SuperVUGetProgram(u32 startpc, int vuindex) +{ + assert(startpc < s_MemSize[vuindex]); + assert((startpc % 8) == 0); + assert(recVUHeaders[vuindex] != NULL); + VuFunctionHeader** pheader = &recVUHeaders[vuindex][startpc/8]; + + if (*pheader == NULL) + { +#ifdef _DEBUG +// if( vuindex ) VU1.VI[REG_TPC].UL = startpc; +// else VU0.VI[REG_TPC].UL = startpc; +// __Log("VU: %x\n", startpc); +// iDumpVU1Registers(); +// vudump |= 2; +#endif + + // measure run time + //QueryPerformanceCounter(&svubase); + +#ifdef SUPERVU_CACHING + void* pmem = (vuindex & 1) ? VU1.Micro : VU0.Micro; + // check if program exists in cache + list::iterator it; + for(it = s_plistCachedHeaders[vuindex][startpc/8].begin(); it != s_plistCachedHeaders[vuindex][startpc/8].end(); it++) + { + if ((*it)->IsSame(pmem)) + { + // found, transfer to regular lists + void* pfn = (*it)->pprogfunc; + recVUHeaders[vuindex][startpc/8] = *it; + s_listVUHeaders[vuindex].push_back(*it); + s_plistCachedHeaders[vuindex][startpc/8].erase(it); + return pfn; + } + } +#endif + + *pheader = SuperVURecompileProgram(startpc, vuindex); + + if (*pheader == NULL) + { + assert(s_TotalVUCycles > 0); + if (vuindex) + VU1.VI[REG_TPC].UL = startpc; + else + VU0.VI[REG_TPC].UL = startpc; + + return (void*)SuperVUEndProgram; + } + + //QueryPerformanceCounter(&svufinal); + //svutime += (u32)(svufinal.QuadPart-svubase.QuadPart); + + assert((*pheader)->pprogfunc != NULL); + } + //else assert( (*pheader)->IsSame((vuindex&1) ? 
VU1.Micro : VU0.Micro) ); + + assert((*pheader)->startpc == startpc); + + return (*pheader)->pprogfunc; +} + +bool VuFunctionHeader::IsSame(void* pmem) +{ +#ifdef SUPERVU_CACHING + vector::iterator it; + for(it = ranges.begin(); it != ranges.end(); it++) + { + if (memcmp_mmx((u8*)pmem + it->start, it->pmem, it->size)) + return false; + } +#endif + return true; +} + +list::iterator VuBaseBlock::GetInstIterAtPc(int instpc) +{ + assert(instpc >= 0); + + u32 curpc = startpc; + list::iterator it; + for (it = insts.begin(); it != insts.end(); ++it) + { + if (it->type & INST_DUMMY) continue; + if (curpc == instpc) break; + curpc += 8; + } + + if (it != insts.end()) return it; + + assert(0); + return insts.begin(); +} + +void VuBaseBlock::GetInstsAtPc(int instpc, list& listinsts) +{ + assert(instpc >= 0); + + listinsts.clear(); + + u32 curpc = startpc; + list::iterator it; + for (it = insts.begin(); it != insts.end(); ++it) + { + if (it->type & INST_DUMMY) continue; + if (curpc == instpc) break; + curpc += 8; + } + + if (it != insts.end()) + { + listinsts.push_back(&(*it)); + return; + } + + // look for the pc in other blocks + for (list::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock) + { + if (*itblock == this) continue; + + if (instpc >= (*itblock)->startpc && instpc < (*itblock)->endpc) + { + listinsts.push_back(&(*(*itblock)->GetInstIterAtPc(instpc))); + } + } + + assert(listinsts.size() > 0); +} + +static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex) +{ + assert(vuindex < 2); + assert(s_recVUPtr != NULL); + //Console::WriteLn("svu%c rec: %x", params '0'+vuindex, startpc); + + // if recPtr reached the mem limit reset whole mem + if (((uptr)s_recVUPtr - (uptr)s_recVUMem) >= VU_EXESIZE - 0x40000) + { + //Console::WriteLn("SuperVU reset mem"); + SuperVUReset(0); + SuperVUReset(1); + SuperVUReset(-1); + if (s_TotalVUCycles > 0) + { + // already executing, so return NULL + return NULL; + } + } + + list::iterator itblock; + + s_vu = vuindex; + VU = s_vu ? 
&VU1 : &VU0; + s_pFnHeader = new VuFunctionHeader(); + s_listVUHeaders[vuindex].push_back(s_pFnHeader); + s_pFnHeader->startpc = startpc; + + memset(recVUBlocks[s_vu], 0, sizeof(VuBlockHeader) * (s_MemSize[s_vu] / 8)); + + // analyze the global graph + s_listBlocks.clear(); + VUPIPELINES pipes; + memzero_obj(pipes.fmac); + memzero_obj(pipes.fdiv); + memzero_obj(pipes.efu); + memzero_obj(pipes.ialu); + SuperVUBuildBlocks(NULL, startpc, pipes); + + // fill parents + VuBaseBlock::LISTBLOCKS::iterator itchild; + for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) + { + for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++) + { + (*itchild)->parents.push_back(*itblock); + } + + //(*itblock)->type &= ~(BLOCKTYPE_IGNORE|BLOCKTYPE_ANALYZED); + } + + assert(s_listBlocks.front()->startpc == startpc); + s_listBlocks.front()->type |= BLOCKTYPE_FUNCTION; + + for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) + { + SuperVUInitLiveness(*itblock); + } + + SuperVULivenessAnalysis(); + SuperVUEliminateDeadCode(); + SuperVUAssignRegs(); + +#ifdef _DEBUG + if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu); +#endif + + // code generation + x86SetPtr(s_recVUPtr); + branch = 0; + + SuperVURecompile(); + + s_recVUPtr = x86Ptr; + + // set the function's range + VuFunctionHeader::RANGE r; + s_pFnHeader->ranges.reserve(s_listBlocks.size()); + + for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) + { + r.start = (*itblock)->startpc; + r.size = (*itblock)->endpc - (*itblock)->startpc; +#ifdef SUPERVU_CACHING + //memxor_mmx(r.checksum, &VU->Micro[r.start], r.size); + r.pmem = malloc(r.size); + memcpy_fast(r.pmem, &VU->Micro[r.start], r.size); +#endif + s_pFnHeader->ranges.push_back(r); + } + +#if defined(_DEBUG) && defined(__LINUX__) + // dump at the end to capture the actual code + if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu); +#endif + + // destroy + for (list<VuBaseBlock*>::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock) + { + delete *itblock; + } + s_listBlocks.clear(); + + assert(s_recVUPtr < s_recVUMem + VU_EXESIZE); + + return s_pFnHeader; +} + +static int _recbranchAddr(u32 vucode) +{ + s32 bpc = pc + (_Imm11_ << 3); + /* + if ( bpc < 0 ) { + Console::WriteLn("zerorec branch warning: bpc < 0 ( %x ); Using unsigned imm11", params bpc); + bpc = pc + (_UImm11_ << 3); + }*/ + bpc &= (s_MemSize[s_vu] - 1); + + return bpc; +} + +// return inst that flushes everything +static VuInstruction SuperVUFlushInst() +{ + VuInstruction inst; + // don't need to read q/p + inst.type = INST_DUMMY_;//|INST_Q_READ|INST_P_READ; + return inst; +} + +void SuperVUAddWritebacks(VuBaseBlock* pblock, const list<WRITEBACK>& listWritebacks) +{ +#ifdef SUPERVU_WRITEBACKS + // regardless of repetition, add the pipes (for selfloops) + list<WRITEBACK>::const_iterator itwriteback = listWritebacks.begin(); + list<VuInstruction>::iterator itinst = pblock->insts.begin(), itinst2; + + while (itwriteback != listWritebacks.end()) + { + if (itinst != pblock->insts.end() && (itinst->info.cycle < itwriteback->cycle || (itinst->type&INST_DUMMY))) + { + ++itinst; + continue; + } + + itinst2 = pblock->insts.insert(itinst, VuInstruction()); + itwriteback->InitInst(&(*itinst2), vucycle); + ++itwriteback; + } +#endif +} + +#ifdef SUPERVU_VIBRANCHDELAY +static VuInstruction* getDelayInst(VuInstruction* pInst) +{ + // check for the N cycle branch delay + // example of 2 cycles delay
(monster house) : + // sqi vi05 + // sqi vi05 + // ibeq vi05, vi03 + // The ibeq should read the vi05 before the first sqi + + //more info: + + // iaddiu vi01, 0, 1 + // ibeq vi01, 0 <- reads vi01 before the iaddiu + + // iaddiu vi01, 0, 1 + // iaddiu vi01, vi01, 1 + // iaddiu vi01, vi01, 1 + // ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1) + + // ilw vi02, addr + // iaddiu vi01, 0, 1 + // ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls + + int delay = 1; + VuInstruction* pDelayInst = NULL; + VuInstruction* pTargetInst = pInst->pPrevInst; + while (1) + { + if (pTargetInst != NULL + && pTargetInst->info.cycle + delay == pInst->info.cycle + && (pTargetInst->regs[0].pipe == VUPIPE_IALU || pTargetInst->regs[0].pipe == VUPIPE_FMAC) + && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) + && (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff)) + && !(pTargetInst->regs[0].VIread&((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_CLIP_FLAG)))) + { + pDelayInst = pTargetInst; + pTargetInst = pTargetInst->pPrevInst; + delay++; + if (delay == 5) //maximum delay is 4 (length of the pipeline) + { + DevCon::WriteLn("supervu: cycle branch delay maximum (4) is reached"); + break; + } + } + else break; + } + if (delay > 1) DevCon::WriteLn("supervu: %d cycle branch delay detected: %x %x", params delay - 1, pc, s_pFnHeader->startpc); + return pDelayInst; +} +#endif + +static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes) +{ + // check if block already exists + //Console::WriteLn("startpc %x", params startpc); + startpc &= (s_vu ? 
0x3fff : 0xfff); + VuBlockHeader* pbh = &recVUBlocks[s_vu][startpc/8]; + + if (pbh->pblock != NULL) + { + + VuBaseBlock* pblock = pbh->pblock; + list<VuInstruction>::iterator itinst; + + if (pblock->startpc == startpc) + { + SuperVUAddWritebacks(pblock, pipes.listWritebacks); + return pblock; + } + + // have to divide the blocks, pnewblock is first block + assert(startpc > pblock->startpc); + assert(startpc < pblock->endpc); + + u32 dummyinst = (startpc - pblock->startpc) >> 3; + + // count non-dummy insts + itinst = pblock->insts.begin(); + int cycleoff = 0; + + while (dummyinst > 0) + { + if (itinst->type & INST_DUMMY) + ++itinst; + else + { + cycleoff = itinst->info.cycle; + ++itinst; + --dummyinst; + } + } + + // NOTE: still leaves insts with their writebacks in different blocks + while (itinst->type & INST_DUMMY) + ++itinst; + + // the difference in cycles between dummy insts (naruto ultimate ninja) + int cyclediff = 0; + if (parent == pblock) + cyclediff = itinst->info.cycle - cycleoff; + cycleoff = itinst->info.cycle; + + // new block + VuBaseBlock* pnewblock = new VuBaseBlock(); + s_listBlocks.push_back(pnewblock); + + pnewblock->startpc = startpc; + pnewblock->endpc = pblock->endpc; + pnewblock->cycles = pblock->cycles - cycleoff + cyclediff; + + pnewblock->blocks.splice(pnewblock->blocks.end(), pblock->blocks); + pnewblock->insts.splice(pnewblock->insts.end(), pblock->insts, itinst, pblock->insts.end()); + pnewblock->type = pblock->type; + + // any writebacks in the next 3 cycles also belong to original block +// for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); ) { +// if( (itinst->type & INST_DUMMY) && itinst->nParentPc >= 0 && itinst->nParentPc < (int)startpc ) { +// +// if( !(itinst->type & INST_Q_WRITE) ) +// pblock->insts.push_back(*itinst); +// itinst = pnewblock->insts.erase(itinst); +// continue; +// } +// +// ++itinst; +// } + + pbh = &recVUBlocks[s_vu][startpc/8]; + for (u32 inst = startpc; inst < pblock->endpc; inst += 8) + { + if (pbh->pblock == pblock) + pbh->pblock = pnewblock; + ++pbh; + } + + for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); itinst++) + { + itinst->info.cycle -= cycleoff; + } + + SuperVUAddWritebacks(pnewblock, pipes.listWritebacks); + + // old block + pblock->blocks.push_back(pnewblock); + pblock->endpc = startpc; + pblock->cycles = cycleoff; + pblock->type &= BLOCKTYPE_MACFLAGS; + //pblock->insts.push_back(SuperVUFlushInst()); //don't need + + return pnewblock; + } + + VuBaseBlock* pblock = new VuBaseBlock(); + s_listBlocks.push_back(pblock); + + int i = 0; + branch = 0; + pc = startpc; + pblock->startpc = startpc; + + // clear stalls (might be a prob) + memcpy(VU->fmac, pipes.fmac, sizeof(pipes.fmac)); + memcpy(&VU->fdiv, &pipes.fdiv, sizeof(pipes.fdiv)); + memcpy(&VU->efu, &pipes.efu, sizeof(pipes.efu)); + memcpy(VU->ialu, pipes.ialu, sizeof(pipes.ialu)); +// memset(VU->fmac, 0, sizeof(VU->fmac)); +// memset(&VU->fdiv, 0, sizeof(VU->fdiv)); +// memset(&VU->efu, 0, sizeof(VU->efu)); + + vucycle = 0; + + u8 macflags = 0; + + list< WRITEBACK > listWritebacks; + list< WRITEBACK >::iterator itwriteback; + list<VuInstruction>::iterator itinst; + u32 hasSecondBranch = 0; + u32 needFullStatusFlag = 0; + +#ifdef SUPERVU_WRITEBACKS + listWritebacks = pipes.listWritebacks; +#endif + + // first analysis pass for status flags + while (1) + { + u32* ptr = (u32*) & VU->Micro[pc]; + pc += 8; + int prevbranch = branch; + + if (ptr[1] & 0x40000000) // E bit on the upper instruction: the program ends after one more pair + branch = 1; + + if (!(ptr[1] & 0x80000000)) // not I + { + switch (ptr[0] >> 25) + {
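+// (ptr[0] >> 25) is the seven-bit opcode group of the lower instruction word, and this first pass only cares about two families: the flow-control ops that end a block (0x20/0x21 b/bal, 0x24/0x25 jr/jalr, 0x28/0x29 and 0x2c-0x2f the integer conditional branches) and the status-flag readers fseq/fsand/fsor (0x14/0x16/0x17), of which only fsand below forces full sticky status flags, when its mask touches the sticky bits (ptr[0] & 0xc0). +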
case 0x24: // jr + case 0x25: // jalr + case 0x20: // B + case 0x21: // BAL + case 0x28: // IBEQ + case 0x2f: // IBGEZ + case 0x2d: // IBGTZ + case 0x2e: // IBLEZ + case 0x2c: // IBLTZ + case 0x29: // IBNE + branch = 1; + break; + + case 0x14: // fseq + case 0x17: // fsor + //needFullStatusFlag = 2; + break; + + case 0x16: // fsand + if ((ptr[0]&0xc0)) + { + // sometimes full sticky bits are needed (simple series 2000 - oane chapara) + //Console::WriteLn("needSticky: %x-%x", params s_pFnHeader->startpc, startpc); + needFullStatusFlag = 2; + } + break; + } + } + + if (prevbranch) + break; + + if (pc >= s_MemSize[s_vu]) + { + Console::Error("inf vu0 prog %x", params startpc); + break; + } + } + + // second full pass + pc = startpc; + branch = 0; + VuInstruction* pprevinst = NULL, *pinst = NULL; + + while (1) + { + + if (pc == s_MemSize[s_vu]) + { + branch |= 8; + break; + } + + if (!branch && pbh->pblock != NULL) + { + pblock->blocks.push_back(pbh->pblock); + break; + } + + int prevbranch = branch; + + if (!prevbranch) + { + pbh->pblock = pblock; + } + else assert(prevbranch || pbh->pblock == NULL); + + pblock->insts.push_back(VuInstruction()); + + pprevinst = pinst; + pinst = &pblock->insts.back(); + pinst->pPrevInst = pprevinst; + SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs); + +#ifdef SUPERVU_VIBRANCHDELAY + if (pinst->regs[0].pipe == VUPIPE_BRANCH && pblock->insts.size() > 1) + { + + VuInstruction* pdelayinst = getDelayInst(pinst); + if (pdelayinst) + { + pdelayinst->type |= INST_CACHE_VI; + + // find the correct register + u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread; + for (int i = 0; i < 16; ++i) + { + if (mask & (1 << i)) + { + pdelayinst->vicached = i; + break; + } + } + + pinst->vicached = pdelayinst->vicached; + } + } +#endif + + if (prevbranch) + { + if (pinst->regs[0].pipe == VUPIPE_BRANCH) + hasSecondBranch = 1; + pinst->type |= INST_BRANCH_DELAY; + } + + // check write back + for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end();) + { + if (pinst->info.cycle >= itwriteback->cycle) + { + itinst = pblock->insts.insert(--pblock->insts.end(), VuInstruction()); + itwriteback->InitInst(&(*itinst), pinst->info.cycle); + itwriteback = listWritebacks.erase(itwriteback); + } + else ++itwriteback; + } + + // add new writebacks + WRITEBACK w; + const u32 allflags = (1 << REG_CLIP_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG); + for (int j = 0; j < 2; ++j) w.viwrite[j] = pinst->regs[j].VIwrite & allflags; + + if (pinst->info.macflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_MAC_FLAG); + if (pinst->info.statusflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_STATUS_FLAG); + + if ((pinst->info.macflag | pinst->info.statusflag) & VUOP_READ) + macflags = 1; + if (pinst->regs[0].VIread & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG))) + macflags = 1; + +// if( pinst->regs[1].pipe == VUPIPE_FMAC && (pinst->regs[1].VFwrite==0&&!(pinst->regs[1].VIwrite&(1<regs[0].VIread |= (1<VIwrite |= lregs->VIwrite & (1<info.statusflag&VUOP_WRITE) && !(pinst->regs[0].VIwrite&(1 << REG_STATUS_FLAG))) && needFullStatusFlag) + { + // don't read if first inst + if (needFullStatusFlag == 1) + w.viread[1] |= (1 << REG_STATUS_FLAG); + else --needFullStatusFlag; + } + + for (int j = 0; j < 2; ++j) + { + w.viread[j] |= pinst->regs[j].VIread & allflags; + + if ((pinst->regs[j].VIread&(1 << REG_STATUS_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_STATUS_FLAG))) + { + // don't need the read anymore + pinst->regs[j].VIread &= ~(1 << REG_STATUS_FLAG); + } + if 
((pinst->regs[j].VIread&(1 << REG_MAC_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_MAC_FLAG))) + { + // don't need the read anymore + pinst->regs[j].VIread &= ~(1 << REG_MAC_FLAG); + } + + pinst->regs[j].VIwrite &= ~allflags; + } + + if (pinst->info.macflag & VUOP_READ) w.viread[1] |= 1 << REG_MAC_FLAG; + if (pinst->info.statusflag & VUOP_READ) w.viread[1] |= 1 << REG_STATUS_FLAG; + + w.nParentPc = pc - 8; + w.cycle = pinst->info.cycle + 4; + listWritebacks.push_back(w); + } + + if (pinst->info.q&VUOP_READ) pinst->type |= INST_Q_READ; + if (pinst->info.p&VUOP_READ) pinst->type |= INST_P_READ; + + if (pinst->info.q&VUOP_WRITE) + { + pinst->pqcycles = QWaitTimes[pinst->info.pqinst] + 1; + + memset(&w, 0, sizeof(w)); + w.nParentPc = pc - 8; + w.cycle = pinst->info.cycle + pinst->pqcycles; + w.viwrite[0] = 1 << REG_Q; + listWritebacks.push_back(w); + } + if (pinst->info.p&VUOP_WRITE) + pinst->pqcycles = PWaitTimes[pinst->info.pqinst] + 1; + + if (prevbranch) + { + break; + } + + // make sure there is always a branch + // sensible soccer overflows on vu0, so increase the limit... + if ((s_vu == 1 && i >= 0x799) || (s_vu == 0 && i >= 0x201)) + { + Console::Error("VuRec base block doesn't terminate!"); + assert(0); + break; + } + + i++; + pbh++; + } + + if (macflags) + pblock->type |= BLOCKTYPE_MACFLAGS; + + pblock->endpc = pc; + u32 lastpc = pc; + + pblock->cycles = vucycle; + +#ifdef SUPERVU_WRITEBACKS + if (!branch || (branch&8)) +#endif + { + // flush writebacks + if (listWritebacks.size() > 0) + { + listWritebacks.sort(WRITEBACK::SortWritebacks); + for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) + { + if (itwriteback->viwrite[0] & (1 << REG_Q)) + { + // ignore all Q writebacks + continue; + } + + pblock->insts.push_back(VuInstruction()); + itwriteback->InitInst(&pblock->insts.back(), vucycle); + } + + listWritebacks.clear(); + } + } + + if (!branch) return pblock; + + if (branch & 8) + { + // what if also a jump? 
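+ // (Roughly, 'branch' is a small bitmask rather than a bool in this file: 0x1 is set once a branch or an E bit is seen, 0x8 when the scan runs off the end of micro memory, the recVUMI_B/BAL recompilers set 0x3, recVUMI_JALR sets 0x4, and recVUMI_JR sets 0x10 with 0x08 reserved. So this path covers blocks that hit the end of memory, possibly with a jump still pending.)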
+ pblock->type |= BLOCKTYPE_EOP | BLOCKTYPE_HASEOP; + + // add an instruction to flush p and q (if written) + pblock->insts.push_back(SuperVUFlushInst()); + return pblock; + } + + // it is a (cond) branch or a jump + u32 vucode = *(u32*)(VU->Micro + lastpc - 16); + int bpc = _recbranchAddr(vucode) - 8; + + VUPIPELINES newpipes; + memcpy(newpipes.fmac, VU->fmac, sizeof(newpipes.fmac)); + memcpy(&newpipes.fdiv, &VU->fdiv, sizeof(newpipes.fdiv)); + memcpy(&newpipes.efu, &VU->efu, sizeof(newpipes.efu)); + memcpy(newpipes.ialu, VU->ialu, sizeof(newpipes.ialu)); + + for (i = 0; i < 8; ++i) newpipes.fmac[i].sCycle -= vucycle; + newpipes.fdiv.sCycle -= vucycle; + newpipes.efu.sCycle -= vucycle; + for (i = 0; i < 8; ++i) newpipes.ialu[i].sCycle -= vucycle; + + if (listWritebacks.size() > 0) + { + // flush all when jumping, send down the pipe when in branching + bool bFlushWritebacks = (vucode >> 25) == 0x24 || (vucode >> 25) == 0x25;//||(vucode>>25)==0x20||(vucode>>25)==0x21; + + listWritebacks.sort(WRITEBACK::SortWritebacks); + for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback) + { + if (itwriteback->viwrite[0] & (1 << REG_Q)) + { + // ignore all Q writebacks + continue; + } + + if (itwriteback->cycle < vucycle || bFlushWritebacks) + { + pblock->insts.push_back(VuInstruction()); + itwriteback->InitInst(&pblock->insts.back(), vucycle); + } + else + { + newpipes.listWritebacks.push_back(*itwriteback); + newpipes.listWritebacks.back().cycle -= vucycle; + } + } + } + + if (newpipes.listWritebacks.size() > 0) // other blocks might read the mac flags + pblock->type |= BLOCKTYPE_MACFLAGS; + + u32 firstbranch = vucode >> 25; + switch (firstbranch) + { + case 0x24: // jr + pblock->type |= BLOCKTYPE_EOP; // jump out of procedure, since not returning, set EOP + pblock->insts.push_back(SuperVUFlushInst()); + firstbranch = 0xff; //Non-Conditional Jump + break; + + case 0x25: // jalr + { + // linking, so will return to procedure + pblock->insts.push_back(SuperVUFlushInst()); + + VuBaseBlock* pjumpblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); + + // update pblock since could have changed + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + assert(pblock != NULL); + + pblock->blocks.push_back(pjumpblock); + firstbranch = 0xff; //Non-Conditional Jump + break; + } + case 0x20: // B + { + VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); + + // update pblock since could have changed + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + assert(pblock != NULL); + + pblock->blocks.push_back(pbranchblock); + firstbranch = 0xff; //Non-Conditional Jump + break; + } + case 0x21: // BAL + { + VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); + + // update pblock since could have changed + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + assert(pblock != NULL); + pblock->blocks.push_back(pbranchblock); + firstbranch = 0xff; //Non-Conditional Jump + break; + } + case 0x28: // IBEQ + case 0x2f: // IBGEZ + case 0x2d: // IBGTZ + case 0x2e: // IBLEZ + case 0x2c: // IBLTZ + case 0x29: // IBNE + { + VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); + + // update pblock since could have changed + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + assert(pblock != NULL); + pblock->blocks.push_back(pbranchblock); + + // if has a second branch that is B or BAL, skip this + u32 secondbranch = (*(u32*)(VU->Micro + lastpc - 8)) >> 25; + if (!hasSecondBranch || (secondbranch != 0x21 && secondbranch != 0x20)) + { + pbranchblock = 
SuperVUBuildBlocks(pblock, lastpc, newpipes); + + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + pblock->blocks.push_back(pbranchblock); + } + + break; + } + default: + assert(pblock->blocks.size() == 1); + break; + } + + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + +#ifdef SUPERVU_VIBRANCHDELAY +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +///// NOTE! This could still be a hack for KH2/GoW, but until we know how it properly works, this will do for now./// +///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// + + if (hasSecondBranch && firstbranch != 0xff) //check the previous jump was conditional and there is a second branch + { +#else + if (hasSecondBranch) + { +#endif + + u32 vucode = *(u32*)(VU->Micro + lastpc - 8); + pc = lastpc; + int bpc = _recbranchAddr(vucode); + + switch (vucode >> 25) + { + case 0x24: // jr + Console::Error("svurec bad jr jump!"); + assert(0); + break; + + case 0x25: // jalr + { + Console::Error("svurec bad jalr jump!"); + assert(0); + break; + } + case 0x20: // B + { + VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); + + // update pblock since could have changed + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + + pblock->blocks.push_back(pbranchblock); + break; + } + case 0x21: // BAL + { + VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); + + // replace instead of pushing a new block + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + pblock->blocks.push_back(pbranchblock); + break; + } + case 0x28: // IBEQ + case 0x2f: // IBGEZ + case 0x2d: // IBGTZ + case 0x2e: // IBLEZ + case 0x2c: // IBLTZ + case 0x29: // IBNE + { + VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes); + + // update pblock since could have changed + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + pblock->blocks.push_back(pbranchblock); + + // only add the block if the previous branch doesn't include the next instruction (ie, if a direct jump) + if (firstbranch == 0x24 || firstbranch == 0x25 || firstbranch == 0x20 || firstbranch == 0x21) + { + pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes); + + pblock = recVUBlocks[s_vu][lastpc/8-2].pblock; + pblock->blocks.push_back(pbranchblock); + } + + break; + } + + jNO_DEFAULT; + } + } + + return recVUBlocks[s_vu][startpc/8].pblock; +} + +static void SuperVUInitLiveness(VuBaseBlock* pblock) +{ + list::iterator itinst, itnext; + + assert(pblock->insts.size() > 0); + + for (itinst = pblock->insts.begin(); itinst != pblock->insts.end(); ++itinst) + { + + if (itinst->type & INST_DUMMY_) + { + itinst->addvars[0] = itinst->addvars[1] = 0xffffffff; + itinst->livevars[0] = itinst->livevars[1] = 0xffffffff; + itinst->keepvars[0] = itinst->keepvars[1] = 0xffffffff; + itinst->usedvars[0] = itinst->usedvars[1] = 0; + } + else + { + itinst->addvars[0] = itinst->regs[0].VIread | itinst->regs[1].VIread; + itinst->addvars[1] = (itinst->regs[0].VFread0 ? (1 << itinst->regs[0].VFread0) : 0) | + (itinst->regs[0].VFread1 ? (1 << itinst->regs[0].VFread1) : 0) | + (itinst->regs[1].VFread0 ? (1 << itinst->regs[1].VFread0) : 0) | + (itinst->regs[1].VFread1 ? 
+static void SuperVUInitLiveness(VuBaseBlock* pblock)
+{
+	list<VuInstruction>::iterator itinst, itnext;
+
+	assert(pblock->insts.size() > 0);
+
+	for (itinst = pblock->insts.begin(); itinst != pblock->insts.end(); ++itinst)
+	{
+
+		if (itinst->type & INST_DUMMY_)
+		{
+			itinst->addvars[0] = itinst->addvars[1] = 0xffffffff;
+			itinst->livevars[0] = itinst->livevars[1] = 0xffffffff;
+			itinst->keepvars[0] = itinst->keepvars[1] = 0xffffffff;
+			itinst->usedvars[0] = itinst->usedvars[1] = 0;
+		}
+		else
+		{
+			itinst->addvars[0] = itinst->regs[0].VIread | itinst->regs[1].VIread;
+			itinst->addvars[1] = (itinst->regs[0].VFread0 ? (1 << itinst->regs[0].VFread0) : 0) |
+			                     (itinst->regs[0].VFread1 ? (1 << itinst->regs[0].VFread1) : 0) |
+			                     (itinst->regs[1].VFread0 ? (1 << itinst->regs[1].VFread0) : 0) |
+			                     (itinst->regs[1].VFread1 ? (1 << itinst->regs[1].VFread1) : 0);
+
+			// vf0 is not handled by VFread
+			if (!itinst->regs[0].VFread0 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1;
+			if (!itinst->regs[1].VFread0 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1;
+			if (!itinst->regs[0].VFread1 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[0].VFr1xyzw != 0xff) itinst->addvars[1] |= 1;
+			if (!itinst->regs[1].VFread1 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[1].VFr1xyzw != 0xff) itinst->addvars[1] |= 1;
+
+
+			u32 vfwrite = 0;
+			if (itinst->regs[0].VFwrite != 0)
+			{
+				if (itinst->regs[0].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[0].VFwrite;
+				else vfwrite |= 1 << itinst->regs[0].VFwrite;
+			}
+			if (itinst->regs[1].VFwrite != 0)
+			{
+				if (itinst->regs[1].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[1].VFwrite;
+				else vfwrite |= 1 << itinst->regs[1].VFwrite;
+			}
+			if ((itinst->regs[1].VIwrite & (1 << REG_ACC_FLAG)) && itinst->regs[1].VFwxyzw != 0xf)
+				itinst->addvars[1] |= 1 << REG_ACC_FLAG;
+
+			u32 viwrite = (itinst->regs[0].VIwrite | itinst->regs[1].VIwrite);
+
+			itinst->usedvars[0] = itinst->addvars[0] | viwrite;
+			itinst->usedvars[1] = itinst->addvars[1] | vfwrite;
+
+//			itinst->addvars[0] &= ~viwrite;
+//			itinst->addvars[1] &= ~vfwrite;
+			itinst->keepvars[0] = ~viwrite;
+			itinst->keepvars[1] = ~vfwrite;
+		}
+	}
+
+	itinst = --pblock->insts.end();
+	while (itinst != pblock->insts.begin())
+	{
+		itnext = itinst;
+		--itnext;
+
+		itnext->usedvars[0] |= itinst->usedvars[0];
+		itnext->usedvars[1] |= itinst->usedvars[1];
+
+		itinst = itnext;
+	}
+}
+
+u32 COMPUTE_LIVE(u32 R, u32 K, u32 L)
+{
+	u32 live = R | ((L) & (K));
+	// special process mac and status flags
+	// only propagate liveness if doesn't write to the flag
+	if (!(L&(1 << REG_STATUS_FLAG)) && !(K&(1 << REG_STATUS_FLAG)))
+		live &= ~(1 << REG_STATUS_FLAG);
+	if (!(L&(1 << REG_MAC_FLAG)) && !(K&(1 << REG_MAC_FLAG)))
+		live &= ~(1 << REG_MAC_FLAG);
+	return live;//|(1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG);
+}
+
+static void SuperVULivenessAnalysis()
+{
+	BOOL changed;
+	list<VuBaseBlock*>::reverse_iterator itblock;
+	list<VuInstruction>::iterator itinst, itnext;
+	VuBaseBlock::LISTBLOCKS::iterator itchild;
+
+	u32 livevars[2];
+
+	do
+	{
+		changed = FALSE;
+		for (itblock = s_listBlocks.rbegin(); itblock != s_listBlocks.rend(); ++itblock)
+		{
+
+			u32 newlive;
+			VuBaseBlock* pb = *itblock;
+
+			// the last inst relies on the neighbor's insts
+			itinst = --pb->insts.end();
+
+			if (pb->blocks.size() > 0)
+			{
+				livevars[0] = 0;
+				livevars[1] = 0;
+				for (itchild = pb->blocks.begin(); itchild != pb->blocks.end(); ++itchild)
+				{
+					VuInstruction& front = (*itchild)->insts.front();
+					livevars[0] |= front.livevars[0];
+					livevars[1] |= front.livevars[1];
+				}
+
+				newlive = COMPUTE_LIVE(itinst->addvars[0], itinst->keepvars[0], livevars[0]);
+
+				// should propagate status flags whose parent insts are not in this block
+//				if( itinst->nParentPc >= 0 && (itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) )
+//					newlive |= livevars[0]&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG));
+
+				if (itinst->livevars[0] != newlive)
+				{
+					changed = TRUE;
+					itinst->livevars[0] = newlive;
+				}
+
+				newlive = COMPUTE_LIVE(itinst->addvars[1], itinst->keepvars[1], livevars[1]);
+				if (itinst->livevars[1] != newlive)
+				{
+					changed = TRUE;
+					itinst->livevars[1] = newlive;
+				}
+			}
+
+			while (itinst != pb->insts.begin())
+			{
+
+				itnext = itinst;
+				--itnext;
+
+				newlive = COMPUTE_LIVE(itnext->addvars[0], itnext->keepvars[0], itinst->livevars[0]);
+
+				// should propagate status flags whose parent insts are not in this block
+//				if( itnext->nParentPc >= 0 && (itnext->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) && !(itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) )
+//					newlive |= itinst->livevars[0]&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG));
+
+				if (itnext->livevars[0] != newlive)
+				{
+					changed = TRUE;
+					itnext->livevars[0] = newlive;
+					itnext->livevars[1] = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
+				}
+				else
+				{
+					newlive = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
+					if (itnext->livevars[1] != newlive)
+					{
+						changed = TRUE;
+						itnext->livevars[1] = newlive;
+					}
+				}
+
+				itinst = itnext;
+			}
+
+//			if( (livevars[0] | itinst->livevars[0]) != itinst->livevars[0] ) {
+//				changed = TRUE;
+//				itinst->livevars[0] |= livevars[0];
+//			}
+//			if( (livevars[1] | itinst->livevars[1]) != itinst->livevars[1] ) {
+//				changed = TRUE;
+//				itinst->livevars[1] |= livevars[1];
+//			}
+//
+//			while( itinst != pb->insts.begin() ) {
+//
+//				itnext = itinst; --itnext;
+//				if( (itnext->livevars[0] | (itinst->livevars[0] & itnext->keepvars[0])) != itnext->livevars[0] ) {
+//					changed = TRUE;
+//					itnext->livevars[0] |= itinst->livevars[0] & itnext->keepvars[0];
+//					itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
+//				}
+//				else if( (itnext->livevars[1] | (itinst->livevars[1] & itnext->keepvars[1])) != itnext->livevars[1] ) {
+//					changed = TRUE;
+//					itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
+//				}
+//
+//				itinst = itnext;
+//			}
+		}
+
+	}
+	while (changed);
+}
+
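+// Dead flag elimination: runs after liveness analysis, masking out CLIP, MAC
+// and STATUS flag writes that no live path ever reads, and pushing surviving
+// flag writes back up to the parent instructions that produce them.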
+static void SuperVUEliminateDeadCode()
+{
+	list<VuBaseBlock*>::iterator itblock;
+	VuBaseBlock::LISTBLOCKS::iterator itchild;
+	list<VuInstruction>::iterator itinst, itnext;
+	list<VuInstruction*> listParents;
+	list<VuInstruction*>::iterator itparent;
+
+	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
+	{
+
+#ifdef _DEBUG
+		u32 startpc = (*itblock)->startpc;
+		u32 curpc = startpc;
+#endif
+
+		itnext = (*itblock)->insts.begin();
+		itinst = itnext++;
+		while (itnext != (*itblock)->insts.end())
+		{
+			if (itinst->type & (INST_CLIP_WRITE | INST_MAC_WRITE | INST_STATUS_WRITE))
+			{
+				u32 live0 = itnext->livevars[0];
+				if (itinst->nParentPc >= 0 && itnext->nParentPc >= 0 && itinst->nParentPc != itnext->nParentPc) // superman returns
+				{
+					// take the live vars from the next next inst
+					list<VuInstruction>::iterator itnextnext = itnext;
+					++itnextnext;
+					if (itnextnext != (*itblock)->insts.end())
+					{
+						live0 = itnextnext->livevars[0];
+					}
+				}
+
+				itinst->regs[0].VIwrite &= live0;
+				itinst->regs[1].VIwrite &= live0;
+
+				u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite;
+
+				(*itblock)->GetInstsAtPc(itinst->nParentPc, listParents);
+				int removetype = 0;
+
+				for(itparent = listParents.begin(); itparent != listParents.end(); itparent++)
+				{
+					VuInstruction* parent = *itparent;
+
+					if (viwrite & (1 << REG_CLIP_FLAG))
+					{
+						parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG));
+						parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG));
+					}
+					else
+						removetype |= INST_CLIP_WRITE;
+
+					if (parent->info.macflag && (itinst->type & INST_MAC_WRITE))
+					{
+						if (!(viwrite&(1 << REG_MAC_FLAG)))
+						{
+							//parent->info.macflag = 0;
+//							parent->regs[0].VIwrite &= ~(1<<REG_MAC_FLAG);
+//							parent->regs[1].VIwrite &= ~(1<<REG_MAC_FLAG);
+#ifndef SUPERVU_WRITEBACKS
+							assert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG)));
+#endif
+							// if VUPIPE_FMAC and destination is vf00, probably need to keep the mac flag
+							if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG))))
+							{
+								parent->regs[0].VIwrite |= ((1 << REG_MAC_FLAG));
+								parent->regs[1].VIwrite |= ((1 << REG_MAC_FLAG));
+							}
+							else
+								removetype |= INST_MAC_WRITE;
+						}
+						else
+						{
+							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG));
+							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG));
+						}
+					}
+					else removetype |= INST_MAC_WRITE;
+
+					if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE))
+					{
+						if (!(viwrite&(1 << REG_STATUS_FLAG)))
+						{
+							//parent->info.statusflag = 0;
+//							parent->regs[0].VIwrite &= ~(1<<REG_STATUS_FLAG);
+//							parent->regs[1].VIwrite &= ~(1<<REG_STATUS_FLAG);
+#ifndef SUPERVU_WRITEBACKS
+							assert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG)));
+#endif
+							if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG))))
+							{
+								parent->regs[0].VIwrite |= ((1 << REG_STATUS_FLAG));
+								parent->regs[1].VIwrite |= ((1 << REG_STATUS_FLAG));
+							}
+							else
+								removetype |= INST_STATUS_WRITE;
+						}
+						else
+						{
+							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG));
+							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG));
+						}
+					}
+					else removetype |= INST_STATUS_WRITE;
+				}
+
+				itinst->type &= ~removetype;
+				if (itinst->type == 0)
+				{
+					itnext = (*itblock)->insts.erase(itinst);
+					itinst = itnext++;
+					continue;
+				}
+			}
+#ifdef _DEBUG
+			else
+			{
+				curpc += 8;
+			}
+#endif
+			itinst = itnext;
+			++itnext;
+		}
+
+		if (itinst->type & INST_DUMMY)
+		{
+			// last inst with the children
+			u32 mask = 0;
+			for (itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); ++itchild)
+			{
+				mask |= (*itchild)->insts.front().livevars[0];
+			}
+			itinst->regs[0].VIwrite &= mask;
+			itinst->regs[1].VIwrite &= mask;
+			u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite;
+
+			if (itinst->nParentPc >= 0)
+			{
+
+				(*itblock)->GetInstsAtPc(itinst->nParentPc, listParents);
+				int removetype = 0;
+
+				for(itparent = listParents.begin(); itparent != listParents.end(); itparent++)
+				{
+					VuInstruction* parent = *itparent;
+
+					if (viwrite & (1 << REG_CLIP_FLAG))
+					{
+						parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG));
+						parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG));
+					}
+					else removetype |= INST_CLIP_WRITE;
+
+					if (parent->info.macflag && (itinst->type & INST_MAC_WRITE))
+					{
+						if (!(viwrite&(1 << REG_MAC_FLAG)))
+						{
+							//parent->info.macflag = 0;
+#ifndef SUPERVU_WRITEBACKS
+							assert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG)));
+#endif
+							removetype |= INST_MAC_WRITE;
+						}
+						else
+						{
+							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG));
+							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG));
+						}
+					}
+					else removetype |= INST_MAC_WRITE;
+
+					if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE))
+					{
+						if (!(viwrite&(1 << REG_STATUS_FLAG)))
+						{
+							//parent->info.statusflag = 0;
+#ifndef SUPERVU_WRITEBACKS
+							assert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG)));
+#endif
+							removetype |= INST_STATUS_WRITE;
+						}
+						else
+						{
+							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG));
+							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG));
+						}
+					}
+					else removetype |= INST_STATUS_WRITE;
+				}
+
+				itinst->type &= ~removetype;
+				if (itinst->type == 0)
+				{
+					(*itblock)->insts.erase(itinst);
+				}
+			}
+		}
+	}
+}
+
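+// Maps VF registers and ACC onto SSE registers for every instruction in the
+// block. The counter fields are set to 1000 minus the distance to a cached
+// register's next use, so the allocator evicts the coldest register first.
+// This pass only records assignments; x86Ptr is asserted unchanged at the end.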
+void VuBaseBlock::AssignVFRegs()
+{
+	int i;
+	VuBaseBlock::LISTBLOCKS::iterator itchild;
+	list<VuBaseBlock*>::iterator itblock;
+	list<VuInstruction>::iterator itinst, itnext, itinst2;
+
+	// init the start regs
+	if (type & BLOCKTYPE_ANALYZED) return; // nothing changed
+	memcpy(xmmregs, startregs, sizeof(xmmregs));
+
+	if (type & BLOCKTYPE_ANALYZED)
+	{
+		// check if changed
+		for (i = 0; i < iREGCNT_XMM; ++i)
+		{
+			if (xmmregs[i].inuse != startregs[i].inuse)
+				break;
+			if (xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type))
+				break;
+		}
+
+		if (i == iREGCNT_XMM) return; // nothing changed
+	}
+
+	u8* oldX86 = x86Ptr;
+
+	for(itinst = insts.begin(); itinst != insts.end(); itinst++)
+	{
+
+		if (itinst->type & INST_DUMMY) continue;
+
+		// reserve, go from upper to lower
+		int lastwrite = -1;
+
+		for (i = 1; i >= 0; --i)
+		{
+			_VURegsNum* regs = itinst->regs + i;
+
+
+			// redo the counters so that the proper regs are released
+			for (int j = 0; j < iREGCNT_XMM; ++j)
+			{
+				if (xmmregs[j].inuse)
+				{
+					if (xmmregs[j].type == XMMTYPE_VFREG)
+					{
+						int count = 0;
+						itinst2 = itinst;
+
+						if (i)
+						{
+							if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg)
+							{
+								itinst2 = insts.end();
+								break;
+							}
+							else
+							{
+								++count;
+								++itinst2;
+							}
+						}
+
+						while (itinst2 != insts.end())
+						{
+							if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg ||
+							        itinst2->regs[1].VFread0 == xmmregs[j].reg || itinst2->regs[1].VFread1 == xmmregs[j].reg || itinst2->regs[1].VFwrite == xmmregs[j].reg)
+								break;
+
+							++count;
+							++itinst2;
+						}
+						xmmregs[j].counter = 1000 - count;
+					}
+					else
+					{
+						assert(xmmregs[j].type == XMMTYPE_ACC);
+
+						int count = 0;
+						itinst2 = itinst;
+
+						if (i) ++itinst2; // acc isn't used in lower insts
+
+						while (itinst2 != insts.end())
+						{
+							assert(!((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite) & (1 << REG_ACC_FLAG)));
+
+							if ((itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) & (1 << REG_ACC_FLAG))
+								break;
+
+							++count;
+							++itinst2;
+						}
+
+						xmmregs[j].counter = 1000 - count;
+					}
+				}
+			}
+
+			if (regs->VFread0) _addNeededVFtoXMMreg(regs->VFread0);
+			if (regs->VFread1) _addNeededVFtoXMMreg(regs->VFread1);
+			if (regs->VFwrite) _addNeededVFtoXMMreg(regs->VFwrite);
+			if (regs->VIread & (1 << REG_ACC_FLAG)) _addNeededACCtoXMMreg();
+			if (regs->VIread & (1 << REG_VF0_FLAG)) _addNeededVFtoXMMreg(0);
+
+			// alloc
+			itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1;
+			itinst->vfflush[i] = -1;
+
+			if (regs->VFread0)
+				itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0);
+			else if (regs->VIread & (1 << REG_VF0_FLAG))
+				itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
+
+			if (regs->VFread1)
+				itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0);
+			else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff)
+				itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0);
+
+			if (regs->VIread & (1 << REG_ACC_FLAG)) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
+
+			int reusereg = -1; // 0 - VFwrite, 1 - VFAcc
+
+			if (regs->VFwrite)
+			{
+				assert(!(regs->VIwrite&(1 << REG_ACC_FLAG)));
+
+				if (regs->VFwxyzw == 0xf)
+				{
+					itinst->vfwrite[i] = _checkXMMreg(XMMTYPE_VFREG, regs->VFwrite, 0);
+					if (itinst->vfwrite[i] < 0) reusereg = 0;
+				}
+				else
+				{
+					itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
+				}
+			}
+			else if (regs->VIwrite & (1 << REG_ACC_FLAG))
+			{
+
+				if (regs->VFwxyzw == 0xf)
+				{
+					itinst->vfacc[i] = _checkXMMreg(XMMTYPE_ACC, 0, 0);
+					if (itinst->vfacc[i] < 0) reusereg = 1;
+				}
+				else
+				{
+					itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
+				}
+			}
+
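+			// when the full destination is written (wxyzw == 0xf) and it isn't
+			// already cached, try to reuse a source register that dies here
+			// instead of allocating a new one; reusereg picks VFwrite (0) or ACC (1)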
+			if (reusereg >= 0)
+			{
+				// reuse
+				itnext = itinst;
+				itnext++;
+
+				u8 type = reusereg ? XMMTYPE_ACC : XMMTYPE_VFREG;
+				u8 reg = reusereg ? 0 : regs->VFwrite;
+
+				if (itinst->vfacc[i] >= 0 && lastwrite != itinst->vfacc[i] &&
+				        (itnext == insts.end() || ((regs->VIread&(1 << REG_ACC_FLAG)) && (!(itnext->usedvars[0]&(1 << REG_ACC_FLAG)) || !(itnext->livevars[0]&(1 << REG_ACC_FLAG))))))
+				{
+
+					assert(reusereg == 0);
+					if (itnext == insts.end() || (itnext->livevars[0]&(1 << REG_ACC_FLAG))) _freeXMMreg(itinst->vfacc[i]);
+					xmmregs[itinst->vfacc[i]].inuse = 1;
+					xmmregs[itinst->vfacc[i]].reg = reg;
+					xmmregs[itinst->vfacc[i]].type = type;
+					xmmregs[itinst->vfacc[i]].mode = 0;
+					itinst->vfwrite[i] = itinst->vfacc[i];
+				}
+				else if (itinst->vfread0[i] >= 0 && lastwrite != itinst->vfread0[i] &&
+				         (itnext == insts.end() || (regs->VFread0 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread0)) || !(itnext->livevars[1]&(1 << regs->VFread0))))))
+				{
+
+					if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread0)) _freeXMMreg(itinst->vfread0[i]);
+
+					xmmregs[itinst->vfread0[i]].inuse = 1;
+					xmmregs[itinst->vfread0[i]].reg = reg;
+					xmmregs[itinst->vfread0[i]].type = type;
+					xmmregs[itinst->vfread0[i]].mode = 0;
+
+					if (reusereg)
+						itinst->vfacc[i] = itinst->vfread0[i];
+					else
+						itinst->vfwrite[i] = itinst->vfread0[i];
+				}
+				else if (itinst->vfread1[i] >= 0 && lastwrite != itinst->vfread1[i] &&
+				         (itnext == insts.end() || (regs->VFread1 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread1)) || !(itnext->livevars[1]&(1 << regs->VFread1))))))
+				{
+
+					if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread1)) _freeXMMreg(itinst->vfread1[i]);
+
+					xmmregs[itinst->vfread1[i]].inuse = 1;
+					xmmregs[itinst->vfread1[i]].reg = reg;
+					xmmregs[itinst->vfread1[i]].type = type;
+					xmmregs[itinst->vfread1[i]].mode = 0;
+					if (reusereg)
+						itinst->vfacc[i] = itinst->vfread1[i];
+					else
+						itinst->vfwrite[i] = itinst->vfread1[i];
+				}
+				else
+				{
+					if (reusereg)
+						itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
+					else
+						itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
+				}
+			}
+
+			if (itinst->vfwrite[i] >= 0) lastwrite = itinst->vfwrite[i];
+			else if (itinst->vfacc[i] >= 0) lastwrite = itinst->vfacc[i];
+
+			// always alloc at least 1 temp reg
+			int free0 = (i || regs->VFwrite || regs->VFread0 || regs->VFread1 || (regs->VIwrite & (1 << REG_ACC_FLAG)) || (regs->VIread & (1 << REG_VF0_FLAG)))
+			            ? _allocTempXMMreg(XMMT_FPS, -1) : -1;
+			int free1 = 0, free2 = 0;
+
+			if (i == 0 && itinst->vfwrite[1] >= 0 && (itinst->vfread0[0] == itinst->vfwrite[1] || itinst->vfread1[0] == itinst->vfwrite[1]))
+			{
+				itinst->vfflush[i] = _allocTempXMMreg(XMMT_FPS, -1);
+			}
+
+			if (i == 1 && (regs->VIwrite & (1 << REG_CLIP_FLAG)))
+			{
+				// CLIP inst, need two extra regs
+				if (free0 < 0) free0 = _allocTempXMMreg(XMMT_FPS, -1);
+
+				free1 = _allocTempXMMreg(XMMT_FPS, -1);
+				free2 = _allocTempXMMreg(XMMT_FPS, -1);
+				_freeXMMreg(free1);
+				_freeXMMreg(free2);
+			}
+			else if (regs->VIwrite & (1 << REG_P))
+			{
+				// EFU inst, need extra reg
+				free1 = _allocTempXMMreg(XMMT_FPS, -1);
+				if (free0 == -1) free0 = free1;
+				_freeXMMreg(free1);
+			}
+
+			if (itinst->vfflush[i] >= 0) _freeXMMreg(itinst->vfflush[i]);
+			if (free0 >= 0) _freeXMMreg(free0);
+
+			itinst->vffree[i] = (free0 & 0xf) | (free1 << 8) | (free2 << 16);
+			if (free0 == -1) itinst->vffree[i] |= VFFREE_INVALID0;
+
+			_clearNeededXMMregs();
+		}
+	}
+
+	assert(x86Ptr == oldX86);
+	u32 analyzechildren = !(type & BLOCKTYPE_ANALYZED);
+	type |= BLOCKTYPE_ANALYZED;
+
+	//memset(endregs, 0, sizeof(endregs));
+
+	if (analyzechildren)
+	{
+		for(itchild = blocks.begin(); itchild != blocks.end(); itchild++)
+		{
+			(*itchild)->AssignVFRegs();
+		}
+	}
+}
+
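+// VI (integer) registers are assigned per "markov blanket": a group of child
+// blocks together with all of their parents shares one x86 register mapping,
+// so cached VI values can survive across block boundaries within the group.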
+struct MARKOVBLANKET
+{
+	list<VuBaseBlock*> parents;
+	list<VuBaseBlock*> children;
+};
+
+static MARKOVBLANKET s_markov;
+
+void VuBaseBlock::AssignVIRegs(int parent)
+{
+	const int maxregs = 6;
+
+	if (parent)
+	{
+		if ((type&BLOCKTYPE_ANALYZEDPARENT))
+			return;
+
+		type |= BLOCKTYPE_ANALYZEDPARENT;
+		s_markov.parents.push_back(this);
+		for (LISTBLOCKS::iterator it = blocks.begin(); it != blocks.end(); ++it)
+		{
+			(*it)->AssignVIRegs(0);
+		}
+		return;
+	}
+
+	if ((type&BLOCKTYPE_ANALYZED))
+		return;
+
+	// child
+	assert(allocX86Regs == -1);
+	allocX86Regs = s_vecRegArray.size();
+	s_vecRegArray.resize(allocX86Regs + iREGCNT_GPR);
+
+	_x86regs* pregs = &s_vecRegArray[allocX86Regs];
+	memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR);
+
+	assert(parents.size() > 0);
+
+	list<VuBaseBlock*>::iterator itparent;
+	u32 usedvars = insts.front().usedvars[0];
+	u32 livevars = insts.front().livevars[0];
+
+	if (parents.size() > 0)
+	{
+		u32 usedvars2 = 0xffffffff;
+
+		for(itparent = parents.begin(); itparent != parents.end(); itparent++)
+		{
+			usedvars2 &= (*itparent)->insts.front().usedvars[0];
+		}
+
+		usedvars |= usedvars2;
+	}
+
+	usedvars &= livevars;
+
+	// currently order doesn't matter
+	int num = 0;
+
+	if (usedvars)
+	{
+		for (int i = 1; i < 16; ++i)
+		{
+			if (usedvars & (1 << i))
+			{
+				pregs[num].inuse = 1;
+				pregs[num].reg = i;
+
+				livevars &= ~(1 << i);
+
+				if (++num >= maxregs) break;
+			}
+		}
+	}
+
+	if (num < maxregs)
+	{
+		livevars &= ~usedvars;
+		livevars &= insts.back().usedvars[0];
+
+		if (livevars)
+		{
+			for (int i = 1; i < 16; ++i)
+			{
+				if (livevars & (1 << i))
+				{
+					pregs[num].inuse = 1;
+					pregs[num].reg = i;
+
+					if (++num >= maxregs) break;
+				}
+			}
+		}
+	}
+
+	s_markov.children.push_back(this);
+	type |= BLOCKTYPE_ANALYZED;
+
+	for(itparent = parents.begin(); itparent != parents.end(); itparent++)
+	{
+		(*itparent)->AssignVIRegs(1);
+	}
+}
+
+static void SuperVUAssignRegs()
+{
+	list<VuBaseBlock*>::iterator itblock, itblock2;
+
+	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
+	{
+		(*itblock)->type &= ~BLOCKTYPE_ANALYZED;
+	}
+	s_listBlocks.front()->AssignVFRegs();
+
+	// VI assignments, find markov blanket for each node in the graph
+	// then allocate regs based on the commonly used ones
+#ifdef SUPERVU_X86CACHING
+	for(itblock = 
s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) + { + (*itblock)->type &= ~(BLOCKTYPE_ANALYZED | BLOCKTYPE_ANALYZEDPARENT); + } + s_vecRegArray.resize(0); + u8 usedregs[16]; + + // note: first block always has to start with no alloc regs + bool bfirst = true; + + for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++) + { + + if (!((*itblock)->type & BLOCKTYPE_ANALYZED)) + { + + if ((*itblock)->parents.size() == 0) + { + (*itblock)->type |= BLOCKTYPE_ANALYZED; + bfirst = false; + continue; + } + + s_markov.children.clear(); + s_markov.parents.clear(); + (*itblock)->AssignVIRegs(0); + + // assign the regs + int regid = s_vecRegArray.size(); + s_vecRegArray.resize(regid + iREGCNT_GPR); + + _x86regs* mergedx86 = &s_vecRegArray[regid]; + memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR); + + if (!bfirst) + { + *(u32*)usedregs = *((u32*)usedregs + 1) = *((u32*)usedregs + 2) = *((u32*)usedregs + 3) = 0; + + for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++) + { + assert((*itblock2)->allocX86Regs >= 0); + _x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs]; + for (int i = 0; i < iREGCNT_GPR; ++i) + { + if (pregs[i].inuse && pregs[i].reg < 16) + { + //assert( pregs[i].reg < 16); + usedregs[pregs[i].reg]++; + } + } + } + + int num = 1; + for (int i = 0; i < 16; ++i) + { + if (usedregs[i] == s_markov.children.size()) + { + // use + mergedx86[num].inuse = 1; + mergedx86[num].reg = i; + mergedx86[num].type = (s_vu ? X86TYPE_VU1 : 0) | X86TYPE_VI; + mergedx86[num].mode = MODE_READ; + if (++num >= iREGCNT_GPR) + break; + if (num == ESP) + ++num; + } + } + + for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++) + { + assert((*itblock2)->nStartx86 == -1); + (*itblock2)->nStartx86 = regid; + } + + for(itblock2 = s_markov.parents.begin(); itblock2 != s_markov.parents.end(); itblock2++) + { + assert((*itblock2)->nEndx86 == -1); + (*itblock2)->nEndx86 = regid; + } + } + + bfirst = false; + } + } +#endif +} + +////////////////// +// Recompilation +////////////////// + +// cycles in which the last Q,P regs were finished (written to VU->VI[]) +// the write occurs before the instruction is executed at that cycle +// compare with s_TotalVUCycles +// if less than 0, already flushed +int s_writeQ, s_writeP; + +// declare the saved registers +uptr s_vu1esp, s_callstack;//, s_vu1esp +uptr s_vu1ebp, s_vuebx, s_vuedi, s_vu1esi; + +static int s_recWriteQ, s_recWriteP; // wait times during recompilation +static int s_needFlush; // first bit - Q, second bit - P, third bit - Q has been written, fourth bit - P has been written + +static int s_JumpX86; +static int s_ScheduleXGKICK = 0, s_XGKICKReg = -1; + +void recVUMI_XGKICK_(VURegs *VU); + +void SuperVUCleanupProgram(u32 startpc, int vuindex) +{ +#ifdef SUPERVU_COUNT + QueryPerformanceCounter(&svufinal); + svutime += (u32)(svufinal.QuadPart - svubase.QuadPart); +#endif + + assert(s_vu1esp == 0); + + VU = vuindex ? 
&VU1 : &VU0; + VU->cycle += s_TotalVUCycles; + + //VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand + if (s_TotalVUCycles < 3000) + cpuRegs.cycle += s_TotalVUCycles * Config.Hacks.VUCycleSteal; + else + cpuRegs.cycle += 3000 * Config.Hacks.VUCycleSteal; + + if ((int)s_writeQ > 0) VU->VI[REG_Q] = VU->q; + if ((int)s_writeP > 0) + { + assert(VU == &VU1); + VU1.VI[REG_P] = VU1.p; // only VU1 + } + + //memset(recVUStack, 0, SUPERVU_STACKSIZE * 4); + + // Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2; + // not doing this because it's slow and not needed (rama) + // _initXMMregs(); + // _initMMXregs(); + // _initX86regs(); +} + +#if defined(_MSC_VER) + +// entry point of all vu programs from emulator calls +__declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex) +{ + __asm + { + mov eax, dword ptr [esp] + mov s_TotalVUCycles, 0 // necessary to be here! + add esp, 4 + mov s_callstack, eax + call SuperVUGetProgram + + // save cpu state + mov s_vu1ebp, ebp + mov s_vu1esi, esi // have to save even in Release + mov s_vuedi, edi // have to save even in Release + mov s_vuebx, ebx + } +#ifdef _DEBUG + __asm + { + mov s_vu1esp, esp + } +#endif + + __asm + { + //stmxcsr s_ssecsr + ldmxcsr g_sseVUMXCSR + + // init vars + mov s_writeQ, 0xffffffff + mov s_writeP, 0xffffffff + + jmp eax + } +} + +// exit point of all vu programs +__declspec(naked) static void SuperVUEndProgram() +{ + __asm + { + // restore cpu state + ldmxcsr g_sseMXCSR + + mov ebp, s_vu1ebp + mov esi, s_vu1esi + mov edi, s_vuedi + mov ebx, s_vuebx + } + +#ifdef _DEBUG + __asm + { + sub s_vu1esp, esp + } +#endif + + __asm + { + call SuperVUCleanupProgram + jmp s_callstack // so returns correctly + } +} + +#endif + +// Flushes P/Q regs +void SuperVUFlush(int p, int wait) +{ + u8* pjmp[3]; + if (!(s_needFlush&(1 << p))) return; + + int recwait = p ? s_recWriteP : s_recWriteQ; + if (!wait && s_pCurInst->info.cycle < recwait) return; + + if (recwait == 0) + { + // write didn't happen this block + MOV32MtoR(EAX, p ? (uptr)&s_writeP : (uptr)&s_writeQ); + OR32RtoR(EAX, EAX); + pjmp[0] = JS8(0); + + if (s_pCurInst->info.cycle) SUB32ItoR(EAX, s_pCurInst->info.cycle); + + // if writeQ <= total+offset + if (!wait) // only write back if time is up + { + CMP32MtoR(EAX, (uptr)&s_TotalVUCycles); + pjmp[1] = JG8(0); + } + else + { + // add (writeQ-total-offset) to s_TotalVUCycles + // necessary? + CMP32MtoR(EAX, (uptr)&s_TotalVUCycles); + pjmp[2] = JLE8(0); + MOV32RtoM((uptr)&s_TotalVUCycles, EAX); + x86SetJ8(pjmp[2]); + } + } + else if (wait && s_pCurInst->info.cycle < recwait) + { + ADD32ItoM((uptr)&s_TotalVUCycles, recwait); + } + + MOV32MtoR(EAX, SuperVUGetVIAddr(p ? REG_P : REG_Q, 0)); + MOV32ItoM(p ? (uptr)&s_writeP : (uptr)&s_writeQ, 0x80000000); + MOV32RtoM(SuperVUGetVIAddr(p ? 
REG_P : REG_Q, 1), EAX);
+
+	if (recwait == 0)
+	{
+		if (!wait) x86SetJ8(pjmp[1]);
+		x86SetJ8(pjmp[0]);
+	}
+
+	if (wait || (!p && recwait == 0 && s_pCurInst->info.cycle >= 12) || (!p && recwait > 0 && s_pCurInst->info.cycle >= recwait))
+		s_needFlush &= ~(1 << p);
+}
+
+// executed only once per program
+static u32* SuperVUStaticAlloc(u32 size)
+{
+	assert(recVUStackPtr + size <= recVUStack + SUPERVU_STACKSIZE);
+	// always zero
+	if (size == 4) *(u32*)recVUStackPtr = 0;
+	else memset(recVUStackPtr, 0, size);
+	recVUStackPtr += size;
+	return (u32*)(recVUStackPtr - size);
+}
+
+static void SuperVURecompile()
+{
+	// save cpu state
+	recVUStackPtr = recVUStack;
+
+	_initXMMregs();
+
+	list<VuBaseBlock*>::iterator itblock;
+
+	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
+	{
+		(*itblock)->type &= ~BLOCKTYPE_ANALYZED;
+	}
+
+	s_listBlocks.front()->Recompile();
+
+	// make sure everything compiled
+	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
+	{
+		assert(((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->pcode != NULL);
+	}
+
+	// link all blocks
+	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
+	{
+		VuBaseBlock::LISTBLOCKS::iterator itchild;
+
+		assert((*itblock)->blocks.size() <= ArraySize((*itblock)->pChildJumps));
+
+		int i = 0;
+		for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
+		{
+
+			if ((u32)(uptr)(*itblock)->pChildJumps[i] == 0xffffffff)
+				continue;
+
+			if ((*itblock)->pChildJumps[i] == NULL)
+			{
+				VuBaseBlock* pchild = *itchild;
+
+				if (pchild->type & BLOCKTYPE_HASEOP)
+				{
+					assert(pchild->blocks.size() == 0);
+
+					AND32ItoM((uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu ? ~0x100 : ~0x001); // E flag
+					AND32ItoM((uptr)&VU->vifRegs->stat, ~0x4);
+
+					MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc);
+					JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
+				}
+				// only other case is when there are two branches
+				else
+				{
+					assert((*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH);
+				}
+
+				continue;
+			}
+
+			if ((u32)(uptr)(*itblock)->pChildJumps[i] & 0x80000000)
+			{
+				// relative
+				assert((uptr)(*itblock)->pChildJumps[i] <= 0xffffffff);
+				(*itblock)->pChildJumps[i] = (u32*)((uptr)(*itblock)->pChildJumps[i] & 0x7fffffff);
+				*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode - ((uptr)(*itblock)->pChildJumps[i] + 4);
+			}
+			else
+			{
+				*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode;
+			}
+
+			++i;
+		}
+	}
+
+	s_pFnHeader->pprogfunc = s_listBlocks.front()->pcode;
+}
+
+// debug
+
+
+u32 s_saveecx, s_saveedx, s_saveebx, s_saveesi, s_saveedi, s_saveebp;
+u32 g_curdebugvu;
+
+//float vuDouble(u32 f);
+
+#if defined(_MSC_VER)
+__declspec(naked) static void svudispfn()
+{
+	__asm
+	{
+		mov g_curdebugvu, eax
+		mov s_saveecx, ecx
+		mov s_saveedx, edx
+		mov s_saveebx, ebx
+		mov s_saveesi, esi
+		mov s_saveedi, edi
+		mov s_saveebp, ebp
+	}
+#else
+
+void svudispfntemp()
+{
+#endif
+
+#ifdef _DEBUG
+	static u32 i;
+
+	if (((vudump&8) && g_curdebugvu) || ((vudump&0x80) && !g_curdebugvu)) //&& g_vu1lastrec != g_vu1last ) {
+	{
+
+		if (skipparent != g_vu1lastrec)
+		{
+			for (i = 0; i < ArraySize(badaddrs); ++i)
+			{
+				if (s_svulast == badaddrs[i][1] && g_vu1lastrec == badaddrs[i][0])
+					break;
+			}
+
+			if (i == ArraySize(badaddrs))
+			{
+				//static int curesp;
+				//__asm mov curesp, esp
+				//Console::WriteLn("tVU: %x %x %x", s_svulast, s_vucount, s_vufnheader);
+				if (g_curdebugvu) iDumpVU1Registers();
+				else iDumpVU0Registers();
+				s_vucount++;
+			}
+		}
+
+		g_vu1lastrec = s_svulast;
+	}
+#endif
+
+#if defined(_MSC_VER)
+	__asm
+	{
+		mov ecx, s_saveecx
+		mov edx, s_saveedx
+		mov ebx, s_saveebx
+		mov esi, s_saveesi
+		mov edi, s_saveedi
+		mov ebp, s_saveebp
+		ret
+	}
+#endif
+}
+
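+// Note on MODE_VUXYZ below: an xmm register may hold only some fields of a
+// VF register, so a flush merges the resident fields back with movhps/shufps
+// rather than doing a full 128-bit store.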
+// frees all regs taking into account the livevars
+void SuperVUFreeXMMregs(u32* livevars)
+{
+	for (int i = 0; i < iREGCNT_XMM; ++i)
+	{
+		if (xmmregs[i].inuse)
+		{
+			// same reg
+			if ((xmmregs[i].mode & MODE_WRITE))
+			{
+
+#ifdef SUPERVU_INTERCACHING
+				if (xmmregs[i].type == XMMTYPE_VFREG)
+				{
+					if (!(livevars[1] & (1 << xmmregs[i].reg))) continue;
+				}
+				else if (xmmregs[i].type == XMMTYPE_ACC)
+				{
+					if (!(livevars[0] & (1 << REG_ACC_FLAG))) continue;
+				}
+#endif
+
+				if (xmmregs[i].mode & MODE_VUXYZ)
+				{
+					// ALWAYS update
+					u32 addr = xmmregs[i].type == XMMTYPE_VFREG ? (uptr) & VU->VF[xmmregs[i].reg] : (uptr) & VU->ACC;
+
+					if (xmmregs[i].mode & MODE_VUZ)
+					{
+						SSE_MOVHPS_XMM_to_M64(addr, (x86SSERegType)i);
+						SSE_SHUFPS_M128_to_XMM((x86SSERegType)i, addr, 0xc4);
+					}
+					else
+					{
+						SSE_MOVHPS_M64_to_XMM((x86SSERegType)i, addr + 8);
+					}
+
+					xmmregs[i].mode &= ~MODE_VUXYZ;
+				}
+
+				_freeXMMreg(i);
+			}
+		}
+	}
+
+	//_freeXMMregs();
+}
+
+void SuperVUTestVU0Condition(u32 incstack)
+{
+	if (s_vu && !SUPERVU_CHECKCONDITION) return; // vu0 only
+
+	CMP32ItoM((uptr)&s_TotalVUCycles, 512); // sometimes games spin on vu0, so be careful with this value
+	// woody hangs if too high
+
+	if (incstack)
+	{
+		u8* ptr = JB8(0);
+
+		ADD32ItoR(ESP, incstack);
+		//CALLFunc((u32)timeout);
+		JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
+
+		x86SetJ8(ptr);
+	}
+	else JAE32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 6));
+}
+
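+// Per-block code generation: restores the cached x86/xmm register state,
+// recompiles each VU instruction, flushes the shadowed flag values back to
+// VU->VI, then emits the branch/jump epilogue and recurses into child blocks.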
+void VuBaseBlock::Recompile()
+{
+	if (type & BLOCKTYPE_ANALYZED) return;
+
+	x86Align(16);
+	pcode = x86Ptr;
+
+#ifdef _DEBUG
+	MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc);
+	MOV32ItoM((uptr)&VU->VI[REG_TPC], startpc);
+	MOV32ItoM((uptr)&s_svulast, startpc);
+
+	list<VuBaseBlock*>::iterator itparent;
+	for (itparent = parents.begin(); itparent != parents.end(); ++itparent)
+	{
+		if ((*itparent)->blocks.size() == 1 && (*itparent)->blocks.front()->startpc == startpc &&
+		        ((*itparent)->insts.size() < 2 || (----(*itparent)->insts.end())->regs[0].pipe != VUPIPE_BRANCH))
+		{
+			MOV32ItoM((uptr)&skipparent, (*itparent)->startpc);
+			break;
+		}
+	}
+
+	if (itparent == parents.end()) MOV32ItoM((uptr)&skipparent, -1);
+
+	MOV32ItoR(EAX, s_vu);
+	CALLFunc((uptr)svudispfn);
+#endif
+
+	s_pCurBlock = this;
+	s_needFlush = 3;
+	pc = startpc;
+	branch = 0;
+	s_recWriteQ = s_recWriteP = 0;
+	s_XGKICKReg = -1;
+	s_ScheduleXGKICK = 0;
+
+	s_ClipRead = s_PrevClipWrite = (uptr) & VU->VI[REG_CLIP_FLAG];
+	s_StatusRead = s_PrevStatusWrite = (uptr) & VU->VI[REG_STATUS_FLAG];
+	s_MACRead = s_PrevMACWrite = (uptr) & VU->VI[REG_MAC_FLAG];
+	s_PrevIWrite = (uptr) & VU->VI[REG_I];
+	s_JumpX86 = 0;
+	s_UnconditionalDelay = 0;
+
+	memcpy(xmmregs, startregs, sizeof(xmmregs));
+#ifdef SUPERVU_X86CACHING
+	if (nStartx86 >= 0)
+		memcpy(x86regs, &s_vecRegArray[nStartx86], sizeof(x86regs));
+	else
+		_initX86regs();
+#else
+	_initX86regs();
+#endif
+
+	list<VuInstruction>::iterator itinst;
+	for(itinst = insts.begin(); itinst != insts.end(); itinst++)
+	{
+		s_pCurInst = &(*itinst);
+		if (s_JumpX86 > 0)
+		{
+			if (!x86regs[s_JumpX86].inuse)
+			{
+				// load
+				s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_READ);
+			}
+			x86regs[s_JumpX86].needed = 1;
+		}
+
+		if (s_ScheduleXGKICK && s_XGKICKReg > 0)
+		{
+			assert(x86regs[s_XGKICKReg].inuse);
+			x86regs[s_XGKICKReg].needed = 1;
+		}
+		itinst->Recompile(itinst, vuxyz);
+
+		if (s_ScheduleXGKICK > 0)
+		{
+			if (s_ScheduleXGKICK-- == 1)
+			{
+				recVUMI_XGKICK_(VU);
+			}
+		}
+	}
+	assert(pc == endpc);
+	assert(s_ScheduleXGKICK == 0);
+
+	// flush flags
+	if (s_PrevClipWrite != (uptr)&VU->VI[REG_CLIP_FLAG])
+	{
+		MOV32MtoR(EAX, s_PrevClipWrite);
+		MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX);
+	}
+	if (s_PrevStatusWrite != (uptr)&VU->VI[REG_STATUS_FLAG])
+	{
+		MOV32MtoR(EAX, s_PrevStatusWrite);
+		MOV32RtoM((uptr)&VU->VI[REG_STATUS_FLAG], EAX);
+	}
+	if (s_PrevMACWrite != (uptr)&VU->VI[REG_MAC_FLAG])
+	{
+		MOV32MtoR(EAX, s_PrevMACWrite);
+		MOV32RtoM((uptr)&VU->VI[REG_MAC_FLAG], EAX);
+	}
+//	if( s_StatusRead != (uptr)&VU->VI[REG_STATUS_FLAG] ) {
+//		// only lower 8 bits valid!
+//		MOVZX32M8toR(EAX, s_StatusRead);
+//		MOV32RtoM((uptr)&VU->VI[REG_STATUS_FLAG], EAX);
+//	}
+//	if( s_MACRead != (uptr)&VU->VI[REG_MAC_FLAG] ) {
+//		// only lower 8 bits valid!
+//		MOVZX32M8toR(EAX, s_MACRead);
+//		MOV32RtoM((uptr)&VU->VI[REG_MAC_FLAG], EAX);
+//	}
+	if (s_PrevIWrite != (uptr)&VU->VI[REG_I])
+	{
+		MOV32ItoM((uptr)&VU->VI[REG_I], *(u32*)s_PrevIWrite); // never changes
+	}
+
+	ADD32ItoM((uptr)&s_TotalVUCycles, cycles);
+
+	// compute branches, jumps, eop
+	if (type & BLOCKTYPE_HASEOP)
+	{
+		// end
+		_freeXMMregs();
+		_freeX86regs();
+		AND32ItoM((uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu ? ~0x100 : ~0x001); // E flag
+		AND32ItoM((uptr)&VU->vifRegs->stat, ~0x4);
+
+		if (!branch) MOV32ItoM((uptr)&VU->VI[REG_TPC], endpc);
+
+		JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
+	}
+	else
+	{
+
+		u32 livevars[2] = {0};
+
+		list<VuInstruction>::iterator lastinst = GetInstIterAtPc(endpc - 8);
+		lastinst++;
+
+		if (lastinst != insts.end())
+		{
+			livevars[0] = lastinst->livevars[0];
+			livevars[1] = lastinst->livevars[1];
+		}
+		else
+		{
+			// take from children
+			if (blocks.size() > 0)
+			{
+				LISTBLOCKS::iterator itchild;
+				for(itchild = blocks.begin(); itchild != blocks.end(); itchild++)
+				{
+					livevars[0] |= (*itchild)->insts.front().livevars[0];
+					livevars[1] |= (*itchild)->insts.front().livevars[1];
+				}
+			}
+			else
+			{
+				livevars[0] = ~0;
+				livevars[1] = ~0;
+			}
+		}
+
+		SuperVUFreeXMMregs(livevars);
+
+		// get rid of any writes, otherwise _freeX86regs will write
+		x86regs[s_JumpX86].mode &= ~MODE_WRITE;
+
+		if (branch == 1)
+		{
+			if (!x86regs[s_JumpX86].inuse)
+			{
+				assert(x86regs[s_JumpX86].type == X86TYPE_VUJUMP);
+				s_JumpX86 = 0xffffffff; // notify to jump from g_recWriteback
+			}
+		}
+
+		// align VI regs
+#ifdef SUPERVU_X86CACHING
+		if (nEndx86 >= 0)
+		{
+			_x86regs* endx86 = &s_vecRegArray[nEndx86];
+			for (int i = 0; i < iREGCNT_GPR; ++i)
+			{
+				if (endx86[i].inuse)
+				{
+
+					if (s_JumpX86 == i && x86regs[s_JumpX86].inuse)
+					{
+						x86regs[s_JumpX86].inuse = 0;
+						x86regs[EAX].inuse = 1;
+						MOV32RtoR(EAX, s_JumpX86);
+						s_JumpX86 = EAX;
+					}
+
+					if (x86regs[i].inuse)
+					{
+						if (x86regs[i].type == endx86[i].type && x86regs[i].reg == endx86[i].reg)
+						{
+							_freeX86reg(i);
+							// will continue to use it
+							continue;
+						}
+
+#ifdef SUPERVU_INTERCACHING
+						if (x86regs[i].type == (X86TYPE_VI | (s_vu ? 
X86TYPE_VU1 : 0))) + { + if (livevars[0] & (1 << x86regs[i].reg)) + _freeX86reg(i); + else + x86regs[i].inuse = 0; + } + else +#endif + { + _freeX86reg(i); + } + } + + // realloc + _allocX86reg(i, endx86[i].type, endx86[i].reg, MODE_READ); + if (x86regs[i].mode & MODE_WRITE) + { + _freeX86reg(i); + x86regs[i].inuse = 1; + } + } + else _freeX86reg(i); + } + } + else _freeX86regs(); +#else + _freeX86regs(); +#endif + + // store the last block executed + MOV32ItoM((uptr)&g_nLastBlockExecuted, s_pCurBlock->startpc); + + switch (branch) + { + case 1: // branch, esi has new prog + + SuperVUTestVU0Condition(0); + + if (s_JumpX86 == 0xffffffff) + JMP32M((uptr)&g_recWriteback); + else + JMPR(s_JumpX86); + + break; + case 4: // jalr + pChildJumps[0] = (u32*)0xffffffff; + // fall through + + case 0x10: // jump, esi has new vupc + { + _freeXMMregs(); + _freeX86regs(); + + SuperVUTestVU0Condition(8); + + // already onto stack + CALLFunc((uptr)SuperVUGetProgram); + ADD32ItoR(ESP, 8); + JMPR(EAX); + break; + } + + case 0x13: // jr with uncon branch, uncond branch takes precendence (dropship) + { +// s32 delta = (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff) << 3; +// ADD32ItoRmOffset(ESP, delta, 0); + + ADD32ItoR(ESP, 8); // restore + pChildJumps[0] = (u32*)((uptr)JMP32(0) | 0x80000000); + break; + } + case 0: + case 3: // unconditional branch + pChildJumps[s_UnconditionalDelay] = (u32*)((uptr)JMP32(0) | 0x80000000); + break; + + default: + DevCon::Error("Bad branch %x\n", params branch); + assert(0); + break; + } + } + + pendcode = x86Ptr; + type |= BLOCKTYPE_ANALYZED; + + LISTBLOCKS::iterator itchild; + for(itchild = blocks.begin(); itchild != blocks.end(); itchild++) + { + (*itchild)->Recompile(); + } +} + +#define GET_VUXYZMODE(reg) 0//((vuxyz&(1<<(reg)))?MODE_VUXYZ:0) + +int VuInstruction::SetCachedRegs(int upper, u32 vuxyz) +{ + if (vfread0[upper] >= 0) + { + SuperVUFreeXMMreg(vfread0[upper], XMMTYPE_VFREG, regs[upper].VFread0); + _allocVFtoXMMreg(VU, vfread0[upper], regs[upper].VFread0, MODE_READ | GET_VUXYZMODE(regs[upper].VFread0)); + } + if (vfread1[upper] >= 0) + { + SuperVUFreeXMMreg(vfread1[upper], XMMTYPE_VFREG, regs[upper].VFread1); + _allocVFtoXMMreg(VU, vfread1[upper], regs[upper].VFread1, MODE_READ | GET_VUXYZMODE(regs[upper].VFread1)); + } + if (vfacc[upper] >= 0 && (regs[upper].VIread&(1 << REG_ACC_FLAG))) + { + SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0); + _allocACCtoXMMreg(VU, vfacc[upper], MODE_READ); + } + if (vfwrite[upper] >= 0) + { + assert(regs[upper].VFwrite > 0); + SuperVUFreeXMMreg(vfwrite[upper], XMMTYPE_VFREG, regs[upper].VFwrite); + _allocVFtoXMMreg(VU, vfwrite[upper], regs[upper].VFwrite, + MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? MODE_READ : 0) | GET_VUXYZMODE(regs[upper].VFwrite)); + } + if (vfacc[upper] >= 0 && (regs[upper].VIwrite&(1 << REG_ACC_FLAG))) + { + SuperVUFreeXMMreg(vfacc[upper], XMMTYPE_ACC, 0); + _allocACCtoXMMreg(VU, vfacc[upper], MODE_WRITE | (regs[upper].VFwxyzw != 0xf ? 
MODE_READ : 0));
+	}
+
+	int info = PROCESS_VU_SUPER;
+	if (vfread0[upper] >= 0) info |= PROCESS_EE_SET_S(vfread0[upper]);
+	if (vfread1[upper] >= 0) info |= PROCESS_EE_SET_T(vfread1[upper]);
+	if (vfacc[upper] >= 0) info |= PROCESS_VU_SET_ACC(vfacc[upper]);
+	if (vfwrite[upper] >= 0)
+	{
+		if (regs[upper].VFwrite == _Ft_ && vfread1[upper] < 0)
+		{
+			info |= PROCESS_EE_SET_T(vfwrite[upper]);
+		}
+		else
+		{
+			assert(regs[upper].VFwrite == _Fd_);
+			info |= PROCESS_EE_SET_D(vfwrite[upper]);
+		}
+	}
+
+	if (!(vffree[upper]&VFFREE_INVALID0))
+	{
+		SuperVUFreeXMMreg(vffree[upper]&0xf, XMMTYPE_TEMP, 0);
+		_allocTempXMMreg(XMMT_FPS, vffree[upper]&0xf);
+	}
+	info |= PROCESS_VU_SET_TEMP(vffree[upper] & 0xf);
+
+	if (vfflush[upper] >= 0)
+	{
+		SuperVUFreeXMMreg(vfflush[upper], XMMTYPE_TEMP, 0);
+		_allocTempXMMreg(XMMT_FPS, vfflush[upper]);
+	}
+
+	if (upper && (regs[upper].VIwrite & (1 << REG_CLIP_FLAG)))
+	{
+		// CLIP inst, need two extra temp registers, put it EEREC_D and EEREC_ACC
+		assert(vfwrite[upper] == -1);
+		SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0);
+		_allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf);
+		info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf);
+
+		SuperVUFreeXMMreg((vffree[upper] >> 16)&0xf, XMMTYPE_TEMP, 0);
+		_allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 16)&0xf);
+		info |= PROCESS_EE_SET_ACC((vffree[upper] >> 16) & 0xf);
+
+		_freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore
+		_freeXMMreg((vffree[upper] >> 16)&0xf); // don't need anymore
+	}
+	else if (regs[upper].VIwrite & (1 << REG_P))
+	{
+		SuperVUFreeXMMreg((vffree[upper] >> 8)&0xf, XMMTYPE_TEMP, 0);
+		_allocTempXMMreg(XMMT_FPS, (vffree[upper] >> 8)&0xf);
+		info |= PROCESS_EE_SET_D((vffree[upper] >> 8) & 0xf);
+		_freeXMMreg((vffree[upper] >> 8)&0xf); // don't need anymore
+	}
+
+	if (vfflush[upper] >= 0) _freeXMMreg(vfflush[upper]);
+	if (!(vffree[upper]&VFFREE_INVALID0))
+		_freeXMMreg(vffree[upper]&0xf); // don't need anymore
+
+	if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG)))
+		info |= PROCESS_VU_UPDATEFLAGS;
+
+	return info;
+}
+
+void VuInstruction::Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz)
+{
+	//static PCSX2_ALIGNED16(VECTOR _VF);
+	//static PCSX2_ALIGNED16(VECTOR _VFc);
+	u32 *ptr;
+	u8* pjmp;
+	int vfregstore = 0;
+
+	assert(s_pCurInst == this);
+	s_WriteToReadQ = 0;
+
+	ptr = (u32*) & VU->Micro[ pc ];
+
+	if (type & INST_Q_READ)
+		SuperVUFlush(0, (ptr[0] == 0x800003bf) || !!(regs[0].VIwrite & (1 << REG_Q)));
+	if (type & INST_P_READ)
+		SuperVUFlush(1, (ptr[0] == 0x800007bf) || !!(regs[0].VIwrite & (1 << REG_P)));
+
+	if (type & INST_DUMMY)
+	{
+
+		// find nParentPc
+		VuInstruction* pparentinst = NULL;
+
+		// if true, will check if parent block was executed before getting the results of the flags (superman returns)
+		int nParentCheckForExecution = -1;
+
+//		int badaddrs[] = {
+//			0x60,0x68,0x70,0x60,0x68,0x70,0x88,0x90,0x98,0x98,0xa8,0xb8,0x88,0x90,
+//			0x4a8,0x4a8,0x398,0x3a0,0x3a8,0xa0
+//		};
+
+#ifdef SUPERVU_PROPAGATEFLAGS
+		if (nParentPc != -1 && (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc))
+		{
+
+//			if( !s_vu ) {
+//				for(int j = 0; j < ARRAYSIZE(badaddrs); ++j) {
+//					if( badaddrs[j] == nParentPc )
+//						goto NoParent;
+//				}
+//			}
+
+			list<VuBaseBlock*>::iterator itblock;
+			for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
+			{
+				if (nParentPc >= (*itblock)->startpc && nParentPc < (*itblock)->endpc)
+				{
+					pparentinst = &(*(*itblock)->GetInstIterAtPc(nParentPc));
+					//if( !s_vu ) SysPrintf("%x ", nParentPc);
+ if (find(s_pCurBlock->parents.begin(), s_pCurBlock->parents.end(), *itblock) != s_pCurBlock->parents.end()) + nParentCheckForExecution = (*itblock)->startpc; + break; + } + } + + assert(pparentinst != NULL); + } +#endif + + if (type & INST_CLIP_WRITE) + { + if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) + { + + if (!CHECK_VUCLIPFLAGHACK && pparentinst != NULL) + { + + if (nParentCheckForExecution >= 0) + { + if (pparentinst->pClipWrite == 0) + pparentinst->pClipWrite = (uptr)SuperVUStaticAlloc(4); + + if (s_ClipRead == 0) + s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG]; + + CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); + u8* jptr = JNE8(0); + CMP32ItoM((uptr)&s_ClipRead, (uptr)&VU->VI[REG_CLIP_FLAG]); + u8* jptr2 = JE8(0); + MOV32MtoR(EAX, pparentinst->pClipWrite); + MOV32RtoM(s_ClipRead, EAX); + x86SetJ8(jptr); + x86SetJ8(jptr2); + } + } + else s_ClipRead = (uptr) & VU->VI[REG_CLIP_FLAG]; + } + else + { + s_ClipRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pClipWrite; + if (s_ClipRead == 0) Console::WriteLn("super ClipRead allocation error!"); + } + } + + // before modifying, check if they will ever be read + if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS) + { + + u8 outofblock = 0; + if (type & INST_STATUS_WRITE) + { + + if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) + { + + // reading from out of this block, so already flushed to mem + if (pparentinst != NULL) //&& pparentinst->pStatusWrite != NULL ) { + { + + // might not have processed it yet, so reserve a mem loc + if (pparentinst->pStatusWrite == 0) + { + pparentinst->pStatusWrite = (uptr)SuperVUStaticAlloc(4); + //MOV32ItoM(pparentinst->pStatusWrite, 0); + } + +// if( s_pCurBlock->prevFlagsOutOfBlock && s_StatusRead != NULL ) { +// // or instead since don't now which parent we came from +// MOV32MtoR(EAX, pparentinst->pStatusWrite); +// OR32RtoM(s_StatusRead, EAX); +// MOV32ItoM(pparentinst->pStatusWrite, 0); +// } + + if (nParentCheckForExecution >= 0) + { + + // don't now which parent we came from, so have to check +// uptr tempstatus = (uptr)SuperVUStaticAlloc(4); +// if( s_StatusRead != NULL ) +// MOV32MtoR(EAX, s_StatusRead); +// else +// MOV32MtoR(EAX, (uptr)&VU->VI[REG_STATUS_FLAG]); +// s_StatusRead = tempstatus; + + if (s_StatusRead == 0) + s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG]; + + CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); + u8* jptr = JNE8(0); + MOV32MtoR(EAX, pparentinst->pStatusWrite); + MOV32ItoM(pparentinst->pStatusWrite, 0); + MOV32RtoM(s_StatusRead, EAX); + x86SetJ8(jptr); + } + else + { + uptr tempstatus = (uptr)SuperVUStaticAlloc(4); + MOV32MtoR(EAX, pparentinst->pStatusWrite); + MOV32RtoM(tempstatus, EAX); + MOV32ItoM(pparentinst->pStatusWrite, 0); + s_StatusRead = tempstatus; + } + + outofblock = 2; + } + else + s_StatusRead = (uptr) & VU->VI[REG_STATUS_FLAG]; + } + else + { + s_StatusRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pStatusWrite; + if (s_StatusRead == 0) Console::WriteLn("super StatusRead allocation error!"); +// if( pc >= (u32)s_pCurBlock->endpc-8 ) { +// // towards the end, so variable might be leaded to another block (silent hill 4) +// uptr tempstatus = (uptr)SuperVUStaticAlloc(4); +// MOV32MtoR(EAX, s_StatusRead); +// MOV32RtoM(tempstatus, EAX); +// MOV32ItoM(s_StatusRead, 0); +// s_StatusRead = tempstatus; +// } + } + } + if (type & INST_MAC_WRITE) + { + + if (nParentPc < s_pCurBlock->startpc || nParentPc >= (int)pc) + { + // reading from out of this block, so already flushed to mem + + if (pparentinst != 
NULL) //&& pparentinst->pMACWrite != NULL ) { + { + // necessary for (katamari) + // towards the end, so variable might be leaked to another block (silent hill 4) + + // might not have processed it yet, so reserve a mem loc + if (pparentinst->pMACWrite == 0) + { + pparentinst->pMACWrite = (uptr)SuperVUStaticAlloc(4); + //MOV32ItoM(pparentinst->pMACWrite, 0); + } + +// if( s_pCurBlock->prevFlagsOutOfBlock && s_MACRead != NULL ) { +// // or instead since don't now which parent we came from +// MOV32MtoR(EAX, pparentinst->pMACWrite); +// OR32RtoM(s_MACRead, EAX); +// MOV32ItoM(pparentinst->pMACWrite, 0); +// } + if (nParentCheckForExecution >= 0) + { + + // don't now which parent we came from, so have to check +// uptr tempmac = (uptr)SuperVUStaticAlloc(4); +// if( s_MACRead != NULL ) +// MOV32MtoR(EAX, s_MACRead); +// else +// MOV32MtoR(EAX, (uptr)&VU->VI[REG_MAC_FLAG]); +// s_MACRead = tempmac; + + if (s_MACRead == 0) s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG]; + + CMP32ItoM((uptr)&g_nLastBlockExecuted, nParentCheckForExecution); + u8* jptr = JNE8(0); + MOV32MtoR(EAX, pparentinst->pMACWrite); + MOV32ItoM(pparentinst->pMACWrite, 0); + MOV32RtoM(s_MACRead, EAX); + x86SetJ8(jptr); + } + else + { + uptr tempMAC = (uptr)SuperVUStaticAlloc(4); + MOV32MtoR(EAX, pparentinst->pMACWrite); + MOV32RtoM(tempMAC, EAX); + MOV32ItoM(pparentinst->pMACWrite, 0); + s_MACRead = tempMAC; + } + + outofblock = 2; + } + else + s_MACRead = (uptr) & VU->VI[REG_MAC_FLAG]; + +// if( pc >= (u32)s_pCurBlock->endpc-8 ) { +// // towards the end, so variable might be leaked to another block (silent hill 4) +// uptr tempMAC = (uptr)SuperVUStaticAlloc(4); +// MOV32MtoR(EAX, s_MACRead); +// MOV32RtoM(tempMAC, EAX); +// MOV32ItoM(s_MACRead, 0); +// s_MACRead = tempMAC; +// } + } + else + { + s_MACRead = s_pCurBlock->GetInstIterAtPc(nParentPc)->pMACWrite; + } + } + + s_pCurBlock->prevFlagsOutOfBlock = outofblock; + } + else if (pparentinst != NULL) + { + // make sure to reset the mac and status flags! 
(katamari)
+			if (pparentinst->pStatusWrite)
+				MOV32ItoM(pparentinst->pStatusWrite, 0);
+			if (pparentinst->pMACWrite)
+				MOV32ItoM(pparentinst->pMACWrite, 0);
+		}
+
+		assert(s_ClipRead != 0);
+		assert(s_MACRead != 0);
+		assert(s_StatusRead != 0);
+
+		return;
+	}
+
+	s_pCurBlock->prevFlagsOutOfBlock = 0;
+
+#ifdef _DEBUG
+	MOV32ItoR(EAX, pc);
+#endif
+
+	assert(!(type & (INST_CLIP_WRITE | INST_STATUS_WRITE | INST_MAC_WRITE)));
+	pc += 8;
+
+	list<VuInstruction>::const_iterator itinst2;
+
+	if ((regs[0].VIwrite | regs[1].VIwrite) & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG)))
+	{
+		if (s_pCurBlock->type & BLOCKTYPE_MACFLAGS)
+		{
+			if (pMACWrite == 0)
+			{
+				pMACWrite = (uptr)SuperVUStaticAlloc(4);
+				//MOV32ItoM(pMACWrite, 0);
+			}
+			if (pStatusWrite == 0)
+			{
+				pStatusWrite = (uptr)SuperVUStaticAlloc(4);
+				//MOV32ItoM(pStatusWrite, 0);
+			}
+		}
+		else
+		{
+			assert(s_StatusRead == (uptr)&VU->VI[REG_STATUS_FLAG]);
+			assert(s_MACRead == (uptr)&VU->VI[REG_MAC_FLAG]);
+			pMACWrite = s_MACRead;
+			pStatusWrite = s_StatusRead;
+		}
+	}
+
+	if ((pClipWrite == 0) && ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG)))
+	{
+		pClipWrite = (uptr)SuperVUStaticAlloc(4);
+		//MOV32ItoM(pClipWrite, 0);
+	}
+
+#ifdef SUPERVU_X86CACHING
+	// redo the counters so that the proper regs are released
+	for (int j = 0; j < iREGCNT_GPR; ++j)
+	{
+		if (x86regs[j].inuse && X86_ISVI(x86regs[j].type))
+		{
+			int count = 0;
+			itinst2 = itinst;
+
+			while (itinst2 != s_pCurBlock->insts.end())
+			{
+				if ((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite | itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) && (1 << x86regs[j].reg))
+					break;
+
+				++count;
+				++itinst2;
+			}
+
+			x86regs[j].counter = 1000 - count;
+		}
+	}
+#endif
+
+	if (s_vu == 0 && (ptr[1] & 0x20000000)) // M flag
+	{
+		OR8ItoM((uptr)&VU->flags, VUFLAG_MFLAGSET);
+	}
+	if (ptr[1] & 0x10000000) // D flag
+	{
+		TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x400 : 0x004);
+		u8* ptr = JZ8(0);
+		OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x200 : 0x002);
+		_callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? INTC_VU1 : INTC_VU0);
+		x86SetJ8(ptr);
+	}
+	if (ptr[1] & 0x08000000) // T flag
+	{
+		TEST32ItoM((uptr)&VU0.VI[REG_FBRST].UL, s_vu ? 0x800 : 0x008);
+		u8* ptr = JZ8(0);
+		OR32ItoM((uptr)&VU0.VI[REG_VPU_STAT].UL, s_vu ? 0x400 : 0x004);
+		_callFunctionArg1((uptr)hwIntcIrq, MEM_CONSTTAG, s_vu ? 
INTC_VU1 : INTC_VU0); + x86SetJ8(ptr); + } + + // check upper flags + if (ptr[1] & 0x80000000) // I flag + { + + assert(!(regs[0].VIwrite & ((1 << REG_Q) | (1 << REG_P)))); + + VU->code = ptr[1]; + s_vuInfo = SetCachedRegs(1, vuxyz); + if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; + if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; + + recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo); + + s_PrevIWrite = (uptr)ptr; + _clearNeededXMMregs(); + _clearNeededX86regs(); + } + else + { + if (regs[0].VIwrite & (1 << REG_Q)) + { + + // search for all the insts between this inst and writeback + itinst2 = itinst; + ++itinst2; + u32 cacheq = (itinst2 == s_pCurBlock->insts.end()); + u32* codeptr2 = ptr + 2; + + while (itinst2 != s_pCurBlock->insts.end()) + { + if (!(itinst2->type & INST_DUMMY) && ((itinst2->regs[0].VIwrite&(1 << REG_Q)) || codeptr2[0] == 0x800003bf)) // waitq, or fdiv inst + { + break; + } + if ((itinst2->type & INST_Q_WRITE) && itinst2->nParentPc == pc - 8) + { + break; + } + if (itinst2->type & INST_Q_READ) + { + cacheq = 1; + break; + } + if (itinst2->type & INST_DUMMY) + { + ++itinst2; + continue; + } + codeptr2 += 2; + ++itinst2; + } + + if (itinst2 == s_pCurBlock->insts.end()) + cacheq = 1; + + int x86temp = -1; + if (cacheq) + x86temp = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); + + // new is written so flush old + // if type & INST_Q_READ, already flushed + if (!(type & INST_Q_READ) && s_recWriteQ == 0) MOV32MtoR(EAX, (uptr)&s_writeQ); + + if (cacheq) + MOV32MtoR(x86temp, (uptr)&s_TotalVUCycles); + + if (!(type & INST_Q_READ)) + { + if (s_recWriteQ == 0) + { + OR32RtoR(EAX, EAX); + pjmp = JS8(0); + MOV32MtoR(EAX, SuperVUGetVIAddr(REG_Q, 0)); + MOV32RtoM(SuperVUGetVIAddr(REG_Q, 1), EAX); + x86SetJ8(pjmp); + } + else if (s_needFlush & 1) + { + MOV32MtoR(EAX, SuperVUGetVIAddr(REG_Q, 0)); + MOV32RtoM(SuperVUGetVIAddr(REG_Q, 1), EAX); + s_needFlush &= ~1; + } + } + + // write new Q + if (cacheq) + { + assert(s_pCurInst->pqcycles > 1); + ADD32ItoR(x86temp, s_pCurInst->info.cycle + s_pCurInst->pqcycles); + MOV32RtoM((uptr)&s_writeQ, x86temp); + s_needFlush |= 1; + } + else + { + // won't be writing back + s_WriteToReadQ = 1; + s_needFlush &= ~1; + MOV32ItoM((uptr)&s_writeQ, 0x80000001); + } + + s_recWriteQ = s_pCurInst->info.cycle + s_pCurInst->pqcycles; + + if (x86temp >= 0) + _freeX86reg(x86temp); + } + + if (regs[0].VIwrite & (1 << REG_P)) + { + int x86temp = _allocX86reg(-1, X86TYPE_TEMP, 0, 0); + + // new is written so flush old + if (!(type & INST_P_READ) && s_recWriteP == 0) + MOV32MtoR(EAX, (uptr)&s_writeP); + MOV32MtoR(x86temp, (uptr)&s_TotalVUCycles); + + if (!(type & INST_P_READ)) + { + if (s_recWriteP == 0) + { + OR32RtoR(EAX, EAX); + pjmp = JS8(0); + MOV32MtoR(EAX, SuperVUGetVIAddr(REG_P, 0)); + MOV32RtoM(SuperVUGetVIAddr(REG_P, 1), EAX); + x86SetJ8(pjmp); + } + else if (s_needFlush & 2) + { + MOV32MtoR(EAX, SuperVUGetVIAddr(REG_P, 0)); + MOV32RtoM(SuperVUGetVIAddr(REG_P, 1), EAX); + s_needFlush &= ~2; + } + } + + // write new P + assert(s_pCurInst->pqcycles > 1); + ADD32ItoR(x86temp, s_pCurInst->info.cycle + s_pCurInst->pqcycles); + MOV32RtoM((uptr)&s_writeP, x86temp); + s_needFlush |= 2; + + s_recWriteP = s_pCurInst->info.cycle + s_pCurInst->pqcycles; + + _freeX86reg(x86temp); + } + + // waitq + if (ptr[0] == 0x800003bf) SuperVUFlush(0, 1); + // waitp + if (ptr[0] == 0x800007bf) SuperVUFlush(1, 1); + +#ifdef PCSX2_DEVBUILD + if (regs[1].VIread & regs[0].VIwrite & ~((1 << REG_Q) | (1 << REG_P) | (1 << REG_VF0_FLAG) | (1 << REG_ACC_FLAG))) + { + 
Console::Notice("*PCSX2*: Warning, VI write to the same reg %x in both lower/upper cycle %x", params regs[1].VIread & regs[0].VIwrite, s_pCurBlock->startpc); + } +#endif + + u32 modewrite = 0; + if (vfwrite[1] >= 0 && xmmregs[vfwrite[1]].inuse && xmmregs[vfwrite[1]].type == XMMTYPE_VFREG && xmmregs[vfwrite[1]].reg == regs[1].VFwrite) + modewrite = xmmregs[vfwrite[1]].mode & MODE_WRITE; + + VU->code = ptr[1]; + s_vuInfo = SetCachedRegs(1, vuxyz); + + if (vfwrite[1] >= 0) + { + assert(regs[1].VFwrite > 0); + + if (vfwrite[0] == vfwrite[1]) + { + //Console::WriteLn("*PCSX2*: Warning, VF write to the same reg in both lower/upper cycle %x", params s_pCurBlock->startpc); + } + + if (vfread0[0] == vfwrite[1] || vfread1[0] == vfwrite[1]) + { + assert(regs[0].VFread0 == regs[1].VFwrite || regs[0].VFread1 == regs[1].VFwrite); + assert(vfflush[0] >= 0); + if (modewrite) + { + SSE_MOVAPS_XMM_to_M128((uptr)&VU->VF[regs[1].VFwrite], (x86SSERegType)vfwrite[1]); + } + vfregstore = 1; + } + } + + if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; + if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; + + recVU_UPPER_OPCODE[ VU->code & 0x3f ](VU, s_vuInfo); + _clearNeededXMMregs(); + _clearNeededX86regs(); + + // necessary because status can be set by both upper and lower + if (regs[1].VIwrite & (1 << REG_STATUS_FLAG)) + { + assert(pStatusWrite != 0); + s_PrevStatusWrite = pStatusWrite; + } + + VU->code = ptr[0]; + s_vuInfo = SetCachedRegs(0, vuxyz); + + if (vfregstore) + { + // load + SSE_MOVAPS_M128_to_XMM(vfflush[0], (uptr)&VU->VF[regs[1].VFwrite]); + + assert(xmmregs[vfwrite[1]].mode & MODE_WRITE); + + // replace with vfflush + if (_Fs_ == regs[1].VFwrite) + { + s_vuInfo &= ~PROCESS_EE_SET_S(0xf); + s_vuInfo |= PROCESS_EE_SET_S(vfflush[0]); + } + if (_Ft_ == regs[1].VFwrite) + { + s_vuInfo &= ~PROCESS_EE_SET_T(0xf); + s_vuInfo |= PROCESS_EE_SET_T(vfflush[0]); + } + + xmmregs[vfflush[0]].mode |= MODE_NOFLUSH | MODE_WRITE; // so that lower inst doesn't flush + } + + // notify vuinsts that upper inst is a fmac + if (regs[1].pipe == VUPIPE_FMAC) + s_vuInfo |= PROCESS_VU_SET_FMAC(); + + if (s_JumpX86 > 0) x86regs[s_JumpX86].needed = 1; + if (s_ScheduleXGKICK && s_XGKICKReg > 0) x86regs[s_XGKICKReg].needed = 1; + +#ifdef SUPERVU_VIBRANCHDELAY + if (type & INST_CACHE_VI) + { + assert(vicached >= 0); + int cachedreg = _allocX86reg(-1, X86TYPE_VI | (s_vu ? 
X86TYPE_VU1 : 0), vicached, MODE_READ);
+		MOV32RtoM((uptr)&s_VIBranchDelay, cachedreg);
+	}
+#endif
+
+	// check if inst before branch and the write is the same as the read in the branch (wipeout)
+//	int oldreg=0;
+//	if( pc == s_pCurBlock->endpc-16 ) {
+//		itinst2 = itinst; ++itinst2;
+//		if( itinst2->regs[0].pipe == VUPIPE_BRANCH && (itinst->regs[0].VIwrite&itinst2->regs[0].VIread) ) {
+//
+//			CALLFunc((u32)branchfn);
+//			assert( itinst->regs[0].VIwrite & 0xffff );
+//			Console::WriteLn("vi write before branch");
+//			for(s_CacheVIReg = 0; s_CacheVIReg < 16; ++s_CacheVIReg) {
+//				if( itinst->regs[0].VIwrite & (1<<s_CacheVIReg) )
+//					break;
+//			}
+//
+//			s_CacheVIX86 = _allocX86reg(-1, X86TYPE_VITEMP, s_CacheVIReg, MODE_WRITE);
+//		}
+//	}
+//	else if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) {
+//		assert( s_CacheVIX86 > 0 && x86regs[s_CacheVIX86].inuse && x86regs[s_CacheVIX86].reg == s_CacheVIReg && x86regs[s_CacheVIX86].type == X86TYPE_VITEMP );
+//
+//		oldreg = _allocX86reg(-1, X86TYPE_VI|(s_vu?X86TYPE_VU1:0), s_CacheVIReg, MODE_READ);
+//		x86regs[s_CacheVIX86].needed = 1;
+//		assert( x86regs[oldreg].mode & MODE_WRITE );
+//
+//		x86regs[s_CacheVIX86].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0);
+//		x86regs[oldreg].type = X86TYPE_VITEMP;
+//	}
+
+	recVU_LOWER_OPCODE[ VU->code >> 25 ](VU, s_vuInfo);
+
+//	if( pc == s_pCurBlock->endpc-8 && s_CacheVIReg >= 0 ) {
+//		// revert
+//		x86regs[s_CacheVIX86].inuse = 0;
+//		x86regs[oldreg].type = X86TYPE_VI|(s_vu?X86TYPE_VU1:0);
+//	}
+
+	_clearNeededXMMregs();
+	_clearNeededX86regs();
+	}
+
+	// clip is always written so ok
+	if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_CLIP_FLAG))
+	{
+		assert(pClipWrite != 0);
+		s_PrevClipWrite = pClipWrite;
+	}
+
+	if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_STATUS_FLAG))
+	{
+		assert(pStatusWrite != 0);
+		s_PrevStatusWrite = pStatusWrite;
+	}
+
+	if ((regs[0].VIwrite | regs[1].VIwrite) & (1 << REG_MAC_FLAG))
+	{
+		assert(pMACWrite != 0);
+		s_PrevMACWrite = pMACWrite;
+	}
+}
+
+///////////////////////////////////
+// Super VU Recompilation Tables //
+///////////////////////////////////
+
+void recVUMI_BranchHandle()
+{
+	int bpc = _recbranchAddr(VU->code);
+	int curjump = 0;
+
+	if (s_pCurInst->type & INST_BRANCH_DELAY)
+	{
+		assert((branch&0x17) != 0x10 && (branch&0x17) != 4); // no jump handlig for now
+
+		if ((branch & 0x7) == 3)
+		{
+			// previous was a direct jump
+			curjump = 1;
+		}
+		else if (branch & 1) curjump = 2;
+	}
+
+	assert(s_JumpX86 > 0);
+
+	if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
+		MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc);
+	MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR
+	s_pCurBlock->pChildJumps[curjump] = (u32*)x86Ptr - 1;
+
+	if (!(s_pCurInst->type & INST_BRANCH_DELAY))
+	{
+		j8Ptr[1] = JMP8(0);
+		x86SetJ8(j8Ptr[ 0 ]);
+
+		if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION)
+			MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), pc + 8);
+		MOV32ItoR(s_JumpX86, 1); // use 1 to disable optimization to XOR
+		s_pCurBlock->pChildJumps[curjump+1] = (u32*)x86Ptr - 1;
+
+		x86SetJ8(j8Ptr[ 1 ]);
+	}
+	else
+		x86SetJ8(j8Ptr[ 0 ]);
+
+	branch |= 1;
+}
+
+// supervu specific insts
+void recVUMI_IBQ_prep()
+{
+	int isreg, itreg;
+
+	if (_Is_ == 0)
+	{
+#ifdef SUPERVU_VIBRANCHDELAY
+		if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _It_)
+		{
+			itreg = -1;
+		}
+		else
+#endif
+		{
+			itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? 
X86TYPE_VU1 : 0), _It_, MODE_READ); + } + + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + + if (itreg >= 0) + { + CMP16ItoR(itreg, 0); + } + else CMP16ItoM(SuperVUGetVIAddr(_It_, 1), 0); + } + else if (_It_ == 0) + { +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = -1; + } + else +#endif + { + isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + + if (isreg >= 0) + { + CMP16ItoR(isreg, 0); + } + else CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0); + + } + else + { + _addNeededX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_); + +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = -1; + } + else +#endif + { + isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _It_) + { + itreg = -1; + + if (isreg <= 0) + { + // allocate isreg + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = _allocX86reg(-1, X86TYPE_TEMP, 0, MODE_READ | MODE_WRITE); + MOV32MtoR(isreg, SuperVUGetVIAddr(_Is_, 1)); + } + else + isreg = _allocX86reg(-1, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + } + else +#endif + { + itreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _It_, MODE_READ); + } + + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + + if (isreg >= 0) + { + if (itreg >= 0) + { + CMP16RtoR(isreg, itreg); + } + else CMP16MtoR(isreg, SuperVUGetVIAddr(_It_, 1)); + } + else if (itreg >= 0) + { + CMP16MtoR(itreg, SuperVUGetVIAddr(_Is_, 1)); + } + else + { + isreg = _allocX86reg(-1, X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + CMP16MtoR(isreg, SuperVUGetVIAddr(_It_, 1)); + } + } +} + +void recVUMI_IBEQ(VURegs* vuu, s32 info) +{ + recVUMI_IBQ_prep(); + j8Ptr[ 0 ] = JNE8(0); + recVUMI_BranchHandle(); +} + +void recVUMI_IBGEZ(VURegs* vuu, s32 info) +{ + int isreg; + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = -1; + } + else +#endif + { + isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + + if (isreg >= 0) + { + TEST16RtoR(isreg, isreg); + j8Ptr[ 0 ] = JS8(0); + } + else + { + CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); + j8Ptr[ 0 ] = JL8(0); + } + + recVUMI_BranchHandle(); +} + +void recVUMI_IBGTZ(VURegs* vuu, s32 info) +{ + int isreg; + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = -1; + } + else +#endif + { + isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + + if (isreg >= 0) + { + CMP16ItoR(isreg, 0); + j8Ptr[ 0 ] = JLE8(0); + } + else + { + CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); + j8Ptr[ 0 ] = JLE8(0); + } + recVUMI_BranchHandle(); +} + +void recVUMI_IBLEZ(VURegs* vuu, s32 info) +{ + int isreg; + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = -1; + } + else +#endif + { + isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ?
X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + + if (isreg >= 0) + { + CMP16ItoR(isreg, 0); + j8Ptr[ 0 ] = JG8(0); + } + else + { + CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); + j8Ptr[ 0 ] = JG8(0); + } + recVUMI_BranchHandle(); +} + +void recVUMI_IBLTZ(VURegs* vuu, s32 info) +{ + int isreg; + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + +#ifdef SUPERVU_VIBRANCHDELAY + if (s_pCurInst->vicached >= 0 && s_pCurInst->vicached == _Is_) + { + isreg = -1; + } + else +#endif + { + isreg = _checkX86reg(X86TYPE_VI | (VU == &VU1 ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + } + + if (isreg >= 0) + { + TEST16RtoR(isreg, isreg); + j8Ptr[ 0 ] = JNS8(0); + } + else + { + CMP16ItoM(SuperVUGetVIAddr(_Is_, 1), 0x0); + j8Ptr[ 0 ] = JGE8(0); + } + recVUMI_BranchHandle(); +} + +void recVUMI_IBNE(VURegs* vuu, s32 info) +{ + recVUMI_IBQ_prep(); + j8Ptr[ 0 ] = JE8(0); + recVUMI_BranchHandle(); +} + +void recVUMI_B(VURegs* vuu, s32 info) +{ + // supervu will take care of the rest + int bpc = _recbranchAddr(VU->code); + if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) + MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); + + // loops to self, so check condition + if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION)) + { + SuperVUTestVU0Condition(0); + } + + if (s_pCurBlock->blocks.size() > 1) + { + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + MOV32ItoR(s_JumpX86, 1); + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1; + s_UnconditionalDelay = 1; + } + + branch |= 3; +} + +void recVUMI_BAL(VURegs* vuu, s32 info) +{ + int bpc = _recbranchAddr(VU->code); + if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0 || SUPERVU_CHECKCONDITION) + MOV32ItoM(SuperVUGetVIAddr(REG_TPC, 0), bpc); + + // loops to self, so check condition + if (bpc == s_pCurBlock->startpc && (s_vu == 0 || SUPERVU_CHECKCONDITION)) + { + SuperVUTestVU0Condition(0); + } + + if (_It_) + { + _deleteX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_, 2); + MOV16ItoM(SuperVUGetVIAddr(_It_, 0), (pc + 8) >> 3); + } + + if (s_pCurBlock->blocks.size() > 1) + { + s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_WRITE); + MOV32ItoR(s_JumpX86, 1); + s_pCurBlock->pChildJumps[(s_pCurInst->type & INST_BRANCH_DELAY)?1:0] = (u32*)x86Ptr - 1; + s_UnconditionalDelay = 1; + } + + branch |= 3; +} + +void recVUMI_JR(VURegs* vuu, s32 info) +{ + int isreg = _allocX86reg(-1, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + LEA32RStoR(EAX, isreg, 3); + + //Mask the address to something valid + if (vuu == &VU0) + AND32ItoR(EAX, 0xfff); + else + AND32ItoR(EAX, 0x3fff); + + if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX); + + if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP)) + { + PUSH32I(s_vu); + PUSH32R(EAX); + } + branch |= 0x10; // 0x08 is reserved +} + +void recVUMI_JALR(VURegs* vuu, s32 info) +{ + _addNeededX86reg(X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _It_); + + int isreg = _allocX86reg(-1, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + LEA32RStoR(EAX, isreg, 3); + + //Mask the address to something valid + if (vuu == &VU0) + AND32ItoR(EAX, 0xfff); + else + AND32ItoR(EAX, 0x3fff); + + if (_It_) + { + _deleteX86reg(X86TYPE_VI | (s_vu ? 
X86TYPE_VU1 : 0), _It_, 2); + MOV16ItoM(SuperVUGetVIAddr(_It_, 0), (pc + 8) >> 3); + } + + if ((s_pCurBlock->type & BLOCKTYPE_HASEOP) || s_vu == 0) MOV32RtoM(SuperVUGetVIAddr(REG_TPC, 0), EAX); + + if (!(s_pCurBlock->type & BLOCKTYPE_HASEOP)) + { + PUSH32I(s_vu); + PUSH32R(EAX); + } + + branch |= 4; +} + +#ifdef PCSX2_DEVBUILD +void vu1xgkick(u32* pMem, u32 addr) +{ + assert(addr < 0x4000); +#ifdef SUPERVU_COUNT + StopSVUCounter(); +#endif + + GSGIFTRANSFER1(pMem, addr); + +#ifdef SUPERVU_COUNT + StartSVUCounter(); +#endif +} +#endif + +void recVUMI_XGKICK_(VURegs *VU) +{ + assert(s_XGKICKReg > 0 && x86regs[s_XGKICKReg].inuse && x86regs[s_XGKICKReg].type == X86TYPE_VITEMP); + + x86regs[s_XGKICKReg].inuse = 0; // so free doesn't flush + _freeX86regs(); + _freeXMMregs(); + + PUSH32R(s_XGKICKReg); + PUSH32I((uptr)VU->Mem); + + //CALLFunc((u32)countfn); + + if (mtgsThread != NULL) + { + CALLFunc((uptr)VU1XGKICK_MTGSTransfer); + ADD32ItoR(ESP, 8); + } + else + { +#ifdef PCSX2_DEVBUILD + CALLFunc((uptr)vu1xgkick); + ADD32ItoR(ESP, 8); +#else + CALLFunc((uptr)GSgifTransfer1); +#endif + } + psHu32(GIF_STAT) &= ~(GIF_STAT_APATH1 | GIF_STAT_OPH); // Probably should be done in the recompiled code itself, but I'm a rec noob :( (Refraction) + s_ScheduleXGKICK = 0; +} + +void recVUMI_XGKICK(VURegs *VU, int info) +{ + if (s_ScheduleXGKICK) + { + // second xgkick, so launch the first + recVUMI_XGKICK_(VU); + } + + int isreg = _allocX86reg(X86ARG2, X86TYPE_VI | (s_vu ? X86TYPE_VU1 : 0), _Is_, MODE_READ); + _freeX86reg(isreg); // flush + x86regs[isreg].inuse = 1; + x86regs[isreg].type = X86TYPE_VITEMP; + x86regs[isreg].needed = 1; + x86regs[isreg].mode = MODE_WRITE | MODE_READ; + SHL32ItoR(isreg, 4); + AND32ItoR(isreg, 0x3fff); + s_XGKICKReg = isreg; + psHu32(GIF_STAT) |= (GIF_STAT_APATH1 | GIF_STAT_OPH); // Probably should be done in the recompiled code itself, but I'm a rec noob :( (Refraction) + + if (!SUPERVU_XGKICKDELAY || pc == s_pCurBlock->endpc) + { + recVUMI_XGKICK_(VU); + } + else + { + // Erementar Gerad hack. + // Corrects the color of some graphics in a game whose graphics are somewhat scrambled either way; it only works with ZeroGS, not even ZZOgl.
:) + if (g_VUGameFixes & VUFIX_XGKICKDELAY2) + s_ScheduleXGKICK = min((u32)4, (s_pCurBlock->endpc - pc) / 8); + else + s_ScheduleXGKICK = 2; + } +} + +void recVU_UPPER_FD_00(VURegs* VU, s32 info); +void recVU_UPPER_FD_01(VURegs* VU, s32 info); +void recVU_UPPER_FD_10(VURegs* VU, s32 info); +void recVU_UPPER_FD_11(VURegs* VU, s32 info); +void recVULowerOP(VURegs* VU, s32 info); +void recVULowerOP_T3_00(VURegs* VU, s32 info); +void recVULowerOP_T3_01(VURegs* VU, s32 info); +void recVULowerOP_T3_10(VURegs* VU, s32 info); +void recVULowerOP_T3_11(VURegs* VU, s32 info); +void recVUunknown(VURegs* VU, s32 info); + +void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info) = +{ + recVUMI_LQ , recVUMI_SQ , recVUunknown , recVUunknown, + recVUMI_ILW , recVUMI_ISW , recVUunknown , recVUunknown, + recVUMI_IADDIU, recVUMI_ISUBIU, recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUMI_FCEQ , recVUMI_FCSET , recVUMI_FCAND, recVUMI_FCOR, /* 0x10 */ + recVUMI_FSEQ , recVUMI_FSSET , recVUMI_FSAND, recVUMI_FSOR, + recVUMI_FMEQ , recVUunknown , recVUMI_FMAND, recVUMI_FMOR, + recVUMI_FCGET , recVUunknown , recVUunknown , recVUunknown, + recVUMI_B , recVUMI_BAL , recVUunknown , recVUunknown, /* 0x20 */ + recVUMI_JR , recVUMI_JALR , recVUunknown , recVUunknown, + recVUMI_IBEQ , recVUMI_IBNE , recVUunknown , recVUunknown, + recVUMI_IBLTZ , recVUMI_IBGTZ , recVUMI_IBLEZ, recVUMI_IBGEZ, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVULowerOP , recVUunknown , recVUunknown , recVUunknown, /* 0x40*/ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x50 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x60 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x70 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, +}; + +void (*recVULowerOP_T3_00_OPCODE[32])(VURegs* VU, s32 info) = +{ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUMI_MOVE , recVUMI_LQI , recVUMI_DIV , recVUMI_MTIR, + recVUMI_RNEXT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUMI_MFP , recVUMI_XTOP , recVUMI_XGKICK, + recVUMI_ESADD , recVUMI_EATANxy, recVUMI_ESQRT, recVUMI_ESIN, +}; + +void (*recVULowerOP_T3_01_OPCODE[32])(VURegs* VU, s32 info) = +{ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUMI_MR32 , 
recVUMI_SQI , recVUMI_SQRT , recVUMI_MFIR, + recVUMI_RGET , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUMI_XITOP, recVUunknown, + recVUMI_ERSADD, recVUMI_EATANxz, recVUMI_ERSQRT, recVUMI_EATAN, +}; + +void (*recVULowerOP_T3_10_OPCODE[32])(VURegs* VU, s32 info) = +{ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUMI_LQD , recVUMI_RSQRT, recVUMI_ILWR, + recVUMI_RINIT , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUMI_ELENG , recVUMI_ESUM , recVUMI_ERCPR, recVUMI_EEXP, +}; + +void (*recVULowerOP_T3_11_OPCODE[32])(VURegs* VU, s32 info) = +{ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUMI_SQD , recVUMI_WAITQ, recVUMI_ISWR, + recVUMI_RXOR , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUMI_ERLENG, recVUunknown , recVUMI_WAITP, recVUunknown, +}; + +void (*recVULowerOP_OPCODE[64])(VURegs* VU, s32 info) = +{ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x10 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x20 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUMI_IADD , recVUMI_ISUB , recVUMI_IADDI, recVUunknown, /* 0x30 */ + recVUMI_IAND , recVUMI_IOR , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVULowerOP_T3_00, recVULowerOP_T3_01, recVULowerOP_T3_10, recVULowerOP_T3_11, +}; + +void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info) = +{ + recVUMI_ADDx , recVUMI_ADDy , recVUMI_ADDz , recVUMI_ADDw, + recVUMI_SUBx , recVUMI_SUBy , recVUMI_SUBz , recVUMI_SUBw, + recVUMI_MADDx , recVUMI_MADDy , recVUMI_MADDz , recVUMI_MADDw, + recVUMI_MSUBx , recVUMI_MSUBy , recVUMI_MSUBz , recVUMI_MSUBw, + recVUMI_MAXx , recVUMI_MAXy , recVUMI_MAXz , recVUMI_MAXw, /* 0x10 */ + recVUMI_MINIx , recVUMI_MINIy , recVUMI_MINIz , recVUMI_MINIw, + recVUMI_MULx , recVUMI_MULy , recVUMI_MULz , recVUMI_MULw, + recVUMI_MULq , recVUMI_MAXi , recVUMI_MULi , recVUMI_MINIi, + recVUMI_ADDq , recVUMI_MADDq , recVUMI_ADDi , recVUMI_MADDi, /* 0x20 */ + recVUMI_SUBq , recVUMI_MSUBq , recVUMI_SUBi , recVUMI_MSUBi, + recVUMI_ADD , recVUMI_MADD , recVUMI_MUL , recVUMI_MAX, + recVUMI_SUB , recVUMI_MSUB , recVUMI_OPMSUB, recVUMI_MINI, + recVUunknown , recVUunknown , recVUunknown , recVUunknown, /* 0x30 */ + recVUunknown , recVUunknown , recVUunknown , recVUunknown, + recVUunknown , recVUunknown , recVUunknown , 
recVUunknown, + recVU_UPPER_FD_00, recVU_UPPER_FD_01, recVU_UPPER_FD_10, recVU_UPPER_FD_11, +}; + +void (*recVU_UPPER_FD_00_TABLE[32])(VURegs* VU, s32 info) = +{ + recVUMI_ADDAx, recVUMI_SUBAx , recVUMI_MADDAx, recVUMI_MSUBAx, + recVUMI_ITOF0, recVUMI_FTOI0, recVUMI_MULAx , recVUMI_MULAq , + recVUMI_ADDAq, recVUMI_SUBAq, recVUMI_ADDA , recVUMI_SUBA , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , +}; + +void (*recVU_UPPER_FD_01_TABLE[32])(VURegs* VU, s32 info) = +{ + recVUMI_ADDAy , recVUMI_SUBAy , recVUMI_MADDAy, recVUMI_MSUBAy, + recVUMI_ITOF4 , recVUMI_FTOI4 , recVUMI_MULAy , recVUMI_ABS , + recVUMI_MADDAq, recVUMI_MSUBAq, recVUMI_MADDA , recVUMI_MSUBA , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , +}; + +void (*recVU_UPPER_FD_10_TABLE[32])(VURegs* VU, s32 info) = +{ + recVUMI_ADDAz , recVUMI_SUBAz , recVUMI_MADDAz, recVUMI_MSUBAz, + recVUMI_ITOF12, recVUMI_FTOI12, recVUMI_MULAz , recVUMI_MULAi , + recVUMI_ADDAi, recVUMI_SUBAi , recVUMI_MULA , recVUMI_OPMULA, + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , +}; + +void (*recVU_UPPER_FD_11_TABLE[32])(VURegs* VU, s32 info) = +{ + recVUMI_ADDAw , recVUMI_SUBAw , recVUMI_MADDAw, recVUMI_MSUBAw, + recVUMI_ITOF15, recVUMI_FTOI15, recVUMI_MULAw , recVUMI_CLIP , + recVUMI_MADDAi, recVUMI_MSUBAi, recVUunknown , recVUMI_NOP , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , + recVUunknown , recVUunknown , recVUunknown , recVUunknown , +}; + +void recVU_UPPER_FD_00(VURegs* VU, s32 info) +{ + recVU_UPPER_FD_00_TABLE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVU_UPPER_FD_01(VURegs* VU, s32 info) +{ + recVU_UPPER_FD_01_TABLE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVU_UPPER_FD_10(VURegs* VU, s32 info) +{ + recVU_UPPER_FD_10_TABLE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVU_UPPER_FD_11(VURegs* VU, s32 info) +{ + recVU_UPPER_FD_11_TABLE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVULowerOP(VURegs* VU, s32 info) +{ + recVULowerOP_OPCODE[ VU->code & 0x3f ](VU, info); +} + +void recVULowerOP_T3_00(VURegs* VU, s32 info) +{ + recVULowerOP_T3_00_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVULowerOP_T3_01(VURegs* VU, s32 info) +{ + recVULowerOP_T3_01_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVULowerOP_T3_10(VURegs* VU, s32 info) +{ + recVULowerOP_T3_10_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVULowerOP_T3_11(VURegs* VU, s32 info) +{ + recVULowerOP_T3_11_OPCODE[(VU->code >> 6) & 0x1f ](VU, info); +} + +void recVUunknown(VURegs* VU, s32 info) +{ + Console::Notice("Unknown SVU micromode opcode called"); +} diff --git 
a/pcsx2/x86/iVUzerorec.h b/pcsx2/x86/sVU_zerorec.h similarity index 85% rename from pcsx2/x86/iVUzerorec.h rename to pcsx2/x86/sVU_zerorec.h index f81ec8d7b1..060ef7d9ce 100644 --- a/pcsx2/x86/iVUzerorec.h +++ b/pcsx2/x86/sVU_zerorec.h @@ -1,50 +1,47 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2009 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - */ - -// Super VU recompiler - author: zerofrog(@gmail.com) - -#ifndef VU1_SUPER_RECOMPILER -#define VU1_SUPER_RECOMPILER - -#include "iVUmicro.h" - -extern void SuperVUAlloc(int vuindex); // global VU resources are automatically allocated if necessary. -extern void SuperVUDestroy(int vuindex); // if vuindex is -1, destroys everything -extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything - -//Using assembly code from an external file. -#ifdef __LINUX__ -extern "C" { -#endif -extern void SuperVUExecuteProgram(u32 startpc, int vuindex); -extern void SuperVUEndProgram(); -extern void svudispfntemp(); -#ifdef __LINUX__ -} -#endif -extern void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex); - -// read = 0, will write to reg -// read = 1, will read from reg -// read = 2, addr of previously written reg (used for status and clip flags) -extern u32 SuperVUGetVIAddr(int reg, int read); - -// if p == 0, flush q else flush p; if wait is != 0, waits for p/q -extern void SuperVUFlush(int p, int wait); - -#endif +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2009 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */ + +// Super VU recompiler - author: zerofrog(@gmail.com) + +#pragma once + +#include "sVU_Micro.h" + +extern void SuperVUAlloc(int vuindex); // global VU resources are automatically allocated if necessary. +extern void SuperVUDestroy(int vuindex); // if vuindex is -1, destroys everything +extern void SuperVUReset(int vuindex); // if vuindex is -1, resets everything + +//Using assembly code from an external file. 
+#ifdef __LINUX__ +extern "C" { +#endif +extern void SuperVUExecuteProgram(u32 startpc, int vuindex); +extern void SuperVUEndProgram(); +extern void svudispfntemp(); +#ifdef __LINUX__ +} +#endif +extern void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex); + +// read = 0, will write to reg +// read = 1, will read from reg +// read = 2, addr of previously written reg (used for status and clip flags) +extern u32 SuperVUGetVIAddr(int reg, int read); + +// if p == 0, flush q else flush p; if wait is != 0, waits for p/q +extern void SuperVUFlush(int p, int wait);
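
Note on the dispatch scheme used by the recompilation tables above: recVU_LOWER_OPCODE is indexed by the top seven bits of the instruction word (VU->code >> 25); its entry 0x40, recVULowerOP, re-dispatches on the low six bits (VU->code & 0x3f), and the four recVULowerOP_T3_* tables decode bits 6..10 ((VU->code >> 6) & 0x1f). Upper-pipeline words instead go through recVU_UPPER_OPCODE on the low six bits. The following stand-alone C++ sketch (hypothetical names; not part of this patch) illustrates the same two-level table decode, using the fact that in the tables above recVULowerOP sits at first-level slot 0x40 and recVUMI_IADD at minor opcode 0x30:

// Illustrative sketch only -- mirrors the recVU_* table layout with made-up names.
#include <cstdint>
#include <cstdio>

typedef void (*OpFn)(uint32_t code);

static void opUnknown(uint32_t code) { std::printf("unknown opcode %08x\n", (unsigned)code); }
static void opIADD(uint32_t code)    { (void)code; std::printf("IADD\n"); }

static OpFn lowerOpTable[64];  // second level, indexed by code & 0x3f (cf. recVULowerOP_OPCODE)
static void opLowerOP(uint32_t code) { lowerOpTable[code & 0x3f](code); }

static OpFn lowerTable[128];   // first level, indexed by code >> 25 (cf. recVU_LOWER_OPCODE)

int main()
{
    for (int i = 0; i < 128; ++i) lowerTable[i] = opUnknown;
    for (int i = 0; i < 64; ++i) lowerOpTable[i] = opUnknown;

    lowerTable[0x40] = opLowerOP;   // entry 0x40 chains to the second-level table
    lowerOpTable[0x30] = opIADD;    // IADD sits at minor opcode 0x30

    const uint32_t code = (0x40u << 25) | 0x30; // fabricated IADD encoding
    lowerTable[code >> 25](code);               // prints "IADD"
    return 0;
}

The real recompiler works the same way, except that each table entry emits x86 code for the decoded VU instruction instead of printing its name, and unpopulated slots fall through to recVUunknown.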