/* Pcsx2 - Pc Ps2 Emulator
 * Copyright (C) 2002-2003 Pcsx2 Team
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "Common.h"
#include "InterTables.h"
#include "ix86/ix86.h"
#include "iR5900.h"
#include "iMMI.h"
#include "iFPU.h"
#include "iCP0.h"
#include "VUmicro.h"
#include "VUflags.h"
#include "iVUmicro.h"
#include "iVU0micro.h"
#include "iVU1micro.h"
#include "iVUops.h"
#include "iVUzerorec.h"

#ifdef __MSCW32__
#pragma warning(disable:4244)
#pragma warning(disable:4761)
#endif

int vucycle;
int vucycleold;
_vuopinfo *cinfo = NULL;

// Lower/Upper instructions can use these macros.
#define _Ft_ (( VU->code >> 16) & 0x1F)  // The rt part of the instruction register
#define _Fs_ (( VU->code >> 11) & 0x1F)  // The rd part of the instruction register
#define _Fd_ (( VU->code >> 6) & 0x1F)   // The sa part of the instruction register

#define _X (( VU->code>>24) & 0x1)
#define _Y (( VU->code>>23) & 0x1)
#define _Z (( VU->code>>22) & 0x1)
#define _W (( VU->code>>21) & 0x1)

#define _XYZW_SS (_X+_Y+_Z+_W==1)

#define _Fsf_ (( VU->code >> 21) & 0x03)
#define _Ftf_ (( VU->code >> 23) & 0x03)

#define _Imm11_ (s32)(VU->code & 0x400 ? 0xfffffc00 | (VU->code & 0x3ff) : VU->code & 0x3ff)
#define _UImm11_ (s32)(VU->code & 0x7ff)

#define VU_VFx_ADDR(x) (int)&VU->VF[x].UL[0]
#define VU_VFy_ADDR(x) (int)&VU->VF[x].UL[1]
#define VU_VFz_ADDR(x) (int)&VU->VF[x].UL[2]
#define VU_VFw_ADDR(x) (int)&VU->VF[x].UL[3]

#define VU_REGR_ADDR (int)&VU->VI[REG_R]
#define VU_REGQ_ADDR (int)&VU->VI[REG_Q]
#define VU_REGMAC_ADDR (int)&VU->VI[REG_MAC_FLAG]

#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info)

#define VU_ACCx_ADDR (int)&VU->ACC.UL[0]
#define VU_ACCy_ADDR (int)&VU->ACC.UL[1]
#define VU_ACCz_ADDR (int)&VU->ACC.UL[2]
#define VU_ACCw_ADDR (int)&VU->ACC.UL[3]

#define _X_Y_Z_W ((( VU->code >> 21 ) & 0xF ) )
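
// Reference sketch (not part of the recompiler): how the field macros above
// slice a 32-bit VU opcode word, including the sign extension done by _Imm11_.
// The struct and function names are illustrative only.
typedef struct {
	int ft, fs, fd;   // bits 16-20, 11-15, 6-10
	int x, y, z, w;   // dest mask, bits 24 down to 21
	s32 imm11;        // bits 0-10, sign-extended from bit 10
} VUFieldsExample;

static VUFieldsExample VU_DecodeFieldsExample(u32 code) {
	VUFieldsExample f;
	f.ft = (code >> 16) & 0x1f;
	f.fs = (code >> 11) & 0x1f;
	f.fd = (code >> 6) & 0x1f;
	f.x = (code >> 24) & 1;
	f.y = (code >> 23) & 1;
	f.z = (code >> 22) & 1;
	f.w = (code >> 21) & 1;
	f.imm11 = (code & 0x400) ? (s32)(0xfffffc00 | (code & 0x3ff)) : (s32)(code & 0x3ff);
	return f;
}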

__declspec(align(16)) float recMult_float_to_int4[4] = { 16.0, 16.0, 16.0, 16.0 };
__declspec(align(16)) float recMult_float_to_int12[4] = { 4096.0, 4096.0, 4096.0, 4096.0 };
__declspec(align(16)) float recMult_float_to_int15[4] = { 32768.0, 32768.0, 32768.0, 32768.0 };

__declspec(align(16)) float recMult_int_to_float4[4] = { 0.0625f, 0.0625f, 0.0625f, 0.0625f };
__declspec(align(16)) float recMult_int_to_float12[4] = { 0.000244140625, 0.000244140625, 0.000244140625, 0.000244140625 };
__declspec(align(16)) float recMult_int_to_float15[4] = { 0.000030517578125, 0.000030517578125, 0.000030517578125, 0.000030517578125 };

static s32 bpc;
_VURegsNum* g_VUregs = NULL;
u8 g_MACFlagTransform[256] = {0}; // used to flip xyzw bits

static int SSEmovMask[ 16 ][ 4 ] =
{
	{ 0x00000000, 0x00000000, 0x00000000, 0x00000000 },
	{ 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF },
	{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0x00000000 },
	{ 0x00000000, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
	{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0x00000000 },
	{ 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
	{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
	{ 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF },
	{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000 },
	{ 0xFFFFFFFF, 0x00000000, 0x00000000, 0xFFFFFFFF },
	{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000 },
	{ 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0xFFFFFFFF },
	{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000 },
	{ 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF },
	{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000 },
	{ 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }
};

#define VU_SWAPSRC 0xf090 // don't touch

#define _vuIsRegSwappedWithTemp() (VU_SWAPSRC & (1<<_X_Y_Z_W))

// use for allocating vi regs
#define ALLOCTEMPX86(mode) _allocX86reg(-1, X86TYPE_TEMP, 0, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode)
#define ALLOCVI(vi, mode) _allocX86reg(-1, X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), vi, ((info&PROCESS_VU_SUPER)?0:MODE_NOFRAME)|mode)
#define ADD_VI_NEEDED(vi) _addNeededX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), vi);

// 1 - src, 0 - dest wzyx
void VU_MERGE0(int dest, int src) { // 0000
}
void VU_MERGE1(int dest, int src) { // 1000
	SSE_MOVHLPS_XMM_to_XMM(src, dest);
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4);
}
void VU_MERGE2(int dest, int src) { // 0100
	SSE_MOVHLPS_XMM_to_XMM(src, dest);
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64);
}
void VU_MERGE3(int dest, int src) { // 1100
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
}
void VU_MERGE4(int dest, int src) { // 0010s
	SSE_MOVSS_XMM_to_XMM(src, dest);
	SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4);
	SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE5(int dest, int src) { // 1010
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8);
	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd8);
}
void VU_MERGE6(int dest, int src) { // 0110
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0x9c);
	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x78);
}
void VU_MERGE7(int dest, int src) { // 1110s
	SSE_MOVSS_XMM_to_XMM(src, dest);
	SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE8(int dest, int src) { // 0001
	SSE_MOVSS_XMM_to_XMM(dest, src);
}
void VU_MERGE9(int dest, int src) { // 1001
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc9);
	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0xd2);
}
void VU_MERGE10(int dest, int src) { // 0101
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0x8d);
	SSE_SHUFPS_XMM_to_XMM(dest, dest, 0x72);
}
void VU_MERGE11(int dest, int src) { // 1101
	SSE_MOVSS_XMM_to_XMM(dest, src);
	SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4);
}
void VU_MERGE12(int dest, int src) { // 0011s
	SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4);
	SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE13(int dest, int src) { // 1011s
	SSE_MOVHLPS_XMM_to_XMM(dest, src);
	SSE_SHUFPS_XMM_to_XMM(src, dest, 0x64);
	SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE14(int dest, int src) { // 0111s
	SSE_MOVHLPS_XMM_to_XMM(dest, src);
	SSE_SHUFPS_XMM_to_XMM(src, dest, 0xc4);
	SSE_MOVAPS_XMM_to_XMM(dest, src);
}
void VU_MERGE15(int dest, int src) { // 1111s
	SSE_MOVAPS_XMM_to_XMM(dest, src);
}

typedef void (*VUMERGEFN)(int dest, int src);

static VUMERGEFN s_VuMerge[16] = {
	VU_MERGE0, VU_MERGE1, VU_MERGE2, VU_MERGE3,
	VU_MERGE4, VU_MERGE5, VU_MERGE6, VU_MERGE7,
	VU_MERGE8, VU_MERGE9, VU_MERGE10, VU_MERGE11,
	VU_MERGE12, VU_MERGE13, VU_MERGE14, VU_MERGE15 };

#define VU_MERGE_REGS(dest, src) { \
	if( dest != src ) s_VuMerge[_X_Y_Z_W](dest, src); \
} \

#define VU_MERGE_REGS_CUSTOM(dest, src, xyzw) { \
	if( dest != src ) s_VuMerge[xyzw](dest, src); \
} \

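// Reference sketch (not called anywhere): what the VU_MERGE* routines above
// compute, written on plain arrays instead of XMM registers. Components whose
// bit is set in the xyzw mask (x=8, y=4, z=2, w=1, i.e. _X_Y_Z_W) are taken
// from src, the rest keep their old dest value. Several of the real variants
// also clobber src as scratch space, which this sketch does not model.
static void VU_MergeReferenceExample(float dest[4], const float src[4], int xyzw) {
	// element order matches the XMM layout used above: [0]=x, [1]=y, [2]=z, [3]=w
	if (xyzw & 8) dest[0] = src[0]; // x
	if (xyzw & 4) dest[1] = src[1]; // y
	if (xyzw & 2) dest[2] = src[2]; // z
	if (xyzw & 1) dest[3] = src[3]; // w
}
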
void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw)
|
|
{
|
|
// don't use pshufd
|
|
if( dstreg == srcreg || !cpucaps.hasStreamingSIMD3Extensions) {
|
|
if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg);
|
|
switch (xyzw) {
|
|
case 0: SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x00); break;
|
|
case 1: SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x55); break;
|
|
case 2: SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xaa); break;
|
|
case 3: SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xff); break;
|
|
}
|
|
}
|
|
else {
|
|
switch (xyzw) {
|
|
case 0:
|
|
SSE3_MOVSLDUP_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_MOVLHPS_XMM_to_XMM(dstreg, dstreg);
|
|
break;
|
|
case 1:
|
|
SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_MOVLHPS_XMM_to_XMM(dstreg, dstreg);
|
|
break;
|
|
case 2:
|
|
SSE3_MOVSLDUP_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg);
|
|
break;
|
|
case 3:
|
|
SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg);
|
|
break;
|
|
}
|
|
}
|
|
}
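
// Reference sketch (not called anywhere) of _unpackVF_xyzw's effect: broadcast
// one component of the source vector (0=x .. 3=w) into all four lanes of the
// destination. The real routine above just picks the cheapest SSE/SSE3 shuffle
// sequence that produces the same result.
static void VU_UnpackReferenceExample(float dst[4], const float src[4], int xyzw) {
	dst[0] = dst[1] = dst[2] = dst[3] = src[xyzw];
}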
|
|
|
|
void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw)
|
|
{
|
|
switch (xyzw) {
|
|
case 0:
|
|
if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg);
|
|
break;
|
|
case 1:
|
|
if( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg);
|
|
else {
|
|
if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x55);
|
|
}
|
|
break;
|
|
case 2:
|
|
SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg);
|
|
break;
|
|
case 3:
|
|
if( cpucaps.hasStreamingSIMD3Extensions && dstreg != srcreg ) {
|
|
SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg);
|
|
}
|
|
else {
|
|
if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg);
|
|
SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xff);
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
void _vuFlipRegSS(VURegs * VU, int reg)
|
|
{
|
|
assert( _XYZW_SS );
|
|
if( _Y ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xe1);
|
|
else if( _Z ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xc6);
|
|
else if( _W ) SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x27);
|
|
}
|
|
|
|
void _vuMoveSS(VURegs * VU, int dstreg, int srcreg)
|
|
{
|
|
assert( _XYZW_SS );
|
|
if( _Y ) _unpackVFSS_xyzw(dstreg, srcreg, 1);
|
|
else if( _Z ) _unpackVFSS_xyzw(dstreg, srcreg, 2);
|
|
else if( _W ) _unpackVFSS_xyzw(dstreg, srcreg, 3);
|
|
else _unpackVFSS_xyzw(dstreg, srcreg, 0);
|
|
}
|
|
|
|
void _recvuFMACflush(VURegs * VU) {
|
|
int i;
|
|
|
|
for (i=0; i<8; i++) {
|
|
if (VU->fmac[i].enable == 0) continue;
|
|
|
|
if ((vucycle - VU->fmac[i].sCycle) >= VU->fmac[i].Cycle) {
|
|
#ifdef VUM_LOG
|
|
// if (Log) { VUM_LOG("flushing FMAC pipe[%d]\n", i); }
|
|
#endif
|
|
VU->fmac[i].enable = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
void _recvuFDIVflush(VURegs * VU) {
|
|
if (VU->fdiv.enable == 0) return;
|
|
|
|
if ((vucycle - VU->fdiv.sCycle) >= VU->fdiv.Cycle) {
|
|
// SysPrintf("flushing FDIV pipe\n");
|
|
VU->fdiv.enable = 0;
|
|
}
|
|
}
|
|
|
|
void _recvuEFUflush(VURegs * VU) {
|
|
if (VU->efu.enable == 0) return;
|
|
|
|
if ((vucycle - VU->efu.sCycle) >= VU->efu.Cycle) {
|
|
// SysPrintf("flushing FDIV pipe\n");
|
|
VU->efu.enable = 0;
|
|
}
|
|
}
|
|
|
|
void _recvuTestPipes(VURegs * VU) {
|
|
_recvuFMACflush(VU);
|
|
_recvuFDIVflush(VU);
|
|
_recvuEFUflush(VU);
|
|
}
|
|
|
|
void _recvuFMACTestStall(VURegs * VU, int reg, int xyzw) {
|
|
int cycle;
|
|
int i;
|
|
u32 mask = 0;
|
|
|
|
for (i=0; i<8; i++) {
|
|
if (VU->fmac[i].enable == 0) continue;
|
|
if (VU->fmac[i].reg == reg &&
|
|
(VU->fmac[i].xyzw & xyzw)) break;
|
|
}
|
|
|
|
if (i == 8) return;
|
|
|
|
// do a per-channel delay
|
|
// old code
|
|
cycle = VU->fmac[i].Cycle - (vucycle - VU->fmac[i].sCycle);
|
|
VU->fmac[i].enable = 0;
|
|
|
|
// new code
|
|
// mask = VU->fmac[i].xyzw & xyzw;
|
|
// if( mask & 1 ) mask = 4; // w
|
|
// else if( mask & 2 ) mask = 3; // z
|
|
// else if( mask & 4 ) mask = 2; // y
|
|
// else if( mask & 8 ) mask = 1; // x
|
|
//
|
|
// assert( (int)VU->fmac[i].sCycle < (int)vucycle );
|
|
// cycle = 0;
|
|
// if( vucycle - VU->fmac[i].sCycle < mask )
|
|
// cycle = mask - (vucycle - VU->fmac[i].sCycle);
|
|
//
|
|
// VU->fmac[i].xyzw &= ~xyzw;
|
|
// if( !VU->fmac[i].xyzw )
|
|
// VU->fmac[i].enable = 0;
|
|
|
|
// SysPrintf("FMAC stall %d\n", cycle);
|
|
vucycle+= cycle;
|
|
_recvuTestPipes(VU);
|
|
}
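
// Reference sketch (not called anywhere) of the stall arithmetic used above: a
// result issued at sCycle is ready Cycle ticks later, so an instruction reading
// it at `current` has to wait for whatever is left of that latency. The clamp
// to zero is only for this sketch; the caller above relies on the pipe having
// been flushed already once the latency has expired.
static int VU_StallCyclesExample(u32 sCycle, u32 Cycle, u32 current) {
	int remaining = (int)Cycle - (int)(current - sCycle);
	return remaining > 0 ? remaining : 0;
}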
|
|
|
|
void _recvuFMACAdd(VURegs * VU, int reg, int xyzw) {
|
|
int i;
|
|
|
|
/* find a free fmac pipe */
|
|
for (i=0; i<8; i++) {
|
|
if (VU->fmac[i].enable == 1) continue;
|
|
break;
|
|
}
|
|
if (i==8) {
|
|
SysPrintf("*PCSX2*: error , out of fmacs\n");
|
|
}
|
|
|
|
#ifdef VUM_LOG
|
|
// if (Log) { VUM_LOG("adding FMAC pipe[%d]; reg %d\n", i, reg); }
|
|
#endif
|
|
VU->fmac[i].enable = 1;
|
|
VU->fmac[i].sCycle = vucycle;
|
|
VU->fmac[i].Cycle = 3;
|
|
VU->fmac[i].xyzw = xyzw;
|
|
VU->fmac[i].reg = reg;
|
|
}
|
|
|
|
void _recvuFDIVAdd(VURegs * VU, int cycles) {
|
|
// SysPrintf("adding FDIV pipe\n");
|
|
VU->fdiv.enable = 1;
|
|
VU->fdiv.sCycle = vucycle;
|
|
VU->fdiv.Cycle = cycles;
|
|
}
|
|
|
|
void _recvuEFUAdd(VURegs * VU, int cycles) {
|
|
// SysPrintf("adding EFU pipe\n");
|
|
VU->efu.enable = 1;
|
|
VU->efu.sCycle = vucycle;
|
|
VU->efu.Cycle = cycles;
|
|
}
|
|
|
|
void _recvuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
|
|
if( VUregsn->VFread0 && (VUregsn->VFread0 == VUregsn->VFread1) ) {
|
|
_recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw|VUregsn->VFr1xyzw);
|
|
}
|
|
else {
|
|
if (VUregsn->VFread0) {
|
|
_recvuFMACTestStall(VU, VUregsn->VFread0, VUregsn->VFr0xyzw);
|
|
}
|
|
if (VUregsn->VFread1) {
|
|
_recvuFMACTestStall(VU, VUregsn->VFread1, VUregsn->VFr1xyzw);
|
|
}
|
|
}
|
|
}
|
|
|
|
void _recvuAddFMACStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
if (VUregsn->VFwrite) {
|
|
_recvuFMACAdd(VU, VUregsn->VFwrite, VUregsn->VFwxyzw);
|
|
} else
|
|
if (VUregsn->VIwrite & (1 << REG_CLIP_FLAG)) {
|
|
// SysPrintf("REG_CLIP_FLAG pipe\n");
|
|
_recvuFMACAdd(VU, -REG_CLIP_FLAG, 0);
|
|
} else {
|
|
_recvuFMACAdd(VU, 0, 0);
|
|
}
|
|
}
|
|
|
|
void _recvuFlushFDIV(VURegs * VU) {
|
|
int cycle;
|
|
|
|
if (VU->fdiv.enable == 0) return;
|
|
|
|
cycle = VU->fdiv.Cycle - (vucycle - VU->fdiv.sCycle);
|
|
// SysPrintf("waiting FDIV pipe %d\n", cycle);
|
|
VU->fdiv.enable = 0;
|
|
vucycle+= cycle;
|
|
}
|
|
|
|
void _recvuFlushEFU(VURegs * VU) {
|
|
int cycle;
|
|
|
|
if (VU->efu.enable == 0) return;
|
|
|
|
cycle = VU->efu.Cycle - (vucycle - VU->efu.sCycle);
|
|
// SysPrintf("waiting FDIV pipe %d\n", cycle);
|
|
VU->efu.enable = 0;
|
|
vucycle+= cycle;
|
|
}
|
|
|
|
void _recvuTestFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
|
|
_recvuFlushFDIV(VU);
|
|
}
|
|
|
|
void _recvuTestEFUStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
|
|
_recvuFlushEFU(VU);
|
|
}
|
|
|
|
void _recvuAddFDIVStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
|
|
if (VUregsn->VIwrite & (1 << REG_Q)) {
|
|
_recvuFDIVAdd(VU, VUregsn->cycles);
|
|
}
|
|
}
|
|
|
|
void _recvuAddEFUStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
// _vuTestFMACStalls(VURegs * VU, _VURegsNum *VUregsn);
|
|
if (VUregsn->VIwrite & (1 << REG_P)) {
|
|
_recvuEFUAdd(VU, VUregsn->cycles);
|
|
}
|
|
}
|
|
|
|
void _recvuTestUpperStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
switch (VUregsn->pipe) {
|
|
case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn); break;
|
|
}
|
|
}
|
|
|
|
void _recvuTestLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
switch (VUregsn->pipe) {
|
|
case VUPIPE_FMAC: _recvuTestFMACStalls(VU, VUregsn); break;
|
|
case VUPIPE_FDIV: _recvuTestFDIVStalls(VU, VUregsn); break;
|
|
case VUPIPE_EFU: _recvuTestEFUStalls(VU, VUregsn); break;
|
|
}
|
|
}
|
|
|
|
void _recvuAddUpperStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
switch (VUregsn->pipe) {
|
|
case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break;
|
|
}
|
|
}
|
|
|
|
void _recvuAddLowerStalls(VURegs * VU, _VURegsNum *VUregsn) {
|
|
switch (VUregsn->pipe) {
|
|
case VUPIPE_FMAC: _recvuAddFMACStalls(VU, VUregsn); break;
|
|
case VUPIPE_FDIV: _recvuAddFDIVStalls(VU, VUregsn); break;
|
|
case VUPIPE_EFU: _recvuAddEFUStalls(VU, VUregsn); break;
|
|
}
|
|
}
|
|
|
|
|
|
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs)
|
|
{
|
|
_VURegsNum* lregs;
|
|
_VURegsNum* uregs;
|
|
int *ptr;
|
|
|
|
lregs = pCodeRegs;
|
|
uregs = pCodeRegs+1;
|
|
|
|
ptr = (int*)&VU->Micro[pc];
|
|
pc += 8;
|
|
|
|
if (ptr[1] & 0x40000000) { // EOP
|
|
branch |= 8;
|
|
}
|
|
|
|
VU->code = ptr[1];
|
|
if (VU == &VU1) {
|
|
VU1regs_UPPER_OPCODE[VU->code & 0x3f](uregs);
|
|
} else {
|
|
VU0regs_UPPER_OPCODE[VU->code & 0x3f](uregs);
|
|
}
|
|
|
|
_recvuTestUpperStalls(VU, uregs);
|
|
switch(VU->code & 0x3f) {
|
|
case 0x10: case 0x11: case 0x12: case 0x13:
|
|
case 0x14: case 0x15: case 0x16: case 0x17:
|
|
case 0x1d: case 0x1f:
|
|
case 0x2b: case 0x2f:
|
|
break;
|
|
|
|
case 0x3c:
|
|
switch ((VU->code >> 6) & 0x1f) {
|
|
case 0x4: case 0x5:
|
|
break;
|
|
default:
|
|
info->statusflag = 4;
|
|
info->macflag = 4;
|
|
break;
|
|
}
|
|
break;
|
|
case 0x3d:
|
|
switch ((VU->code >> 6) & 0x1f) {
|
|
case 0x4: case 0x5: case 0x7:
|
|
break;
|
|
default:
|
|
info->statusflag = 4;
|
|
info->macflag = 4;
|
|
break;
|
|
}
|
|
break;
|
|
case 0x3e:
|
|
switch ((VU->code >> 6) & 0x1f) {
|
|
case 0x4: case 0x5:
|
|
break;
|
|
default:
|
|
info->statusflag = 4;
|
|
info->macflag = 4;
|
|
break;
|
|
}
|
|
break;
|
|
case 0x3f:
|
|
switch ((VU->code >> 6) & 0x1f) {
|
|
case 0x4: case 0x5: case 0x7: case 0xb:
|
|
break;
|
|
default:
|
|
info->statusflag = 4;
|
|
info->macflag = 4;
|
|
break;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
info->statusflag = 4;
|
|
info->macflag = 4;
|
|
break;
|
|
}
|
|
|
|
if (uregs->VIread & (1 << REG_Q)) {
|
|
info->q |= 2;
|
|
}
|
|
|
|
if (uregs->VIread & (1 << REG_P)) {
|
|
assert( VU == &VU1 );
|
|
info->p |= 2;
|
|
}
|
|
|
|
// check upper flags
|
|
if (ptr[1] & 0x80000000) { // I flag
|
|
info->cycle = vucycle;
|
|
memset(lregs, 0, sizeof(*lregs)); // clear the whole _VURegsNum, not just a pointer's worth
|
|
} else {
|
|
|
|
VU->code = ptr[0];
|
|
if (VU == &VU1) {
|
|
VU1regs_LOWER_OPCODE[VU->code >> 25](lregs);
|
|
} else {
|
|
VU0regs_LOWER_OPCODE[VU->code >> 25](lregs);
|
|
}
|
|
|
|
_recvuTestLowerStalls(VU, lregs);
|
|
info->cycle = vucycle;
|
|
|
|
if (lregs->pipe == VUPIPE_BRANCH) {
|
|
branch |= 1;
|
|
}
|
|
|
|
if (lregs->VIwrite & (1 << REG_Q)) {
|
|
info->q |= 4;
|
|
info->cycles = lregs->cycles;
|
|
info->pqinst = (VU->code&2)>>1; // rsqrt is 2
|
|
}
|
|
else if (lregs->pipe == VUPIPE_FDIV) {
|
|
info->q |= 8|1;
|
|
info->pqinst = 0;
|
|
}
|
|
|
|
if (lregs->VIwrite & (1 << REG_P)) {
|
|
assert( VU == &VU1 );
|
|
info->p |= 4;
|
|
info->cycles = lregs->cycles;
|
|
|
|
switch( VU->code & 0xff ) {
|
|
case 0xfd: info->pqinst = 0; break; //eatan
|
|
case 0x7c: info->pqinst = 0; break; //eatanxy
|
|
case 0x7d: info->pqinst = 0; break; //eatanzy
|
|
case 0xfe: info->pqinst = 1; break; //eexp
|
|
case 0xfc: info->pqinst = 2; break; //esin
|
|
case 0x3f: info->pqinst = 3; break; //erleng
|
|
case 0x3e: info->pqinst = 4; break; //eleng
|
|
case 0x3d: info->pqinst = 4; break; //ersadd
|
|
case 0xbd: info->pqinst = 4; break; //ersqrt
|
|
case 0xbe: info->pqinst = 5; break; //ercpr
|
|
case 0xbc: info->pqinst = 5; break; //esqrt
|
|
case 0x7e: info->pqinst = 5; break; //esum
|
|
case 0x3c: info->pqinst = 6; break; //esadd
|
|
default: assert(0);
|
|
}
|
|
}
|
|
else if (lregs->pipe == VUPIPE_EFU) {
|
|
info->p |= 8|1;
|
|
}
|
|
|
|
if (lregs->VIread & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_READ;
|
|
if (lregs->VIread & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_READ;
|
|
|
|
if (lregs->VIwrite & (1 << REG_STATUS_FLAG)) info->statusflag|= VUOP_WRITE;
|
|
if (lregs->VIwrite & (1 << REG_MAC_FLAG)) info->macflag|= VUOP_WRITE;
|
|
|
|
if (lregs->VIread & (1 << REG_Q)) {
|
|
info->q |= 2;
|
|
}
|
|
|
|
if (lregs->VIread & (1 << REG_P)) {
|
|
assert( VU == &VU1 );
|
|
info->p |= 2;
|
|
}
|
|
|
|
_recvuAddLowerStalls(VU, lregs);
|
|
}
|
|
_recvuAddUpperStalls(VU, uregs);
|
|
|
|
_recvuTestPipes(VU);
|
|
|
|
vucycle++;
|
|
}
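
// Reference sketch (not called anywhere) of the instruction-pair layout that
// SuperVUAnalyzeOp reads above: each 64-bit slot in VU->Micro holds the lower
// instruction in the low word and the upper instruction in the high word; the
// upper word also carries the I bit (lower word is a 32-bit immediate) and the
// EOP bit (end of the microprogram). Names here are illustrative only.
static void VU_SplitPairExample(const u32* slot, u32* lower, u32* upper, int* has_imm, int* is_end) {
	*lower = slot[0];                        // lower-pipe opcode (or the I immediate)
	*upper = slot[1];                        // upper-pipe opcode
	*has_imm = (slot[1] & 0x80000000) != 0;  // I flag, as tested above
	*is_end = (slot[1] & 0x40000000) != 0;   // EOP flag, as tested above
}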
|
|
|
|
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs)
|
|
{
|
|
int info = 0;
|
|
int vfread0=-1, vfread1 = -1, vfwrite = -1, vfacc = -1, vftemp=-1;
|
|
|
|
assert( regs != NULL );
|
|
|
|
if( regs->VFread0 ) _addNeededVFtoXMMreg(regs->VFread0);
|
|
if( regs->VFread1 ) _addNeededVFtoXMMreg(regs->VFread1);
|
|
if( regs->VFwrite ) _addNeededVFtoXMMreg(regs->VFwrite);
|
|
if( regs->VIread & (1<<REG_ACC_FLAG) ) _addNeededACCtoXMMreg();
|
|
if( regs->VIread & (1<<REG_VF0_FLAG) ) _addNeededVFtoXMMreg(0);
|
|
|
|
// alloc
|
|
if( regs->VFread0 ) vfread0 = _allocVFtoXMMreg(VU, -1, regs->VFread0, MODE_READ);
|
|
else if( regs->VIread & (1<<REG_VF0_FLAG) ) vfread0 = _allocVFtoXMMreg(VU, -1, 0, MODE_READ);
|
|
if( regs->VFread1 ) vfread1 = _allocVFtoXMMreg(VU, -1, regs->VFread1, MODE_READ);
|
|
else if( (regs->VIread & (1<<REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff) vfread1 = _allocVFtoXMMreg(VU, -1, 0, MODE_READ);
|
|
|
|
if( regs->VIread & (1<<REG_ACC_FLAG )) {
|
|
vfacc = _allocACCtoXMMreg(VU, -1, ((regs->VIwrite&(1<<REG_ACC_FLAG))?MODE_WRITE:0)|MODE_READ);
|
|
}
|
|
else if( regs->VIwrite & (1<<REG_ACC_FLAG) ) {
|
|
vfacc = _allocACCtoXMMreg(VU, -1, MODE_WRITE|(regs->VFwxyzw != 0xf?MODE_READ:0));
|
|
}
|
|
|
|
if( regs->VFwrite ) {
|
|
assert( !(regs->VIwrite&(1<<REG_ACC_FLAG)) );
|
|
vfwrite = _allocVFtoXMMreg(VU, -1, regs->VFwrite, MODE_WRITE|(regs->VFwxyzw != 0xf?MODE_READ:0));
|
|
}
|
|
|
|
if( vfacc>= 0 ) info |= PROCESS_EE_SET_ACC(vfacc);
|
|
if( vfwrite >= 0 ) {
|
|
if( regs->VFwrite == _Ft_ && vfread1 < 0 ) {
|
|
info |= PROCESS_EE_SET_T(vfwrite);
|
|
}
|
|
else {
|
|
assert( regs->VFwrite == _Fd_ );
|
|
info |= PROCESS_EE_SET_D(vfwrite);
|
|
}
|
|
}
|
|
|
|
if( vfread0 >= 0 ) info |= PROCESS_EE_SET_S(vfread0);
|
|
if( vfread1 >= 0 ) info |= PROCESS_EE_SET_T(vfread1);
|
|
|
|
vftemp = _allocTempXMMreg(XMMT_FPS, -1);
|
|
info |= PROCESS_VU_SET_TEMP(vftemp);
|
|
|
|
if( regs->VIwrite & (1 << REG_CLIP_FLAG) ) {
|
|
// CLIP inst needs two extra temp registers; put them in EEREC_D and EEREC_ACC
|
|
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
|
int t2reg = _allocTempXMMreg(XMMT_FPS, -1);
|
|
|
|
info |= PROCESS_EE_SET_D(t1reg);
|
|
info |= PROCESS_EE_SET_ACC(t2reg);
|
|
|
|
_freeXMMreg(t1reg); // don't need
|
|
_freeXMMreg(t2reg); // don't need
|
|
}
|
|
else if( regs->VIwrite & (1<<REG_P) ) {
|
|
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
|
info |= PROCESS_EE_SET_D(t1reg);
|
|
_freeXMMreg(t1reg); // don't need
|
|
}
|
|
|
|
_freeXMMreg(vftemp); // don't need it
|
|
|
|
if( cinfo->statusflag & 1 ) info |= PROCESS_VU_UPDATEFLAGS;
|
|
if( cinfo->macflag & 1) info |= PROCESS_VU_UPDATEFLAGS;
|
|
|
|
if( regs->pipe == 0xff ) info |= PROCESS_VU_COP2;
|
|
|
|
return info;
|
|
}
|
|
|
|
// returns the correct VI addr
|
|
u32 GetVIAddr(VURegs * VU, int reg, int read, int info)
|
|
{
|
|
if( info & PROCESS_VU_SUPER ) return SuperVUGetVIAddr(reg, read);
|
|
if( info & PROCESS_VU_COP2 ) return (u32)&VU->VI[reg].UL;
|
|
|
|
if( read != 1 ) {
|
|
if( reg == REG_MAC_FLAG ) return (u32)&VU->macflag;
|
|
if( reg == REG_CLIP_FLAG ) return (u32)&VU->clipflag;
|
|
if( reg == REG_STATUS_FLAG ) return (u32)&VU->statusflag;
|
|
if( reg == REG_Q ) return (u32)&VU->q;
|
|
if( reg == REG_P ) return (u32)&VU->p;
|
|
}
|
|
|
|
return (u32)&VU->VI[reg].UL;
|
|
}
|
|
|
|
// gets a temp reg that is not EEREC_TEMP
|
|
int _vuGetTempXMMreg(int info)
|
|
{
|
|
int t1reg = -1;
|
|
|
|
if( _hasFreeXMMreg() ) {
|
|
t1reg = _allocTempXMMreg(XMMT_FPS, -1);
|
|
if( t1reg == EEREC_TEMP && _hasFreeXMMreg() ) {
|
|
int t = _allocTempXMMreg(XMMT_FPS, -1);
|
|
_freeXMMreg(t1reg);
|
|
t1reg = t;
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
_freeXMMreg(t1reg);
|
|
t1reg = -1;
|
|
}
|
|
}
|
|
|
|
return t1reg;
|
|
}
|
|
|
|
__declspec(align(16)) u32 g_minvals[4] = {0xff7fffff, 0xff7fffff, 0xff7fffff, 0xff7fffff};
|
|
__declspec(align(16)) u32 g_maxvals[4] = {0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff};
|
|
|
|
static __declspec(align(16)) int const_clip[] = {
|
|
0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff,
|
|
0x80000000, 0x80000000, 0x80000000, 0x80000000 };
|
|
|
|
static __declspec(align(16)) u32 s_FloatMinMax[] = {
|
|
0x007fffff, 0x007fffff, 0x007fffff, 0x007fffff,
|
|
0x7f7fffff, 0x7f7fffff, 0x7f7fffff, 0x7f7fffff,
|
|
0, 0, 0, 0 };
|
|
|
|
static __declspec(align(16)) float s_fones[] = { 1.0f, 1.0f, 1.0f, 1.0f, -1.0f, -1.0f, -1.0f, -1.0f };
|
|
static __declspec(align(16)) u32 s_mask[] = {0x7fffff, 0x7fffff, 0x7fffff, 0x7fffff };
|
|
static __declspec(align(16)) u32 s_expmask[] = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};
|
|
|
|
void CheckForOverflowSS_(int fdreg, int t0reg)
|
|
{
|
|
assert( t0reg != fdreg );
|
|
SSE_XORPS_XMM_to_XMM(t0reg, t0reg);
|
|
SSE_CMPORDSS_XMM_to_XMM(t0reg, fdreg);
|
|
SSE_ANDPS_XMM_to_XMM(fdreg, t0reg);
|
|
|
|
// SSE_MOVSS_M32_to_XMM(t0reg, (u32)s_expmask);
|
|
// SSE_ANDPS_XMM_to_XMM(t0reg, fdreg);
|
|
// SSE_CMPNESS_M32_to_XMM(t0reg, (u32)s_expmask);
|
|
// SSE_ANDPS_XMM_to_XMM(fdreg, t0reg);
|
|
}
|
|
|
|
void CheckForOverflow_(int fdreg, int t0reg)
|
|
{
|
|
// SSE_MAXPS_M128_to_XMM(fdreg, (u32)g_minvals);
|
|
// SSE_MINPS_M128_to_XMM(fdreg, (u32)g_maxvals);
|
|
|
|
SSE_XORPS_XMM_to_XMM(t0reg, t0reg);
|
|
SSE_CMPORDPS_XMM_to_XMM(t0reg, fdreg);
|
|
SSE_ANDPS_XMM_to_XMM(fdreg, t0reg);
|
|
|
|
// SSE_MOVAPS_M128_to_XMM(t0reg, (u32)s_expmask);
|
|
// SSE_ANDPS_XMM_to_XMM(t0reg, fdreg);
|
|
// SSE_CMPNEPS_M128_to_XMM(t0reg, (u32)s_expmask);
|
|
// //SSE_ORPS_M128_to_XMM(t0reg, (u32)g_minvals);
|
|
// SSE_ANDPS_XMM_to_XMM(fdreg, t0reg);
|
|
}
|
|
|
|
void CheckForOverflow(int info, int regd)
|
|
{
|
|
if( CHECK_FORCEABS && EEREC_TEMP != regd) {
|
|
// changing the order produces different results (tektag)
|
|
CheckForOverflow_(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
|
|
// if unordered, replaces the value with 0x7f7fffff (note: the sign is lost)
|
|
void ClampUnordered(int regd, int t0reg, int dosign)
|
|
{
|
|
SSE_XORPS_XMM_to_XMM(t0reg, t0reg);
|
|
SSE_CMPORDPS_XMM_to_XMM(t0reg, regd);
|
|
SSE_ANDPS_XMM_to_XMM(regd, t0reg);
|
|
SSE_ANDNPS_M128_to_XMM(t0reg, (u32)g_maxvals);
|
|
SSE_ORPS_XMM_to_XMM(regd, t0reg);
|
|
}
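
// Scalar reference sketch (not called anywhere) of the CMPORD idiom used by
// CheckForOverflow_ and ClampUnordered above: lanes holding a NaN fail the
// "ordered" compare, so they are masked to zero, and ClampUnordered then
// substitutes the largest normal float (0x7f7fffff), losing the sign.
static float VU_ClampUnorderedExample(float v, int substitute_max) {
	if (v != v) // only a NaN compares unequal to itself
		return substitute_max ? 3.402823466e+38f : 0.0f; // 0x7f7fffff as a float, or zero
	return v;
}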
|
|
|
|
// VU Flags
|
|
// NOTE: the flags don't track underflow/overflow, since it is highly unlikely
// that games rely on them and computing them would lower performance.
|
|
void recUpdateFlags(VURegs * VU, int reg, int info)
|
|
{
|
|
u32 flagmask;
|
|
u8* pjmp;
|
|
u32 macaddr, stataddr, prevstataddr;
|
|
int x86macflag, x86newflag, x86oldflag;
|
|
const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
|
|
if( !(info & PROCESS_VU_UPDATEFLAGS) )
|
|
return;
|
|
|
|
flagmask = macarr[_X_Y_Z_W];
|
|
macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0);
|
|
stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0);
|
|
assert( stataddr != 0);
|
|
prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2);
|
|
|
|
// 20 insts
|
|
x86newflag = ALLOCTEMPX86(MODE_8BITREG);
|
|
x86macflag = ALLOCTEMPX86(0);
|
|
x86oldflag = ALLOCTEMPX86(0);
|
|
|
|
// can do with 8 bits since only computing zero/sign flags
|
|
if( EEREC_TEMP != reg ) {
|
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, reg);
|
|
|
|
MOV32MtoR(x86oldflag, prevstataddr);
|
|
|
|
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // zero
|
|
|
|
XOR32RtoR(EAX, EAX);
|
|
|
|
SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // necessary!
|
|
|
|
AND32ItoR(x86newflag, 0x0f&flagmask);
|
|
pjmp = JZ8(0);
|
|
OR32ItoR(EAX, 1);
|
|
x86SetJ8(pjmp);
|
|
|
|
SSE_MOVMSKPS_XMM_to_R32(x86macflag, EEREC_TEMP); // sign
|
|
|
|
SHL32ItoR(x86newflag, 4);
|
|
AND32ItoR(x86macflag, 0x0f&flagmask);
|
|
pjmp = JZ8(0);
|
|
OR32ItoR(EAX, 2);
|
|
x86SetJ8(pjmp);
|
|
|
|
OR32RtoR(x86macflag, x86newflag);
|
|
}
|
|
else {
|
|
SSE_MOVMSKPS_XMM_to_R32(x86macflag, reg); // mask is < 0 (including 80000000)
|
|
|
|
MOV32MtoR(x86oldflag, prevstataddr);
|
|
XOR32RtoR(EAX, EAX);
|
|
|
|
SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (u32)&s_FloatMinMax[8]);
|
|
|
|
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // zero
|
|
|
|
NOT32R(x86newflag);
|
|
AND32RtoR(x86macflag, x86newflag);
|
|
|
|
AND32ItoR(x86macflag, 0xf&flagmask);
|
|
pjmp = JZ8(0);
|
|
OR32ItoR(EAX, 2);
|
|
x86SetJ8(pjmp);
|
|
|
|
NOT32R(x86newflag);
|
|
|
|
AND32ItoR(x86newflag, 0xf&flagmask);
|
|
pjmp = JZ8(0);
|
|
OR32ItoR(EAX, 1);
|
|
x86SetJ8(pjmp);
|
|
|
|
SHL32ItoR(x86newflag, 4);
|
|
OR32RtoR(x86macflag, x86newflag);
|
|
}
|
|
|
|
// x86macflag - new untransformed mac flag, EAX - new status bits, x86oldflag - old status flag
|
|
// x86macflag = zero_wzyx | sign_wzyx
|
|
MOV8RmtoROffset(x86newflag, x86macflag, (u32)g_MACFlagTransform); // transform
|
|
//MOV16RmSOffsettoR(x86newflag, x86macflag, (u32)g_MACFlagTransform, 1);
|
|
MOV32RtoR(x86macflag, x86oldflag);
|
|
SHL32ItoR(x86macflag, 6);
|
|
MOV8RtoM(macaddr, x86newflag);
|
|
OR32RtoR(x86oldflag, x86macflag);
|
|
|
|
AND32ItoR(x86oldflag, 0x0c0);
|
|
OR32RtoR(x86oldflag, EAX);
|
|
MOV32RtoM(stataddr, x86oldflag);
|
|
|
|
_freeX86reg(x86macflag);
|
|
_freeX86reg(x86newflag);
|
|
_freeX86reg(x86oldflag);
|
|
}
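
// Reference sketch (not called anywhere) of the flag computation performed by
// recUpdateFlags above: each component written by the op contributes a zero bit
// and a sign bit to the MAC flag, and the status flag gets the "any zero" /
// "any sign" summary bits. The bit placement below just mirrors the _X_Y_Z_W
// convention for illustration; the real code remaps lane order through
// g_MACFlagTransform and also folds in the sticky status bits.
static void VU_MacFlagsExample(const float res[4], int xyzw, u32* zero_bits, u32* sign_bits, u32* status_zs) {
	int c;
	*zero_bits = 0;
	*sign_bits = 0;
	for (c = 0; c < 4; c++) {
		int bit = 8 >> c; // x=8, y=4, z=2, w=1
		if (!(xyzw & bit)) continue;           // only components selected by the dest mask
		if (res[c] == 0.0f) *zero_bits |= bit; // +0 and -0 both count as zero
		if (res[c] < 0.0f) *sign_bits |= bit;  // -0 does not set sign, matching the ANDNPS above
	}
	*status_zs = (*zero_bits ? 1 : 0) | (*sign_bits ? 2 : 0);
}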
|
|
|
|
/******************************/
|
|
/* VU Upper instructions */
|
|
/******************************/
|
|
|
|
static __declspec(align(16)) int const_abs_table[16][4] =
|
|
{
|
|
{ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff },
|
|
{ 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff },
|
|
{ 0xffffffff, 0xffffffff, 0x7fffffff, 0xffffffff },
|
|
{ 0xffffffff, 0xffffffff, 0x7fffffff, 0x7fffffff },
|
|
{ 0xffffffff, 0x7fffffff, 0xffffffff, 0xffffffff },
|
|
{ 0xffffffff, 0x7fffffff, 0xffffffff, 0x7fffffff },
|
|
{ 0xffffffff, 0x7fffffff, 0x7fffffff, 0xffffffff },
|
|
{ 0xffffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff },
|
|
{ 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff },
|
|
{ 0x7fffffff, 0xffffffff, 0xffffffff, 0x7fffffff },
|
|
{ 0x7fffffff, 0xffffffff, 0x7fffffff, 0xffffffff },
|
|
{ 0x7fffffff, 0xffffffff, 0x7fffffff, 0x7fffffff },
|
|
{ 0x7fffffff, 0x7fffffff, 0xffffffff, 0xffffffff },
|
|
{ 0x7fffffff, 0x7fffffff, 0xffffffff, 0x7fffffff },
|
|
{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0xffffffff },
|
|
{ 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff },
|
|
};
|
|
|
|
void recVUMI_ABS(VURegs *VU, int info)
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (int)&const_abs_table[ _X_Y_Z_W ][ 0 ] );
|
|
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
} else {
|
|
if( EEREC_T != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
SSE_ANDPS_M128_to_XMM(EEREC_T, (int)&const_abs_table[ _X_Y_Z_W ][ 0 ] );
|
|
}
|
|
}
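
// Reference sketch (not called anywhere) of what recVUMI_ABS does to each
// selected component: the IEEE-754 sign bit is masked off, which is exactly
// what the 0x7fffffff entries of const_abs_table encode.
static float VU_AbsExample(float v) {
	union { float f; u32 u; } x;
	x.f = v;
	x.u &= 0x7fffffff; // clear the sign bit
	return x.f;
}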
|
|
|
|
__declspec(align(16)) float s_two[4] = {0,0,0,2};
|
|
|
|
void recVUMI_ADD(VURegs *VU, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
if( _Fs_ == 0 && _Ft_ == 0 ) {
|
|
if( _X_Y_Z_W != 0xf ) {
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)s_two);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_D, (u32)s_two);
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if (EEREC_D == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if (EEREC_D == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if (EEREC_D == EEREC_S) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if (EEREC_D == EEREC_T) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_ADD_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
if( _XYZW_SS ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_ADDSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
}
|
|
else if( EEREC_D == EEREC_S ) {
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_ADDSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_ADDSS_M32_to_XMM(EEREC_D, addr);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || EEREC_D == EEREC_S || EEREC_D == EEREC_TEMP) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
}
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
} else {
|
|
if( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else if( EEREC_D == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_D, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
|
|
if( addr == VU_REGQ_ADDR ) CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
void recVUMI_ADD_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
if( _Ft_ == 0 && xyzw < 3 ) {
|
|
// just move
|
|
if( _X_Y_Z_W != 0xf ) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_D != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
else if( _X_Y_Z_W == 8 ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
if( xyzw == 0 ) {
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else if( _Fs_ == 0 && !_W ) {
|
|
// just move
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || EEREC_D == EEREC_S || EEREC_D == EEREC_TEMP)
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
} else {
|
|
if( EEREC_D == EEREC_TEMP ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else if( EEREC_D == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
else {
|
|
_unpackVF_xyzw(EEREC_D, EEREC_T, xyzw);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_ADDi(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
|
|
void recVUMI_ADDq(VURegs *VU, int info) { recVUMI_ADD_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_ADDx(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 0, info); }
|
|
void recVUMI_ADDy(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 1, info); }
|
|
void recVUMI_ADDz(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 2, info); }
|
|
void recVUMI_ADDw(VURegs *VU, int info) { recVUMI_ADD_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_ADDA(VURegs *VU, int info)
|
|
{
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if (EEREC_ACC == EEREC_S) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
else if (EEREC_ACC == EEREC_T) SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
else if( EEREC_ACC == EEREC_T ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_ADDA_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( _XYZW_SS ) {
|
|
assert( EEREC_ACC != EEREC_TEMP );
|
|
if( EEREC_ACC == EEREC_S ) {
|
|
_vuFlipRegSS(VU, EEREC_ACC);
|
|
SSE_ADDSS_M32_to_XMM(EEREC_ACC, addr);
|
|
_vuFlipRegSS(VU, EEREC_ACC);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_ADDSS_M32_to_XMM(EEREC_ACC, addr);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S ) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
}
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_ACC, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC, 0x00);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_ADDA_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( xyzw == 0 ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
if( _Fs_ == 0 ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || EEREC_ACC == EEREC_S )
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
} else {
|
|
if( EEREC_ACC == EEREC_S ) SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
else {
|
|
_unpackVF_xyzw(EEREC_ACC, EEREC_T, xyzw);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_ADDAi(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
|
|
void recVUMI_ADDAq(VURegs *VU, int info) { recVUMI_ADDA_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_ADDAx(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 0, info); }
|
|
void recVUMI_ADDAy(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 1, info); }
|
|
void recVUMI_ADDAz(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 2, info); }
|
|
void recVUMI_ADDAw(VURegs *VU, int info) { recVUMI_ADDA_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_SUB(VURegs *VU, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
if( EEREC_S == EEREC_T ) {
|
|
if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_D, (u32)&SSEmovMask[15-_X_Y_Z_W][0]);
|
|
else SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D);
|
|
}
|
|
else if( _X_Y_Z_W == 8 ) {
|
|
if (EEREC_D == EEREC_S) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if (EEREC_D == EEREC_T) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else {
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
if( _Ft_ > 0 || _W ) SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if (EEREC_D == EEREC_S) SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if (EEREC_D == EEREC_T) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
// neopets works better with this?
|
|
//CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
void recVUMI_SUB_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
if( _XYZW_SS ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
}
|
|
else if( EEREC_D == EEREC_S ) {
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_D, addr);
|
|
}
|
|
else {
|
|
_vuMoveSS(VU, EEREC_TEMP, EEREC_S);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
int t1reg = _vuGetTempXMMreg(info);
|
|
|
|
if( t1reg >= 0 ) {
|
|
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(EEREC_D, t1reg);
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
// negate
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
SSE_XORPS_M128_to_XMM(EEREC_D, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
if (EEREC_D != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
|
|
if( addr == VU_REGQ_ADDR ) CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
static __declspec(align(16)) u32 s_unaryminus[4] = {0x80000000, 0, 0, 0};
|
|
|
|
void recVUMI_SUB_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
if( xyzw == 0 ) {
|
|
if( EEREC_D == EEREC_T ) {
|
|
if( _Fs_ > 0 ) SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_XORPS_M128_to_XMM(EEREC_D, (u32)s_unaryminus);
|
|
}
|
|
else {
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
// else if( _XYZW_SS && xyzw == 0 ) {
|
|
// if( EEREC_D == EEREC_S ) {
|
|
// if( EEREC_D == EEREC_T ) {
|
|
// SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// }
|
|
// else {
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// SSE_SUBSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// }
|
|
// }
|
|
// else if( EEREC_D == EEREC_T ) {
|
|
// _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Y?1:(_Z?2:3));
|
|
// SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// }
|
|
// else {
|
|
// _unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Y?1:(_Z?2:3));
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
// SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
// _vuFlipRegSS(VU, EEREC_D);
|
|
// }
|
|
// }
|
|
else {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
int t1reg = _vuGetTempXMMreg(info);
|
|
|
|
if( t1reg >= 0 ) {
|
|
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(EEREC_D, t1reg);
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
// negate
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
SSE_XORPS_M128_to_XMM(EEREC_D, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
if( EEREC_D != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_SUBi(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
|
|
void recVUMI_SUBq(VURegs *VU, int info) { recVUMI_SUB_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_SUBx(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 0, info); }
|
|
void recVUMI_SUBy(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 1, info); }
|
|
void recVUMI_SUBz(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 2, info); }
|
|
void recVUMI_SUBw(VURegs *VU, int info) { recVUMI_SUB_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_SUBA(VURegs *VU, int info)
|
|
{
|
|
if( EEREC_S == EEREC_T ) {
|
|
if (_X_Y_Z_W != 0xf) SSE_ANDPS_M128_to_XMM(EEREC_ACC, (u32)&SSEmovMask[15-_X_Y_Z_W][0]);
|
|
else SSE_XORPS_XMM_to_XMM(EEREC_ACC, EEREC_ACC);
|
|
}
|
|
else if( _X_Y_Z_W == 8 ) {
|
|
if (EEREC_ACC == EEREC_S) SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
else if (EEREC_ACC == EEREC_T) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_ACC == EEREC_S ) SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
else if( EEREC_ACC == EEREC_T ) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_SUBA_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( _XYZW_SS ) {
|
|
if( EEREC_ACC == EEREC_S ) {
|
|
_vuFlipRegSS(VU, EEREC_ACC);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_ACC, addr);
|
|
_vuFlipRegSS(VU, EEREC_ACC);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_ACC, addr);
|
|
}
|
|
else {
|
|
_vuMoveSS(VU, EEREC_TEMP, EEREC_S);
|
|
_vuFlipRegSS(VU, EEREC_ACC);
|
|
SSE_SUBSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
_vuFlipRegSS(VU, EEREC_ACC);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
int t1reg = _vuGetTempXMMreg(info);
|
|
|
|
if( t1reg >= 0 ) {
|
|
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(EEREC_ACC, t1reg);
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
// negate
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( EEREC_ACC != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_SUBA_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( xyzw == 0 ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_T);
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBSS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
int t1reg = _vuGetTempXMMreg(info);
|
|
|
|
if( t1reg >= 0 ) {
|
|
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(EEREC_ACC, t1reg);
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
// negate
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( EEREC_ACC != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_ACC, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(EEREC_ACC, EEREC_TEMP);
|
|
}
|
|
}
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_SUBAi(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
|
|
void recVUMI_SUBAq(VURegs *VU, int info) { recVUMI_SUBA_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_SUBAx(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 0, info); }
|
|
void recVUMI_SUBAy(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 1, info); }
|
|
void recVUMI_SUBAz(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 2, info); }
|
|
void recVUMI_SUBAw(VURegs *VU, int info) { recVUMI_SUBA_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_MUL_toD(VURegs *VU, int regd, int info)
|
|
{
|
|
if (_X_Y_Z_W == 1 && (_Ft_ == 0 || _Fs_==0) ) { // W
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, _Ft_ ? EEREC_T : EEREC_S);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else if( _Fd_ == _Fs_ && _Fs_ == _Ft_ && _XYZW_SS ) {
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_MULSS_XMM_to_XMM(EEREC_D, EEREC_D);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
else if( _X_Y_Z_W == 8 ) {
|
|
if (regd == EEREC_S) SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
|
else if (regd == EEREC_T) SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
|
else if (regd == EEREC_T) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MUL_iq_toD(VURegs *VU, int addr, int regd, int info)
|
|
{
|
|
if( _XYZW_SS ) {
|
|
if( regd == EEREC_TEMP ) {
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_M32_to_XMM(regd, addr);
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
}
|
|
else if( regd == EEREC_S ) {
|
|
_vuFlipRegSS(VU, regd);
|
|
SSE_MULSS_M32_to_XMM(regd, addr);
|
|
_vuFlipRegSS(VU, regd);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_M32_to_XMM(regd, addr);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S ) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
}
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
else if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(regd, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MUL_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
|
{
|
|
if( _Ft_ == 0 ) {
|
|
if( xyzw < 3 ) {
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_XORPS_XMM_to_XMM(regd, regd);
|
|
}
|
|
}
|
|
else {
|
|
assert(xyzw==3);
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else if( regd != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
|
|
}
|
|
}
|
|
else if( _X_Y_Z_W == 8 ) {
|
|
if( regd == EEREC_TEMP ) {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
|
}
|
|
else {
|
|
if( xyzw == 0 ) {
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || regd == EEREC_TEMP || regd == EEREC_S )
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd == EEREC_TEMP ) SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
else if (regd == EEREC_S) SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
else {
|
|
_unpackVF_xyzw(regd, EEREC_T, xyzw);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MUL(VURegs *VU, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MUL_toD(VU, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MUL_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MUL_iq_toD(VU, addr, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
if( addr == VU_REGQ_ADDR ) CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
void recVUMI_MUL_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MULi(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
|
|
void recVUMI_MULq(VURegs *VU, int info) { recVUMI_MUL_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_MULx(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 0, info); }
|
|
void recVUMI_MULy(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 1, info); }
|
|
void recVUMI_MULz(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 2, info); }
|
|
void recVUMI_MULw(VURegs *VU, int info) { recVUMI_MUL_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_MULA( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MUL_toD(VU, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MULA_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
recVUMI_MUL_iq_toD(VU, addr, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MULA_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
recVUMI_MUL_xyzw_toD(VU, xyzw, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MULAi(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
|
|
void recVUMI_MULAq(VURegs *VU, int info) { recVUMI_MULA_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_MULAx(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 0, info); }
|
|
void recVUMI_MULAy(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 1, info); }
|
|
void recVUMI_MULAz(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 2, info); }
|
|
void recVUMI_MULAw(VURegs *VU, int info) { recVUMI_MULA_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_MADD_toD(VURegs *VU, int regd, int info)
|
|
{
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( regd == EEREC_ACC ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
else if (regd == EEREC_T) {
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else if (regd == EEREC_S) {
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_T);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd == EEREC_ACC ) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
else if (regd == EEREC_T) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else if (regd == EEREC_S) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MADD_iq_toD(VURegs *VU, int addr, int regd, int info)
|
|
{
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( regd == EEREC_ACC ) {
|
|
if( _Fs_ == 0 ) {
|
|
// add addr to w
|
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
|
SSE_ADDSS_M32_to_XMM(regd, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
|
}
|
|
else {
|
|
assert( EEREC_TEMP < XMMREGS );
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
else if( regd == EEREC_S ) {
|
|
SSE_MULSS_M32_to_XMM(regd, addr);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULSS_M32_to_XMM(regd, addr);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
}
|
|
else {
|
|
if( _Fs_ == 0 ) {
|
|
// add addr to w
|
|
if( _W ) {
|
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
|
SSE_ADDSS_M32_to_XMM(regd, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x27);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S ) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
}
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd == EEREC_ACC ) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
else if( regd == EEREC_S ) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else if( regd == EEREC_TEMP ) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(regd, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(regd, regd, 0x00);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MADD_xyzw_toD(VURegs *VU, int xyzw, int regd, int info)
|
|
{
|
|
if( _Ft_ == 0 ) {
|
|
|
|
if( xyzw == 3 ) {
|
|
// just add
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( regd == EEREC_S ) SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
else {
|
|
if( regd != EEREC_ACC ) SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_S);
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf ) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd == EEREC_S ) SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
else {
|
|
if( regd != EEREC_ACC ) SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
// just move acc to regd
|
|
if( _X_Y_Z_W != 0xf ) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd != EEREC_ACC ) SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
if( _X_Y_Z_W == 8 ) {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if( regd == EEREC_ACC ) {
|
|
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
else if( regd == EEREC_S ) {
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else if( regd == EEREC_TEMP ) {
|
|
SSE_MULSS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDSS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( _X_Y_Z_W != 0xf || regd == EEREC_ACC || regd == EEREC_TEMP || regd == EEREC_S )
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( regd == EEREC_ACC ) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
else if( regd == EEREC_S ) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else if( regd == EEREC_TEMP ) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
else {
|
|
_unpackVF_xyzw(regd, EEREC_T, xyzw);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_ADDPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MADD(VURegs *VU, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MADD_toD(VU, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MADD_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MADD_iq_toD(VU, addr, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
|
|
if( addr == VU_REGQ_ADDR ) CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
void recVUMI_MADD_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MADD_xyzw_toD(VU, xyzw, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
|
|
// fixes Suikoden 5 characters
CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
void recVUMI_MADDi(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
void recVUMI_MADDq(VURegs *VU, int info) { recVUMI_MADD_iq(VU, VU_REGQ_ADDR, info); }
void recVUMI_MADDx(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 0, info); }
void recVUMI_MADDy(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 1, info); }
void recVUMI_MADDz(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 2, info); }
void recVUMI_MADDw(VURegs *VU, int info) { recVUMI_MADD_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_MADDA( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MADD_toD(VU, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MADDAi( VURegs *VU , int info)
|
|
{
|
|
recVUMI_MADD_iq_toD( VU, VU_VI_ADDR(REG_I, 1), EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MADDAq( VURegs *VU , int info)
|
|
{
|
|
recVUMI_MADD_iq_toD( VU, VU_REGQ_ADDR, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MADDAx( VURegs *VU , int info)
|
|
{
|
|
recVUMI_MADD_xyzw_toD(VU, 0, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MADDAy( VURegs *VU , int info)
|
|
{
|
|
recVUMI_MADD_xyzw_toD(VU, 1, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MADDAz( VURegs *VU , int info)
|
|
{
|
|
recVUMI_MADD_xyzw_toD(VU, 2, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MADDAw( VURegs *VU , int info)
|
|
{
|
|
recVUMI_MADD_xyzw_toD(VU, 3, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
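// MSUB family: regd = ACC - Fs * Ft. When no spare xmm is available, the product is
// negated by XORing its sign bits with const_clip[4] and then added to ACC instead
// of subtracting.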
void recVUMI_MSUB_toD(VURegs *VU, int regd, int info)
|
|
{
|
|
if (_X_Y_Z_W != 0xf) {
|
|
int t1reg = _vuGetTempXMMreg(info);
|
|
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
if( t1reg >= 0 ) {
|
|
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC);
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(regd, t1reg);
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( regd == EEREC_S ) {
|
|
assert( regd != EEREC_ACC );
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_XORPS_M128_to_XMM(regd, (u32)&const_clip[4]);
|
|
}
|
|
else if( regd == EEREC_T ) {
|
|
assert( regd != EEREC_ACC );
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_XORPS_M128_to_XMM(regd, (u32)&const_clip[4]);
|
|
}
|
|
else if( regd == EEREC_TEMP ) {
|
|
SSE_MOVAPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_T);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_XORPS_M128_to_XMM(regd, (u32)&const_clip[4]);
|
|
}
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
if( regd != EEREC_ACC ) SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MSUB_temp_toD(VURegs *VU, int regd, int info)
|
|
{
|
|
if (_X_Y_Z_W != 0xf) {
|
|
int t1reg = _vuGetTempXMMreg(info);
|
|
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
|
|
if( t1reg >= 0 ) {
|
|
SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_ACC);
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(regd, t1reg);
|
|
_freeXMMreg(t1reg);
|
|
}
|
|
else {
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
VU_MERGE_REGS(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
else {
|
|
if( regd == EEREC_ACC ) {
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
else if( regd == EEREC_S ) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_XORPS_M128_to_XMM(regd, (u32)&const_clip[4]);
|
|
}
|
|
else if( regd == EEREC_TEMP ) {
|
|
SSE_MULPS_XMM_to_XMM(regd, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_XORPS_M128_to_XMM(regd, (u32)&const_clip[4]);
|
|
}
|
|
else {
|
|
if( regd != EEREC_ACC ) SSE_MOVAPS_XMM_to_XMM(regd, EEREC_ACC);
|
|
SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SUBPS_XMM_to_XMM(regd, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MSUB_iq_toD(VURegs *VU, int regd, int addr, int info)
|
|
{
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
recVUMI_MSUB_temp_toD(VU, regd, info);
|
|
}
|
|
|
|
void recVUMI_MSUB_xyzw_toD(VURegs *VU, int regd, int xyzw, int info)
|
|
{
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
recVUMI_MSUB_temp_toD(VU, regd, info);
|
|
}
|
|
|
|
void recVUMI_MSUB(VURegs *VU, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MSUB_toD(VU, EEREC_D, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MSUB_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
recVUMI_MSUB_iq_toD(VU, EEREC_D, addr, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
|
|
if( addr == VU_REGQ_ADDR ) CheckForOverflow(info, EEREC_D);
|
|
}
|
|
|
|
void recVUMI_MSUBi(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
void recVUMI_MSUBq(VURegs *VU, int info) { recVUMI_MSUB_iq(VU, VU_REGQ_ADDR, info); }
|
|
void recVUMI_MSUBx(VURegs *VU, int info)
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 0, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MSUBy(VURegs *VU, int info)
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 1, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MSUBz(VURegs *VU, int info)
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 2, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MSUBw(VURegs *VU, int info)
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_D, 3, info);
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_MSUBA( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_toD(VU, EEREC_ACC, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MSUBAi( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_VI_ADDR(REG_I, 1), info );
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MSUBAq( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_iq_toD( VU, EEREC_ACC, VU_REGQ_ADDR, info );
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MSUBAx( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 0, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MSUBAy( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 1, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MSUBAz( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 2, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_MSUBAw( VURegs *VU, int info )
|
|
{
|
|
recVUMI_MSUB_xyzw_toD(VU, EEREC_ACC, 3, info);
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
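// MAX/MINI: component-wise maximum/minimum of Fs and Ft (or a broadcast scalar),
// written to VF[fd]; these instructions do not update flags.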
void recVUMI_MAX(VURegs *VU, int info)
|
|
{
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if (EEREC_D == EEREC_S) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if (EEREC_D == EEREC_T) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_D == EEREC_S ) SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if( EEREC_D == EEREC_T ) SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MAX_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if( _XYZW_SS ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MAXSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
}
|
|
else if( EEREC_D == EEREC_S ) {
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_MAXSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MAXSS_M32_to_XMM(EEREC_D, addr);
|
|
}
|
|
else {
|
|
_vuMoveSS(VU, EEREC_TEMP, EEREC_S);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_MAXSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if(EEREC_D == EEREC_S) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_D, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MAX_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( _Fs_ == 0 && _Ft_ == 0 ) {
|
|
if( xyzw < 3 ) {
|
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)s_fones);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
else if( EEREC_D == EEREC_TEMP ) {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
if( xyzw == 0 ) {
|
|
if( EEREC_D == EEREC_S ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if( EEREC_D == EEREC_T ) SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MAXSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
if( _Fs_ == 0 && _Ft_ == 0 ) {
|
|
if( xyzw < 3 ) SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)s_fones);
|
|
}
|
|
else {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
}
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( _Fs_ == 0 && _Ft_ == 0 ) {
|
|
if( xyzw < 3 ) SSE_XORPS_XMM_to_XMM(EEREC_D, EEREC_D);
|
|
else SSE_MOVAPS_M128_to_XMM(EEREC_D, (u32)s_fones);
|
|
}
|
|
else {
|
|
if (EEREC_D == EEREC_S) {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
_unpackVF_xyzw(EEREC_D, EEREC_T, xyzw);
|
|
SSE_MAXPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MAXi(VURegs *VU, int info) { recVUMI_MAX_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
void recVUMI_MAXx(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 0, info); }
void recVUMI_MAXy(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 1, info); }
void recVUMI_MAXz(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 2, info); }
void recVUMI_MAXw(VURegs *VU, int info) { recVUMI_MAX_xyzw(VU, 3, info); }
|
|
|
|
void recVUMI_MINI(VURegs *VU, int info)
|
|
{
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if (EEREC_D == EEREC_S) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if (EEREC_D == EEREC_T) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_D == EEREC_S ) {
|
|
// needed for GT4 vu0rec
ClampUnordered(EEREC_T, EEREC_TEMP, 0);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
else if( EEREC_D == EEREC_T ) {
|
|
// needed for GT4 vu0rec
ClampUnordered(EEREC_S, EEREC_TEMP, 0);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MINI_iq(VURegs *VU, int addr, int info)
|
|
{
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if( _XYZW_SS ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MINSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_S);
|
|
}
|
|
else if( EEREC_D == EEREC_S ) {
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_MINSS_M32_to_XMM(EEREC_D, addr);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
else {
|
|
if( _X ) {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MINSS_M32_to_XMM(EEREC_D, addr);
|
|
}
|
|
else {
|
|
_vuMoveSS(VU, EEREC_TEMP, EEREC_S);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
SSE_MINSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
_vuFlipRegSS(VU, EEREC_D);
|
|
}
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if(EEREC_D == EEREC_S) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x00);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_D, addr);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_D, EEREC_D, 0x00);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MINI_xyzw(VURegs *VU, int xyzw, int info)
|
|
{
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if( _X_Y_Z_W == 8 ) {
|
|
if( EEREC_D == EEREC_TEMP ) {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
else {
|
|
if( xyzw == 0 ) {
|
|
if( EEREC_D == EEREC_S ) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
else if( EEREC_D == EEREC_T ) SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
else {
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_T);
|
|
}
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
if( EEREC_D != EEREC_S ) SSE_MOVSS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
SSE_MINSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
}
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if (EEREC_D == EEREC_S) {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, xyzw);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
|
|
}
|
|
else {
|
|
_unpackVF_xyzw(EEREC_D, EEREC_T, xyzw);
|
|
SSE_MINPS_XMM_to_XMM(EEREC_D, EEREC_S);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MINIi(VURegs *VU, int info) { recVUMI_MINI_iq(VU, VU_VI_ADDR(REG_I, 1), info); }
void recVUMI_MINIx(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 0, info); }
void recVUMI_MINIy(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 1, info); }
void recVUMI_MINIz(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 2, info); }
void recVUMI_MINIw(VURegs *VU, int info) { recVUMI_MINI_xyzw(VU, 3, info); }
|
|
|
|
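// OPMULA/OPMSUB (outer product): the operands are shuffled so that
// ACC.xyz = Fs.yzx * Ft.zxy, and OPMSUB then computes D.xyz = ACC.xyz - Fs.yzx * Ft.zxy.
// Only xyz is written (merge mask 14); the w field is left untouched.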
void recVUMI_OPMULA( VURegs *VU, int info )
|
|
{
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xD2); // EEREC_T = WYXZ
|
|
SSE_SHUFPS_XMM_to_XMM( EEREC_TEMP, EEREC_TEMP, 0xC9 ); // EEREC_TEMP = WXZY
|
|
SSE_MULPS_XMM_to_XMM( EEREC_TEMP, EEREC_T );
|
|
|
|
VU_MERGE_REGS_CUSTOM(EEREC_ACC, EEREC_TEMP, 14);
|
|
|
|
// revert EEREC_T
|
|
if( EEREC_T != EEREC_ACC )
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9);
|
|
|
|
recUpdateFlags(VU, EEREC_ACC, info);
|
|
}
|
|
|
|
void recVUMI_OPMSUB( VURegs *VU, int info )
|
|
{
|
|
if( !_Fd_ ) info |= PROCESS_EE_SET_D(EEREC_TEMP);
|
|
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM( EEREC_T, EEREC_T, 0xD2 ); // EEREC_T = WYXZ
|
|
SSE_SHUFPS_XMM_to_XMM( EEREC_TEMP, EEREC_TEMP, 0xC9 ); // EEREC_TEMP = WXZY
|
|
SSE_MULPS_XMM_to_XMM( EEREC_TEMP, EEREC_T);
|
|
|
|
// negate and add
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, (u32)&const_clip[4]);
|
|
SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_ACC);
|
|
VU_MERGE_REGS_CUSTOM(EEREC_D, EEREC_TEMP, 14);
|
|
|
|
// revert EEREC_T
|
|
if( EEREC_T != EEREC_D )
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xC9);
|
|
|
|
recUpdateFlags(VU, EEREC_D, info);
|
|
}
|
|
|
|
void recVUMI_NOP( VURegs *VU, int info )
|
|
{
|
|
}
|
|
|
|
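// FTOI0/4/12/15: float to fixed-point conversion. FTOI0 is a plain CVTPS2DQ; the
// others first scale by 2^4, 2^12 or 2^15 (the recMult_float_to_int tables) before
// converting.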
void recVUMI_FTOI0(VURegs *VU, int info)
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
}
|
|
}
|
|
|
|
void recVUMI_FTOIX(VURegs *VU, int addr, int info)
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if (EEREC_T != EEREC_S) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
|
else SSE2EMU_CVTPS2DQ_XMM_to_XMM(EEREC_T, EEREC_T);
|
|
}
|
|
}
|
|
|
|
void recVUMI_FTOI4( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (int)&recMult_float_to_int4[0], info); }
void recVUMI_FTOI12( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (int)&recMult_float_to_int12[0], info); }
void recVUMI_FTOI15( VURegs *VU, int info ) { recVUMI_FTOIX(VU, (int)&recMult_float_to_int15[0], info); }
|
|
|
|
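// ITOF0/4/12/15: the inverse conversion, fixed-point integer to float, optionally
// scaled back down by the recMult_int_to_float tables (1/2^4, 1/2^12, 1/2^15).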
void recVUMI_ITOF0( VURegs *VU, int info )
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
else {
|
|
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_TEMP, VU_VFx_ADDR( _Fs_ ));
|
|
}
|
|
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( cpucaps.hasStreamingSIMD2Extensions ) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
else {
|
|
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, VU_VFx_ADDR( _Fs_ ));
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_ITOFX(VURegs *VU, int addr, int info)
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
else {
|
|
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_TEMP, VU_VFx_ADDR( _Fs_ ));
|
|
}
|
|
|
|
SSE_MULPS_M128_to_XMM(EEREC_TEMP, addr);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
} else {
|
|
if(cpucaps.hasStreamingSIMD2Extensions) SSE2_CVTDQ2PS_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
else {
|
|
_deleteVFtoXMMreg(_Fs_, VU==&VU1, 1);
|
|
SSE2EMU_CVTDQ2PS_M128_to_XMM(EEREC_T, VU_VFx_ADDR( _Fs_ ));
|
|
}
|
|
|
|
SSE_MULPS_M128_to_XMM(EEREC_T, addr);
|
|
}
|
|
}
|
|
|
|
void recVUMI_ITOF4( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (int)&recMult_int_to_float4[0], info); }
void recVUMI_ITOF12( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (int)&recMult_int_to_float12[0], info); }
void recVUMI_ITOF15( VURegs *VU, int info ) { recVUMI_ITOFX(VU, (int)&recMult_int_to_float15[0], info); }
|
|
|
|
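// CLIP: tests the x/y/z fields of Fs against +/-|Ft.w| and packs the six judgement
// bits into the clip flag, shifting the previous flags up by 6 and keeping 24 bits
// of history.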
void recVUMI_CLIP(VURegs *VU, int info)
|
|
{
|
|
int t1reg = EEREC_D;
|
|
int t2reg = EEREC_ACC;
|
|
int x86temp0, x86temp1;
|
|
|
|
u32 clipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 0);
|
|
u32 prevclipaddr = VU_VI_ADDR(REG_CLIP_FLAG, 2);
|
|
|
|
assert( clipaddr != 0 );
|
|
assert( t1reg != t2reg && t1reg != EEREC_TEMP && t2reg != EEREC_TEMP );
|
|
|
|
x86temp1 = ALLOCTEMPX86(MODE_8BITREG);
|
|
x86temp0 = ALLOCTEMPX86(0);
|
|
|
|
if( _Ft_ == 0 ) {
|
|
// all 1s
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)&s_fones[0]);
|
|
SSE_MOVAPS_M128_to_XMM(t1reg, (u32)&s_fones[4]);
|
|
|
|
MOV32MtoR(EAX, prevclipaddr);
|
|
}
|
|
else {
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_T, 3);
|
|
SSE_XORPS_XMM_to_XMM(t1reg, t1reg);
|
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (int)const_clip);
|
|
|
|
MOV32MtoR(EAX, prevclipaddr);
|
|
|
|
SSE_SUBPS_XMM_to_XMM(t1reg, EEREC_TEMP);
|
|
}
|
|
|
|
SSE_CMPLTPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_CMPNLEPS_XMM_to_XMM(t1reg, EEREC_S);
|
|
|
|
SHL32ItoR(EAX, 6);
|
|
|
|
SSE_MOVAPS_XMM_to_XMM(t2reg, EEREC_TEMP);
|
|
SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
|
SSE_UNPCKHPS_XMM_to_XMM(t2reg, t1reg);
|
|
SSE_MOVMSKPS_XMM_to_R32(x86temp0, EEREC_TEMP); // -y,+y,-x,+x
|
|
SSE_MOVMSKPS_XMM_to_R32(x86temp1, t2reg); // -w,+w,-z,+z
|
|
|
|
AND32ItoR(EAX, 0xffffff);
|
|
|
|
AND8ItoR(x86temp1, 0x3);
|
|
SHL32ItoR(x86temp1, 4);
|
|
OR32RtoR(EAX, x86temp0);
|
|
OR32RtoR(EAX, x86temp1);
|
|
|
|
MOV32RtoM(clipaddr, EAX);
|
|
if( !(info&(PROCESS_VU_SUPER|PROCESS_VU_COP2)) ) MOV32RtoM((u32)&VU->VI[REG_CLIP_FLAG], EAX);
|
|
|
|
_freeXMMreg(t1reg);
|
|
_freeXMMreg(t2reg);
|
|
|
|
_freeX86reg(x86temp0);
|
|
_freeX86reg(x86temp1);
|
|
}
|
|
|
|
/******************************/
|
|
/* VU Lower instructions */
|
|
/******************************/
|
|
|
|
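// DIV: Q = Fs[fsf] / Ft[ftf]. Reads of VF00 are special-cased (its fields are the
// constants 0,0,0,1), and the quotient is clamped to g_maxvals/g_minvals before
// being stored to the Q register.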
void recVUMI_DIV(VURegs *VU, int info)
|
|
{
|
|
if( _Fs_ == 0 ) {
|
|
|
|
if( _Ft_ == 0 ) {
|
|
if( _Fsf_ < 3 ) MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0);
|
|
else if( _Ftf_ < 0 ) MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff);
|
|
else MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x3f800000);
|
|
return;
|
|
}
|
|
|
|
if( _Fsf_ == 3 ) { // dividend is VF00.w, which is always 1.0
// don't use RCPSS (very bad precision)
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[0].UL[3]);
|
|
|
|
if( _Ftf_ == 0 || (xmmregs[EEREC_T].mode & MODE_WRITE) ) {
|
|
if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(2*_Ftf_))&0xff);
|
|
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(8-2*_Ftf_))&0xff);
|
|
}
|
|
else {
|
|
SSE_DIVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[_Ft_].UL[_Ftf_]);
|
|
}
|
|
}
else { // dividend is VF00.x/y/z, which are always 0, so Q = 0
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0);
return;
|
|
}
|
|
}
|
|
else {
|
|
if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
|
|
|
if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(2*_Ftf_))&0xff);
|
|
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
|
|
// revert
|
|
if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(8-2*_Ftf_))&0xff);
|
|
}
|
|
|
|
//if( !CHECK_FORCEABS ) {
|
|
SSE_MINSS_M32_to_XMM(EEREC_TEMP, (u32)&g_maxvals[0]);
|
|
SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (u32)&g_minvals[0]);
|
|
//}
|
|
|
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
}
|
|
|
|
void recVUMI_SQRT( VURegs *VU, int info )
|
|
{
|
|
if( _Ftf_ ) {
|
|
if( xmmregs[EEREC_T].mode & MODE_WRITE ) {
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_TEMP, _Ftf_);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[_Ft_].UL[_Ftf_]);
|
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
}
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
}
|
|
|
|
SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
|
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
}
|
|
|
|
void recVUMI_RSQRT(VURegs *VU, int info)
|
|
{
|
|
if( _Ftf_ ) {
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
_unpackVF_xyzw(EEREC_TEMP, EEREC_TEMP, _Ftf_);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)const_clip);
|
|
SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
}
|
|
|
|
if( _Fs_ == 0 ) {
|
|
if( _Fsf_ == 3 ) SSE_RSQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
else SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_RSQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
if( _Fsf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, (0xe4e4>>(2*_Fsf_))&0xff);
|
|
SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
if( _Fsf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, (0xe4e4>>(8-2*_Fsf_))&0xff);
|
|
}
|
|
|
|
//if( !CHECK_FORCEABS ) {
|
|
SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (u32)&g_minvals[0]);
|
|
SSE_MINSS_M32_to_XMM(EEREC_TEMP, (u32)&g_maxvals[0]);
|
|
//}
|
|
|
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
}
|
|
|
|
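// Shared helper for IADDI/IADDIU/ISUBIU: VI[it] = VI[is] + imm, using LEA when the
// destination differs from the source so no extra move is needed.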
void _addISIMMtoIT(VURegs *VU, s16 imm, int info)
|
|
{
|
|
int fsreg = -1, ftreg;
|
|
if (_Ft_ == 0) return;
|
|
|
|
if( _Fs_ == 0 ) {
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE);
|
|
MOV32ItoR(ftreg, imm&0xffff);
|
|
return;
|
|
}
|
|
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE);
|
|
|
|
if (ftreg == fsreg) {
|
|
if (imm != 0 ) {
|
|
ADD16ItoR(ftreg, imm);
|
|
}
|
|
} else {
|
|
if( imm ) LEA16RtoR(ftreg, fsreg, imm);
|
|
else MOV32RtoR(ftreg, fsreg);
|
|
}
|
|
}
|
|
|
|
void recVUMI_IADDI(VURegs *VU, int info)
|
|
{
|
|
s16 imm;
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
imm = ( VU->code >> 6 ) & 0x1f;
|
|
imm = ( imm & 0x10 ? 0xfff0 : 0) | ( imm & 0xf );
|
|
_addISIMMtoIT(VU, imm, info);
|
|
}
|
|
|
|
void recVUMI_IADDIU(VURegs *VU, int info)
|
|
{
|
|
int imm;
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff );
|
|
_addISIMMtoIT(VU, imm, info);
|
|
}
|
|
|
|
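// Integer ALU ops (IADD/IAND/IOR/ISUB): operate on the 16-bit VI registers; VI00
// reads as the constant 0, so those operands reduce to a move (or a negate for ISUB).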
void recVUMI_IADD( VURegs *VU, int info )
|
|
{
|
|
int fdreg, fsreg = -1, ftreg = -1;
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if ( ( _Ft_ == 0 ) && ( _Fs_ == 0 ) ) {
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
XOR32RtoR(fdreg, fdreg);
|
|
return;
|
|
}
|
|
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
|
|
if ( _Fs_ == 0 )
|
|
{
|
|
if( (ftreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Ft_, MODE_READ)) >= 0 ) {
|
|
if( fdreg != ftreg ) MOV32RtoR(fdreg, ftreg);
|
|
}
|
|
else {
|
|
MOVZX32M16toR(fdreg, VU_VI_ADDR(_Ft_, 1));
|
|
}
|
|
}
|
|
else if ( _Ft_ == 0 )
|
|
{
|
|
if( (fsreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, MODE_READ)) >= 0 ) {
|
|
if( fdreg != fsreg ) MOV32RtoR(fdreg, fsreg);
|
|
}
|
|
else {
|
|
MOVZX32M16toR(fdreg, VU_VI_ADDR(_Fs_, 1));
|
|
}
|
|
}
|
|
else {
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
if( fdreg == fsreg ) ADD32RtoR(fdreg, ftreg);
|
|
else if( fdreg == ftreg ) ADD32RtoR(fdreg, fsreg);
|
|
else LEA16RRtoR(fdreg, fsreg, ftreg);
MOVZX32R16toR(fdreg, fdreg); // needed since we don't know if fdreg's upper bits are 0
}
|
|
}
|
|
|
|
void recVUMI_IAND( VURegs *VU, int info )
|
|
{
|
|
int fdreg, fsreg = -1, ftreg = -1;
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if ( ( _Fs_ == 0 ) || ( _Ft_ == 0 ) ) {
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
XOR32RtoR(fdreg, fdreg);
|
|
return;
|
|
}
|
|
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
if( fdreg == fsreg ) AND16RtoR(fdreg, ftreg);
|
|
else if( fdreg == ftreg ) AND16RtoR(fdreg, fsreg);
|
|
else {
|
|
MOV32RtoR(fdreg, ftreg);
|
|
AND32RtoR(fdreg, fsreg);
|
|
}
|
|
}
|
|
|
|
void recVUMI_IOR( VURegs *VU, int info )
|
|
{
|
|
int fdreg, fsreg = -1, ftreg = -1;
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if ( ( _Ft_ == 0 ) && ( _Fs_ == 0 ) ) {
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
XOR32RtoR(fdreg, fdreg);
|
|
return;
|
|
}
|
|
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
|
|
if ( _Fs_ == 0 )
|
|
{
|
|
if( (ftreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Ft_, MODE_READ)) >= 0 ) {
|
|
if( fdreg != ftreg ) MOV32RtoR(fdreg, ftreg);
|
|
}
|
|
else {
|
|
MOVZX32M16toR(fdreg, VU_VI_ADDR(_Ft_, 1));
|
|
}
|
|
}
|
|
else if ( _Ft_ == 0 )
|
|
{
|
|
if( (fsreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, MODE_READ)) >= 0 ) {
|
|
if( fdreg != fsreg ) MOV32RtoR(fdreg, fsreg);
|
|
}
|
|
else {
|
|
MOVZX32M16toR(fdreg, VU_VI_ADDR(_Fs_, 1));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
if( fdreg == fsreg ) OR16RtoR(fdreg, ftreg);
|
|
else if( fdreg == ftreg ) OR16RtoR(fdreg, fsreg);
|
|
else {
|
|
MOV32RtoR(fdreg, fsreg);
|
|
OR32RtoR(fdreg, ftreg);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_ISUB( VURegs *VU, int info )
|
|
{
|
|
int fdreg, fsreg = -1, ftreg = -1;
|
|
if ( _Fd_ == 0 ) return;
|
|
|
|
if ( ( _Ft_ == 0 ) && ( _Fs_ == 0 ) ) {
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
XOR32RtoR(fdreg, fdreg);
|
|
return;
|
|
}
|
|
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fdreg = ALLOCVI(_Fd_, MODE_WRITE);
|
|
|
|
if ( _Fs_ == 0 )
|
|
{
|
|
if( (ftreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Ft_, MODE_READ)) >= 0 ) {
|
|
if( fdreg != ftreg ) MOV32RtoR(fdreg, ftreg);
|
|
}
|
|
else {
|
|
MOVZX32M16toR(fdreg, VU_VI_ADDR(_Ft_, 1));
|
|
}
|
|
NEG16R(fdreg);
|
|
}
|
|
else if ( _Ft_ == 0 )
|
|
{
|
|
if( (fsreg = _checkX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, MODE_READ)) >= 0 ) {
|
|
if( fdreg != fsreg ) MOV32RtoR(fdreg, fsreg);
|
|
}
|
|
else {
|
|
MOVZX32M16toR(fdreg, VU_VI_ADDR(_Fs_, 1));
|
|
}
|
|
}
|
|
else
|
|
{
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
if( fdreg == fsreg ) SUB16RtoR(fdreg, ftreg);
|
|
else if( fdreg == ftreg ) {
|
|
SUB16RtoR(fdreg, fsreg);
|
|
NEG16R(fdreg);
|
|
}
|
|
else {
|
|
MOV32RtoR(fdreg, fsreg);
|
|
SUB16RtoR(fdreg, ftreg);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_ISUBIU( VURegs *VU, int info )
|
|
{
|
|
s16 imm;
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
imm = ( ( VU->code >> 10 ) & 0x7800 ) | ( VU->code & 0x7ff );
|
|
imm = -imm;
|
|
_addISIMMtoIT(VU, (u32)imm & 0xffff, info);
|
|
}
|
|
|
|
void recVUMI_MOVE( VURegs *VU, int info )
|
|
{
|
|
if (_Ft_ == 0) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_T != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
}
|
|
}
|
|
|
|
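// MFIR/MTIR move between integer and float registers: MFIR sign-extends VI[is] into
// the selected fields of Ft, MTIR copies the raw bits of Fs[fsf] into VI[it]
// (masked to 16 bits).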
void recVUMI_MFIR( VURegs *VU, int info )
|
|
{
|
|
static u32 s_temp;
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
_deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Fs_, 1);
|
|
|
|
if( cpucaps.hasStreamingSIMD2Extensions ) {
|
|
if( _XYZW_SS ) {
|
|
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
|
|
_vuFlipRegSS(VU, EEREC_T);
|
|
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
|
|
_vuFlipRegSS(VU, EEREC_T);
|
|
}
|
|
else if (_X_Y_Z_W != 0xf) {
|
|
SSE2_MOVD_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(_Fs_, 1)-2);
|
|
SSE2_PSRAD_I8_to_XMM(EEREC_TEMP, 16);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
} else {
|
|
SSE2_MOVD_M32_to_XMM(EEREC_T, VU_VI_ADDR(_Fs_, 1)-2);
|
|
SSE2_PSRAD_I8_to_XMM(EEREC_T, 16);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
|
|
}
|
|
}
|
|
else {
|
|
MOVSX32M16toR(EAX, VU_VI_ADDR(_Fs_, 1));
|
|
MOV32RtoM((u32)&s_temp, EAX);
|
|
|
|
if( _X_Y_Z_W != 0xf ) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)&s_temp);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_T, (u32)&s_temp);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
|
|
}
|
|
}
|
|
}
|
|
|
|
void recVUMI_MTIR( VURegs *VU, int info )
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
_deleteX86reg(X86TYPE_VI|((VU==&VU1)?X86TYPE_VU1:0), _Ft_, 2);
|
|
|
|
if( _Fsf_ == 0 ) {
|
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(_Ft_, 0), EEREC_S);
|
|
}
|
|
else {
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(_Ft_, 0), EEREC_TEMP);
|
|
}
|
|
|
|
AND32ItoM(VU_VI_ADDR(_Ft_, 0), 0xffff);
|
|
}
|
|
|
|
void recVUMI_MR32( VURegs *VU, int info )
|
|
{
|
|
if (_Ft_ == 0) return;
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x39);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( EEREC_T != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(EEREC_T, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x39);
|
|
}
|
|
}
|
|
|
|
// if x86reg < 0, reads directly from offset
|
|
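// Loads the selected xyzw fields of a VU-memory quadword into EEREC_T, either from
// [x86reg + offset] or from the absolute address in 'offset'.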
void _loadEAX(VURegs *VU, int x86reg, u32 offset, int info)
|
|
{
|
|
if( x86reg >= 0 ) {
|
|
switch(_X_Y_Z_W) {
|
|
case 3: // ZW
|
|
SSE_MOVHPS_RmOffset_to_XMM(EEREC_T, x86reg, offset+8);
|
|
break;
|
|
case 6: // YZ
|
|
SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0x9c);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78);
|
|
break;
|
|
|
|
case 8: // X
|
|
SSE_MOVSS_RmOffset_to_XMM(EEREC_TEMP, x86reg, offset);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
|
|
break;
|
|
case 9: // XW
|
|
SSE_SHUFPS_RmOffset_to_XMM(EEREC_T, x86reg, offset, 0xc9);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2);
|
|
break;
|
|
case 12: // XY
|
|
SSE_MOVLPS_RmOffset_to_XMM(EEREC_T, x86reg, offset);
|
|
break;
|
|
case 15:
|
|
if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_T, x86reg, offset);
|
|
else SSE_MOVUPSRmtoROffset(EEREC_T, x86reg, offset);
|
|
break;
|
|
default:
|
|
if( VU == &VU1 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
|
|
else SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
|
|
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
break;
|
|
}
|
|
}
|
|
else {
|
|
switch(_X_Y_Z_W) {
|
|
case 3: // ZW
|
|
SSE_MOVHPS_M64_to_XMM(EEREC_T, offset+8);
|
|
break;
|
|
case 6: // YZ
|
|
SSE_SHUFPS_M128_to_XMM(EEREC_T, offset, 0x9c);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0x78);
|
|
break;
|
|
case 8: // X
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, offset);
|
|
SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
|
|
break;
|
|
case 9: // XW
|
|
SSE_SHUFPS_M128_to_XMM(EEREC_T, offset, 0xc9);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0xd2);
|
|
break;
|
|
case 12: // XY
|
|
SSE_MOVLPS_M64_to_XMM(EEREC_T, offset);
|
|
break;
|
|
case 15:
|
|
if( VU == &VU1 ) SSE_MOVAPS_M128_to_XMM(EEREC_T, offset);
|
|
else SSE_MOVUPS_M128_to_XMM(EEREC_T, offset);
|
|
break;
|
|
default:
|
|
if( VU == &VU1 ) SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
else SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
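// Converts the quadword address held in a VI register into a byte offset into VU
// memory (result in EAX): VU1 wraps at 16K; VU0 wraps at 4K, with addresses >= 0x420
// redirected to the register area at 0x4200.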
int recVUTransformAddr(int x86reg, VURegs* VU, int vireg, int imm)
|
|
{
|
|
u8* pjmp[2];
|
|
if( x86reg == EAX ) {
|
|
if (imm) ADD32ItoR(x86reg, imm);
|
|
}
|
|
else {
|
|
if( imm ) LEA32RtoR(EAX, x86reg, imm);
|
|
else MOV32RtoR(EAX, x86reg);
|
|
}
|
|
|
|
if( VU == &VU1 ) {
|
|
SHL32ItoR(EAX, 4);
|
|
AND32ItoR(EAX, 0x3fff);
|
|
}
|
|
else {
// VU0 addresses >= 0x420 quadwords map to the register area at byte offset 0x4200 (read as 32-bit integers)
CMP32ItoR(EAX, 0x420);
|
|
pjmp[0] = JL8(0);
|
|
AND32ItoR(EAX, 0x1f);
|
|
SHL32ItoR(EAX, 2);
|
|
OR32ItoR(EAX, 0x4200);
|
|
|
|
pjmp[1] = JMP8(0);
|
|
x86SetJ8(pjmp[0]);
|
|
SHL32ItoR(EAX, 4);
|
|
AND32ItoR(EAX, 0xfff); // can be removed
|
|
|
|
x86SetJ8(pjmp[1]);
|
|
}
|
|
|
|
return EAX;
|
|
}
|
|
|
|
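// LQ/LQD/LQI: load a quadword from VU memory into Ft. LQD pre-decrements and LQI
// post-increments the VI base register; a base of VI00 means a constant address.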
void recVUMI_LQ(VURegs *VU, int info)
|
|
{
|
|
s16 imm;
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
imm = (VU->code & 0x400) ? (VU->code & 0x3ff) | 0xfc00 : (VU->code & 0x3ff);
|
|
if (_Fs_ == 0) {
|
|
_loadEAX(VU, -1, (u32)GET_VU_MEM(VU, (u32)imm*16), info);
|
|
} else {
|
|
int fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
_loadEAX(VU, recVUTransformAddr(fsreg, VU, _Fs_, imm), (u32)VU->Mem, info);
|
|
}
|
|
}
|
|
|
|
void recVUMI_LQD( VURegs *VU, int info )
|
|
{
|
|
int fsreg;
|
|
|
|
if ( _Fs_ != 0 ) {
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ|MODE_WRITE);
|
|
SUB16ItoR( fsreg, 1 );
|
|
}
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if ( _Fs_ == 0 ) {
|
|
_loadEAX(VU, -1, (u32)VU->Mem, info);
|
|
} else {
|
|
_loadEAX(VU, recVUTransformAddr(fsreg, VU, _Fs_, 0), (u32)VU->Mem, info);
|
|
}
|
|
}
|
|
|
|
void recVUMI_LQI(VURegs *VU, int info)
|
|
{
|
|
int fsreg;
|
|
|
|
if ( _Ft_ == 0 ) {
|
|
if( _Fs_ != 0 ) {
|
|
if( (fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_WRITE|MODE_READ)) >= 0 ) {
|
|
ADD16ItoR(fsreg, 1);
|
|
}
|
|
else {
|
|
ADD16ItoM( VU_VI_ADDR( _Fs_, 0 ), 1 );
|
|
}
|
|
}
|
|
return;
|
|
}
|
|
|
|
if (_Fs_ == 0) {
|
|
_loadEAX(VU, -1, (u32)VU->Mem, info);
|
|
} else {
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ|MODE_WRITE);
|
|
_loadEAX(VU, recVUTransformAddr(fsreg, VU, _Fs_, 0), (u32)VU->Mem, info);
|
|
ADD16ItoR( fsreg, 1 );
|
|
}
|
|
}
|
|
|
|
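// Stores the selected xyzw fields of Fs to VU memory (constants 0 / 1.0 when Fs is
// VF00). Field masks that don't map onto a single store fall back to a
// read-modify-write of the destination quadword.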
void _saveEAX(VURegs *VU, int x86reg, u32 offset, int info)
|
|
{
|
|
int t1reg;
|
|
|
|
if( _Fs_ == 0 ) {
|
|
if( _XYZW_SS ) {
|
|
u32 c = _W ? 0x3f800000 : 0;
|
|
if( x86reg >= 0 ) MOV32ItoRmOffset(x86reg, c, offset+(_W?12:(_Z?8:(_Y?4:0))));
|
|
else MOV32ItoM(offset+(_W?12:(_Z?8:(_Y?4:0))), c);
|
|
}
|
|
else {
|
|
int zeroreg = (x86reg == EAX) ? ALLOCTEMPX86(0) : EAX;
|
|
|
|
XOR32RtoR(zeroreg, zeroreg);
|
|
if( x86reg >= 0 ) {
|
|
if( _X ) MOV32RtoRmOffset(x86reg, zeroreg, offset);
|
|
if( _Y ) MOV32RtoRmOffset(x86reg, zeroreg, offset+4);
|
|
if( _Z ) MOV32RtoRmOffset(x86reg, zeroreg, offset+8);
|
|
if( _W ) MOV32ItoRmOffset(x86reg, 0x3f800000, offset+12);
|
|
}
|
|
else {
|
|
if( _X ) MOV32RtoM(offset, zeroreg);
|
|
if( _Y ) MOV32RtoM(offset+4, zeroreg);
|
|
if( _Z ) MOV32RtoM(offset+8, zeroreg);
|
|
if( _W ) MOV32ItoM(offset+12, 0x3f800000);
|
|
}
|
|
|
|
if( zeroreg != EAX ) _freeX86reg(zeroreg);
|
|
}
|
|
return;
|
|
}
|
|
|
|
switch(_X_Y_Z_W) {
|
|
case 1: // W
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27);
|
|
if( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset+12);
|
|
else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0x27);
|
|
break;
|
|
case 2: // Z
|
|
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
if( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8);
|
|
else SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP);
|
|
break;
|
|
case 3: // ZW
|
|
if( x86reg >= 0 ) SSE_MOVHPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+8);
|
|
else SSE_MOVHPS_XMM_to_M64(offset+8, EEREC_S);
|
|
break;
|
|
case 4: // Y
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xe1);
|
|
if( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset+4);
|
|
else SSE_MOVSS_XMM_to_M32(offset+4, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xe1);
|
|
break;
|
|
case 6: // YZ
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xc9);
|
|
if( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+4);
|
|
else SSE_MOVLPS_XMM_to_M64(offset+4, EEREC_S);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, 0xd2);
|
|
break;
|
|
case 8: // X
|
|
if( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset);
|
|
else SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
|
|
break;
|
|
case 9: // XW
|
|
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
if( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_S, offset);
|
|
else SSE_MOVSS_XMM_to_M32(offset, EEREC_S);
|
|
|
|
if( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSLDUP_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
|
|
else SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
|
|
|
|
if( x86reg >= 0 ) SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+12);
|
|
else SSE_MOVSS_XMM_to_M32(offset+12, EEREC_TEMP);
|
|
|
|
break;
|
|
case 12: // XY
|
|
if( x86reg >= 0 ) SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0);
|
|
else SSE_MOVLPS_XMM_to_M64(offset, EEREC_S);
|
|
break;
|
|
|
|
case 14: // XYZ
|
|
SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
if( x86reg >= 0 ) {
|
|
SSE_MOVLPS_XMM_to_RmOffset(x86reg, EEREC_S, offset+0);
|
|
SSE_MOVSS_XMM_to_RmOffset(x86reg, EEREC_TEMP, offset+8);
|
|
}
|
|
else {
|
|
SSE_MOVLPS_XMM_to_M64(offset, EEREC_S);
|
|
SSE_MOVSS_XMM_to_M32(offset+8, EEREC_TEMP);
|
|
}
|
|
break;
|
|
case 15: // XYZW
|
|
if( VU == &VU1 ) {
|
|
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_S, offset+0);
|
|
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S);
|
|
}
|
|
else {
|
|
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_S, offset+0);
|
|
else {
|
|
if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_S);
|
|
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_S);
|
|
}
|
|
}
|
|
break;
|
|
default:
|
|
|
|
// EEREC_D is a temp reg
|
|
// find the first nonwrite reg
|
|
t1reg = _vuGetTempXMMreg(info);
|
|
|
|
if( t1reg < 0 ) {
|
|
for(t1reg = 0; t1reg < XMMREGS; ++t1reg) {
|
|
if( xmmregs[t1reg].inuse && !(xmmregs[t1reg].mode&MODE_WRITE) ) break;
|
|
}
|
|
|
|
if( t1reg == XMMREGS ) t1reg = -1;
|
|
else {
|
|
if( t1reg != EEREC_S ) _allocTempXMMreg(XMMT_FPS, t1reg);
|
|
}
|
|
}
|
|
|
|
if( t1reg >= 0 ) {
|
|
// found a temp reg
|
|
if( VU == &VU1 ) {
|
|
if( x86reg >= 0 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
|
|
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
}
|
|
else {
|
|
if( x86reg >= 0 ) SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
|
|
else {
|
|
if( offset & 15 ) SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
}
|
|
}
|
|
|
|
if( t1reg != EEREC_S ) SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_S);
|
|
|
|
VU_MERGE_REGS(EEREC_TEMP, t1reg);
|
|
|
|
if( VU == &VU1 ) {
|
|
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
|
|
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
|
|
else SSE_MOVUPS_XMM_to_M128(offset, EEREC_TEMP);
|
|
}
|
|
|
|
if( t1reg != EEREC_S ) _freeXMMreg(t1reg);
|
|
else {
|
|
// read back the data
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_S, (u32)&VU->VF[_Fs_]);
|
|
}
|
|
}
|
|
else {
|
|
// do it with one reg
|
|
SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
|
|
|
|
if( VU == &VU1 ) {
|
|
if( x86reg >= 0 ) SSE_MOVAPSRmtoROffset(EEREC_TEMP, x86reg, offset);
|
|
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
}
|
|
else {
|
|
if( x86reg >= 0 ) SSE_MOVUPSRmtoROffset(EEREC_TEMP, x86reg, offset);
|
|
else {
|
|
if( offset & 15 ) SSE_MOVUPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
else SSE_MOVAPS_M128_to_XMM(EEREC_TEMP, offset);
|
|
}
|
|
}
|
|
|
|
VU_MERGE_REGS(EEREC_TEMP, EEREC_S);
|
|
|
|
if( VU == &VU1 ) {
|
|
if( x86reg >= 0 ) SSE_MOVAPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
|
|
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
|
|
}
|
|
else {
|
|
if( x86reg >= 0 ) SSE_MOVUPSRtoRmOffset(x86reg, EEREC_TEMP, offset);
|
|
else {
|
|
if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, EEREC_TEMP);
|
|
else SSE_MOVAPS_XMM_to_M128(offset, EEREC_TEMP);
|
|
}
|
|
}
|
|
|
|
// read back the data
|
|
SSE_MOVAPS_M128_to_XMM(EEREC_S, (u32)&VU->VF[_Fs_]);
|
|
}
|
|
|
|
break;
|
|
}
|
|
}
|
|
|
|
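// SQ/SQD/SQI: store Fs to VU memory addressed by VI[it]; SQD pre-decrements and SQI
// post-increments the base register.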
void recVUMI_SQ(VURegs *VU, int info)
|
|
{
|
|
s16 imm;
|
|
|
|
imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff);
|
|
if ( _Ft_ == 0 ) {
|
|
_saveEAX(VU, -1, (u32)GET_VU_MEM(VU, (int)imm * 16), info);
|
|
}
|
|
else {
|
|
int ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
_saveEAX(VU, recVUTransformAddr(ftreg, VU, _Ft_, imm), (u32)VU->Mem, info);
|
|
}
|
|
}
|
|
|
|
void recVUMI_SQD(VURegs *VU, int info)
|
|
{
|
|
if (_Ft_ == 0) {
|
|
_saveEAX(VU, -1, (u32)VU->Mem, info);
|
|
} else {
|
|
int ftreg = ALLOCVI(_Ft_, MODE_READ|MODE_WRITE);
|
|
SUB16ItoR( ftreg, 1 );
|
|
_saveEAX(VU, recVUTransformAddr(ftreg, VU, _Ft_, 0), (u32)VU->Mem, info);
|
|
}
|
|
}
|
|
|
|
void recVUMI_SQI(VURegs *VU, int info)
|
|
{
|
|
if (_Ft_ == 0) {
|
|
_saveEAX(VU, -1, (u32)VU->Mem, info);
|
|
} else {
|
|
int ftreg = ALLOCVI(_Ft_, MODE_READ|MODE_WRITE);
|
|
_saveEAX(VU, recVUTransformAddr(ftreg, VU, _Ft_, 0), (u32)VU->Mem, info);
|
|
|
|
ADD16ItoR( ftreg, 1 );
|
|
}
|
|
}
|
|
|
|
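// ILW/ISW/ILWR/ISWR: 16-bit integer loads/stores; the dest field bits (x/y/z/w)
// select which 32-bit slots of the addressed quadword are accessed.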
void recVUMI_ILW(VURegs *VU, int info)
|
|
{
|
|
int ftreg;
|
|
s16 imm, off;
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff);
|
|
if (_X) off = 0;
|
|
else if (_Y) off = 4;
|
|
else if (_Z) off = 8;
|
|
else if (_W) off = 12;
|
|
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE);
|
|
|
|
if ( _Fs_ == 0 ) {
|
|
MOVZX32M16toR( ftreg, (u32)GET_VU_MEM(VU, (int)imm * 16 + off) );
|
|
}
|
|
else {
|
|
int fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
MOV32RmtoROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, imm), (u32)VU->Mem + off);
|
|
}
|
|
}
|
|
|
|
void recVUMI_ISW( VURegs *VU, int info )
|
|
{
|
|
s16 imm;
|
|
|
|
imm = ( VU->code & 0x400) ? ( VU->code & 0x3ff) | 0xfc00 : ( VU->code & 0x3ff);
|
|
|
|
if (_Fs_ == 0) {
|
|
u32 off = (u32)GET_VU_MEM(VU, (int)imm * 16);
|
|
int ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
if (_X) MOV32RtoM(off, ftreg);
|
|
if (_Y) MOV32RtoM(off+4, ftreg);
|
|
if (_Z) MOV32RtoM(off+8, ftreg);
|
|
if (_W) MOV32RtoM(off+12, ftreg);
|
|
}
|
|
else {
|
|
int x86reg, fsreg, ftreg;
|
|
|
|
ADD_VI_NEEDED(_Ft_);
|
|
fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
x86reg = recVUTransformAddr(fsreg, VU, _Fs_, imm);
|
|
|
|
if (_X) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem);
|
|
if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem+4);
|
|
if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem+8);
|
|
if (_W) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem+12);
|
|
}
|
|
}
|
|
|
|
void recVUMI_ILWR( VURegs *VU, int info )
|
|
{
|
|
int off, ftreg;
|
|
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
if (_X) off = 0;
|
|
else if (_Y) off = 4;
|
|
else if (_Z) off = 8;
|
|
else if (_W) off = 12;
|
|
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE);
|
|
|
|
if ( _Fs_ == 0 ) {
|
|
MOVZX32M16toR( ftreg, (int)VU->Mem + off );
|
|
}
|
|
else {
|
|
int fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
MOV32RmtoROffset(ftreg, recVUTransformAddr(fsreg, VU, _Fs_, 0), (int)VU->Mem + off);
|
|
}
|
|
}
|
|
|
|
void recVUMI_ISWR( VURegs *VU, int info )
|
|
{
|
|
int ftreg;
|
|
ADD_VI_NEEDED(_Fs_);
|
|
ftreg = ALLOCVI(_Ft_, MODE_READ);
|
|
|
|
if (_Fs_ == 0) {
|
|
if (_X) MOV32RtoM((int)VU->Mem, ftreg);
|
|
if (_Y) MOV32RtoM((int)VU->Mem+4, ftreg);
|
|
if (_Z) MOV32RtoM((int)VU->Mem+8, ftreg);
|
|
if (_W) MOV32RtoM((int)VU->Mem+12, ftreg);
|
|
}
|
|
else {
|
|
int x86reg;
|
|
int fsreg = ALLOCVI(_Fs_, MODE_READ);
|
|
x86reg = recVUTransformAddr(fsreg, VU, _Fs_, 0);
|
|
|
|
if (_X) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem);
|
|
if (_Y) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem+4);
|
|
if (_Z) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem+8);
|
|
if (_W) MOV32RtoRmOffset(x86reg, ftreg, (u32)VU->Mem+12);
|
|
}
|
|
}
|
|
|
|
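// R register ops: R always holds a float in [1.0, 2.0) (23 random mantissa bits with
// a fixed 0x3f800000 exponent). RINIT seeds it from Fs[fsf], RNEXT advances the
// pseudo-random sequence, RXOR xors the mantissa with Fs[fsf], RGET broadcasts R into Ft.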
void recVUMI_RINIT(VURegs *VU, int info)
|
|
{
|
|
if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode&MODE_NOFLUSH) ) {
|
|
|
|
_deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 2);
|
|
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
|
|
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)s_mask);
|
|
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (u32)s_fones);
|
|
SSE_MOVSS_XMM_to_M32(VU_REGR_ADDR, EEREC_TEMP);
|
|
}
|
|
else {
|
|
int rreg = ALLOCVI(REG_R, MODE_WRITE);
|
|
|
|
if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
|
|
SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
|
|
xmmregs[EEREC_S].mode &= ~MODE_WRITE;
|
|
}
|
|
|
|
MOV32MtoR( rreg, VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ );
|
|
|
|
AND32ItoR( rreg, 0x7fffff );
|
|
OR32ItoR( rreg, 0x7f << 23 );
|
|
}
|
|
}
|
|
|
|
void recVUMI_RGET(VURegs *VU, int info)
|
|
{
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
_deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1);
|
|
|
|
if (_X_Y_Z_W != 0xf) {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_REGR_ADDR);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
|
|
VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
|
|
}
|
|
else {
|
|
SSE_MOVSS_M32_to_XMM(EEREC_T, VU_REGR_ADDR);
|
|
SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
|
|
}
|
|
}
|
|
|
|
void recVUMI_RNEXT( VURegs *VU, int info )
|
|
{
|
|
int rreg, x86temp0, x86temp1;
|
|
if ( _Ft_ == 0) return;
|
|
|
|
rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ);
|
|
|
|
x86temp0 = ALLOCTEMPX86(0);
|
|
x86temp1 = ALLOCTEMPX86(0);
|
|
|
|
// code from www.project-fao.org
|
|
MOV32MtoR(rreg, VU_REGR_ADDR);
|
|
MOV32RtoR(x86temp0, rreg);
|
|
SHR32ItoR(x86temp0, 4);
|
|
AND32ItoR(x86temp0, 1);
|
|
|
|
MOV32RtoR(x86temp1, rreg);
|
|
SHR32ItoR(x86temp1, 22);
|
|
AND32ItoR(x86temp1, 1);
|
|
|
|
SHL32ItoR(rreg, 1);
|
|
XOR32RtoR(x86temp0, x86temp1);
|
|
XOR32RtoR(rreg, x86temp0);
|
|
AND32ItoR(rreg, 0x7fffff);
|
|
OR32ItoR(rreg, 0x3f800000);
|
|
|
|
_freeX86reg(x86temp0);
|
|
_freeX86reg(x86temp1);
|
|
|
|
recVUMI_RGET(VU, info);
|
|
}
|
|
|
|
void recVUMI_RXOR( VURegs *VU, int info )
|
|
{
|
|
if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) {
|
|
_deleteX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), REG_R, 1);
|
|
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
|
|
|
SSE_XORPS_M128_to_XMM(EEREC_TEMP, VU_REGR_ADDR);
|
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (u32)s_mask);
|
|
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (u32)s_fones);
|
|
SSE_MOVSS_XMM_to_M32(VU_REGR_ADDR, EEREC_TEMP);
|
|
}
|
|
else {
|
|
int rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ);
|
|
|
|
if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
|
|
SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
|
|
xmmregs[EEREC_S].mode &= ~MODE_WRITE;
|
|
}
|
|
|
|
XOR32MtoR( rreg, VU_VFx_ADDR( _Fs_ ) + 4 * _Fsf_ );
|
|
AND32ItoR( rreg, 0x7fffff );
|
|
OR32ItoR ( rreg, 0x3f800000 );
|
|
}
|
|
}
|
|
|
|
void recVUMI_WAITQ( VURegs *VU, int info )
|
|
{
|
|
// if( info & PROCESS_VU_SUPER ) {
|
|
// //CALLFunc(waitqfn);
|
|
// SuperVUFlush(0, 1);
|
|
// }
|
|
}
|
|
|
|
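// Status flag ops (FSAND/FSEQ/FSOR/FSSET): combine the 12-bit status flag with the
// instruction's immediate; FSSET only rewrites the sticky bits (imm & 0xFC0), keeping
// the lower six bits of the previous value.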
void recVUMI_FSAND( VURegs *VU, int info )
|
|
{
|
|
int ftreg;
|
|
u16 imm;
|
|
imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff);
|
|
if(_Ft_ == 0) return;
|
|
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE);
|
|
MOV32MtoR(ftreg, VU_VI_ADDR(REG_STATUS_FLAG, 1));
|
|
AND32ItoR( ftreg, 0xFFF&imm );
|
|
}
|
|
|
|
void recVUMI_FSEQ( VURegs *VU, int info )
|
|
{
|
|
int ftreg;
|
|
u32 imm;
|
|
if ( _Ft_ == 0 ) return;
|
|
|
|
imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff);
|
|
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_8BITREG);
|
|
|
|
MOVZX32M16toR( EAX, VU_VI_ADDR(REG_STATUS_FLAG, 1) );
|
|
XOR32RtoR(ftreg, ftreg);
|
|
AND16ItoR( EAX, 0xFFF );
|
|
|
|
CMP16ItoR(EAX, imm);
|
|
SETE8R(ftreg);
|
|
}
|
|
|
|
void recVUMI_FSOR( VURegs *VU, int info )
|
|
{
|
|
int ftreg;
|
|
u32 imm;
|
|
if(_Ft_ == 0) return;
|
|
|
|
imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7ff);
|
|
|
|
ftreg = ALLOCVI(_Ft_, MODE_WRITE);
|
|
|
|
MOVZX32M16toR( ftreg, VU_VI_ADDR(REG_STATUS_FLAG, 1) );
|
|
OR16ItoR( ftreg, imm );
|
|
}
|
|
|
|
void recVUMI_FSSET(VURegs *VU, int info)
|
|
{
|
|
u32 writeaddr = VU_VI_ADDR(REG_STATUS_FLAG, 0);
|
|
u32 prevaddr = VU_VI_ADDR(REG_STATUS_FLAG, 2);
|
|
|
|
u16 imm = 0;
|
|
imm = (((VU->code >> 21 ) & 0x1) << 11) | (VU->code & 0x7FF);
|
|
|
|
MOV32MtoR(EAX, prevaddr);
|
|
AND32ItoR(EAX, 0x3f);
|
|
if ((imm&0xfc0) != 0) OR32ItoR(EAX, imm & 0xFC0);
|
|
|
|
MOV32RtoM(writeaddr ? writeaddr : prevaddr, EAX);
|
|
}
|
|
|
|
void recVUMI_FMAND( VURegs *VU, int info )
{
	int fsreg, ftreg;
	if ( _Ft_ == 0 ) return;

	ftreg = ALLOCVI(_Ft_, MODE_WRITE);

	if( (fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ)) >= 0 ) {
		if( ftreg != fsreg ) MOVZX32R16toR(ftreg, fsreg);
	}
	else MOVZX32M16toR(ftreg, VU_VI_ADDR(_Fs_, 1));

	AND16MtoR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1));
}

void recVUMI_FMEQ( VURegs *VU, int info )
{
	int ftreg, fsreg;
	if ( _Ft_ == 0 ) return;

	if( _Ft_ == _Fs_ ) {
		ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_READ|MODE_8BITREG);
		CMP16MtoR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1));
		SETE8R(EAX);
		MOVZX32R8toR(ftreg, EAX);
	}
	else {
		ADD_VI_NEEDED(_Fs_);
		ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_8BITREG);
		fsreg = ALLOCVI(_Fs_, MODE_READ);

		XOR32RtoR(ftreg, ftreg);

		CMP16MtoR(fsreg, VU_VI_ADDR(REG_MAC_FLAG, 1));
		SETE8R(ftreg);
	}
}

void recVUMI_FMOR( VURegs *VU, int info )
{
	int fsreg, ftreg;
	if ( _Ft_ == 0 ) return;

	if( _Fs_ == 0 ) {
		ftreg = ALLOCVI(_Ft_, MODE_WRITE);
		MOV16MtoR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1));
	}
	else if( _Ft_ == _Fs_ ) {
		ftreg = ALLOCVI(_Ft_, MODE_WRITE|MODE_READ);
		OR16MtoR(ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1));
	}
	else {
		ftreg = ALLOCVI(_Ft_, MODE_WRITE);

		MOVZX32M16toR( ftreg, VU_VI_ADDR(REG_MAC_FLAG, 1));

		if( (fsreg = _checkX86reg(X86TYPE_VI|(VU==&VU1?X86TYPE_VU1:0), _Fs_, MODE_READ)) >= 0 ) {
			OR16RtoR( ftreg, fsreg);
		}
		else {
			OR16MtoR( ftreg, VU_VI_ADDR(_Fs_, 1));
		}
	}
}

void recVUMI_FCAND( VURegs *VU, int info )
{
	int ftreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG);

	MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1));
	XOR32RtoR(ftreg, ftreg);
	AND32ItoR( EAX, VU->code & 0xFFFFFF );
	SETNZ8R(ftreg);
}

void recVUMI_FCEQ( VURegs *VU, int info )
{
	int ftreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG);

	MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1));
	//AND32ItoR( EAX, 0xffffff);
	XOR32RtoR(ftreg, ftreg);
	CMP32ItoR( EAX, VU->code&0xffffff );
	SETE8R(ftreg);
}

void recVUMI_FCOR( VURegs *VU, int info )
{
	int ftreg = ALLOCVI(1, MODE_WRITE|MODE_8BITREG);

	MOV32MtoR( EAX, VU_VI_ADDR(REG_CLIP_FLAG, 1));
	//AND32ItoR( EAX, 0xffffff);
	XOR32RtoR(ftreg, ftreg);
	OR32ItoR( EAX, (VU->code & 0xFFFFFF)|0xff000000 );
	ADD32ItoR(EAX, 1);

	// set to 1 if EAX is 0
	SETZ8R(ftreg);
}

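// Reference only (not compiled): the OR/ADD/SETZ trick above sets VI01 to 1 exactly when
// OR-ing the clip flag with the 24-bit immediate leaves all 24 low bits set. Forcing the
// top byte to 0xff makes the whole register wrap to zero on the +1 in that case, so SETZ
// captures the result. A hedged C equivalent; names below are illustrative only.
#if 0
static u32 vuFCOrRef( u32 clipflag, u32 imm24 )
{
	return ((clipflag | imm24 | 0xff000000) + 1) == 0;
}
#endif
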
void recVUMI_FCSET( VURegs *VU, int info )
{
	u32 addr = VU_VI_ADDR(REG_CLIP_FLAG, 0);

	MOV32ItoM(addr ? addr : VU_VI_ADDR(REG_CLIP_FLAG, 2), VU->code&0xffffff );

	if( !(info & (PROCESS_VU_SUPER|PROCESS_VU_COP2)) )
		MOV32ItoM( VU_VI_ADDR(REG_CLIP_FLAG, 1), VU->code&0xffffff );
}

void recVUMI_FCGET( VURegs *VU, int info )
{
	int ftreg;
	if ( _Ft_ == 0 ) return;

	ftreg = ALLOCVI(_Ft_, MODE_WRITE);

	MOV32MtoR(ftreg, VU_VI_ADDR(REG_CLIP_FLAG, 1));
	AND32ItoR(ftreg, 0x0fff);
}

// SuperVU branch fns are in ivuzerorec.cpp
static s32 _recbranchAddr(VURegs * VU)
{
	bpc = pc + (_Imm11_ << 3);
	if (bpc < 0) {
		bpc = pc + (_UImm11_ << 3);
	}
	if (VU == &VU1) {
		bpc &= 0x3fff;
	} else {
		bpc &= 0x0fff;
	}

	return bpc;
}

void recVUMI_IBEQ(VURegs *VU, int info)
{
	int fsreg, ftreg;
	ADD_VI_NEEDED(_Ft_);
	fsreg = ALLOCVI(_Fs_, MODE_READ);
	ftreg = ALLOCVI(_Ft_, MODE_READ);

	bpc = _recbranchAddr(VU);

	CMP16RtoR( fsreg, ftreg );
	j8Ptr[ 0 ] = JNE8( 0 );

	MOV16ItoM((uptr)&VU->branch, 2);
	MOV32ItoM((uptr)&VU->branchpc, bpc);

	// only jump when the E bit is not set
	x86SetJ8( j8Ptr[ 0 ] );

	branch |= 1;
}

void recVUMI_IBGEZ( VURegs *VU, int info )
{
	int fsreg = ALLOCVI(_Fs_, MODE_READ);

	bpc = _recbranchAddr(VU);

	MOV16ItoM( VU_VI_ADDR(REG_TPC, 0), (int)pc );
	CMP16ItoR( fsreg, 0x0 );
	j8Ptr[ 0 ] = JL8( 0 );

	// supervu will take care of the rest
	MOV16ItoM((uptr)&VU->branch, 2);
	MOV32ItoM((uptr)&VU->branchpc, bpc);

	// only jump when the E bit is not set
	x86SetJ8( j8Ptr[ 0 ] );

	branch |= 1;
}

void recVUMI_IBGTZ( VURegs *VU, int info )
{
	int fsreg = ALLOCVI(_Fs_, MODE_READ);
	bpc = _recbranchAddr(VU);

	MOV16ItoM( VU_VI_ADDR(REG_TPC, 0), (int)pc );
	CMP16ItoR( fsreg, 0x0 );
	j8Ptr[ 0 ] = JLE8( 0 );

	// supervu will take care of the rest
	MOV16ItoM((uptr)&VU->branch, 2);
	MOV32ItoM((uptr)&VU->branchpc, bpc);

	// only jump when the E bit is not set
	x86SetJ8( j8Ptr[ 0 ] );

	branch |= 1;
}

void recVUMI_IBLEZ( VURegs *VU, int info )
{
	int fsreg = ALLOCVI(_Fs_, MODE_READ);

	bpc = _recbranchAddr(VU);

	MOV16ItoM( VU_VI_ADDR(REG_TPC, 0), (int)pc );
	CMP16ItoR( fsreg, 0x0 );
	j8Ptr[ 0 ] = JG8( 0 );

	MOV16ItoM((uptr)&VU->branch, 2);
	MOV32ItoM((uptr)&VU->branchpc, bpc);

	// only jump when the E bit is not set
	x86SetJ8( j8Ptr[ 0 ] );

	branch |= 1;
}

void recVUMI_IBLTZ( VURegs *VU, int info )
{
	int fsreg = ALLOCVI(_Fs_, MODE_READ);

	bpc = _recbranchAddr(VU);

	MOV16ItoM( VU_VI_ADDR(REG_TPC, 0), (int)pc );
	CMP16ItoR( fsreg, 0x0 );
	j8Ptr[ 0 ] = JGE8( 0 );

	MOV16ItoM((uptr)&VU->branch, 2);
	MOV32ItoM((uptr)&VU->branchpc, bpc);

	// only jump when the E bit is not set
	x86SetJ8( j8Ptr[ 0 ] );

	branch |= 1;
}

void recVUMI_IBNE( VURegs *VU, int info )
{
	int fsreg, ftreg;
	ADD_VI_NEEDED(_Ft_);
	fsreg = ALLOCVI(_Fs_, MODE_READ);
	ftreg = ALLOCVI(_Ft_, MODE_READ);

	bpc = _recbranchAddr(VU);

	MOV16ItoM( VU_VI_ADDR(REG_TPC, 0), (int)pc );
	CMP16RtoR( fsreg, ftreg );
	j8Ptr[ 0 ] = JE8( 0 );

	MOV16ItoM((uptr)&VU->branch, 2);
	MOV32ItoM((uptr)&VU->branchpc, bpc);

	// only jump when the E bit is not set
	x86SetJ8( j8Ptr[ 0 ] );

	branch |= 1;
}

void recVUMI_B(VURegs *VU, int info)
{
	// supervu will take care of the rest
	bpc = _recbranchAddr(VU);

	MOV32ItoM(VU_VI_ADDR(REG_TPC, 0), bpc);

	branch |= 3;
}

void recVUMI_BAL( VURegs *VU, int info )
{
	bpc = _recbranchAddr(VU);

	MOV32ItoM(VU_VI_ADDR(REG_TPC, 0), bpc);

	if ( _Ft_ ) {
		int ftreg = ALLOCVI(_Ft_, MODE_WRITE);
		MOV16ItoR( ftreg, (pc+8)>>3 );
	}

	branch |= 3;
}

void recVUMI_JR(VURegs *VU, int info)
{
	// fsreg cannot be ESP
	int fsreg = ALLOCVI(_Fs_, MODE_READ);
	LEA32RStoR(EAX, fsreg, 3);
	MOV32RtoM(VU_VI_ADDR(REG_TPC, 0), EAX);

	branch |= 3;
}

void recVUMI_JALR(VURegs *VU, int info)
{
	// fsreg cannot be ESP
	int fsreg = ALLOCVI(_Fs_, MODE_READ);
	LEA32RStoR(EAX, fsreg, 3);
	MOV32RtoM(VU_VI_ADDR(REG_TPC, 0), EAX);

	if ( _Ft_ ) {
		int ftreg = ALLOCVI(_Ft_, MODE_WRITE);
		MOV16ItoR( ftreg, (pc+8)>>3 );
	}

	branch |= 3;
}

void recVUMI_MFP(VURegs *VU, int info)
{
	if ( _Ft_ == 0 ) return;

	if( _XYZW_SS ) {
		_vuFlipRegSS(VU, EEREC_T);
		SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 1));
		SSE_MOVSS_XMM_to_XMM(EEREC_T, EEREC_TEMP);
		_vuFlipRegSS(VU, EEREC_T);
	}
	else if (_X_Y_Z_W != 0xf) {
		SSE_MOVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 1));
		SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0);
		VU_MERGE_REGS(EEREC_T, EEREC_TEMP);
	}
	else {
		SSE_MOVSS_M32_to_XMM(EEREC_T, VU_VI_ADDR(REG_P, 1));
		SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, 0);
	}
}

static __declspec(align(16)) float s_tempmem[4];

void recVUMI_WAITP(VURegs *VU, int info)
{
//	if( info & PROCESS_VU_SUPER )
//		SuperVUFlush(1, 1);
}

// in all EFU insts, EEREC_D is a temp reg
void vuSqSumXYZ(int regd, int regs, int regtemp)
{
	SSE_MOVAPS_XMM_to_XMM(regtemp, regs);
	SSE_MULPS_XMM_to_XMM(regtemp, regtemp);

	if( cpucaps.hasStreamingSIMD3Extensions ) {
		SSE3_HADDPS_XMM_to_XMM(regd, regtemp);
		SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z
		SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x
	}
	else {
		SSE_MOVHLPS_XMM_to_XMM(regd, regtemp);
		SSE_ADDSS_XMM_to_XMM(regd, regtemp);
		SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0x55);
		SSE_ADDSS_XMM_to_XMM(regd, regtemp);
	}
}

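// Reference only (not compiled): both paths above leave x*x + y*y + z*z in the low lane
// of regd (the SSE3 path via HADDPS, the SSE2 path via MOVHLPS/SHUFPS folds). A hedged
// scalar C sketch of the value being produced; the name below is illustrative only.
#if 0
static float vuSqSumXYZRef( const float vf[4] )
{
	return vf[0]*vf[0] + vf[1]*vf[1] + vf[2]*vf[2];	// w is not part of the sum
}
#endif
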
void recVUMI_ESADD( VURegs *VU, int info)
{
	assert( VU == &VU1 );
	vuSqSumXYZ(EEREC_TEMP, EEREC_S, EEREC_D);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

void recVUMI_ERSADD( VURegs *VU, int info )
{
	assert( VU == &VU1 );
	// almost same as vuSqSumXYZ
	SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
	SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);

	if( cpucaps.hasStreamingSIMD3Extensions ) {
		SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
		SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z
		SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[0].UL[3]);
		SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x
	}
	else {
		SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
		SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
		SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55);
		SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
		SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[0].UL[3]);
	}

	// don't use RCPSS (very bad precision)
	SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
	CheckForOverflowSS_(EEREC_TEMP, EEREC_D);
	//SSE_MINSS_M32_to_XMM(EEREC_TEMP, (u32)&g_maxvals[0]);
	//SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (u32)&g_minvals[0]);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

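// Reference only (not compiled): ERSADD stores P = vf0.w / (x*x + y*y + z*z), i.e. the
// reciprocal of the squared length (vf0.w is the constant 1.0). The full divide is kept
// instead of RCPSS because RCPSS only delivers roughly 12 bits of precision. A hedged C
// sketch of the value; the name below is illustrative only.
#if 0
static float vuERSAddRef( const float vf[4] )
{
	float sq = vf[0]*vf[0] + vf[1]*vf[1] + vf[2]*vf[2];
	return 1.0f / sq;	// overflow clamping is handled separately by CheckForOverflowSS_
}
#endif
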
void recVUMI_ELENG( VURegs *VU, int info )
{
	assert( VU == &VU1 );
	vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
	SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

void recVUMI_ERLENG( VURegs *VU, int info )
{
	assert( VU == &VU1 );
	vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP);
	SSE_RSQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
	CheckForOverflowSS_(EEREC_TEMP, EEREC_D);
	//SSE_MINSS_M32_to_XMM(EEREC_TEMP, (u32)&g_maxvals[0]);
	//SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (u32)&g_minvals[0]);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

void recVUMI_EATANxy( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) {
		SSE_MOVLPS_XMM_to_M64((u32)s_tempmem, EEREC_S);
		FLD32((u32)&s_tempmem[0]);
		FLD32((u32)&s_tempmem[1]);
	}
	else {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
			xmmregs[EEREC_S].mode &= ~MODE_WRITE;
		}

		FLD32((u32)&VU->VF[_Fs_].UL[0]);
		FLD32((u32)&VU->VF[_Fs_].UL[1]);
	}

	FPATAN();
	FSTP32(VU_VI_ADDR(REG_P, 0));
}

void recVUMI_EATANxz( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) {
		SSE_MOVLPS_XMM_to_M64((u32)s_tempmem, EEREC_S);
		SSE_MOVHPS_XMM_to_M64((u32)&s_tempmem[2], EEREC_S); // also spill z/w so s_tempmem[2] is valid below
		FLD32((u32)&s_tempmem[0]);
		FLD32((u32)&s_tempmem[2]);
	}
	else {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
			xmmregs[EEREC_S].mode &= ~MODE_WRITE;
		}

		FLD32((u32)&VU->VF[_Fs_].UL[0]);
		FLD32((u32)&VU->VF[_Fs_].UL[2]);
	}

	FPATAN();
	FSTP32(VU_VI_ADDR(REG_P, 0));
}

void recVUMI_ESUM( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( cpucaps.hasStreamingSIMD3Extensions ) {
		SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
		SSE3_HADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
		SSE3_HADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
	}
	else {
		SSE_MOVHLPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
		SSE_ADDPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // y+w, x+z
		SSE_UNPCKLPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // y+w, y+w, x+z, x+z
		SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP);
		SSE_ADDSS_XMM_to_XMM(EEREC_TEMP, EEREC_D);
	}

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

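// Reference only (not compiled): ESUM stores the horizontal sum x+y+z+w of the source
// register into P; the SSE2 path above first folds the high pair onto the low pair and
// then adds the two remaining lanes. A hedged scalar C sketch; the name below is
// illustrative only.
#if 0
static float vuESumRef( const float vf[4] )
{
	return vf[0] + vf[1] + vf[2] + vf[3];
}
#endif
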
void recVUMI_ERCPR( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[0].UL[3]);

	// don't use RCPSS (very bad precision)
	if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
		if( _Fsf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, (0xe4e4>>(2*_Fsf_))&0xff);
		SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);

		// revert
		if( _Fsf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, (0xe4e4>>(8-2*_Fsf_))&0xff);
	}
	else {
		SSE_DIVSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[_Fs_].UL[_Fsf_]);
	}

	CheckForOverflowSS_(EEREC_TEMP, EEREC_D);
	//SSE_MINSS_M32_to_XMM(EEREC_TEMP, (u32)&g_maxvals[0]);
	//SSE_MAXSS_M32_to_XMM(EEREC_TEMP, (u32)&g_minvals[0]);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

void recVUMI_ESQRT( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( _Fsf_ ) {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			_unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
			SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
		}
		else {
			SSE_SQRTSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[_Fs_].UL[_Fsf_]);
		}
	}
	else SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

void recVUMI_ERSQRT( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( _Fsf_ ) {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			_unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
			SSE_RSQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP);
		}
		else {
			SSE_RSQRTSS_M32_to_XMM(EEREC_TEMP, (u32)&VU->VF[_Fs_].UL[_Fsf_]);
		}
	}
	else SSE_RSQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_S);

	SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP);
}

void recVUMI_ESIN( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) {
		switch(_Fsf_) {
			case 0: SSE_MOVSS_XMM_to_M32((u32)s_tempmem, EEREC_S); break;
			case 1: SSE_MOVLPS_XMM_to_M64((u32)s_tempmem, EEREC_S); break;
			default: SSE_MOVHPS_XMM_to_M64((u32)&s_tempmem[2], EEREC_S); break;
		}
		FLD32((u32)&s_tempmem[_Fsf_]);
	}
	else {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
			xmmregs[EEREC_S].mode &= ~MODE_WRITE;
		}

		FLD32((u32)&VU->VF[_Fs_].UL[_Fsf_]);
	}

	FSIN();
	FSTP32(VU_VI_ADDR(REG_P, 0));
}

void recVUMI_EATAN( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) {
		switch(_Fsf_) {
			case 0: SSE_MOVSS_XMM_to_M32((u32)s_tempmem, EEREC_S); break;
			case 1: SSE_MOVLPS_XMM_to_M64((u32)s_tempmem, EEREC_S); break;
			default: SSE_MOVHPS_XMM_to_M64((u32)&s_tempmem[2], EEREC_S); break;
		}
		FLD1();
		FLD32((u32)&s_tempmem[_Fsf_]);
	}
	else {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
			xmmregs[EEREC_S].mode &= ~MODE_WRITE;
		}

		FLD1();
		FLD32((u32)&VU->VF[_Fs_].UL[_Fsf_]);
	}

	FPATAN();
	FSTP32(VU_VI_ADDR(REG_P, 0));
}

void recVUMI_EEXP( VURegs *VU, int info )
{
	assert( VU == &VU1 );

	FLDL2E();

	if( (xmmregs[EEREC_S].mode & MODE_WRITE) && (xmmregs[EEREC_S].mode & MODE_NOFLUSH) ) {
		switch(_Fsf_) {
			case 0: SSE_MOVSS_XMM_to_M32((u32)s_tempmem, EEREC_S); break;
			case 1: SSE_MOVLPS_XMM_to_M64((u32)s_tempmem, EEREC_S); break;
			default: SSE_MOVHPS_XMM_to_M64((u32)&s_tempmem[2], EEREC_S); break;
		}
		FMUL32((u32)&s_tempmem[_Fsf_]);
	}
	else {
		if( xmmregs[EEREC_S].mode & MODE_WRITE ) {
			SSE_MOVAPS_XMM_to_M128((u32)&VU->VF[_Fs_], EEREC_S);
			xmmregs[EEREC_S].mode &= ~MODE_WRITE;
		}

		FMUL32((u32)&VU->VF[_Fs_].UL[_Fsf_]);
	}

	// basically do 2^(log_2(e) * val)
	FLD(0);
	FRNDINT();
	FXCH(1);
	FSUB32Rto0(1);
	F2XM1();
	FLD1();
	FADD320toR(1);
	FSCALE();
	FSTP(1);

	FSTP32(VU_VI_ADDR(REG_P, 0));
}

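// Reference only (not compiled): the x87 sequence above is meant to evaluate e^x as
// 2^(x*log2(e)), splitting the product into an integer part (FRNDINT, applied via FSCALE)
// and a small fractional part handled by F2XM1 (which returns 2^f - 1). A hedged C sketch
// of that decomposition; the function name below is illustrative only.
#if 0
#include <math.h>
static float vuEExpRef( float x )
{
	double t = x * 1.4426950408889634;		// x * log2(e)
	double i = floor(t + 0.5);				// integer part (FRNDINT typically rounds to nearest)
	double f = t - i;						// fractional part, roughly in [-0.5, 0.5]
	return (float)ldexp(pow(2.0, f), (int)i);	// 2^f scaled by 2^i == 2^t == e^x
}
#endif
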
void recVUMI_XITOP( VURegs *VU, int info )
{
	int ftreg;
	if ( _Ft_ == 0 ) return;

	ftreg = ALLOCVI(_Ft_, MODE_WRITE);
	MOVZX32M16toR( ftreg, (int)&VU->vifRegs->itop );
}

void recVUMI_XTOP( VURegs *VU, int info )
{
	int ftreg;
	if ( _Ft_ == 0 ) return;

	ftreg = ALLOCVI(_Ft_, MODE_WRITE);
	MOVZX32M16toR( ftreg, (int)&VU->vifRegs->top );
}

extern HANDLE g_hGsEvent;

void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
{
	u32 size;
	u8* pmem;
	u32* data = (u32*)((u8*)pMem + (addr&0x3fff));

	size = GSgifTransferDummy(0, data, 0x4000>>4);

	size = 0x4000-(size<<4);
	pmem = GSRingBufCopy(NULL, size, GS_RINGTYPE_P1);
	assert( pmem != NULL );

	memcpy_amd(pmem, (u8*)pMem+addr, size);
	GSRINGBUF_DONECOPY(pmem, size);

	if( !CHECK_DUALCORE ) {
		SetEvent(g_hGsEvent);
	}
}

//extern u32 vudump;
//void countfn()
//{
//	static int scount = 0;
//	scount++;
//
//	if( scount > 766 )
//		vudump |= 8;
//}

void recVUMI_XGKICK( VURegs *VU, int info )
{
	int fsreg = ALLOCVI(_Fs_, MODE_READ);
	_freeX86reg(fsreg);
	SHL32ItoR(fsreg, 4);
	AND32ItoR(fsreg, 0x3fff);

	PUSH32R(fsreg);
	PUSH32I((int)VU->Mem);
	iFlushCall(FLUSH_NOCONST);

	//CALLFunc((u32)countfn);

	if( CHECK_MULTIGS ) {
		CALLFunc((int)VU1XGKICK_MTGSTransfer);
		ADD32ItoR(ESP, 8);
	}
	else {
		CALLFunc((int)GSgifTransfer1);
	}
}