mirror of https://github.com/PCSX2/pcsx2.git
backup, just ignore this
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@672 96395faa-99c1-11dd-bbfe-3dabce05a288
parent c1a59e6cc6
commit 2add61d729
@@ -236,7 +236,7 @@ BEGIN
 GROUPBOX "Other Options",IDC_STATIC,281,210,237,34,BS_LEFT
 LTEXT "These options specify how PCSX2's recompilers will clamp Infinities and NaN (Not a Number) values in the opcode instructions.",IDC_STATIC,286,94,224,19
 LTEXT "*None* - No clamping. (Fastest Mode)\n*Normal* - Clamps the result.\n*Extra* - Clamps the operands, the result, and anywhere in between.\n*Extra + Preserve Sign* - Same as ""Extra"", except preserves NaN's sign when clamping the operands.",IDC_STATIC,286,114,224,48
-LTEXT "*Full* - Attempts to emulates large numbers correctly for the EE's FPU. VU's clamp mode should be set to ""Extra + Preserve Sign"" for this to work best. (but still works for most games even with ""Normal"" VU clamping)",IDC_STATIC,287,163,214,36
+LTEXT "*Full* - Attempts to emulate large numbers correctly for the EE's FPU. VU's clamp mode should be set to ""Extra + Preserve Sign"" for this to work best. (but still works for most games even with ""Normal"" VU clamping)",IDC_STATIC,287,163,214,36
 LTEXT "Flush to Zero - Makes floating point underflows become zero.\nDenormals are Zero - Makes floating point denormals become zero.",IDC_STATIC,287,222,224,18
 END
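
The "Flush to Zero" and "Denormals are Zero" options described by this dialog text correspond to two MXCSR control bits. A minimal sketch of how those bits are usually toggled with standard SSE intrinsics (for orientation only; this is not PCSX2's actual code path):

    // Sketch only: FTZ affects underflowing results, DAZ affects denormal inputs.
    #include <xmmintrin.h>   // _MM_SET_FLUSH_ZERO_MODE
    #include <pmmintrin.h>   // _MM_SET_DENORMALS_ZERO_MODE

    void enableFastFloatModes()
    {
        _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);         // results that would underflow become 0
        _MM_SET_DENORMALS_ZERO_MODE(_MM_DENORMALS_ZERO_ON); // denormal inputs are treated as 0
    }
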
@@ -17,8 +17,10 @@
 */
 
 #pragma once
+#define _EmitterId_ (vuIndex+1)
 #include "Common.h"
 #include "VU.h"
+#include "ix86/ix86.h"
 #include "microVU_Misc.h"
 #include "microVU_Alloc.h"
 #include "microVU_Tables.h"
@@ -110,6 +112,7 @@ struct microVU {
 	VURegs* regs; // VU Regs Struct
 	u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
 	u8* ptr; // Pointer to next place to write recompiled code to
+	u32 code; // Contains the current Instruction
 /*
 	uptr x86eax; // Accumulator register. Used in arithmetic operations.
 	uptr x86ecx; // Counter register. Used in shift/rotate instructions.
@@ -30,32 +30,59 @@ extern PCSX2_ALIGNED16(microVU microVU1);
 //------------------------------------------------------------------
 // Micro VU - recPass 1 Functions
 //------------------------------------------------------------------
-/*
-#define setFd (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<7))
-#define getFd (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<1))
-#define getFs (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<2))
-#define getFt (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<3))
-*/
 #define makeFdFd (makeFd == 0)
 #define makeFdFs (makeFd == 1)
-#define makeFdFt (makeFd == 2)
 
-microVUt(void) mVUallocFMAC1a(u32 code, int& Fd, int& Fs, int& Ft, const int makeFd) {
-	microVU* mVU = mVUx;
-	if (_Fs_ == 0) { Fs = xmmZ; } else { Fs = xmmFs; }
-	if (_Ft_ == 0) { Ft = xmmZ; } else { Ft = xmmFt; }
-	if (makeFdFd) {Fd = xmmFd;}
-	else if (makeFdFs) {Fd = Fs;}
-	else if (makeFdFt) {Fd = Ft;}
-	if (_Fs_) SSE_MOVAPS_M128_to_XMM(Fs, (uptr)&mVU->regs->VF[_Fs_].UL[0]);
-	if (_Ft_ == _Ft_) SSE_MOVAPS_M128_to_XMM(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0]);
+#define getReg(reg, _reg_) { \
+	mVUloadReg(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); \
+	if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, _X_Y_Z_W); \
 }
 
-microVUt(void) mVUallocFMAC1b(u32 code, u32 pc, int& Fd) {
+#define getZeroSS(reg) { \
+	if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], _X_Y_Z_W); } \
+	else { SSE_XORPS_XMM_to_XMM(reg, reg); } \
+}
+
+#define getZero(reg) { \
+	if (_W) { mVUloadReg(reg, (uptr)&mVU->regs->VF[0].UL[0], _X_Y_Z_W); } \
+	else { SSE_XORPS_XMM_to_XMM(reg, reg); } \
+}
+
+// Note: If _Ft_ is 0, then don't modify xmm reg Ft, because its equal to xmmZ (unless _XYZW_SS, then you can modify xmm reg Ft)
+microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft, const bool makeFd) {
 	microVU* mVU = mVUx;
-	if (_Fd_ == 0) return;
-	else mVUsaveReg<vuIndex>(code, Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0]);
+	Fs = xmmFs;
+	Ft = xmmFt;
+	if (_XYZW_SS) {
+		if (!_Fs_) { getZeroSS(Fs); }
+		else { getReg(Fs, _Fs_); }
+
+		if (_Ft_ == _Fs_) { Ft = Fs; }
+		else {
+			if (!_Ft_) { getZeroSS(Ft); }
+			else { getReg(Ft, _Ft_); }
+		}
+	}
+	else {
+		if (!_Fs_) { getZero(Fs); }
+		else { getReg(Fs, _Fs_); }
+
+		if (_Ft_ == _Fs_) { Ft = Fs; }
+		else {
+			if (!_Ft_) { getZero(Ft); }
+			else { getReg(Ft, _Ft_); }
+		}
+	}
+	if (makeFdFs) {Fd = Fs;}
+	else {Fd = xmmFd;}
+}
+
+microVUt(void) mVUallocFMAC1b(int& Fd) {
+	microVU* mVU = mVUx;
+	if (!_Fd_) return;
+	if (CHECK_VU_OVERFLOW) mVUclamp1<vuIndex>(Fd, xmmT1, _X_Y_Z_W);
+	mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W);
 }
 
 #endif //PCSX2_MICROVU
@@ -44,6 +44,13 @@ struct microAllocInfo {
 	// bit 4 = ACC1 or ACC2?
 	// bit 5 = Read Q1/P1 or backup?
 	// bit 6 = Write to Q2/P2?
-	// bit 7 = Write Fd/Acc to backup memory?
+	// bit 7 = Write Fd/Acc/Result to backup memory?
+	// bit 8 = Update Status Flags?
+	// bit 9 = Update Mac Flags?
+	// bit 10 = Used with bit 11 to make a 2-bit key for status/mac flag instance
+	// bit 11 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
 	u32 curPC;
 };
 
+microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft, const bool makeFd);
+microVUt(void) mVUallocFMAC1b(int& Fd);
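
The info word sketched in these comments packs one flag per bit, plus a 2-bit instance key in bits 10-11. A minimal illustration of reading such a layout, with hypothetical mask names that are not part of the PCSX2 source:

    #include <cstdint>

    enum : uint32_t {
        INFO_BACKUP_RESULT = 1u << 7,  // bit 7 = Write Fd/Acc/Result to backup memory?
        INFO_UPDATE_STATUS = 1u << 8,  // bit 8 = Update Status Flags?
        INFO_UPDATE_MAC    = 1u << 9,  // bit 9 = Update Mac Flags?
    };

    // bits 10-11 form a 2-bit key selecting flag instance #0..#3
    inline uint32_t flagInstance(uint32_t info) { return (info >> 10) & 3; }
    inline bool updatesStatus(uint32_t info)    { return (info & INFO_UPDATE_STATUS) != 0; }
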
@@ -23,28 +23,55 @@
 extern PCSX2_ALIGNED16(microVU microVU0);
 extern PCSX2_ALIGNED16(microVU microVU1);
 
+//------------------------------------------------------------------
+// Micro VU - Clamp Functions
+//------------------------------------------------------------------
+
+// Used for Result Clamping
+microVUx(void) mVUclamp1(int reg, int regTemp, int xyzw) {
+}
+
+// Used for Operand Clamping
+microVUx(void) mVUclamp2(int reg, int regTemp, int xyzw) {
+}
+
 //------------------------------------------------------------------
 // Micro VU - Misc Functions
 //------------------------------------------------------------------
 
-microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset) {
-	switch ( _X_Y_Z_W ) {
+microVUx(void) mVUunpack_xyzw(int dstreg, int srcreg, int xyzw) {
+	switch ( xyzw ) {
+		case 0: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x00); break;
+		case 1: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x55); break;
+		case 2: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xaa); break;
+		case 3: SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xff); break;
+	}
+}
+
+microVUx(void) mVUloadReg(int reg, u32 offset, int xyzw) {
+	switch( xyzw ) {
+		case 8:  SSE_MOVSS_M32_to_XMM(reg, offset); break; // X
+		case 4:  SSE_MOVSS_M32_to_XMM(reg, offset+4); break; // Y
+		case 2:  SSE_MOVSS_M32_to_XMM(reg, offset+8); break; // Z
+		case 1:  SSE_MOVSS_M32_to_XMM(reg, offset+12); break; // W
+		case 3:  SSE_MOVHPS_M64_to_XMM(reg, offset+8); break; // ZW (not sure if this is faster than default)
+		case 12: SSE_MOVLPS_M64_to_XMM(reg, offset); break; // XY (not sure if this is faster than default)
+		default: SSE_MOVAPS_M128_to_XMM(reg, offset); break;
+	}
+}
+
+microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) {
+	switch ( xyzw ) {
 		case 1: // W
-			//SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x27);
-			//SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
 			SSE_MOVSS_XMM_to_M32(offset+12, reg);
 			break;
 		case 2: // Z
-			//SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
-			//SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
 			SSE_MOVSS_XMM_to_M32(offset+8, reg);
 			break;
 		case 3: // ZW
 			SSE_MOVHPS_XMM_to_M64(offset+8, reg);
 			break;
 		case 4: // Y
			//SSE2_PSHUFLW_XMM_to_XMM(xmmT1, reg, 0x4e);
-			//SSE_MOVSS_XMM_to_M32(offset+4, xmmT1);
 			SSE_MOVSS_XMM_to_M32(offset+4, reg);
 			break;
 		case 5: // YW
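
mVUclamp1/mVUclamp2 are still empty stubs at this point in the diff. The usual SSE clamping technique they are meant to implement pins each lane to the largest normal float, which also squashes NaNs because MINPS/MAXPS return their second operand when a lane is unordered. A sketch with plain intrinsics (illustration only, not the emitter calls used in this file):

    #include <emmintrin.h>

    static inline __m128 clampToMaxFloat(__m128 v)
    {
        const __m128 maxPos = _mm_castsi128_ps(_mm_set1_epi32(0x7f7fffff)); //  FLT_MAX bit pattern
        const __m128 maxNeg = _mm_castsi128_ps(_mm_set1_epi32(0xff7fffff)); // -FLT_MAX bit pattern
        v = _mm_min_ps(v, maxPos); // +Inf / NaN lanes become +FLT_MAX
        v = _mm_max_ps(v, maxNeg); // -Inf lanes become -FLT_MAX
        return v;
    }
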
@@ -52,7 +79,6 @@ microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset) {
 			SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
 			SSE_MOVSS_XMM_to_M32(offset+4, reg);
 			SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
-			SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xB1);
 			break;
 		case 6: // YZ
 			SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
@@ -96,8 +122,7 @@ microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset) {
 			SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
 			break;
 		case 15: // XYZW
-			if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, reg);
-			else SSE_MOVAPS_XMM_to_M128(offset, reg);
+			SSE_MOVAPS_XMM_to_M128(offset, reg);
 			break;
 	}
 }
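
The new mVUunpack_xyzw broadcasts a single component across all four lanes with PSHUFD, using the shuffle controls 0x00/0x55/0xaa/0xff for lanes 0 through 3. The same broadcasts written with SSE2 intrinsics, for reference only:

    #include <emmintrin.h>

    static inline __m128i broadcastLane(__m128i src, int lane)
    {
        switch (lane) {
            case 0:  return _mm_shuffle_epi32(src, 0x00); // lane 0 copied to all four lanes
            case 1:  return _mm_shuffle_epi32(src, 0x55); // lane 1
            case 2:  return _mm_shuffle_epi32(src, 0xAA); // lane 2
            default: return _mm_shuffle_epi32(src, 0xFF); // lane 3
        }
    }
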
@@ -18,57 +18,36 @@
 
 #pragma once
 
-#ifdef __LINUX__
-#include "ix86/ix86.h"
-#endif
-
 //------------------------------------------------------------------
 // Helper Macros
 //------------------------------------------------------------------
-#define _Ft_ ((code >> 16) & 0x1F) // The rt part of the instruction register
-#define _Fs_ ((code >> 11) & 0x1F) // The rd part of the instruction register
-#define _Fd_ ((code >> 6) & 0x1F) // The sa part of the instruction register
+#define _Ft_ ((mVU->code >> 16) & 0x1F) // The rt part of the instruction register
+#define _Fs_ ((mVU->code >> 11) & 0x1F) // The rd part of the instruction register
+#define _Fd_ ((mVU->code >> 6) & 0x1F) // The sa part of the instruction register
 
-#define _X ((code>>24) & 0x1)
-#define _Y ((code>>23) & 0x1)
-#define _Z ((code>>22) & 0x1)
-#define _W ((code>>21) & 0x1)
+#define _X ((mVU->code>>24) & 0x1)
+#define _Y ((mVU->code>>23) & 0x1)
+#define _Z ((mVU->code>>22) & 0x1)
+#define _W ((mVU->code>>21) & 0x1)
 
 #define _XYZW_SS (_X+_Y+_Z+_W==1)
 
-#define _X_Y_Z_W (((code >> 21 ) & 0xF ) )
+#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF ) )
 
-#define _Fsf_ ((code >> 21) & 0x03)
-#define _Ftf_ ((code >> 23) & 0x03)
+#define _Fsf_ ((mVU->code >> 21) & 0x03)
+#define _Ftf_ ((mVU->code >> 23) & 0x03)
 
-#define _Imm11_ (s32)(code & 0x400 ? 0xfffffc00 | (code & 0x3ff) : code & 0x3ff)
-#define _UImm11_ (s32)(code & 0x7ff)
+#define _Imm11_ (s32)(mVU->code & 0x400 ? 0xfffffc00 | (mVU->code & 0x3ff) : mVU->code & 0x3ff)
+#define _UImm11_ (s32)(mVU->code & 0x7ff)
-/*
-#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0]
-#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1]
-#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2]
-#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3]
-
-#define VU_REGR_ADDR (uptr)&VU->VI[REG_R]
-#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q]
-#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG]
-
-#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info)
-
-#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0]
-#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1]
-#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2]
-#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3]
-*/
-
-#define xmmT1 0 // XMM0
-#define xmmFd 1 // XMM1
-#define xmmFs 2 // XMM2
-#define xmmFt 3 // XMM3
-#define xmmACC1 4 // XMM4
-#define xmmACC2 5 // XMM5
-#define xmmPQ 6 // XMM6
-#define xmmZ 7 // XMM7
+#define xmmT1 0 // XMM0 // Temp Reg
+#define xmmFd 1 // XMM1 // Holds the Value of Fd
+#define xmmFs 2 // XMM2 // Holds the Value of Fs
+#define xmmFt 3 // XMM3 // Holds the Value of Ft
+#define xmmACC1 4 // XMM4 // Holds the Value of ACC
+#define xmmACC2 5 // XMM5 // Holds the Backup Value of ACC
+#define xmmPQ 6 // XMM6 // Holds the Value and Backup Values of P and Q regs
+#define xmmF 7 // XMM7 // Holds 4 instances of the status and mac flags (macflagX4::statusflagX4)
 
 // Template Stuff
 #define mVUx (vuIndex ? &microVU1 : &microVU0)
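
_Imm11_ sign-extends an 11-bit immediate: when bit 10 is set, the upper bits are filled with ones, otherwise the value is used as-is. An equivalent standalone version (hypothetical helper, not part of the source being diffed):

    #include <cstdint>

    inline int32_t signExtend11(uint32_t code)
    {
        uint32_t imm = code & 0x7ff;                       // low 11 bits of the opcode
        return (imm & 0x400) ? (int32_t)(imm | 0xfffff800) // bit 10 set -> negative value
                             : (int32_t)imm;
    }

    // e.g. signExtend11(0x7ff) == -1, signExtend11(0x3ff) == 1023
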
@@ -76,4 +55,11 @@
 #define microVUx(aType) template<int vuIndex> aType
 #define microVUf(aType) template<int vuIndex, int recPass> aType
 
-microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset);
+#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
+
+#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0))
+#define getFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1))
+#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2))
+#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3))
+#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7))
+#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8))
@@ -20,66 +20,85 @@
 #include "microVU.h"
 #ifdef PCSX2_MICROVU
 
-/*
-Cotton's Notes on how things will work (*experimental*, subject to change if I get different ideas):
-
-Guide:
-Fd, Fs, Ft = operands in the Micro Instructions
-Acc = VU's Accumulator register
-Fs/t = shorthand notation I made-up for "Fs or Ft"
-xmmFd, xmmFs, xmmFt, xmmAcc = XMM regs that hold Fd, Fs, Ft, and Acc values respectively.
-xmmZ = XMM reg that holds the zero Register; always {0, 0, 0, 1.0}
-xmmT1, xmmT2, xmmT3 = temp regs.
-
-General:
-XMM0 is a volatile temp reg throughout the recs. You can always freely use it.
-EAX is a volatile temp reg. You can always freely use it.
-
-Mapping:
-xmmT1 = xmm0
-xmmFd = xmm1
-xmmFs = xmm2
-xmmFt = xmm3
-xmmACC1 = xmm4
-xmmACC2 = xmm5
-xmmPQ = xmm6
-xmmZ = xmm7
-
-Most of the time the above mapping will be true, unless I find a reason not to do it this way :)
-
-Opcodes:
-Fd's 4-vectors must be preserved (kept valid); Unless operation is single-scalar, then only 'x' XMM vector
-will contain valid data for X, Y, Z, or W, and the other XMM vectors will be garbage and freely modifiable.
-
-Fs and Ft are temp regs that won't be used after the opcode, so their values can be freely modified.
-
-If (Fd == 0), Then you don't need to explicitly handle this case in the opcode implementation,
-since its dealt-with in the analyzing microVU pipeline functions.
-(So just do the normal operation and don't worry about it.)
-
-If (_X_Y_Z_W == 0) Then same as above. (btw, I'm not sure if this case ever happens...)
-
-If (Fd == Fs/t), Then xmmFd != xmmFs/t (unless its more optimized this way! it'll be commented on the opcode)
-
-Clamping:
-Fs/t can always be clamped by case 15 (all vectors modified) since they won't be written back.
-
-Problems:
-The biggest problem I think I'll have is xgkick opcode having variable timing/stalling.
-
-Other Notes:
-These notes are mostly to help me (cottonvibes) remember good ideas and to help confused devs to
-have an idea of how things work. Right now its all theoretical and I'll change things once implemented ;p
-*/
+//------------------------------------------------------------------
+// mVUupdateFlags() - Updates status/mac flags
+//------------------------------------------------------------------
+
+microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) {
+	microVU* mVU = mVUx;
+	static u8 *pjmp, *pjmp2;
+	static u32 *pjmp32;
+	static u32 macaddr, stataddr, prevstataddr;
+	static int x86macflag, x86statflag, x86temp;
+
+	//SysPrintf ("mVUupdateFlags\n");
+	if( !(doFlags) ) return;
+
+	//macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0);
+	//stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address
+	//prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address
+
+	SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw
+	MOV32MtoR(x86statflag, prevstataddr); // Load the previous status in to x86statflag
+	AND16ItoR(x86statflag, 0xff0); // Keep Sticky and D/I flags
+
+	//-------------------------Check for Signed flags------------------------------
+
+	// The following code makes sure the Signed Bit isn't set with Negative Zero
+	SSE_XORPS_XMM_to_XMM(regT2, regT2); // Clear regT2
+	SSE_CMPEQPS_XMM_to_XMM(regT2, regT1); // Set all F's if each vector is zero
+	SSE_MOVMSKPS_XMM_to_R32(EAX, regT2); // Used for Zero Flag Calculation
+	SSE_ANDNPS_XMM_to_XMM(regT2, regT1);
+
+	SSE_MOVMSKPS_XMM_to_R32(x86macflag, regT2); // Move the sign bits of the t1reg
+
+	AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
+	pjmp = JZ8(0); // Skip if none are
+	OR16ItoR(x86statflag, 0x82); // SS, S flags
+	SHL16ItoR(x86macflag, 4);
+	if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
+	x86SetJ8(pjmp);
+
+	//-------------------------Check for Zero flags------------------------------
+
+	AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
+	pjmp = JZ8(0); // Skip if none are
+	OR16ItoR(x86statflag, 0x41); // ZS, Z flags
+	OR32RtoR(x86macflag, EAX);
+	x86SetJ8(pjmp);
+
+	//-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------
+
+	if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
+	MOV16RtoM(macaddr, x86macflag);
+	MOV16RtoM(stataddr, x86statflag);
+}
+
+//------------------------------------------------------------------
+// Helper Macros
+//------------------------------------------------------------------
+
+#define mVU_FMAC1(operation) { \
+	if (isNOP) return; \
+	int Fd, Fs, Ft; \
+	mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft, 1); \
+	if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
+	else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
+	mVUupdateFlags<vuIndex>(Fd, xmmT1, Ft, _X_Y_Z_W); \
+	mVUallocFMAC1b<vuIndex>(Fd); \
+}
+
 //------------------------------------------------------------------
 // Micro VU Micromode Upper instructions
 //------------------------------------------------------------------
 
 microVUf(void) mVU_ABS(){}
-microVUf(void) mVU_ADD(){
+microVUf(void) mVU_ADD() {
+	microVU* mVU = mVUx;
 	if (recPass == 0) {}
-	else {}
+	else { mVU_FMAC1(ADD); }
 }
 microVUf(void) mVU_ADDi(){}
 microVUf(void) mVU_ADDq(){}
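
The sign/zero portion of mVUupdateFlags is the standard MOVMSKPS trick: compare the result against zero to get the per-lane zero mask, then mask those lanes out before reading the sign bits so a negative zero cannot set a sign flag. A rough equivalent with SSE intrinsics (illustration only; the function above emits x86 through the ix86 emitter):

    #include <xmmintrin.h>

    struct MacMasks { int zero; int sign; };

    static inline MacMasks macMasks(__m128 result)
    {
        __m128 isZero  = _mm_cmpeq_ps(result, _mm_setzero_ps()); // all-ones in each zero lane
        int zeroMask   = _mm_movemask_ps(isZero);                // 4-bit "is zero" mask
        __m128 signSrc = _mm_andnot_ps(isZero, result);          // drop zero lanes (handles -0.0)
        int signMask   = _mm_movemask_ps(signSrc);               // 4-bit "is negative" mask
        return MacMasks{ zeroMask, signMask };
    }
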
@@ -94,7 +113,11 @@ microVUf(void) mVU_ADDAx(){}
 microVUf(void) mVU_ADDAy(){}
 microVUf(void) mVU_ADDAz(){}
 microVUf(void) mVU_ADDAw(){}
-microVUf(void) mVU_SUB(){}
+microVUf(void) mVU_SUB(){
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else { mVU_FMAC1(SUB); }
+}
 microVUf(void) mVU_SUBi(){}
 microVUf(void) mVU_SUBq(){}
 microVUf(void) mVU_SUBx(){}
@@ -108,7 +131,11 @@ microVUf(void) mVU_SUBAx(){}
 microVUf(void) mVU_SUBAy(){}
 microVUf(void) mVU_SUBAz(){}
 microVUf(void) mVU_SUBAw(){}
-microVUf(void) mVU_MUL(){}
+microVUf(void) mVU_MUL(){
+	microVU* mVU = mVUx;
+	if (recPass == 0) {}
+	else { mVU_FMAC1(MUL); }
+}
 microVUf(void) mVU_MULi(){}
 microVUf(void) mVU_MULq(){}
 microVUf(void) mVU_MULx(){}
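
mVU_FMAC1 picks the scalar form of an operation when only one component is live (_XYZW_SS) and the packed form otherwise. The distinction in plain intrinsics, shown here with ADD as an assumed example:

    #include <xmmintrin.h>

    static inline __m128 fmacAdd(__m128 fs, __m128 ft, bool singleScalar)
    {
        // ADDSS writes only the low lane; ADDPS operates on all four lanes.
        return singleScalar ? _mm_add_ss(fs, ft) : _mm_add_ps(fs, ft);
    }
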