backup, just ignore this

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@672 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2009-03-04 07:11:59 +00:00
parent c1a59e6cc6
commit 2add61d729
7 changed files with 193 additions and 118 deletions

View File

@ -236,7 +236,7 @@ BEGIN
GROUPBOX "Other Options",IDC_STATIC,281,210,237,34,BS_LEFT
LTEXT "These options specify how PCSX2's recompilers will clamp Infinities and NaN (Not a Number) values in the opcode instructions.",IDC_STATIC,286,94,224,19
LTEXT "*None* - No clamping. (Fastest Mode)\n*Normal* - Clamps the result.\n*Extra* - Clamps the operands, the result, and anywhere in between.\n*Extra + Preserve Sign* - Same as ""Extra"", except preserves NaN's sign when clamping the operands.",IDC_STATIC,286,114,224,48
LTEXT "*Full* - Attempts to emulates large numbers correctly for the EE's FPU. VU's clamp mode should be set to ""Extra + Preserve Sign"" for this to work best. (but still works for most games even with ""Normal"" VU clamping)",IDC_STATIC,287,163,214,36
LTEXT "*Full* - Attempts to emulate large numbers correctly for the EE's FPU. VU's clamp mode should be set to ""Extra + Preserve Sign"" for this to work best. (but still works for most games even with ""Normal"" VU clamping)",IDC_STATIC,287,163,214,36
LTEXT "Flush to Zero - Makes floating point underflows become zero.\nDenormals are Zero - Makes floating point denormals become zero.",IDC_STATIC,287,222,224,18
END

View File

@ -17,8 +17,10 @@
*/
#pragma once
#define _EmitterId_ (vuIndex+1)
#include "Common.h"
#include "VU.h"
#include "ix86/ix86.h"
#include "microVU_Misc.h"
#include "microVU_Alloc.h"
#include "microVU_Tables.h"
@ -110,6 +112,7 @@ struct microVU {
VURegs* regs; // VU Regs Struct
u8* cache; // Dynarec Cache Start (where we will start writing the recompiled code to)
u8* ptr; // Pointer to next place to write recompiled code to
u32 code; // Contains the current Instruction
/*
uptr x86eax; // Accumulator register. Used in arithmetic operations.
uptr x86ecx; // Counter register. Used in shift/rotate instructions.

View File

@ -30,32 +30,59 @@ extern PCSX2_ALIGNED16(microVU microVU1);
//------------------------------------------------------------------
// Micro VU - recPass 1 Functions
//------------------------------------------------------------------
/*
#define setFd (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<7))
#define getFd (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<1))
#define getFs (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<2))
#define getFt (mVU->prog.prog[mVU->prog.cur].allocInfo.info[pc] & (1<<3))
*/
#define makeFdFd (makeFd == 0)
#define makeFdFs (makeFd == 1)
#define makeFdFt (makeFd == 2)
microVUt(void) mVUallocFMAC1a(u32 code, int& Fd, int& Fs, int& Ft, const int makeFd) {
microVU* mVU = mVUx;
if (_Fs_ == 0) { Fs = xmmZ; } else { Fs = xmmFs; }
if (_Ft_ == 0) { Ft = xmmZ; } else { Ft = xmmFt; }
if (makeFdFd) {Fd = xmmFd;}
else if (makeFdFs) {Fd = Fs;}
else if (makeFdFt) {Fd = Ft;}
if (_Fs_) SSE_MOVAPS_M128_to_XMM(Fs, (uptr)&mVU->regs->VF[_Fs_].UL[0]);
if (_Ft_ == _Ft_) SSE_MOVAPS_M128_to_XMM(Ft, (uptr)&mVU->regs->VF[_Ft_].UL[0]);
// getReg(reg, _reg_): emits a load of VF[_reg_] into XMM register 'reg' via mVUloadReg,
// passing the instruction's _X_Y_Z_W field mask so only the requested lanes are fetched.
// When CHECK_VU_EXTRA_OVERFLOW is set, the loaded operand is also handed to mVUclamp2
// (operand clamping) with xmmT1 as the scratch register.
// Expands to a braced block; 'mVU' and the template parameter 'vuIndex' must be in scope.
// mVUloadReg is a template on vuIndex, which cannot be deduced from its arguments, so it
// is named explicitly here just like the mVUclamp2 call.
#define getReg(reg, _reg_) { \
	mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[_reg_].UL[0], _X_Y_Z_W); \
	if (CHECK_VU_EXTRA_OVERFLOW) mVUclamp2<vuIndex>(reg, xmmT1, _X_Y_Z_W); \
}
microVUt(void) mVUallocFMAC1b(u32 code, u32 pc, int& Fd) {
// getZeroSS(reg) / getZero(reg): prepare 'reg' when the source operand is VF[0].
// If the instruction's W bit is set, the requested lanes of VF[0] are loaded through
// mVUloadReg; otherwise 'reg' is XORed with itself, i.e. cleared to all zeros, and no
// memory access is emitted.
// getZeroSS is used on the single-scalar path and getZero on the packed path; the two
// expansions are currently identical.
// Both expand to a braced block and need 'mVU' and 'vuIndex' in scope. vuIndex is passed
// explicitly to mVUloadReg because it cannot be deduced from the call arguments.
#define getZeroSS(reg) { \
	if (_W)	{ mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[0].UL[0], _X_Y_Z_W); } \
	else	{ SSE_XORPS_XMM_to_XMM(reg, reg); } \
}
#define getZero(reg) { \
	if (_W)	{ mVUloadReg<vuIndex>(reg, (uptr)&mVU->regs->VF[0].UL[0], _X_Y_Z_W); } \
	else	{ SSE_XORPS_XMM_to_XMM(reg, reg); } \
}
// Note: If _Ft_ is 0, then don't modify xmm reg Ft, because it's equal to xmmZ (unless _XYZW_SS, in which case you can modify xmm reg Ft)
// Operand allocation for FMAC1-type instructions (first half; mVUallocFMAC1b is the
// write-back half).
//   Fd, Fs, Ft - (out) XMM register indices the caller should use for each operand.
//   makeFd     - true (1): the result register Fd aliases Fs; false: Fd is xmmFd.
// Fs is always xmmFs. Ft is xmmFt unless the instruction names the same VF register in
// both source fields, in which case Ft aliases Fs and the register is only loaded once.
// A source field of 0 goes through getZero/getZeroSS instead of getReg.
// The old write-back lines (early return on _Fd_ == 0 and the three-argument
// mVUsaveReg call using 'code') do not belong here: they would leave Fd/Fs/Ft
// unassigned for the caller and no longer match mVUsaveReg's signature.
microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft, const bool makeFd) {
	microVU* mVU = mVUx;
	Fs = xmmFs;
	Ft = xmmFt;
	if (_XYZW_SS) {
		// Single-scalar path: exactly one of X/Y/Z/W is selected.
		if (!_Fs_) { getZeroSS(Fs); }
		else { getReg(Fs, _Fs_); }

		if (_Ft_ == _Fs_) { Ft = Fs; }	// Same source register: share the loaded copy
		else {
			if (!_Ft_) { getZeroSS(Ft); }
			else { getReg(Ft, _Ft_); }
		}
	}
	else {
		// Packed path: zero or several lanes are selected.
		if (!_Fs_) { getZero(Fs); }
		else { getReg(Fs, _Fs_); }

		if (_Ft_ == _Fs_) { Ft = Fs; }	// Same source register: share the loaded copy
		else {
			if (!_Ft_) { getZero(Ft); }
			else { getReg(Ft, _Ft_); }
		}
	}
	// Pick the register that will hold the result.
	if (makeFdFs) {Fd = Fs;}
	else {Fd = xmmFd;}
}
// Write-back half of FMAC1 allocation.
//   Fd - XMM register index holding the result.
// Nothing is emitted when the destination field is 0. Otherwise the result is passed to
// mVUclamp1 (result clamp, xmmT1 as scratch) when CHECK_VU_OVERFLOW is set, and then the
// lanes selected by _X_Y_Z_W are stored to VF[_Fd_] through mVUsaveReg.
microVUt(void) mVUallocFMAC1b(int& Fd) {
	microVU* mVU = mVUx;
	if (_Fd_) {
		if (CHECK_VU_OVERFLOW) {
			mVUclamp1<vuIndex>(Fd, xmmT1, _X_Y_Z_W);
		}
		mVUsaveReg<vuIndex>(Fd, (uptr)&mVU->regs->VF[_Fd_].UL[0], _X_Y_Z_W);
	}
}
#endif //PCSX2_MICROVU

View File

@ -44,6 +44,13 @@ struct microAllocInfo {
// bit 4 = ACC1 or ACC2?
// bit 5 = Read Q1/P1 or backup?
// bit 6 = Write to Q2/P2?
// bit 7 = Write Fd/Acc to backup memory?
// bit 7 = Write Fd/Acc/Result to backup memory?
// bit 8 = Update Status Flags?
// bit 9 = Update Mac Flags?
// bit 10 = Used with bit 11 to make a 2-bit key for status/mac flag instance
// bit 11 = (00 = instance #0, 01 = instance #1, 10 = instance #2, 11 = instance #3)
u32 curPC;
};
microVUt(void) mVUallocFMAC1a(int& Fd, int& Fs, int& Ft, const bool makeFd);
microVUt(void) mVUallocFMAC1b(int& Fd);

View File

@ -23,28 +23,55 @@
extern PCSX2_ALIGNED16(microVU microVU0);
extern PCSX2_ALIGNED16(microVU microVU1);
//------------------------------------------------------------------
// Micro VU - Clamp Functions
//------------------------------------------------------------------
// Used for Result Clamping.
// Currently an empty stub: it emits no code and ignores all three arguments.
// mVUallocFMAC1b calls it with (Fd, xmmT1, _X_Y_Z_W) when CHECK_VU_OVERFLOW is set.
//   reg     - XMM register index of the value to clamp
//   regTemp - XMM register index passed as scratch (presumably free to clobber - TODO confirm once implemented)
//   xyzw    - 4-bit X/Y/Z/W field mask of the instruction
microVUx(void) mVUclamp1(int reg, int regTemp, int xyzw) {
}
// Used for Operand Clamping.
// Currently an empty stub: it emits no code and ignores all three arguments.
// The getReg macro calls it with (reg, xmmT1, _X_Y_Z_W) when CHECK_VU_EXTRA_OVERFLOW is set.
//   reg     - XMM register index of the operand to clamp
//   regTemp - XMM register index passed as scratch (presumably free to clobber - TODO confirm once implemented)
//   xyzw    - 4-bit X/Y/Z/W field mask of the instruction
microVUx(void) mVUclamp2(int reg, int regTemp, int xyzw) {
}
//------------------------------------------------------------------
// Micro VU - Misc Functions
//------------------------------------------------------------------
microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset) {
switch ( _X_Y_Z_W ) {
// Emits a PSHUFD that replicates one 32-bit element of srcreg into all four elements
// of dstreg.
//   dstreg - destination XMM register index
//   srcreg - source XMM register index
//   xyzw   - element index 0..3 (shuffle immediates 0x00 / 0x55 / 0xaa / 0xff);
//            any other value emits nothing.
microVUx(void) mVUunpack_xyzw(int dstreg, int srcreg, int xyzw) {
	// Each immediate repeats the same 2-bit element selector in all four fields.
	static const u8 splatImm[4] = { 0x00, 0x55, 0xaa, 0xff };

	if (xyzw < 0 || xyzw > 3) return;
	SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, splatImm[xyzw]);
}
// Emits the load of a VF register (or part of it) from memory into an XMM register.
//   reg    - destination XMM register index
//   offset - address of the register's first (X) element; Y/Z/W follow at +4/+8/+12
//   xyzw   - 4-bit field mask (8 = X, 4 = Y, 2 = Z, 1 = W)
// A single selected field is loaded with MOVSS, so it lands in the low element of reg.
// ZW (3) and XY (12) use 64-bit loads into the high / low half; every other mask loads
// the whole 128-bit register with MOVAPS.
microVUx(void) mVUloadReg(int reg, u32 offset, int xyzw) {
	if (xyzw == 8)  { SSE_MOVSS_M32_to_XMM(reg, offset);     return; } // X
	if (xyzw == 4)  { SSE_MOVSS_M32_to_XMM(reg, offset+4);   return; } // Y
	if (xyzw == 2)  { SSE_MOVSS_M32_to_XMM(reg, offset+8);   return; } // Z
	if (xyzw == 1)  { SSE_MOVSS_M32_to_XMM(reg, offset+12);  return; } // W
	if (xyzw == 3)  { SSE_MOVHPS_M64_to_XMM(reg, offset+8);  return; } // ZW (not sure if this is faster than default)
	if (xyzw == 12) { SSE_MOVLPS_M64_to_XMM(reg, offset);    return; } // XY (not sure if this is faster than default)

	// Any other combination: load all four elements.
	SSE_MOVAPS_M128_to_XMM(reg, offset);
}
microVUx(void) mVUsaveReg(int reg, u32 offset, int xyzw) {
switch ( xyzw ) {
case 1: // W
//SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0x27);
//SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
SSE_MOVSS_XMM_to_M32(offset+12, reg);
break;
case 2: // Z
//SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
//SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
SSE_MOVSS_XMM_to_M32(offset+8, reg);
break;
case 3: // ZW
SSE_MOVHPS_XMM_to_M64(offset+8, reg);
break;
case 4: // Y
//SSE2_PSHUFLW_XMM_to_XMM(xmmT1, reg, 0x4e);
//SSE_MOVSS_XMM_to_M32(offset+4, xmmT1);
SSE_MOVSS_XMM_to_M32(offset+4, reg);
break;
case 5: // YW
@ -52,7 +79,6 @@ microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset) {
SSE_MOVHLPS_XMM_to_XMM(xmmT1, reg);
SSE_MOVSS_XMM_to_M32(offset+4, reg);
SSE_MOVSS_XMM_to_M32(offset+12, xmmT1);
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0xB1);
break;
case 6: // YZ
SSE2_PSHUFD_XMM_to_XMM(xmmT1, reg, 0xc9);
@ -96,8 +122,7 @@ microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset) {
SSE_MOVSS_XMM_to_M32(offset+8, xmmT1);
break;
case 15: // XYZW
if( offset & 15 ) SSE_MOVUPS_XMM_to_M128(offset, reg);
else SSE_MOVAPS_XMM_to_M128(offset, reg);
SSE_MOVAPS_XMM_to_M128(offset, reg);
break;
}
}

View File

@ -18,57 +18,36 @@
#pragma once
#ifdef __LINUX__
#include "ix86/ix86.h"
#endif
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
#define _Ft_ ((code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((code >> 11) & 0x1F) // The rd part of the instruction register
#define _Fd_ ((code >> 6) & 0x1F) // The sa part of the instruction register
#define _Ft_ ((mVU->code >> 16) & 0x1F) // The rt part of the instruction register
#define _Fs_ ((mVU->code >> 11) & 0x1F) // The rd part of the instruction register
#define _Fd_ ((mVU->code >> 6) & 0x1F) // The sa part of the instruction register
#define _X ((code>>24) & 0x1)
#define _Y ((code>>23) & 0x1)
#define _Z ((code>>22) & 0x1)
#define _W ((code>>21) & 0x1)
#define _X ((mVU->code>>24) & 0x1)
#define _Y ((mVU->code>>23) & 0x1)
#define _Z ((mVU->code>>22) & 0x1)
#define _W ((mVU->code>>21) & 0x1)
#define _XYZW_SS (_X+_Y+_Z+_W==1)
#define _X_Y_Z_W (((code >> 21 ) & 0xF ) )
#define _X_Y_Z_W (((mVU->code >> 21 ) & 0xF ) )
#define _Fsf_ ((code >> 21) & 0x03)
#define _Ftf_ ((code >> 23) & 0x03)
#define _Fsf_ ((mVU->code >> 21) & 0x03)
#define _Ftf_ ((mVU->code >> 23) & 0x03)
#define _Imm11_ (s32)(code & 0x400 ? 0xfffffc00 | (code & 0x3ff) : code & 0x3ff)
#define _UImm11_ (s32)(code & 0x7ff)
/*
#define VU_VFx_ADDR(x) (uptr)&VU->VF[x].UL[0]
#define VU_VFy_ADDR(x) (uptr)&VU->VF[x].UL[1]
#define VU_VFz_ADDR(x) (uptr)&VU->VF[x].UL[2]
#define VU_VFw_ADDR(x) (uptr)&VU->VF[x].UL[3]
#define _Imm11_ (s32)(mVU->code & 0x400 ? 0xfffffc00 | (mVU->code & 0x3ff) : mVU->code & 0x3ff)
#define _UImm11_ (s32)(mVU->code & 0x7ff)
#define VU_REGR_ADDR (uptr)&VU->VI[REG_R]
#define VU_REGQ_ADDR (uptr)&VU->VI[REG_Q]
#define VU_REGMAC_ADDR (uptr)&VU->VI[REG_MAC_FLAG]
#define VU_VI_ADDR(x, read) GetVIAddr(VU, x, read, info)
#define VU_ACCx_ADDR (uptr)&VU->ACC.UL[0]
#define VU_ACCy_ADDR (uptr)&VU->ACC.UL[1]
#define VU_ACCz_ADDR (uptr)&VU->ACC.UL[2]
#define VU_ACCw_ADDR (uptr)&VU->ACC.UL[3]
*/
#define xmmT1 0 // XMM0
#define xmmFd 1 // XMM1
#define xmmFs 2 // XMM2
#define xmmFt 3 // XMM3
#define xmmACC1 4 // XMM4
#define xmmACC2 5 // XMM5
#define xmmPQ 6 // XMM6
#define xmmZ 7 // XMM7
#define xmmT1 0 // XMM0 // Temp Reg
#define xmmFd 1 // XMM1 // Holds the Value of Fd
#define xmmFs 2 // XMM2 // Holds the Value of Fs
#define xmmFt 3 // XMM3 // Holds the Value of Ft
#define xmmACC1 4 // XMM4 // Holds the Value of ACC
#define xmmACC2 5 // XMM5 // Holds the Backup Value of ACC
#define xmmPQ 6 // XMM6 // Holds the Value and Backup Values of P and Q regs
#define xmmF 7 // XMM7 // Holds 4 instances of the status and mac flags (macflagX4::statusflagX4)
// Template Stuff
#define mVUx (vuIndex ? &microVU1 : &microVU0)
@ -76,4 +55,11 @@
#define microVUx(aType) template<int vuIndex> aType
#define microVUf(aType) template<int vuIndex, int recPass> aType
microVUx(void) mVUsaveReg(u32 code, int reg, u32 offset);
#define mVUallocInfo mVU->prog.prog[mVU->prog.cur].allocInfo
#define isNOP (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<0))
#define getFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<1))
#define getFs (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<2))
#define getFt (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<3))
#define setFd (mVUallocInfo.info[mVUallocInfo.curPC] & (1<<7))
#define doFlags (mVUallocInfo.info[mVUallocInfo.curPC] & (3<<8))

View File

@ -20,57 +20,75 @@
#include "microVU.h"
#ifdef PCSX2_MICROVU
/*
Cotton's Notes on how things will work (*experimental*, subject to change if I get different ideas):
//------------------------------------------------------------------
// mVUupdateFlags() - Updates status/mac flags
//------------------------------------------------------------------
Guide:
Fd, Fs, Ft = operands in the Micro Instructions
Acc = VU's Accumulator register
Fs/t = shorthand notation I made-up for "Fs or Ft"
xmmFd, xmmFs, xmmFt, xmmAcc = XMM regs that hold Fd, Fs, Ft, and Acc values respectively.
xmmZ = XMM reg that holds the zero Register; always {0, 0, 0, 1.0}
xmmT1, xmmT2, xmmT3 = temp regs.
microVUt(void) mVUupdateFlags(int reg, int regT1, int regT2, int xyzw) {
microVU* mVU = mVUx;
static u8 *pjmp, *pjmp2;
static u32 *pjmp32;
static u32 macaddr, stataddr, prevstataddr;
static int x86macflag, x86statflag, x86temp;
General:
XMM0 is a volatile temp reg throughout the recs. You can always freely use it.
EAX is a volatile temp reg. You can always freely use it.
//SysPrintf ("mVUupdateFlags\n");
if( !(doFlags) ) return;
Mapping:
xmmT1 = xmm0
xmmFd = xmm1
xmmFs = xmm2
xmmFt = xmm3
xmmACC1 = xmm4
xmmACC2 = xmm5
xmmPQ = xmm6
xmmZ = xmm7
//macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0);
//stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address
//prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address
Most of the time the above mapping will be true, unless I find a reason not to do it this way :)
Opcodes:
Fd's 4-vectors must be preserved (kept valid); Unless operation is single-scalar, then only 'x' XMM vector
will contain valid data for X, Y, Z, or W, and the other XMM vectors will be garbage and freely modifiable.
SSE2_PSHUFD_XMM_to_XMM(regT1, reg, 0x1B); // Flip wzyx to xyzw
MOV32MtoR(x86statflag, prevstataddr); // Load the previous status in to x86statflag
AND16ItoR(x86statflag, 0xff0); // Keep Sticky and D/I flags
Fs and Ft are temp regs that won't be used after the opcode, so their values can be freely modified.
//-------------------------Check for Signed flags------------------------------
If (Fd == 0), Then you don't need to explicitly handle this case in the opcode implementation,
since it's dealt with in the analyzing microVU pipeline functions.
(So just do the normal operation and don't worry about it.)
// The following code makes sure the Signed Bit isn't set with Negative Zero
SSE_XORPS_XMM_to_XMM(regT2, regT2); // Clear regT2
SSE_CMPEQPS_XMM_to_XMM(regT2, regT1); // Set all F's if each vector is zero
SSE_MOVMSKPS_XMM_to_R32(EAX, regT2); // Used for Zero Flag Calculation
SSE_ANDNPS_XMM_to_XMM(regT2, regT1);
If (_X_Y_Z_W == 0) Then same as above. (btw, I'm not sure if this case ever happens...)
SSE_MOVMSKPS_XMM_to_R32(x86macflag, regT2); // Move the sign bits of the t1reg
If (Fd == Fs/t), Then xmmFd != xmmFs/t (unless it's more optimized this way! It'll be commented on the opcode)
AND16ItoR(x86macflag, _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR16ItoR(x86statflag, 0x82); // SS, S flags
SHL16ItoR(x86macflag, 4);
if (_XYZW_SS) pjmp2 = JMP8(0); // If negative and not Zero, we can skip the Zero Flag checking
x86SetJ8(pjmp);
Clamping:
Fs/t can always be clamped by case 15 (all vectors modified) since they won't be written back.
//-------------------------Check for Zero flags------------------------------
Problems:
The biggest problem I think I'll have is xgkick opcode having variable timing/stalling.
AND16ItoR(EAX, _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR16ItoR(x86statflag, 0x41); // ZS, Z flags
OR32RtoR(x86macflag, EAX);
x86SetJ8(pjmp);
Other Notes:
These notes are mostly to help me (cottonvibes) remember good ideas and to help confused devs to
have an idea of how things work. Right now it's all theoretical and I'll change things once implemented ;p
*/
//-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------
if (_XYZW_SS) x86SetJ8(pjmp2); // If we skipped the Zero Flag Checking, return here
MOV16RtoM(macaddr, x86macflag);
MOV16RtoM(stataddr, x86statflag);
}
//------------------------------------------------------------------
// Helper Macros
//------------------------------------------------------------------
// mVU_FMAC1(operation): recompiles one FMAC1-type instruction.
// 'operation' is pasted into the emitter name, e.g. ADD -> SSE_ADDSS_XMM_to_XMM /
// SSE_ADDPS_XMM_to_XMM. Steps:
//   1. Return from the enclosing function immediately if the instruction is flagged NOP.
//   2. mVUallocFMAC1a picks/loads the operand registers; makeFd = 1 makes Fd alias Fs.
//   3. Emit the scalar form when exactly one of X/Y/Z/W is selected, else the packed form,
//      with Fs as destination and Ft as source.
//   4. mVUupdateFlags is called on the result (xmmT1 and Ft are passed as its temp regs).
//   5. mVUallocFMAC1b clamps/stores the result.
// Note: contains a 'return', so it may only be used inside a void function, and it needs
// 'mVU' and the template parameter 'vuIndex' in scope.
#define mVU_FMAC1(operation) { \
if (isNOP) return; \
int Fd, Fs, Ft; \
mVUallocFMAC1a<vuIndex>(Fd, Fs, Ft, 1); \
if (_XYZW_SS) SSE_##operation##SS_XMM_to_XMM(Fs, Ft); \
else SSE_##operation##PS_XMM_to_XMM(Fs, Ft); \
mVUupdateFlags<vuIndex>(Fd, xmmT1, Ft, _X_Y_Z_W); \
mVUallocFMAC1b<vuIndex>(Fd); \
}
//------------------------------------------------------------------
// Micro VU Micromode Upper instructions
@ -78,8 +96,9 @@ have an idea of how things work. Right now its all theoretical and I'll change t
microVUf(void) mVU_ABS(){}
// ADD (upper instruction).
// recPass 0 currently does nothing; every other pass emits the instruction through
// mVU_FMAC1(ADD). mVU_FMAC1 may return early from this function when the instruction
// is flagged as a NOP.
microVUf(void) mVU_ADD() {
	microVU* mVU = mVUx;
	if (recPass == 0) {}
	else { mVU_FMAC1(ADD); }
}
microVUf(void) mVU_ADDi(){}
microVUf(void) mVU_ADDq(){}
@ -94,7 +113,11 @@ microVUf(void) mVU_ADDAx(){}
microVUf(void) mVU_ADDAy(){}
microVUf(void) mVU_ADDAz(){}
microVUf(void) mVU_ADDAw(){}
// SUB (upper instruction).
// recPass 0 currently does nothing; every other pass emits the instruction through
// mVU_FMAC1(SUB). mVU_FMAC1 may return early from this function when the instruction
// is flagged as a NOP.
microVUf(void) mVU_SUB() {
	microVU* mVU = mVUx;
	if (recPass == 0) {}
	else { mVU_FMAC1(SUB); }
}
microVUf(void) mVU_SUBi(){}
microVUf(void) mVU_SUBq(){}
microVUf(void) mVU_SUBx(){}
@ -108,7 +131,11 @@ microVUf(void) mVU_SUBAx(){}
microVUf(void) mVU_SUBAy(){}
microVUf(void) mVU_SUBAz(){}
microVUf(void) mVU_SUBAw(){}
// MUL (upper instruction).
// recPass 0 currently does nothing; every other pass emits the instruction through
// mVU_FMAC1(MUL). mVU_FMAC1 may return early from this function when the instruction
// is flagged as a NOP.
microVUf(void) mVU_MUL() {
	microVU* mVU = mVUx;
	if (recPass == 0) {}
	else { mVU_FMAC1(MUL); }
}
microVUf(void) mVU_MULi(){}
microVUf(void) mVU_MULq(){}
microVUf(void) mVU_MULx(){}