finally completed recUpdateFlags() to fully support underflow/overflow checks; theres still some room for optimization that i might do later though.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@16 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
cottonvibes 2008-08-13 08:59:36 +00:00 committed by Gregory Hainaut
parent c667ffe343
commit 2d865e39fb
1 changed files with 94 additions and 62 deletions

View File

@ -1300,7 +1300,7 @@ void vuFloat3(uptr x86ptr)
void CheckForOverflow(VURegs *VU, int info, int regd)
{
//testWhenOverflow(info, regd, EEREC_TEMP);
testWhenOverflow(info, regd, EEREC_TEMP);
//CheckForOverflow_(regd, EEREC_TEMP, _X_Y_Z_W);
if (EEREC_TEMP != regd) {
//testWhenOverflow(info, regd, EEREC_TEMP);
@ -1333,8 +1333,7 @@ const static PCSX2_ALIGNED16(u32 VU_Zero_Helper_Mask[4]) = {0x7fffffff, 0x7fffff
const static PCSX2_ALIGNED16(u32 VU_Signed_Zero_Mask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
// VU Flags
// NOTE: flags don't compute under/over flows since it is highly unlikely
// that games used them. Including them will lower performance.
// NOTE: Flags now compute under/over flows! :p
void recUpdateFlags(VURegs * VU, int reg, int info)
{
u32 flagmask;
@ -1351,9 +1350,8 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address
prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address
if( stataddr == 0 ) {
if( stataddr == 0 )
stataddr = prevstataddr;
}
//assert( stataddr != 0);
@ -1364,10 +1362,7 @@ void recUpdateFlags(VURegs * VU, int reg, int info)
// can do with 8 bits since only computing zero/sign flags
if( EEREC_TEMP != reg ) {
/*
DD:CC:BB:AA
11:10:01:00
00:01:10:11*/
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
MOV32MtoR(x86oldflag, prevstataddr); // Load the previous status in to x86oldflag
@ -1475,82 +1470,119 @@ DD:CC:BB:AA
//-------------------------Flag Setting if (reg == EEREC_TEMP)------------------------------
else {
int t1reg = _allocTempXMMreg(XMMT_FPS, -1);//_getFreeXMMreg();//_vuGetTempXMMreg(info);
SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
MOV32MtoR(x86oldflag, prevstataddr); //move current (previous) status register to x86oldflag
MOV32MtoR(x86oldflag, prevstataddr); // Load the previous status in to x86oldflag
SSE_MOVMSKPS_XMM_to_R32(x86macflag, EEREC_TEMP); // mask is < 0 (including 80000000) Get sign bits of all 4 vectors
// put results in lower 4 bits of x86macflag
//-------------------------Check for Overflow flags------------------------------
XOR32RtoR(EAX, EAX); //Clear EAX for our new flag
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF
SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&s_FloatMinMax[8]); //if the result zero?
//set to all F's (true) or All 0's on each vector (depending on result)
SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // put the sign bit results from the previous calculation in x86newflag
// so x86newflag == 0xf if EEREC_TEMP is zero and == 0x0 if it is all a value.
XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
//if( !(g_VUGameFixes&VUFIX_SIGNEDZERO) ) {
NOT32R(x86newflag); //flip all bits from previous calculation, so now if the result was zero, the result here is 0's
AND32RtoR(x86macflag, x86newflag); //check non-zero macs against signs of initial register values
// so if the result was zero, regardless of if its signed or not, it wont set the signed flags
//}
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
AND32ItoR(x86macflag, 0x0f & _X_Y_Z_W ); //seperate out the flags we are actually using?
pjmp = JZ8(0); //if none are the flags are set to 1 (aka the result is non-zero & positive, or they were zero) dont set the "signed" flag
OR32ItoR(EAX, 2); //else we are signed
XOR32RtoR(EAX, EAX); //Clear EAX
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 8); // Set if they are
x86SetJ8(pjmp);
//if( !(g_VUGameFixes&VUFIX_SIGNEDZERO) ) { //Flip the bits back again so we have our "its zero" values
NOT32R(x86newflag); //flip!
//}
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); //mask out the vectors we didnt use
pjmp = JZ8(0); //If none were zero skip
OR32ItoR(EAX, 1); //We had a zero, so set el status flag with "zero":p
//-------------------------Check for Underflow flags------------------------------
SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF
SSE_ANDPS_XMM_to_XMM(t1reg, reg);
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 4); // Set if they are
x86SetJ8(pjmp);
SHL32ItoR(x86macflag, 4); //Move our signed flags left 4
OR32RtoR(x86newflag, x86macflag); //then stick our zero flags after it
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
//MOV8RmtoROffset(x86newflag, x86macflag, (u32)g_MACFlagTransform); // transform
//-------------------------Optional Code: Denormals Are Zero------------------------------
if (!(Config.Hacks & 0x8)) { //only use if denormals hack is off
SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
SSE_ORPS_XMM_to_XMM(reg, t1reg);
}
//-------------------------Check for Signed flags------------------------------
//SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
//SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // If (t1reg == 0x80000000) set all F's for that vector
//SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
// The following code makes sure the Signed Bit isn't set with Negative Zero
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is not zero
SSE_ANDPS_XMM_to_XMM(t1reg, reg);
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the t1reg
// Replace the 4 lines of code above with this line if you don't care that Negative Zero sets the Signed flag
//SSE_MOVMSKPS_XMM_to_R32(x86newflag, reg); // Move the sign bits of the reg
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 2); // Set if they are
x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag);
SHL32ItoR(x86macflag, 4); // Shift the Overflow, Underflow, and Zero flags left 4
//-------------------------Check for Zero flags------------------------------
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero
/* This code does the same thing as the above two lines
SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Helper_Mask[ 0 ]); // t1reg &= 0x7fffffff
SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == 0x00000000) set all F's for that vector
*/
SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
pjmp = JZ8(0); // Skip if none are
OR32ItoR(EAX, 1); // Set if they are
x86SetJ8(pjmp);
OR32RtoR(x86macflag, x86newflag);
//-------------------------Finally: Send the Flags to the Mac Address------------------------------
//SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Don't need to restore the reg since this is a temp reg
_freeXMMreg(t1reg);
if( macaddr != 0 )
MOV8RtoM(macaddr, x86newflag);
MOV16RtoM(macaddr, x86macflag);
else
SysPrintf( "VU ALLOCATION ERROR: Macaddr == EAX!!! Can't set Mac Flags!\n" );
}
// x86macflag - new untransformed mac flag, EAX - new status bits, x86oldflag - old status flag
// x86macflag = zero_wzyx | sign_wzyx
//MOV8RmtoROffset(x86newflag, x86macflag, (u32)g_MACFlagTransform); // transform
//MOV32RtoR(x86macflag, x86newflag );
//MOV32RtoR(x86macflag, x86oldflag);
//SHL32ItoR(x86macflag, 6);
//OR32RtoR(x86oldflag, x86macflag);
//if( macaddr != 0 ) {
// has to write full 32 bits!
//MOV8RtoM(macaddr, x86newflag);
// vampire night breaks with (g_VUGameFixes&VUFIX_EXTRAFLAGS), crazi taxi needs it
//if( (g_VUGameFixes&VUFIX_EXTRAFLAGS) && flagmask != 0xf ) {
// MOV8MtoR(x86newflag, VU_VI_ADDR(REG_MAC_FLAG, 2)); // get previous written
// AND8ItoR(x86newflag, ~g_MACFlagTransform[(flagmask|(flagmask<<4))]);
// OR8RtoM(macaddr, x86newflag);
//}
//}
//AND32ItoR(x86oldflag, 0x0c0);
SHR32ItoR(x86oldflag, 6);
OR32RtoR(x86oldflag, EAX);
SHL32ItoR(x86oldflag, 6);
OR32RtoR(x86oldflag, EAX);
//SHL32ItoR(EAX,6);
//OR32RtoR(x86oldflag, EAX);
MOV32RtoM(stataddr, x86oldflag);
_freeX86reg(x86macflag);