mirror of https://github.com/PCSX2/pcsx2.git
Took me a while, but I managed to do some nice recUpdateFlags() optimizations :D
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@137 a6443dda-0b58-4228-96e9-037be469359c
parent d3764fc97d
commit e7bc472969
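For orientation while reading the diff below: recUpdateFlags() emits x86 that packs a per-field MAC value (overflow, underflow, sign and zero nibbles, one bit per written xyzw field) plus a 4-bit status nibble (O=8, U=4, S=2, Z=1 built up in x86temp), then merges that nibble into the previous status flag while keeping the sticky and D/I bits (the AND with 0xff0). The following plain C++ reference model is not part of the commit: the helper names are invented, VU_Underflow_Mask1 is assumed to be the exponent mask 0x7f800000, and the model ignores the short-cuts where the emitted code skips the underflow check after an overflow hit and the zero check after a sign hit.

#include <cstdint>
#include <cstring>

static uint32_t float_bits(float f) { uint32_t u; std::memcpy(&u, &f, sizeof u); return u; }

// xyzw: result vector in x,y,z,w order; wmask: the _X_Y_Z_W write mask (x=8 ... w=1);
// prevstatus: previous status flag value. Outputs mirror what the emitted x86 stores.
static void modelUpdateFlags(const float xyzw[4], int wmask, uint32_t prevstatus,
                             uint16_t* macflag, uint32_t* statusflag)
{
    uint32_t macO = 0, macU = 0, macS = 0, macZ = 0;
    for (int i = 0; i < 4; i++) {
        uint32_t bit = 8u >> i;                       // same bit order as _X_Y_Z_W
        if (!(wmask & bit)) continue;                 // unwritten fields leave their flags alone
        uint32_t u = float_bits(xyzw[i]);
        bool zero  = (u & 0x7fffffff) == 0;           // +0 and -0 both compare equal to zero
        bool sign  = (u & 0x80000000) != 0 && !zero;  // negative zero must not set the sign flag
        bool over  = (u & 0x7fffffff) == 0x7f800000;  // VU_Zero_Helper_Mask + VU_Pos_Infinity compare
        bool under = (u & 0x7f800000) == 0            // zero exponent (assumed VU_Underflow_Mask1)
                  && (u & 0x007fffff) != 0;           // non-zero mantissa (VU_Underflow_Mask2)
        if (over)  macO |= bit;
        if (under) macU |= bit;
        if (sign)  macS |= bit;
        if (zero)  macZ |= bit;
    }
    // Final MAC layout produced by the OR/SHL sequence: O in bits 12-15, U in 8-11, S in 4-7, Z in 0-3.
    *macflag = (uint16_t)((macO << 12) | (macU << 8) | (macS << 4) | macZ);

    uint32_t st = 0;
    if (macO) st |= 8;
    if (macU) st |= 4;
    if (macS) st |= 2;
    if (macZ) st |= 1;
    *statusflag = (prevstatus & 0xff0) | st;          // keep sticky and D/I flags, then OR the new nibble
}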
@@ -1383,277 +1383,155 @@ const static PCSX2_ALIGNED16(u32 VU_Underflow_Mask2[4]) = {0x007fffff, 0x007fff
 const static PCSX2_ALIGNED16(u32 VU_Zero_Mask[4]) = {0x00000000, 0x00000000, 0x00000000, 0x00000000};
 const static PCSX2_ALIGNED16(u32 VU_Zero_Helper_Mask[4]) = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
 const static PCSX2_ALIGNED16(u32 VU_Signed_Zero_Mask[4]) = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+const static PCSX2_ALIGNED16(u32 VU_Pos_Infinity[4]) = {0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000};
+const static PCSX2_ALIGNED16(u32 VU_Neg_Infinity[4]) = {0xff800000, 0xff800000, 0xff800000, 0xff800000};
 PCSX2_ALIGNED16(u64 TEMPXMMData[2]);

-// VU Flags
 // NOTE: Flags now compute under/over flows! :p
 void recUpdateFlags(VURegs * VU, int reg, int info)
 {
-u32 flagmask;
-u8* pjmp;
-u32 macaddr, stataddr, prevstataddr;
-int x86macflag, x86newflag, x86temp;
-const static u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 };
+static u8* pjmp;
+static u32* pjmp32;
+static u32 macaddr, stataddr, prevstataddr;
+static int x86macflag, x86newflag, x86temp;
+static int t1reg, t1regBoolean;

-if( !(info & PROCESS_VU_UPDATEFLAGS) )
-return;
+if( !(info & PROCESS_VU_UPDATEFLAGS) ) return;

-flagmask = macarr[_X_Y_Z_W];
 macaddr = VU_VI_ADDR(REG_MAC_FLAG, 0);
 stataddr = VU_VI_ADDR(REG_STATUS_FLAG, 0); // write address
 prevstataddr = VU_VI_ADDR(REG_STATUS_FLAG, 2); // previous address

-if( stataddr == 0 )
-stataddr = prevstataddr;
-//assert( stataddr != 0);
+if( stataddr == 0 ) stataddr = prevstataddr;
+if( macaddr == 0 ) {
+SysPrintf( "VU ALLOCATION WARNING: Using Mac Flag Previous Address!\n" );
+macaddr = VU_VI_ADDR(REG_MAC_FLAG, 2);
+}


-// 20 insts
 x86newflag = ALLOCTEMPX86(MODE_8BITREG);
 x86macflag = ALLOCTEMPX86(0);
 x86temp = ALLOCTEMPX86(0);

-// can do with 8 bits since only computing zero/sign flags
-if( EEREC_TEMP != reg ) {
+if (reg == EEREC_TEMP) {
+t1reg = _vuGetTempXMMreg(info);
+if (t1reg < 0) {
-SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
-XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
-XOR32RtoR(x86temp, x86temp); //Clear x86temp

-if (CHECK_VU_EXTRA_FLAGS) {
-//-------------------------Check for Overflow flags------------------------------

-//SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
-//SSE_CMPUNORDPS_XMM_to_XMM(EEREC_TEMP, reg); // If reg == NaN then set Vector to 0xFFFFFFFF

-SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg);
-SSE_MINPS_M128_to_XMM(EEREC_TEMP, (uptr)g_maxvals);
-SSE_MAXPS_M128_to_XMM(EEREC_TEMP, (uptr)g_minvals);
-SSE_CMPNEPS_XMM_to_XMM(EEREC_TEMP, reg); // If they're not equal, then overflow has occured

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 8); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
-SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4

-//-------------------------Check for Underflow flags------------------------------

-SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg

-SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask1[ 0 ]);
-SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == zero exponent) then set Vector to 0xFFFFFFFF

-SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg);
-SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Underflow_Mask2[ 0 ]);
-SSE_CMPNEPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP != zero mantisa) then set Vector to 0xFFFFFFFF

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 4); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
-SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4

-//-------------------------Optional Code: Denormals Are Zero------------------------------
-if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
-SSE_ANDNPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP = !EEREC_TEMP & reg
-// Now we have Denormals are Positive Zero in EEREC_TEMP; the next two lines take Signed Zero into account
-SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
-SSE_ORPS_XMM_to_XMM(reg, EEREC_TEMP);
-}
-}

-vuFloat2(reg, EEREC_TEMP, _X_Y_Z_W); // Clamp overflowed vectors that were modified

-//-------------------------Check for Signed flags------------------------------

-//SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
-//SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // If (EEREC_TEMP == 0x80000000) set all F's for that vector

-//SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg

-// The following code makes sure the Signed Bit isn't set with Negative Zero
-SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
-SSE_CMPNEPS_XMM_to_XMM(EEREC_TEMP, reg); // Set all F's if each vector is not zero
-SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, reg);

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the EEREC_TEMP

-// Replace the 4 lines of code above with this line if you don't care that Negative Zero sets the Signed flag
-//SSE_MOVMSKPS_XMM_to_R32(x86newflag, reg); // Move the sign bits of the reg

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 2); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
-SHL32ItoR(x86macflag, 4); // Shift the Overflow, Underflow, and Zero flags left 4

-//-------------------------Check for Zero flags------------------------------

-SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
-SSE_CMPEQPS_XMM_to_XMM(EEREC_TEMP, reg); // Set all F's if each vector is zero

-/* This code does the same thing as the above two lines
-SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, reg); // EEREC_TEMP <- reg
-SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Helper_Mask[ 0 ]); // EEREC_TEMP &= 0x7fffffff
-SSE_CMPEQPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Zero_Mask[ 0 ]); // If (EEREC_TEMP == 0x00000000) set all F's for that vector
-*/

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, EEREC_TEMP); // Move the sign bits of the previous calculation

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 1); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);

-//-------------------------Finally: Send the Flags to the Mac Address------------------------------
-SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip back reg to wzyx

-if( macaddr != 0 )
-MOV16RtoM(macaddr, x86macflag);
-else
-SysPrintf( "VU ALLOCATION ERROR: Can't set Mac Flags!\n" );
-}
-//-------------------------Flag Setting if (reg == EEREC_TEMP)------------------------------
-else {

-int t1reg = _vuGetTempXMMreg(info);
-int t1regBoolean = 0;
-if (t1reg == -1) {
 //SysPrintf( "VU ALLOCATION ERROR: Temp reg can't be allocated!!!!\n" );
 t1reg = (reg == 0) ? (reg + 1) : (reg - 1);
 SSE_MOVAPS_XMM_to_M128( (uptr)TEMPXMMData, t1reg );
 t1regBoolean = 1;
 }
+else t1regBoolean = 0;
+}
+else {
+t1reg = EEREC_TEMP;
+t1regBoolean = 2;
+}

 SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip wzyx to xyzw
 XOR32RtoR(x86macflag, x86macflag); // Clear Mac Flag
 XOR32RtoR(x86temp, x86temp); //Clear x86temp

 if (CHECK_VU_EXTRA_FLAGS) {
 //-------------------------Check for Overflow flags------------------------------

 //SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
 //SSE_CMPUNORDPS_XMM_to_XMM(t1reg, reg); // If reg == NaN then set Vector to 0xFFFFFFFF

-SSE_MOVAPS_XMM_to_XMM(t1reg, reg);
-SSE_MINPS_M128_to_XMM(t1reg, (uptr)g_maxvals);
-SSE_MAXPS_M128_to_XMM(t1reg, (uptr)g_minvals);
-SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // If they're not equal, then overflow has occured
+//SSE_MOVAPS_XMM_to_XMM(t1reg, reg);
+//SSE_MINPS_M128_to_XMM(t1reg, (uptr)g_maxvals);
+//SSE_MAXPS_M128_to_XMM(t1reg, (uptr)g_minvals);
+//SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // If they're not equal, then overflow has occured

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation
-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
+SSE_MOVAPS_XMM_to_XMM(t1reg, reg);
+SSE_ANDPS_M128_to_XMM(t1reg, (uptr)VU_Zero_Helper_Mask);
+SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)VU_Pos_Infinity); // If infinity, then overflow has occured (NaN's don't report as overflow) (NaN's and Infinities report as overflow)
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 8); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
-SHL32ItoR(x86macflag, 4); // Shift the Overflow flags left 4

-//-------------------------Check for Underflow flags------------------------------

-SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg

-SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
-SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF

-SSE_ANDPS_XMM_to_XMM(t1reg, reg);
-SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
-SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 4); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
-SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4

-//-------------------------Optional Code: Denormals Are Zero------------------------------
-if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
-SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
-// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
-SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
-SSE_ORPS_XMM_to_XMM(reg, t1reg);
-}
-}

-vuFloat2(reg, t1reg, _X_Y_Z_W); // Clamp overflowed vectors that were modified

-//-------------------------Check for Signed flags------------------------------

-//SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
-//SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]); // If (t1reg == 0x80000000) set all F's for that vector

-//SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg

-// The following code makes sure the Signed Bit isn't set with Negative Zero
-SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
-SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is not zero
-SSE_ANDPS_XMM_to_XMM(t1reg, reg);

-SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the t1reg

-// Replace the 4 lines of code above with this line if you don't care that Negative Zero sets the Signed flag
-//SSE_MOVMSKPS_XMM_to_R32(x86newflag, reg); // Move the sign bits of the reg

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
-pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 2); // Set if they are
-x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
-SHL32ItoR(x86macflag, 4); // Shift the Overflow, Underflow, and Zero flags left 4

-//-------------------------Check for Zero flags------------------------------

-SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
-SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero

-/* This code does the same thing as the above two lines
-SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg
-SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Helper_Mask[ 0 ]); // t1reg &= 0x7fffffff
-SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == 0x00000000) set all F's for that vector
-*/

 SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation

-AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
+AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Overflowed" bits from the previous calculation (also make sure we're only grabbing from the XYZW being modified)
 pjmp = JZ8(0); // Skip if none are
-OR32ItoR(x86temp, 1); // Set if they are
+OR32ItoR(x86temp, 8); // Set if they are
+OR32RtoR(x86macflag, x86newflag);
+SHL32ItoR(x86macflag, 8); // Shift the Overflow flags left 8
+pjmp32 = JMP32(0); // Skip Underflow Check
 x86SetJ8(pjmp);

-OR32RtoR(x86macflag, x86newflag);
+//-------------------------Check for Underflow flags------------------------------

-//-------------------------Finally: Send the Flags to the Mac Address------------------------------
-//SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Don't need to restore the reg since this is a temp reg
+SSE_MOVAPS_XMM_to_XMM(t1reg, reg); // t1reg <- reg

-if (t1regBoolean)
-SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)TEMPXMMData );
-else
-_freeXMMreg(t1reg);
+SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask1[ 0 ]);
+SSE_CMPEQPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg == zero exponent) then set Vector to 0xFFFFFFFF

-if( macaddr != 0 )
-MOV16RtoM(macaddr, x86macflag);
-else
-SysPrintf( "VU ALLOCATION ERROR: Can't set Mac Flags!\n" );
+SSE_ANDPS_XMM_to_XMM(t1reg, reg);
+SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Underflow_Mask2[ 0 ]);
+SSE_CMPNEPS_M128_to_XMM(t1reg, (uptr)&VU_Zero_Mask[ 0 ]); // If (t1reg != zero mantisa) then set Vector to 0xFFFFFFFF
+SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation

+AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Has Underflowed" bits from the previous calculation
+pjmp = JZ8(0); // Skip if none are
+OR32ItoR(x86temp, 4); // Set if they are
+OR32RtoR(x86macflag, x86newflag);
+SHL32ItoR(x86macflag, 4); // Shift the Overflow and Underflow flags left 4
+x86SetJ8(pjmp);

+//-------------------------Optional Code: Denormals Are Zero------------------------------
+if (CHECK_UNDERFLOW) { // Sets underflow/denormals to zero
+SSE_ANDNPS_XMM_to_XMM(t1reg, reg); // t1reg = !t1reg & reg
+// Now we have Denormals are Positive Zero in t1reg; the next two lines take Signed Zero into account
+SSE_ANDPS_M128_to_XMM(reg, (uptr)&VU_Signed_Zero_Mask[ 0 ]);
+SSE_ORPS_XMM_to_XMM(reg, t1reg);
+}

+x86SetJ32(pjmp32); // If we skipped the Underflow Flag Checking (when we had an Overflow), return here
 }

+vuFloat2(reg, t1reg, _X_Y_Z_W); // Clamp overflowed vectors that were modified

+//-------------------------Check for Signed flags------------------------------

+// The following code makes sure the Signed Bit isn't set with Negative Zero
+SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
+SSE_CMPNEPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is not zero
+SSE_ANDPS_XMM_to_XMM(t1reg, reg);

+SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the t1reg

+AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Signed" bits from the previous calculation
+pjmp = JZ8(0); // Skip if none are
+OR32ItoR(x86temp, 2); // Set if they are
+OR32RtoR(x86macflag, x86newflag);
+SHL32ItoR(x86macflag, 4); // Shift the Overflow, Underflow, and Zero flags left 4
+pjmp32 = JMP32(0); // If negative and not Zero, we can skip the Zero Flag checking
+x86SetJ8(pjmp);

+SHL32ItoR(x86macflag, 4); // Shift the Overflow, Underflow, and Zero flags left 4

+//-------------------------Check for Zero flags------------------------------

+SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
+SSE_CMPEQPS_XMM_to_XMM(t1reg, reg); // Set all F's if each vector is zero

+SSE_MOVMSKPS_XMM_to_R32(x86newflag, t1reg); // Move the sign bits of the previous calculation

+AND32ItoR(x86newflag, 0x0f & _X_Y_Z_W ); // Grab "Is Zero" bits from the previous calculation
+pjmp = JZ8(0); // Skip if none are
+OR32ItoR(x86temp, 1); // Set if they are
+OR32RtoR(x86macflag, x86newflag);
+x86SetJ8(pjmp);

+//-------------------------Finally: Send the Flags to the Mac Flag Address------------------------------

+x86SetJ32(pjmp32); // If we skipped the Zero Flag Checking, return here

+SSE_SHUFPS_XMM_to_XMM(reg, reg, 0x1B); // Flip back reg to wzyx

+if (t1regBoolean == 1) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)TEMPXMMData );
+else if (t1regBoolean == 0) _freeXMMreg(t1reg);

+MOV16RtoM(macaddr, x86macflag);

 MOV32MtoR(x86macflag, prevstataddr); // Load the previous status in to x86macflag
 AND32ItoR(x86macflag, 0xff0); // Keep Sticky and D/I flags
 OR32RtoR(x86macflag, x86temp);

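A note on the jump pattern used throughout the new code: pjmp = JZ8(0); ... x86SetJ8(pjmp); emits a forward conditional jump whose 8-bit displacement is unknown at emit time, writes a placeholder, and patches it once the target is reached (JMP32/x86SetJ32 is the 32-bit variant used to skip whole check sections). Below is a minimal, self-contained sketch of that emit-then-patch idea, using made-up helpers that write into an ordinary byte buffer rather than PCSX2's recompiler cache; it is an illustration of the pattern, not the project's emitter.

#include <cassert>
#include <cstdint>
#include <vector>

static std::vector<uint8_t> code;      // stand-in for the recompiler's code buffer

// Emit "jz rel8" with a zero displacement and return the position of the rel8 byte
// so it can be patched later (mirrors pjmp = JZ8(0)).
static size_t emitJz8() {
    code.push_back(0x74);              // jz rel8 opcode
    code.push_back(0x00);              // placeholder displacement
    return code.size() - 1;
}

// Patch a previously emitted rel8 jump so it lands on the current emit position
// (mirrors x86SetJ8(pjmp)).
static void setJ8(size_t dispPos) {
    int rel = (int)code.size() - (int)(dispPos + 1);
    assert(rel >= -128 && rel <= 127); // rel8 range check
    code[dispPos] = (uint8_t)(int8_t)rel;
}

int main() {
    size_t pjmp = emitJz8();           // "skip if none are"
    code.push_back(0x90);              // placeholder for the skipped instructions (e.g. OR32ItoR)
    setJ8(pjmp);                       // jump target = first byte after the skipped block
    return 0;
}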