mirror of https://github.com/PCSX2/pcsx2.git
re-recoded the VU DIV opcode lol. now it's more efficient, faster, and 1/6 the size :D
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@160 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
ae714c14b3
commit
842b472a40
|
@ -50,8 +50,8 @@
|
||||||
extern PCSX2_ALIGNED16_DECL(u32 g_minvals[4]);
|
extern PCSX2_ALIGNED16_DECL(u32 g_minvals[4]);
|
||||||
extern PCSX2_ALIGNED16_DECL(u32 g_maxvals[4]);
|
extern PCSX2_ALIGNED16_DECL(u32 g_maxvals[4]);
|
||||||
|
|
||||||
static u32 PCSX2_ALIGNED16(s_neg[4]) = { 0x80000000, 0, 0, 0 };
|
static u32 PCSX2_ALIGNED16(s_neg[4]) = { 0x80000000, 0xffffffff, 0xffffffff, 0xffffffff };
|
||||||
static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0, 0, 0 };
|
static u32 PCSX2_ALIGNED16(s_pos[4]) = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };
|
||||||
|
|
||||||
#define REC_FPUBRANCH(f) \
|
#define REC_FPUBRANCH(f) \
|
||||||
void f(); \
|
void f(); \
|
||||||
|
|
|
@ -3665,480 +3665,83 @@ void recVUMI_CLIP(VURegs *VU, int info)
|
||||||
/* VU Lower instructions */
|
/* VU Lower instructions */
|
||||||
/******************************/
|
/******************************/
|
||||||
PCSX2_ALIGNED16(u64 DIV_TEMP_XMM[2]);
|
PCSX2_ALIGNED16(u64 DIV_TEMP_XMM[2]);
|
||||||
PCSX2_ALIGNED16(u64 DIV_TEMP_XMM2[2]);
|
|
||||||
|
|
||||||
void recVUMI_DIV(VURegs *VU, int info)
|
void recVUMI_DIV(VURegs *VU, int info)
|
||||||
{
|
{
|
||||||
int t1reg, t2reg;
|
int t1reg, t1boolean, vftemp;
|
||||||
u8* pjmp;
|
u8* pjmp, * pjmp1;
|
||||||
u8* pjmp1;
|
u32* pjmp2, * pjmp32;
|
||||||
u32* pjmp2;
|
|
||||||
u32* pjmp32;
|
|
||||||
|
|
||||||
|
//SysPrintf("VU DIV Opcode \n");
|
||||||
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
|
AND32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0xFCF); // Clear D/I flags
|
||||||
|
|
||||||
|
vftemp = ALLOCTEMPX86(MODE_8BITREG);
|
||||||
|
if (vftemp < 0) {SysPrintf("VU: SQRT allocation error!!!\n"); vftemp = EAX;}
|
||||||
|
|
||||||
if( _Fs_ == 0 ) {
|
t1reg = _vuGetTempXMMreg(info);
|
||||||
|
if( t1reg < 0 ) {
|
||||||
if( _Ft_ == 0 ) {
|
for (t1reg = 0; ( (t1reg == EEREC_TEMP) || (t1reg == EEREC_S)|| (t1reg == EEREC_T) ); t1reg++)
|
||||||
if( _Fsf_ < 3 ) { // 0/ft
|
; // Makes t1reg not be EEREC_TEMP, EEREC_S, or EEREC_T.
|
||||||
if( _Ftf_ < 3 ) { // 0/0
|
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
||||||
//SysPrintf("DIV 0/0\n");
|
t1boolean = 1;
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410); //Invalid Flag (only when 0/0)
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff);
|
|
||||||
}
|
|
||||||
else { // 0/1 ----- zero divided by 1 is zero! :p
|
|
||||||
//SysPrintf("DIV 0/0\n");
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x00000000);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if( _Ftf_ < 3 ) { // 1/0
|
|
||||||
//SysPrintf("DIV 1/0\n");
|
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); //Zero divide (only when not 0/0)
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x7f7fffff);
|
|
||||||
}
|
|
||||||
else { // 1/1
|
|
||||||
//SysPrintf("DIV 1/1\n");
|
|
||||||
MOV32ItoM(VU_VI_ADDR(REG_Q, 0), 0x3f800000);
|
|
||||||
}
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( _Fsf_ == 3 ) // = 1
|
|
||||||
{ // don't use RCPSS (very bad precision)
|
|
||||||
if( _Ftf_ != 0 || (xmmregs[EEREC_T].mode & MODE_WRITE) )
|
|
||||||
{
|
|
||||||
if( _Ftf_ )
|
|
||||||
{
|
|
||||||
t1reg = _vuGetTempXMMreg(info);
|
|
||||||
|
|
||||||
if( t1reg >= 0 ) // 1/n ---- needs work, ft can also be zero!
|
|
||||||
{
|
|
||||||
//SysPrintf("DIV: Fixed! 1 \n");
|
|
||||||
|
|
||||||
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW)
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero
|
|
||||||
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0)
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- EEREC_TEMP
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1
|
|
||||||
SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); // t1reg = 1 / EEREC_TEMP
|
|
||||||
vuFloat2(t1reg, t1reg, 0x8); // check for overflow
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); // q <- t1reg
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
_freeXMMreg(t1reg);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else // 1/n ---- needs work, ft can also be zero!
|
|
||||||
{
|
|
||||||
//SysPrintf("DIV: Fixed! 2 \n");
|
|
||||||
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_);
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW)
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); // find a xmm reg thats not EEREC_TEMP
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero
|
|
||||||
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0)
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- EEREC_TEMP
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1
|
|
||||||
SSE_DIVSS_XMM_to_XMM(t1reg, EEREC_TEMP); // t1reg = 1 / EEREC_TEMP
|
|
||||||
vuFloat2(t1reg, t1reg, 0x8); // check for overflow
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); // q <- t1reg
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore data to t1reg
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{ // 1/n ---- (SS) needs work, ft can also be zero!
|
|
||||||
//SysPrintf("DIV: Fixed! 3 \n");
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW)
|
|
||||||
vuFloat2(EEREC_T, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero
|
|
||||||
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0)
|
|
||||||
SSE_MOVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // EEREC_TEMP <- EEREC_T
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- EEREC_TEMP
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1
|
|
||||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // EEREC_TEMP = 1 / EEREC_T
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); // check for overflow
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- t1reg
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else { // 1/n ---- (SS) needs work, ft can also be zero!
|
|
||||||
//SysPrintf("DIV: Fixed! 4 \n");
|
|
||||||
|
|
||||||
t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); // find a xmm reg thats not EEREC_TEMP
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
|
||||||
|
|
||||||
SSE_MOVSS_M32_to_XMM(t1reg, (uptr)&VU->VF[_Ft_].UL[_Ftf_]); // t1reg.x <- Ft.Ftf
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW)
|
|
||||||
vuFloat2(t1reg, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, t1reg); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, EEREC_TEMP); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip to pjmp32 if its not a division by zero
|
|
||||||
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0)
|
|
||||||
SSE_ANDPS_M128_to_XMM(t1reg, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(t1reg, (uptr)&g_maxvals[0]); // If 0, then t1reg = +/- fmax
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), t1reg); // q <- t1reg
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); // t1reg.x <- 1
|
|
||||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); // EEREC_TEMP = 1 / t1reg
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8); // check for overflow
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); // q <- t1reg
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore data to t1reg
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else { // 0/n ---- So result is +/- 0, or +/- Fmax if (FT == 0)
|
|
||||||
//SysPrintf("FS = 0, FT = n \n");
|
|
||||||
|
|
||||||
if( _Ftf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
||||||
else _unpackVFSS_xyzw(EEREC_TEMP, EEREC_T, _Ftf_); // EEREC_TEMP.x <- EEREC_T.ftf
|
|
||||||
|
|
||||||
t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1);
|
|
||||||
//t2reg = (EEREC_TEMP <= 1) ? (EEREC_TEMP + 2) : (EEREC_TEMP - 2);
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip if none are
|
|
||||||
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); //Zero divide (only when not 0/0)
|
|
||||||
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If 0, then EEREC_TEMP = +/- fmax
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]); // If != 0, then EEREC_TEMP = +/- 0
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore t1reg data
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else { // _Fs_ != 0
|
else t1boolean = 0;
|
||||||
if( _Ft_ == 0 ) {
|
|
||||||
if( _Ftf_ < 3 ) { // needs extra work, fs can also be zero!
|
|
||||||
//SysPrintf("DIV: FS = n, FT == 0 ---- Finished! \n");
|
|
||||||
|
|
||||||
_unpackVFSS_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); // EEREC_TEMP.x <- EEREC_S.fsf
|
// FT can be zero here! so we need to check if its zero and set the correct flag.
|
||||||
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
|
||||||
|
SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); // Set all F's if each vector is zero
|
||||||
|
|
||||||
t1reg = (EEREC_TEMP == 0) ? (EEREC_TEMP + 1) : (EEREC_TEMP - 1); // find a xmm reg thats not EEREC_TEMP
|
SSE_MOVMSKPS_XMM_to_R32( vftemp, EEREC_TEMP); // Move the sign bits of the previous calculation
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
|
||||||
|
|
||||||
// FS can still be zero here! so we need to check if its zero and set the correct flag.
|
AND32ItoR( vftemp, (1<<_Ftf_) ); // Grab "Is Zero" bits from the previous calculation
|
||||||
SSE_XORPS_XMM_to_XMM(t1reg, t1reg); // Clear t1reg
|
pjmp32 = JZ32(0); // Skip if none are
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t1reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t1reg); // Move the sign bits of the previous calculation
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); // Clear EEREC_TEMP
|
||||||
|
SSE_CMPEQSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); // Set all F's if each vector is zero
|
||||||
|
SSE_MOVMSKPS_XMM_to_R32(vftemp, EEREC_TEMP); // Move the sign bits of the previous calculation
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
AND32ItoR( vftemp, (1<<_Fsf_) ); // Grab "Is Zero" bits from the previous calculation
|
||||||
pjmp = JZ8(0); // Skip if none are
|
pjmp = JZ8(0);
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
|
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
|
||||||
pjmp1 = JMP8(0);
|
pjmp1 = JMP8(0);
|
||||||
x86SetJ8(pjmp);
|
x86SetJ8(pjmp);
|
||||||
OR32ItoM(VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820); // Zero divide (only when not 0/0)
|
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
|
||||||
x86SetJ8(pjmp1);
|
x86SetJ8(pjmp1);
|
||||||
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]);
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore data to t1reg
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
//SysPrintf("DIV: FS = n, FT == 1 \n");
|
|
||||||
if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
|
||||||
else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
|
||||||
if (CHECK_EXTRA_OVERFLOW)
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
}
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
||||||
|
if( _Ftf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_T);
|
||||||
|
else _unpackVF_xyzw(t1reg, EEREC_T, _Ftf_);
|
||||||
|
|
||||||
if( _Ftf_ )
|
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
||||||
{
|
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
||||||
t1reg = _vuGetTempXMMreg(info);
|
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax
|
||||||
|
|
||||||
if( t1reg >= 0 )
|
pjmp2 = JMP32(0);
|
||||||
{
|
|
||||||
//SysPrintf("Second Half of DIV Opcode: Fixed 1 \n");
|
|
||||||
_unpackVFSS_xyzw(t1reg, EEREC_T, _Ftf_);
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW) {
|
x86SetJ32(pjmp32);
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
vuFloat2(t1reg, t1reg, 0x8);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (t2reg = 0; ( (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++)
|
|
||||||
; // Makes t2reg not be EEREC_TEMP or t1reg.
|
|
||||||
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM2[0], t2reg ); // backup data in t2reg to a temp address
|
if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S);
|
||||||
|
else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_);
|
||||||
|
if( _Ftf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(t1reg, EEREC_T);
|
||||||
|
else _unpackVF_xyzw(t1reg, EEREC_T, _Ftf_);
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
if (CHECK_EXTRA_OVERFLOW) {
|
||||||
SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg
|
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
vuFloat2(t1reg, t1reg, 0x8);
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t2reg, t1reg); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32( EAX, t2reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip if none are
|
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t2reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp = JZ8(0);
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
|
|
||||||
pjmp1 = JMP8(0);
|
|
||||||
x86SetJ8(pjmp);
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
|
|
||||||
x86SetJ8(pjmp1);
|
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&DIV_TEMP_XMM2[0] ); // restore t2reg data
|
|
||||||
_freeXMMreg(t1reg); // free t1reg
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//SysPrintf("Second Half of DIV Opcode: Fixed 2 \n");
|
|
||||||
t1reg = EEREC_T;
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM[0], t1reg ); // backup data in t1reg to a temp address
|
|
||||||
_unpackVFSS_xyzw(t1reg, EEREC_T, _Ftf_);
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW) {
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
vuFloat2(t1reg, t1reg, 0x8);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (t2reg = 0; ( (t2reg == EEREC_TEMP) || (t2reg == t1reg) ); t2reg++)
|
|
||||||
; // Makes t2reg not be EEREC_TEMP or t1reg.
|
|
||||||
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM2[0], t2reg ); // backup data in t2reg to a temp address
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t2reg, t1reg); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32( EAX, t2reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip if none are
|
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t2reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp = JZ8(0);
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
|
|
||||||
pjmp1 = JMP8(0);
|
|
||||||
x86SetJ8(pjmp);
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
|
|
||||||
x86SetJ8(pjmp1);
|
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore t1reg data
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&DIV_TEMP_XMM2[0] ); // restore t2reg data
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
//SysPrintf("Second Half of DIV Opcode: Fixed 3 \n");
|
|
||||||
|
|
||||||
if (CHECK_EXTRA_OVERFLOW) {
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
vuFloat2(EEREC_T, EEREC_T, 0x8);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (t2reg = 0; ( (t2reg == EEREC_TEMP) || (t2reg == EEREC_T) ); t2reg++)
|
|
||||||
; // Makes t2reg not be EEREC_TEMP or EEREC_T.
|
|
||||||
|
|
||||||
SSE_MOVAPS_XMM_to_M128( (uptr)&DIV_TEMP_XMM2[0], t2reg ); // backup data in t2reg to a temp address
|
|
||||||
|
|
||||||
// FT can still be zero here! so we need to check if its zero and set the correct flag.
|
|
||||||
SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_T); // Set all F's if each vector is zero
|
|
||||||
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32( EAX, t2reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp32 = JZ32(0); // Skip if none are
|
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(t2reg, t2reg); // Clear t2reg
|
|
||||||
XOR32RtoR( EAX, EAX ); // Clear EAX
|
|
||||||
SSE_CMPEQSS_XMM_to_XMM(t2reg, EEREC_TEMP); // Set all F's if each vector is zero
|
|
||||||
SSE_MOVMSKPS_XMM_to_R32(EAX, t2reg); // Move the sign bits of the previous calculation
|
|
||||||
|
|
||||||
AND32ItoR( EAX, 0x00000001 ); // Grab "Is Zero" bits from the previous calculation
|
|
||||||
pjmp = JZ8(0);
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x410 ); // Set invalid flag (0/0)
|
|
||||||
pjmp1 = JMP8(0);
|
|
||||||
x86SetJ8(pjmp);
|
|
||||||
OR32ItoM( VU_VI_ADDR(REG_STATUS_FLAG, 2), 0x820 ); // Zero divide (only when not 0/0)
|
|
||||||
x86SetJ8(pjmp1);
|
|
||||||
|
|
||||||
SSE_XORPS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
||||||
SSE_ANDPS_M128_to_XMM(EEREC_TEMP, (uptr)&VU_Signed_Zero_Mask[0]);
|
|
||||||
SSE_ORPS_M128_to_XMM(EEREC_TEMP, (uptr)&g_maxvals[0]); // If division by zero, then EEREC_TEMP = +/- fmax
|
|
||||||
|
|
||||||
pjmp2 = JMP32(0);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp32);
|
|
||||||
|
|
||||||
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T);
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
|
||||||
|
|
||||||
x86SetJ32(pjmp2);
|
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
|
||||||
SSE_MOVAPS_M128_to_XMM( t2reg, (uptr)&DIV_TEMP_XMM2[0] ); // restore t2reg data
|
|
||||||
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg);
|
||||||
|
if (CHECK_OVERFLOW) vuFloat2(EEREC_TEMP, EEREC_TEMP, 0x8);
|
||||||
|
|
||||||
|
x86SetJ32(pjmp2);
|
||||||
|
|
||||||
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP);
|
||||||
|
|
||||||
|
if (t1boolean) SSE_MOVAPS_M128_to_XMM( t1reg, (uptr)&DIV_TEMP_XMM[0] ); // restore t1reg data
|
||||||
|
else _freeXMMreg(t1reg); // free t1reg
|
||||||
|
|
||||||
|
_freeX86reg(vftemp); // free vftemp
|
||||||
}
|
}
|
||||||
|
|
||||||
void recVUMI_SQRT( VURegs *VU, int info )
|
void recVUMI_SQRT( VURegs *VU, int info )
|
||||||
|
@ -5187,7 +4790,7 @@ void recVUMI_RNEXT( VURegs *VU, int info )
|
||||||
int rreg, x86temp0, x86temp1;
|
int rreg, x86temp0, x86temp1;
|
||||||
if ( _Ft_ == 0) return;
|
if ( _Ft_ == 0) return;
|
||||||
|
|
||||||
SysPrintf("VU RNEXT Opcode \n");
|
//SysPrintf("VU RNEXT Opcode \n");
|
||||||
|
|
||||||
rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ);
|
rreg = ALLOCVI(REG_R, MODE_WRITE|MODE_READ);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue