JitIL: Fixed a bug (hack?) that NaN is considered as -0.0 in fcmpx. Implemented Nan check routine in FDCmpCR.

The bug (hack?) was introduced in r3312. I could not determine whether it was a hack or not. 

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6165 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nodchip 2010-09-02 09:36:19 +00:00
parent efd9bae449
commit 26eac5d22a
3 changed files with 69 additions and 8 deletions

View File

@ -518,8 +518,8 @@ public:
InstLoc EmitDoubleToSingle(InstLoc op1) {
return FoldUOp(DoubleToSingle, op1);
}
InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2) {
return FoldBiOp(FDCmpCR, op1, op2);
InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2, int ordered) {
return FoldBiOp(FDCmpCR, op1, op2, ordered);
}
InstLoc EmitLoadGQR(unsigned gqr) {
return FoldZeroOp(LoadGQR, gqr);

View File

@ -48,6 +48,7 @@ The register allocation is linear scan allocation.
#include "../../ConfigManager.h"
#include "x64Emitter.h"
#include "../../../../Common/Src/CPUDetect.h"
#include "MathUtil.h"
static ThunkManager thunks;
@ -714,6 +715,12 @@ static void regWriteExit(RegInfo& RI, InstLoc dest) {
}
}
// Helper function to check floating point exceptions
static double GC_ALIGNED16(isSNANTemp[2][2]);
static bool checkIsSNAN() {
return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]);
}
static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) {
//printf("Writing block: %x\n", js.blockStart);
RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts());
@ -1532,10 +1539,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
break;
}
case FDCmpCR: {
const u32 ordered = *I >> 24;
X64Reg destreg = regFindFreeReg(RI);
// TODO: Add case for NaN (CC_P)
Jit->MOVSD(XMM0, fregLocForInst(RI, getOp1(I)));
Jit->UCOMISD(XMM0, fregLocForInst(RI, getOp2(I)));
// TODO: Remove an extra MOVSD if loc1.IsSimpleReg()
OpArg loc1 = fregLocForInst(RI, getOp1(I));
OpArg loc2 = fregLocForInst(RI, getOp2(I));
Jit->MOVSD(XMM0, loc1);
Jit->UCOMISD(XMM0, loc2);
FixupBranch pNan = Jit->J_CC(CC_P);
FixupBranch pEqual = Jit->J_CC(CC_Z);
FixupBranch pLesser = Jit->J_CC(CC_C);
@ -1545,6 +1555,56 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
// NaN
Jit->SetJumpTarget(pNan);
Jit->MOV(32, R(destreg), Imm32(0x1));
static const u32 FPSCR_VE = (u32)1 << (31 - 24);
static const u32 FPSCR_VXVC = (u32)1 << (31 - 12);
static const u32 FPSCR_VXSNAN = (u32)1 << (31 - 7);
static const u32 FPSCR_FX = (u32)1 << (31 - 0);
if (ordered) {
// fcmpo
// TODO: Optimize the following code if slow.
// SNAN check may not be needed
// because it does not happen so much.
Jit->MOVSD(M(isSNANTemp[0]), XMM0);
if (loc2.IsSimpleReg()) {
Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg());
} else {
Jit->MOVSD(XMM0, loc2);
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
}
Jit->ABI_CallFunction(checkIsSNAN);
Jit->TEST(8, R(EAX), R(EAX));
FixupBranch ok = Jit->J_CC(CC_Z);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask;
Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE));
FixupBranch finish0 = Jit->J_CC(CC_NZ);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
FixupBranch finish1 = Jit->J();
Jit->SetJumpTarget(ok);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
Jit->SetJumpTarget(finish0);
Jit->SetJumpTarget(finish1);
} else {
// fcmpu
// TODO: Optimize the following code if slow
Jit->MOVSD(M(isSNANTemp[0]), XMM0);
if (loc2.IsSimpleReg()) {
Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg());
} else {
Jit->MOVSD(XMM0, loc2);
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
}
Jit->ABI_CallFunction(checkIsSNAN);
Jit->TEST(8, R(EAX), R(EAX));
FixupBranch finish = Jit->J_CC(CC_Z);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
Jit->SetJumpTarget(finish);
}
FixupBranch continue2 = Jit->J();
// Equal
Jit->SetJumpTarget(pEqual);

View File

@ -115,9 +115,10 @@ void JitIL::fcmpx(UGeckoInstruction inst)
INSTRUCTION_START
JITDISABLE(FloatingPoint)
IREmitter::InstLoc lhs, rhs, res;
lhs = ibuild.EmitLoadFRegDENToZero(inst.FA);
rhs = ibuild.EmitLoadFRegDENToZero(inst.FB);
res = ibuild.EmitFDCmpCR(lhs, rhs);
lhs = ibuild.EmitLoadFReg(inst.FA);
rhs = ibuild.EmitLoadFReg(inst.FB);
int ordered = (inst.SUBOP10 == 32) ? 1 : 0;
res = ibuild.EmitFDCmpCR(lhs, rhs, ordered);
ibuild.EmitStoreFPRF(res);
ibuild.EmitStoreCR(res, inst.CRFD);
}