JitIL: Fixed a bug (hack?) where NaN was treated as -0.0 in fcmpx. Implemented a NaN check routine in FDCmpCR.

The bug (hack?) was introduced in r3312. I could not determine whether it was a hack or not. 

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6165 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
nodchip 2010-09-02 09:36:19 +00:00
parent efd9bae449
commit 26eac5d22a
3 changed files with 69 additions and 8 deletions

View File

@ -518,8 +518,8 @@ public:
InstLoc EmitDoubleToSingle(InstLoc op1) { InstLoc EmitDoubleToSingle(InstLoc op1) {
return FoldUOp(DoubleToSingle, op1); return FoldUOp(DoubleToSingle, op1);
} }
InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2) { InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2, int ordered) {
return FoldBiOp(FDCmpCR, op1, op2); return FoldBiOp(FDCmpCR, op1, op2, ordered);
} }
InstLoc EmitLoadGQR(unsigned gqr) { InstLoc EmitLoadGQR(unsigned gqr) {
return FoldZeroOp(LoadGQR, gqr); return FoldZeroOp(LoadGQR, gqr);

View File

@ -48,6 +48,7 @@ The register allocation is linear scan allocation.
#include "../../ConfigManager.h" #include "../../ConfigManager.h"
#include "x64Emitter.h" #include "x64Emitter.h"
#include "../../../../Common/Src/CPUDetect.h" #include "../../../../Common/Src/CPUDetect.h"
#include "MathUtil.h"
static ThunkManager thunks; static ThunkManager thunks;
@ -714,6 +715,12 @@ static void regWriteExit(RegInfo& RI, InstLoc dest) {
} }
} }
// Helper for the FDCmpCR NaN path: tells the JIT-emitted code whether either
// compared operand is a signaling NaN (as decided by MathUtil::IsSNAN).
//
// isSNANTemp is a scratch buffer shared between the emitted code and this
// helper: the emitted code stores the first operand at isSNANTemp[0] and the
// second at isSNANTemp[1] (via MOVSD) immediately before calling
// checkIsSNAN(). Only element [0] of each row is read here.
// NOTE(review): GC_ALIGNED16 presumably forces 16-byte alignment, so each
// two-double row would be one aligned 16-byte slot - confirm against the macro.
// NOTE(review): the buffer is a single static, so this looks non-reentrant -
// confirm that generated code never runs this path concurrently.
static double GC_ALIGNED16(isSNANTemp[2][2]);
// Returns true if either stored operand is a signaling NaN, false otherwise.
static bool checkIsSNAN() {
return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]);
}
static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) { static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) {
//printf("Writing block: %x\n", js.blockStart); //printf("Writing block: %x\n", js.blockStart);
RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts()); RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts());
@ -1532,10 +1539,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
break; break;
} }
case FDCmpCR: { case FDCmpCR: {
const u32 ordered = *I >> 24;
X64Reg destreg = regFindFreeReg(RI); X64Reg destreg = regFindFreeReg(RI);
// TODO: Add case for NaN (CC_P) // TODO: Remove an extra MOVSD if loc1.IsSimpleReg()
Jit->MOVSD(XMM0, fregLocForInst(RI, getOp1(I))); OpArg loc1 = fregLocForInst(RI, getOp1(I));
Jit->UCOMISD(XMM0, fregLocForInst(RI, getOp2(I))); OpArg loc2 = fregLocForInst(RI, getOp2(I));
Jit->MOVSD(XMM0, loc1);
Jit->UCOMISD(XMM0, loc2);
FixupBranch pNan = Jit->J_CC(CC_P); FixupBranch pNan = Jit->J_CC(CC_P);
FixupBranch pEqual = Jit->J_CC(CC_Z); FixupBranch pEqual = Jit->J_CC(CC_Z);
FixupBranch pLesser = Jit->J_CC(CC_C); FixupBranch pLesser = Jit->J_CC(CC_C);
@ -1545,6 +1555,56 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
// NaN // NaN
Jit->SetJumpTarget(pNan); Jit->SetJumpTarget(pNan);
Jit->MOV(32, R(destreg), Imm32(0x1)); Jit->MOV(32, R(destreg), Imm32(0x1));
static const u32 FPSCR_VE = (u32)1 << (31 - 24);
static const u32 FPSCR_VXVC = (u32)1 << (31 - 12);
static const u32 FPSCR_VXSNAN = (u32)1 << (31 - 7);
static const u32 FPSCR_FX = (u32)1 << (31 - 0);
if (ordered) {
// fcmpo
// TODO: Optimize the following code if slow.
// SNAN check may not be needed
// because it does not happen so much.
Jit->MOVSD(M(isSNANTemp[0]), XMM0);
if (loc2.IsSimpleReg()) {
Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg());
} else {
Jit->MOVSD(XMM0, loc2);
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
}
Jit->ABI_CallFunction(checkIsSNAN);
Jit->TEST(8, R(EAX), R(EAX));
FixupBranch ok = Jit->J_CC(CC_Z);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask;
Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE));
FixupBranch finish0 = Jit->J_CC(CC_NZ);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
FixupBranch finish1 = Jit->J();
Jit->SetJumpTarget(ok);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
Jit->SetJumpTarget(finish0);
Jit->SetJumpTarget(finish1);
} else {
// fcmpu
// TODO: Optimize the following code if slow
Jit->MOVSD(M(isSNANTemp[0]), XMM0);
if (loc2.IsSimpleReg()) {
Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg());
} else {
Jit->MOVSD(XMM0, loc2);
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
}
Jit->ABI_CallFunction(checkIsSNAN);
Jit->TEST(8, R(EAX), R(EAX));
FixupBranch finish = Jit->J_CC(CC_Z);
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
Jit->SetJumpTarget(finish);
}
FixupBranch continue2 = Jit->J(); FixupBranch continue2 = Jit->J();
// Equal // Equal
Jit->SetJumpTarget(pEqual); Jit->SetJumpTarget(pEqual);

View File

@ -115,9 +115,10 @@ void JitIL::fcmpx(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(FloatingPoint) JITDISABLE(FloatingPoint)
IREmitter::InstLoc lhs, rhs, res; IREmitter::InstLoc lhs, rhs, res;
lhs = ibuild.EmitLoadFRegDENToZero(inst.FA); lhs = ibuild.EmitLoadFReg(inst.FA);
rhs = ibuild.EmitLoadFRegDENToZero(inst.FB); rhs = ibuild.EmitLoadFReg(inst.FB);
res = ibuild.EmitFDCmpCR(lhs, rhs); int ordered = (inst.SUBOP10 == 32) ? 1 : 0;
res = ibuild.EmitFDCmpCR(lhs, rhs, ordered);
ibuild.EmitStoreFPRF(res); ibuild.EmitStoreFPRF(res);
ibuild.EmitStoreCR(res, inst.CRFD); ibuild.EmitStoreCR(res, inst.CRFD);
} }