JitIL: Fixed a bug (hack?) where NaN was treated as -0.0 in fcmpx. Implemented a NaN check routine in FDCmpCR.
The bug (hack?) was introduced in r3312. I could not determine whether it was a deliberate hack or an accident. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6165 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
efd9bae449
commit
26eac5d22a
|
@ -518,8 +518,8 @@ public:
|
||||||
InstLoc EmitDoubleToSingle(InstLoc op1) {
|
InstLoc EmitDoubleToSingle(InstLoc op1) {
|
||||||
return FoldUOp(DoubleToSingle, op1);
|
return FoldUOp(DoubleToSingle, op1);
|
||||||
}
|
}
|
||||||
InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2) {
|
InstLoc EmitFDCmpCR(InstLoc op1, InstLoc op2, int ordered) {
|
||||||
return FoldBiOp(FDCmpCR, op1, op2);
|
return FoldBiOp(FDCmpCR, op1, op2, ordered);
|
||||||
}
|
}
|
||||||
InstLoc EmitLoadGQR(unsigned gqr) {
|
InstLoc EmitLoadGQR(unsigned gqr) {
|
||||||
return FoldZeroOp(LoadGQR, gqr);
|
return FoldZeroOp(LoadGQR, gqr);
|
||||||
|
|
|
@ -48,6 +48,7 @@ The register allocation is linear scan allocation.
|
||||||
#include "../../ConfigManager.h"
|
#include "../../ConfigManager.h"
|
||||||
#include "x64Emitter.h"
|
#include "x64Emitter.h"
|
||||||
#include "../../../../Common/Src/CPUDetect.h"
|
#include "../../../../Common/Src/CPUDetect.h"
|
||||||
|
#include "MathUtil.h"
|
||||||
|
|
||||||
static ThunkManager thunks;
|
static ThunkManager thunks;
|
||||||
|
|
||||||
|
@ -714,6 +715,12 @@ static void regWriteExit(RegInfo& RI, InstLoc dest) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Helper function to check floating point exceptions
|
||||||
|
static double GC_ALIGNED16(isSNANTemp[2][2]);
|
||||||
|
static bool checkIsSNAN() {
|
||||||
|
return MathUtil::IsSNAN(isSNANTemp[0][0]) || MathUtil::IsSNAN(isSNANTemp[1][0]);
|
||||||
|
}
|
||||||
|
|
||||||
static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) {
|
static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool MakeProfile) {
|
||||||
//printf("Writing block: %x\n", js.blockStart);
|
//printf("Writing block: %x\n", js.blockStart);
|
||||||
RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts());
|
RegInfo RI(Jit, ibuild->getFirstInst(), ibuild->getNumInsts());
|
||||||
|
@ -1532,10 +1539,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case FDCmpCR: {
|
case FDCmpCR: {
|
||||||
|
const u32 ordered = *I >> 24;
|
||||||
X64Reg destreg = regFindFreeReg(RI);
|
X64Reg destreg = regFindFreeReg(RI);
|
||||||
// TODO: Add case for NaN (CC_P)
|
// TODO: Remove an extra MOVSD if loc1.IsSimpleReg()
|
||||||
Jit->MOVSD(XMM0, fregLocForInst(RI, getOp1(I)));
|
OpArg loc1 = fregLocForInst(RI, getOp1(I));
|
||||||
Jit->UCOMISD(XMM0, fregLocForInst(RI, getOp2(I)));
|
OpArg loc2 = fregLocForInst(RI, getOp2(I));
|
||||||
|
Jit->MOVSD(XMM0, loc1);
|
||||||
|
Jit->UCOMISD(XMM0, loc2);
|
||||||
FixupBranch pNan = Jit->J_CC(CC_P);
|
FixupBranch pNan = Jit->J_CC(CC_P);
|
||||||
FixupBranch pEqual = Jit->J_CC(CC_Z);
|
FixupBranch pEqual = Jit->J_CC(CC_Z);
|
||||||
FixupBranch pLesser = Jit->J_CC(CC_C);
|
FixupBranch pLesser = Jit->J_CC(CC_C);
|
||||||
|
@ -1545,6 +1555,56 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, bool UseProfile, bool Mak
|
||||||
// NaN
|
// NaN
|
||||||
Jit->SetJumpTarget(pNan);
|
Jit->SetJumpTarget(pNan);
|
||||||
Jit->MOV(32, R(destreg), Imm32(0x1));
|
Jit->MOV(32, R(destreg), Imm32(0x1));
|
||||||
|
|
||||||
|
static const u32 FPSCR_VE = (u32)1 << (31 - 24);
|
||||||
|
static const u32 FPSCR_VXVC = (u32)1 << (31 - 12);
|
||||||
|
static const u32 FPSCR_VXSNAN = (u32)1 << (31 - 7);
|
||||||
|
static const u32 FPSCR_FX = (u32)1 << (31 - 0);
|
||||||
|
|
||||||
|
if (ordered) {
|
||||||
|
// fcmpo
|
||||||
|
// TODO: Optimize the following code if slow.
|
||||||
|
// SNAN check may not be needed
|
||||||
|
// because it does not happen so much.
|
||||||
|
Jit->MOVSD(M(isSNANTemp[0]), XMM0);
|
||||||
|
if (loc2.IsSimpleReg()) {
|
||||||
|
Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg());
|
||||||
|
} else {
|
||||||
|
Jit->MOVSD(XMM0, loc2);
|
||||||
|
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
|
||||||
|
}
|
||||||
|
Jit->ABI_CallFunction(checkIsSNAN);
|
||||||
|
Jit->TEST(8, R(EAX), R(EAX));
|
||||||
|
FixupBranch ok = Jit->J_CC(CC_Z);
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXSNAN)); // FPSCR.Hex |= mask;
|
||||||
|
Jit->TEST(32, M(&FPSCR), Imm32(FPSCR_VE));
|
||||||
|
FixupBranch finish0 = Jit->J_CC(CC_NZ);
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
|
||||||
|
FixupBranch finish1 = Jit->J();
|
||||||
|
Jit->SetJumpTarget(ok);
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
|
||||||
|
Jit->SetJumpTarget(finish0);
|
||||||
|
Jit->SetJumpTarget(finish1);
|
||||||
|
} else {
|
||||||
|
// fcmpu
|
||||||
|
// TODO: Optimize the following code if slow
|
||||||
|
Jit->MOVSD(M(isSNANTemp[0]), XMM0);
|
||||||
|
if (loc2.IsSimpleReg()) {
|
||||||
|
Jit->MOVSD(M(isSNANTemp[1]), loc2.GetSimpleReg());
|
||||||
|
} else {
|
||||||
|
Jit->MOVSD(XMM0, loc2);
|
||||||
|
Jit->MOVSD(M(isSNANTemp[1]), XMM0);
|
||||||
|
}
|
||||||
|
Jit->ABI_CallFunction(checkIsSNAN);
|
||||||
|
Jit->TEST(8, R(EAX), R(EAX));
|
||||||
|
FixupBranch finish = Jit->J_CC(CC_Z);
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_FX)); // FPSCR.FX = 1;
|
||||||
|
Jit->OR(32, M(&FPSCR), Imm32(FPSCR_VXVC)); // FPSCR.Hex |= mask;
|
||||||
|
Jit->SetJumpTarget(finish);
|
||||||
|
}
|
||||||
|
|
||||||
FixupBranch continue2 = Jit->J();
|
FixupBranch continue2 = Jit->J();
|
||||||
// Equal
|
// Equal
|
||||||
Jit->SetJumpTarget(pEqual);
|
Jit->SetJumpTarget(pEqual);
|
||||||
|
|
|
@ -115,9 +115,10 @@ void JitIL::fcmpx(UGeckoInstruction inst)
|
||||||
INSTRUCTION_START
|
INSTRUCTION_START
|
||||||
JITDISABLE(FloatingPoint)
|
JITDISABLE(FloatingPoint)
|
||||||
IREmitter::InstLoc lhs, rhs, res;
|
IREmitter::InstLoc lhs, rhs, res;
|
||||||
lhs = ibuild.EmitLoadFRegDENToZero(inst.FA);
|
lhs = ibuild.EmitLoadFReg(inst.FA);
|
||||||
rhs = ibuild.EmitLoadFRegDENToZero(inst.FB);
|
rhs = ibuild.EmitLoadFReg(inst.FB);
|
||||||
res = ibuild.EmitFDCmpCR(lhs, rhs);
|
int ordered = (inst.SUBOP10 == 32) ? 1 : 0;
|
||||||
|
res = ibuild.EmitFDCmpCR(lhs, rhs, ordered);
|
||||||
ibuild.EmitStoreFPRF(res);
|
ibuild.EmitStoreFPRF(res);
|
||||||
ibuild.EmitStoreCR(res, inst.CRFD);
|
ibuild.EmitStoreCR(res, inst.CRFD);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue