From 0bcc20ca5baac14659a782a48278c20a343c456e Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 16 Sep 2013 08:57:51 +0000 Subject: [PATCH] [ARM] fcmpo/fcmpu implementations. --- Source/Core/Core/Src/PowerPC/JitArm32/Jit.h | 4 +- .../PowerPC/JitArm32/JitArm_FloatingPoint.cpp | 142 +++++++++++++++++- .../Src/PowerPC/JitArm32/JitArm_Tables.cpp | 4 +- 3 files changed, 145 insertions(+), 5 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h index 09745cf8df..b2034c7c13 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -200,7 +200,9 @@ public: void fmrx(UGeckoInstruction _inst); void fmaddsx(UGeckoInstruction _inst); void fmaddx(UGeckoInstruction _inst); - void fctiwzx(UGeckoInstruction inst); + void fctiwzx(UGeckoInstruction _inst); + void fcmpo(UGeckoInstruction _inst); + void fcmpu(UGeckoInstruction _inst); // Floating point loadStore void lfs(UGeckoInstruction _inst); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp index 453d9d739a..9f5c2ce96a 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_FloatingPoint.cpp @@ -38,10 +38,12 @@ static Operand2 FRFIMask(5, 0x8); // 0x60000 static Operand2 FIMask(2, 8); // 0x20000 static Operand2 FRMask(4, 8); // 0x40000 static Operand2 FXMask(2, 1); // 0x80000000 +static Operand2 VEMask(0x40, 0); // 0x40 static Operand2 XXException(2, 4); // 0x2000000 static Operand2 CVIException(1, 0xC); // 0x100 - +static Operand2 NANException(1, 4); // 0x1000000 +static Operand2 VXVCException(8, 8); // 0x80000 void JitArm::Helper_UpdateCR1(ARMReg value) { // Should just update exception flags, not do any compares. @@ -59,6 +61,12 @@ void JitArm::SetFPException(ARMReg Reg, u32 Exception) case FPSCR_XX: ExceptionMask = &XXException; break; + case FPSCR_VXSNAN: + ExceptionMask = &NANException; + break; + case FPSCR_VXVC: + ExceptionMask = &VXVCException; + break; default: _assert_msg_(DYNA_REC, false, "Passed unsupported FPexception: 0x%08x", Exception); return; @@ -87,7 +95,7 @@ void JitArm::fctiwzx(UGeckoInstruction inst) ARMReg V0 = fpr.GetReg(); ARMReg V1 = fpr.GetReg(); ARMReg V2 = fpr.GetReg(); - + ARMReg rA = gpr.GetReg(); ARMReg fpscrReg = gpr.GetReg(); @@ -155,6 +163,136 @@ void JitArm::fctiwzx(UGeckoInstruction inst) fpr.Unlock(V2); } +void JitArm::fcmpo(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + u32 a = inst.FA, b = inst.FB; + int cr = inst.CRFD; + + ARMReg vA = fpr.R0(a); + ARMReg vB = fpr.R0(b); + ARMReg fpscrReg = gpr.GetReg(); + ARMReg crReg = gpr.GetReg(); + Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 + Operand2 LessThan(0x8, 0xA); // 0x8000 + Operand2 GreaterThan(0x4, 0xA); // 0x4000 + Operand2 EqualTo(0x2, 0xA); // 0x2000 + Operand2 NANRes(0x1, 0xA); // 0x1000 + FixupBranch Done1, Done2, Done3; + LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + BIC(fpscrReg, fpscrReg, FPRFMask); + + VCMPE(vA, vB); + VMRS(_PC); + SetCC(CC_LT); + ORR(fpscrReg, fpscrReg, LessThan); + MOV(crReg, 8); + Done1 = B(); + SetCC(CC_GT); + ORR(fpscrReg, fpscrReg, GreaterThan); + MOV(crReg, 4); + Done2 = B(); + SetCC(CC_EQ); + ORR(fpscrReg, fpscrReg, EqualTo); + MOV(crReg, 2); + Done3 = B(); + SetCC(); + + ORR(fpscrReg, fpscrReg, NANRes); + MOV(crReg, 1); + + VCMPE(vA, vA); + VMRS(_PC); + FixupBranch NanA = B_CC(CC_NEQ); + VCMPE(vB, vB); + VMRS(_PC); + FixupBranch NanB = B_CC(CC_NEQ); + + SetFPException(fpscrReg, FPSCR_VXVC); + FixupBranch Done4 = B(); + + SetJumpTarget(NanA); + SetJumpTarget(NanB); + + SetFPException(fpscrReg, FPSCR_VXSNAN); + + TST(fpscrReg, VEMask); + + FixupBranch noVXVC = B_CC(CC_NEQ); + SetFPException(fpscrReg, FPSCR_VXVC); + + SetJumpTarget(noVXVC); + SetJumpTarget(Done1); + SetJumpTarget(Done2); + SetJumpTarget(Done3); + SetJumpTarget(Done4); + STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); + STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + gpr.Unlock(fpscrReg, crReg); +} + +void JitArm::fcmpu(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + u32 a = inst.FA, b = inst.FB; + int cr = inst.CRFD; + + ARMReg vA = fpr.R0(a); + ARMReg vB = fpr.R0(b); + ARMReg fpscrReg = gpr.GetReg(); + ARMReg crReg = gpr.GetReg(); + Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 + Operand2 LessThan(0x8, 0xA); // 0x8000 + Operand2 GreaterThan(0x4, 0xA); // 0x4000 + Operand2 EqualTo(0x2, 0xA); // 0x2000 + Operand2 NANRes(0x1, 0xA); // 0x1000 + FixupBranch Done1, Done2, Done3; + LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + BIC(fpscrReg, fpscrReg, FPRFMask); + + VCMPE(vA, vB); + VMRS(_PC); + SetCC(CC_LT); + ORR(fpscrReg, fpscrReg, LessThan); + MOV(crReg, 8); + Done1 = B(); + SetCC(CC_GT); + ORR(fpscrReg, fpscrReg, GreaterThan); + MOV(crReg, 4); + Done2 = B(); + SetCC(CC_EQ); + ORR(fpscrReg, fpscrReg, EqualTo); + MOV(crReg, 2); + Done3 = B(); + SetCC(); + + ORR(fpscrReg, fpscrReg, NANRes); + MOV(crReg, 1); + + VCMPE(vA, vA); + VMRS(_PC); + FixupBranch NanA = B_CC(CC_NEQ); + VCMPE(vB, vB); + VMRS(_PC); + FixupBranch NanB = B_CC(CC_NEQ); + FixupBranch Done4 = B(); + + SetJumpTarget(NanA); + SetJumpTarget(NanB); + + SetFPException(fpscrReg, FPSCR_VXSNAN); + + SetJumpTarget(Done1); + SetJumpTarget(Done2); + SetJumpTarget(Done3); + SetJumpTarget(Done4); + STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); + STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + gpr.Unlock(fpscrReg, crReg); +} + void JitArm::fabsx(UGeckoInstruction inst) { INSTRUCTION_START diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp index ae2d2e6558..d37a0206e0 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -347,8 +347,8 @@ static GekkoOPTemplate table59[] = static GekkoOPTemplate table63[] = { {264, &JitArm::fabsx}, //"fabsx", OPTYPE_FPU, FL_RC_BIT_F}}, - {32, &JitArm::Default}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, - {0, &JitArm::Default}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, + {32, &JitArm::fcmpo}, //"fcmpo", OPTYPE_FPU, FL_RC_BIT_F}}, + {0, &JitArm::fcmpu}, //"fcmpu", OPTYPE_FPU, FL_RC_BIT_F}}, {14, &JitArm::Default}, //"fctiwx", OPTYPE_FPU, FL_RC_BIT_F}}, {15, &JitArm::fctiwzx}, //"fctiwzx", OPTYPE_FPU, FL_RC_BIT_F}}, {72, &JitArm::fmrx}, //"fmrx", OPTYPE_FPU, FL_RC_BIT_F}},