// Patch overview (annotation only; the diff text below is unchanged).
//
// Files touched, all under Source/Core/Core/Src/PowerPC/JitArm32:
//   * Jit.h             - declares ps_cmpu0, ps_cmpu1, ps_cmpo0 and ps_cmpo1.
//   * JitArm_Paired.cpp - adds the four JitArm:: implementations after ps_nabs.
//   * JitArm_Tables.cpp - table4 entries 0, 32, 64 and 96 change from
//                         &JitArm::Default to ps_cmpu0, ps_cmpo0, ps_cmpu1 and
//                         ps_cmpo1 respectively.
//
// What each added emitter does, in emission order:
//   1. Reads FA, FB and CRFD from the instruction. The *0 variants fetch their
//      operands with fpr.R0(), the *1 variants with fpr.R1().
//   2. Takes two registers from gpr.GetReg(), loads fpscr from
//      R9 + PPCSTATE_OFF(fpscr) and BICs the FPRFMask bits (0x1F000).
//   3. Emits VCMPE(vA, vB) + VMRS(_PC), then three runs under SetCC(CC_LT),
//      SetCC(CC_GT) and SetCC(CC_EQ). Each run ORs 0x8000 / 0x4000 / 0x2000 into
//      the fpscr copy, moves 8 / 4 / 2 into crReg and branches (Done1..Done3)
//      to the common tail.
//   4. After SetCC() resets the condition: ORs 0x1000 (NANRes) into the fpscr
//      copy, moves 1 into crReg, then compares vA with itself and vB with itself.
//      Presumably this path only matters for an unordered compare - confirm.
//        - ps_cmpu0/1: a CC_NEQ result on either self-compare leads to
//          SetFPException(fpscrReg, FPSCR_VXSNAN); otherwise Done4 skips it.
//        - ps_cmpo0/1: when neither self-compare is CC_NEQ, FPSCR_VXVC is set.
//          Otherwise FPSCR_VXSNAN is set, and FPSCR_VXVC is added only when the
//          B_CC(CC_NEQ) after TST(fpscrReg, VEMask) is not taken.
//   5. Common tail: STRB crReg to R9 + PPCSTATE_OFF(cr_fast) + cr, STR the fpscr
//      copy back to R9 + PPCSTATE_OFF(fpscr), then gpr.Unlock() both registers.
//
// NOTE(review): VEMask is used by ps_cmpo0/ps_cmpo1 but is not declared in any
//   hunk shown here - presumably a file-level constant; confirm it is in scope.
// NOTE(review): the four bodies differ only in R0 vs R1 and in the VXVC
//   handling; a shared helper would remove the duplication.
// NOTE(review): the trailing comments on the table4 rows for ps_cmpu1/ps_cmpo1
//   say FL_RC_BIT while the ps_cmpu0/ps_cmpo0 rows say FL_SET_CRn - confirm
//   which flag is intended.
// NOTE(review): this copy of the patch appears to have its line breaks
//   collapsed into spaces; they likely need restoring before it can be applied.
diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h index d465c1a6d4..9b354b29d3 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.h @@ -246,6 +246,10 @@ public: void ps_nabs(UGeckoInstruction _inst); void ps_rsqrte(UGeckoInstruction _inst); void ps_sel(UGeckoInstruction _inst); + void ps_cmpu0(UGeckoInstruction _inst); + void ps_cmpu1(UGeckoInstruction _inst); + void ps_cmpo0(UGeckoInstruction _inst); + void ps_cmpo1(UGeckoInstruction _inst); // LoadStore paired void psq_l(UGeckoInstruction _inst); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp index e5cda2ee2e..185af91a9d 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Paired.cpp @@ -658,3 +658,263 @@ void JitArm::ps_nabs(UGeckoInstruction inst) VABS(vD1, vB1); VNEG(vD1, vD1); } +void JitArm::ps_cmpu0(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + u32 a = inst.FA, b = inst.FB; + int cr = inst.CRFD; + + ARMReg vA = fpr.R0(a); + ARMReg vB = fpr.R0(b); + ARMReg fpscrReg = gpr.GetReg(); + ARMReg crReg = gpr.GetReg(); + Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 + Operand2 LessThan(0x8, 0xA); // 0x8000 + Operand2 GreaterThan(0x4, 0xA); // 0x4000 + Operand2 EqualTo(0x2, 0xA); // 0x2000 + Operand2 NANRes(0x1, 0xA); // 0x1000 + FixupBranch Done1, Done2, Done3; + LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + BIC(fpscrReg, fpscrReg, FPRFMask); + + VCMPE(vA, vB); + VMRS(_PC); + SetCC(CC_LT); + ORR(fpscrReg, fpscrReg, LessThan); + MOV(crReg, 8); + Done1 = B(); + SetCC(CC_GT); + ORR(fpscrReg, fpscrReg, GreaterThan); + MOV(crReg, 4); + Done2 = B(); + SetCC(CC_EQ); + ORR(fpscrReg, fpscrReg, EqualTo); + MOV(crReg, 2); + Done3 = B(); + SetCC(); + + ORR(fpscrReg, fpscrReg, NANRes); + MOV(crReg, 1); + + VCMPE(vA, vA); + VMRS(_PC); + FixupBranch 
NanA = B_CC(CC_NEQ); + VCMPE(vB, vB); + VMRS(_PC); + FixupBranch NanB = B_CC(CC_NEQ); + FixupBranch Done4 = B(); + + SetJumpTarget(NanA); + SetJumpTarget(NanB); + + SetFPException(fpscrReg, FPSCR_VXSNAN); + + SetJumpTarget(Done1); + SetJumpTarget(Done2); + SetJumpTarget(Done3); + SetJumpTarget(Done4); + STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); + STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + gpr.Unlock(fpscrReg, crReg); +} + +void JitArm::ps_cmpu1(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + u32 a = inst.FA, b = inst.FB; + int cr = inst.CRFD; + + ARMReg vA = fpr.R1(a); + ARMReg vB = fpr.R1(b); + ARMReg fpscrReg = gpr.GetReg(); + ARMReg crReg = gpr.GetReg(); + Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 + Operand2 LessThan(0x8, 0xA); // 0x8000 + Operand2 GreaterThan(0x4, 0xA); // 0x4000 + Operand2 EqualTo(0x2, 0xA); // 0x2000 + Operand2 NANRes(0x1, 0xA); // 0x1000 + FixupBranch Done1, Done2, Done3; + LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + BIC(fpscrReg, fpscrReg, FPRFMask); + + VCMPE(vA, vB); + VMRS(_PC); + SetCC(CC_LT); + ORR(fpscrReg, fpscrReg, LessThan); + MOV(crReg, 8); + Done1 = B(); + SetCC(CC_GT); + ORR(fpscrReg, fpscrReg, GreaterThan); + MOV(crReg, 4); + Done2 = B(); + SetCC(CC_EQ); + ORR(fpscrReg, fpscrReg, EqualTo); + MOV(crReg, 2); + Done3 = B(); + SetCC(); + + ORR(fpscrReg, fpscrReg, NANRes); + MOV(crReg, 1); + + VCMPE(vA, vA); + VMRS(_PC); + FixupBranch NanA = B_CC(CC_NEQ); + VCMPE(vB, vB); + VMRS(_PC); + FixupBranch NanB = B_CC(CC_NEQ); + FixupBranch Done4 = B(); + + SetJumpTarget(NanA); + SetJumpTarget(NanB); + + SetFPException(fpscrReg, FPSCR_VXSNAN); + + SetJumpTarget(Done1); + SetJumpTarget(Done2); + SetJumpTarget(Done3); + SetJumpTarget(Done4); + STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); + STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + gpr.Unlock(fpscrReg, crReg); +} + +void JitArm::ps_cmpo0(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + u32 a = inst.FA, b = inst.FB; + int cr 
= inst.CRFD; + + ARMReg vA = fpr.R0(a); + ARMReg vB = fpr.R0(b); + ARMReg fpscrReg = gpr.GetReg(); + ARMReg crReg = gpr.GetReg(); + Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 + Operand2 LessThan(0x8, 0xA); // 0x8000 + Operand2 GreaterThan(0x4, 0xA); // 0x4000 + Operand2 EqualTo(0x2, 0xA); // 0x2000 + Operand2 NANRes(0x1, 0xA); // 0x1000 + FixupBranch Done1, Done2, Done3; + LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + BIC(fpscrReg, fpscrReg, FPRFMask); + + VCMPE(vA, vB); + VMRS(_PC); + SetCC(CC_LT); + ORR(fpscrReg, fpscrReg, LessThan); + MOV(crReg, 8); + Done1 = B(); + SetCC(CC_GT); + ORR(fpscrReg, fpscrReg, GreaterThan); + MOV(crReg, 4); + Done2 = B(); + SetCC(CC_EQ); + ORR(fpscrReg, fpscrReg, EqualTo); + MOV(crReg, 2); + Done3 = B(); + SetCC(); + + ORR(fpscrReg, fpscrReg, NANRes); + MOV(crReg, 1); + + VCMPE(vA, vA); + VMRS(_PC); + FixupBranch NanA = B_CC(CC_NEQ); + VCMPE(vB, vB); + VMRS(_PC); + FixupBranch NanB = B_CC(CC_NEQ); + + SetFPException(fpscrReg, FPSCR_VXVC); + FixupBranch Done4 = B(); + + SetJumpTarget(NanA); + SetJumpTarget(NanB); + + SetFPException(fpscrReg, FPSCR_VXSNAN); + + TST(fpscrReg, VEMask); + + FixupBranch noVXVC = B_CC(CC_NEQ); + SetFPException(fpscrReg, FPSCR_VXVC); + + SetJumpTarget(noVXVC); + SetJumpTarget(Done1); + SetJumpTarget(Done2); + SetJumpTarget(Done3); + SetJumpTarget(Done4); + STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); + STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + gpr.Unlock(fpscrReg, crReg); +} + +void JitArm::ps_cmpo1(UGeckoInstruction inst) +{ + INSTRUCTION_START + JITDISABLE(bJITFloatingPointOff) + u32 a = inst.FA, b = inst.FB; + int cr = inst.CRFD; + + ARMReg vA = fpr.R1(a); + ARMReg vB = fpr.R1(b); + ARMReg fpscrReg = gpr.GetReg(); + ARMReg crReg = gpr.GetReg(); + Operand2 FPRFMask(0x1F, 0xA); // 0x1F000 + Operand2 LessThan(0x8, 0xA); // 0x8000 + Operand2 GreaterThan(0x4, 0xA); // 0x4000 + Operand2 EqualTo(0x2, 0xA); // 0x2000 + Operand2 NANRes(0x1, 0xA); // 0x1000 + FixupBranch Done1, Done2, Done3; + LDR(fpscrReg, R9, 
PPCSTATE_OFF(fpscr)); + BIC(fpscrReg, fpscrReg, FPRFMask); + + VCMPE(vA, vB); + VMRS(_PC); + SetCC(CC_LT); + ORR(fpscrReg, fpscrReg, LessThan); + MOV(crReg, 8); + Done1 = B(); + SetCC(CC_GT); + ORR(fpscrReg, fpscrReg, GreaterThan); + MOV(crReg, 4); + Done2 = B(); + SetCC(CC_EQ); + ORR(fpscrReg, fpscrReg, EqualTo); + MOV(crReg, 2); + Done3 = B(); + SetCC(); + + ORR(fpscrReg, fpscrReg, NANRes); + MOV(crReg, 1); + + VCMPE(vA, vA); + VMRS(_PC); + FixupBranch NanA = B_CC(CC_NEQ); + VCMPE(vB, vB); + VMRS(_PC); + FixupBranch NanB = B_CC(CC_NEQ); + + SetFPException(fpscrReg, FPSCR_VXVC); + FixupBranch Done4 = B(); + + SetJumpTarget(NanA); + SetJumpTarget(NanB); + + SetFPException(fpscrReg, FPSCR_VXSNAN); + + TST(fpscrReg, VEMask); + + FixupBranch noVXVC = B_CC(CC_NEQ); + SetFPException(fpscrReg, FPSCR_VXVC); + + SetJumpTarget(noVXVC); + SetJumpTarget(Done1); + SetJumpTarget(Done2); + SetJumpTarget(Done3); + SetJumpTarget(Done4); + STRB(crReg, R9, PPCSTATE_OFF(cr_fast) + cr); + STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); + gpr.Unlock(fpscrReg, crReg); +} + diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp index df913230e2..4f37d72c5f 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Tables.cpp @@ -125,14 +125,14 @@ static GekkoOPTemplate primarytable[] = static GekkoOPTemplate table4[] = { //SUBOP10 - {0, &JitArm::Default}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, - {32, &JitArm::Default}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, + {0, &JitArm::ps_cmpu0}, //"ps_cmpu0", OPTYPE_PS, FL_SET_CRn}}, + {32, &JitArm::ps_cmpo0}, //"ps_cmpo0", OPTYPE_PS, FL_SET_CRn}}, {40, &JitArm::ps_neg}, //"ps_neg", OPTYPE_PS, FL_RC_BIT}}, {136, &JitArm::ps_nabs}, //"ps_nabs", OPTYPE_PS, FL_RC_BIT}}, {264, &JitArm::ps_abs}, //"ps_abs", OPTYPE_PS, FL_RC_BIT}}, - {64, &JitArm::Default}, //"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, + {64, &JitArm::ps_cmpu1}, 
//"ps_cmpu1", OPTYPE_PS, FL_RC_BIT}}, {72, &JitArm::ps_mr}, //"ps_mr", OPTYPE_PS, FL_RC_BIT}}, - {96, &JitArm::Default}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, + {96, &JitArm::ps_cmpo1}, //"ps_cmpo1", OPTYPE_PS, FL_RC_BIT}}, {528, &JitArm::ps_merge00}, //"ps_merge00", OPTYPE_PS, FL_RC_BIT}}, {560, &JitArm::ps_merge01}, //"ps_merge01", OPTYPE_PS, FL_RC_BIT}}, {592, &JitArm::ps_merge10}, //"ps_merge10", OPTYPE_PS, FL_RC_BIT}},