From 8fc57d61ba701088c2517001066d24fbc6035b2d Mon Sep 17 00:00:00 2001 From: Fiora Date: Sun, 17 Aug 2014 23:12:16 -0700 Subject: [PATCH] JIT64: support merged branching for rlwinmx, too Not quite as common a branch instruction as cmpwi, but close. --- Source/Core/Core/PowerPC/Jit64/Jit.h | 2 + .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 216 ++++++++++-------- 2 files changed, 120 insertions(+), 98 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h index ac3defbb9f..821df102d1 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/PowerPC/Jit64/Jit.h @@ -106,6 +106,8 @@ public: // Use to extract bytes from a register using the regcache. offset is in bytes. Gen::OpArg ExtractFromReg(int reg, int offset); void AndWithMask(Gen::X64Reg reg, u32 mask); + bool CheckMergedBranch(int crf); + void DoMergedBranch(); // Reads a given bit of a given CR register part. void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index a3f0eec8c6..5b405de5bb 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -264,6 +264,54 @@ void Jit64::reg_imm(UGeckoInstruction inst) } } +bool Jit64::CheckMergedBranch(int crf) +{ + const UGeckoInstruction& next = js.next_inst; + return (((next.OPCD == 16 /* bcx */) || + ((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) || + ((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) && + (next.BO & BO_DONT_DECREMENT_FLAG) && + !(next.BO & BO_DONT_CHECK_CONDITION) && + (next.BI >> 2) == crf); +} + +void Jit64::DoMergedBranch() +{ + // Code that handles successful PPC branching. + if (js.next_inst.OPCD == 16) // bcx + { + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + + u32 destination; + if (js.next_inst.AA) + destination = SignExt16(js.next_inst.BD << 2); + else + destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); + WriteExit(destination); + } + else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx + { + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + MOV(32, R(RSCRATCH), M(&CTR)); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); + WriteExitDestInRSCRATCH(); + } + else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx + { + MOV(32, R(RSCRATCH), M(&LR)); + AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); + if (js.next_inst.LK) + MOV(32, M(&LR), Imm32(js.next_compilerPC + 4)); + WriteExitDestInRSCRATCH(); + } + else + { + PanicAlert("WTF invalid branch"); + } +} + void Jit64::cmpXX(UGeckoInstruction inst) { // USES_CR @@ -272,23 +320,7 @@ void Jit64::cmpXX(UGeckoInstruction inst) int a = inst.RA; int b = inst.RB; int crf = inst.CRFD; - - bool merge_branch = false; - int test_crf = js.next_inst.BI >> 2; - // Check if the next instruction is a branch - if it is, merge the two. - if (((js.next_inst.OPCD == 16 /* bcx */) || - ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) || - ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) && - (js.next_inst.BO & BO_DONT_DECREMENT_FLAG) && - !(js.next_inst.BO & BO_DONT_CHECK_CONDITION)) - { - // Looks like a decent conditional branch that we can merge with. - // It only test CR, not CTR. - if (test_crf == crf) - { - merge_branch = true; - } - } + bool merge_branch = CheckMergedBranch(crf); OpArg comparand; bool signedCompare; @@ -358,47 +390,13 @@ void Jit64::cmpXX(UGeckoInstruction inst) { gpr.Flush(); fpr.Flush(); - - if (js.next_inst.OPCD == 16) // bcx - { - if (js.next_inst.LK) - MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - - u32 destination; - if (js.next_inst.AA) - destination = SignExt16(js.next_inst.BD << 2); - else - destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination); - } - else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx - { - if (js.next_inst.LK) - MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - MOV(32, R(RSCRATCH), PPCSTATE_CTR); - AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - WriteExitDestInRSCRATCH(); - } - else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx - { - MOV(32, R(RSCRATCH), PPCSTATE_LR); - if (js.next_inst.LK) - MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - WriteExitDestInRSCRATCH(); - } - else - { - PanicAlert("WTF invalid branch"); - } + DoMergedBranch(); } - else + else if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) { - if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) - { - gpr.Flush(); - fpr.Flush(); - WriteExit(js.next_compilerPC + 4); - } + gpr.Flush(); + fpr.Flush(); + WriteExit(js.next_compilerPC + 4); } } } @@ -487,51 +485,12 @@ void Jit64::cmpXX(UGeckoInstruction inst) gpr.Flush(FLUSH_MAINTAIN_STATE); fpr.Flush(FLUSH_MAINTAIN_STATE); - // Code that handles successful PPC branching. - if (js.next_inst.OPCD == 16) // bcx - { - if (js.next_inst.LK) - MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - - u32 destination; - if (js.next_inst.AA) - destination = SignExt16(js.next_inst.BD << 2); - else - destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2); - WriteExit(destination); - } - else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx - { - if (js.next_inst.LK) - MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - - MOV(32, R(RSCRATCH), PPCSTATE_CTR); - AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - WriteExitDestInRSCRATCH(); - } - else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx - { - MOV(32, R(RSCRATCH), PPCSTATE_LR); - AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC)); - - if (js.next_inst.LK) - MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4)); - - WriteExitDestInRSCRATCH(); - } - else - { - PanicAlert("WTF invalid branch"); - } + DoMergedBranch(); SetJumpTarget(pDontBranch); if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) - { - gpr.Flush(); - fpr.Flush(); WriteExit(js.next_compilerPC + 4); - } } } @@ -1494,6 +1453,11 @@ void Jit64::rlwinmx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA; int s = inst.RS; + + // rlwinm is commonly used as a branch test, second only to the more obvious cmpw. + // since it's almost never used with any check other than beq, only support beq for simplicity. + bool merge_branch = inst.Rc && CheckMergedBranch(0) && (js.next_inst.BI & 3) == 2; + if (gpr.R(s).IsImm()) { u32 result = (int)gpr.R(s).offset; @@ -1510,6 +1474,11 @@ void Jit64::rlwinmx(UGeckoInstruction inst) bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH; u32 mask = Helper_Mask(inst.MB, inst.ME); bool simple_mask = mask == 0xff || mask == 0xffff; + // in case of a merged branch, track whether or not we've set flags. + // if not, we need to do a TEST later to get them. + bool needs_test = false; + // if we know the high bit can't be set, we can avoid doing a sign extend for flag storage + bool needs_sext = true; int mask_size = inst.ME - inst.MB + 1; gpr.Lock(a, s); @@ -1517,11 +1486,14 @@ void Jit64::rlwinmx(UGeckoInstruction inst) if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3) { LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0)); + needs_test = true; } // common optimized case: byte/word extract else if (simple_mask && !(inst.SH & (mask_size - 1))) { MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0)); + needs_test = true; + needs_sext = false; } // another optimized special case: byte/word extract plus shift else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift && @@ -1529,6 +1501,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst) { MOVZX(32, mask_size, gpr.RX(a), gpr.R(s)); SHL(32, gpr.R(a), Imm8(inst.SH)); + needs_sext = inst.SH + mask_size >= 32; } else { @@ -1542,17 +1515,64 @@ void Jit64::rlwinmx(UGeckoInstruction inst) else if (right_shift) { SHR(32, gpr.R(a), Imm8(inst.MB)); + needs_sext = false; } else { if (inst.SH != 0) ROL(32, gpr.R(a), Imm8(inst.SH)); if (!(inst.MB == 0 && inst.ME == 31)) - AndWithMask(gpr.RX(a), mask); + { + // we need flags if we're merging the branch + if (merge_branch) + AND(32, gpr.R(a), Imm32(mask)); + else + AndWithMask(gpr.RX(a), mask); + needs_sext = inst.MB == 0; + } + else + { + needs_test = true; + } } } - if (inst.Rc) + if (merge_branch) + { + js.downcountAmount++; + js.skipnext = true; + if (needs_sext) + { + MOVSX(64, 32, RSCRATCH, gpr.R(a)); + MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RSCRATCH)); + } + else + { + MOV(64, M(&PowerPC::ppcState.cr_val[0]), gpr.R(a)); + } + if (needs_test) + TEST(32, gpr.R(a), gpr.R(a)); + + gpr.UnlockAll(); + FixupBranch dont_branch = J_CC((js.next_inst.BO & BO_BRANCH_IF_TRUE) ? CC_NE : CC_E, true); + + gpr.Flush(FLUSH_MAINTAIN_STATE); + fpr.Flush(FLUSH_MAINTAIN_STATE); + + DoMergedBranch(); + + SetJumpTarget(dont_branch); + + if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) + { + gpr.Flush(); + fpr.Flush(); + WriteExit(js.next_compilerPC + 4); + } + } + else if (inst.Rc) + { ComputeRC(gpr.R(a)); + } gpr.UnlockAll(); } }