JIT64: support merged branching for rlwinmx, too
Not quite as common a branch instruction as cmpwi, but close.
This commit is contained in:
parent
b56117de05
commit
8fc57d61ba
|
@ -106,6 +106,8 @@ public:
|
||||||
// Use to extract bytes from a register using the regcache. offset is in bytes.
|
// Use to extract bytes from a register using the regcache. offset is in bytes.
|
||||||
Gen::OpArg ExtractFromReg(int reg, int offset);
|
Gen::OpArg ExtractFromReg(int reg, int offset);
|
||||||
void AndWithMask(Gen::X64Reg reg, u32 mask);
|
void AndWithMask(Gen::X64Reg reg, u32 mask);
|
||||||
|
bool CheckMergedBranch(int crf);
|
||||||
|
void DoMergedBranch();
|
||||||
|
|
||||||
// Reads a given bit of a given CR register part.
|
// Reads a given bit of a given CR register part.
|
||||||
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
|
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);
|
||||||
|
|
|
@ -264,6 +264,54 @@ void Jit64::reg_imm(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool Jit64::CheckMergedBranch(int crf)
|
||||||
|
{
|
||||||
|
const UGeckoInstruction& next = js.next_inst;
|
||||||
|
return (((next.OPCD == 16 /* bcx */) ||
|
||||||
|
((next.OPCD == 19) && (next.SUBOP10 == 528) /* bcctrx */) ||
|
||||||
|
((next.OPCD == 19) && (next.SUBOP10 == 16) /* bclrx */)) &&
|
||||||
|
(next.BO & BO_DONT_DECREMENT_FLAG) &&
|
||||||
|
!(next.BO & BO_DONT_CHECK_CONDITION) &&
|
||||||
|
(next.BI >> 2) == crf);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Jit64::DoMergedBranch()
|
||||||
|
{
|
||||||
|
// Code that handles successful PPC branching.
|
||||||
|
if (js.next_inst.OPCD == 16) // bcx
|
||||||
|
{
|
||||||
|
if (js.next_inst.LK)
|
||||||
|
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
|
||||||
|
|
||||||
|
u32 destination;
|
||||||
|
if (js.next_inst.AA)
|
||||||
|
destination = SignExt16(js.next_inst.BD << 2);
|
||||||
|
else
|
||||||
|
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
|
||||||
|
WriteExit(destination);
|
||||||
|
}
|
||||||
|
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
|
||||||
|
{
|
||||||
|
if (js.next_inst.LK)
|
||||||
|
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
|
||||||
|
MOV(32, R(RSCRATCH), M(&CTR));
|
||||||
|
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||||
|
WriteExitDestInRSCRATCH();
|
||||||
|
}
|
||||||
|
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
|
||||||
|
{
|
||||||
|
MOV(32, R(RSCRATCH), M(&LR));
|
||||||
|
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
||||||
|
if (js.next_inst.LK)
|
||||||
|
MOV(32, M(&LR), Imm32(js.next_compilerPC + 4));
|
||||||
|
WriteExitDestInRSCRATCH();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
PanicAlert("WTF invalid branch");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Jit64::cmpXX(UGeckoInstruction inst)
|
void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
// USES_CR
|
// USES_CR
|
||||||
|
@ -272,23 +320,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
int b = inst.RB;
|
int b = inst.RB;
|
||||||
int crf = inst.CRFD;
|
int crf = inst.CRFD;
|
||||||
|
bool merge_branch = CheckMergedBranch(crf);
|
||||||
bool merge_branch = false;
|
|
||||||
int test_crf = js.next_inst.BI >> 2;
|
|
||||||
// Check if the next instruction is a branch - if it is, merge the two.
|
|
||||||
if (((js.next_inst.OPCD == 16 /* bcx */) ||
|
|
||||||
((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528) /* bcctrx */) ||
|
|
||||||
((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16) /* bclrx */)) &&
|
|
||||||
(js.next_inst.BO & BO_DONT_DECREMENT_FLAG) &&
|
|
||||||
!(js.next_inst.BO & BO_DONT_CHECK_CONDITION))
|
|
||||||
{
|
|
||||||
// Looks like a decent conditional branch that we can merge with.
|
|
||||||
// It only test CR, not CTR.
|
|
||||||
if (test_crf == crf)
|
|
||||||
{
|
|
||||||
merge_branch = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
OpArg comparand;
|
OpArg comparand;
|
||||||
bool signedCompare;
|
bool signedCompare;
|
||||||
|
@ -358,42 +390,9 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
|
DoMergedBranch();
|
||||||
if (js.next_inst.OPCD == 16) // bcx
|
|
||||||
{
|
|
||||||
if (js.next_inst.LK)
|
|
||||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
|
||||||
|
|
||||||
u32 destination;
|
|
||||||
if (js.next_inst.AA)
|
|
||||||
destination = SignExt16(js.next_inst.BD << 2);
|
|
||||||
else
|
|
||||||
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
|
|
||||||
WriteExit(destination);
|
|
||||||
}
|
}
|
||||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
|
else if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
{
|
|
||||||
if (js.next_inst.LK)
|
|
||||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
|
||||||
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
|
|
||||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
|
||||||
WriteExitDestInRSCRATCH();
|
|
||||||
}
|
|
||||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
|
|
||||||
{
|
|
||||||
MOV(32, R(RSCRATCH), PPCSTATE_LR);
|
|
||||||
if (js.next_inst.LK)
|
|
||||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
|
||||||
WriteExitDestInRSCRATCH();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
PanicAlert("WTF invalid branch");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
|
||||||
{
|
{
|
||||||
gpr.Flush();
|
gpr.Flush();
|
||||||
fpr.Flush();
|
fpr.Flush();
|
||||||
|
@ -401,7 +400,6 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
X64Reg input = RSCRATCH;
|
X64Reg input = RSCRATCH;
|
||||||
|
@ -487,53 +485,14 @@ void Jit64::cmpXX(UGeckoInstruction inst)
|
||||||
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
|
|
||||||
// Code that handles successful PPC branching.
|
DoMergedBranch();
|
||||||
if (js.next_inst.OPCD == 16) // bcx
|
|
||||||
{
|
|
||||||
if (js.next_inst.LK)
|
|
||||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
|
||||||
|
|
||||||
u32 destination;
|
|
||||||
if (js.next_inst.AA)
|
|
||||||
destination = SignExt16(js.next_inst.BD << 2);
|
|
||||||
else
|
|
||||||
destination = js.next_compilerPC + SignExt16(js.next_inst.BD << 2);
|
|
||||||
WriteExit(destination);
|
|
||||||
}
|
|
||||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 528)) // bcctrx
|
|
||||||
{
|
|
||||||
if (js.next_inst.LK)
|
|
||||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
|
||||||
|
|
||||||
MOV(32, R(RSCRATCH), PPCSTATE_CTR);
|
|
||||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
|
||||||
WriteExitDestInRSCRATCH();
|
|
||||||
}
|
|
||||||
else if ((js.next_inst.OPCD == 19) && (js.next_inst.SUBOP10 == 16)) // bclrx
|
|
||||||
{
|
|
||||||
MOV(32, R(RSCRATCH), PPCSTATE_LR);
|
|
||||||
AND(32, R(RSCRATCH), Imm32(0xFFFFFFFC));
|
|
||||||
|
|
||||||
if (js.next_inst.LK)
|
|
||||||
MOV(32, PPCSTATE_LR, Imm32(js.next_compilerPC + 4));
|
|
||||||
|
|
||||||
WriteExitDestInRSCRATCH();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
PanicAlert("WTF invalid branch");
|
|
||||||
}
|
|
||||||
|
|
||||||
SetJumpTarget(pDontBranch);
|
SetJumpTarget(pDontBranch);
|
||||||
|
|
||||||
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
{
|
|
||||||
gpr.Flush();
|
|
||||||
fpr.Flush();
|
|
||||||
WriteExit(js.next_compilerPC + 4);
|
WriteExit(js.next_compilerPC + 4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
@ -1494,6 +1453,11 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITIntegerOff);
|
JITDISABLE(bJITIntegerOff);
|
||||||
int a = inst.RA;
|
int a = inst.RA;
|
||||||
int s = inst.RS;
|
int s = inst.RS;
|
||||||
|
|
||||||
|
// rlwinm is commonly used as a branch test, second only to the more obvious cmpw.
|
||||||
|
// since it's almost never used with any check other than beq, only support beq for simplicity.
|
||||||
|
bool merge_branch = inst.Rc && CheckMergedBranch(0) && (js.next_inst.BI & 3) == 2;
|
||||||
|
|
||||||
if (gpr.R(s).IsImm())
|
if (gpr.R(s).IsImm())
|
||||||
{
|
{
|
||||||
u32 result = (int)gpr.R(s).offset;
|
u32 result = (int)gpr.R(s).offset;
|
||||||
|
@ -1510,6 +1474,11 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
|
||||||
bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH;
|
bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH;
|
||||||
u32 mask = Helper_Mask(inst.MB, inst.ME);
|
u32 mask = Helper_Mask(inst.MB, inst.ME);
|
||||||
bool simple_mask = mask == 0xff || mask == 0xffff;
|
bool simple_mask = mask == 0xff || mask == 0xffff;
|
||||||
|
// in case of a merged branch, track whether or not we've set flags.
|
||||||
|
// if not, we need to do a TEST later to get them.
|
||||||
|
bool needs_test = false;
|
||||||
|
// if we know the high bit can't be set, we can avoid doing a sign extend for flag storage
|
||||||
|
bool needs_sext = true;
|
||||||
int mask_size = inst.ME - inst.MB + 1;
|
int mask_size = inst.ME - inst.MB + 1;
|
||||||
|
|
||||||
gpr.Lock(a, s);
|
gpr.Lock(a, s);
|
||||||
|
@ -1517,11 +1486,14 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
|
||||||
if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3)
|
if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3)
|
||||||
{
|
{
|
||||||
LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0));
|
LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0));
|
||||||
|
needs_test = true;
|
||||||
}
|
}
|
||||||
// common optimized case: byte/word extract
|
// common optimized case: byte/word extract
|
||||||
else if (simple_mask && !(inst.SH & (mask_size - 1)))
|
else if (simple_mask && !(inst.SH & (mask_size - 1)))
|
||||||
{
|
{
|
||||||
MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0));
|
MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0));
|
||||||
|
needs_test = true;
|
||||||
|
needs_sext = false;
|
||||||
}
|
}
|
||||||
// another optimized special case: byte/word extract plus shift
|
// another optimized special case: byte/word extract plus shift
|
||||||
else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift &&
|
else if (((mask >> inst.SH) << inst.SH) == mask && !left_shift &&
|
||||||
|
@ -1529,6 +1501,7 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
MOVZX(32, mask_size, gpr.RX(a), gpr.R(s));
|
MOVZX(32, mask_size, gpr.RX(a), gpr.R(s));
|
||||||
SHL(32, gpr.R(a), Imm8(inst.SH));
|
SHL(32, gpr.R(a), Imm8(inst.SH));
|
||||||
|
needs_sext = inst.SH + mask_size >= 32;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -1542,17 +1515,64 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
|
||||||
else if (right_shift)
|
else if (right_shift)
|
||||||
{
|
{
|
||||||
SHR(32, gpr.R(a), Imm8(inst.MB));
|
SHR(32, gpr.R(a), Imm8(inst.MB));
|
||||||
|
needs_sext = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (inst.SH != 0)
|
if (inst.SH != 0)
|
||||||
ROL(32, gpr.R(a), Imm8(inst.SH));
|
ROL(32, gpr.R(a), Imm8(inst.SH));
|
||||||
if (!(inst.MB == 0 && inst.ME == 31))
|
if (!(inst.MB == 0 && inst.ME == 31))
|
||||||
|
{
|
||||||
|
// we need flags if we're merging the branch
|
||||||
|
if (merge_branch)
|
||||||
|
AND(32, gpr.R(a), Imm32(mask));
|
||||||
|
else
|
||||||
AndWithMask(gpr.RX(a), mask);
|
AndWithMask(gpr.RX(a), mask);
|
||||||
|
needs_sext = inst.MB == 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
needs_test = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (inst.Rc)
|
}
|
||||||
|
if (merge_branch)
|
||||||
|
{
|
||||||
|
js.downcountAmount++;
|
||||||
|
js.skipnext = true;
|
||||||
|
if (needs_sext)
|
||||||
|
{
|
||||||
|
MOVSX(64, 32, RSCRATCH, gpr.R(a));
|
||||||
|
MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RSCRATCH));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOV(64, M(&PowerPC::ppcState.cr_val[0]), gpr.R(a));
|
||||||
|
}
|
||||||
|
if (needs_test)
|
||||||
|
TEST(32, gpr.R(a), gpr.R(a));
|
||||||
|
|
||||||
|
gpr.UnlockAll();
|
||||||
|
FixupBranch dont_branch = J_CC((js.next_inst.BO & BO_BRANCH_IF_TRUE) ? CC_NE : CC_E, true);
|
||||||
|
|
||||||
|
gpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
|
fpr.Flush(FLUSH_MAINTAIN_STATE);
|
||||||
|
|
||||||
|
DoMergedBranch();
|
||||||
|
|
||||||
|
SetJumpTarget(dont_branch);
|
||||||
|
|
||||||
|
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
|
||||||
|
{
|
||||||
|
gpr.Flush();
|
||||||
|
fpr.Flush();
|
||||||
|
WriteExit(js.next_compilerPC + 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (inst.Rc)
|
||||||
|
{
|
||||||
ComputeRC(gpr.R(a));
|
ComputeRC(gpr.R(a));
|
||||||
|
}
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue