Merge pull request #1087 from FioraAeterna/moremerges

JIT: generic branch merging
This commit is contained in:
skidau 2014-09-25 13:40:05 +10:00
commit a8b1a10025
3 changed files with 174 additions and 257 deletions

View File

@ -118,13 +118,15 @@ public:
void FinalizeCarryOverflow(bool oe, bool inv = false); void FinalizeCarryOverflow(bool oe, bool inv = false);
void FinalizeCarry(Gen::CCFlags cond); void FinalizeCarry(Gen::CCFlags cond);
void FinalizeCarry(bool ca); void FinalizeCarry(bool ca);
void ComputeRC(const Gen::OpArg & arg); void ComputeRC(const Gen::OpArg & arg, bool needs_test = true, bool needs_sext = true);
// Use to extract bytes from a register using the regcache. offset is in bytes. // Use to extract bytes from a register using the regcache. offset is in bytes.
Gen::OpArg ExtractFromReg(int reg, int offset); Gen::OpArg ExtractFromReg(int reg, int offset);
void AndWithMask(Gen::X64Reg reg, u32 mask); void AndWithMask(Gen::X64Reg reg, u32 mask);
bool CheckMergedBranch(int crf); bool CheckMergedBranch(int crf);
void DoMergedBranch(); void DoMergedBranch();
void DoMergedBranchCondition();
void DoMergedBranchImmediate(s64 val);
// Reads a given bit of a given CR register part. // Reads a given bit of a given CR register part.
void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false); void GetCRFieldBit(int field, int bit, Gen::X64Reg out, bool negate = false);

View File

@ -40,7 +40,9 @@ void Jit64::GenerateOverflow()
FixupBranch exit = J(); FixupBranch exit = J();
SetJumpTarget(jno); SetJumpTarget(jno);
//XER[OV] = 0 //XER[OV] = 0
PUSHF();
AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK)); AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK));
POPF();
SetJumpTarget(exit); SetJumpTarget(exit);
} }
@ -115,17 +117,39 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv)
FinalizeCarry(inv ? CC_NC : CC_C); FinalizeCarry(inv ? CC_NC : CC_C);
} }
void Jit64::ComputeRC(const Gen::OpArg & arg) // Be careful; only set needs_test to false if we can be absolutely sure flags don't need
// to be recalculated and haven't been clobbered. Keep in mind not all instructions set
// sufficient flags -- for example, the flags from SHL/SHR are *not* sufficient for LT/GT
// branches, only EQ.
void Jit64::ComputeRC(const Gen::OpArg & arg, bool needs_test, bool needs_sext)
{ {
_assert_msg_(DYNA_REC, arg.IsSimpleReg() || arg.IsImm(), "Invalid ComputeRC operand");
if (arg.IsImm()) if (arg.IsImm())
{ {
MOV(64, PPCSTATE(cr_val[0]), Imm32((s32)arg.offset)); MOV(64, PPCSTATE(cr_val[0]), Imm32((s32)arg.offset));
} }
else else if (needs_sext)
{ {
MOVSX(64, 32, RSCRATCH, arg); MOVSX(64, 32, RSCRATCH, arg);
MOV(64, PPCSTATE(cr_val[0]), R(RSCRATCH)); MOV(64, PPCSTATE(cr_val[0]), R(RSCRATCH));
} }
else
{
MOV(64, PPCSTATE(cr_val[0]), arg);
}
if (CheckMergedBranch(0))
{
if (arg.IsImm())
{
DoMergedBranchImmediate((s32)arg.offset);
}
else
{
if (needs_test)
TEST(32, arg, arg);
DoMergedBranchCondition();
}
}
} }
OpArg Jit64::ExtractFromReg(int reg, int offset) OpArg Jit64::ExtractFromReg(int reg, int offset)
@ -175,6 +199,7 @@ static u32 Xor(u32 a, u32 b)
void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry) void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void (XEmitter::*op)(int, const Gen::OpArg&, const Gen::OpArg&), bool Rc, bool carry)
{ {
bool needs_test = false;
gpr.Lock(d, a); gpr.Lock(d, a);
// Be careful; addic treats r0 as r0, but addi treats r0 as zero. // Be careful; addic treats r0 as r0, but addi treats r0 as zero.
if (a || binary || carry) if (a || binary || carry)
@ -186,7 +211,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
} }
else if (a == d) else if (a == d)
{ {
gpr.KillImmediate(d, true, true); gpr.BindToRegister(d, true);
(this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16; (this->*op)(32, gpr.R(d), Imm32(value)); //m_GPR[d] = m_GPR[_inst.RA] + _inst.SIMM_16;
} }
else else
@ -194,6 +219,7 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
if (doop == Add && gpr.R(a).IsSimpleReg() && !carry) if (doop == Add && gpr.R(a).IsSimpleReg() && !carry)
{ {
needs_test = true;
LEA(32, gpr.RX(d), MDisp(gpr.RX(a), value)); LEA(32, gpr.RX(d), MDisp(gpr.RX(a), value));
} }
else else
@ -204,20 +230,18 @@ void Jit64::regimmop(int d, int a, bool binary, u32 value, Operation doop, void
} }
if (carry) if (carry)
FinalizeCarry(CC_C); FinalizeCarry(CC_C);
if (Rc)
ComputeRC(gpr.R(d));
} }
else if (doop == Add) else if (doop == Add)
{ {
// a == 0, which for these instructions imply value = 0 // a == 0, which for these instructions imply value = 0
gpr.SetImmediate32(d, value); gpr.SetImmediate32(d, value);
if (Rc)
ComputeRC(gpr.R(d));
} }
else else
{ {
_assert_msg_(DYNA_REC, 0, "WTF regimmop"); _assert_msg_(DYNA_REC, 0, "WTF regimmop");
} }
if (Rc)
ComputeRC(gpr.R(d), needs_test, doop != And || (value & 0x80000000));
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -335,6 +359,73 @@ void Jit64::DoMergedBranch()
} }
} }
void Jit64::DoMergedBranchCondition()
{
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
gpr.UnlockAll();
gpr.UnlockAllX();
FixupBranch pDontBranch;
if (test_bit & 8)
pDontBranch = J_CC(condition ? CC_GE : CC_L, true); // Test < 0, so jump over if >= 0.
else if (test_bit & 4)
pDontBranch = J_CC(condition ? CC_LE : CC_G, true); // Test > 0, so jump over if <= 0.
else if (test_bit & 2)
pDontBranch = J_CC(condition ? CC_NE : CC_E, true); // Test = 0, so jump over if != 0.
else // SO bit, do not branch (we don't emulate SO for cmp).
pDontBranch = J(true);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
DoMergedBranch();
SetJumpTarget(pDontBranch);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
}
}
// Variant of the merged-branch emitter for when the value under test is known at
// compile time. The outcome of the following conditional branch (js.next_inst) is
// decided here from the sign/zero-ness of val, so only the taken or the not-taken
// path is emitted.
//
// val: the value whose comparison against zero selects the LT/GT/EQ bit.
//
// NOTE(review): a branch on the SO bit is never taken here, including the
// branch-if-clear form — confirm that callers never merge such branches.
void Jit64::DoMergedBranchImmediate(s64 val)
{
	// The next instruction is consumed here: count it and tell the compile loop to skip it.
	js.downcountAmount++;
	js.skipnext = true;

	const int cr_bit = js.next_inst.BI & 3;
	const bool branch_if_set = (js.next_inst.BO & BO_BRANCH_IF_TRUE) != 0;

	gpr.UnlockAll();
	gpr.UnlockAllX();

	// Work out whether the selected CR bit would be set for this value.
	bool bit_set = false;
	bool testable = true;
	switch (cr_bit)
	{
	case 0: // LT
		bit_set = val < 0;
		break;
	case 1: // GT
		bit_set = val > 0;
		break;
	case 2: // EQ
		bit_set = val == 0;
		break;
	default: // SO bit, do not branch (we don't emulate SO for cmp).
		testable = false;
		break;
	}
	const bool take_branch = testable && (bit_set == branch_if_set);

	// Not taken and allowed to continue compiling past the branch: nothing to emit.
	if (!take_branch && analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
		return;

	gpr.Flush();
	fpr.Flush();
	if (take_branch)
		DoMergedBranch();
	else
		WriteExit(js.next_compilerPC + 4);
}
void Jit64::cmpXX(UGeckoInstruction inst) void Jit64::cmpXX(UGeckoInstruction inst)
{ {
// USES_CR // USES_CR
@ -379,49 +470,20 @@ void Jit64::cmpXX(UGeckoInstruction inst)
if (gpr.R(a).IsImm() && comparand.IsImm()) if (gpr.R(a).IsImm() && comparand.IsImm())
{ {
// Both registers contain immediate values, so we can pre-compile the compare result // Both registers contain immediate values, so we can pre-compile the compare result
u8 compareResult; s64 compareResult = signedCompare ? (s64)(s32)gpr.R(a).offset - (s64)(s32)comparand.offset :
if (signedCompare) (u64)(u32)gpr.R(a).offset - (u64)(u32)comparand.offset;
if (compareResult == (s32)compareResult)
{ {
if ((s32)gpr.R(a).offset == (s32)comparand.offset) MOV(64, PPCSTATE(cr_val[crf]), Imm32((u32)compareResult));
compareResult = CR_EQ;
else if ((s32)gpr.R(a).offset > (s32)comparand.offset)
compareResult = CR_GT;
else
compareResult = CR_LT;
} }
else else
{ {
if ((u32)gpr.R(a).offset == (u32)comparand.offset) MOV(64, R(RSCRATCH), Imm64(compareResult));
compareResult = CR_EQ;
else if ((u32)gpr.R(a).offset > (u32)comparand.offset)
compareResult = CR_GT;
else
compareResult = CR_LT;
}
MOV(64, R(RSCRATCH), Imm64(PPCCRToInternal(compareResult)));
MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH)); MOV(64, PPCSTATE(cr_val[crf]), R(RSCRATCH));
gpr.UnlockAll(); }
if (merge_branch) if (merge_branch)
{ DoMergedBranchImmediate(compareResult);
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
u8 conditionResult = (js.next_inst.BO & BO_BRANCH_IF_TRUE) ? test_bit : 0;
if ((compareResult & test_bit) == conditionResult)
{
gpr.Flush();
fpr.Flush();
DoMergedBranch();
}
else if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
}
}
} }
else else
{ {
@ -484,41 +546,7 @@ void Jit64::cmpXX(UGeckoInstruction inst)
} }
if (merge_branch) if (merge_branch)
{ DoMergedBranchCondition();
js.downcountAmount++;
js.skipnext = true;
int test_bit = 8 >> (js.next_inst.BI & 3);
bool condition = !!(js.next_inst.BO & BO_BRANCH_IF_TRUE);
// Test swapping (in the future, will be used to inline across branches the right way)
// if (rand() & 1)
// std::swap(destination1, destination2), condition = !condition;
gpr.UnlockAll();
FixupBranch pDontBranch;
if (test_bit & 8)
pDontBranch = J_CC(condition ? CC_GE : CC_L, true); // Test < 0, so jump over if >= 0.
else if (test_bit & 4)
pDontBranch = J_CC(condition ? CC_LE : CC_G, true); // Test > 0, so jump over if <= 0.
else if (test_bit & 2)
pDontBranch = J_CC(condition ? CC_NE : CC_E, true); // Test = 0, so jump over if != 0.
else // SO bit, do not branch (we don't emulate SO for cmp).
pDontBranch = J(true);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
DoMergedBranch();
SetJumpTarget(pDontBranch);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
}
}
} }
gpr.UnlockAll(); gpr.UnlockAll();
@ -529,6 +557,7 @@ void Jit64::boolX(UGeckoInstruction inst)
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, s = inst.RS, b = inst.RB; int a = inst.RA, s = inst.RS, b = inst.RB;
bool needs_test = false;
_dbg_assert_msg_(DYNA_REC, inst.OPCD == 31, "Invalid boolX"); _dbg_assert_msg_(DYNA_REC, inst.OPCD == 31, "Invalid boolX");
if (gpr.R(s).IsImm() && gpr.R(b).IsImm()) if (gpr.R(s).IsImm() && gpr.R(b).IsImm())
@ -549,11 +578,6 @@ void Jit64::boolX(UGeckoInstruction inst)
gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset); gpr.SetImmediate32(a, (u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset);
else if (inst.SUBOP10 == 284) // eqvx else if (inst.SUBOP10 == 284) // eqvx
gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset)); gpr.SetImmediate32(a, ~((u32)gpr.R(s).offset ^ (u32)gpr.R(b).offset));
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
} }
else if (s == b) else if (s == b)
{ {
@ -564,8 +588,8 @@ void Jit64::boolX(UGeckoInstruction inst)
gpr.Lock(a,s); gpr.Lock(a,s);
gpr.BindToRegister(a, false, true); gpr.BindToRegister(a, false, true);
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
gpr.UnlockAll();
} }
needs_test = true;
} }
else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */)) else if ((inst.SUBOP10 == 476 /* nandx */) || (inst.SUBOP10 == 124 /* norx */))
{ {
@ -580,7 +604,6 @@ void Jit64::boolX(UGeckoInstruction inst)
gpr.KillImmediate(a, true, true); gpr.KillImmediate(a, true, true);
} }
NOT(32, gpr.R(a)); NOT(32, gpr.R(a));
gpr.UnlockAll();
} }
else if ((inst.SUBOP10 == 412 /* orcx */) || (inst.SUBOP10 == 284 /* eqvx */)) else if ((inst.SUBOP10 == 412 /* orcx */) || (inst.SUBOP10 == 284 /* eqvx */))
{ {
@ -594,8 +617,6 @@ void Jit64::boolX(UGeckoInstruction inst)
{ {
PanicAlert("WTF!"); PanicAlert("WTF!");
} }
if (inst.Rc)
ComputeRC(gpr.R(a));
} }
else if ((a == s) || (a == b)) else if ((a == s) || (a == b))
{ {
@ -662,9 +683,6 @@ void Jit64::boolX(UGeckoInstruction inst)
{ {
PanicAlert("WTF"); PanicAlert("WTF");
} }
if (inst.Rc)
ComputeRC(gpr.R(a));
gpr.UnlockAll();
} }
else else
{ {
@ -720,10 +738,10 @@ void Jit64::boolX(UGeckoInstruction inst)
{ {
PanicAlert("WTF!"); PanicAlert("WTF!");
} }
if (inst.Rc)
ComputeRC(gpr.R(a));
gpr.UnlockAll();
} }
if (inst.Rc)
ComputeRC(gpr.R(a), needs_test);
gpr.UnlockAll();
} }
void Jit64::extsXx(UGeckoInstruction inst) void Jit64::extsXx(UGeckoInstruction inst)
@ -736,28 +754,17 @@ void Jit64::extsXx(UGeckoInstruction inst)
if (gpr.R(s).IsImm()) if (gpr.R(s).IsImm())
{ {
gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.R(s).offset : (s8)gpr.R(s).offset)); gpr.SetImmediate32(a, (u32)(s32)(size == 16 ? (s16)gpr.R(s).offset : (s8)gpr.R(s).offset));
if (inst.Rc)
ComputeRC(gpr.R(a));
} }
else else
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
gpr.BindToRegister(a, a == s, true); gpr.BindToRegister(a, a == s, true);
// exts is moderately commonly used with inst.Rc, so try to optimize it.
if (inst.Rc)
{
// Only do one movsx; the movzx is free on most modern CPUs.
MOVSX(64, size, gpr.RX(a), gpr.R(s));
MOV(64, PPCSTATE(cr_val[0]), gpr.R(a));
MOVZX(64, 32, gpr.RX(a), gpr.R(a));
}
else
{
MOVSX(32, size, gpr.RX(a), gpr.R(s)); MOVSX(32, size, gpr.RX(a), gpr.R(s));
} }
if (inst.Rc)
ComputeRC(gpr.R(a));
gpr.UnlockAll(); gpr.UnlockAll();
} }
}
void Jit64::subfic(UGeckoInstruction inst) void Jit64::subfic(UGeckoInstruction inst)
{ {
@ -811,8 +818,6 @@ void Jit64::subfx(UGeckoInstruction inst)
{ {
s32 i = (s32)gpr.R(b).offset, j = (s32)gpr.R(a).offset; s32 i = (s32)gpr.R(b).offset, j = (s32)gpr.R(a).offset;
gpr.SetImmediate32(d, i - j); gpr.SetImmediate32(d, i - j);
if (inst.Rc)
ComputeRC(gpr.R(d));
if (inst.OE) if (inst.OE)
GenerateConstantOverflow((s64)i - (s64)j); GenerateConstantOverflow((s64)i - (s64)j);
} }
@ -837,10 +842,10 @@ void Jit64::subfx(UGeckoInstruction inst)
} }
if (inst.OE) if (inst.OE)
GenerateOverflow(); GenerateOverflow();
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
if (inst.Rc)
ComputeRC(gpr.R(d), false);
gpr.UnlockAll();
} }
void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow) void Jit64::MultiplyImmediate(u32 imm, int a, int d, bool overflow)
@ -932,10 +937,8 @@ void Jit64::mullwx(UGeckoInstruction inst)
s32 i = (s32)gpr.R(a).offset, j = (s32)gpr.R(b).offset; s32 i = (s32)gpr.R(a).offset, j = (s32)gpr.R(b).offset;
gpr.SetImmediate32(d, i * j); gpr.SetImmediate32(d, i * j);
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow((s64)i * (s64)j); GenerateConstantOverflow((s64)i * (s64)j);
} }
}
else else
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
@ -960,15 +963,11 @@ void Jit64::mullwx(UGeckoInstruction inst)
IMUL(32, gpr.RX(d), gpr.R(a)); IMUL(32, gpr.RX(d), gpr.R(a));
} }
if (inst.OE) if (inst.OE)
{
GenerateOverflow(); GenerateOverflow();
} }
gpr.UnlockAll();
}
if (inst.Rc) if (inst.Rc)
{
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
} gpr.UnlockAll();
} }
void Jit64::mulhwXx(UGeckoInstruction inst) void Jit64::mulhwXx(UGeckoInstruction inst)
@ -997,13 +996,12 @@ void Jit64::mulhwXx(UGeckoInstruction inst)
IMUL(32, gpr.R(b)); IMUL(32, gpr.R(b));
else else
MUL(32, gpr.R(b)); MUL(32, gpr.R(b));
gpr.UnlockAll();
gpr.UnlockAllX();
MOV(32, gpr.R(d), R(EDX)); MOV(32, gpr.R(d), R(EDX));
} }
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d));
gpr.UnlockAll();
gpr.UnlockAllX();
} }
void Jit64::divwux(UGeckoInstruction inst) void Jit64::divwux(UGeckoInstruction inst)
@ -1018,19 +1016,15 @@ void Jit64::divwux(UGeckoInstruction inst)
{ {
gpr.SetImmediate32(d, 0); gpr.SetImmediate32(d, 0);
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(true); GenerateConstantOverflow(true);
} }
}
else else
{ {
gpr.SetImmediate32(d, (u32)gpr.R(a).offset / (u32)gpr.R(b).offset); gpr.SetImmediate32(d, (u32)gpr.R(a).offset / (u32)gpr.R(b).offset);
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(false); GenerateConstantOverflow(false);
} }
} }
}
else if (gpr.R(b).IsImm()) else if (gpr.R(b).IsImm())
{ {
u32 divisor = (u32)gpr.R(b).offset; u32 divisor = (u32)gpr.R(b).offset;
@ -1038,10 +1032,8 @@ void Jit64::divwux(UGeckoInstruction inst)
{ {
gpr.SetImmediate32(d, 0); gpr.SetImmediate32(d, 0);
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(true); GenerateConstantOverflow(true);
} }
}
else else
{ {
u32 shift = 31; u32 shift = 31;
@ -1096,11 +1088,8 @@ void Jit64::divwux(UGeckoInstruction inst)
} }
} }
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(false); GenerateConstantOverflow(false);
} }
gpr.UnlockAll();
}
} }
else else
{ {
@ -1128,16 +1117,13 @@ void Jit64::divwux(UGeckoInstruction inst)
GenerateConstantOverflow(false); GenerateConstantOverflow(false);
} }
SetJumpTarget(end); SetJumpTarget(end);
}
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
}
void Jit64::divwx(UGeckoInstruction inst) void Jit64::divwx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
@ -1151,19 +1137,15 @@ void Jit64::divwx(UGeckoInstruction inst)
{ {
gpr.SetImmediate32(d, (i >> 31) ^ j); gpr.SetImmediate32(d, (i >> 31) ^ j);
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(true); GenerateConstantOverflow(true);
} }
}
else else
{ {
gpr.SetImmediate32(d, i / j); gpr.SetImmediate32(d, i / j);
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(false); GenerateConstantOverflow(false);
} }
} }
}
else else
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
@ -1203,35 +1185,27 @@ void Jit64::divwx(UGeckoInstruction inst)
} }
SetJumpTarget(end1); SetJumpTarget(end1);
SetJumpTarget(end2); SetJumpTarget(end2);
}
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
}
void Jit64::addx(UGeckoInstruction inst) void Jit64::addx(UGeckoInstruction inst)
{ {
INSTRUCTION_START INSTRUCTION_START
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, d = inst.RD; int a = inst.RA, b = inst.RB, d = inst.RD;
bool needs_test = false;
if (gpr.R(a).IsImm() && gpr.R(b).IsImm()) if (gpr.R(a).IsImm() && gpr.R(b).IsImm())
{ {
s32 i = (s32)gpr.R(a).offset, j = (s32)gpr.R(b).offset; s32 i = (s32)gpr.R(a).offset, j = (s32)gpr.R(b).offset;
gpr.SetImmediate32(d, i + j); gpr.SetImmediate32(d, i + j);
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow((s64)i + (s64)j); GenerateConstantOverflow((s64)i + (s64)j);
} }
}
else if ((d == a) || (d == b)) else if ((d == a) || (d == b))
{ {
int operand = ((d == a) ? b : a); int operand = ((d == a) ? b : a);
@ -1240,18 +1214,13 @@ void Jit64::addx(UGeckoInstruction inst)
ADD(32, gpr.R(d), gpr.R(operand)); ADD(32, gpr.R(d), gpr.R(operand));
if (inst.OE) if (inst.OE)
GenerateOverflow(); GenerateOverflow();
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && !inst.OE) else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg() && !inst.OE)
{ {
gpr.Lock(a, b, d); gpr.Lock(a, b, d);
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0)); LEA(32, gpr.RX(d), MComplex(gpr.RX(a), gpr.RX(b), 1, 0));
if (inst.Rc) needs_test = true;
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
else else
{ {
@ -1261,10 +1230,10 @@ void Jit64::addx(UGeckoInstruction inst)
ADD(32, gpr.R(d), gpr.R(b)); ADD(32, gpr.R(d), gpr.R(b));
if (inst.OE) if (inst.OE)
GenerateOverflow(); GenerateOverflow();
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
if (inst.Rc)
ComputeRC(gpr.R(d), needs_test);
gpr.UnlockAll();
} }
void Jit64::arithXex(UGeckoInstruction inst) void Jit64::arithXex(UGeckoInstruction inst)
@ -1316,7 +1285,7 @@ void Jit64::arithXex(UGeckoInstruction inst)
} }
FinalizeCarryOverflow(inst.OE, invertedCarry); FinalizeCarryOverflow(inst.OE, invertedCarry);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d), false);
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -1355,7 +1324,7 @@ void Jit64::arithcx(UGeckoInstruction inst)
FinalizeCarryOverflow(inst.OE, !add); FinalizeCarryOverflow(inst.OE, !add);
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(d)); ComputeRC(gpr.R(d), false);
gpr.UnlockAll(); gpr.UnlockAll();
} }
@ -1366,10 +1335,6 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
// rlwinm is commonly used as a branch test, second only to the more obvious cmpw.
// since it's almost never used with any check other than beq, only support beq for simplicity.
bool merge_branch = inst.Rc && CheckMergedBranch(0) && (js.next_inst.BI & 3) == 2;
if (gpr.R(s).IsImm()) if (gpr.R(s).IsImm())
{ {
u32 result = (int)gpr.R(s).offset; u32 result = (int)gpr.R(s).offset;
@ -1386,10 +1351,10 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH; bool right_shift = inst.SH && inst.ME == 31 && inst.MB == 32 - inst.SH;
u32 mask = Helper_Mask(inst.MB, inst.ME); u32 mask = Helper_Mask(inst.MB, inst.ME);
bool simple_mask = mask == 0xff || mask == 0xffff; bool simple_mask = mask == 0xff || mask == 0xffff;
// in case of a merged branch, track whether or not we've set flags. // In case of a merged branch, track whether or not we've set flags.
// if not, we need to do a TEST later to get them. // If not, we need to do a test later to get them.
bool needs_test = false; bool needs_test = true;
// if we know the high bit can't be set, we can avoid doing a sign extend for flag storage // If we know the high bit can't be set, we can avoid doing a sign extend for flag storage.
bool needs_sext = true; bool needs_sext = true;
int mask_size = inst.ME - inst.MB + 1; int mask_size = inst.ME - inst.MB + 1;
@ -1398,13 +1363,11 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3) if (a != s && left_shift && gpr.R(s).IsSimpleReg() && inst.SH <= 3)
{ {
LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0)); LEA(32, gpr.RX(a), MScaled(gpr.RX(s), SCALE_1 << inst.SH, 0));
needs_test = true;
} }
// common optimized case: byte/word extract // common optimized case: byte/word extract
else if (simple_mask && !(inst.SH & (mask_size - 1))) else if (simple_mask && !(inst.SH & (mask_size - 1)))
{ {
MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0)); MOVZX(32, mask_size, gpr.RX(a), ExtractFromReg(s, inst.SH ? (32 - inst.SH) >> 3 : 0));
needs_test = true;
needs_sext = false; needs_sext = false;
} }
// another optimized special case: byte/word extract plus shift // another optimized special case: byte/word extract plus shift
@ -1436,55 +1399,17 @@ void Jit64::rlwinmx(UGeckoInstruction inst)
if (!(inst.MB == 0 && inst.ME == 31)) if (!(inst.MB == 0 && inst.ME == 31))
{ {
// we need flags if we're merging the branch // we need flags if we're merging the branch
if (merge_branch) if (inst.Rc && CheckMergedBranch(0))
AND(32, gpr.R(a), Imm32(mask)); AND(32, gpr.R(a), Imm32(mask));
else else
AndWithMask(gpr.RX(a), mask); AndWithMask(gpr.RX(a), mask);
needs_sext = inst.MB == 0; needs_sext = inst.MB == 0;
} needs_test = false;
else
{
needs_test = true;
} }
} }
} }
if (merge_branch) if (inst.Rc)
{ ComputeRC(gpr.R(a), needs_test, needs_sext);
js.downcountAmount++;
js.skipnext = true;
if (needs_sext)
{
MOVSX(64, 32, RSCRATCH, gpr.R(a));
MOV(64, M(&PowerPC::ppcState.cr_val[0]), R(RSCRATCH));
}
else
{
MOV(64, M(&PowerPC::ppcState.cr_val[0]), gpr.R(a));
}
if (needs_test)
TEST(32, gpr.R(a), gpr.R(a));
gpr.UnlockAll();
FixupBranch dont_branch = J_CC((js.next_inst.BO & BO_BRANCH_IF_TRUE) ? CC_NE : CC_E, true);
gpr.Flush(FLUSH_MAINTAIN_STATE);
fpr.Flush(FLUSH_MAINTAIN_STATE);
DoMergedBranch();
SetJumpTarget(dont_branch);
if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE))
{
gpr.Flush();
fpr.Flush();
WriteExit(js.next_compilerPC + 4);
}
}
else if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
gpr.UnlockAll(); gpr.UnlockAll();
} }
} }
@ -1508,9 +1433,10 @@ void Jit64::rlwimix(UGeckoInstruction inst)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
u32 mask = Helper_Mask(inst.MB, inst.ME); u32 mask = Helper_Mask(inst.MB, inst.ME);
bool needs_test = false;
if (mask == 0 || (a == s && inst.SH == 0)) if (mask == 0 || (a == s && inst.SH == 0))
{ {
// nothing to do needs_test = true;
} }
else if (mask == 0xFFFFFFFF) else if (mask == 0xFFFFFFFF)
{ {
@ -1519,6 +1445,7 @@ void Jit64::rlwimix(UGeckoInstruction inst)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
if (inst.SH) if (inst.SH)
ROL(32, gpr.R(a), Imm8(inst.SH)); ROL(32, gpr.R(a), Imm8(inst.SH));
needs_test = true;
} }
else if(gpr.R(s).IsImm()) else if(gpr.R(s).IsImm())
{ {
@ -1584,7 +1511,7 @@ void Jit64::rlwimix(UGeckoInstruction inst)
XOR(32, gpr.R(a), gpr.R(s)); XOR(32, gpr.R(a), gpr.R(s));
} }
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a), needs_test);
gpr.UnlockAll(); gpr.UnlockAll();
} }
} }
@ -1599,10 +1526,6 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
if (gpr.R(b).IsImm() && gpr.R(s).IsImm()) if (gpr.R(b).IsImm() && gpr.R(s).IsImm())
{ {
gpr.SetImmediate32(a, _rotl((u32)gpr.R(s).offset, (u32)gpr.R(b).offset & 0x1F) & mask); gpr.SetImmediate32(a, _rotl((u32)gpr.R(s).offset, (u32)gpr.R(b).offset & 0x1F) & mask);
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
} }
else else
{ {
@ -1616,13 +1539,17 @@ void Jit64::rlwnmx(UGeckoInstruction inst)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
} }
ROL(32, gpr.R(a), R(ECX)); ROL(32, gpr.R(a), R(ECX));
// we need flags if we're merging the branch
if (inst.Rc && CheckMergedBranch(0))
AND(32, gpr.R(a), Imm32(mask));
else
AndWithMask(gpr.RX(a), mask); AndWithMask(gpr.RX(a), mask);
}
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a), false);
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
}
void Jit64::negx(UGeckoInstruction inst) void Jit64::negx(UGeckoInstruction inst)
{ {
@ -1634,16 +1561,9 @@ void Jit64::negx(UGeckoInstruction inst)
if (gpr.R(a).IsImm()) if (gpr.R(a).IsImm())
{ {
gpr.SetImmediate32(d, ~((u32)gpr.R(a).offset) + 1); gpr.SetImmediate32(d, ~((u32)gpr.R(a).offset) + 1);
if (inst.Rc)
{
ComputeRC(gpr.R(d));
}
if (inst.OE) if (inst.OE)
{
GenerateConstantOverflow(gpr.R(d).offset == 0x80000000); GenerateConstantOverflow(gpr.R(d).offset == 0x80000000);
} }
}
else else
{ {
gpr.Lock(a, d); gpr.Lock(a, d);
@ -1653,10 +1573,10 @@ void Jit64::negx(UGeckoInstruction inst)
NEG(32, gpr.R(d)); NEG(32, gpr.R(d));
if (inst.OE) if (inst.OE)
GenerateOverflow(); GenerateOverflow();
if (inst.Rc)
ComputeRC(gpr.R(d));
gpr.UnlockAll();
} }
if (inst.Rc)
ComputeRC(gpr.R(d), false);
gpr.UnlockAll();
} }
void Jit64::srwx(UGeckoInstruction inst) void Jit64::srwx(UGeckoInstruction inst)
@ -1684,15 +1604,13 @@ void Jit64::srwx(UGeckoInstruction inst)
MOV(32, gpr.R(a), gpr.R(s)); MOV(32, gpr.R(a), gpr.R(s));
} }
SHR(64, gpr.R(a), R(ECX)); SHR(64, gpr.R(a), R(ECX));
}
// Shift of 0 doesn't update flags, so we need to test just in case
if (inst.Rc)
ComputeRC(gpr.R(a));
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
// Shift of 0 doesn't update flags, so compare manually just in case
if (inst.Rc)
{
ComputeRC(gpr.R(a));
}
}
void Jit64::slwx(UGeckoInstruction inst) void Jit64::slwx(UGeckoInstruction inst)
{ {
@ -1707,10 +1625,8 @@ void Jit64::slwx(UGeckoInstruction inst)
u32 amount = (u32)gpr.R(b).offset; u32 amount = (u32)gpr.R(b).offset;
gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (u32)gpr.R(s).offset << amount); gpr.SetImmediate32(a, (amount & 0x20) ? 0 : (u32)gpr.R(s).offset << amount);
if (inst.Rc) if (inst.Rc)
{
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
} }
}
else else
{ {
// no register choice // no register choice
@ -1724,11 +1640,11 @@ void Jit64::slwx(UGeckoInstruction inst)
if (inst.Rc) if (inst.Rc)
{ {
AND(32, gpr.R(a), gpr.R(a)); AND(32, gpr.R(a), gpr.R(a));
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a), false);
} }
else else
{ {
MOVZX(64, 32, gpr.R(a).GetSimpleReg(), gpr.R(a)); MOVZX(64, 32, gpr.RX(a), gpr.R(a));
} }
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
@ -1743,6 +1659,7 @@ void Jit64::srawx(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int b = inst.RB; int b = inst.RB;
int s = inst.RS; int s = inst.RS;
gpr.FlushLockX(ECX); gpr.FlushLockX(ECX);
gpr.Lock(a, s, b); gpr.Lock(a, s, b);
gpr.BindToRegister(a, (a == s || a == b), true); gpr.BindToRegister(a, (a == s || a == b), true);
@ -1762,10 +1679,10 @@ void Jit64::srawx(UGeckoInstruction inst)
SHR(64, gpr.R(a), Imm8(32)); SHR(64, gpr.R(a), Imm8(32));
} }
FinalizeCarry(CC_NZ); FinalizeCarry(CC_NZ);
gpr.UnlockAll();
gpr.UnlockAllX();
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a));
gpr.UnlockAll();
gpr.UnlockAllX();
} }
void Jit64::srawix(UGeckoInstruction inst) void Jit64::srawix(UGeckoInstruction inst)
@ -1775,6 +1692,7 @@ void Jit64::srawix(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
int amount = inst.SH; int amount = inst.SH;
if (amount != 0) if (amount != 0)
{ {
gpr.Lock(a, s); gpr.Lock(a, s);
@ -1828,6 +1746,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
bool needs_test = false;
if (gpr.R(s).IsImm()) if (gpr.R(s).IsImm())
{ {
@ -1847,6 +1766,7 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
if (cpu_info.bLZCNT) if (cpu_info.bLZCNT)
{ {
LZCNT(32, gpr.RX(a), gpr.R(s)); LZCNT(32, gpr.RX(a), gpr.R(s));
needs_test = true;
} }
else else
{ {
@ -1856,11 +1776,11 @@ void Jit64::cntlzwx(UGeckoInstruction inst)
SetJumpTarget(gotone); SetJumpTarget(gotone);
XOR(32, gpr.R(a), Imm8(0x1f)); // flip order XOR(32, gpr.R(a), Imm8(0x1f)); // flip order
} }
gpr.UnlockAll();
} }
if (inst.Rc) if (inst.Rc)
ComputeRC(gpr.R(a)); ComputeRC(gpr.R(a), needs_test, false);
gpr.UnlockAll();
} }
void Jit64::twx(UGeckoInstruction inst) void Jit64::twx(UGeckoInstruction inst)

View File

@ -407,11 +407,6 @@ static bool isCmp(const CodeOp& a)
return (a.inst.OPCD == 10 || a.inst.OPCD == 11) || (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32)); return (a.inst.OPCD == 10 || a.inst.OPCD == 11) || (a.inst.OPCD == 31 && (a.inst.SUBOP10 == 0 || a.inst.SUBOP10 == 32));
} }
// True when the op has primary opcode 21 (rlwinm, going by the function name) and its
// Rc bit is set.
static bool isRlwinm_rc(const CodeOp& a)
{
	if (a.inst.OPCD != 21)
		return false;
	return a.inst.Rc;
}
static bool isCarryOp(const CodeOp& a) static bool isCarryOp(const CodeOp& a)
{ {
return (a.opinfo->flags & FL_SET_CA) && !(a.opinfo->flags & FL_SET_OE) && a.opinfo->type == OPTYPE_INTEGER; return (a.opinfo->flags & FL_SET_CA) && !(a.opinfo->flags & FL_SET_OE) && a.opinfo->type == OPTYPE_INTEGER;
@ -437,7 +432,7 @@ void PPCAnalyzer::ReorderInstructionsCore(u32 instructions, CodeOp* code, bool r
CodeOp &b = code[i + increment]; CodeOp &b = code[i + increment];
// Reorder integer compares, rlwinm., and carry-affecting ops // Reorder integer compares, rlwinm., and carry-affecting ops
// (if we add more merged branch instructions, add them here!) // (if we add more merged branch instructions, add them here!)
if ((type == REORDER_CARRY && isCarryOp(a)) || (type == REORDER_CMP && (isCmp(a) || isRlwinm_rc(a)))) if ((type == REORDER_CARRY && isCarryOp(a)) || (type == REORDER_CMP && (isCmp(a) || a.outputCR0)))
{ {
// once we're next to a carry instruction, don't move away! // once we're next to a carry instruction, don't move away!
if (type == REORDER_CARRY && i != start) if (type == REORDER_CARRY && i != start)
@ -469,8 +464,8 @@ void PPCAnalyzer::ReorderInstructions(u32 instructions, CodeOp *code)
// to get pairs like addc/adde next to each other. // to get pairs like addc/adde next to each other.
if (HasOption(OPTION_CARRY_MERGE)) if (HasOption(OPTION_CARRY_MERGE))
{ {
ReorderInstructionsCore(instructions, code, true, REORDER_CARRY);
ReorderInstructionsCore(instructions, code, false, REORDER_CARRY); ReorderInstructionsCore(instructions, code, false, REORDER_CARRY);
ReorderInstructionsCore(instructions, code, true, REORDER_CARRY);
} }
if (HasOption(OPTION_BRANCH_MERGE)) if (HasOption(OPTION_BRANCH_MERGE))
ReorderInstructionsCore(instructions, code, false, REORDER_CMP); ReorderInstructionsCore(instructions, code, false, REORDER_CMP);