Merge pull request #4400 from degasus/master

JitArm64: Store the carry flag within the host flag.
This commit is contained in:
Markus Wick 2016-11-01 23:29:50 +01:00 committed by GitHub
commit 24fa4c9e57
6 changed files with 149 additions and 50 deletions

View File

@ -54,6 +54,7 @@ void JitArm64::Init()
code_block.m_gpa = &js.gpa; code_block.m_gpa = &js.gpa;
code_block.m_fpa = &js.fpa; code_block.m_fpa = &js.fpa;
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE);
analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE);
m_supports_cycle_counter = HasCycleCounters(); m_supports_cycle_counter = HasCycleCounters();
} }
@ -79,6 +80,7 @@ void JitArm64::Shutdown()
void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) void JitArm64::FallBackToInterpreter(UGeckoInstruction inst)
{ {
FlushCarry();
gpr.Flush(FlushMode::FLUSH_ALL, js.op); gpr.Flush(FlushMode::FLUSH_ALL, js.op);
fpr.Flush(FlushMode::FLUSH_ALL, js.op); fpr.Flush(FlushMode::FLUSH_ALL, js.op);
@ -419,6 +421,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
js.downcountAmount = 0; js.downcountAmount = 0;
js.skipInstructions = 0; js.skipInstructions = 0;
js.curBlock = b; js.curBlock = b;
js.carryFlagSet = false;
PPCAnalyst::CodeOp* ops = code_buf->codebuffer; PPCAnalyst::CodeOp* ops = code_buf->codebuffer;
@ -603,6 +606,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB
} }
JitArm64Tables::CompileInstruction(ops[i]); JitArm64Tables::CompileInstruction(ops[i]);
if (!MergeAllowedNextInstructions(1) || js.op[1].opinfo->type != OPTYPE_INTEGER)
FlushCarry();
// If we have a register that will never be used again, flush it. // If we have a register that will never be used again, flush it.
gpr.StoreRegisters(~ops[i].gprInUse); gpr.StoreRegisters(~ops[i].gprInUse);

View File

@ -241,6 +241,7 @@ private:
void ComputeRC(u64 imm, int crf = 0, bool needs_sext = true); void ComputeRC(u64 imm, int crf = 0, bool needs_sext = true);
void ComputeCarry(bool Carry); void ComputeCarry(bool Carry);
void ComputeCarry(); void ComputeCarry();
void FlushCarry();
void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false); void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false);

View File

@ -313,8 +313,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
} }
SetJumpTarget(continue1); SetJumpTarget(continue1);
STR(INDEX_UNSIGNED, XA, PPC_REG, STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA); gpr.Unlock(WA);
} }

View File

@ -49,6 +49,8 @@ void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext)
void JitArm64::ComputeCarry(bool Carry) void JitArm64::ComputeCarry(bool Carry)
{ {
js.carryFlagSet = false;
if (!js.op->wantsCA) if (!js.op->wantsCA)
return; return;
@ -66,13 +68,31 @@ void JitArm64::ComputeCarry(bool Carry)
void JitArm64::ComputeCarry() void JitArm64::ComputeCarry()
{ {
js.carryFlagSet = false;
if (!js.op->wantsCA) if (!js.op->wantsCA)
return; return;
js.carryFlagSet = true;
if (MergeAllowedNextInstructions(1) && js.op[1].opinfo->type == OPTYPE_INTEGER)
{
return;
}
FlushCarry();
}
void JitArm64::FlushCarry()
{
if (!js.carryFlagSet)
return;
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
CSINC(WA, WSP, WSP, CC_CC); CSINC(WA, WSP, WSP, CC_CC);
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA); gpr.Unlock(WA);
js.carryFlagSet = false;
} }
void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32),
@ -403,8 +423,7 @@ void JitArm64::cmp(UGeckoInstruction inst)
SXTW(XB, RB); SXTW(XB, RB);
SUB(XA, XA, XB); SUB(XA, XA, XB);
STR(INDEX_UNSIGNED, XA, PPC_REG, STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA, WB); gpr.Unlock(WA, WB);
} }
@ -431,8 +450,7 @@ void JitArm64::cmpl(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b))); SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b)));
STR(INDEX_UNSIGNED, XA, PPC_REG, STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA); gpr.Unlock(WA);
} }
@ -482,8 +500,7 @@ void JitArm64::cmpli(UGeckoInstruction inst)
SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA); SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA);
STR(INDEX_UNSIGNED, XA, PPC_REG, STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf]));
PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf));
gpr.Unlock(WA); gpr.Unlock(WA);
} }
@ -559,6 +576,7 @@ void JitArm64::srawix(UGeckoInstruction inst)
int a = inst.RA; int a = inst.RA;
int s = inst.RS; int s = inst.RS;
int amount = inst.SH; int amount = inst.SH;
bool inplace_carry = MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags;
if (gpr.IsImm(s)) if (gpr.IsImm(s))
{ {
@ -570,30 +588,54 @@ void JitArm64::srawix(UGeckoInstruction inst)
else else
ComputeCarry(false); ComputeCarry(false);
} }
else if (amount != 0) else if (amount == 0)
{ {
gpr.BindToRegister(a, a == s); gpr.BindToRegister(a, a == s);
ARM64Reg RA = gpr.R(a); ARM64Reg RA = gpr.R(a);
ARM64Reg RS = gpr.R(s); ARM64Reg RS = gpr.R(s);
ARM64Reg WA = gpr.GetReg(); MOV(RA, RS);
ComputeCarry(false);
ORR(WA, WSP, RS, ArithOption(RS, ST_LSL, 32 - amount));
ORR(RA, WSP, RS, ArithOption(RS, ST_ASR, amount));
if (inst.Rc)
ComputeRC(RA, 0);
ANDS(WSP, WA, RA);
CSINC(WA, WSP, WSP, CC_EQ);
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
gpr.Unlock(WA);
} }
else else
{ {
gpr.BindToRegister(a, a == s); gpr.BindToRegister(a, a == s);
ARM64Reg RA = gpr.R(a); ARM64Reg RA = gpr.R(a);
ARM64Reg RS = gpr.R(s); ARM64Reg RS = gpr.R(s);
MOV(RA, RS);
STRB(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(xer_ca)); if (js.op->wantsCA)
{
ARM64Reg WA = gpr.GetReg();
ARM64Reg dest = inplace_carry ? WA : WSP;
if (a != s)
{
ASR(RA, RS, amount);
ANDS(dest, RA, RS, ArithOption(RS, ST_LSL, 32 - amount));
}
else
{
LSL(WA, RS, 32 - amount);
ASR(RA, RS, amount);
ANDS(dest, WA, RA);
}
if (inplace_carry)
{
CMP(dest, 1);
ComputeCarry();
}
else
{
CSINC(WA, WSP, WSP, CC_EQ);
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
}
gpr.Unlock(WA);
}
else
{
ASR(RA, RS, amount);
}
if (inst.Rc)
ComputeRC(RA, 0);
} }
} }
@ -734,7 +776,12 @@ void JitArm64::addzex(UGeckoInstruction inst)
int a = inst.RA, d = inst.RD; int a = inst.RA, d = inst.RD;
if (d == a) if (js.carryFlagSet)
{
gpr.BindToRegister(d, d == a);
ADCS(gpr.R(d), gpr.R(a), WZR);
}
else if (d == a)
{ {
gpr.BindToRegister(d, true); gpr.BindToRegister(d, true);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
@ -792,8 +839,16 @@ void JitArm64::subfex(UGeckoInstruction inst)
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); if (js.carryFlagSet)
ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d)); {
MOVI2R(WA, ~i + j, gpr.R(d));
ADC(gpr.R(d), WA, WZR);
}
else
{
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d));
}
gpr.Unlock(WA); gpr.Unlock(WA);
bool must_have_carry = Interpreter::Helper_Carry(~i, j); bool must_have_carry = Interpreter::Helper_Carry(~i, j);
@ -818,8 +873,11 @@ void JitArm64::subfex(UGeckoInstruction inst)
gpr.BindToRegister(d, d == a || d == b); gpr.BindToRegister(d, d == a || d == b);
// upload the carry state // upload the carry state
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); if (!js.carryFlagSet)
CMP(WA, 1); {
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
CMP(WA, 1);
}
// d = ~a + b + carry; // d = ~a + b + carry;
if (gpr.IsImm(a)) if (gpr.IsImm(a))
@ -879,11 +937,19 @@ void JitArm64::subfzex(UGeckoInstruction inst)
gpr.BindToRegister(d, d == a); gpr.BindToRegister(d, d == a);
ARM64Reg WA = gpr.GetReg(); if (js.carryFlagSet)
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); {
MVN(gpr.R(d), gpr.R(a)); MVN(gpr.R(d), gpr.R(a));
ADDS(gpr.R(d), gpr.R(d), WA); ADCS(gpr.R(d), gpr.R(d), WZR);
gpr.Unlock(WA); }
else
{
ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
MVN(gpr.R(d), gpr.R(a));
ADDS(gpr.R(d), gpr.R(d), WA);
gpr.Unlock(WA);
}
ComputeCarry(); ComputeCarry();
@ -934,8 +1000,16 @@ void JitArm64::addex(UGeckoInstruction inst)
gpr.BindToRegister(d, false); gpr.BindToRegister(d, false);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); if (js.carryFlagSet)
ADDI2R(gpr.R(d), WA, i + j, gpr.R(d)); {
MOVI2R(WA, i + j);
ADC(gpr.R(d), WA, WZR);
}
else
{
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
ADDI2R(gpr.R(d), WA, i + j, gpr.R(d));
}
gpr.Unlock(WA); gpr.Unlock(WA);
bool must_have_carry = Interpreter::Helper_Carry(i, j); bool must_have_carry = Interpreter::Helper_Carry(i, j);
@ -959,10 +1033,13 @@ void JitArm64::addex(UGeckoInstruction inst)
gpr.BindToRegister(d, d == a || d == b); gpr.BindToRegister(d, d == a || d == b);
// upload the carry state // upload the carry state
ARM64Reg WA = gpr.GetReg(); if (!js.carryFlagSet)
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); {
CMP(WA, 1); ARM64Reg WA = gpr.GetReg();
gpr.Unlock(WA); LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
CMP(WA, 1);
gpr.Unlock(WA);
}
// d = a + b + carry; // d = a + b + carry;
ADCS(gpr.R(d), gpr.R(a), gpr.R(b)); ADCS(gpr.R(d), gpr.R(a), gpr.R(b));
@ -1076,6 +1153,8 @@ void JitArm64::divwx(UGeckoInstruction inst)
} }
else else
{ {
FlushCarry();
gpr.BindToRegister(d, d == a || d == b); gpr.BindToRegister(d, d == a || d == b);
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
@ -1205,6 +1284,7 @@ void JitArm64::srawx(UGeckoInstruction inst)
JITDISABLE(bJITIntegerOff); JITDISABLE(bJITIntegerOff);
int a = inst.RA, b = inst.RB, s = inst.RS; int a = inst.RA, b = inst.RB, s = inst.RS;
bool inplace_carry = MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags;
if (gpr.IsImm(b) && gpr.IsImm(s)) if (gpr.IsImm(b) && gpr.IsImm(s))
{ {
@ -1225,10 +1305,16 @@ void JitArm64::srawx(UGeckoInstruction inst)
ComputeRC(gpr.GetImm(a), 0); ComputeRC(gpr.GetImm(a), 0);
return; return;
} }
else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0x20) == 0 && !js.op->wantsCA)
if (gpr.IsImm(b) && !js.op->wantsCA)
{ {
int amount = gpr.GetImm(b);
if (amount & 0x20)
amount = 0x1F;
else
amount &= 0x1F;
gpr.BindToRegister(a, a == s); gpr.BindToRegister(a, a == s);
ASR(gpr.R(a), gpr.R(a), gpr.GetImm(b) & 0x1F); ASR(gpr.R(a), gpr.R(s), amount);
} }
else if (!js.op->wantsCA) else if (!js.op->wantsCA)
{ {
@ -1276,7 +1362,15 @@ void JitArm64::srawx(UGeckoInstruction inst)
SetJumpTarget(end); SetJumpTarget(end);
MOV(gpr.R(a), WB); MOV(gpr.R(a), WB);
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); if (inplace_carry)
{
CMP(WA, 1);
ComputeCarry();
}
else
{
STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca));
}
gpr.Unlock(WA, WB, WC); gpr.Unlock(WA, WB, WC);
} }

View File

@ -417,7 +417,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
switch (bit) switch (bit)
{ {
case CR_SO_BIT: case CR_SO_BIT:
@ -436,7 +436,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62)
break; break;
} }
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WA); gpr.Unlock(WA);
return; return;
} }
@ -450,7 +450,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WA = gpr.GetReg(); ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA); ARM64Reg XA = EncodeRegTo64(WA);
LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
if (bit != CR_GT_BIT) if (bit != CR_GT_BIT)
{ {
@ -483,7 +483,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ORR(XA, XA, 32, 0, true); // XA | 1<<32 ORR(XA, XA, 32, 0, true); // XA | 1<<32
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WA); gpr.Unlock(WA);
return; return;
} }
@ -509,7 +509,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
ARM64Reg WC = gpr.GetReg(); ARM64Reg WC = gpr.GetReg();
ARM64Reg XC = EncodeRegTo64(WC); ARM64Reg XC = EncodeRegTo64(WC);
LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val[field]));
switch (bit) switch (bit)
{ {
case CR_SO_BIT: // check bit 61 set case CR_SO_BIT: // check bit 61 set
@ -565,7 +565,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
int field = inst.CRBD >> 2; int field = inst.CRBD >> 2;
int bit = 3 - (inst.CRBD & 3); int bit = 3 - (inst.CRBD & 3);
LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field]));
// Gross but necessary; if the input is totally zero and we set SO or LT, // Gross but necessary; if the input is totally zero and we set SO or LT,
// or even just add the (1<<32), GT will suddenly end up set without us // or even just add the (1<<32), GT will suddenly end up set without us
@ -603,7 +603,7 @@ void JitArm64::crXXX(UGeckoInstruction inst)
} }
ORR(XA, XA, 32, 0, true); // XA | 1<<32 ORR(XA, XA, 32, 0, true); // XA | 1<<32
STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field]));
gpr.Unlock(WA); gpr.Unlock(WA);
gpr.Unlock(WB); gpr.Unlock(WB);
@ -653,7 +653,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst)
} }
LDR(XA, XB, ArithOption(XA, true)); LDR(XA, XB, ArithOption(XA, true));
STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i); STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[i]));
} }
} }
gpr.Unlock(WA, WB); gpr.Unlock(WA, WB);

View File

@ -629,7 +629,7 @@ void JitArm64::GenMfcr()
const u8* start = GetCodePtr(); const u8* start = GetCodePtr();
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i); LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val[i]));
// SO // SO
if (i == 0) if (i == 0)