diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp index 977dda4867..3aec0032ae 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.cpp @@ -54,6 +54,7 @@ void JitArm64::Init() code_block.m_gpa = &js.gpa; code_block.m_fpa = &js.fpa; analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); + analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CARRY_MERGE); m_supports_cycle_counter = HasCycleCounters(); } @@ -79,6 +80,7 @@ void JitArm64::Shutdown() void JitArm64::FallBackToInterpreter(UGeckoInstruction inst) { + FlushCarry(); gpr.Flush(FlushMode::FLUSH_ALL, js.op); fpr.Flush(FlushMode::FLUSH_ALL, js.op); @@ -419,6 +421,7 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB js.downcountAmount = 0; js.skipInstructions = 0; js.curBlock = b; + js.carryFlagSet = false; PPCAnalyst::CodeOp* ops = code_buf->codebuffer; @@ -603,6 +606,8 @@ const u8* JitArm64::DoJit(u32 em_address, PPCAnalyst::CodeBuffer* code_buf, JitB } JitArm64Tables::CompileInstruction(ops[i]); + if (!MergeAllowedNextInstructions(1) || js.op[1].opinfo->type != OPTYPE_INTEGER) + FlushCarry(); // If we have a register that will never be used again, flush it. 
gpr.StoreRegisters(~ops[i].gprInUse); diff --git a/Source/Core/Core/PowerPC/JitArm64/Jit.h b/Source/Core/Core/PowerPC/JitArm64/Jit.h index 60746b580a..3393d12b0d 100644 --- a/Source/Core/Core/PowerPC/JitArm64/Jit.h +++ b/Source/Core/Core/PowerPC/JitArm64/Jit.h @@ -241,6 +241,7 @@ private: void ComputeRC(u64 imm, int crf = 0, bool needs_sext = true); void ComputeCarry(bool Carry); void ComputeCarry(); + void FlushCarry(); void reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), void (ARM64XEmitter::*op)(ARM64Reg, ARM64Reg, u64, ARM64Reg), bool Rc = false); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp index 3ea2abdaf1..79ad36faab 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp @@ -313,8 +313,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst) } SetJumpTarget(continue1); - STR(INDEX_UNSIGNED, XA, PPC_REG, - PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); gpr.Unlock(WA); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp index 4882f82176..124f354184 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp @@ -49,6 +49,8 @@ void JitArm64::ComputeRC(u64 imm, int crf, bool needs_sext) void JitArm64::ComputeCarry(bool Carry) { + js.carryFlagSet = false; + if (!js.op->wantsCA) return; @@ -66,13 +68,31 @@ void JitArm64::ComputeCarry(bool Carry) void JitArm64::ComputeCarry() { + js.carryFlagSet = false; + if (!js.op->wantsCA) return; + js.carryFlagSet = true; + if (MergeAllowedNextInstructions(1) && js.op[1].opinfo->type == OPTYPE_INTEGER) + { + return; + } + + FlushCarry(); +} + +void JitArm64::FlushCarry() +{ + if (!js.carryFlagSet) + return; + ARM64Reg WA = 
gpr.GetReg(); CSINC(WA, WSP, WSP, CC_CC); STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); gpr.Unlock(WA); + + js.carryFlagSet = false; } void JitArm64::reg_imm(u32 d, u32 a, u32 value, u32 (*do_op)(u32, u32), @@ -403,8 +423,7 @@ void JitArm64::cmp(UGeckoInstruction inst) SXTW(XB, RB); SUB(XA, XA, XB); - STR(INDEX_UNSIGNED, XA, PPC_REG, - PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); gpr.Unlock(WA, WB); } @@ -431,8 +450,7 @@ void JitArm64::cmpl(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); SUB(XA, EncodeRegTo64(gpr.R(a)), EncodeRegTo64(gpr.R(b))); - STR(INDEX_UNSIGNED, XA, PPC_REG, - PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); gpr.Unlock(WA); } @@ -482,8 +500,7 @@ void JitArm64::cmpli(UGeckoInstruction inst) SUBI2R(XA, EncodeRegTo64(gpr.R(a)), inst.UIMM, XA); - STR(INDEX_UNSIGNED, XA, PPC_REG, - PPCSTATE_OFF(cr_val[0]) + (sizeof(PowerPC::ppcState.cr_val[0]) * crf)); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[crf])); gpr.Unlock(WA); } @@ -559,6 +576,7 @@ void JitArm64::srawix(UGeckoInstruction inst) int a = inst.RA; int s = inst.RS; int amount = inst.SH; + bool inplace_carry = MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags; if (gpr.IsImm(s)) { @@ -570,30 +588,54 @@ void JitArm64::srawix(UGeckoInstruction inst) else ComputeCarry(false); } - else if (amount != 0) + else if (amount == 0) { gpr.BindToRegister(a, a == s); ARM64Reg RA = gpr.R(a); ARM64Reg RS = gpr.R(s); - ARM64Reg WA = gpr.GetReg(); - - ORR(WA, WSP, RS, ArithOption(RS, ST_LSL, 32 - amount)); - ORR(RA, WSP, RS, ArithOption(RS, ST_ASR, amount)); - if (inst.Rc) - ComputeRC(RA, 0); - - ANDS(WSP, WA, RA); - CSINC(WA, WSP, WSP, CC_EQ); - STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(WA); + MOV(RA, RS); + ComputeCarry(false); } else { 
gpr.BindToRegister(a, a == s); ARM64Reg RA = gpr.R(a); ARM64Reg RS = gpr.R(s); - MOV(RA, RS); - STRB(INDEX_UNSIGNED, WSP, PPC_REG, PPCSTATE_OFF(xer_ca)); + + if (js.op->wantsCA) + { + ARM64Reg WA = gpr.GetReg(); + ARM64Reg dest = inplace_carry ? WA : WSP; + if (a != s) + { + ASR(RA, RS, amount); + ANDS(dest, RA, RS, ArithOption(RS, ST_LSL, 32 - amount)); + } + else + { + LSL(WA, RS, 32 - amount); + ASR(RA, RS, amount); + ANDS(dest, WA, RA); + } + if (inplace_carry) + { + CMP(dest, 1); + ComputeCarry(); + } + else + { + CSINC(WA, WSP, WSP, CC_EQ); + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + } + gpr.Unlock(WA); + } + else + { + ASR(RA, RS, amount); + } + + if (inst.Rc) + ComputeRC(RA, 0); } } @@ -734,7 +776,12 @@ void JitArm64::addzex(UGeckoInstruction inst) int a = inst.RA, d = inst.RD; - if (d == a) + if (js.carryFlagSet) + { + gpr.BindToRegister(d, d == a); + ADCS(gpr.R(d), gpr.R(a), WZR); + } + else if (d == a) { gpr.BindToRegister(d, true); ARM64Reg WA = gpr.GetReg(); @@ -792,8 +839,16 @@ void JitArm64::subfex(UGeckoInstruction inst) gpr.BindToRegister(d, false); ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d)); + if (js.carryFlagSet) + { + MOVI2R(WA, ~i + j); + ADC(gpr.R(d), WA, WZR); + } + else + { + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + ADDI2R(gpr.R(d), WA, ~i + j, gpr.R(d)); + } gpr.Unlock(WA); bool must_have_carry = Interpreter::Helper_Carry(~i, j); @@ -818,8 +873,11 @@ void JitArm64::subfex(UGeckoInstruction inst) gpr.BindToRegister(d, d == a || d == b); // upload the carry state - LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - CMP(WA, 1); + if (!js.carryFlagSet) + { + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + CMP(WA, 1); + } // d = ~a + b + carry; if (gpr.IsImm(a)) @@ -879,11 +937,19 @@ void JitArm64::subfzex(UGeckoInstruction inst) gpr.BindToRegister(d, d == a); - ARM64Reg WA = gpr.GetReg(); - 
LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - MVN(gpr.R(d), gpr.R(a)); - ADDS(gpr.R(d), gpr.R(d), WA); - gpr.Unlock(WA); + if (js.carryFlagSet) + { + MVN(gpr.R(d), gpr.R(a)); + ADCS(gpr.R(d), gpr.R(d), WZR); + } + else + { + ARM64Reg WA = gpr.GetReg(); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + MVN(gpr.R(d), gpr.R(a)); + ADDS(gpr.R(d), gpr.R(d), WA); + gpr.Unlock(WA); + } ComputeCarry(); @@ -934,8 +1000,16 @@ void JitArm64::addex(UGeckoInstruction inst) gpr.BindToRegister(d, false); ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - ADDI2R(gpr.R(d), WA, i + j, gpr.R(d)); + if (js.carryFlagSet) + { + MOVI2R(WA, i + j); + ADC(gpr.R(d), WA, WZR); + } + else + { + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + ADDI2R(gpr.R(d), WA, i + j, gpr.R(d)); + } gpr.Unlock(WA); bool must_have_carry = Interpreter::Helper_Carry(i, j); @@ -959,10 +1033,13 @@ void JitArm64::addex(UGeckoInstruction inst) gpr.BindToRegister(d, d == a || d == b); // upload the carry state - ARM64Reg WA = gpr.GetReg(); - LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); - CMP(WA, 1); - gpr.Unlock(WA); + if (!js.carryFlagSet) + { + ARM64Reg WA = gpr.GetReg(); + LDRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + CMP(WA, 1); + gpr.Unlock(WA); + } // d = a + b + carry; ADCS(gpr.R(d), gpr.R(a), gpr.R(b)); @@ -1076,6 +1153,8 @@ void JitArm64::divwx(UGeckoInstruction inst) } else { + FlushCarry(); + gpr.BindToRegister(d, d == a || d == b); ARM64Reg WA = gpr.GetReg(); @@ -1205,6 +1284,7 @@ void JitArm64::srawx(UGeckoInstruction inst) JITDISABLE(bJITIntegerOff); int a = inst.RA, b = inst.RB, s = inst.RS; + bool inplace_carry = MergeAllowedNextInstructions(1) && js.op[1].wantsCAInFlags; if (gpr.IsImm(b) && gpr.IsImm(s)) { @@ -1225,10 +1305,16 @@ void JitArm64::srawx(UGeckoInstruction inst) ComputeRC(gpr.GetImm(a), 0); return; } - else if (gpr.IsImm(b) && (gpr.GetImm(b) & 0x20) == 0 && !js.op->wantsCA) + + if 
(gpr.IsImm(b) && !js.op->wantsCA) { + int amount = gpr.GetImm(b); + if (amount & 0x20) + amount = 0x1F; + else + amount &= 0x1F; gpr.BindToRegister(a, a == s); - ASR(gpr.R(a), gpr.R(a), gpr.GetImm(b) & 0x1F); + ASR(gpr.R(a), gpr.R(s), amount); } else if (!js.op->wantsCA) { @@ -1276,7 +1362,15 @@ void JitArm64::srawx(UGeckoInstruction inst) SetJumpTarget(end); MOV(gpr.R(a), WB); - STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + if (inplace_carry) + { + CMP(WA, 1); + ComputeCarry(); + } + else + { + STRB(INDEX_UNSIGNED, WA, PPC_REG, PPCSTATE_OFF(xer_ca)); + } gpr.Unlock(WA, WB, WC); } diff --git a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp index 84deac9703..e74e0573d3 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_SystemRegisters.cpp @@ -417,7 +417,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); switch (bit) { case CR_SO_BIT: @@ -436,7 +436,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) AND(XA, XA, 64 - 63, 62, true); // XA & ~(1<<62) break; } - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); gpr.Unlock(WA); return; } @@ -450,7 +450,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg WA = gpr.GetReg(); ARM64Reg XA = EncodeRegTo64(WA); - LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[field])); if (bit != CR_GT_BIT) { @@ -483,7 +483,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ORR(XA, XA, 32, 0, true); // XA | 1<<32 - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + STR(INDEX_UNSIGNED, XA, PPC_REG, 
PPCSTATE_OFF(cr_val[field])); gpr.Unlock(WA); return; } @@ -509,7 +509,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) ARM64Reg WC = gpr.GetReg(); ARM64Reg XC = EncodeRegTo64(WC); - LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XC, PPC_REG, PPCSTATE_OFF(cr_val[field])); switch (bit) { case CR_SO_BIT: // check bit 61 set @@ -565,7 +565,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) int field = inst.CRBD >> 2; int bit = 3 - (inst.CRBD & 3); - LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + LDR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field])); // Gross but necessary; if the input is totally zero and we set SO or LT, // or even just add the (1<<32), GT will suddenly end up set without us @@ -603,7 +603,7 @@ void JitArm64::crXXX(UGeckoInstruction inst) } ORR(XA, XA, 32, 0, true); // XA | 1<<32 - STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * field); + STR(INDEX_UNSIGNED, XB, PPC_REG, PPCSTATE_OFF(cr_val[field])); gpr.Unlock(WA); gpr.Unlock(WB); @@ -653,7 +653,7 @@ void JitArm64::mtcrf(UGeckoInstruction inst) } LDR(XA, XB, ArithOption(XA, true)); - STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i); + STR(INDEX_UNSIGNED, XA, PPC_REG, PPCSTATE_OFF(cr_val[i])); } } gpr.Unlock(WA, WB); diff --git a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp index 7c47e7a2b6..078dd877ad 100644 --- a/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm64/JitAsm.cpp @@ -629,7 +629,7 @@ void JitArm64::GenMfcr() const u8* start = GetCodePtr(); for (int i = 0; i < 8; i++) { - LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val) + 8 * i); + LDR(INDEX_UNSIGNED, X1, PPC_REG, PPCSTATE_OFF(cr_val[i])); // SO if (i == 0)