diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp
index 59ddf184e9..9143885fc3 100644
--- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp
@@ -385,13 +385,13 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
 	// Downcount flag check, Only valid for linked blocks
 	{
-		SetCC(CC_MI);
+		FixupBranch no_downcount = B_CC(CC_PL);
 		ARMReg rA = gpr.GetReg(false);
 		MOVI2R(rA, js.blockStart);
 		STR(rA, R9, PPCSTATE_OFF(pc));
 		MOVI2R(rA, (u32)asm_routines.doTiming);
 		B(rA);
-		SetCC();
+		SetJumpTarget(no_downcount);
 	}

 	const u8 *normalEntry = GetCodePtr();
@@ -409,7 +409,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
 		MOVI2R(C, js.blockStart); // R3
 		LDR(A, R9, PPCSTATE_OFF(msr));
 		TST(A, Shift);
-		SetCC(CC_EQ);
+		FixupBranch no_fpe = B_CC(CC_NEQ);
 		STR(C, R9, PPCSTATE_OFF(pc));

 		LDR(A, R9, PPCSTATE_OFF(Exceptions));
@@ -422,7 +422,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo
 		MOVI2R(A, (u32)asm_routines.dispatcher);
 		B(A);

-		SetCC();
+		SetJumpTarget(no_fpe);

 		gpr.Unlock(A, C);
 	}
diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h
index 1f8684bcc9..98f6884e1c 100644
--- a/Source/Core/Core/PowerPC/JitArm32/Jit.h
+++ b/Source/Core/Core/PowerPC/JitArm32/Jit.h
@@ -60,6 +60,7 @@ private:
 		FLAG_SIZE_F32 = (1 << 5),
 		FLAG_SIZE_F64 = (1 << 6),
 		FLAG_REVERSE = (1 << 7),
+		FLAG_EXTEND = (1 << 8),
 	};

 	u32 m_fastmem_size;
@@ -178,7 +179,9 @@ public:
 	void subfic(UGeckoInstruction _inst);
 	void cntlzwx(UGeckoInstruction _inst);
 	void cmp (UGeckoInstruction _inst);
+	void cmpl(UGeckoInstruction _inst);
 	void cmpi(UGeckoInstruction _inst);
+	void cmpli(UGeckoInstruction _inst);
 	void negx(UGeckoInstruction _inst);
 	void mulhwux(UGeckoInstruction _inst);
 	void rlwimix(UGeckoInstruction _inst);
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp
index c703e5a88d..3351f2a8fe 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp
@@ -225,9 +225,8 @@ u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, boo
 	{
 		ARMReg temp2 = R10;
 		Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
-		emit->BIC(temp, addr, mask); // 1
-		emit->MOVI2R(temp2, (u32)Memory::base); // 2-3
-		emit->ADD(temp, temp, temp2); // 4
+		emit->BIC(temp, addr, mask);
+		emit->ADD(temp, temp, R8);

 		if (flags & BackPatchInfo::FLAG_STORE &&
 		    flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64))
@@ -301,6 +300,9 @@ u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, boo
 			else if (flags & BackPatchInfo::FLAG_SIZE_16)
 				emit->REV16(RS, RS);
 		}
+
+		if (flags & BackPatchInfo::FLAG_EXTEND)
+			emit->SXTH(RS, RS);
 	}
 }
 else
@@ -591,7 +593,6 @@ void JitArm::InitBackpatch()

 		m_backpatch_info[flags] = info;
 	}
-
 	// 16bit - reverse
 	{
 		flags =
@@ -613,6 +614,27 @@ void JitArm::InitBackpatch()

 		m_backpatch_info[flags] = info;
 	}
+	// 16bit - sign extend
+	{
+		flags =
+			BackPatchInfo::FLAG_LOAD |
+			BackPatchInfo::FLAG_SIZE_16 |
+			BackPatchInfo::FLAG_EXTEND;
+		EmitBackpatchRoutine(this, flags, false, false, R0);
+		code_end = GetWritableCodePtr();
+		info.m_slowmem_size = (code_end - code_base) / 4;
+
+		SetCodePtr(code_base);
+
+		info.m_fastmem_trouble_inst_offset =
+			EmitBackpatchRoutine(this, flags, true, false, R0);
+		code_end = GetWritableCodePtr();
+		info.m_fastmem_size = (code_end - code_base) / 4;
+
+		SetCodePtr(code_base);
+
+		m_backpatch_info[flags] = info;
+	}
 	// 32bit - reverse
 	{
 		flags =
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp
index 5c897227b3..0c420d5ce6 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp
@@ -723,6 +723,14 @@ void JitArm::cntlzwx(UGeckoInstruction inst)
 	JITDISABLE(bJITIntegerOff);
 	u32 a = inst.RA, s = inst.RS;

+	if (gpr.IsImm(s))
+	{
+		gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s)));
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(a), 0);
+		return;
+	}
+
 	gpr.BindToRegister(a, a == s);
 	ARMReg RA = gpr.R(a);
 	ARMReg RS = gpr.R(s);
@@ -817,10 +825,34 @@ void JitArm::cmp (UGeckoInstruction inst)

 	gpr.Unlock(rA);
 }
+
+void JitArm::cmpl(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+
+	int crf = inst.CRFD;
+	u32 a = inst.RA, b = inst.RB;
+
+	if (gpr.IsImm(a) && gpr.IsImm(b))
+	{
+		ComputeRC(gpr.GetImm(a) - gpr.GetImm(b), crf);
+		return;
+	}
+	else if (gpr.IsImm(b) && !gpr.GetImm(b))
+	{
+		ComputeRC(gpr.R(a), crf);
+		return;
+	}
+
+	FALLBACK_IF(true);
+}
+
 void JitArm::cmpi(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);
+	u32 a = inst.RA;
 	int crf = inst.CRFD;

 	if (gpr.IsImm(a))
@@ -830,10 +862,15 @@ void JitArm::cmpi(UGeckoInstruction inst)
 	}
 	ARMReg rA = gpr.GetReg();
 	ARMReg RA = gpr.R(a);
+	bool negated = false;
+	Operand2 off;

-	if (inst.SIMM_16 >= 0 && inst.SIMM_16 < 256)
+	if (TryMakeOperand2_AllowNegation(inst.SIMM_16, off, &negated))
 	{
-		SUB(rA, RA, inst.SIMM_16);
+		if (negated)
+			ADD(rA, RA, off);
+		else
+			SUB(rA, RA, off);
 	}
 	else
 	{
@@ -845,11 +882,41 @@ void JitArm::cmpi(UGeckoInstruction inst)
 	gpr.Unlock(rA);
 }

+void JitArm::cmpli(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITIntegerOff);
+	u32 a = inst.RA;
+	int crf = inst.CRFD;
+
+	if (gpr.IsImm(a))
+	{
+		ComputeRC(gpr.GetImm(a) - inst.UIMM, crf);
+		return;
+	}
+
+	if (!inst.UIMM)
+	{
+		ComputeRC(gpr.R(a), crf);
+		return;
+	}
+
+	FALLBACK_IF(true);
+}
+
 void JitArm::negx(UGeckoInstruction inst)
 {
 	INSTRUCTION_START
 	JITDISABLE(bJITIntegerOff);

+	if (gpr.IsImm(inst.RA))
+	{
+		gpr.SetImmediate(inst.RD, ~gpr.GetImm(inst.RA) + 1);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(inst.RD), 0);
+		return;
+	}
+
 	gpr.BindToRegister(inst.RD, inst.RD == inst.RA);
 	ARMReg RD = gpr.R(inst.RD);
 	ARMReg RA = gpr.R(inst.RA);
@@ -870,13 +937,50 @@ void JitArm::rlwimix(UGeckoInstruction inst)
 	JITDISABLE(bJITIntegerOff);
 	u32 mask = Helper_Mask(inst.MB,inst.ME);

-	ARMReg RA = gpr.R(inst.RA);
-	ARMReg RS = gpr.R(inst.RS);
+	int a = inst.RA, s = inst.RS;
+
+	if (gpr.IsImm(s) && inst.MB <= inst.ME)
+	{
+		u32 imm = _rotl(gpr.GetImm(s), inst.SH) & mask;
+		imm >>= 31 - inst.ME;
+		ARMReg rA = gpr.GetReg();
+
+		MOVI2R(rA, imm);
+		BFI(gpr.R(a), rA, 31 - inst.ME, inst.ME - inst.MB + 1);
+		if (inst.Rc)
+			ComputeRC(gpr.R(a));
+
+		gpr.Unlock(rA);
+		return;
+	}
+
+	ARMReg RA = gpr.R(a);
+	ARMReg RS = gpr.R(s);
+
+	if (inst.SH == 0 && inst.MB <= inst.ME)
+	{
+		if (inst.ME != 31)
+		{
+			ARMReg rA = gpr.GetReg();
+			LSR(rA, RS, 31 - inst.ME);
+			BFI(RA, rA, 31 - inst.ME, inst.ME - inst.MB + 1);
+			gpr.Unlock(rA);
+		}
+		else
+		{
+			BFI(RA, RS, 0, inst.ME - inst.MB + 1);
+		}
+		if (inst.Rc)
+			ComputeRC(RA);
+
+		return;
+	}
+
 	ARMReg rA = gpr.GetReg();
 	ARMReg rB = gpr.GetReg();
+	Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
+
 	MOVI2R(rA, mask);
-	Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
 	BIC (rB, RA, rA); // RA & ~mask
 	AND (rA, rA, Shift);
 	ORR(RA, rB, rA);
@@ -892,13 +996,62 @@ void JitArm::rlwinmx(UGeckoInstruction inst)
 	JITDISABLE(bJITIntegerOff);
 	u32 mask = Helper_Mask(inst.MB,inst.ME);

+	if (gpr.IsImm(inst.RS))
+	{
+		gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask);
+		if (inst.Rc)
+			ComputeRC(gpr.GetImm(inst.RA), 0);
+		return;
+	}
+
+	gpr.BindToRegister(inst.RA, inst.RA == inst.RS);
 	ARMReg RA = gpr.R(inst.RA);
 	ARMReg RS = gpr.R(inst.RS);
 	ARMReg rA = gpr.GetReg();
-	MOVI2R(rA, mask);
+	bool inverse = false;
+	bool fit_op = false;
+	Operand2 op2;
+	fit_op = TryMakeOperand2_AllowInverse(mask, op2, &inverse);

-	Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
-	AND(RA, rA, Shift);
+	if (!inst.SH && fit_op)
+	{
+		if (inverse)
+			BIC(RA, RS, op2);
+		else
+			AND(RA, RS, op2);
+	}
+	else if (!inst.SH && inst.ME == 31)
+	{
+		UBFX(RA, RS, 0, inst.ME - inst.MB + 1);
+	}
+	else if (!inst.SH && inst.MB == 0)
+	{
+		LSR(RA, RS, 31 - inst.ME);
+		LSL(RA, RA, 31 - inst.ME);
+	}
+	else if (inst.SH == 16 && inst.MB >= 16 && inst.ME == 31)
+	{
+		UBFX(RA, RS, 16, 32 - inst.MB);
+	}
+	else if (inst.SH == 16 && inst.MB == 0 && inst.ME == 15)
+	{
+		LSL(RA, RS, 16);
+	}
+	else if (fit_op)
+	{
+		Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
+		MOV(RA, Shift);
+		if (inverse)
+			BIC(RA, RA, op2);
+		else
+			AND(RA, RA, op2);
+	}
+	else
+	{
+		MOVI2R(rA, mask);
+		Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it.
+		AND(RA, rA, Shift);
+	}

 	if (inst.Rc)
 		ComputeRC(RA);
@@ -939,9 +1092,19 @@ void JitArm::srawix(UGeckoInstruction inst)
 	int s = inst.RS;
 	int amount = inst.SH;

-	gpr.BindToRegister(a, a == s);
-	if (amount != 0)
+	if (gpr.IsImm(s))
 	{
+		s32 imm = (s32)gpr.GetImm(s);
+		gpr.SetImmediate(a, imm >> amount);
+
+		if (amount != 0 && (imm < 0) && (imm << (32 - amount)))
+			ComputeCarry(true);
+		else
+			ComputeCarry(false);
+	}
+	else if (amount != 0)
+	{
+		gpr.BindToRegister(a, a == s);
 		ARMReg RA = gpr.R(a);
 		ARMReg RS = gpr.R(s);
 		ARMReg tmp = gpr.GetReg();
@@ -963,6 +1126,7 @@ void JitArm::srawix(UGeckoInstruction inst)
 	}
 	else
 	{
+		gpr.BindToRegister(a, a == s);
 		ARMReg RA = gpr.R(a);
 		ARMReg RS = gpr.R(s);
 		MOV(RA, RS);
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp
index 1f50c7fb49..8cd32c561c 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp
@@ -148,7 +148,7 @@ void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize
 	else if (Memory::IsRAMAddress(imm_addr))
 	{
 		MOVI2R(rA, imm_addr);
-		EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, false, RS);
+		EmitBackpatchRoutine(this, flags, SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem, true, RS);
 	}
 	else
 	{
@@ -227,7 +227,7 @@ void JitArm::stX(UGeckoInstruction inst)
 		// Check for DSI exception prior to writing back address
 		LDR(rA, R9, PPCSTATE_OFF(Exceptions));
 		TST(rA, EXCEPTION_DSI);
-		SetCC(CC_EQ);
+		FixupBranch has_exception = B_CC(CC_NEQ);
 		if (regOffset == -1)
 		{
 			MOVI2R(rA, offset);
@@ -237,7 +237,7 @@ void JitArm::stX(UGeckoInstruction inst)
 		{
 			ADD(RA, RA, RB);
 		}
-		SetCC();
+		SetJumpTarget(has_exception);
 		gpr.Unlock(rA);
 	}
 }
@@ -347,13 +347,13 @@ void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize,
 	if (reverse)
 		flags |= BackPatchInfo::FLAG_REVERSE;

+	if (signExtend)
+		flags |= BackPatchInfo::FLAG_EXTEND;
+
 	EmitBackpatchRoutine(this, flags,
 		SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
 		!(is_immediate && Memory::IsRAMAddress(imm_addr)), dest);

-	if (signExtend) // Only on 16 loads
-		SXTH(dest, dest);
-
 	if (update)
 		MOV(gpr.R(addr), rA);
 }
@@ -487,14 +487,12 @@ void JitArm::lmw(UGeckoInstruction inst)

 	u32 a = inst.RA;
 	ARMReg rA = gpr.GetReg();
-	ARMReg rB = gpr.GetReg();
 	MOVI2R(rA, inst.SIMM_16);
 	if (a)
 		ADD(rA, rA, gpr.R(a));
 	Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
-	BIC(rA, rA, mask); // 3
-	MOVI2R(rB, (u32)Memory::base, false); // 4-5
-	ADD(rA, rA, rB); // 6
+	BIC(rA, rA, mask);
+	ADD(rA, rA, R8);
 	for (int i = inst.RD; i < 32; i++)
 	{
@@ -502,7 +500,7 @@ void JitArm::lmw(UGeckoInstruction inst)
 		LDR(RX, rA, (i - inst.RD) * 4);
 		REV(RX, RX);
 	}
-	gpr.Unlock(rA, rB);
+	gpr.Unlock(rA);
 }

 void JitArm::stmw(UGeckoInstruction inst)
@@ -514,22 +512,20 @@ void JitArm::stmw(UGeckoInstruction inst)
 	u32 a = inst.RA;
 	ARMReg rA = gpr.GetReg();
 	ARMReg rB = gpr.GetReg();
-	ARMReg rC = gpr.GetReg();
 	MOVI2R(rA, inst.SIMM_16);
 	if (a)
 		ADD(rA, rA, gpr.R(a));
 	Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK)
-	BIC(rA, rA, mask); // 3
-	MOVI2R(rB, (u32)Memory::base, false); // 4-5
-	ADD(rA, rA, rB); // 6
+	BIC(rA, rA, mask);
+	ADD(rA, rA, R8);
 	for (int i = inst.RD; i < 32; i++)
 	{
 		ARMReg RX = gpr.R(i);
-		REV(rC, RX);
-		STR(rC, rA, (i - inst.RD) * 4);
+		REV(rB, RX);
+		STR(rB, rA, (i - inst.RD) * 4);
 	}
-	gpr.Unlock(rA, rB, rC);
+	gpr.Unlock(rA, rB);
 }

 void JitArm::dcbst(UGeckoInstruction inst)
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp
index 6742e1ccc5..e0e485903a 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp
@@ -35,9 +35,21 @@ void JitArm::psq_l(UGeckoInstruction inst)
 	UBFX(R11, R11, 24, 6); // Scale
 	LSL(R11, R11, 2);

-	MOVI2R(R10, (u32)offset);
-	if (inst.RA || update) // Always uses the register on update
-		ADD(R10, R10, gpr.R(inst.RA));
+	Operand2 off;
+	if (TryMakeOperand2(offset, off))
+	{
+		if (inst.RA || update)
+			ADD(R10, gpr.R(inst.RA), off);
+		else
+			MOV(R10, off);
+	}
+	else
+	{
+		MOVI2R(R10, (u32)offset);
+		if (inst.RA || update) // Always uses the register on update
+			ADD(R10, R10, gpr.R(inst.RA));
+	}
+
 	if (update)
 		MOV(gpr.R(inst.RA), R10);
 	MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized);
@@ -126,14 +138,19 @@ void JitArm::psq_st(UGeckoInstruction inst)
 	UBFX(R11, R11, 8, 6); // Scale
 	LSL(R11, R11, 2);

-	if (inst.RA || update) // Always uses the register on update
+	Operand2 off;
+	if (TryMakeOperand2(offset, off))
 	{
-		MOVI2R(R14, offset);
-		ADD(R10, gpr.R(inst.RA), R14);
+		if (inst.RA || update)
+			ADD(R10, gpr.R(inst.RA), off);
+		else
+			MOV(R10, off);
 	}
 	else
 	{
 		MOVI2R(R10, (u32)offset);
+		if (inst.RA || update) // Always uses the register on update
+			ADD(R10, R10, gpr.R(inst.RA));
 	}

 	if (update)
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp
index 198a73fba3..79a7399d62 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp
@@ -47,7 +47,7 @@ static GekkoOPTemplate primarytable[] =
 	{7, &JitArm::arith}, //"mulli", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_RC_BIT, 2}},
 	{8, &JitArm::subfic}, //"subfic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
-	{10, &JitArm::FallBackToInterpreter}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
+	{10, &JitArm::cmpli}, //"cmpli", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
 	{11, &JitArm::cmpi}, //"cmpi", OPTYPE_INTEGER, FL_IN_A | FL_SET_CRn}},
 	{12, &JitArm::arith}, //"addic", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CA}},
 	{13, &JitArm::arith}, //"addic_rc", OPTYPE_INTEGER, FL_OUT_D | FL_IN_A | FL_SET_CR0}},
@@ -190,7 +190,7 @@ static GekkoOPTemplate table31[] =
 	{476, &JitArm::arith}, //"nandx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
 	{284, &JitArm::arith}, //"eqvx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_SB | FL_RC_BIT}},
 	{0, &JitArm::cmp}, //"cmp", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
-	{32, &JitArm::FallBackToInterpreter}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
+	{32, &JitArm::cmpl}, //"cmpl", OPTYPE_INTEGER, FL_IN_AB | FL_SET_CRn}},
 	{26, &JitArm::cntlzwx}, //"cntlzwx",OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
 	{922, &JitArm::extshx}, //"extshx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
 	{954, &JitArm::extsbx}, //"extsbx", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_RC_BIT}},
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp
index bb19c300c4..b0966d4f58 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp
@@ -96,6 +96,7 @@ void JitArmAsmRoutineManager::Generate()
 	SUB(_SP, _SP, 4);

 	MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]);
+	MOVI2R(R8, (u32)Memory::base);
 	FixupBranch skipToRealDispatcher = B();
 	dispatcher = GetCodePtr();
@@ -134,7 +135,7 @@ void JitArmAsmRoutineManager::Generate()
 		// R12 Confirmed this is the correct iCache Location loaded.
 		TST(R12, 0x80); // Test to see if it is a JIT block.

-		SetCC(CC_EQ);
+		FixupBranch no_block = B_CC(CC_NEQ);
 		// Success, it is our Jitblock.
 		MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers());
 		// LDR R14 right here to get CodePointers()[0] pointer.
@@ -143,7 +144,7 @@ void JitArmAsmRoutineManager::Generate()
 		B(R14);

 		// No need to jump anywhere after here, the block will go back to dispatcher start
-		SetCC();
+		SetJumpTarget(no_block);

 		// If we get to this point, that means that we don't have the block cached to execute
 		// So call ArmJit to compile the block and then execute it.
@@ -203,8 +204,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedFloatTwo = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		nemit.VLD1(I_32, D0, R10);
 		nemit.VREV32(I_8, D0, D0);
@@ -214,8 +214,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedFloatOne = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		nemit.VLD1(I_32, D0, R10);
 		nemit.VREV32(I_8, D0, D0);
@@ -225,15 +224,12 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedU8Two = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

-		LDRH(R12, R10);
-		SXTB(R12, R12);
+		LDRB(R12, R10);
 		VMOV(S0, R12);

-		LDRH(R12, R10, 2);
-		SXTB(R12, R12);
+		LDRB(R12, R10, 1);
 		VMOV(S1, R12);

 		MOVI2R(R10, (u32)&m_dequantizeTableS);
@@ -251,11 +247,9 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedU8One = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		LDRB(R12, R10);
-		SXTB(R12, R12);
 		VMOV(S0, R12);

 		MOVI2R(R10, (u32)&m_dequantizeTableS);
@@ -271,15 +265,12 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedS8Two = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

-		LDRH(R12, R10);
-		SXTB(R12, R12);
+		LDRSB(R12, R10);
 		VMOV(S0, R12);

-		LDRH(R12, R10, 2);
-		SXTB(R12, R12);
+		LDRSB(R12, R10, 1);
 		VMOV(S1, R12);

 		MOVI2R(R10, (u32)&m_dequantizeTableS);
@@ -297,11 +288,9 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedS8One = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

-		LDRB(R12, R10);
-		SXTB(R12, R12);
+		LDRSB(R12, R10);
 		VMOV(S0, R12);

 		MOVI2R(R10, (u32)&m_dequantizeTableS);
@@ -317,17 +306,14 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedU16Two = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		LDRH(R12, R10);
 		REV16(R12, R12);
-		SXTH(R12, R12);
 		VMOV(S0, R12);

 		LDRH(R12, R10, 2);
 		REV16(R12, R12);
-		SXTH(R12, R12);
 		VMOV(S1, R12);

 		MOVI2R(R10, (u32)&m_dequantizeTableS);
@@ -345,8 +331,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedU16One = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		LDRH(R12, R10);
 		REV16(R12, R12);
@@ -364,8 +349,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedS16Two = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		LDRH(R12, R10);
 		REV16(R12, R12);
@@ -392,8 +376,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 	const u8* loadPairedS16One = GetCodePtr();
 	{
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		LDRH(R12, R10);
@@ -439,8 +422,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 		TST(R10, arghmask);
 		FixupBranch argh = B_CC(CC_NEQ);
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		nemit.VREV32(I_8, D0, D0);
 		nemit.VST1(I_32, D0, R10);
@@ -511,8 +493,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 		TST(R10, arghmask);
 		FixupBranch argh = B_CC(CC_NEQ);
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		VMOV(R12, S0);
 		REV(R12, R12);
@@ -540,8 +521,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 		TST(R10, arghmask);
 		FixupBranch argh = B_CC(CC_NEQ);
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
 		VMOV(R12, S0);
@@ -568,8 +548,7 @@ void JitArmAsmRoutineManager::GenerateCommon()
 		TST(R10, arghmask);
 		FixupBranch argh = B_CC(CC_NEQ);
 		BIC(R10, R10, mask);
-		MOVI2R(R12, (u32)Memory::base);
-		ADD(R10, R10, R12);
+		ADD(R10, R10, R8);

 		VCVT(S0, S0, TO_INT | ROUND_TO_ZERO);
 		VMOV(R12, S0);
diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp
index 8a7c2990a9..8379b6aa9a 100644
--- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp
@@ -54,7 +54,7 @@ ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count)
 	// the ppc side.
 	static ARMReg allocationOrder[] =
 	{
-		R0, R1, R2, R3, R4, R5, R6, R7, R8
+		R0, R1, R2, R3, R4, R5, R6, R7
 	};
 	count = sizeof(allocationOrder) / sizeof(const int);
 	return allocationOrder;
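Note on the integer constant folding above: the new immediate paths in cntlzwx, negx, rlwinmx, and rlwimix all rely on the usual PowerPC rotate-and-mask arithmetic. A minimal standalone C++ sketch of that arithmetic follows; MakeRotateMask, RotL, and FoldRlwinm are simplified stand-ins for the JIT's Helper_Mask and _rotl helpers, not code taken from this patch.

#include <cstdint>

// Stand-in for Helper_Mask(MB, ME): a mask of 1-bits from bit MB through bit ME
// in PowerPC (big-endian) bit numbering, wrapping around when MB > ME.
static uint32_t MakeRotateMask(int mb, int me)
{
	uint32_t begin = 0xFFFFFFFFu >> mb;
	uint32_t end   = 0xFFFFFFFFu << (31 - me);
	uint32_t mask  = begin & end;
	return (mb <= me) ? mask : (begin | end);
}

// Stand-in for _rotl: rotate left by 0-31 bits.
static uint32_t RotL(uint32_t value, unsigned shift)
{
	return shift ? ((value << shift) | (value >> (32 - shift))) : value;
}

// rlwinm rA, rS, SH, MB, ME with a known rS folds to this expression, which is
// what the gpr.SetImmediate(..., _rotl(gpr.GetImm(inst.RS), inst.SH) & mask)
// path in rlwinmx computes at compile time instead of emitting ARM code.
static uint32_t FoldRlwinm(uint32_t rs, unsigned sh, int mb, int me)
{
	return RotL(rs, sh) & MakeRotateMask(mb, me);
}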