diff --git a/Source/Core/Common/Src/ArmEmitter.cpp b/Source/Core/Common/Src/ArmEmitter.cpp index 85e18e26aa..6b7a6f32ad 100644 --- a/Source/Core/Common/Src/ArmEmitter.cpp +++ b/Source/Core/Common/Src/ArmEmitter.cpp @@ -83,65 +83,104 @@ bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated) } } +void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg) +{ + union {float f; u32 u;} conv; + conv.f = val; + MOVI2R(tempReg, conv.u); + VMOV(dest, tempReg); + // TODO: VMOV an IMM directly if possible + // Otherwise, use a literal pool and VLDR directly (+- 1020) +} + +void ARMXEmitter::ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch) +{ + Operand2 op2; + bool inverse; + if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) { + if (!inverse) { + AND(rd, rs, op2); + } else { + BIC(rd, rs, op2); + } + } else { + MOVI2R(scratch, val); + AND(rd, rs, scratch); + } +} + +void ARMXEmitter::ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch) +{ + Operand2 op2; + if (TryMakeOperand2(val, op2)) { + ORR(rd, rs, op2); + } else { + MOVI2R(scratch, val); + ORR(rd, rs, scratch); + } +} + +void ARMXEmitter::FlushLitPool() +{ + for(std::vector::iterator it = currentLitPool.begin(); it != currentLitPool.end(); ++it) { + // Search for duplicates + for(std::vector::iterator old_it = currentLitPool.begin(); old_it != it; ++old_it) { + if ((*old_it).val == (*it).val) + (*it).loc = (*old_it).loc; + } + + // Write the constant to Literal Pool + if (!(*it).loc) + { + (*it).loc = (s32)code; + Write32((*it).val); + } + s32 offset = (*it).loc - (s32)(*it).ldr_address - 8; + + // Backpatch the LDR + *(u32*)(*it).ldr_address |= (offset >= 0) << 23 | abs(offset); + } + // TODO: Save a copy of previous pools in case they are still in range. + currentLitPool.clear(); +} + +void ARMXEmitter::AddNewLit(u32 val) +{ + LiteralPool pool_item; + pool_item.loc = 0; + pool_item.val = val; + pool_item.ldr_address = code; + currentLitPool.push_back(pool_item); +} + void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize) { Operand2 op2; bool inverse; - if (!optimize) + + if (cpu_info.bArmV7 && !optimize) { - // Only used in backpatch atm - // Only support ARMv7 right now - if (cpu_info.bArmV7) { - MOVW(reg, val & 0xFFFF); - MOVT(reg, val, true); - } - else - { - // ARMv6 version won't use backpatch for now - // Run again with optimizations - MOVI2R(reg, val); - } - } else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) { - if (!inverse) - MOV(reg, op2); - else - MVN(reg, op2); + // For backpatching on ARMv7 + MOVW(reg, val & 0xFFFF); + MOVT(reg, val, true); + } + else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) { + inverse ? MVN(reg, op2) : MOV(reg, op2); } else { - if (cpu_info.bArmV7) { - // ARMv7 - can use MOVT/MOVW, best choice + if (cpu_info.bArmV7) + { + // Use MOVW+MOVT for ARMv7+ MOVW(reg, val & 0xFFFF); if(val & 0xFFFF0000) MOVT(reg, val, true); } else { - // ARMv6 - fallback sequence. - // TODO: Optimize further. Can for example choose negation etc. - // Literal pools is another way to do this but much more complicated - // so I can't really be bothered for an outdated CPU architecture like ARMv6. - bool first = true; - int shift = 16; - for (int i = 0; i < 4; i++) { - if (val & 0xFF) { - if (first) { - MOV(reg, Operand2((u8)val, (u8)(shift & 0xF))); - first = false; - } else { - ORR(reg, reg, Operand2((u8)val, (u8)(shift & 0xF))); - } - } - shift -= 4; - val >>= 8; - } + // Use literal pool for ARMv6. 
+ AddNewLit(val); + LDRLIT(reg, 0, 0); // To be backpatched later } } } -// Moves IMM to memory location -void ARMXEmitter::ARMABI_MOVI2M(Operand2 op, Operand2 val) -{ - // This moves imm to a memory location - MOVW(R14, val); MOVT(R14, val, true); - MOVW(R12, op); MOVT(R12, op, true); - STR(R12, R14); // R10 is what we want to store -} + void ARMXEmitter::QuickCallFunction(ARMReg reg, void *func) { MOVI2R(reg, (u32)(func)); BL(reg); @@ -151,6 +190,9 @@ void ARMXEmitter::SetCodePtr(u8 *ptr) { code = ptr; startcode = code; +#ifdef IOS + lastCacheFlushEnd = ptr; +#endif } const u8 *ARMXEmitter::GetCodePtr() const @@ -194,10 +236,11 @@ void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end) #elif defined(BLACKBERRY) msync(start, end - start, MS_SYNC | MS_INVALIDATE_ICACHE); #elif defined(IOS) - sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start); + if (start != NULL) + sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start); #elif !defined(_WIN32) #ifndef ANDROID - start = startcode; + start = startcode; // Should be Linux Only #endif __builtin___clear_cache(start, end); #endif @@ -474,6 +517,7 @@ void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedData void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);} void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, false, dest, src, op2);} void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, true, dest, src, op2);} +void ARMXEmitter::LSR (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(3, false, dest, src, op2);} void ARMXEmitter::MUL (ARMReg dest, ARMReg src, ARMReg op2) { Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2); @@ -497,10 +541,38 @@ void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) Write4OpMultiply(0xC, destLo, destHi, rn, rm); } +void ARMXEmitter::UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) +{ + Write4OpMultiply(0xA, destLo, destHi, rn, rm); +} + +void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) +{ + Write4OpMultiply(0xE, destLo, destHi, rn, rm); +} + +void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width) +{ + Write32(condition | (0x7E0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn); +} + +void ARMXEmitter::CLZ(ARMReg rd, ARMReg rm) +{ + Write32(condition | (0x16F << 16) | (rd << 12) | (0xF1 << 4) | rm); +} + +void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width) +{ + u32 msb = (lsb + width - 1); + if (msb > 31) msb = 31; + Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | rn); +} + void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2) { Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2); } + void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation) { SXTAH(dest, (ARMReg)15, op2, rotation); @@ -511,9 +583,13 @@ void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation) // information Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2); } -void ARMXEmitter::REV (ARMReg dest, ARMReg src ) +void ARMXEmitter::RBIT(ARMReg dest, ARMReg src) { - Write32(condition | (107 << 20) | (15 << 16) | (dest << 12) | (243 << 4) | src); + Write32(condition | (0x6F << 20) | (0xF << 16) | (dest << 12) | (0xF3 << 4) | src); +} +void ARMXEmitter::REV (ARMReg dest, ARMReg src) +{ + Write32(condition | (0x6B << 20) | (0xF << 16) | 
(dest << 12) | (0xF3 << 4) | src); } void ARMXEmitter::REV16(ARMReg dest, ARMReg src) { @@ -533,29 +609,46 @@ void ARMXEmitter::MRS (ARMReg dest) Write32(condition | (16 << 20) | (15 << 16) | (dest << 12)); } -void ARMXEmitter::WriteStoreOp(u32 op, ARMReg dest, ARMReg src, s16 op2) +void ARMXEmitter::WriteStoreOp(u32 op, ARMReg src, ARMReg dest, s16 op2) { - // Qualcomm chipsets get /really/ angry if you don't use index, even if the offset is zero. - // bool Index = op2 != 0 ? true : false; - bool Index = true; - bool Add = op2 >= 0 ? true : false; - u32 imm = abs(op2); - Write32(condition | (op << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (src << 12) | imm); + // Qualcomm chipsets get /really/ angry if you don't use index, even if the offset is zero. + // bool Index = op2 != 0 ? true : false; + bool Index = true; + bool Add = op2 >= 0 ? true : false; + u32 imm = abs(op2); + Write32(condition | (op << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (src << 12) | imm); } -void ARMXEmitter::STR (ARMReg dest, ARMReg src, s16 op) { WriteStoreOp(0x40, dest, src, op);} -void ARMXEmitter::STRB(ARMReg dest, ARMReg src, s16 op) { WriteStoreOp(0x44, dest, src, op);} -void ARMXEmitter::STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +void ARMXEmitter::STR (ARMReg result, ARMReg base, s16 op) { WriteStoreOp(0x40, base, result, op);} +void ARMXEmitter::STRH (ARMReg result, ARMReg base, Operand2 op) { - Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (dest << 16) | (base << 12) | offset); + u8 Imm = op.Imm8(); + Write32(condition | (0x04 << 20) | (base << 16) | (result << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F)); +} +void ARMXEmitter::STRB (ARMReg result, ARMReg base, s16 op) { WriteStoreOp(0x44, base, result, op);} +void ARMXEmitter::STR (ARMReg result, ARMReg base, Operand2 op2, bool Index, bool Add) +{ + Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (result << 12) | op2.IMMSR()); +} +void ARMXEmitter::STR (ARMReg result, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x60 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (result << 12) | offset); +} +void ARMXEmitter::STRH (ARMReg result, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x00 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (result << 12) | (0xB << 4) | offset); +} +void ARMXEmitter::STRB (ARMReg result, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x64 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (result << 12) | offset); } void ARMXEmitter::LDREX(ARMReg dest, ARMReg base) { Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F); } -void ARMXEmitter::STREX(ARMReg dest, ARMReg base, ARMReg op) +void ARMXEmitter::STREX(ARMReg result, ARMReg base, ARMReg op) { - _assert_msg_(DYNA_REC, (dest != base && dest != op), "STREX dest can't be other two registers"); - Write32(condition | (24 << 20) | (base << 16) | (dest << 12) | (0xF9 << 4) | op); + _assert_msg_(DYNA_REC, (result != base && result != op), "STREX dest can't be other two registers"); + Write32(condition | (24 << 20) | (base << 16) | (result << 12) | (0xF9 << 4) | op); } void ARMXEmitter::DMB () { @@ -572,12 +665,44 @@ void ARMXEmitter::LDRH(ARMReg dest, ARMReg src, Operand2 op) u8 Imm = op.Imm8(); Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xB << 4) | (Imm & 0x0F)); } +void ARMXEmitter::LDRSH(ARMReg dest, ARMReg src, Operand2 
op) +{ + u8 Imm = op.Imm8(); + Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xF << 4) | (Imm & 0x0F)); +} void ARMXEmitter::LDRB(ARMReg dest, ARMReg src, s16 op) { WriteStoreOp(0x45, src, dest, op);} +void ARMXEmitter::LDRSB(ARMReg dest, ARMReg src, Operand2 op) +{ + u8 Imm = op.Imm8(); + Write32(condition | (0x05 << 20) | (src << 16) | (dest << 12) | ((Imm >> 4) << 8) | (0xD << 4) | (Imm & 0x0F)); +} -void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +void ARMXEmitter::LDR (ARMReg dest, ARMReg base, Operand2 op2, bool Index, bool Add) +{ + Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | op2.IMMSR()); +} +void ARMXEmitter::LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) { Write32(condition | (0x61 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset); } +void ARMXEmitter::LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x01 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | (0xB << 4) | offset); +} +void ARMXEmitter::LDRSH(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x01 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | (0xF << 4) | offset); +} +void ARMXEmitter::LDRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x65 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | offset); +} +void ARMXEmitter::LDRSB(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add) +{ + Write32(condition | (0x01 << 20) | (Index << 24) | (Add << 23) | (base << 16) | (dest << 12) | (0xD << 4) | offset); +} +void ARMXEmitter::LDRLIT (ARMReg dest, u32 offset, bool Add) { Write32(condition | 0x05 << 24 | Add << 23 | 0x1F << 16 | dest << 12 | offset);} + void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList) { Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList); @@ -673,9 +798,8 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset) _assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough"); - if (imm & 0xC03) { + if (imm & 0xC03) ERROR_LOG(DYNA_REC, "VLDR: Bad offset %08x", imm); - } bool single_reg = Dest < D0; @@ -683,13 +807,13 @@ void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset) if (single_reg) { - Write32(NO_COND | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \ + Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \ | ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2)); } else { - Write32(NO_COND | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \ + Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \ | ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2)); } } @@ -703,9 +827,8 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset) _assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough"); - if (imm & 0xC03) { + if (imm & 0xC03) ERROR_LOG(DYNA_REC, "VSTR: Bad offset %08x", imm); - } bool single_reg = Src < D0; @@ -713,17 +836,16 @@ void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset) if (single_reg) { - Write32(NO_COND | (0xD << 24) | (Add << 23) | ((Src & 0x1) << 22) | (Base << 16) \ + Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x1) 
<< 22) | (Base << 16) \ | ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2)); - } else { - Write32(NO_COND | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \ + Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \ | ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2)); } } -void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm) +void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm, bool E) { _assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid Vd to VCMP"); bool single_reg = Vd < D0; @@ -733,16 +855,16 @@ void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x34 << 16) | ((Vd & 0x1E) << 11) \ - | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); + Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x34 << 16) | ((Vd & 0x1E) << 11) \ + | (E << 7) | (0x29 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); } else { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x34 << 16) | ((Vd & 0xF) << 12) \ - | (0x2F << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF)); + Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x34 << 16) | ((Vd & 0xF) << 12) \ + | (E << 7) | (0x2C << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF)); } } -void ARMXEmitter::VCMP(ARMReg Vd) +void ARMXEmitter::VCMP(ARMReg Vd, bool E) { _assert_msg_(DYNA_REC, Vd < Q0, "Passed invalid Vd to VCMP"); bool single_reg = Vd < D0; @@ -751,15 +873,26 @@ void ARMXEmitter::VCMP(ARMReg Vd) if (single_reg) { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x35 << 16) | ((Vd & 0x1E) << 11) \ - | (0x2B << 6)); + Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x35 << 16) | ((Vd & 0x1E) << 11) \ + | (E << 7) | (0x29 << 6)); } else { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x35 << 16) | ((Vd & 0xF) << 12) \ - | (0x2F << 6)); + Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x35 << 16) | ((Vd & 0xF) << 12) \ + | (E << 7) | (0x2C << 6)); } } + +void ARMXEmitter::VMRS_APSR() { + Write32(condition | 0xEF10A10 | (15 << 12)); +} +void ARMXEmitter::VMRS(ARMReg Rt) { + Write32(condition | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10); +} +void ARMXEmitter::VMSR(ARMReg Rt) { + Write32(condition | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10); +} + void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm) { _assert_msg_(DYNA_REC, Vd < Q0, "Pased invalid dest register to VSQRT"); @@ -773,13 +906,13 @@ void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | ((Vn & 0x1E) << 15) \ + Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | ((Vn & 0x1E) << 15) \ | ((Vd & 0x1E) << 11) | (0xA << 8) | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) \ | (Vm >> 1)); } else { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \ + Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | ((Vn & 0xF) << 16) \ | ((Vd & 0xF) << 12) | (0xB << 8) | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) \ | (Vm & 0xF)); } @@ -795,47 +928,17 @@ void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \ + Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \ | ((Vd & 0x1E) << 11) | (0x2B << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); } else { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \ + Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \ | ((Vd & 0xF) << 12) | (0x2F << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); } } // VFP and ASIMD -void ARMXEmitter::VABS(ARMReg 
Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd < Q0, "VABS doesn't currently support Quad reg"); - _assert_msg_(DYNA_REC, Vd >= S0, "VABS doesn't support ARM Regs"); - bool single_reg = Vd < D0; - bool double_reg = Vd < Q0; - - Vd = SubBase(Vd); - Vm = SubBase(Vm); - - if (single_reg) - { - Write32(NO_COND | (0xEB << 20) | ((Vd & 0x1) << 22) | ((Vd & 0x1E) << 11) \ - | (0xAC << 4) | ((Vm & 0x1) << 5) | (Vm >> 1)); - } - else - { - if (double_reg) - { - Write32(NO_COND | (0xEB << 20) | ((Vd & 0x10) << 18) | ((Vd & 0xF) << 12) \ - | (0xBC << 4) | ((Vm & 0x10) << 1) | (Vm & 0xF)); - } - else - { - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VADD with Quad Reg without support!"); - // XXX: TODO - } - } -} - void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm) { _assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VADD"); @@ -850,7 +953,7 @@ void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \ + Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \ | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1)); } @@ -858,7 +961,7 @@ void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm) { if (double_reg) { - Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \ + Write32(condition | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \ | ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \ | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) | (Vm & 0xF)); } @@ -885,7 +988,7 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \ + Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x3 << 20) \ | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ | ((Vn & 0x1) << 7) | (1 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); } @@ -893,7 +996,7 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm) { if (double_reg) { - Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \ + Write32(condition | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) \ | ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \ | ((Vn & 0x10) << 3) | (1 << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); } @@ -906,6 +1009,7 @@ void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm) } } } +// VFP and ASIMD void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm) { _assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VADD"); @@ -920,7 +1024,7 @@ void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x2 << 20) \ + Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x2 << 20) \ | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1)); } @@ -928,22 +1032,45 @@ void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm) { if (double_reg) { - Write32(NO_COND | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x2 << 20) \ + Write32(condition | (0x1C << 23) | ((Vd & 0x10) << 18) | (0x2 << 20) \ | ((Vn & 0xF) << 16) | ((Vd & 0xF) << 12) | (0xB << 8) \ | ((Vn & 0x10) << 3) | ((Vm & 0x10) << 2) | (Vm & 0xF)); } else { _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VMUL with Quad Reg without support!"); - // XXX: TODO } } } -void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm) +void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm) { - _assert_msg_(DYNA_REC, Vd < Q0, "VNEG doesn't currently support Quad reg"); - 
_assert_msg_(DYNA_REC, Vd >= S0, "VNEG doesn't support ARM Regs"); + _assert_msg_(DYNA_REC, Vd >= S0, "Passed invalid dest register to VMLA"); + _assert_msg_(DYNA_REC, Vn >= S0, "Passed invalid Vn to VMLA"); + _assert_msg_(DYNA_REC, Vm >= S0, "Passed invalid Vm to VMLA"); + bool single_reg = Vd < D0; + bool double_reg = Vd < Q0; + + Vd = SubBase(Vd); + Vn = SubBase(Vn); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(condition | (0x1C << 23) | ((Vd & 0x1) << 22) | (0x0 << 20) \ + | ((Vn & 0x1E) << 15) | ((Vd & 0x1E) << 11) | (0x5 << 9) \ + | ((Vn & 0x1) << 7) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + _assert_msg_(DYNA_REC, false, "VMLA: Please implement!"); + } +} + +void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm) +{ + _assert_msg_(DYNA_REC, Vd < Q0, "VABS doesn't currently support Quad reg"); + _assert_msg_(DYNA_REC, Vd >= S0, "VABS doesn't support ARM Regs"); bool single_reg = Vd < D0; bool double_reg = Vd < Q0; @@ -952,22 +1079,40 @@ void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm) if (single_reg) { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \ - | ((Vd & 0x1E) << 11) | (0x29 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); + Write32(condition | (0xEB << 20) | ((Vd & 0x1) << 22) | ((Vd & 0x1E) << 11) \ + | (0xAC << 4) | ((Vm & 0x1) << 5) | (Vm >> 1)); } else { if (double_reg) { - Write32(NO_COND | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \ - | ((Vd & 0xF) << 12) | (0x2D << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); + Write32(condition | (0xEB << 20) | ((Vd & 0x10) << 18) | ((Vd & 0xF) << 12) \ + | (0xBC << 4) | ((Vm & 0x10) << 1) | (Vm & 0xF)); } else { - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VNEG with Quad Reg without support!"); + _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use VADD with Quad Reg without support!"); // XXX: TODO - } + } +} + +void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm) +{ + bool single_reg = Vd < D0; + + Vd = SubBase(Vd); + Vm = SubBase(Vm); + + if (single_reg) + { + Write32(condition | (0x1D << 23) | ((Vd & 0x1) << 22) | (0x31 << 16) \ + | ((Vd & 0x1E) << 11) | (0x29 << 6) | ((Vm & 0x1) << 5) | (Vm >> 1)); + } + else + { + Write32(condition | (0x1D << 23) | ((Vd & 0x10) << 18) | (0x31 << 16) \ + | ((Vd & 0xF) << 12) | (0x2D << 6) | ((Vm & 0x10) << 2) | (Vm & 0xF)); } } @@ -978,7 +1123,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high) Dest = SubBase(Dest); - Write32(NO_COND | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \ + Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \ | (11 << 8) | ((Dest & 0x10) << 3) | (1 << 4)); } @@ -992,7 +1137,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) { // Moving to a Neon register FROM ARM Reg Dest = (ARMReg)(Dest - S0); - Write32(NO_COND | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \ + Write32(condition | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \ | (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4)); return; } @@ -1012,7 +1157,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) { // Moving to ARM Reg from Neon Register Src = (ARMReg)(Src - S0); - Write32(NO_COND | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \ + Write32(condition | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \ | (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4)); return; } @@ -1042,7 +1187,7 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) if (Single) { - Write32(NO_COND | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \ + Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | 
((Dest & 0x1E) << 11) \ | (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1)); } else @@ -1059,10 +1204,28 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) } else { - Write32(NO_COND | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \ + Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \ | (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF)); } } } +void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags) +{ + bool single_reg = (Dest < D0) && (Source < D0); + int op = ((flags & TO_INT) ? (flags & ROUND_TO_ZERO) : (flags & IS_SIGNED)) ? 1 : 0; + int op2 = ((flags & TO_INT) ? (flags & IS_SIGNED) : 0) ? 1 : 0; + Dest = SubBase(Dest); + Source = SubBase(Source); + + if (single_reg) + { + Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ + | ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1)); + } else { + Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ + | ((Dest & 0xF) << 12) | (1 << 8) | (op << 7) | (0x29 << 6) | ((Source & 0x10) << 1) | (Source & 0xF)); + } +} + } diff --git a/Source/Core/Common/Src/ArmEmitter.h b/Source/Core/Common/Src/ArmEmitter.h index d3155285f3..ce10008a37 100644 --- a/Source/Core/Common/Src/ArmEmitter.h +++ b/Source/Core/Common/Src/ArmEmitter.h @@ -25,6 +25,7 @@ #if defined(__SYMBIAN32__) || defined(PANDORA) #include #endif +#include #undef _IP #undef R0 @@ -32,6 +33,12 @@ #undef _LR #undef _PC +// VCVT flags +#define TO_FLOAT 0 +#define TO_INT 1 << 0 +#define IS_SIGNED 1 << 1 +#define ROUND_TO_ZERO 1 << 2 + namespace ArmGen { enum ARMReg @@ -171,7 +178,7 @@ public: Value = base; } - Operand2(u8 shift, ShiftType type, ARMReg base)// For IMM shifted register + Operand2(ARMReg base, ShiftType type, u8 shift)// For IMM shifted register { if(shift == 32) shift = 0; switch (type) @@ -327,6 +334,13 @@ struct FixupBranch int type; //0 = B 1 = BL }; +struct LiteralPool +{ + s32 loc; + u8* ldr_address; + u32 val; +}; + typedef const u8* JumpTarget; class ARMXEmitter @@ -336,8 +350,9 @@ private: u8 *code, *startcode; u8 *lastCacheFlushEnd; u32 condition; + std::vector currentLitPool; - void WriteStoreOp(u32 op, ARMReg dest, ARMReg src, s16 op2); + void WriteStoreOp(u32 op, ARMReg src, ARMReg dest, s16 op2); void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList); void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2); void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2); @@ -373,6 +388,10 @@ public: void FlushIcacheSection(u8 *start, u8 *end); u8 *GetWritableCodePtr(); + void FlushLitPool(); + void AddNewLit(u32 val); + + CCFlags GetCC() { return CCFlags(condition >> 28); } void SetCC(CCFlags cond = CC_AL); // Special purpose instructions @@ -425,8 +444,10 @@ public: void LSL (ARMReg dest, ARMReg src, ARMReg op2); void LSLS(ARMReg dest, ARMReg src, Operand2 op2); void LSLS(ARMReg dest, ARMReg src, ARMReg op2); + void LSR (ARMReg dest, ARMReg src, Operand2 op2); void SBC (ARMReg dest, ARMReg src, Operand2 op2); void SBCS(ARMReg dest, ARMReg src, Operand2 op2); + void RBIT(ARMReg dest, ARMReg src); void REV (ARMReg dest, ARMReg src); void REV16 (ARMReg dest, ARMReg src); void RSC (ARMReg dest, ARMReg src, Operand2 op2); @@ -457,33 +478,53 @@ public: void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); void SMULL(ARMReg destLo, ARMReg destHi, 
ARMReg rn, ARMReg rm); + void UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); + void SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); + void SXTB(ARMReg dest, ARMReg op2); void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0); void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0); + void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width); + void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width); + void CLZ(ARMReg rd, ARMReg rm); + // Using just MSR here messes with our defines on the PPC side of stuff (when this code was in dolphin...) // Just need to put an underscore here, bit annoying. void _MSR (bool nzcvq, bool g, Operand2 op2); - void _MSR (bool nzcvq, bool g, ARMReg src ); + void _MSR (bool nzcvq, bool g, ARMReg src); void MRS (ARMReg dest); // Memory load/store operations - void LDR (ARMReg dest, ARMReg src, s16 op2 = 0); + void LDR (ARMReg dest, ARMReg src, s16 op2 = 0); + void LDRH (ARMReg dest, ARMReg src, Operand2 op2 = 0); + void LDRSH(ARMReg dest, ARMReg src, s16 op2 = 0); + void LDRB (ARMReg dest, ARMReg src, s16 op2 = 0); + void LDRSB(ARMReg dest, ARMReg src, Operand2 op2 = 0); // Offset adds to the base register in LDR - void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); - void LDRH(ARMReg dest, ARMReg src, Operand2 op = 0); - void LDRB(ARMReg dest, ARMReg src, s16 op2 = 0); - void STR (ARMReg dest, ARMReg src, s16 op2 = 0); - // Offset adds on to the destination register in STR - void STR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDR (ARMReg dest, ARMReg base, Operand2 op2, bool Index, bool Add); + void LDR (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDRH (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDRSH(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDRB (ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDRSB(ARMReg dest, ARMReg base, ARMReg offset, bool Index, bool Add); + void LDRLIT(ARMReg dest, u32 offset, bool Add); + + void STR (ARMReg result, ARMReg base, s16 op2 = 0); + void STRH (ARMReg result, ARMReg base, Operand2 op2 = 0); + void STRB (ARMReg result, ARMReg base, s16 op2 = 0); + // Offset adds on to the destination register in STR + void STR (ARMReg result, ARMReg base, Operand2 op2, bool Index, bool Add); + void STR (ARMReg result, ARMReg base, ARMReg offset, bool Index, bool Add); + void STRH (ARMReg result, ARMReg base, ARMReg offset, bool Index, bool Add); + void STRB (ARMReg result, ARMReg base, ARMReg offset, bool Index, bool Add); - void STRB(ARMReg dest, ARMReg src, s16 op2 = 0); void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...); void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...); // Exclusive Access operations void LDREX(ARMReg dest, ARMReg base); - // dest contains the result if the instruction managed to store the value - void STREX(ARMReg dest, ARMReg base, ARMReg op); + // result contains the result if the instruction managed to store the value + void STREX(ARMReg result, ARMReg base, ARMReg op); void DMB (); void SVC(Operand2 op); @@ -501,9 +542,9 @@ public: // VFP Only void VLDR(ARMReg Dest, ARMReg Base, s16 offset); void VSTR(ARMReg Src, ARMReg Base, s16 offset); - void VCMP(ARMReg Vd, ARMReg Vm); + void VCMP(ARMReg Vd, ARMReg Vm, bool E); // Compares against zero - void VCMP(ARMReg Vd); + void VCMP(ARMReg Vd, bool E); void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VSQRT(ARMReg Vd, ARMReg Vm); @@ -513,13 +554,25 @@ public: void VABS(ARMReg Vd, 
ARMReg Vm); void VNEG(ARMReg Vd, ARMReg Vm); void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm); + void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm); void VMOV(ARMReg Dest, ARMReg Src, bool high); void VMOV(ARMReg Dest, ARMReg Src); + void VCVT(ARMReg Dest, ARMReg Src, int flags); + + void VMRS_APSR(); + void VMRS(ARMReg Rt); + void VMSR(ARMReg Rt); void QuickCallFunction(ARMReg scratchreg, void *func); - // Utility functions + + // Wrapper around MOVT/MOVW with fallbacks. void MOVI2R(ARMReg reg, u32 val, bool optimize = true); - void ARMABI_MOVI2M(Operand2 op, Operand2 val); + void MOVI2F(ARMReg dest, float val, ARMReg tempReg); + + void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); + void ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); + + }; // class ARMXEmitter @@ -556,7 +609,9 @@ public: // Call this when shutting down. Don't rely on the destructor, even though it'll do the job. void FreeCodeSpace() { +#ifndef __SYMBIAN32__ FreeMemoryPages(region, region_size); +#endif region = NULL; region_size = 0; } diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp index 8748ebac2d..45b17ab847 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/Jit.cpp @@ -148,20 +148,20 @@ void JitArm::DoDownCount() if(js.downcountAmount < 255) // We can enlarge this if we used rotations { SUBS(rB, rB, js.downcountAmount); - STR(rA, rB); + STR(rB, rA); } else { ARMReg rC = gpr.GetReg(false); MOVI2R(rC, js.downcountAmount); SUBS(rB, rB, rC); - STR(rA, rB); + STR(rB, rA); } gpr.Unlock(rA, rB); } void JitArm::WriteExitDestInR(ARMReg Reg) { - STR(R9, Reg, PPCSTATE_OFF(pc)); + STR(Reg, R9, PPCSTATE_OFF(pc)); Cleanup(); DoDownCount(); MOVI2R(Reg, (u32)asm_routines.dispatcher); @@ -170,7 +170,7 @@ void JitArm::WriteExitDestInR(ARMReg Reg) } void JitArm::WriteRfiExitDestInR(ARMReg Reg) { - STR(R9, Reg, PPCSTATE_OFF(pc)); + STR(Reg, R9, PPCSTATE_OFF(pc)); Cleanup(); DoDownCount(); @@ -209,7 +209,7 @@ void JitArm::WriteExit(u32 destination, int exit_num) { ARMReg A = gpr.GetReg(false); MOVI2R(A, destination); - STR(R9, A, PPCSTATE_OFF(pc)); + STR(A, R9, PPCSTATE_OFF(pc)); MOVI2R(A, (u32)asm_routines.dispatcher); B(A); } @@ -384,7 +384,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo LDR(A, R9, PPCSTATE_OFF(msr)); TST(A, Shift); FixupBranch b1 = B_CC(CC_NEQ); - STR(R9, C, PPCSTATE_OFF(pc)); + STR(C, R9, PPCSTATE_OFF(pc)); MOVI2R(A, (u32)asm_routines.fpException); B(A); SetJumpTarget(b1); @@ -397,7 +397,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo MOVI2R(rA, (u32)&b->runCount); // Load in to register LDR(rB, rA); // Load the actual value in to R11. 
ADD(rB, rB, 1); // Add one to the value - STR(rA, rB); // Now store it back in the memory location + STR(rB, rA); // Now store it back in the memory location // get start tic PROFILER_QUERY_PERFORMANCE_COUNTER(&b->ticStart); gpr.Unlock(rA, rB); @@ -461,7 +461,7 @@ const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlo MOVI2R(RA, (u32)&opinfo->runCount); LDR(RB, RA); ADD(RB, RB, 1); - STR(RA, RB); + STR(RB, RA); gpr.Unlock(RA, RB); } if (!ops[i].skip) diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp index f95c2e58e6..690f56fdf6 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArmCache.cpp @@ -38,7 +38,7 @@ using namespace ArmGen; ARMXEmitter emit((u8 *)location); emit.MOVI2R(R11, address); emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher); - emit.STR(R9, R11, PPCSTATE_OFF(pc)); + emit.STR(R11, R9, PPCSTATE_OFF(pc)); emit.B(R12); } diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp index 3aa972248d..8163415d8b 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_Branch.cpp @@ -52,7 +52,7 @@ void JitArm::sc(UGeckoInstruction inst) ARMReg rA = gpr.GetReg(); LDR(rA, R9, PPCSTATE_OFF(Exceptions)); ORR(rA, rA, EXCEPTION_SYSCALL); - STR(R9, rA, PPCSTATE_OFF(Exceptions)); + STR(rA, R9, PPCSTATE_OFF(Exceptions)); gpr.Unlock(rA); WriteExceptionExit(); @@ -84,14 +84,14 @@ void JitArm::rfi(UGeckoInstruction inst) LDR(rD, R9, PPCSTATE_OFF(msr)); AND(rD, rD, rB); // rD = Masked MSR - STR(R9, rD, PPCSTATE_OFF(msr)); + STR(rD, R9, PPCSTATE_OFF(msr)); LDR(rB, R9, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here AND(rB, rB, rC); // rB contains masked SRR1 here ORR(rB, rD, rB); // rB = Masked MSR OR masked SRR1 - STR(R9, rB, PPCSTATE_OFF(msr)); // STR rB in to rA + STR(rB, R9, PPCSTATE_OFF(msr)); // STR rB in to rA LDR(rA, R9, PPCSTATE_OFF(spr[SPR_SRR0])); @@ -117,7 +117,7 @@ void JitArm::bx(UGeckoInstruction inst) ARMReg rA = gpr.GetReg(false); u32 Jumpto = js.compilerPC + 4; MOVI2R(rA, Jumpto); - STR(R9, rA, PPCSTATE_OFF(spr[SPR_LR])); + STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } // If this is not the last instruction of a block, @@ -168,7 +168,7 @@ void JitArm::bcx(UGeckoInstruction inst) { LDR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); SUBS(rB, rB, 1); - STR(R9, rB, PPCSTATE_OFF(spr[SPR_CTR])); + STR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); //SUB(32, M(&CTR), Imm8(1)); if (inst.BO & BO_BRANCH_IF_CTR_0) @@ -193,7 +193,7 @@ void JitArm::bcx(UGeckoInstruction inst) { u32 Jumpto = js.compilerPC + 4; MOVI2R(rB, Jumpto); - STR(R9, rB, PPCSTATE_OFF(spr[SPR_LR])); + STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); // Careful, destroys R14, R12 } gpr.Unlock(rA, rB); @@ -235,7 +235,7 @@ void JitArm::bcctrx(UGeckoInstruction inst) { u32 Jumpto = js.compilerPC + 4; MOVI2R(rA, Jumpto); - STR(R9, rA, PPCSTATE_OFF(spr[SPR_LR])); + STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); // ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } MVN(rB, 0x3); // 0xFFFFFFFC @@ -269,7 +269,7 @@ void JitArm::bcctrx(UGeckoInstruction inst) if (inst.LK_3){ u32 Jumpto = js.compilerPC + 4; MOVI2R(rB, Jumpto); - STR(R9, rB, PPCSTATE_OFF(spr[SPR_LR])); + STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } gpr.Unlock(rB); // rA gets 
unlocked in WriteExitDestInR @@ -290,7 +290,7 @@ void JitArm::bclrx(UGeckoInstruction inst) ARMReg rA = gpr.GetReg(false); u32 Jumpto = js.compilerPC + 4; MOVI2R(rA, Jumpto); - STR(R9, rA, PPCSTATE_OFF(spr[SPR_LR])); + STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); // ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } return; @@ -305,7 +305,7 @@ void JitArm::bclrx(UGeckoInstruction inst) { LDR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); SUBS(rB, rB, 1); - STR(R9, rB, PPCSTATE_OFF(spr[SPR_CTR])); + STR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); //SUB(32, M(&CTR), Imm8(1)); if (inst.BO & BO_BRANCH_IF_CTR_0) @@ -341,7 +341,7 @@ void JitArm::bclrx(UGeckoInstruction inst) if (inst.LK){ u32 Jumpto = js.compilerPC + 4; MOVI2R(rB, Jumpto); - STR(R9, rB, PPCSTATE_OFF(spr[SPR_LR])); + STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); } gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp index 9d248ebd02..1617b0fef6 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_LoadStore.cpp @@ -141,7 +141,7 @@ void JitArm::StoreFromReg(ARMReg dest, ARMReg value, int accessSize, s32 offset) switch (accessSize) { case 32: - STR(dest, value); // 8 + STR(value, dest); // 8 break; case 16: // Not implemented diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index bbad7b780a..7eb8963e15 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -70,7 +70,7 @@ void JitArm::mtspr(UGeckoInstruction inst) } // OK, this is easy. 
- STR(R9, RD, PPCSTATE_OFF(spr) + iIndex * 4); + STR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); } void JitArm::mfspr(UGeckoInstruction inst) @@ -99,6 +99,6 @@ void JitArm::mtmsr(UGeckoInstruction inst) // Don't interpret this, if we do we get thrown out //JITDISABLE(SystemRegisters) - STR(R9, gpr.R(inst.RS), PPCSTATE_OFF(msr)); + STR(gpr.R(inst.RS), R9, PPCSTATE_OFF(msr)); WriteExit(js.compilerPC + 4, 0); } diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp index a5ae0a1e81..28263baa32 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitAsm.cpp @@ -102,10 +102,10 @@ void JitArmAsmRoutineManager::Generate() fpException = GetCodePtr(); LDR(R0, R9, PPCSTATE_OFF(Exceptions)); ORR(R0, R0, EXCEPTION_FPU_UNAVAILABLE); - STR(R9, R0, PPCSTATE_OFF(Exceptions)); + STR(R0, R9, PPCSTATE_OFF(Exceptions)); QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R9, R0, PPCSTATE_OFF(pc)); + STR(R0, R9, PPCSTATE_OFF(pc)); B(dispatcher); SetJumpTarget(bail); @@ -117,10 +117,10 @@ void JitArmAsmRoutineManager::Generate() // Does exception checking testExceptions = GetCodePtr(); LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R9, R0, PPCSTATE_OFF(npc)); + STR(R0, R9, PPCSTATE_OFF(npc)); QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R9, R0, PPCSTATE_OFF(pc)); + STR(R0, R9, PPCSTATE_OFF(pc)); // Check the state pointer to see if we are exiting // Gets checked on every exception check MOVI2R(R0, (u32)PowerPC::GetStatePtr()); diff --git a/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp index 18777fd8c9..2a9aa2154f 100644 --- a/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp +++ b/Source/Core/Core/Src/PowerPC/JitArm32/JitRegCache.cpp @@ -148,7 +148,7 @@ ARMReg ArmRegCache::R(u32 preg) return ArmCRegs[a].Reg; } // Alright, we couldn't get a free space, dump that least used register - emit->STR(R9, ArmCRegs[Num].Reg, PPCSTATE_OFF(gpr) + ArmCRegs[Num].PPCReg * 4); + emit->STR(ArmCRegs[Num].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[Num].PPCReg * 4); emit->LDR(ArmCRegs[Num].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); ArmCRegs[Num].PPCReg = preg; ArmCRegs[Num].LastLoad = 0; @@ -160,7 +160,7 @@ void ArmRegCache::Flush() for(u8 a = 0; a < NUMPPCREG; ++a) if (ArmCRegs[a].PPCReg != 33) { - emit->STR(R9, ArmCRegs[a].Reg, PPCSTATE_OFF(gpr) + ArmCRegs[a].PPCReg * 4); + emit->STR(ArmCRegs[a].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[a].PPCReg * 4); ArmCRegs[a].PPCReg = 33; ArmCRegs[a].LastLoad = 0; }
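
Note on the new ARMv6 literal-pool path in ArmEmitter.cpp (reviewer sketch, not part of the patch): on pre-ARMv7 cores MOVI2R now queues the constant via AddNewLit() and emits an LDRLIT placeholder, so whatever drives the emitter has to call FlushLitPool() before the block is finalized, and while the pending LDRs are still inside the literal LDR's immediate range (roughly ±4 KB — the backpatch just ORs in abs(offset) with no range check). A minimal usage sketch, assuming `buf` points to a writable/executable region owned by the caller, that `"ArmEmitter.h"` is the correct include for this tree, and that `u8` comes in via the project's Common headers:

```cpp
#include "ArmEmitter.h"   // ArmGen::ARMXEmitter and the ARM register enums

using namespace ArmGen;

// buf: hypothetical pointer to an RWX code region set up by the caller.
void EmitExample(u8 *buf)
{
	ARMXEmitter emit(buf);

	// On ARMv7 this expands to MOVW/MOVT; on ARMv6 it calls AddNewLit()
	// and emits an LDRLIT that still needs backpatching.
	emit.MOVI2R(R0, 0xCAFEBABE);

	// ... emit the rest of the block here ...

	// Writes the pooled constants after the current code pointer and
	// backpatches every pending LDRLIT with its PC-relative offset.
	emit.FlushLitPool();

	emit.FlushIcacheSection(buf, emit.GetWritableCodePtr());
}
```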
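
Note on the store operand order (reviewer note, not part of the patch): WriteStoreOp and the STR/STRB/STRH overloads now take the value register first and the base register second, matching LDR, which is why every JIT call site in this diff was flipped. As a reading aid for the call-site churn:

```cpp
// Illustration only -- rA is a scratch GPR and R9 the PPCState base pointer,
// as used throughout the JitArm32 code in this diff.
//
//   before:  STR(R9, rA, PPCSTATE_OFF(pc));   // (base, value, offset)
//   after:   STR(rA, R9, PPCSTATE_OFF(pc));   // (value, base, offset), same order as LDR
```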
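
Note on the new VFP helpers (sketch only; the flag semantics below are read off the new VCVT encoding and the macro names, not verified on hardware): VCVT takes a combination of the TO_INT/TO_FLOAT, IS_SIGNED and ROUND_TO_ZERO macros added to ArmEmitter.h, and MOVI2F materialises a float constant through a scratch GPR. The register choices here (S0, S1, S2, R12) are arbitrary examples:

```cpp
// Assumed usage of the new VFP helpers on an existing ARMXEmitter.
void EmitConvertSketch(ArmGen::ARMXEmitter &emit)
{
	using namespace ArmGen;

	// float in S1 -> signed int in S0, truncating (round towards zero).
	emit.VCVT(S0, S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);

	// signed int in S1 -> float in S0.
	emit.VCVT(S0, S1, TO_FLOAT | IS_SIGNED);

	// Load 1.0f into S2, using R12 as the scratch GPR for the raw bits.
	emit.MOVI2F(S2, 1.0f, R12);
}
```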