diff --git a/CMakeLists.txt b/CMakeLists.txt index 36a3bb3e7a..7c4f4d440d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -159,11 +159,7 @@ if(NOT ENABLE_GENERIC) message(FATAL_ERROR "x86_32 is an unsupported platform. Enable generic build if you really want a JIT-less binary.") endif() elseif(${CMAKE_SYSTEM_PROCESSOR} MATCHES "^arm") - # This option only applies to 32bit ARM - set(_M_ARM 1) - set(_M_ARM_32 1) - add_definitions(-D_M_ARM=1 -D_M_ARM_32=1) - add_definitions(-marm -march=armv7-a) + message(FATAL_ERROR "ARMv7 is an unsupported platform. Enable generic build if you really want a JIT-less binary.") elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64") # This option only applies to 64bit ARM set(_M_ARM 1) diff --git a/Source/Android/app/build.gradle b/Source/Android/app/build.gradle index 56d8a88fbc..937bdcd538 100644 --- a/Source/Android/app/build.gradle +++ b/Source/Android/app/build.gradle @@ -52,16 +52,6 @@ android { // Define product flavors, which can be split into categories. Common examples // of product flavors are paid vs. free, ARM vs. x86, etc. productFlavors { - arm { - // This flavor is mutually exclusive against any flavor in the same dimension. - flavorDimension "abi" - - // When building this flavor, only include native libs from the specified folder. - ndk { - abiFilter "armeabi-v7a" - } - } - arm_64 { flavorDimension "abi" ndk { diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/settings/cpu/CPUSettingsFragment.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/settings/cpu/CPUSettingsFragment.java index a927d9824a..9dcc8776aa 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/settings/cpu/CPUSettingsFragment.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/settings/cpu/CPUSettingsFragment.java @@ -42,11 +42,6 @@ public final class CPUSettingsFragment extends PreferenceFragment cpuCores.setEntries(R.array.emuCoreEntriesARM64); cpuCores.setEntryValues(R.array.emuCoreValuesARM64); } - else if (Build.CPU_ABI.contains("arm")) - { - cpuCores.setEntries(R.array.emuCoreEntriesARM); - cpuCores.setEntryValues(R.array.emuCoreValuesARM); - } else { cpuCores.setEntries(R.array.emuCoreEntriesOther); diff --git a/Source/Core/Common/ArmEmitter.cpp b/Source/Core/Common/ArmEmitter.cpp deleted file mode 100644 index e2d35c8d88..0000000000 --- a/Source/Core/Common/ArmEmitter.cpp +++ /dev/null @@ -1,2227 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include -#include - -#include "Common/ArmEmitter.h" -#include "Common/Common.h" -#include "Common/CPUDetect.h" - -// For cache flushing on Symbian/iOS/Blackberry -#ifdef __SYMBIAN32__ -#include -#endif - -#ifdef IOS -#include -#include -#endif - -#ifdef BLACKBERRY -#include -#endif - -namespace ArmGen -{ - -inline u32 RotR(u32 a, int amount) -{ - if (!amount) return a; - return (a >> amount) | (a << (32 - amount)); -} - -inline u32 RotL(u32 a, int amount) -{ - if (!amount) return a; - return (a << amount) | (a >> (32 - amount)); -} - -bool TryMakeOperand2(u32 imm, Operand2 &op2) -{ - // Just brute force it. - for (int i = 0; i < 16; i++) - { - int mask = RotR(0xFF, i * 2); - if ((imm & mask) == imm) - { - op2 = Operand2((u8)(RotL(imm, i * 2)), (u8)i); - return true; - } - } - - return false; -} - -bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse) -{ - if (!TryMakeOperand2(imm, op2)) - { - *inverse = true; - return TryMakeOperand2(~imm, op2); - } - else - { - *inverse = false; - return true; - } -} - -bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated) -{ - if (!TryMakeOperand2(imm, op2)) - { - *negated = true; - return TryMakeOperand2(-imm, op2); - } - else - { - *negated = false; - return true; - } -} - -Operand2 AssumeMakeOperand2(u32 imm) -{ - Operand2 op2; - bool result = TryMakeOperand2(imm, op2); - (void) result; - _assert_msg_(DYNA_REC, result, "Could not make assumed Operand2."); - return op2; -} - -bool ARMXEmitter::TrySetValue_TwoOp(ARMReg reg, u32 val) -{ - int ops = 0; - for (int i = 0; i < 16; i++) - { - if ((val >> (i*2)) & 0x3) - { - ops++; - i+=3; - } - } - if (ops > 2) - return false; - - bool first = true; - for (int i = 0; i < 16; i++, val >>=2) - { - if (val & 0x3) - { - first ? MOV(reg, Operand2((u8)val, (u8)((16-i) & 0xF))) - : ORR(reg, reg, Operand2((u8)val, (u8)((16-i) & 0xF))); - first = false; - i+=3; - val >>= 6; - } - } - return true; -} - -void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate) -{ - union {float f; u32 u;} conv; - conv.f = negate ? -val : val; - // Try moving directly first if mantisse is empty - if (cpu_info.bVFPv3 && ((conv.u & 0x7FFFF) == 0)) - { - // VFP Encoding for Imms: <7> Not(<6>) Repeat(<6>,5) <5:0> Zeros(19) - bool bit6 = (conv.u & 0x40000000) == 0x40000000; - bool canEncode = true; - for (u32 mask = 0x20000000; mask >= 0x2000000; mask >>= 1) - { - if (((conv.u & mask) == mask) == bit6) - canEncode = false; - } - if (canEncode) - { - u32 imm8 = (conv.u & 0x80000000) >> 24; // sign bit - imm8 |= (!bit6 << 6); - imm8 |= (conv.u & 0x1F80000) >> 19; - VMOV(dest, IMM(imm8)); - return; - } - } - MOVI2R(tempReg, conv.u); - VMOV(dest, tempReg); - // Otherwise, possible to use a literal pool and VLDR directly (+- 1020) -} - -void ARMXEmitter::ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch) -{ - Operand2 op2; - bool negated; - if (TryMakeOperand2_AllowNegation(val, op2, &negated)) - { - if (!negated) - ADD(rd, rs, op2); - else - SUB(rd, rs, op2); - } - else - { - MOVI2R(scratch, val); - ADD(rd, rs, scratch); - } -} - -void ARMXEmitter::ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch) -{ - Operand2 op2; - bool inverse; - if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) - { - if (!inverse) - { - AND(rd, rs, op2); - } - else - { - BIC(rd, rs, op2); - } - } - else - { - MOVI2R(scratch, val); - AND(rd, rs, scratch); - } -} - -void ARMXEmitter::CMPI2R(ARMReg rs, u32 val, ARMReg scratch) -{ - Operand2 op2; - bool negated; - if (TryMakeOperand2_AllowNegation(val, op2, &negated)) - { - if (!negated) - CMP(rs, op2); - else - CMN(rs, op2); - } - else - { - MOVI2R(scratch, val); - CMP(rs, scratch); - } -} - -void ARMXEmitter::ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch) -{ - Operand2 op2; - if (TryMakeOperand2(val, op2)) - { - ORR(rd, rs, op2); - } - else - { - MOVI2R(scratch, val); - ORR(rd, rs, scratch); - } -} - -void ARMXEmitter::FlushLitPool() -{ - for (LiteralPool& pool : currentLitPool) - { - // Search for duplicates - for (LiteralPool& old_pool : currentLitPool) - { - if (old_pool.val == pool.val) - pool.loc = old_pool.loc; - } - - // Write the constant to Literal Pool - if (!pool.loc) - { - pool.loc = (s32)code; - Write32(pool.val); - } - s32 offset = pool.loc - (s32)pool.ldr_address - 8; - - // Backpatch the LDR - *(u32*)pool.ldr_address |= (offset >= 0) << 23 | abs(offset); - } - // TODO: Save a copy of previous pools in case they are still in range. - currentLitPool.clear(); -} - -void ARMXEmitter::AddNewLit(u32 val) -{ - LiteralPool pool_item; - pool_item.loc = 0; - pool_item.val = val; - pool_item.ldr_address = code; - currentLitPool.push_back(pool_item); -} - -void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize) -{ - Operand2 op2; - bool inverse; - - if (cpu_info.bArmV7 && !optimize) - { - // For backpatching on ARMv7 - MOVW(reg, val & 0xFFFF); - MOVT(reg, val, true); - } - else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) - { - inverse ? MVN(reg, op2) : MOV(reg, op2); - } - else - { - if (cpu_info.bArmV7) - { - // Use MOVW+MOVT for ARMv7+ - MOVW(reg, val & 0xFFFF); - if (val & 0xFFFF0000) - MOVT(reg, val, true); - } - else if (!TrySetValue_TwoOp(reg,val)) - { - // Use literal pool for ARMv6. - AddNewLit(val); - LDR(reg, _PC); // To be backpatched later - } - } -} - -void ARMXEmitter::QuickCallFunction(ARMReg reg, void *func) -{ - if (BLInRange(func)) - { - BL(func); - } - else - { - MOVI2R(reg, (u32)(func)); - BL(reg); - } -} - -void ARMXEmitter::SetCodePtr(u8 *ptr) -{ - code = ptr; - startcode = code; - lastCacheFlushEnd = ptr; -} - -const u8 *ARMXEmitter::GetCodePtr() const -{ - return code; -} - -u8 *ARMXEmitter::GetWritableCodePtr() -{ - return code; -} - -void ARMXEmitter::ReserveCodeSpace(u32 bytes) -{ - for (u32 i = 0; i < bytes/4; i++) - Write32(0xE1200070); //bkpt 0 -} - -const u8 *ARMXEmitter::AlignCode16() -{ - ReserveCodeSpace((-(s32)code) & 15); - return code; -} - -const u8 *ARMXEmitter::AlignCodePage() -{ - ReserveCodeSpace((-(s32)code) & 4095); - return code; -} - -void ARMXEmitter::FlushIcache() -{ - FlushIcacheSection(lastCacheFlushEnd, code); - lastCacheFlushEnd = code; -} - -void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end) -{ -#ifdef __SYMBIAN32__ - User::IMB_Range(start, end); -#elif defined(BLACKBERRY) - msync(start, end - start, MS_SYNC | MS_INVALIDATE_ICACHE); -#elif defined(IOS) - // Header file says this is equivalent to: sys_icache_invalidate(start, end - start); - sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start); -#elif !defined(_WIN32) -#ifdef __clang__ - __clear_cache(start, end); -#else - __builtin___clear_cache(start, end); -#endif -#endif -} - -void ARMXEmitter::SetCC(CCFlags cond) -{ - condition = cond << 28; -} - -void ARMXEmitter::NOP(int count) -{ - for (int i = 0; i < count; i++) - { - Write32(condition | 0x0320F000); - } -} - -void ARMXEmitter::SETEND(bool BE) -{ - //SETEND is non-conditional - Write32(0xF1010000 | (BE << 9)); -} -void ARMXEmitter::BKPT(u16 arg) -{ - Write32(condition | 0x01200070 | (arg << 4 & 0x000FFF00) | (arg & 0x0000000F)); -} -void ARMXEmitter::YIELD() -{ - Write32(condition | 0x0320F001); -} - -void ARMXEmitter::MRC(u32 coproc, u32 opc1, ARMReg Rt, u32 CRn, u32 CRm, u32 opc2) -{ - _assert_msg_(DYNA_REC, coproc <= 0xF, "%s has co-processor that is %d when it must be under 16!", __FUNCTION__, coproc); - _assert_msg_(DYNA_REC, opc1 <= 7, "%s has opc1 that is %d when it must be under 8!", __FUNCTION__, opc1); - _assert_msg_(DYNA_REC, CRn <= 0xF, "%s has CRn that is %d when it must be under 16!", __FUNCTION__, CRn); - _assert_msg_(DYNA_REC, opc2 <= 7, "%s has opc2 that is %d when it must be under 8!", __FUNCTION__, opc2); - - Write32(condition | (0b1110 << 24) | (opc1 << 21) | (1 << 20) | (CRn << 16) \ - | (Rt << 12) | (coproc << 8) | (opc2 << 5) | (1 << 4) | CRm); -} - -void ARMXEmitter::MCR(u32 coproc, u32 opc1, ARMReg Rt, u32 CRn, u32 CRm, u32 opc2) -{ - _assert_msg_(DYNA_REC, coproc <= 0xF, "%s has co-processor that is %d when it must be under 16!", __FUNCTION__, coproc); - _assert_msg_(DYNA_REC, opc1 <= 7, "%s has opc1 that is %d when it must be under 8!", __FUNCTION__, opc1); - _assert_msg_(DYNA_REC, CRn <= 0xF, "%s has CRn that is %d when it must be under 16!", __FUNCTION__, CRn); - _assert_msg_(DYNA_REC, opc2 <= 7, "%s has opc2 that is %d when it must be under 8!", __FUNCTION__, opc2); - - Write32(condition | (0b1110 << 24) | (opc1 << 21) | (CRn << 16) \ - | (Rt << 12) | (coproc << 8) | (opc2 << 5) | (1 << 4) | CRm); -} - -FixupBranch ARMXEmitter::B() -{ - FixupBranch branch; - branch.type = 0; // Zero for B - branch.ptr = code; - branch.condition = condition; - //We'll write NOP here for now. - Write32(condition | 0x0320F000); - return branch; -} -FixupBranch ARMXEmitter::BL() -{ - FixupBranch branch; - branch.type = 1; // Zero for B - branch.ptr = code; - branch.condition = condition; - //We'll write NOP here for now. - Write32(condition | 0x0320F000); - return branch; -} - -FixupBranch ARMXEmitter::B_CC(CCFlags Cond) -{ - FixupBranch branch; - branch.type = 0; // Zero for B - branch.ptr = code; - branch.condition = Cond << 28; - //We'll write NOP here for now. - Write32(condition | 0x0320F000); - return branch; -} -void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr) -{ - s32 distance = (s32)fnptr - (s32(code) + 8); - _assert_msg_(DYNA_REC, distance > -0x2000000 && distance <= 0x2000000, - "B_CC out of range (%p calls %p)", code, fnptr); - - Write32((Cond << 28) | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF)); -} -FixupBranch ARMXEmitter::BL_CC(CCFlags Cond) -{ - FixupBranch branch; - branch.type = 1; // Zero for B - branch.ptr = code; - branch.condition = Cond << 28; - //We'll write NOP here for now. - Write32(condition | 0x0320F000); - return branch; -} -void ARMXEmitter::SetJumpTarget(FixupBranch const &branch) -{ - s32 distance = (s32(code) - 8) - (s32)branch.ptr; - _assert_msg_(DYNA_REC, distance > -0x2000000 && distance <= 0x2000000, - "SetJumpTarget out of range (%p calls %p)", code, branch.ptr); - u32 instr = (u32)(branch.condition | ((distance >> 2) & 0x00FFFFFF)); - instr |= (0 == branch.type) ? /* B */ 0x0A000000 : /* BL */ 0x0B000000; - *(u32*)branch.ptr = instr; -} -void ARMXEmitter::B(const void *fnptr) -{ - s32 distance = (s32)fnptr - (s32(code) + 8); - _assert_msg_(DYNA_REC, distance > -0x2000000 && distance <= 0x2000000, - "B out of range (%p calls %p)", code, fnptr); - - Write32(condition | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF)); -} - -void ARMXEmitter::B(ARMReg src) -{ - Write32(condition | 0x12FFF10 | src); -} - -bool ARMXEmitter::BLInRange(const void *fnptr) -{ - s32 distance = (s32)fnptr - (s32(code) + 8); - if (distance <= -0x2000000 || distance > 0x2000000) - return false; - else - return true; -} - -void ARMXEmitter::BL(const void *fnptr) -{ - s32 distance = (s32)fnptr - (s32(code) + 8); - _assert_msg_(DYNA_REC, distance > -0x2000000 && distance <= 0x2000000, - "BL out of range (%p calls %p)", code, fnptr); - Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF)); -} -void ARMXEmitter::BL(ARMReg src) -{ - Write32(condition | 0x12FFF30 | src); -} -void ARMXEmitter::PUSH(const int num, ...) -{ - u16 RegList = 0; - - va_list vl; - va_start(vl, num); - for (int i = 0; i < num; i++) - { - u8 Reg = va_arg(vl, u32); - RegList |= (1 << Reg); - } - va_end(vl); - - Write32(condition | (2349 << 16) | RegList); -} -void ARMXEmitter::POP(const int num, ...) -{ - u16 RegList = 0; - - va_list vl; - va_start(vl, num); - for (int i = 0; i < num; i++) - { - u8 Reg = va_arg(vl, u32); - RegList |= (1 << Reg); - } - va_end(vl); - - Write32(condition | (2237 << 16) | RegList); -} - -void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2) -{ - if (op2.GetType() == TYPE_REG) - Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | (op2.GetData() << 8) | ((op + 1) << 4) | src); - else - Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | op2.Imm5() | (op << 4) | src); -} - -// IMM, REG, IMMSREG, RSR -// -1 for invalid if the instruction doesn't support that -static const s32 InstOps[][4] = { - {16, 0, 0, 0}, // AND(s) - {17, 1, 1, 1}, // EOR(s) - {18, 2, 2, 2}, // SUB(s) - {19, 3, 3, 3}, // RSB(s) - {20, 4, 4, 4}, // ADD(s) - {21, 5, 5, 5}, // ADC(s) - {22, 6, 6, 6}, // SBC(s) - {23, 7, 7, 7}, // RSC(s) - {24, 8, 8, 8}, // TST - {25, 9, 9, 9}, // TEQ - {26, 10, 10, 10}, // CMP - {27, 11, 11, 11}, // CMN - {28, 12, 12, 12}, // ORR(s) - {29, 13, 13, 13}, // MOV(s) - {30, 14, 14, 14}, // BIC(s) - {31, 15, 15, 15}, // MVN(s) - {24, -1, -1, -1}, // MOVW - {26, -1, -1, -1}, // MOVT -}; - -static const char* InstNames[] = { - "AND", - "EOR", - "SUB", - "RSB", - "ADD", - "ADC", - "SBC", - "RSC", - "TST", - "TEQ", - "CMP", - "CMN", - "ORR", - "MOV", - "BIC", - "MVN" -}; - -void ARMXEmitter::AND (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm); } -void ARMXEmitter::ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm, true); } -void ARMXEmitter::EOR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm); } -void ARMXEmitter::EORS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm, true); } -void ARMXEmitter::SUB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm); } -void ARMXEmitter::SUBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm, true); } -void ARMXEmitter::RSB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm); } -void ARMXEmitter::RSBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm, true); } -void ARMXEmitter::ADD (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm); } -void ARMXEmitter::ADDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm, true); } -void ARMXEmitter::ADC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm); } -void ARMXEmitter::ADCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm, true); } -void ARMXEmitter::SBC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm); } -void ARMXEmitter::SBCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm, true); } -void ARMXEmitter::RSC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm); } -void ARMXEmitter::RSCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm, true); } -void ARMXEmitter::TST ( ARMReg Rn, Operand2 Rm) { WriteInstruction(8, R0, Rn, Rm, true); } -void ARMXEmitter::TEQ ( ARMReg Rn, Operand2 Rm) { WriteInstruction(9, R0, Rn, Rm, true); } -void ARMXEmitter::CMP ( ARMReg Rn, Operand2 Rm) { WriteInstruction(10, R0, Rn, Rm, true); } -void ARMXEmitter::CMN ( ARMReg Rn, Operand2 Rm) { WriteInstruction(11, R0, Rn, Rm, true); } -void ARMXEmitter::ORR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm); } -void ARMXEmitter::ORRS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm, true); } -void ARMXEmitter::MOV (ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm); } -void ARMXEmitter::MOVS(ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm, true); } -void ARMXEmitter::BIC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm); } -void ARMXEmitter::BICS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm, true); } -void ARMXEmitter::MVN (ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm); } -void ARMXEmitter::MVNS(ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm, true); } -void ARMXEmitter::MOVW(ARMReg Rd, Operand2 Rm) { WriteInstruction(16, Rd, R0, Rm); } -void ARMXEmitter::MOVT(ARMReg Rd, Operand2 Rm, bool TopBits) { WriteInstruction(17, Rd, R0, TopBits ? Rm.Value >> 16 : Rm); } - -void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags) // This can get renamed later -{ - s32 op = InstOps[Op][Rm.GetType()]; // Type always decided by last operand - u32 Data = Rm.GetData(); - if (Rm.GetType() == TYPE_IMM) - { - switch (Op) - { - // MOV cases that support IMM16 - case 16: - case 17: - Data = Rm.Imm16(); - break; - default: - break; - } - } - if (op == -1) - _assert_msg_(DYNA_REC, false, "%s not yet support %d", InstNames[Op], Rm.GetType()); - Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data); -} - -// Data Operations -void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2) -{ - Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2); -} -void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor) -{ - if (!cpu_info.bIDIVa) - PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer."); - WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend); -} -void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor) -{ - if (!cpu_info.bIDIVa) - PanicAlert("Trying to use integer divide on hardware that doesn't support it. Bad programmer."); - WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend); -} -void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);} -void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);} -void ARMXEmitter::LSR (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(2, false, dest, src, op2);} -void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(2, true, dest, src, op2);} -void ARMXEmitter::ASR (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(4, false, dest, src, op2);} -void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(4, true, dest, src, op2);} - -void ARMXEmitter::MUL (ARMReg dest, ARMReg src, ARMReg op2) -{ - Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2); -} -void ARMXEmitter::MULS(ARMReg dest, ARMReg src, ARMReg op2) -{ - Write32(condition | (1 << 20) | (dest << 16) | (src << 8) | (9 << 4) | op2); -} - -void ARMXEmitter::Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) -{ - Write32(condition | (op << 20) | (destHi << 16) | (destLo << 12) | (rm << 8) | (9 << 4) | rn); -} - -void ARMXEmitter::UMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) -{ - Write4OpMultiply(0x8, destLo, destHi, rn, rm); -} - -void ARMXEmitter::UMULLS(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) -{ - Write4OpMultiply(0x9, destLo, destHi, rn, rm); -} - -void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) -{ - Write4OpMultiply(0xC, destLo, destHi, rn, rm); -} - -void ARMXEmitter::UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) -{ - Write4OpMultiply(0xA, destLo, destHi, rn, rm); -} - -void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) -{ - Write4OpMultiply(0xE, destLo, destHi, rn, rm); -} - -void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width) -{ - Write32(condition | (0x7E0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn); -} - -void ARMXEmitter::CLZ(ARMReg rd, ARMReg rm) -{ - Write32(condition | (0x16F << 16) | (rd << 12) | (0xF1 << 4) | rm); -} - -void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width) -{ - u32 msb = (lsb + width - 1); - if (msb > 31) msb = 31; - Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | rn); -} - -void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2) -{ - Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2); -} - -void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation) -{ - SXTAH(dest, (ARMReg)15, op2, rotation); -} -void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation) -{ - // bits ten and 11 are the rotation amount, see 8.8.232 for more - // information - Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2); -} -void ARMXEmitter::RBIT(ARMReg dest, ARMReg src) -{ - Write32(condition | (0x6F << 20) | (0xF << 16) | (dest << 12) | (0xF3 << 4) | src); -} -void ARMXEmitter::REV (ARMReg dest, ARMReg src) -{ - Write32(condition | (0x6BF << 16) | (dest << 12) | (0xF3 << 4) | src); -} -void ARMXEmitter::REV16(ARMReg dest, ARMReg src) -{ - Write32(condition | (0x6BF << 16) | (dest << 12) | (0xFB << 4) | src); -} - -void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, Operand2 op2) -{ - Write32(condition | (0x320F << 12) | (write_nzcvq << 19) | (write_g << 18) | op2.Imm12Mod()); -} -void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, ARMReg src) -{ - Write32(condition | (0x120F << 12) | (write_nzcvq << 19) | (write_g << 18) | src); -} -void ARMXEmitter::MRS (ARMReg dest) -{ - Write32(condition | (16 << 20) | (15 << 16) | (dest << 12)); -} -void ARMXEmitter::LDREX(ARMReg dest, ARMReg base) -{ - Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F); -} -void ARMXEmitter::STREX(ARMReg result, ARMReg base, ARMReg op) -{ - _assert_msg_(DYNA_REC, (result != base && result != op), "STREX dest can't be other two registers"); - Write32(condition | (24 << 20) | (base << 16) | (result << 12) | (0xF9 << 4) | op); -} -void ARMXEmitter::DMB () -{ - Write32(0xF57FF05E); -} -void ARMXEmitter::SVC(Operand2 op) -{ - Write32(condition | (0x0F << 24) | op.Imm24()); -} - -// IMM, REG, IMMSREG, RSR -// -1 for invalid if the instruction doesn't support that -static const s32 LoadStoreOps[][4] = { - {0x40, 0x60, 0x60, -1}, // STR - {0x41, 0x61, 0x61, -1}, // LDR - {0x44, 0x64, 0x64, -1}, // STRB - {0x45, 0x65, 0x65, -1}, // LDRB - // Special encodings - { 0x4, 0x0, -1, -1}, // STRH - { 0x5, 0x1, -1, -1}, // LDRH - { 0x5, 0x1, -1, -1}, // LDRSB - { 0x5, 0x1, -1, -1}, // LDRSH -}; -static const char* LoadStoreNames[] = { - "STR", - "LDR", - "STRB", - "LDRB", - "STRH", - "LDRH", - "LDRSB", - "LDRSH", -}; - -void ARMXEmitter::WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 Rm, bool RegAdd) -{ - s32 op = LoadStoreOps[Op][Rm.GetType()]; // Type always decided by last operand - u32 Data; - - // Qualcomm chipsets get /really/ angry if you don't use index, even if the offset is zero. - // Some of these encodings require Index at all times anyway. Doesn't really matter. - // bool Index = op2 != 0 ? true : false; - bool Index = true; - bool Add = false; - - // Special Encoding (misc addressing mode) - bool SpecialOp = false; - bool Half = false; - bool SignedLoad = false; - - if (op == -1) - _assert_msg_(DYNA_REC, false, "%s does not support %d", LoadStoreNames[Op], Rm.GetType()); - - switch (Op) - { - case 4: // STRH - SpecialOp = true; - Half = true; - SignedLoad = false; - break; - case 5: // LDRH - SpecialOp = true; - Half = true; - SignedLoad = false; - break; - case 6: // LDRSB - SpecialOp = true; - Half = false; - SignedLoad = true; - break; - case 7: // LDRSH - SpecialOp = true; - Half = true; - SignedLoad = true; - break; - } - switch (Rm.GetType()) - { - case TYPE_IMM: - { - s32 Temp = (s32)Rm.Value; - Data = abs(Temp); - // The offset is encoded differently on this one. - if (SpecialOp) - Data = ((Data & 0xF0) << 4) | (Data & 0xF); - if (Temp >= 0) Add = true; - } - break; - case TYPE_REG: - Data = Rm.GetData(); - Add = RegAdd; - break; - case TYPE_IMMSREG: - if (!SpecialOp) - { - Data = Rm.GetData(); - Add = RegAdd; - break; - } - // Intentional fallthrough: TYPE_IMMSREG not supported for misc addressing. - default: - // RSR not supported for any of these - // We already have the warning above - BKPT(0x2); - return; - break; - } - if (SpecialOp) - { - // Add SpecialOp things - Data = (0x9 << 4) | (SignedLoad << 6) | (Half << 5) | Data; - } - Write32(condition | (op << 20) | (Index << 24) | (Add << 23) | (Rn << 16) | (Rt << 12) | Data); -} - -void ARMXEmitter::LDR (ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(1, dest, base, op2, RegAdd);} -void ARMXEmitter::LDRB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(3, dest, base, op2, RegAdd);} -void ARMXEmitter::LDRH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(5, dest, base, op2, RegAdd);} -void ARMXEmitter::LDRSB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(6, dest, base, op2, RegAdd);} -void ARMXEmitter::LDRSH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(7, dest, base, op2, RegAdd);} -void ARMXEmitter::STR (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(0, result, base, op2, RegAdd);} -void ARMXEmitter::STRH (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(4, result, base, op2, RegAdd);} -void ARMXEmitter::STRB (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(2, result, base, op2, RegAdd);} - -void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList) -{ - Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList); -} -void ARMXEmitter::STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...) -{ - u16 RegList = 0; - - va_list vl; - va_start(vl, Regnum); - for (int i = 0; i < Regnum; i++) - { - u8 Reg = va_arg(vl, u32); - RegList |= (1 << Reg); - } - va_end(vl); - - WriteRegStoreOp(0x90, dest, WriteBack, RegList); -} -void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...) -{ - u16 RegList = 0; - - va_list vl; - va_start(vl, Regnum); - for (int i = 0; i < Regnum; i++) - { - u8 Reg = va_arg(vl, u32); - RegList |= (1 << Reg); - } - va_end(vl); - - WriteRegStoreOp(0x89, dest, WriteBack, RegList); -} - -ARMReg SubBase(ARMReg Reg) -{ - if (Reg >= S0) - { - if (Reg >= D0) - { - if (Reg >= Q0) - return (ARMReg)((Reg - Q0) * 2); // Always gets encoded as a double register - return (ARMReg)(Reg - D0); - } - return (ARMReg)(Reg - S0); - } - return Reg; -} - -u32 EncodeVd(ARMReg Vd) -{ - bool quad_reg = Vd >= Q0; - bool double_reg = Vd >= D0; - - ARMReg Reg = SubBase(Vd); - - if (quad_reg) - return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12); - else - if (double_reg) - return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12); - else - return ((Reg & 0x1) << 22) | ((Reg & 0x1E) << 11); -} -u32 EncodeVn(ARMReg Vn) -{ - bool quad_reg = Vn >= Q0; - bool double_reg = Vn >= D0; - - ARMReg Reg = SubBase(Vn); - if (quad_reg) - return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3); - else - if (double_reg) - return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3); - else - return ((Reg & 0x1E) << 15) | ((Reg & 0x1) << 7); -} -u32 EncodeVm(ARMReg Vm) -{ - bool quad_reg = Vm >= Q0; - bool double_reg = Vm >= D0; - - ARMReg Reg = SubBase(Vm); - - if (quad_reg) - return ((Reg & 0x10) << 1) | (Reg & 0xF); - else - if (double_reg) - return ((Reg & 0x10) << 1) | (Reg & 0xF); - else - return ((Reg & 0x1) << 5) | (Reg >> 1); -} - -// Double/single, Neon -static const VFPEnc VFPOps[16][2] = { - {{0xE0, 0xA0}, { -1, -1}}, // 0: VMLA - {{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA - {{0xE0, 0xA4}, { -1, -1}}, // 2: VMLS - {{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS - {{0xE3, 0xA0}, { -1, -1}}, // 4: VADD - {{0xE3, 0xA4}, { -1, -1}}, // 5: VSUB - {{0xE2, 0xA0}, { -1, -1}}, // 6: VMUL - {{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL - {{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding) - {{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV - {{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding) - {{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding) - {{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding) - {{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding) - {{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi - }; - -static const char *VFPOpNames[16] = { - "VMLA", - "VNMLA", - "VMLS", - "VNMLS", - "VADD", - "VSUB", - "VMUL", - "VNMUL", - "VABS", - "VDIV", - "VNEG", - "VSQRT", - "VCMP", - "VCMPE", - "VABSi", -}; - -void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - bool quad_reg = Vd >= Q0; - bool double_reg = Vd >= D0 && Vd < Q0; - - VFPEnc enc = VFPOps[Op][quad_reg]; - if (enc.opc1 == -1 && enc.opc2 == -1) - _assert_msg_(DYNA_REC, false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP"); - u32 VdEnc = EncodeVd(Vd); - u32 VnEnc = EncodeVn(Vn); - u32 VmEnc = EncodeVm(Vm); - u32 cond = quad_reg ? (0xF << 28) : condition; - - Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc); -} -void ARMXEmitter::WriteVFPDataOp6bit(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm, u32 bit6) -{ - bool quad_reg = Vd >= Q0; - bool double_reg = Vd >= D0 && Vd < Q0; - - VFPEnc enc = VFPOps[Op][quad_reg]; - if (enc.opc1 == -1 && enc.opc2 == -1) - _assert_msg_(DYNA_REC, false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP"); - u32 VdEnc = EncodeVd(Vd); - u32 VnEnc = EncodeVn(Vn); - u32 VmEnc = EncodeVm(Vm); - u32 cond = quad_reg ? (0xF << 28) : condition; - - Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (bit6 << 6) | (double_reg << 8) | VmEnc); -} - -void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); } -void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); } -void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); } -void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); } -void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); } -void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); } -void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); } -void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); } -void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); } -void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); } -void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); } -void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp6bit(11, Vd, D1, Vm, 3); } -void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp6bit(12, Vd, D4, Vm, 1); } -void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp6bit(13, Vd, D4, Vm, 1); } -void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp6bit(12, Vd, D5, D0, 1); } -void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp6bit(13, Vd, D5, D0, 1); } - -void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset) -{ - _assert_msg_(DYNA_REC, Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR"); - _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid Base register to VLDR"); - - bool Add = offset >= 0 ? true : false; - u32 imm = abs(offset); - - _assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough"); - - if (imm & 0xC03) - ERROR_LOG(DYNA_REC, "VLDR: Bad offset %08x", imm); - - bool single_reg = Dest < D0; - - Dest = SubBase(Dest); - - if (single_reg) - { - Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \ - | ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2)); - } - else - { - Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \ - | ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2)); - } -} -void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset) -{ - _assert_msg_(DYNA_REC, Src >= S0 && Src <= D31, "Passed invalid src register to VSTR"); - _assert_msg_(DYNA_REC, Base <= R15, "Passed invalid base register to VSTR"); - - bool Add = offset >= 0 ? true : false; - u32 imm = abs(offset); - - _assert_msg_(DYNA_REC, (imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough"); - - if (imm & 0xC03) - ERROR_LOG(DYNA_REC, "VSTR: Bad offset %08x", imm); - - bool single_reg = Src < D0; - - Src = SubBase(Src); - - if (single_reg) - { - Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x1) << 22) | (Base << 16) \ - | ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2)); - } - else - { - Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \ - | ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2)); - } -} - -void ARMXEmitter::VMRS(ARMReg Rt) -{ - Write32(condition | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10); -} - -void ARMXEmitter::VMSR(ARMReg Rt) -{ - Write32(condition | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10); -} - -// VFP and ASIMD -void ARMXEmitter::VMOV(ARMReg Dest, Operand2 op2) -{ - _assert_msg_(DYNA_REC, cpu_info.bVFPv3, "VMOV #imm requires VFPv3"); - bool double_reg = Dest >= D0; - Write32(condition | (0xEB << 20) | EncodeVd(Dest) | (0x5 << 9) | (double_reg << 8) | op2.Imm8VFP()); -} -void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high) -{ - _assert_msg_(DYNA_REC, Src < S0, "This VMOV doesn't support SRC other than ARM Reg"); - _assert_msg_(DYNA_REC, Dest >= D0, "This VMOV doesn't support DEST other than VFP"); - - Dest = SubBase(Dest); - - Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \ - | (0xB << 8) | ((Dest & 0x10) << 3) | (1 << 4)); -} - -void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src) -{ - if (Dest > R15) - { - if (Src < S0) - { - if (Dest < D0) - { - // Moving to a Neon register FROM ARM Reg - Dest = (ARMReg)(Dest - S0); - Write32(condition | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \ - | (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4)); - return; - } - else - { - // Move 64bit from Arm reg - ARMReg Src2 = (ARMReg)(Src + 1); - Dest = SubBase(Dest); - Write32(condition | (0xC4 << 20) | (Src2 << 16) | (Src << 12) \ - | (0xB << 8) | ((Dest & 0x10) << 1) | (1 << 4) | (Dest & 0xF)); - return; - } - } - } - else - { - if (Src > R15) - { - if (Src < D0) - { - // Moving to ARM Reg from Neon Register - Src = (ARMReg)(Src - S0); - Write32(condition | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \ - | (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4)); - return; - } - else - { - // Move 64bit To Arm reg - ARMReg Dest2 = (ARMReg)(Dest + 1); - Src = SubBase(Src); - Write32(condition | (0xC5 << 20) | (Dest2 << 16) | (Dest << 12) \ - | (0xB << 8) | ((Dest & 0x10) << 1) | (1 << 4) | (Src & 0xF)); - return; - } - } - else - { - // Move Arm reg to Arm reg - _assert_msg_(DYNA_REC, false, "VMOV doesn't support moving ARM registers"); - } - } - // Moving NEON registers - int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4; - (void) SrcSize; - int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4; - bool Single = DestSize == 1; - bool Quad = DestSize == 4; - - _assert_msg_(DYNA_REC, SrcSize == DestSize, "VMOV doesn't support moving different register sizes"); - - Dest = SubBase(Dest); - Src = SubBase(Src); - - if (Single) - { - Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \ - | (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1)); - } - else - { - // Double and quad - if (Quad) - { - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Trying to use quad registers when you don't support ASIMD."); - // Gets encoded as a Double register - Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \ - | ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \ - | ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF)); - - } - else - { - Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \ - | (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF)); - } - } -} - -void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags) -{ - bool single_reg = (Dest < D0) && (Source < D0); - bool single_double = !single_reg && (Source < D0 || Dest < D0); - bool single_to_double = Source < D0; - int op = ((flags & TO_INT) ? (flags & ROUND_TO_ZERO) : (flags & IS_SIGNED)) ? 1 : 0; - int op2 = ((flags & TO_INT) ? (flags & IS_SIGNED) : 0) ? 1 : 0; - Dest = SubBase(Dest); - Source = SubBase(Source); - - if (single_double) - { - // S32<->F64 - if ((flags & TO_INT) || (flags & TO_FLOAT)) - { - if (single_to_double) - { - Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) \ - | ((Dest & 0xF) << 12) | (op << 7) | (0x2D << 6) | ((Source & 0x1) << 5) | (Source >> 1)); - } - else - { - Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ - | ((Dest & 0x1E) << 11) | (op << 7) | (0x2D << 6) | ((Source & 0x10) << 1) | (Source & 0xF)); - } - } - else // F32<->F64 - { - if (single_to_double) - { - Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | (0x7 << 16) \ - | ((Dest & 0xF) << 12) | (0x2B << 6) | ((Source & 0x1) << 5) | (Source >> 1)); - } - else - { - Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | (0x7 << 16) \ - | ((Dest & 0x1E) << 11) | (0x2F << 6) | ((Source & 0x10) << 1) | (Source & 0xF)); - } - } - } else if (single_reg) - { - Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ - | ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1)); - } - else - { - Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \ - | ((Dest & 0xF) << 12) | (1 << 8) | (op << 7) | (0x29 << 6) | ((Source & 0x10) << 1) | (Source & 0xF)); - } -} - -void NEONXEmitter::VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \ - | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x71 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} - -void NEONXEmitter::VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \ - | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x50 << 4) | EncodeVm(Vm)); -} - -void NEONXEmitter::VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \ - | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} - -void NEONXEmitter::VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \ - | (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | EncodeVm(Vm)); -} - -void NEONXEmitter::VABS(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB1 << 16) | (encodedSize(Size) << 18) | EncodeVd(Vd) \ - | ((Size & F_32 ? 1 : 0) << 10) | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} - -void NEONXEmitter::VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - // Only Float - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) \ - | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} - -void NEONXEmitter::VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - // Only Float - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) \ - | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} - -void NEONXEmitter::VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - VACGE(Vd, Vm, Vn); -} - -void NEONXEmitter::VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - VACGT(Vd, Vn, Vm); -} - -void NEONXEmitter::VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ - | (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm)); -} - -void NEONXEmitter::VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ - | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm)); -} - -void NEONXEmitter::VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ - | EncodeVd(Vd) | EncodeVm(Vm)); -} -void NEONXEmitter::VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \ - | EncodeVd(Vd) | (1 << 8) | EncodeVm(Vm)); -} -void NEONXEmitter::VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - if (Size & F_32) - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ - | (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm)); - -} -void NEONXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \ - | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - if (Size & F_32) - Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ - | (0x31 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \ - | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x8 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - if (Size & F_32) - Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \ - | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ - | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - VCGE(Size, Vd, Vm, Vn); -} -void NEONXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ - | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (3 << 7) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCLS(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ - | EncodeVd(Vd) | (1 << 10) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - VCGT(Size, Vd, Vm, Vn); -} -void NEONXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \ - | EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x20 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCLZ(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ - | EncodeVd(Vd) | (0x48 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VCNT(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, Size & I_8, "Can only use I_8 with %s", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \ - | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - u32 sizeEncoded = 0, indexEncoded = 0; - if (Size & I_8) - sizeEncoded = 1; - else if (Size & I_16) - sizeEncoded = 2; - else if (Size & I_32) - sizeEncoded = 4; - if (Size & I_8) - indexEncoded <<= 1; - else if (Size & I_16) - indexEncoded <<= 2; - else if (Size & I_32) - indexEncoded <<= 3; - Write32((0xF3 << 24) | (0xD << 20) | (sizeEncoded << 16) | (indexEncoded << 16) \ - | EncodeVd(Vd) | (0xC0 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Rt) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, Rt < D0, "Pass invalid register to %s", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Vd = SubBase(Vd); - u8 sizeEncoded = 0; - if (Size & I_8) - sizeEncoded = 2; - else if (Size & I_16) - sizeEncoded = 1; - else if (Size & I_32) - sizeEncoded = 0; - - Write32((0xEE << 24) | (0x8 << 20) | ((sizeEncoded & 2) << 21) | (register_quad << 21) \ - | ((Vd & 0xF) << 16) | (Rt << 12) | (0xD1 << 4) | ((Vd & 0x10) << 3) | (1 << 4)); -} -void NEONXEmitter::VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \ - | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__); - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ - | EncodeVn(Vn) | EncodeVd(Vd) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ - | EncodeVn(Vn) | EncodeVd(Vd) | (1 << 9) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ - | EncodeVn(Vn) | EncodeVd(Vd) | (0x60 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 23) | (encodedSize(Size) << 20) \ - | EncodeVn(Vn) | EncodeVd(Vd) | (0x61 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | (1 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \ - | EncodeVn(Vn) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vn >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \ - | EncodeVn(Vn) | EncodeVd(Vd) | (0xA0 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (encodedSize(Size) << 20) | \ - EncodeVn(Vn) | EncodeVd(Vd) | (0x91 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xC0 << 4) | ((Size & I_POLYNOMIAL) ? 1 << 9 : 0) | EncodeVm(Vm)); -} -void NEONXEmitter::VNEG(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) | \ - EncodeVd(Vd) | ((Size & F_32) ? 1 << 10 : 0) | (0xE << 6) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VPADAL(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ - (0x60 << 4) | ((Size & I_UNSIGNED) ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - if (Size & F_32) - Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xB1 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VPADDL(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ - (0x20 << 4) | (Size & I_UNSIGNED ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - if (Size & F_32) - Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xA0 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - if (Size & F_32) - Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm)); - else - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xA1 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VQABS(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ - (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x90 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xB0 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xB0 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xD0 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VQNEG(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \ - (0x78 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xB0 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x51 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x41 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x21 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x40 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VRECPE(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (0xB << 16) | EncodeVd(Vd) | \ - (0x40 << 4) | (Size & F_32 ? 1 << 8 : 0) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x50 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - Vd = SubBase(Vd); - Vm = SubBase(Vm); - - Write32((0xF3 << 24) | (0xB << 20) | ((Vd & 0x10) << 18) | (0xB << 16) - | ((Vd & 0xF) << 12) | (9 << 7) | (Size & F_32 ? (1 << 8) : 0) | (register_quad << 6) - | ((Vm & 0x10) << 1) | (Vm & 0xF)); -} -void NEONXEmitter::VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x60 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - _assert_msg_(DYNA_REC, !(Size & F_32), "%s doesn't support float.", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x40 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - if (Size & F_32) - Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm)); - else - Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x80 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - Write32((0xF2 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x60 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x20 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= Q0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x30 << 4) | EncodeVm(Vm)); -} -void NEONXEmitter::VSWP(ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (1 << 17) | EncodeVd(Vd) | \ - (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VTRN(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \ - (1 << 7) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \ - (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VUZP(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \ - (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VZIP(u32 Size, ARMReg Vd, ARMReg Vm) -{ - _assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to %s", __FUNCTION__); - _assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use %s when CPU doesn't support it", __FUNCTION__); - - bool register_quad = Vd >= Q0; - - Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \ - (0x18 << 4) | (register_quad << 6) | EncodeVm(Vm)); -} -void NEONXEmitter::VLD1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) -{ - u32 spacing = 0x7; // Only support loading to 1 reg - // Gets encoded as a double register - Vd = SubBase(Vd); - - Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16) - | ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6) - | (align << 4) | Rm); -} -void NEONXEmitter::VLD2(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) -{ - u32 spacing = 0x8; // Single spaced registers - // Gets encoded as a double register - Vd = SubBase(Vd); - - Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16) - | ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6) - | (align << 4) | Rm); -} -void NEONXEmitter::VST1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align, ARMReg Rm) -{ - u32 spacing = 0x7; // Single spaced registers - // Gets encoded as a double register - Vd = SubBase(Vd); - - Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (Rn << 16) - | ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6) - | (align << 4) | Rm); -} - -void NEONXEmitter::VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm) -{ - bool register_quad = Vd >= Q0; - Vd = SubBase(Vd); - Vm = SubBase(Vm); - - Write32((0xF3 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (0x3 << 20) - | (encodedSize(Size) << 18) | ((Vd & 0xF) << 12) | (size << 7) - | (register_quad << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF)); -} - -void NEONXEmitter::VREV64(u32 Size, ARMReg Vd, ARMReg Vm) -{ - VREVX(0, Size, Vd, Vm); -} - -void NEONXEmitter::VREV32(u32 Size, ARMReg Vd, ARMReg Vm) -{ - VREVX(1, Size, Vd, Vm); -} - -void NEONXEmitter::VREV16(u32 Size, ARMReg Vd, ARMReg Vm) -{ - VREVX(2, Size, Vd, Vm); -} -} - diff --git a/Source/Core/Common/ArmEmitter.h b/Source/Core/Common/ArmEmitter.h deleted file mode 100644 index 228700f706..0000000000 --- a/Source/Core/Common/ArmEmitter.h +++ /dev/null @@ -1,706 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!! - -#pragma once - -#include - -#include "Common/ArmCommon.h" -#include "Common/CodeBlock.h" -#include "Common/CommonTypes.h" - -#if defined(__SYMBIAN32__) || defined(PANDORA) -#include -#endif - -#undef _IP -#undef R0 -#undef _SP -#undef _LR -#undef _PC - -// VCVT flags -#define TO_FLOAT 0 -#define TO_INT 1 << 0 -#define IS_SIGNED 1 << 1 -#define ROUND_TO_ZERO 1 << 2 - -namespace ArmGen -{ -enum ARMReg -{ - // GPRs - R0 = 0, R1, R2, R3, R4, R5, - R6, R7, R8, R9, R10, R11, - - // SPRs - // R13 - R15 are SP, LR, and PC. - // Almost always referred to by name instead of register number - R12 = 12, R13 = 13, R14 = 14, R15 = 15, - _IP = 12, _SP = 13, _LR = 14, _PC = 15, - - - // VFP single precision registers - S0, S1, S2, S3, S4, S5, S6, - S7, S8, S9, S10, S11, S12, S13, - S14, S15, S16, S17, S18, S19, S20, - S21, S22, S23, S24, S25, S26, S27, - S28, S29, S30, S31, - - // VFP Double Precision registers - D0, D1, D2, D3, D4, D5, D6, D7, - D8, D9, D10, D11, D12, D13, D14, D15, - D16, D17, D18, D19, D20, D21, D22, D23, - D24, D25, D26, D27, D28, D29, D30, D31, - - // ASIMD Quad-Word registers - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, - Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15, - INVALID_REG = 0xFFFFFFFF -}; - -enum ShiftType -{ - ST_LSL = 0, - ST_ASL = 0, - ST_LSR = 1, - ST_ASR = 2, - ST_ROR = 3, - ST_RRX = 4 -}; - -enum -{ - NUMGPRs = 13, -}; - -class ARMXEmitter; - -enum OpType -{ - TYPE_IMM = 0, - TYPE_REG, - TYPE_IMMSREG, - TYPE_RSR, - TYPE_MEM -}; - -// This is no longer a proper operand2 class. Need to split up. -class Operand2 -{ - friend class ARMXEmitter; -protected: - u32 Value; - -private: - OpType Type; - - // IMM types - u8 Rotation; // Only for u8 values - - // Register types - u8 IndexOrShift; - ShiftType Shift; -public: - OpType GetType() - { - return Type; - } - Operand2() {} - Operand2(u32 imm, OpType type = TYPE_IMM) - { - Type = type; - Value = imm; - Rotation = 0; - } - - Operand2(ARMReg Reg) - { - Type = TYPE_REG; - Value = Reg; - Rotation = 0; - } - Operand2(u8 imm, u8 rotation) - { - Type = TYPE_IMM; - Value = imm; - Rotation = rotation; - } - Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR - { - Type = TYPE_RSR; - _assert_msg_(DYNA_REC, type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount"); - IndexOrShift = shift; - Shift = type; - Value = base; - } - - Operand2(ARMReg base, ShiftType type, u8 shift)// For IMM shifted register - { - if (shift == 32) shift = 0; - switch (type) - { - case ST_LSL: - _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSL %u", shift); - break; - case ST_LSR: - _assert_msg_(DYNA_REC, shift <= 32, "Invalid Operand2: LSR %u", shift); - if (!shift) - type = ST_LSL; - if (shift == 32) - shift = 0; - break; - case ST_ASR: - _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: LSR %u", shift); - if (!shift) - type = ST_LSL; - if (shift == 32) - shift = 0; - break; - case ST_ROR: - _assert_msg_(DYNA_REC, shift < 32, "Invalid Operand2: ROR %u", shift); - if (!shift) - type = ST_LSL; - break; - case ST_RRX: - _assert_msg_(DYNA_REC, shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount"); - type = ST_ROR; - break; - } - IndexOrShift = shift; - Shift = type; - Value = base; - Type = TYPE_IMMSREG; - } - u32 GetData() - { - switch (Type) - { - case TYPE_IMM: - return Imm12Mod(); // This'll need to be changed later - case TYPE_REG: - return Rm(); - case TYPE_IMMSREG: - return IMMSR(); - case TYPE_RSR: - return RSR(); - default: - _assert_msg_(DYNA_REC, false, "GetData with Invalid Type"); - return 0; - } - } - u32 IMMSR() // IMM shifted register - { - _assert_msg_(DYNA_REC, Type == TYPE_IMMSREG, "IMMSR must be imm shifted register"); - return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value); - } - u32 RSR() // Register shifted register - { - _assert_msg_(DYNA_REC, Type == TYPE_RSR, "RSR must be RSR Of Course"); - return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value; - } - u32 Rm() - { - _assert_msg_(DYNA_REC, Type == TYPE_REG, "Rm must be with Reg"); - return Value; - } - - u32 Imm5() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm5 not IMM value"); - return ((Value & 0x0000001F) << 7); - } - u32 Imm8() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value"); - return Value & 0xFF; - } - u32 Imm8Rot() // IMM8 with Rotation - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8Rot not IMM value"); - _assert_msg_(DYNA_REC, (Rotation & 0xE1) != 0, "Invalid Operand2: immediate rotation %u", Rotation); - return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF); - } - u32 Imm12() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12 not IMM"); - return (Value & 0x00000FFF); - } - - u32 Imm12Mod() - { - // This is a IMM12 with the top four bits being rotation and the - // bottom eight being a IMM. This is for instructions that need to - // expand a 8bit IMM to a 32bit value and gives you some rotation as - // well. - // Each rotation rotates to the right by 2 bits - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm12Mod not IMM"); - return ((Rotation & 0xF) << 8) | (Value & 0xFF); - } - u32 Imm16() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); - return ( (Value & 0xF000) << 4) | (Value & 0x0FFF); - } - u32 Imm16Low() - { - return Imm16(); - } - u32 Imm16High() // Returns high 16bits - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); - return ( ((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF); - } - u32 Imm24() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm16 not IMM"); - return (Value & 0x0FFFFFFF); - } - // NEON and ASIMD specific - u32 Imm8ASIMD() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8ASIMD not IMM"); - return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF); - } - u32 Imm8VFP() - { - _assert_msg_(DYNA_REC, (Type == TYPE_IMM), "Imm8VFP not IMM"); - return ((Value & 0xF0) << 12) | (Value & 0xF); - } -}; - -// Use these when you don't know if an imm can be represented as an operand2. -// This lets you generate both an optimal and a fallback solution by checking -// the return value, which will be false if these fail to find a Operand2 that -// represents your 32-bit imm value. -bool TryMakeOperand2(u32 imm, Operand2 &op2); -bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse); -bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated); - -// Use this only when you know imm can be made into an Operand2. -Operand2 AssumeMakeOperand2(u32 imm); - -inline Operand2 R(ARMReg Reg) { return Operand2(Reg, TYPE_REG); } -inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); } -inline Operand2 Mem(void *ptr) { return Operand2((u32)ptr, TYPE_IMM); } -//usage: struct {int e;} s; STRUCT_OFFSET(s,e) -#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str))) - - -struct FixupBranch -{ - u8 *ptr; - u32 condition; // Remembers our codition at the time - int type; //0 = B 1 = BL -}; - -struct LiteralPool -{ - s32 loc; - u8* ldr_address; - u32 val; -}; - -typedef const u8* JumpTarget; -// XXX: Stop polluting the global namespace -const u32 I_8 = (1 << 0); -const u32 I_16 = (1 << 1); -const u32 I_32 = (1 << 2); -const u32 I_64 = (1 << 3); -const u32 I_SIGNED = (1 << 4); -const u32 I_UNSIGNED = (1 << 5); -const u32 F_32 = (1 << 6); -const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL - -u32 EncodeVd(ARMReg Vd); -u32 EncodeVn(ARMReg Vn); -u32 EncodeVm(ARMReg Vm); -// Subtracts the base from the register to give us the real one -ARMReg SubBase(ARMReg Reg); - -class ARMXEmitter -{ - friend struct OpArg; // for Write8 etc - friend class NEONXEmitter; -private: - u8 *code, *startcode; - u8 *lastCacheFlushEnd; - u32 condition; - std::vector currentLitPool; - - void WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 op2, bool RegAdd); - void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList); - void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2); - void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2); - - void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void WriteVFPDataOp6bit(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm, u32 bit6); - - void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); - - // New Ops - void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false); - -protected: - inline void Write32(u32 value) {*(u32*)code = value; code+=4;} - -public: - ARMXEmitter() : code(nullptr), startcode(nullptr), lastCacheFlushEnd(nullptr) { - condition = CC_AL << 28; - } - ARMXEmitter(u8* code_ptr) { - code = code_ptr; - lastCacheFlushEnd = code_ptr; - startcode = code_ptr; - condition = CC_AL << 28; - } - virtual ~ARMXEmitter() {} - - void SetCodePtr(u8 *ptr); - void ReserveCodeSpace(u32 bytes); - const u8 *AlignCode16(); - const u8 *AlignCodePage(); - const u8 *GetCodePtr() const; - void FlushIcache(); - void FlushIcacheSection(u8 *start, u8 *end); - u8 *GetWritableCodePtr(); - - void FlushLitPool(); - void AddNewLit(u32 val); - bool TrySetValue_TwoOp(ARMReg reg, u32 val); - - CCFlags GetCC() { return CCFlags(condition >> 28); } - void SetCC(CCFlags cond = CC_AL); - - // Special purpose instructions - - // Dynamic Endian Switching - void SETEND(bool BE); - // Debug Breakpoint - void BKPT(u16 arg); - - // Hint instruction - void YIELD(); - - // System - void MRC(u32 coproc, u32 opc1, ARMReg Rt, u32 CRn, u32 CRm, u32 opc2 = 0); - void MCR(u32 coproc, u32 opc1, ARMReg Rt, u32 CRn, u32 CRm, u32 opc2 = 0); - - // Do nothing - void NOP(int count = 1); //nop padding - TODO: fast nop slides, for AMD and Intel (check their manuals) - -#ifdef CALL -#undef CALL -#endif - - // Branching - FixupBranch B(); - FixupBranch B_CC(CCFlags Cond); - void B_CC(CCFlags Cond, const void *fnptr); - FixupBranch BL(); - FixupBranch BL_CC(CCFlags Cond); - void SetJumpTarget(FixupBranch const &branch); - - void B (const void *fnptr); - void B (ARMReg src); - void BL(const void *fnptr); - void BL(ARMReg src); - bool BLInRange(const void *fnptr); - - void PUSH(const int num, ...); - void POP(const int num, ...); - - // New Data Ops - void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm); - void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm); - void EOR (ARMReg dest, ARMReg src, Operand2 op2); - void EORS(ARMReg dest, ARMReg src, Operand2 op2); - void SUB (ARMReg dest, ARMReg src, Operand2 op2); - void SUBS(ARMReg dest, ARMReg src, Operand2 op2); - void RSB (ARMReg dest, ARMReg src, Operand2 op2); - void RSBS(ARMReg dest, ARMReg src, Operand2 op2); - void ADD (ARMReg dest, ARMReg src, Operand2 op2); - void ADDS(ARMReg dest, ARMReg src, Operand2 op2); - void ADC (ARMReg dest, ARMReg src, Operand2 op2); - void ADCS(ARMReg dest, ARMReg src, Operand2 op2); - void LSL (ARMReg dest, ARMReg src, Operand2 op2); - void LSLS(ARMReg dest, ARMReg src, Operand2 op2); - void LSR (ARMReg dest, ARMReg src, Operand2 op2); - void LSRS(ARMReg dest, ARMReg src, Operand2 op2); - void ASR (ARMReg dest, ARMReg src, Operand2 op2); - void ASRS(ARMReg dest, ARMReg src, Operand2 op2); - void SBC (ARMReg dest, ARMReg src, Operand2 op2); - void SBCS(ARMReg dest, ARMReg src, Operand2 op2); - void RBIT(ARMReg dest, ARMReg src); - void REV (ARMReg dest, ARMReg src); - void REV16 (ARMReg dest, ARMReg src); - void RSC (ARMReg dest, ARMReg src, Operand2 op2); - void RSCS(ARMReg dest, ARMReg src, Operand2 op2); - void TST ( ARMReg src, Operand2 op2); - void TEQ ( ARMReg src, Operand2 op2); - void CMP ( ARMReg src, Operand2 op2); - void CMN ( ARMReg src, Operand2 op2); - void ORR (ARMReg dest, ARMReg src, Operand2 op2); - void ORRS(ARMReg dest, ARMReg src, Operand2 op2); - void MOV (ARMReg dest, Operand2 op2); - void MOVS(ARMReg dest, Operand2 op2); - void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN - void BICS(ARMReg dest, ARMReg src, Operand2 op2); - void MVN (ARMReg dest, Operand2 op2); - void MVNS(ARMReg dest, Operand2 op2); - void MOVW(ARMReg dest, Operand2 op2); - void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false); - - // UDIV and SDIV are only available on CPUs that have - // the idiva hardare capacity - void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor); - void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor); - - void MUL (ARMReg dest, ARMReg src, ARMReg op2); - void MULS(ARMReg dest, ARMReg src, ARMReg op2); - - void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); - void UMULLS(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); - void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); - - void UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); - void SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm); - - void SXTB(ARMReg dest, ARMReg op2); - void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0); - void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0); - void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width); - void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width); - void CLZ(ARMReg rd, ARMReg rm); - - // Using just MSR here messes with our defines on the PPC side of stuff (when this code was in Dolphin...) - // Just need to put an underscore here, bit annoying. - void _MSR (bool nzcvq, bool g, Operand2 op2); - void _MSR (bool nzcvq, bool g, ARMReg src); - void MRS (ARMReg dest); - - // Memory load/store operations - void LDR (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void LDRB (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void LDRH (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void LDRSB(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void LDRSH(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void STR (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void STRB (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - void STRH (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true); - - void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...); - void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...); - - // Exclusive Access operations - void LDREX(ARMReg dest, ARMReg base); - // result contains the result if the instruction managed to store the value - void STREX(ARMReg result, ARMReg base, ARMReg op); - void DMB (); - void SVC(Operand2 op); - - // NEON and ASIMD instructions - // None of these will be created with conditional since ARM - // is deprecating conditional execution of ASIMD instructions. - // ASIMD instructions don't even have a conditional encoding. - - // VFP Only - void VLDR(ARMReg Dest, ARMReg Base, s16 offset); - void VSTR(ARMReg Src, ARMReg Base, s16 offset); - void VCMP(ARMReg Vd, ARMReg Vm); - void VCMPE(ARMReg Vd, ARMReg Vm); - // Compares against zero - void VCMP(ARMReg Vd); - void VCMPE(ARMReg Vd); - - void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSQRT(ARMReg Vd, ARMReg Vm); - - // NEON and VFP - void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABS(ARMReg Vd, ARMReg Vm); - void VNEG(ARMReg Vd, ARMReg Vm); - void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMOV(ARMReg Dest, Operand2 op2); - void VMOV(ARMReg Dest, ARMReg Src, bool high); - void VMOV(ARMReg Dest, ARMReg Src); - void VCVT(ARMReg Dest, ARMReg Src, int flags); - - void VMRS(ARMReg Rt); - void VMSR(ARMReg Rt); - - void QuickCallFunction(ARMReg scratchreg, void *func); - - // Wrapper around MOVT/MOVW with fallbacks. - void MOVI2R(ARMReg reg, u32 val, bool optimize = true); - void MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate = false); - - void ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); - void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); - void CMPI2R(ARMReg rs, u32 val, ARMReg scratch); - void ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch); - - -}; // class ARMXEmitter - -enum NEONAlignment -{ - ALIGN_NONE = 0, - ALIGN_64 = 1, - ALIGN_128 = 2, - ALIGN_256 = 3 -}; - - -class NEONXEmitter -{ -private: - ARMXEmitter *_emit; - inline void Write32(u32 value) { _emit->Write32(value); } - - inline u32 encodedSize(u32 value) - { - if (value & I_8) - return 0; - else if (value & I_16) - return 1; - else if ((value & I_32) || (value & F_32)) - return 2; - else if (value & I_64) - return 3; - else - _dbg_assert_msg_(DYNA_REC, false, "Passed invalid size to integer NEON instruction"); - return 0; - } - - void VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm); - -public: - NEONXEmitter(ARMXEmitter *emit) - : _emit(emit) - {} - - void VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VABS(u32 Size, ARMReg Vd, ARMReg Vm); - void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCEQ(u32 Size, ARMReg Vd, ARMReg Vm); - void VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCGE(u32 Size, ARMReg Vd, ARMReg Vm); - void VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCGT(u32 Size, ARMReg Vd, ARMReg Vm); - void VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCLE(u32 Size, ARMReg Vd, ARMReg Vm); - void VCLS(u32 Size, ARMReg Vd, ARMReg Vm); - void VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VCLT(u32 Size, ARMReg Vd, ARMReg Vm); - void VCLZ(u32 Size, ARMReg Vd, ARMReg Vm); - void VCNT(u32 Size, ARMReg Vd, ARMReg Vm); - void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index); - void VDUP(u32 Size, ARMReg Vd, ARMReg Rt); - void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index); - void VFMA(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VFMS(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VNEG(u32 Size, ARMReg Vd, ARMReg Vm); - void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm); - void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm); - void VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQABS(u32 Size, ARMReg Vd, ARMReg Vm); - void VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQNEG(u32 Size, ARMReg Vd, ARMReg Vm); - void VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRECPE(u32 Size, ARMReg Vd, ARMReg Vm); - void VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm); - void VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VSWP(ARMReg Vd, ARMReg Vm); - void VTRN(u32 Size, ARMReg Vd, ARMReg Vm); - void VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm); - void VUZP(u32 Size, ARMReg Vd, ARMReg Vm); - void VZIP(u32 Size, ARMReg Vd, ARMReg Vm); - void VREV64(u32 Size, ARMReg Vd, ARMReg Vm); - void VREV32(u32 Size, ARMReg Vd, ARMReg Vm); - void VREV16(u32 Size, ARMReg Vd, ARMReg Vm); - - void VLD1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); - void VLD2(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); - - void VST1(u32 Size, ARMReg Vd, ARMReg Rn, NEONAlignment align = ALIGN_NONE, ARMReg Rm = _PC); -}; - -class ARMCodeBlock : public CodeBlock -{ -private: - void PoisonMemory() override - { - u32* ptr = (u32*)region; - u32* maxptr = (u32*)(region + region_size); - // If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything - // Less than optimal, but there would be nothing we could do but throw a runtime warning anyway. - // ARM: 0x01200070 = BKPT 0 - while (ptr < maxptr) - *ptr++ = 0x01200070; - } -}; - -// VFP Specific -struct VFPEnc { - s16 opc1; - s16 opc2; -}; -} // namespace diff --git a/Source/Core/Common/CMakeLists.txt b/Source/Core/Common/CMakeLists.txt index 6c3b6ae79b..ff996887a0 100644 --- a/Source/Core/Common/CMakeLists.txt +++ b/Source/Core/Common/CMakeLists.txt @@ -36,17 +36,11 @@ set(SRCS BreakPoints.cpp Logging/LogManager.cpp) set(LIBS enet) -if(_M_ARM) - if (_M_ARM_32) #ARMv7 - set(SRCS ${SRCS} - ArmEmitter.cpp) - else() #AArch64 - set(SRCS ${SRCS} - Arm64Emitter.cpp) - endif() +if(_M_ARM_64) set(SRCS ${SRCS} - ArmCPUDetect.cpp - GenericFPURoundMode.cpp) + Arm64Emitter.cpp + ArmCPUDetect.cpp + GenericFPURoundMode.cpp) else() if(_M_X86) #X86 set(SRCS ${SRCS} diff --git a/Source/Core/Common/CommonFuncs.h b/Source/Core/Common/CommonFuncs.h index 2804b8c80f..ba398ff51d 100644 --- a/Source/Core/Common/CommonFuncs.h +++ b/Source/Core/Common/CommonFuncs.h @@ -133,10 +133,6 @@ inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} inline u64 swap64(u64 _data) {return _byteswap_uint64(_data);} -#elif _M_ARM_32 -inline u16 swap16 (u16 _data) { u32 data = _data; __asm__ ("rev16 %0, %1\n" : "=l" (data) : "l" (data)); return (u16)data;} -inline u32 swap32 (u32 _data) {__asm__ ("rev %0, %1\n" : "=l" (_data) : "l" (_data)); return _data;} -inline u64 swap64(u64 _data) {return ((u64)swap32(_data) << 32) | swap32(_data >> 32);} #elif __linux__ && !(ANDROID && _M_ARM_64) // Android NDK r10c has broken builtin byte swap routines // Disabled for now. diff --git a/Source/Core/Core/CMakeLists.txt b/Source/Core/Core/CMakeLists.txt index 94a8e3380a..da3dcfcff3 100644 --- a/Source/Core/Core/CMakeLists.txt +++ b/Source/Core/Core/CMakeLists.txt @@ -204,24 +204,6 @@ if(_M_X86) PowerPC/JitCommon/JitBackpatch.cpp PowerPC/JitCommon/Jit_Util.cpp PowerPC/JitCommon/TrampolineCache.cpp) -elseif(_M_ARM_32) - set(SRCS ${SRCS} - PowerPC/JitArm32/Jit.cpp - PowerPC/JitArm32/JitAsm.cpp - PowerPC/JitArm32/JitArm_BackPatch.cpp - PowerPC/JitArm32/JitArm_Tables.cpp - PowerPC/JitArm32/JitArmCache.cpp - PowerPC/JitArm32/JitRegCache.cpp - PowerPC/JitArm32/JitFPRCache.cpp - PowerPC/JitArm32/JitArm_Branch.cpp - PowerPC/JitArm32/JitArm_Integer.cpp - PowerPC/JitArm32/JitArm_LoadStore.cpp - PowerPC/JitArm32/JitArm_FloatingPoint.cpp - PowerPC/JitArm32/JitArm_Paired.cpp - PowerPC/JitArm32/JitArm_LoadStorePaired.cpp - PowerPC/JitArm32/JitArm_SystemRegisters.cpp - PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp - ) elseif(_M_ARM_64) set(SRCS ${SRCS} PowerPC/JitArm64/Jit.cpp diff --git a/Source/Core/Core/ConfigManager.cpp b/Source/Core/Core/ConfigManager.cpp index feddb8dda1..9439e3a228 100644 --- a/Source/Core/Core/ConfigManager.cpp +++ b/Source/Core/Core/ConfigManager.cpp @@ -418,8 +418,6 @@ void SConfig::LoadCoreSettings(IniFile& ini) core->Get("HLE_BS2", &m_LocalCoreStartupParameter.bHLE_BS2, false); #ifdef _M_X86 core->Get("CPUCore", &m_LocalCoreStartupParameter.iCPUCore, PowerPC::CORE_JIT64); -#elif _M_ARM_32 - core->Get("CPUCore", &m_LocalCoreStartupParameter.iCPUCore, PowerPC::CORE_JITARM); #elif _M_ARM_64 core->Get("CPUCore", &m_LocalCoreStartupParameter.iCPUCore, PowerPC::CORE_JITARM64); #else diff --git a/Source/Core/Core/MachineContext.h b/Source/Core/Core/MachineContext.h index 53eaac74b5..75305f97e8 100644 --- a/Source/Core/Core/MachineContext.h +++ b/Source/Core/Core/MachineContext.h @@ -113,9 +113,6 @@ #define CTX_REG(x) regs[x] #define CTX_SP sp #define CTX_PC pc - #elif _M_ARM_32 - // Add others if required. - #define CTX_PC arm_pc #else #warning No context definition for OS #endif diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp b/Source/Core/Core/PowerPC/JitArm32/Jit.cpp deleted file mode 100644 index 7ec6cf3381..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.cpp +++ /dev/null @@ -1,513 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/ConfigManager.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PatchEngine.h" -#include "Core/HLE/HLE.h" -#include "Core/HW/GPFifo.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/Profiler.h" -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitArm_Tables.h" - -using namespace ArmGen; - -void JitArm::Init() -{ - AllocCodeSpace(CODE_SIZE); - blocks.Init(); - asm_routines.Init(); - gpr.Init(this); - fpr.Init(this); - jo.enableBlocklink = true; - jo.optimizeGatherPipe = true; - UpdateMemoryOptions(); - - code_block.m_stats = &js.st; - code_block.m_gpa = &js.gpa; - code_block.m_fpa = &js.fpa; - analyzer.SetOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE); - InitBackpatch(); - - // Disable all loadstores - // Ever since the MMU has been optimized for x86, loadstores on ARMv7 have been knackered - // XXX: Investigate exactly why these are broken - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITLoadStoreOff = true; - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITLoadStoreFloatingOff = true; - SConfig::GetInstance().m_LocalCoreStartupParameter.bJITLoadStorePairedOff= true; -} - -void JitArm::ClearCache() -{ - ClearCodeSpace(); - blocks.Clear(); - UpdateMemoryOptions(); -} - -void JitArm::Shutdown() -{ - FreeCodeSpace(); - blocks.Shutdown(); - asm_routines.Shutdown(); -} - -// This is only called by FallBackToInterpreter() in this file. It will execute an instruction with the interpreter functions. -void JitArm::WriteCallInterpreter(UGeckoInstruction inst) -{ - gpr.Flush(); - fpr.Flush(); - Interpreter::_interpreterInstruction instr = GetInterpreterOp(inst); - MOVI2R(R0, inst.hex); - MOVI2R(R12, (u32)instr); - BL(R12); -} - -void JitArm::FallBackToInterpreter(UGeckoInstruction _inst) -{ - WriteCallInterpreter(_inst.hex); -} - -void JitArm::HLEFunction(UGeckoInstruction _inst) -{ - gpr.Flush(); - fpr.Flush(); - MOVI2R(R0, js.compilerPC); - MOVI2R(R1, _inst.hex); - QuickCallFunction(R14, (void*)&HLE::Execute); - ARMReg rA = gpr.GetReg(); - LDR(rA, R9, PPCSTATE_OFF(npc)); - WriteExitDestInR(rA); -} - -void JitArm::DoNothing(UGeckoInstruction _inst) -{ - // Yup, just don't do anything. -} - -static const bool ImHereDebug = false; -static const bool ImHereLog = false; -static std::map been_here; - -static void ImHere() -{ - static File::IOFile f; - if (ImHereLog) - { - if (!f) - { - f.Open("log32.txt", "w"); - } - fprintf(f.GetHandle(), "%08x\n", PC); - } - - if (been_here.find(PC) != been_here.end()) - { - been_here.find(PC)->second++; - if ((been_here.find(PC)->second) & 1023) - return; - } - - DEBUG_LOG(DYNA_REC, "I'm here - PC = %08x , LR = %08x", PC, LR); - been_here[PC] = 1; -} - -void JitArm::Cleanup() -{ - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock > 0) - { - PUSH(4, R0, R1, R2, R3); - QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe); - POP(4, R0, R1, R2, R3); - } -} -void JitArm::DoDownCount() -{ - ARMReg rA = gpr.GetReg(); - LDR(rA, R9, PPCSTATE_OFF(downcount)); - if (js.downcountAmount < 255) // We can enlarge this if we used rotations - { - SUBS(rA, rA, js.downcountAmount); - } - else - { - ARMReg rB = gpr.GetReg(false); - MOVI2R(rB, js.downcountAmount); - SUBS(rA, rA, rB); - } - STR(rA, R9, PPCSTATE_OFF(downcount)); - gpr.Unlock(rA); -} -void JitArm::WriteExitDestInR(ARMReg Reg) -{ - STR(Reg, R9, PPCSTATE_OFF(pc)); - Cleanup(); - DoDownCount(); - - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - MOVI2R(Reg, (u32)asm_routines.dispatcher); - B(Reg); - gpr.Unlock(Reg); -} -void JitArm::WriteRfiExitDestInR(ARMReg Reg) -{ - STR(Reg, R9, PPCSTATE_OFF(pc)); - Cleanup(); - DoDownCount(); - - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - ARMReg A = gpr.GetReg(false); - - LDR(A, R9, PPCSTATE_OFF(pc)); - STR(A, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(A, (void*)&PowerPC::CheckExceptions); - LDR(A, R9, PPCSTATE_OFF(npc)); - STR(A, R9, PPCSTATE_OFF(pc)); - gpr.Unlock(Reg); // This was locked in the instruction beforehand - - MOVI2R(A, (u32)asm_routines.dispatcher); - B(A); -} -void JitArm::WriteExceptionExit() -{ - Cleanup(); - DoDownCount(); - - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - ARMReg A = gpr.GetReg(false); - - LDR(A, R9, PPCSTATE_OFF(pc)); - STR(A, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(A, (void*)&PowerPC::CheckExceptions); - LDR(A, R9, PPCSTATE_OFF(npc)); - STR(A, R9, PPCSTATE_OFF(pc)); - - MOVI2R(A, (u32)asm_routines.dispatcher); - B(A); -} -void JitArm::WriteExit(u32 destination) -{ - Cleanup(); - - DoDownCount(); - - if (Profiler::g_ProfileBlocks) - EndTimeProfile(js.curBlock); - - //If nobody has taken care of this yet (this can be removed when all branches are done) - JitBlock *b = js.curBlock; - JitBlock::LinkData linkData; - linkData.exitAddress = destination; - linkData.exitPtrs = GetWritableCodePtr(); - linkData.linkStatus = false; - - // Link opportunity! - int block; - if (jo.enableBlocklink && (block = blocks.GetBlockNumberFromStartAddress(destination)) >= 0) - { - // It exists! Joy of joy! - B(blocks.GetBlock(block)->checkedEntry); - linkData.linkStatus = true; - } - else - { - ARMReg A = gpr.GetReg(false); - MOVI2R(A, destination); - STR(A, R9, PPCSTATE_OFF(pc)); - MOVI2R(A, (u32)asm_routines.dispatcher); - B(A); - } - - b->linkData.push_back(linkData); -} - -void JitArm::Run() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} - -void JitArm::SingleStep() -{ - CompiledCode pExecAddr = (CompiledCode)asm_routines.enterCode; - pExecAddr(); -} - -void JitArm::Trace() -{ - std::string regs; - std::string fregs; - -#ifdef JIT_LOG_GPR - for (int i = 0; i < 32; i++) - { - regs += StringFromFormat("r%02d: %08x ", i, PowerPC::ppcState.gpr[i]); - } -#endif - -#ifdef JIT_LOG_FPR - for (int i = 0; i < 32; i++) - { - fregs += StringFromFormat("f%02d: %016x ", i, riPS0(i)); - } -#endif - - DEBUG_LOG(DYNA_REC, "JIT64 PC: %08x SRR0: %08x SRR1: %08x FPSCR: %08x MSR: %08x LR: %08x %s %s", - PC, SRR0, SRR1, PowerPC::ppcState.fpscr, PowerPC::ppcState.msr, PowerPC::ppcState.spr[8], regs.c_str(), fregs.c_str()); -} - -void JitArm::Jit(u32 em_address) -{ - if (GetSpaceLeft() < 0x10000 || blocks.IsFull() || SConfig::GetInstance().m_LocalCoreStartupParameter.bJITNoBlockCache) - { - ClearCache(); - } - - int block_num = blocks.AllocateBlock(PowerPC::ppcState.pc); - JitBlock *b = blocks.GetBlock(block_num); - const u8* BlockPtr = DoJit(PowerPC::ppcState.pc, &code_buffer, b); - blocks.FinalizeBlock(block_num, jo.enableBlocklink, BlockPtr); -} -void JitArm::Break(UGeckoInstruction inst) -{ - ERROR_LOG(DYNA_REC, "%s called a Break instruction!", PPCTables::GetInstructionName(inst)); - BKPT(0x4444); -} - -void JitArm::BeginTimeProfile(JitBlock* b) -{ - b->ticCounter = 0; - b->ticStart = 0; - b->ticStop = 0; - - // Performance counters are bit finnicky on ARM - // We must first enable and program the PMU before using it - // This is a per core operation so with thread scheduling we may jump to a core we haven't enabled PMU yet - // Work around this by enabling PMU each time at the start of a block - // Some ARM CPUs are getting absurd core counts(48+!) - // We have to reset counters at the start of every block anyway, so may as well. - // One thing to note about performance counters on ARM - // The kernel can block access to these co-processor registers - // In the case that this happens, these will generate a SIGILL - - // Refer to the ARM ARM about PMCR for what these do exactly - enum - { - PERF_OPTION_ENABLE = (1 << 0), - PERF_OPTION_RESET_CR = (1 << 1), - PERF_OPTION_RESET_CCR = (1 << 2), - PERF_OPTION_DIVIDER_MODE = (1 << 3), - PERF_OPTION_EXPORT_ENABLE = (1 << 4), - }; - const u32 perf_options = - PERF_OPTION_ENABLE | - PERF_OPTION_RESET_CR | - PERF_OPTION_RESET_CCR | - PERF_OPTION_EXPORT_ENABLE; - MOVI2R(R0, perf_options); - // Programs the PMCR - MCR(15, 0, R0, 9, 12, 0); - - MOVI2R(R0, 0x8000000F); - // Enables all counters - MCR(15, 0, R0, 9, 12, 1); - // Clears all counter overflows - MCR(15, 0, R0, 9, 12, 3); - - // Gets the cycle counter - MRC(15, 0, R1, 9, 13, 0); - MOVI2R(R0, (u32)&b->ticStart); - STR(R1, R0, 0); -} - -void JitArm::EndTimeProfile(JitBlock* b) -{ - // Gets the cycle counter - MRC(15, 0, R1, 9, 13, 0); - MOVI2R(R0, (u32)&b->ticStop); - STR(R1, R0, 0); - - MOVI2R(R0, (u32)&b->ticStart); - MOVI2R(R14, (u32)asm_routines.m_increment_profile_counter); - BL(R14); -} - -const u8* JitArm::DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b) -{ - int blockSize = code_buf->GetSize(); - - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) - { - // Comment out the following to disable breakpoints (speed-up) - if (!Profiler::g_ProfileBlocks) - { - if (PowerPC::GetState() == PowerPC::CPU_STEPPING) - blockSize = 1; - Trace(); - } - } - - if (em_address == 0) - { - Core::SetState(Core::CORE_PAUSE); - PanicAlert("ERROR: Compiling at 0. LR=%08x CTR=%08x", LR, CTR); - } - - js.isLastInstruction = false; - js.blockStart = em_address; - js.fifoBytesThisBlock = 0; - js.curBlock = b; - - u32 nextPC = em_address; - // Analyze the block, collect all instructions it is made of (including inlining, - // if that is enabled), reorder instructions for optimal performance, and join joinable instructions. - nextPC = analyzer.Analyze(em_address, &code_block, code_buf, blockSize); - - PPCAnalyst::CodeOp *ops = code_buf->codebuffer; - - const u8 *start = GetCodePtr(); - b->checkedEntry = start; - b->runCount = 0; - - // Downcount flag check, Only valid for linked blocks - { - FixupBranch no_downcount = B_CC(CC_PL); - ARMReg rA = gpr.GetReg(false); - MOVI2R(rA, js.blockStart); - STR(rA, R9, PPCSTATE_OFF(pc)); - MOVI2R(rA, (u32)asm_routines.doTiming); - B(rA); - SetJumpTarget(no_downcount); - } - - const u8 *normalEntry = GetCodePtr(); - b->normalEntry = normalEntry; - - if (ImHereDebug) - QuickCallFunction(R14, (void *)&ImHere); //Used to get a trace of the last few blocks before a crash, sometimes VERY useful - - if (js.fpa.any) - { - // This block uses FPU - needs to add FP exception bailout - ARMReg A = gpr.GetReg(); - ARMReg C = gpr.GetReg(); - Operand2 Shift(2, 10); // 1 << 13 - MOVI2R(C, js.blockStart); // R3 - LDR(A, R9, PPCSTATE_OFF(msr)); - TST(A, Shift); - FixupBranch no_fpe = B_CC(CC_NEQ); - STR(C, R9, PPCSTATE_OFF(pc)); - - LDR(A, R9, PPCSTATE_OFF(Exceptions)); - ORR(A, A, EXCEPTION_FPU_UNAVAILABLE); - STR(A, R9, PPCSTATE_OFF(Exceptions)); - QuickCallFunction(A, (void*)&PowerPC::CheckExceptions); - LDR(A, R9, PPCSTATE_OFF(npc)); - STR(A, R9, PPCSTATE_OFF(pc)); - - MOVI2R(A, (u32)asm_routines.dispatcher); - B(A); - - SetJumpTarget(no_fpe); - gpr.Unlock(A, C); - } - - // Conditionally add profiling code. - if (Profiler::g_ProfileBlocks) - { - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - MOVI2R(rA, (u32)&b->runCount); // Load in to register - LDR(rB, rA); // Load the actual value in to R11. - ADD(rB, rB, 1); // Add one to the value - STR(rB, rA); // Now store it back in the memory location - BeginTimeProfile(b); - gpr.Unlock(rA, rB); - } - gpr.Start(js.gpa); - fpr.Start(js.fpa); - js.downcountAmount = 0; - - if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) - js.downcountAmount += PatchEngine::GetSpeedhackCycles(em_address); - - js.skipInstructions = 0; - js.compilerPC = nextPC; - - // Translate instructions - for (u32 i = 0; i < code_block.m_num_instructions; i++) - { - js.compilerPC = ops[i].address; - js.op = &ops[i]; - js.instructionNumber = i; - const GekkoOPInfo *opinfo = ops[i].opinfo; - js.downcountAmount += opinfo->numCycles; - - if (i == (code_block.m_num_instructions - 1)) - { - // WARNING - cmp->branch merging will screw this up. - js.isLastInstruction = true; - } - - if (jo.optimizeGatherPipe && js.fifoBytesThisBlock >= 32) - { - js.fifoBytesThisBlock -= 32; - PUSH(4, R0, R1, R2, R3); - QuickCallFunction(R14, (void*)&GPFifo::FastCheckGatherPipe); - POP(4, R0, R1, R2, R3); - } - - if (!ops[i].skip) - { - if (jo.memcheck && (opinfo->flags & FL_USE_FPU)) - { - // Don't do this yet - BKPT(0x7777); - } - JitArmTables::CompileInstruction(ops[i]); - - // If we have a register that will never be used again, flush it. - for (int j : ~ops[i].gprInUse) - gpr.StoreFromRegister(j); - for (int j : ~ops[i].fprInUse) - fpr.StoreFromRegister(j); - - if (jo.memcheck && (opinfo->flags & FL_LOADSTORE)) - { - // Don't do this yet - BKPT(0x666); - } - } - } - - if (code_block.m_memory_exception) - BKPT(0x500); - - if (code_block.m_broken) - { - printf("Broken Block going to 0x%08x\n", nextPC); - WriteExit(nextPC); - } - - b->codeSize = (u32)(GetCodePtr() - start); - b->originalSize = code_block.m_num_instructions; - FlushIcache(); - return start; -} diff --git a/Source/Core/Core/PowerPC/JitArm32/Jit.h b/Source/Core/Core/PowerPC/JitArm32/Jit.h deleted file mode 100644 index 565bdcf7df..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/Jit.h +++ /dev/null @@ -1,248 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// ======================== -// See comments in Jit.cpp. -// ======================== - -// Mystery: Capcom vs SNK 800aa278 - -// CR flags approach: -// * Store that "N+Z flag contains CR0" or "S+Z flag contains CR3". -// * All flag altering instructions flush this -// * A flush simply does a conditional write to the appropriate CRx. -// * If flag available, branch code can become absolutely trivial. - -// Settings -// ---------- -#pragma once - -#include "Core/PowerPC/CPUCoreBase.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/JitArm32/JitArmCache.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitFPRCache.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" -#include "Core/PowerPC/JitArmCommon/BackPatch.h" -#include "Core/PowerPC/JitCommon/JitBase.h" - -#define PPCSTATE_OFF(elem) ((s32)STRUCT_OFF(PowerPC::ppcState, elem) - (s32)STRUCT_OFF(PowerPC::ppcState, spr[0])) - -// Some asserts to make sure we will be able to load everything -static_assert(PPCSTATE_OFF(spr[1023]) > -4096 && PPCSTATE_OFF(spr[1023]) < 4096, "LDR can't reach all of the SPRs"); -static_assert(PPCSTATE_OFF(ps[0][0]) >= -1020 && PPCSTATE_OFF(ps[0][0]) <= 1020, "VLDR can't reach all of the FPRs"); -static_assert((PPCSTATE_OFF(ps[0][0]) % 4) == 0, "VLDR requires FPRs to be 4 byte aligned"); - -class JitArm : public JitBase, public ArmGen::ARMCodeBlock -{ -private: - JitArmBlockCache blocks; - - JitArmAsmRoutineManager asm_routines; - - // TODO: Make arm specific versions of these, shouldn't be too hard to - // make it so we allocate some space at the start(?) of code generation - // and keep the registers in a cache. Will burn this bridge when we get to - // it. - ArmRegCache gpr; - ArmFPRCache fpr; - - PPCAnalyst::CodeBuffer code_buffer; - - // The key is the backpatch flags - std::map m_backpatch_info; - - void DoDownCount(); - - void Helper_UpdateCR1(ArmGen::ARMReg fpscr, ArmGen::ARMReg temp); - - void SetFPException(ArmGen::ARMReg Reg, u32 Exception); - - ArmGen::FixupBranch JumpIfCRFieldBit(int field, int bit, bool jump_if_set); - - void BeginTimeProfile(JitBlock* b); - void EndTimeProfile(JitBlock* b); - - bool BackPatch(SContext* ctx); - bool DisasmLoadStore(const u8* ptr, u32* flags, ArmGen::ARMReg* rD, ArmGen::ARMReg* V1); - // Initializes the information that backpatching needs - // This is required so we know the backpatch routine sizes and trouble offsets - void InitBackpatch(); - - // Returns the trouble instruction offset - // Zero if it isn't a fastmem routine - u32 EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ArmGen::ARMReg RS, ArmGen::ARMReg V1 = ArmGen::ARMReg::INVALID_REG); - -public: - JitArm() : code_buffer(32000) {} - ~JitArm() {} - - void Init(); - void Shutdown(); - - // Jit! - - void Jit(u32 em_address); - const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); - - JitBaseBlockCache *GetBlockCache() { return &blocks; } - - bool HandleFault(uintptr_t access_address, SContext* ctx) override; - - void Trace(); - - void ClearCache(); - - CommonAsmRoutinesBase *GetAsmRoutines() - { - return &asm_routines; - } - - const char *GetName() - { - return "JITARM"; - } - - // Run! - void Run(); - void SingleStep(); - - // Utilities for use by opcodes - - void WriteExit(u32 destination); - void WriteExitDestInR(ArmGen::ARMReg Reg); - void WriteRfiExitDestInR(ArmGen::ARMReg Reg); - void WriteExceptionExit(); - void WriteCallInterpreter(UGeckoInstruction _inst); - void Cleanup(); - - void ComputeRC(ArmGen::ARMReg value, int cr = 0); - void ComputeRC(s32 value, int cr); - - void ComputeCarry(); - void ComputeCarry(bool Carry); - void GetCarryAndClear(ArmGen::ARMReg reg); - void FinalizeCarry(ArmGen::ARMReg reg); - - void SafeStoreFromReg(s32 dest, u32 value, s32 offsetReg, int accessSize, s32 offset); - void SafeLoadToReg(ArmGen::ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update); - - // OPCODES - void FallBackToInterpreter(UGeckoInstruction _inst); - void DoNothing(UGeckoInstruction _inst); - void HLEFunction(UGeckoInstruction _inst); - - void DynaRunTable4(UGeckoInstruction _inst); - void DynaRunTable19(UGeckoInstruction _inst); - void DynaRunTable31(UGeckoInstruction _inst); - void DynaRunTable59(UGeckoInstruction _inst); - void DynaRunTable63(UGeckoInstruction _inst); - - // Breakin shit - void Break(UGeckoInstruction _inst); - // Branch - void bx(UGeckoInstruction _inst); - void bcx(UGeckoInstruction _inst); - void bclrx(UGeckoInstruction _inst); - void sc(UGeckoInstruction _inst); - void rfi(UGeckoInstruction _inst); - void bcctrx(UGeckoInstruction _inst); - - // Integer - void arith(UGeckoInstruction _inst); - - void addex(UGeckoInstruction _inst); - void subfic(UGeckoInstruction _inst); - void cntlzwx(UGeckoInstruction _inst); - void cmp (UGeckoInstruction _inst); - void cmpl(UGeckoInstruction _inst); - void cmpi(UGeckoInstruction _inst); - void cmpli(UGeckoInstruction _inst); - void negx(UGeckoInstruction _inst); - void mulhwux(UGeckoInstruction _inst); - void rlwimix(UGeckoInstruction _inst); - void rlwinmx(UGeckoInstruction _inst); - void rlwnmx(UGeckoInstruction _inst); - void srawix(UGeckoInstruction _inst); - void extshx(UGeckoInstruction inst); - void extsbx(UGeckoInstruction inst); - - // System Registers - void mtmsr(UGeckoInstruction _inst); - void mfmsr(UGeckoInstruction _inst); - void mtspr(UGeckoInstruction _inst); - void mfspr(UGeckoInstruction _inst); - void mftb(UGeckoInstruction _inst); - void mcrf(UGeckoInstruction _inst); - void mtsr(UGeckoInstruction _inst); - void mfsr(UGeckoInstruction _inst); - void twx(UGeckoInstruction _inst); - - // LoadStore - void stX(UGeckoInstruction _inst); - void lXX(UGeckoInstruction _inst); - void lmw(UGeckoInstruction _inst); - void stmw(UGeckoInstruction _inst); - - void icbi(UGeckoInstruction _inst); - void dcbst(UGeckoInstruction _inst); - - // Floating point - void fabsx(UGeckoInstruction _inst); - void fnabsx(UGeckoInstruction _inst); - void fnegx(UGeckoInstruction _inst); - void faddsx(UGeckoInstruction _inst); - void faddx(UGeckoInstruction _inst); - void fsubsx(UGeckoInstruction _inst); - void fsubx(UGeckoInstruction _inst); - void fmulsx(UGeckoInstruction _inst); - void fmulx(UGeckoInstruction _inst); - void fmrx(UGeckoInstruction _inst); - void fmaddsx(UGeckoInstruction _inst); - void fmaddx(UGeckoInstruction _inst); - void fctiwx(UGeckoInstruction _inst); - void fctiwzx(UGeckoInstruction _inst); - void fnmaddx(UGeckoInstruction _inst); - void fnmaddsx(UGeckoInstruction _inst); - void fresx(UGeckoInstruction _inst); - void fselx(UGeckoInstruction _inst); - void frsqrtex(UGeckoInstruction _inst); - - // Floating point loadStore - void lfXX(UGeckoInstruction _inst); - void stfXX(UGeckoInstruction _inst); - - // Paired Singles - void ps_add(UGeckoInstruction _inst); - void ps_div(UGeckoInstruction _inst); - void ps_res(UGeckoInstruction _inst); - void ps_sum0(UGeckoInstruction _inst); - void ps_sum1(UGeckoInstruction _inst); - void ps_madd(UGeckoInstruction _inst); - void ps_nmadd(UGeckoInstruction _inst); - void ps_msub(UGeckoInstruction _inst); - void ps_nmsub(UGeckoInstruction _inst); - void ps_madds0(UGeckoInstruction _inst); - void ps_madds1(UGeckoInstruction _inst); - void ps_sub(UGeckoInstruction _inst); - void ps_mul(UGeckoInstruction _inst); - void ps_muls0(UGeckoInstruction _inst); - void ps_muls1(UGeckoInstruction _inst); - void ps_merge00(UGeckoInstruction _inst); - void ps_merge01(UGeckoInstruction _inst); - void ps_merge10(UGeckoInstruction _inst); - void ps_merge11(UGeckoInstruction _inst); - void ps_mr(UGeckoInstruction _inst); - void ps_neg(UGeckoInstruction _inst); - void ps_abs(UGeckoInstruction _inst); - void ps_nabs(UGeckoInstruction _inst); - void ps_rsqrte(UGeckoInstruction _inst); - void ps_sel(UGeckoInstruction _inst); - - // LoadStore paired - void psq_l(UGeckoInstruction _inst); - void psq_lx(UGeckoInstruction _inst); - void psq_st(UGeckoInstruction _inst); - void psq_stx(UGeckoInstruction _inst); -}; diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp deleted file mode 100644 index b23c41ff4b..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArmCache.cpp +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -// Enable define below to enable oprofile integration. For this to work, -// it requires at least oprofile version 0.9.4, and changing the build -// system to link the Dolphin executable against libopagent. Since the -// dependency is a little inconvenient and this is possibly a slight -// performance hit, it's not enabled by default, but it's useful for -// locating performance issues. - -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitArmCache.h" - - -using namespace ArmGen; - -void JitArmBlockCache::WriteLinkBlock(u8* location, const u8* address) -{ - ARMXEmitter emit(location); - emit.B(address); - emit.FlushIcache(); -} - -void JitArmBlockCache::WriteDestroyBlock(const u8* location, u32 address) -{ - ARMXEmitter emit((u8 *)location); - emit.MOVI2R(R11, address); - emit.MOVI2R(R12, (u32)jit->GetAsmRoutines()->dispatcher); - emit.STR(R11, R9, PPCSTATE_OFF(pc)); - emit.B(R12); - emit.FlushIcache(); -} - - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArmCache.h b/Source/Core/Core/PowerPC/JitArm32/JitArmCache.h deleted file mode 100644 index 7eefc65981..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArmCache.h +++ /dev/null @@ -1,17 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Core/PowerPC/JitCommon/JitCache.h" - - -typedef void (*CompiledCode)(); - -class JitArmBlockCache : public JitBaseBlockCache -{ -private: - void WriteLinkBlock(u8* location, const u8* address); - void WriteDestroyBlock(const u8* location, u32 address); -}; diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp deleted file mode 100644 index 55f21b0bfd..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_BackPatch.cpp +++ /dev/null @@ -1,707 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include - -#include "Common/CommonTypes.h" -#include "Common/StringUtil.h" - -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/JitArm32/Jit.h" - -using namespace ArmGen; - -// This generates some fairly heavy trampolines, but: -// 1) It's really necessary. We don't know anything about the context. -// 2) It doesn't really hurt. Only instructions that access I/O will get these, and there won't be -// that many of them in a typical program/game. -bool JitArm::DisasmLoadStore(const u8* ptr, u32* flags, ARMReg* rD, ARMReg* V1) -{ - u32 inst = *(u32*)ptr; - u32 prev_inst = *(u32*)(ptr - 4); - u32 next_inst = *(u32*)(ptr + 4); - u8 op = (inst >> 20) & 0xFF; - *rD = (ARMReg)((inst >> 12) & 0xF); - - switch (op) - { - case 0b01011000: // STR(imm) - case 0b01111000: // STR(register) - { - *flags |= - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_32; - *rD = (ARMReg)(prev_inst & 0xF); - } - break; - case 0b01011001: // LDR(imm) - case 0b01111001: // LDR(register) - { - *flags |= - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_32; - // REV - if ((next_inst & 0x0FFF0FF0) != 0x06BF0F30) - *flags |= BackPatchInfo::FLAG_REVERSE; - } - break; - case 0b00011101: // LDRH(imm) - case 0b00011001: // LDRH(register) - { - *flags |= - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_16; - // REV16 - if((next_inst & 0x0FFF0FF0) != 0x06BF0FB0) - *flags |= BackPatchInfo::FLAG_REVERSE; - } - break; - case 0b01011101: // LDRB(imm) - case 0b01111101: // LDRB(register) - { - *flags |= - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_8; - } - break; - case 0b01011100: // STRB(imm) - case 0b01111100: // STRB(register) - { - *flags |= - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_8; - *rD = (ARMReg)((inst >> 12) & 0xF); - } - break; - case 0b00011100: // STRH(imm) - case 0b00011000: // STRH(register) - { - *flags |= - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_16; - *rD = (ARMReg)(prev_inst & 0xF); - } - break; - default: - { - // Could be a floating point loadstore - u8 op2 = (inst >> 24) & 0xF; - switch (op2) - { - case 0xD: // VLDR/VSTR - { - bool load = (inst >> 20) & 1; - bool single = !((inst >> 8) & 1); - - if (load) - *flags |= BackPatchInfo::FLAG_LOAD; - else - *flags |= BackPatchInfo::FLAG_STORE; - - if (single) - *flags |= BackPatchInfo::FLAG_SIZE_F32; - else - *flags |= BackPatchInfo::FLAG_SIZE_F64; - if (single) - { - if (!load) - { - u32 vcvt = *(u32*)(ptr - 8); - u32 src_register = vcvt & 0xF; - src_register |= (vcvt >> 1) & 0x10; - *rD = (ARMReg)(src_register + D0); - } - } - } - break; - case 0x4: // VST1/VLD1 - { - u32 size = (inst >> 6) & 0x3; - bool load = (inst >> 21) & 1; - if (load) - *flags |= BackPatchInfo::FLAG_LOAD; - else - *flags |= BackPatchInfo::FLAG_STORE; - - - if (size == 2) // 32bit - { - if (load) - { - // For 32bit loads we are loading to a temporary - // So we need to read PC+8,PC+12 to get the two destination registers - u32 vcvt_1 = *(u32*)(ptr + 8); - u32 vcvt_2 = *(u32*)(ptr + 12); - - u32 dest_register_1 = (vcvt_1 >> 12) & 0xF; - dest_register_1 |= (vcvt_1 >> 18) & 0x10; - - u32 dest_register_2 = (vcvt_2 >> 12) & 0xF; - dest_register_2 |= (vcvt_2 >> 18) & 0x10; - - // Make sure to encode the destination register to something our emitter understands - *rD = (ARMReg)(dest_register_1 + D0); - *V1 = (ARMReg)(dest_register_2 + D0); - } - else - { - // For 32bit stores we are storing from a temporary - // So we need to check the VCVT at PC-8 for the source register - u32 vcvt = *(u32*)(ptr - 8); - u32 src_register = vcvt & 0xF; - src_register |= (vcvt >> 1) & 0x10; - *rD = (ARMReg)(src_register + D0); - } - *flags |= BackPatchInfo::FLAG_SIZE_F32; - } - else if (size == 3) // 64bit - { - if (load) - { - // For 64bit loads we load directly in to the VFP register - u32 dest_register = (inst >> 12) & 0xF; - dest_register |= (inst >> 18) & 0x10; - // Make sure to encode the destination register to something our emitter understands - *rD = (ARMReg)(dest_register + D0); - } - else - { - // For 64bit stores we are storing from a temporary - // Check the previous VREV64 instruction for the real register - u32 src_register = prev_inst & 0xF; - src_register |= (prev_inst >> 1) & 0x10; - *rD = (ARMReg)(src_register + D0); - } - *flags |= BackPatchInfo::FLAG_SIZE_F64; - } - } - break; - default: - printf("Op is 0x%02x\n", op); - return false; - break; - } - } - } - return true; -} - -bool JitArm::HandleFault(uintptr_t access_address, SContext* ctx) -{ - if (access_address < (uintptr_t)Memory::physical_base) - PanicAlertT("Exception handler - access below memory space. 0x%08x", access_address); - return BackPatch(ctx); -} - -bool JitArm::BackPatch(SContext* ctx) -{ - // TODO: This ctx needs to be filled with our information - - // We need to get the destination register before we start - u8* codePtr = (u8*)ctx->CTX_PC; - u32 Value = *(u32*)codePtr; - ARMReg rD = INVALID_REG; - ARMReg V1 = INVALID_REG; - u32 flags = 0; - - if (!DisasmLoadStore(codePtr, &flags, &rD, &V1)) - { - WARN_LOG(DYNA_REC, "Invalid backpatch at location 0x%08lx(0x%08x)", ctx->CTX_PC, Value); - exit(0); - } - - BackPatchInfo& info = m_backpatch_info[flags]; - ARMXEmitter emitter(codePtr - info.m_fastmem_trouble_inst_offset * 4); - u32 new_pc = (u32)emitter.GetCodePtr(); - EmitBackpatchRoutine(&emitter, flags, false, true, rD, V1); - emitter.FlushIcache(); - ctx->CTX_PC = new_pc; - return true; -} - -u32 JitArm::EmitBackpatchRoutine(ARMXEmitter* emit, u32 flags, bool fastmem, bool do_padding, ARMReg RS, ARMReg V1) -{ - ARMReg addr = R12; - ARMReg temp = R11; - u32 trouble_offset = 0; - const u8* code_base = emit->GetCodePtr(); - - if (fastmem) - { - ARMReg temp2 = R10; - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - emit->BIC(temp, addr, mask); - - if (flags & BackPatchInfo::FLAG_STORE && - flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) - { - emit->ADD(temp, temp, R8); - NEONXEmitter nemit(emit); - if (flags & BackPatchInfo::FLAG_SIZE_F32) - { - emit->VCVT(S0, RS, 0); - nemit.VREV32(I_8, D0, D0); - trouble_offset = (emit->GetCodePtr() - code_base) / 4; - emit->VSTR(S0, temp, 0); - } - else - { - nemit.VREV64(I_8, D0, RS); - trouble_offset = (emit->GetCodePtr() - code_base) / 4; - nemit.VST1(I_64, D0, temp); - } - } - else if (flags & BackPatchInfo::FLAG_LOAD && - flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) - { - - emit->ADD(temp, temp, R8); - NEONXEmitter nemit(emit); - trouble_offset = (emit->GetCodePtr() - code_base) / 4; - if (flags & BackPatchInfo::FLAG_SIZE_F32) - { - nemit.VLD1(F_32, D0, temp); - nemit.VREV32(I_8, D0, D0); // Byte swap to result - emit->VCVT(RS, S0, 0); - emit->VCVT(V1, S0, 0); - } - else - { - nemit.VLD1(I_64, RS, temp); - nemit.VREV64(I_8, RS, RS); // Byte swap to result - } - } - else if (flags & BackPatchInfo::FLAG_STORE) - { - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->REV(temp2, RS); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->REV16(temp2, RS); - - trouble_offset = (emit->GetCodePtr() - code_base) / 4; - - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->STR(temp2, R8, temp); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->STRH(temp2, R8, temp); - else - emit->STRB(RS, R8, temp); - } - else - { - trouble_offset = (emit->GetCodePtr() - code_base) / 4; - - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->LDR(RS, R8, temp); // 5 - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->LDRH(RS, R8, temp); - else if (flags & BackPatchInfo::FLAG_SIZE_8) - emit->LDRB(RS, R8, temp); - - - if (!(flags & BackPatchInfo::FLAG_REVERSE)) - { - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->REV(RS, RS); // 6 - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->REV16(RS, RS); - } - - if (flags & BackPatchInfo::FLAG_EXTEND) - emit->SXTH(RS, RS); - } - } - else - { - if (flags & BackPatchInfo::FLAG_STORE && - flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) - { - emit->PUSH(4, R0, R1, R2, R3); - if (flags & BackPatchInfo::FLAG_SIZE_F32) - { - emit->MOV(R1, addr); - emit->VCVT(S0, RS, 0); - emit->VMOV(R0, S0); - emit->MOVI2R(temp, (u32)&PowerPC::Write_U32); - emit->BL(temp); - } - else - { - emit->MOVI2R(temp, (u32)&PowerPC::Write_F64); -#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 - emit->VMOV(R0, RS); - emit->MOV(R2, addr); -#else - emit->VMOV(D0, RS); - emit->MOV(R0, addr); -#endif - emit->BL(temp); - } - emit->POP(4, R0, R1, R2, R3); - } - else if (flags & BackPatchInfo::FLAG_LOAD && - flags & (BackPatchInfo::FLAG_SIZE_F32 | BackPatchInfo::FLAG_SIZE_F64)) - { - emit->PUSH(4, R0, R1, R2, R3); - emit->MOV(R0, addr); - if (flags & BackPatchInfo::FLAG_SIZE_F32) - { - emit->MOVI2R(temp, (u32)&PowerPC::Read_U32); - emit->BL(temp); - emit->VMOV(S0, R0); - emit->VCVT(RS, S0, 0); - emit->VCVT(V1, S0, 0); - } - else - { - emit->MOVI2R(temp, (u32)&PowerPC::Read_F64); - emit->BL(temp); - -#if !defined(__ARM_PCS_VFP) // SoftFP returns in R0 and R1 - emit->VMOV(RS, R0); -#else - emit->VMOV(RS, D0); -#endif - } - emit->POP(4, R0, R1, R2, R3); - } - else if (flags & BackPatchInfo::FLAG_STORE) - { - emit->PUSH(4, R0, R1, R2, R3); - emit->MOV(R0, RS); - emit->MOV(R1, addr); - - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->MOVI2R(temp, (u32)&PowerPC::Write_U32); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->MOVI2R(temp, (u32)&PowerPC::Write_U16); - else - emit->MOVI2R(temp, (u32)&PowerPC::Write_U8); - - emit->BL(temp); - emit->POP(4, R0, R1, R2, R3); - } - else - { - emit->PUSH(4, R0, R1, R2, R3); - emit->MOV(R0, addr); - - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->MOVI2R(temp, (u32)&PowerPC::Read_U32); - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->MOVI2R(temp, (u32)&PowerPC::Read_U16); - else if (flags & BackPatchInfo::FLAG_SIZE_8) - emit->MOVI2R(temp, (u32)&PowerPC::Read_U8); - - emit->BL(temp); - emit->MOV(temp, R0); - emit->POP(4, R0, R1, R2, R3); - - if (!(flags & BackPatchInfo::FLAG_REVERSE)) - { - emit->MOV(RS, temp); - } - else - { - if (flags & BackPatchInfo::FLAG_SIZE_32) - emit->REV(RS, temp); // 6 - else if (flags & BackPatchInfo::FLAG_SIZE_16) - emit->REV16(RS, temp); - } - } - } - - if (do_padding) - { - BackPatchInfo& info = m_backpatch_info[flags]; - u32 num_insts_max = std::max(info.m_fastmem_size, info.m_slowmem_size); - - u32 code_size = emit->GetCodePtr() - code_base; - code_size /= 4; - - emit->NOP(num_insts_max - code_size); - } - - return trouble_offset; -} - -void JitArm::InitBackpatch() -{ - u32 flags = 0; - BackPatchInfo info; - u8* code_base = GetWritableCodePtr(); - u8* code_end; - - // Writes - { - // 8bit - { - flags = - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 16bit - { - flags = - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 32bit - { - flags = - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 32bit float - { - flags = - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_F32; - EmitBackpatchRoutine(this, flags, false, false, D0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, D0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 64bit float - { - flags = - BackPatchInfo::FLAG_STORE | - BackPatchInfo::FLAG_SIZE_F64; - EmitBackpatchRoutine(this, flags, false, false, D0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, D0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - - } - - // Loads - { - // 8bit - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_8; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 16bit - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_16; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 32bit - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_32; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 16bit - reverse - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_16 | - BackPatchInfo::FLAG_REVERSE; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 16bit - sign extend - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_16 | - BackPatchInfo::FLAG_EXTEND; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 32bit - reverse - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_32 | - BackPatchInfo::FLAG_REVERSE; - EmitBackpatchRoutine(this, flags, false, false, R0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, R0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 32bit float - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_F32; - EmitBackpatchRoutine(this, flags, false, false, D0, D1); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, D0, D1); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - // 64bit float - { - flags = - BackPatchInfo::FLAG_LOAD | - BackPatchInfo::FLAG_SIZE_F64; - EmitBackpatchRoutine(this, flags, false, false, D0); - code_end = GetWritableCodePtr(); - info.m_slowmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - info.m_fastmem_trouble_inst_offset = - EmitBackpatchRoutine(this, flags, true, false, D0); - code_end = GetWritableCodePtr(); - info.m_fastmem_size = (code_end - code_base) / 4; - - SetCodePtr(code_base); - - m_backpatch_info[flags] = info; - } - } -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp deleted file mode 100644 index 704b4fd9e9..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Branch.cpp +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -// The branches are known good, or at least reasonably good. -// No need for a disable-mechanism. - -using namespace ArmGen; -void JitArm::sc(UGeckoInstruction inst) -{ - INSTRUCTION_START - - gpr.Flush(); - fpr.Flush(); - - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, js.compilerPC + 4); - STR(rA, R9, PPCSTATE_OFF(pc)); - LDR(rA, R9, PPCSTATE_OFF(Exceptions)); - ORR(rA, rA, EXCEPTION_SYSCALL); - STR(rA, R9, PPCSTATE_OFF(Exceptions)); - gpr.Unlock(rA); - - WriteExceptionExit(); -} - -void JitArm::rfi(UGeckoInstruction inst) -{ - INSTRUCTION_START - - gpr.Flush(); - fpr.Flush(); - - // See Interpreter rfi for details - const u32 mask = 0x87C0FFFF; - const u32 clearMSR13 = 0xFFFBFFFF; // Mask used to clear the bit MSR[13] - // MSR = ((MSR & ~mask) | (SRR1 & mask)) & clearMSR13; - // R0 = MSR location - // R1 = MSR contents - // R2 = Mask - // R3 = Mask - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - ARMReg rC = gpr.GetReg(); - ARMReg rD = gpr.GetReg(); - MOVI2R(rB, (~mask) & clearMSR13); - MOVI2R(rC, mask & clearMSR13); - - LDR(rD, R9, PPCSTATE_OFF(msr)); - - AND(rD, rD, rB); // rD = Masked MSR - - LDR(rB, R9, PPCSTATE_OFF(spr[SPR_SRR1])); // rB contains SRR1 here - - AND(rB, rB, rC); // rB contains masked SRR1 here - ORR(rB, rD, rB); // rB = Masked MSR OR masked SRR1 - - STR(rB, R9, PPCSTATE_OFF(msr)); // STR rB in to rA - - LDR(rA, R9, PPCSTATE_OFF(spr[SPR_SRR0])); - - gpr.Unlock(rB, rC, rD); - WriteRfiExitDestInR(rA); // rA gets unlocked here - //AND(32, M(&MSR), Imm32((~mask) & clearMSR13)); - //MOV(32, R(EAX), M(&SRR1)); - //AND(32, R(EAX), Imm32(mask & clearMSR13)); - //OR(32, M(&MSR), R(EAX)); - // NPC = SRR0; - //MOV(32, R(EAX), M(&SRR0)); - //WriteRfiExitDestInEAX(); -} - -void JitArm::bx(UGeckoInstruction inst) -{ - INSTRUCTION_START - // We must always process the following sentence - // even if the blocks are merged by PPCAnalyst::Flatten(). - if (inst.LK) - { - ARMReg rA = gpr.GetReg(false); - u32 Jumpto = js.compilerPC + 4; - MOVI2R(rA, Jumpto); - STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); - //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); - } - - // If this is not the last instruction of a block, - // we will skip the rest process. - // Because PPCAnalyst::Flatten() merged the blocks. - if (!js.isLastInstruction) - { - return; - } - - gpr.Flush(); - fpr.Flush(); - - u32 destination; - if (inst.AA) - destination = SignExt26(inst.LI << 2); - else - destination = js.compilerPC + SignExt26(inst.LI << 2); - - if (destination == js.compilerPC) - { - //PanicAlert("Idle loop detected at %08x", destination); - // CALL(ProtectFunction(&CoreTiming::Idle, 0)); - // JMP(Asm::testExceptions, true); - // make idle loops go faster - MOVI2R(R14, (u32)&CoreTiming::Idle); - BL(R14); - MOVI2R(R14, js.compilerPC); - STR(R14, R9, PPCSTATE_OFF(pc)); - WriteExceptionExit(); - } - WriteExit(destination); -} - -void JitArm::bcx(UGeckoInstruction inst) -{ - INSTRUCTION_START - // USES_CR - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - FixupBranch pCTRDontBranch; - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR - { - LDR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); - SUBS(rB, rB, 1); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); - - //SUB(32, M(&CTR), Imm8(1)); - if (inst.BO & BO_BRANCH_IF_CTR_0) - pCTRDontBranch = B_CC(CC_NEQ); - else - pCTRDontBranch = B_CC(CC_EQ); - } - - FixupBranch pConditionDontBranch; - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit - { - pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), - !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - } - - if (inst.LK) - { - u32 Jumpto = js.compilerPC + 4; - MOVI2R(rB, Jumpto); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); - //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); // Careful, destroys R14, R12 - } - gpr.Unlock(rA, rB); - - u32 destination; - if (inst.AA) - destination = SignExt16(inst.BD << 2); - else - destination = js.compilerPC + SignExt16(inst.BD << 2); - - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExit(destination); - - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) - SetJumpTarget( pConditionDontBranch ); - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) - SetJumpTarget( pCTRDontBranch ); - - if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) - { - gpr.Flush(); - fpr.Flush(); - WriteExit(js.compilerPC + 4); - } -} -void JitArm::bcctrx(UGeckoInstruction inst) -{ - INSTRUCTION_START - - // bcctrx doesn't decrement and/or test CTR - _dbg_assert_msg_(POWERPC, inst.BO_2 & BO_DONT_DECREMENT_FLAG, "bcctrx with decrement and test CTR option is invalid!"); - - if (inst.BO_2 & BO_DONT_CHECK_CONDITION) - { - // BO_2 == 1z1zz -> b always - - //NPC = CTR & 0xfffffffc; - gpr.Flush(); - fpr.Flush(); - - ARMReg rA = gpr.GetReg(); - - if (inst.LK_3) - { - u32 Jumpto = js.compilerPC + 4; - MOVI2R(rA, Jumpto); - STR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); - // ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); - } - LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR])); - BIC(rA, rA, 0x3); - WriteExitDestInR(rA); - } - else - { - // Rare condition seen in (just some versions of?) Nintendo's NES Emulator - // BO_2 == 001zy -> b if false - // BO_2 == 011zy -> b if true - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - - FixupBranch b = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), - !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - - LDR(rA, R9, PPCSTATE_OFF(spr[SPR_CTR])); - BIC(rA, rA, 0x3); - - if (inst.LK_3) - { - u32 Jumpto = js.compilerPC + 4; - MOVI2R(rB, Jumpto); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); - //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); - } - gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); - - WriteExitDestInR(rA); - - SetJumpTarget(b); - - if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) - { - gpr.Flush(); - fpr.Flush(); - WriteExit(js.compilerPC + 4); - } - } -} -void JitArm::bclrx(UGeckoInstruction inst) -{ - INSTRUCTION_START - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - FixupBranch pCTRDontBranch; - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) // Decrement and test CTR - { - LDR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); - SUBS(rB, rB, 1); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_CTR])); - - //SUB(32, M(&CTR), Imm8(1)); - if (inst.BO & BO_BRANCH_IF_CTR_0) - pCTRDontBranch = B_CC(CC_NEQ); - else - pCTRDontBranch = B_CC(CC_EQ); - } - - FixupBranch pConditionDontBranch; - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) // Test a CR bit - { - pConditionDontBranch = JumpIfCRFieldBit(inst.BI >> 2, 3 - (inst.BI & 3), - !(inst.BO_2 & BO_BRANCH_IF_TRUE)); - } - - //MOV(32, R(EAX), M(&LR)); - //AND(32, R(EAX), Imm32(0xFFFFFFFC)); - LDR(rA, R9, PPCSTATE_OFF(spr[SPR_LR])); - BIC(rA, rA, 0x3); - if (inst.LK) - { - u32 Jumpto = js.compilerPC + 4; - MOVI2R(rB, Jumpto); - STR(rB, R9, PPCSTATE_OFF(spr[SPR_LR])); - //ARMABI_MOVI2M((u32)&LR, js.compilerPC + 4); - } - gpr.Unlock(rB); // rA gets unlocked in WriteExitDestInR - - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); - WriteExitDestInR(rA); - - if ((inst.BO & BO_DONT_CHECK_CONDITION) == 0) - SetJumpTarget( pConditionDontBranch ); - if ((inst.BO & BO_DONT_DECREMENT_FLAG) == 0) - SetJumpTarget( pCTRDontBranch ); - - if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) - { - gpr.Flush(); - fpr.Flush(); - WriteExit(js.compilerPC + 4); - } -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_FPUtils.h b/Source/Core/Core/PowerPC/JitArm32/JitArm_FPUtils.h deleted file mode 100644 index 1cbb4a9cde..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_FPUtils.h +++ /dev/null @@ -1,69 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Core/PowerPC/Interpreter/Interpreter_FPUtils.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitFPRCache.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -static const double minmaxFloat[2] = {-(double)0x80000000, (double)0x7FFFFFFF}; -static const double doublenum = 0xfff8000000000000ull; - -// Exception masks -static ArmGen::Operand2 FRFIMask(5, 0x8); // 0x60000 -static ArmGen::Operand2 FIMask(2, 8); // 0x20000 -static ArmGen::Operand2 FRMask(4, 8); // 0x40000 -static ArmGen::Operand2 FXMask(2, 1); // 0x80000000 -static ArmGen::Operand2 VEMask(0x40, 0); // 0x40 - -static ArmGen::Operand2 XXException(2, 4); // 0x2000000 -static ArmGen::Operand2 CVIException(1, 0xC); // 0x100 -static ArmGen::Operand2 NANException(1, 4); // 0x1000000 -static ArmGen::Operand2 VXVCException(8, 8); // 0x80000 -static ArmGen::Operand2 ZXException(1, 3); // 0x4000000 -static ArmGen::Operand2 VXSQRTException(2, 5); // 0x200 - -inline void JitArm::SetFPException(ArmGen::ARMReg Reg, u32 Exception) -{ - ArmGen::Operand2 *ExceptionMask; - switch (Exception) - { - case FPSCR_VXCVI: - ExceptionMask = &CVIException; - break; - case FPSCR_XX: - ExceptionMask = &XXException; - break; - case FPSCR_VXSNAN: - ExceptionMask = &NANException; - break; - case FPSCR_VXVC: - ExceptionMask = &VXVCException; - break; - case FPSCR_ZX: - ExceptionMask = &ZXException; - break; - case FPSCR_VXSQRT: - ExceptionMask = &VXSQRTException; - break; - default: - _assert_msg_(DYNA_REC, false, "Passed unsupported FPexception: 0x%08x", Exception); - return; - break; - } - ArmGen::ARMReg rB = gpr.GetReg(); - MOV(rB, Reg); - ORR(Reg, Reg, *ExceptionMask); - CMP(rB, Reg); - SetCC(CC_NEQ); - ORR(Reg, Reg, FXMask); // If exception is set, set exception bit - SetCC(); - BIC(Reg, Reg, FRFIMask); - gpr.Unlock(rB); -} - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp deleted file mode 100644 index b948f3c4d3..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_FloatingPoint.cpp +++ /dev/null @@ -1,536 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/ConfigManager.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitArm_FPUtils.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitFPRCache.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -void JitArm::Helper_UpdateCR1(ARMReg fpscr, ARMReg temp) -{ -} - -void JitArm::fctiwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(true); - u32 b = inst.FB; - u32 d = inst.FD; - - ARMReg vB = fpr.R0(b); - ARMReg vD = fpr.R0(d); - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - ARMReg V2 = fpr.GetReg(); - - ARMReg rA = gpr.GetReg(); - ARMReg fpscrReg = gpr.GetReg(); - - FixupBranch DoneMax, DoneMin; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - MOVI2R(rA, (u32)minmaxFloat); - - // Check if greater than max float - { - VLDR(V0, rA, 8); // Load Max - VCMPE(vB, V0); - VMRS(_PC); // Loads in to APSR - FixupBranch noException = B_CC(CC_LE); - VMOV(vD, V0); // Set to max - SetFPException(fpscrReg, FPSCR_VXCVI); - DoneMax = B(); - SetJumpTarget(noException); - } - // Check if less than min float - { - VLDR(V0, rA, 0); - VCMPE(vB, V0); - VMRS(_PC); - FixupBranch noException = B_CC(CC_GE); - VMOV(vD, V0); - SetFPException(fpscrReg, FPSCR_VXCVI); - DoneMin = B(); - SetJumpTarget(noException); - } - // Within ranges, convert to integer - // Set rounding mode first - // PPC <-> ARM rounding modes - // 0, 1, 2, 3 <-> 0, 3, 1, 2 - ARMReg rB = gpr.GetReg(); - VMRS(rA); - // Bits 22-23 - BIC(rA, rA, Operand2(3, 5)); - - LDR(rB, R9, PPCSTATE_OFF(fpscr)); - AND(rB, rB, 0x3); // Get the FPSCR rounding bits - CMP(rB, 1); - SetCC(CC_EQ); // zero - ORR(rA, rA, Operand2(3, 5)); - SetCC(CC_NEQ); - CMP(rB, 2); // +inf - SetCC(CC_EQ); - ORR(rA, rA, Operand2(1, 5)); - SetCC(CC_NEQ); - CMP(rB, 3); // -inf - SetCC(CC_EQ); - ORR(rA, rA, Operand2(2, 5)); - SetCC(); - VMSR(rA); - ORR(rA, rA, Operand2(3, 5)); - VCVT(vD, vB, TO_INT | IS_SIGNED); - VMSR(rA); - gpr.Unlock(rB); - VCMPE(vD, vB); - VMRS(_PC); - - SetCC(CC_EQ); - BIC(fpscrReg, fpscrReg, FRFIMask); - FixupBranch DoneEqual = B(); - SetCC(); - SetFPException(fpscrReg, FPSCR_XX); - ORR(fpscrReg, fpscrReg, FIMask); - VABS(V1, vB); - VABS(V2, vD); - VCMPE(V2, V1); - VMRS(_PC); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, FRMask); - SetCC(); - SetJumpTarget(DoneEqual); - - SetJumpTarget(DoneMax); - SetJumpTarget(DoneMin); - - MOVI2R(rA, (u32)&doublenum); - VLDR(V0, rA, 0); - NEONXEmitter nemit(this); - nemit.VORR(vD, vD, V0); - - if (inst.Rc) - Helper_UpdateCR1(fpscrReg, rA); - - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(rA); - gpr.Unlock(fpscrReg); - fpr.Unlock(V0); - fpr.Unlock(V1); - fpr.Unlock(V2); -} - -void JitArm::fctiwzx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(true); - - u32 b = inst.FB; - u32 d = inst.FD; - - ARMReg vB = fpr.R0(b); - ARMReg vD = fpr.R0(d); - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - ARMReg V2 = fpr.GetReg(); - - ARMReg rA = gpr.GetReg(); - ARMReg fpscrReg = gpr.GetReg(); - - FixupBranch DoneMax, DoneMin; - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - MOVI2R(rA, (u32)minmaxFloat); - - // Check if greater than max float - { - VLDR(V0, rA, 8); // Load Max - VCMPE(vB, V0); - VMRS(_PC); // Loads in to APSR - FixupBranch noException = B_CC(CC_LE); - VMOV(vD, V0); // Set to max - SetFPException(fpscrReg, FPSCR_VXCVI); - DoneMax = B(); - SetJumpTarget(noException); - } - // Check if less than min float - { - VLDR(V0, rA, 0); - VCMPE(vB, V0); - VMRS(_PC); - FixupBranch noException = B_CC(CC_GE); - VMOV(vD, V0); - SetFPException(fpscrReg, FPSCR_VXCVI); - DoneMin = B(); - SetJumpTarget(noException); - } - // Within ranges, convert to integer - VCVT(vD, vB, TO_INT | IS_SIGNED | ROUND_TO_ZERO); - VCMPE(vD, vB); - VMRS(_PC); - - SetCC(CC_EQ); - BIC(fpscrReg, fpscrReg, FRFIMask); - FixupBranch DoneEqual = B(); - SetCC(); - SetFPException(fpscrReg, FPSCR_XX); - ORR(fpscrReg, fpscrReg, FIMask); - VABS(V1, vB); - VABS(V2, vD); - VCMPE(V2, V1); - VMRS(_PC); - SetCC(CC_GT); - ORR(fpscrReg, fpscrReg, FRMask); - SetCC(); - SetJumpTarget(DoneEqual); - - SetJumpTarget(DoneMax); - SetJumpTarget(DoneMin); - - MOVI2R(rA, (u32)&doublenum); - VLDR(V0, rA, 0); - NEONXEmitter nemit(this); - nemit.VORR(vD, vD, V0); - - if (inst.Rc) - Helper_UpdateCR1(fpscrReg, rA); - - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(rA); - gpr.Unlock(fpscrReg); - fpr.Unlock(V0); - fpr.Unlock(V1); - fpr.Unlock(V2); -} - -void JitArm::fabsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD = fpr.R0(inst.FD, false); - - VABS(vD, vB); -} - -void JitArm::fnabsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD = fpr.R0(inst.FD, false); - - VABS(vD, vB); - VNEG(vD, vD); -} - -void JitArm::fnegx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD = fpr.R0(inst.FD, false); - - VNEG(vD, vB); -} - -void JitArm::faddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vA = fpr.R0(inst.FA); - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD0 = fpr.R0(inst.FD, false); - ARMReg vD1 = fpr.R1(inst.FD, false); - - VADD(vD0, vA, vB); - VMOV(vD1, vD0); -} - -void JitArm::faddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vA = fpr.R0(inst.FA); - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD = fpr.R0(inst.FD, false); - - VADD(vD, vA, vB); -} - -void JitArm::fsubsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vA = fpr.R0(inst.FA); - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD0 = fpr.R0(inst.FD, false); - ARMReg vD1 = fpr.R1(inst.FD, false); - - VSUB(vD0, vA, vB); - VMOV(vD1, vD0); -} - -void JitArm::fsubx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vA = fpr.R0(inst.FA); - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD = fpr.R0(inst.FD, false); - - VSUB(vD, vA, vB); -} - -void JitArm::fmulsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vA = fpr.R0(inst.FA); - ARMReg vC = fpr.R0(inst.FC); - ARMReg vD0 = fpr.R0(inst.FD, false); - ARMReg vD1 = fpr.R1(inst.FD, false); - - VMUL(vD0, vA, vC); - VMOV(vD1, vD0); -} -void JitArm::fmulx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vA = fpr.R0(inst.FA); - ARMReg vC = fpr.R0(inst.FC); - ARMReg vD0 = fpr.R0(inst.FD, false); - - VMUL(vD0, vA, vC); -} -void JitArm::fmrx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - ARMReg vB = fpr.R0(inst.FB); - ARMReg vD = fpr.R0(inst.FD, false); - - VMOV(vD, vB); -} - -void JitArm::fmaddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - - VMOV(V0, vB0); - - VMLA(V0, vA0, vC0); - - VMOV(vD0, V0); - VMOV(vD1, V0); - - fpr.Unlock(V0); -} - -void JitArm::fmaddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - - ARMReg V0 = fpr.GetReg(); - - VMOV(V0, vB0); - - VMLA(V0, vA0, vC0); - - VMOV(vD0, V0); - - fpr.Unlock(V0); -} - -void JitArm::fnmaddx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - - ARMReg V0 = fpr.GetReg(); - - VMOV(V0, vB0); - - VMLA(V0, vA0, vC0); - - VNEG(vD0, V0); - - fpr.Unlock(V0); -} -void JitArm::fnmaddsx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - - VMOV(V0, vB0); - - VMLA(V0, vA0, vC0); - - VNEG(vD0, V0); - VNEG(vD1, V0); - - fpr.Unlock(V0); -} - -// XXX: Messes up Super Mario Sunshine title screen -void JitArm::fresx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITFloatingPointOff); - FALLBACK_IF(inst.Rc); - - // FIXME - FALLBACK_IF(true); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - MOVI2R(V0, 1.0, INVALID_REG); // temp reg isn't needed for 1.0 - - VDIV(vD1, V0, vB0); - VDIV(vD0, V0, vB0); - fpr.Unlock(V0); -} - -void JitArm::fselx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - - VCMP(vA0); - VMRS(_PC); - - FixupBranch GT0 = B_CC(CC_GE); - VMOV(vD0, vB0); - FixupBranch EQ0 = B(); - SetJumpTarget(GT0); - VMOV(vD0, vC0); - SetJumpTarget(EQ0); -} - -void JitArm::frsqrtex(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(true); - - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg V0 = D1; - ARMReg rA = gpr.GetReg(); - - MOVI2R(fpscrReg, (u32)&PPC_NAN); - VLDR(V0, fpscrReg, 0); - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - - VCMP(vB0); - VMRS(_PC); - FixupBranch Less0 = B_CC(CC_LT); - VMOV(vD0, V0); - SetFPException(fpscrReg, FPSCR_VXSQRT); - FixupBranch SkipOrr0 = B(); - SetJumpTarget(Less0); - FixupBranch noException = B_CC(CC_EQ); - SetFPException(fpscrReg, FPSCR_ZX); - SetJumpTarget(noException); - SetJumpTarget(SkipOrr0); - - VCVT(S0, vB0, 0); - - NEONXEmitter nemit(this); - nemit.VRSQRTE(F_32, D0, D0); - VCVT(vD0, S0, 0); - - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, rA); -} - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp deleted file mode 100644 index 6faa597e9c..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ /dev/null @@ -1,1222 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -void JitArm::ComputeRC(ARMReg value, int cr) -{ - ARMReg rB = gpr.GetReg(); - - Operand2 ASRReg(value, ST_ASR, 31); - - STR(value, R9, PPCSTATE_OFF(cr_val[cr])); - MOV(rB, ASRReg); - STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32)); - - gpr.Unlock(rB); -} - -void JitArm::ComputeRC(s32 value, int cr) -{ - ARMReg rB = gpr.GetReg(); - - Operand2 ASRReg(rB, ST_ASR, 31); - - MOVI2R(rB, value); - STR(rB, R9, PPCSTATE_OFF(cr_val[cr])); - MOV(rB, ASRReg); - STR(rB, R9, PPCSTATE_OFF(cr_val[cr]) + sizeof(u32)); - - gpr.Unlock(rB); -} - -void JitArm::ComputeCarry() -{ - ARMReg tmp = gpr.GetReg(); - SetCC(CC_CS); - MOV(tmp, 1); - SetCC(CC_CC); - EOR(tmp, tmp, tmp); - SetCC(); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); -} - -void JitArm::ComputeCarry(bool Carry) -{ - ARMReg tmp = gpr.GetReg(); - if (Carry) - MOV(tmp, 1); - else - EOR(tmp, tmp, tmp); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); -} - -void JitArm::GetCarryAndClear(ARMReg reg) -{ - ARMReg tmp = gpr.GetReg(); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - AND(reg, tmp, 1); - BIC(tmp, tmp, 1); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); -} - -void JitArm::FinalizeCarry(ARMReg reg) -{ - ARMReg tmp = gpr.GetReg(); - SetCC(CC_CS); - ORR(reg, reg, 1); - SetCC(); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - ORR(tmp, tmp, reg); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); -} - -void JitArm::subfic(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - int a = inst.RA, d = inst.RD; - - int imm = inst.SIMM_16; - if (d == a) - { - if (imm == 0) - { - ARMReg tmp = gpr.GetReg(); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - BIC(tmp, tmp, 1); - // Flags act exactly like subtracting from 0 - RSBS(gpr.R(d), gpr.R(d), 0); - SetCC(CC_CS); - ORR(tmp, tmp, 1); - SetCC(); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); - } - else if (imm == -1) - { - // CA is always set in this case - ARMReg tmp = gpr.GetReg(); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - ORR(tmp, tmp, 1); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); - - MVN(gpr.R(d), gpr.R(d)); - } - else - { - ARMReg tmp = gpr.GetReg(); - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, imm + 1); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - BIC(tmp, tmp, 1); - // Flags act exactly like subtracting from 0 - MVN(gpr.R(d), gpr.R(d)); - ADDS(gpr.R(d), gpr.R(d), rA); - // Output carry is inverted - SetCC(CC_CS); - ORR(tmp, tmp, 1); - SetCC(); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp, rA); - } - } - else - { - ARMReg tmp = gpr.GetReg(); - gpr.BindToRegister(d, false); - MOVI2R(gpr.R(d), imm); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - BIC(tmp, tmp, 1); - // Flags act exactly like subtracting from 0 - SUBS(gpr.R(d), gpr.R(d), gpr.R(a)); - // Output carry is inverted - SetCC(CC_CS); - ORR(tmp, tmp, 1); - SetCC(); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); - } - // This instruction has no RC flag -} - -static u32 Add(u32 a, u32 b) -{ - return a + b; -} - -static u32 Sub(u32 a, u32 b) -{ - return a - b; -} - -static u32 Mul(u32 a, u32 b) -{ - return a * b; -} - -static u32 Or (u32 a, u32 b) -{ - return a | b; -} - -static u32 And(u32 a, u32 b) -{ - return a & b; -} - -static u32 Xor(u32 a, u32 b) -{ - return a ^ b; -} - -void JitArm::arith(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - u32 a = inst.RA, b = inst.RB, d = inst.RD, s = inst.RS; - ARMReg RA, RB, RD, RS; - bool isImm[2] = {false, false}; // Arg1 & Arg2 - u32 Imm[2] = {0, 0}; - bool Rc = false; - bool carry = false; - bool shiftedImm = false; - - switch (inst.OPCD) - { - case 7: // mulli - if (gpr.IsImm(a)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(a); - } - isImm[1] = true; - Imm[1] = inst.SIMM_16; - break; - case 13: // addic_rc - Rc = true; - case 12: // addic - if (gpr.IsImm(a)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(a); - } - isImm[1] = true; - Imm[1] = inst.SIMM_16; - carry = true; - break; - case 15: // addis - shiftedImm = true; - case 14: // addi - if (a) - { - if (gpr.IsImm(a)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(a); - } - } - else - { - isImm[0] = true; - Imm[0] = 0; - } - isImm[1] = true; - Imm[1] = inst.SIMM_16 << (shiftedImm ? 16 : 0); - break; - case 25: // oris - shiftedImm = true; - case 24: // ori - if (gpr.IsImm(s)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(s); - } - isImm[1] = true; - Imm[1] = inst.UIMM; - break; - case 27: // xoris - shiftedImm = true; - case 26: // xori - if (gpr.IsImm(s)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(s); - } - isImm[1] = true; - Imm[1] = inst.UIMM << (shiftedImm ? 16 : 0); - break; - case 29: // andis_rc - shiftedImm = true; - case 28: // andi_rc - if (gpr.IsImm(s)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(s); - } - isImm[1] = true; - Imm[1] = inst.UIMM; - Rc = true; - break; - - case 31: // addcx, addx, subfx - switch (inst.SUBOP10) - { - case 24: // slwx - case 28: // andx - case 60: // andcx - case 124: // norx - case 284: // eqvx - case 316: // xorx - case 412: // orcx - case 444: // orx - case 476: // nandx - case 536: // srwx - case 792: // srawx - if (gpr.IsImm(s)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(s); - } - if (gpr.IsImm(b)) - { - isImm[1] = true; - Imm[1] = gpr.GetImm(b); - } - Rc = inst.Rc; - break; - - case 10: // addcx - case 522: // addcox - carry = true; - case 40: // subfx - case 235: // mullwx - case 266: - case 747: // mullwox - case 778: // both addx - if (gpr.IsImm(a)) - { - isImm[0] = true; - Imm[0] = gpr.GetImm(a); - } - if (gpr.IsImm(b)) - { - isImm[1] = true; - Imm[1] = gpr.GetImm(b); - } - Rc = inst.Rc; - break; - } - break; - default: - WARN_LOG(DYNA_REC, "Unknown OPCD %d with arith function", inst.OPCD); - FALLBACK_IF(true); - break; - } - if (isImm[0] && isImm[1]) // Immediate propagation - { - bool hasCarry = false; - u32 dest = d; - switch (inst.OPCD) - { - case 7: - gpr.SetImmediate(d, Mul(Imm[0], Imm[1])); - break; - case 12: - case 13: - gpr.SetImmediate(d, Add(Imm[0], Imm[1])); - hasCarry = Interpreter::Helper_Carry(Imm[0], Imm[1]); - break; - case 14: - case 15: - gpr.SetImmediate(d, Add(Imm[0], Imm[1])); - hasCarry = Interpreter::Helper_Carry(Imm[0], Imm[1]); - break; - case 24: - case 25: - gpr.SetImmediate(a, Or(Imm[0], Imm[1] << (shiftedImm ? 16 : 0))); - dest = a; - break; - case 26: - case 27: - gpr.SetImmediate(a, Xor(Imm[0], Imm[1])); - dest = a; - break; - case 28: - case 29: - gpr.SetImmediate(a, And(Imm[0], Imm[1] << (shiftedImm ? 16 : 0))); - dest = a; - break; - case 31: // addcx, addx, subfx - switch (inst.SUBOP10) - { - case 24: - gpr.SetImmediate(a, Imm[0] << Imm[1]); - dest = a; - break; - case 28: - gpr.SetImmediate(a, And(Imm[0], Imm[1])); - dest = a; - break; - case 40: // subfx - gpr.SetImmediate(d, Sub(Imm[1], Imm[0])); - break; - case 60: - gpr.SetImmediate(a, And(Imm[1], ~Imm[0])); - dest = a; - break; - case 124: - gpr.SetImmediate(a, ~Or(Imm[0], Imm[1])); - dest = a; - break; - case 747: - case 235: - gpr.SetImmediate(d, Mul(Imm[0], Imm[1])); - break; - case 284: - gpr.SetImmediate(a, ~Xor(Imm[0], Imm[1])); - dest = a; - break; - case 316: - gpr.SetImmediate(a, Xor(Imm[0], Imm[1])); - dest = a; - break; - case 412: - gpr.SetImmediate(a, Or(Imm[0], ~Imm[1])); - dest = a; - break; - case 444: - gpr.SetImmediate(a, Or(Imm[0], Imm[1])); - dest = a; - break; - case 476: - gpr.SetImmediate(a, ~And(Imm[1], Imm[0])); - dest = a; - break; - case 536: - gpr.SetImmediate(a, Imm[0] >> Imm[1]); - dest = a; - break; - case 792: - gpr.SetImmediate(a, ((s32)Imm[0]) >> Imm[1]); - dest = a; - break; - case 10: // addcx - case 266: - case 778: // both addx - gpr.SetImmediate(d, Add(Imm[0], Imm[1])); - hasCarry = Interpreter::Helper_Carry(Imm[0], Imm[1]); - break; - } - break; - } - - if (carry) - ComputeCarry(hasCarry); - - if (Rc) - ComputeRC(gpr.GetImm(dest), 0); - - return; - } - - u32 dest = d; - // One or the other isn't a IMM - switch (inst.OPCD) - { - case 7: - { - gpr.BindToRegister(d, d == a); - ARMReg rA = gpr.GetReg(); - RD = gpr.R(d); - RA = gpr.R(a); - MOVI2R(rA, Imm[1]); - MUL(RD, RA, rA); - gpr.Unlock(rA); - } - break; - case 12: - case 13: - { - gpr.BindToRegister(d, d == a); - RD = gpr.R(d); - RA = gpr.R(a); - - if (Imm[1] < 256) - { - ADDS(RD, RA, Imm[1]); - } - else - { - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, Imm[1]); - ADDS(RD, RA, rA); - gpr.Unlock(rA); - } - } - break; - case 14: - case 15: // Arg2 is always Imm - if (!isImm[0]) - { - gpr.BindToRegister(d, d == a); - RD = gpr.R(d); - RA = gpr.R(a); - - if (Imm[1] < 256) - { - ADD(RD, RA, Imm[1]); - } - else - { - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, Imm[1]); - ADD(RD, RA, rA); - gpr.Unlock(rA); - } - } - else - { - gpr.SetImmediate(d, Imm[1]); - } - break; - case 24: - case 25: - { - dest = a; - gpr.BindToRegister(a, s == a); - RS = gpr.R(s); - RA = gpr.R(a); - - if (Imm[1] < 256) - { - // Rotation of encoding 8 is the same as << 16 - Operand2 imm(Imm[1], shiftedImm ? 8 : 0); - ORR(RA, RS, imm); - } - else - { - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, Imm[1] << (shiftedImm ? 16 : 0)); - ORR(RA, RS, rA); - gpr.Unlock(rA); - } - } - break; - case 26: - case 27: - { - dest = a; - gpr.BindToRegister(a, s == a); - ARMReg rA = gpr.GetReg(); - RS = gpr.R(s); - RA = gpr.R(a); - MOVI2R(rA, Imm[1]); - EOR(RA, RS, rA); - gpr.Unlock(rA); - } - - break; - case 28: - case 29: - { - dest = a; - gpr.BindToRegister(a, s == a); - RS = gpr.R(s); - RA = gpr.R(a); - - Operand2 imm_val; - if (TryMakeOperand2(Imm[1] << (shiftedImm ? 16 : 0), imm_val)) - { - AND(RA, RS, imm_val); - } - else - { - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, Imm[1]); - Operand2 rotated_reg(rA, ST_ROR, shiftedImm ? 16 : 0); - AND(RA, RS, rotated_reg); - gpr.Unlock(rA); - } - } - break; - case 31: - switch (inst.SUBOP10) - { - case 24: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - LSLS(RA, RS, RB); - break; - case 28: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - ANDS(RA, RS, RB); - break; - case 40: // subfx - gpr.BindToRegister(d, d == b || d == a); - RD = gpr.R(d); - RB = gpr.R(b); - RA = gpr.R(a); - SUBS(RD, RB, RA); - break; - case 60: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - BICS(RA, RS, RB); - break; - case 124: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - ORR(RA, RS, RB); - MVNS(RA, RA); - break; - case 747: - case 235: - gpr.BindToRegister(d, d == a || d == b); - RD = gpr.R(d); - RA = gpr.R(a); - RB = gpr.R(b); - MULS(RD, RA, RB); - break; - case 284: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - EOR(RA, RS, RB); - MVNS(RA, RA); - break; - case 316: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - EORS(RA, RS, RB); - break; - case 412: - { - dest = a; - ARMReg rA = gpr.GetReg(); - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - MVN(rA, RB); - ORRS(RA, RS, rA); - gpr.Unlock(rA); - } - break; - case 444: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - ORRS(RA, RS, RB); - break; - case 476: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - AND(RA, RS, RB); - MVNS(RA, RA); - break; - case 536: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - LSRS(RA, RS, RB); - break; - case 792: - dest = a; - gpr.BindToRegister(a, a == s || a == b); - RA = gpr.R(a); - RS = gpr.R(s); - RB = gpr.R(b); - ASRS(RA, RS, RB); - break; - case 10: // addcx - case 266: - case 778: // both addx - gpr.BindToRegister(d, d == a || d == b); - RD = gpr.R(d); - RA = gpr.R(a); - RB = gpr.R(b); - ADDS(RD, RA, RB); - break; - } - break; - } - - if (carry) - ComputeCarry(); - - if (Rc) - ComputeRC(gpr.R(dest)); -} - -void JitArm::addex(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - u32 a = inst.RA, b = inst.RB, d = inst.RD; - - // FIXME - FALLBACK_IF(true); - - ARMReg RA = gpr.R(a); - ARMReg RB = gpr.R(b); - ARMReg RD = gpr.R(d); - ARMReg rA = gpr.GetReg(); - GetCarryAndClear(rA); - ADDS(RD, RA, RB); - FinalizeCarry(rA); - - if (inst.Rc) - ComputeRC(RD); - - gpr.Unlock(rA); -} - -void JitArm::cntlzwx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - u32 a = inst.RA, s = inst.RS; - - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, __builtin_clz(gpr.GetImm(s))); - if (inst.Rc) - ComputeRC(gpr.GetImm(a), 0); - return; - } - - gpr.BindToRegister(a, a == s); - ARMReg RA = gpr.R(a); - ARMReg RS = gpr.R(s); - CLZ(RA, RS); - if (inst.Rc) - ComputeRC(RA); -} - -void JitArm::mulhwux(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - u32 a = inst.RA, b = inst.RB, d = inst.RD; - - ARMReg RA = gpr.R(a); - ARMReg RB = gpr.R(b); - ARMReg RD = gpr.R(d); - ARMReg rA = gpr.GetReg(false); - UMULL(rA, RD, RA, RB); - - if (inst.Rc) - ComputeRC(RD); -} - -void JitArm::extshx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - u32 a = inst.RA, s = inst.RS; - - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, (u32)(s32)(s16)gpr.GetImm(s)); - - if (inst.Rc) - ComputeRC(gpr.GetImm(a), 0); - - return; - } - - gpr.BindToRegister(a, a == s); - ARMReg rA = gpr.R(a); - ARMReg rS = gpr.R(s); - SXTH(rA, rS); - if (inst.Rc) - ComputeRC(rA); -} -void JitArm::extsbx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - u32 a = inst.RA, s = inst.RS; - - if (gpr.IsImm(s)) - { - gpr.SetImmediate(a, (u32)(s32)(s8)gpr.GetImm(s)); - - if (inst.Rc) - ComputeRC(gpr.GetImm(a), 0); - - return; - } - - gpr.BindToRegister(a, a == s); - ARMReg rA = gpr.R(a); - ARMReg rS = gpr.R(s); - SXTB(rA, rS); - if (inst.Rc) - ComputeRC(rA); -} -void JitArm::cmp (UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - int crf = inst.CRFD; - u32 a = inst.RA, b = inst.RB; - - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - ComputeRC((s32)gpr.GetImm(a) - (s32)gpr.GetImm(b), crf); - return; - } - - ARMReg rA = gpr.GetReg(); - ARMReg RA = gpr.R(a); - ARMReg RB = gpr.R(b); - - SUB(rA, RA, RB); - ComputeRC(rA, crf); - - gpr.Unlock(rA); -} - -void JitArm::cmpl(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - int crf = inst.CRFD; - u32 a = inst.RA, b = inst.RB; - - if (gpr.IsImm(a) && gpr.IsImm(b)) - { - ComputeRC(gpr.GetImm(a) - gpr.GetImm(b), crf); - return; - } - else if (gpr.IsImm(b) && !gpr.GetImm(b)) - { - ComputeRC(gpr.R(a), crf); - return; - } - - FALLBACK_IF(true); -} - -void JitArm::cmpi(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - u32 a = inst.RA; - int crf = inst.CRFD; - if (gpr.IsImm(a)) - { - ComputeRC((s32)gpr.GetImm(a) - inst.SIMM_16, crf); - return; - } - ARMReg rA = gpr.GetReg(); - ARMReg RA = gpr.R(a); - bool negated = false; - Operand2 off; - - if (TryMakeOperand2_AllowNegation(inst.SIMM_16, off, &negated)) - { - if (negated) - ADD(rA, RA, off); - else - SUB(rA, RA, off); - } - else - { - MOVI2R(rA, inst.SIMM_16); - SUB(rA, RA, rA); - } - ComputeRC(rA, crf); - - gpr.Unlock(rA); -} - -void JitArm::cmpli(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - u32 a = inst.RA; - int crf = inst.CRFD; - - if (gpr.IsImm(a)) - { - ComputeRC(gpr.GetImm(a) - inst.UIMM, crf); - return; - } - - if (!inst.UIMM) - { - ComputeRC(gpr.R(a), crf); - return; - } - - FALLBACK_IF(true); -} - -void JitArm::negx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - if (gpr.IsImm(inst.RA)) - { - gpr.SetImmediate(inst.RD, ~gpr.GetImm(inst.RA) + 1); - if (inst.Rc) - ComputeRC(gpr.GetImm(inst.RD), 0); - return; - } - - gpr.BindToRegister(inst.RD, inst.RD == inst.RA); - ARMReg RD = gpr.R(inst.RD); - ARMReg RA = gpr.R(inst.RA); - - RSB(RD, RA, 0); - if (inst.Rc) - ComputeRC(RD); - - if (inst.OE) - { - BKPT(0x333); - //GenerateOverflow(); - } -} -void JitArm::rlwimix(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - u32 mask = Helper_Mask(inst.MB,inst.ME); - int a = inst.RA, s = inst.RS; - if (gpr.IsImm(s) && inst.MB <= inst.ME) - { - u32 imm = _rotl(gpr.GetImm(s), inst.SH) & mask; - imm >>= 31 - inst.ME; - ARMReg rA = gpr.GetReg(); - - MOVI2R(rA, imm); - BFI(gpr.R(a), rA, 31 - inst.ME, inst.ME - inst.MB + 1); - if (inst.Rc) - ComputeRC(gpr.R(a)); - - gpr.Unlock(rA); - return; - } - - ARMReg RA = gpr.R(a); - ARMReg RS = gpr.R(s); - - if (inst.SH == 0 && inst.MB <= inst.ME) - { - if (inst.ME != 31) - { - ARMReg rA = gpr.GetReg(); - LSR(rA, RS, 31 - inst.ME); - BFI(RA, rA, 31 - inst.ME, inst.ME - inst.MB + 1); - gpr.Unlock(rA); - } - else - { - BFI(RA, RS, 0, inst.ME - inst.MB + 1); - } - if (inst.Rc) - ComputeRC(RA); - - return; - } - - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. - - MOVI2R(rA, mask); - - BIC (rB, RA, rA); // RA & ~mask - AND (rA, rA, Shift); - ORR(RA, rB, rA); - - if (inst.Rc) - ComputeRC(RA); - gpr.Unlock(rA, rB); -} - -void JitArm::rlwinmx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - u32 mask = Helper_Mask(inst.MB,inst.ME); - if (gpr.IsImm(inst.RS)) - { - gpr.SetImmediate(inst.RA, _rotl(gpr.GetImm(inst.RS), inst.SH) & mask); - if (inst.Rc) - ComputeRC(gpr.GetImm(inst.RA), 0); - return; - } - - gpr.BindToRegister(inst.RA, inst.RA == inst.RS); - ARMReg RA = gpr.R(inst.RA); - ARMReg RS = gpr.R(inst.RS); - ARMReg rA = gpr.GetReg(); - bool inverse = false; - bool fit_op = false; - Operand2 op2; - fit_op = TryMakeOperand2_AllowInverse(mask, op2, &inverse); - - if (!inst.SH && fit_op) - { - if (inverse) - BIC(RA, RS, op2); - else - AND(RA, RS, op2); - } - else if (!inst.SH && inst.ME == 31) - { - UBFX(RA, RS, 0, inst.ME - inst.MB + 1); - } - else if (!inst.SH && inst.MB == 0) - { - LSR(RA, RS, 31 - inst.ME); - LSL(RA, RA, 31 - inst.ME); - } - else if (inst.SH == 16 && inst.MB >= 16 && inst.ME == 31) - { - UBFX(RA, RS, 16, 32 - inst.MB); - } - else if (inst.SH == 16 && inst.MB == 0 && inst.ME == 15) - { - LSL(RA, RS, 16); - } - else if (fit_op) - { - Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. - MOV(RA, Shift); - if (inverse) - BIC(RA, RA, op2); - else - AND(RA, RA, op2); - } - else - { - MOVI2R(rA, mask); - Operand2 Shift(RS, ST_ROR, 32 - inst.SH); // This rotates left, while ARM has only rotate right, so swap it. - AND(RA, rA, Shift); - } - - if (inst.Rc) - ComputeRC(RA); - gpr.Unlock(rA); - - //m_GPR[inst.RA] = _rotl(m_GPR[inst.RS],inst.SH) & mask; -} -void JitArm::rlwnmx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - u32 mask = Helper_Mask(inst.MB,inst.ME); - ARMReg RA = gpr.R(inst.RA); - ARMReg RS = gpr.R(inst.RS); - ARMReg RB = gpr.R(inst.RB); - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - MOVI2R(rA, mask); - - // PPC rotates left, ARM rotates right. Swap it - MOV(rB, 32); - SUB(rB, rB, RB); - - Operand2 Shift(RS, ST_ROR, rB); // Register shifted register - AND(RA, rA, Shift); - - if (inst.Rc) - ComputeRC(RA); - gpr.Unlock(rA, rB); -} - -void JitArm::srawix(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - int a = inst.RA; - int s = inst.RS; - int amount = inst.SH; - - if (gpr.IsImm(s)) - { - s32 imm = (s32)gpr.GetImm(s); - gpr.SetImmediate(a, imm >> amount); - - if (amount != 0 && (imm < 0) && (imm << (32 - amount))) - ComputeCarry(true); - else - ComputeCarry(false); - } - else if (amount != 0) - { - gpr.BindToRegister(a, a == s); - ARMReg RA = gpr.R(a); - ARMReg RS = gpr.R(s); - ARMReg tmp = gpr.GetReg(); - - MOV(tmp, RS); - ASR(RA, RS, amount); - if (inst.Rc) - ComputeRC(RA); - LSL(tmp, tmp, 32 - amount); - TST(tmp, RA); - - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - BIC(tmp, tmp, 1); - SetCC(CC_NEQ); - ORR(tmp, tmp, 1); - SetCC(); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); - } - else - { - gpr.BindToRegister(a, a == s); - ARMReg RA = gpr.R(a); - ARMReg RS = gpr.R(s); - MOV(RA, RS); - - ARMReg tmp = gpr.GetReg(); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - BIC(tmp, tmp, 1); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - gpr.Unlock(tmp); - - } -} - -void JitArm::twx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITIntegerOff); - - s32 a = inst.RA; - - ARMReg RA = gpr.GetReg(); - ARMReg RB = gpr.GetReg(); - MOV(RA, inst.TO); - - if (inst.OPCD == 3) // twi - { - MOVI2R(RB, (s32)(s16)inst.SIMM_16); - CMP(gpr.R(a), RB); - } - else // tw - { - CMP(gpr.R(a), gpr.R(inst.RB)); - } - - FixupBranch al = B_CC(CC_LT); - FixupBranch ag = B_CC(CC_GT); - FixupBranch ae = B_CC(CC_EQ); - // FIXME: will never be reached. But also no known code uses it... - FixupBranch ll = B_CC(CC_VC); - FixupBranch lg = B_CC(CC_VS); - - SetJumpTarget(al); - TST(RA, 16); - FixupBranch exit1 = B_CC(CC_NEQ); - FixupBranch take1 = B(); - SetJumpTarget(ag); - TST(RA, 8); - FixupBranch exit2 = B_CC(CC_NEQ); - FixupBranch take2 = B(); - SetJumpTarget(ae); - TST(RA, 4); - FixupBranch exit3 = B_CC(CC_NEQ); - FixupBranch take3 = B(); - SetJumpTarget(ll); - TST(RA, 2); - FixupBranch exit4 = B_CC(CC_NEQ); - FixupBranch take4 = B(); - SetJumpTarget(lg); - TST(RA, 1); - FixupBranch exit5 = B_CC(CC_NEQ); - FixupBranch take5 = B(); - - SetJumpTarget(take1); - SetJumpTarget(take2); - SetJumpTarget(take3); - SetJumpTarget(take4); - SetJumpTarget(take5); - - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); - - LDR(RA, R9, PPCSTATE_OFF(Exceptions)); - MOVI2R(RB, EXCEPTION_PROGRAM); // XXX: Can be optimized - ORR(RA, RA, RB); - STR(RA, R9, PPCSTATE_OFF(Exceptions)); - WriteExceptionExit(); - - SetJumpTarget(exit1); - SetJumpTarget(exit2); - SetJumpTarget(exit3); - SetJumpTarget(exit4); - SetJumpTarget(exit5); - - if (!analyzer.HasOption(PPCAnalyst::PPCAnalyzer::OPTION_CONDITIONAL_CONTINUE)) - { - gpr.Flush(); - fpr.Flush(); - - WriteExit(js.compilerPC + 4); - } - - gpr.Unlock(RA, RB); -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp deleted file mode 100644 index 5b3fe60175..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStore.cpp +++ /dev/null @@ -1,547 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/ConfigManager.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -void JitArm::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, int accessSize, s32 offset) -{ - // We want to make sure to not get LR as a temp register - ARMReg rA = R12; - - u32 imm_addr = 0; - bool is_immediate = false; - - if (regOffset == -1) - { - if (dest != -1) - { - if (gpr.IsImm(dest)) - { - is_immediate = true; - imm_addr = gpr.GetImm(dest) + offset; - } - else - { - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - ADD(rA, gpr.R(dest), off); - } - else - { - MOVI2R(rA, offset); - ADD(rA, rA, gpr.R(dest)); - } - } - } - else - { - is_immediate = true; - imm_addr = offset; - } - } - else - { - if (dest != -1) - { - if (gpr.IsImm(dest) && gpr.IsImm(regOffset)) - { - is_immediate = true; - imm_addr = gpr.GetImm(dest) + gpr.GetImm(regOffset); - } - else if (gpr.IsImm(dest) && !gpr.IsImm(regOffset)) - { - Operand2 off; - if (TryMakeOperand2(gpr.GetImm(dest), off)) - { - ADD(rA, gpr.R(regOffset), off); - } - else - { - MOVI2R(rA, gpr.GetImm(dest)); - ADD(rA, rA, gpr.R(regOffset)); - } - } - else if (!gpr.IsImm(dest) && gpr.IsImm(regOffset)) - { - Operand2 off; - if (TryMakeOperand2(gpr.GetImm(regOffset), off)) - { - ADD(rA, gpr.R(dest), off); - } - else - { - MOVI2R(rA, gpr.GetImm(regOffset)); - ADD(rA, rA, gpr.R(dest)); - } - } - else - { - ADD(rA, gpr.R(dest), gpr.R(regOffset)); - } - } - else - { - if (gpr.IsImm(regOffset)) - { - is_immediate = true; - imm_addr = gpr.GetImm(regOffset); - } - else - { - MOV(rA, gpr.R(regOffset)); - } - } - } - ARMReg RS = gpr.R(value); - - u32 flags = BackPatchInfo::FLAG_STORE; - if (accessSize == 32) - flags |= BackPatchInfo::FLAG_SIZE_32; - else if (accessSize == 16) - flags |= BackPatchInfo::FLAG_SIZE_16; - else - flags |= BackPatchInfo::FLAG_SIZE_8; - - if (is_immediate) - { - if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) - { - MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount); - MOVI2R(R10, (u32)GPFifo::m_gatherPipe); - LDR(R11, R14); - if (accessSize == 32) - { - REV(RS, RS); - STR(RS, R10, R11); - REV(RS, RS); - } - else if (accessSize == 16) - { - REV16(RS, RS); - STRH(RS, R10, R11); - REV16(RS, RS); - } - else - { - STRB(RS, R10, R11); - } - ADD(R11, R11, accessSize >> 3); - STR(R11, R14); - jit->js.fifoBytesThisBlock += accessSize >> 3; - } - else if (PowerPC::IsOptimizableRAMAddress(imm_addr)) - { - MOVI2R(rA, imm_addr); - EmitBackpatchRoutine(this, flags, jo.fastmem, true, RS); - } - else - { - MOVI2R(rA, imm_addr); - EmitBackpatchRoutine(this, flags, false, false, RS); - } - } - else - { - EmitBackpatchRoutine(this, flags, jo.fastmem, true, RS); - } - -} - -void JitArm::stX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - - u32 a = inst.RA, b = inst.RB, s = inst.RS; - s32 offset = inst.SIMM_16; - u32 accessSize = 0; - s32 regOffset = -1; - bool update = false; - switch (inst.OPCD) - { - case 45: // sthu - update = true; - case 44: // sth - accessSize = 16; - break; - case 31: - switch (inst.SUBOP10) - { - case 183: // stwux - update = true; - case 151: // stwx - accessSize = 32; - regOffset = b; - break; - case 247: // stbux - update = true; - case 215: // stbx - accessSize = 8; - regOffset = b; - break; - case 439: // sthux - update = true; - case 407: // sthx - accessSize = 16; - regOffset = b; - break; - } - break; - case 37: // stwu - update = true; - case 36: // stw - accessSize = 32; - break; - case 39: // stbu - update = true; - case 38: // stb - accessSize = 8; - break; - } - - SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, accessSize, offset); - - if (update) - { - ARMReg rA = gpr.GetReg(); - ARMReg RB; - ARMReg RA = gpr.R(a); - if (regOffset != -1) - RB = gpr.R(regOffset); - // Check for DSI exception prior to writing back address - LDR(rA, R9, PPCSTATE_OFF(Exceptions)); - TST(rA, EXCEPTION_DSI); - FixupBranch has_exception = B_CC(CC_NEQ); - if (regOffset == -1) - { - MOVI2R(rA, offset); - ADD(RA, RA, rA); - } - else - { - ADD(RA, RA, RB); - } - SetJumpTarget(has_exception); - gpr.Unlock(rA); - } -} - -void JitArm::SafeLoadToReg(ARMReg dest, s32 addr, s32 offsetReg, int accessSize, s32 offset, bool signExtend, bool reverse, bool update) -{ - // We want to make sure to not get LR as a temp register - ARMReg rA = R12; - - u32 imm_addr = 0; - bool is_immediate = false; - - if (offsetReg == -1) - { - if (addr != -1) - { - if (gpr.IsImm(addr)) - { - is_immediate = true; - imm_addr = gpr.GetImm(addr) + offset; - } - else - { - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - ADD(rA, gpr.R(addr), off); - } - else - { - MOVI2R(rA, offset); - ADD(rA, rA, gpr.R(addr)); - } - } - } - else - { - is_immediate = true; - imm_addr = offset; - } - } - else - { - if (addr != -1) - { - if (gpr.IsImm(addr) && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg); - } - else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg)) - { - Operand2 off; - if (TryMakeOperand2(gpr.GetImm(addr), off)) - { - ADD(rA, gpr.R(offsetReg), off); - } - else - { - MOVI2R(rA, gpr.GetImm(addr)); - ADD(rA, rA, gpr.R(offsetReg)); - } - } - else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg)) - { - Operand2 off; - if (TryMakeOperand2(gpr.GetImm(offsetReg), off)) - { - ADD(rA, gpr.R(addr), off); - } - else - { - MOVI2R(rA, gpr.GetImm(offsetReg)); - ADD(rA, rA, gpr.R(addr)); - } - } - else - { - ADD(rA, gpr.R(addr), gpr.R(offsetReg)); - } - } - else - { - if (gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(offsetReg); - } - else - { - MOV(rA, gpr.R(offsetReg)); - } - } - } - - if (is_immediate) - MOVI2R(rA, imm_addr); - - u32 flags = BackPatchInfo::FLAG_LOAD; - if (accessSize == 32) - flags |= BackPatchInfo::FLAG_SIZE_32; - else if (accessSize == 16) - flags |= BackPatchInfo::FLAG_SIZE_16; - else - flags |= BackPatchInfo::FLAG_SIZE_8; - - if (reverse) - flags |= BackPatchInfo::FLAG_REVERSE; - - if (signExtend) - flags |= BackPatchInfo::FLAG_EXTEND; - - EmitBackpatchRoutine(this, flags, - jo.fastmem, - true, dest); - - if (update) - MOV(gpr.R(addr), rA); -} - -void JitArm::lXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - - u32 a = inst.RA, b = inst.RB, d = inst.RD; - s32 offset = inst.SIMM_16; - u32 accessSize = 0; - s32 offsetReg = -1; - bool update = false; - bool signExtend = false; - bool reverse = false; - - switch (inst.OPCD) - { - case 31: - switch (inst.SUBOP10) - { - case 55: // lwzux - update = true; - case 23: // lwzx - accessSize = 32; - offsetReg = b; - break; - case 119: //lbzux - update = true; - case 87: // lbzx - accessSize = 8; - offsetReg = b; - break; - case 311: // lhzux - update = true; - case 279: // lhzx - accessSize = 16; - offsetReg = b; - break; - case 375: // lhaux - update = true; - case 343: // lhax - accessSize = 16; - signExtend = true; - offsetReg = b; - break; - case 534: // lwbrx - accessSize = 32; - reverse = true; - break; - case 790: // lhbrx - accessSize = 16; - reverse = true; - break; - } - break; - case 33: // lwzu - update = true; - case 32: // lwz - accessSize = 32; - break; - case 35: // lbzu - update = true; - case 34: // lbz - accessSize = 8; - break; - case 41: // lhzu - update = true; - case 40: // lhz - accessSize = 16; - break; - case 43: // lhau - update = true; - case 42: // lha - signExtend = true; - accessSize = 16; - break; - } - - // Check for exception before loading - ARMReg rA = gpr.GetReg(false); - ARMReg RD = gpr.R(d); - - LDR(rA, R9, PPCSTATE_OFF(Exceptions)); - TST(rA, EXCEPTION_DSI); - FixupBranch DoNotLoad = B_CC(CC_NEQ); - - SafeLoadToReg(RD, update ? a : (a ? a : -1), offsetReg, accessSize, offset, signExtend, reverse, update); - - SetJumpTarget(DoNotLoad); - - // LWZ idle skipping - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle && - inst.OPCD == 32 && - (inst.hex & 0xFFFF0000) == 0x800D0000 && - (PowerPC::HostRead_U32(js.compilerPC + 4) == 0x28000000 || - (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && PowerPC::HostRead_U32(js.compilerPC + 4) == 0x2C000000)) && - PowerPC::HostRead_U32(js.compilerPC + 8) == 0x4182fff8) - { - // if it's still 0, we can wait until the next event - TST(RD, RD); - FixupBranch noIdle = B_CC(CC_NEQ); - - gpr.Flush(FLUSH_MAINTAIN_STATE); - fpr.Flush(FLUSH_MAINTAIN_STATE); - - rA = gpr.GetReg(); - - MOVI2R(rA, (u32)&PowerPC::OnIdle); - BL(rA); - - gpr.Unlock(rA); - WriteExceptionExit(); - - SetJumpTarget(noIdle); - - //js.compilerPC += 8; - return; - } -} - -// Some games use this heavily in video codecs -// We make the assumption that this pulls from main RAM at /all/ times -void JitArm::lmw(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(!jo.fastmem); - - u32 a = inst.RA; - ARMReg rA = gpr.GetReg(); - MOVI2R(rA, inst.SIMM_16); - if (a) - ADD(rA, rA, gpr.R(a)); - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - BIC(rA, rA, mask); - ADD(rA, rA, R8); - - for (int i = inst.RD; i < 32; i++) - { - ARMReg RX = gpr.R(i); - LDR(RX, rA, (i - inst.RD) * 4); - REV(RX, RX); - } - gpr.Unlock(rA); -} - -void JitArm::stmw(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - FALLBACK_IF(!jo.fastmem); - - u32 a = inst.RA; - ARMReg rA = gpr.GetReg(); - ARMReg rB = gpr.GetReg(); - MOVI2R(rA, inst.SIMM_16); - if (a) - ADD(rA, rA, gpr.R(a)); - Operand2 mask(2, 1); // ~(Memory::MEMVIEW32_MASK) - BIC(rA, rA, mask); - ADD(rA, rA, R8); - - for (int i = inst.RD; i < 32; i++) - { - ARMReg RX = gpr.R(i); - REV(rB, RX); - STR(rB, rA, (i - inst.RD) * 4); - } - gpr.Unlock(rA, rB); -} - -void JitArm::dcbst(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreOff); - - // If the dcbst instruction is preceded by dcbt, it is flushing a prefetched - // memory location. Do not invalidate the JIT cache in this case as the memory - // will be the same. - // dcbt = 0x7c00022c - FALLBACK_IF((PowerPC::HostRead_U32(js.compilerPC - 4) & 0x7c00022c) != 0x7c00022c); -} - -void JitArm::icbi(UGeckoInstruction inst) -{ - FallBackToInterpreter(inst); - WriteExit(js.compilerPC + 4); -} - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp deleted file mode 100644 index 6dd1b5e268..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStoreFloating.cpp +++ /dev/null @@ -1,403 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/ConfigManager.h" -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/Memmap.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitFPRCache.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -void JitArm::lfXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - - ARMReg RA; - - u32 a = inst.RA, b = inst.RB; - - s32 offset = inst.SIMM_16; - u32 flags = BackPatchInfo::FLAG_LOAD; - bool update = false; - s32 offsetReg = -1; - - switch (inst.OPCD) - { - case 31: - switch (inst.SUBOP10) - { - case 567: // lfsux - flags |= BackPatchInfo::FLAG_SIZE_F32; - update = true; - offsetReg = b; - break; - case 535: // lfsx - flags |= BackPatchInfo::FLAG_SIZE_F32; - offsetReg = b; - break; - case 631: // lfdux - flags |= BackPatchInfo::FLAG_SIZE_F64; - update = true; - offsetReg = b; - break; - case 599: // lfdx - flags |= BackPatchInfo::FLAG_SIZE_F64; - offsetReg = b; - break; - } - break; - case 49: // lfsu - flags |= BackPatchInfo::FLAG_SIZE_F32; - update = true; - break; - case 48: // lfs - flags |= BackPatchInfo::FLAG_SIZE_F32; - break; - case 51: // lfdu - flags |= BackPatchInfo::FLAG_SIZE_F64; - update = true; - break; - case 50: // lfd - flags |= BackPatchInfo::FLAG_SIZE_F64; - break; - } - - ARMReg v0 = fpr.R0(inst.FD, false), v1 = INVALID_REG; - if (flags & BackPatchInfo::FLAG_SIZE_F32) - v1 = fpr.R1(inst.FD, false); - - ARMReg rA = R11; - ARMReg addr = R12; - - u32 imm_addr = 0; - bool is_immediate = false; - if (update) - { - // Always uses RA - if (gpr.IsImm(a) && offsetReg == -1) - { - is_immediate = true; - imm_addr = offset + gpr.GetImm(a); - } - else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); - } - else - { - if (offsetReg == -1) - { - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - ADD(addr, gpr.R(a), off); - } - else - { - MOVI2R(addr, offset); - ADD(addr, addr, gpr.R(a)); - } - } - else - { - ADD(addr, gpr.R(offsetReg), gpr.R(a)); - } - } - } - else - { - if (offsetReg == -1) - { - if (a && gpr.IsImm(a)) - { - is_immediate = true; - imm_addr = gpr.GetImm(a) + offset; - } - else if (a) - { - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - ADD(addr, gpr.R(a), off); - } - else - { - MOVI2R(addr, offset); - ADD(addr, addr, gpr.R(a)); - } - } - else - { - is_immediate = true; - imm_addr = offset; - } - } - else - { - if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); - } - else if (!a && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(offsetReg); - } - else if (a) - { - ADD(addr, gpr.R(a), gpr.R(offsetReg)); - } - else - { - MOV(addr, gpr.R(offsetReg)); - } - } - } - - if (update) - RA = gpr.R(a); - - if (is_immediate) - MOVI2R(addr, imm_addr); - - LDR(rA, R9, PPCSTATE_OFF(Exceptions)); - CMP(rA, EXCEPTION_DSI); - FixupBranch DoNotLoad = B_CC(CC_EQ); - - if (update) - MOV(RA, addr); - - EmitBackpatchRoutine(this, flags, - jo.fastmem, - !(is_immediate && PowerPC::IsOptimizableRAMAddress(imm_addr)), v0, v1); - - SetJumpTarget(DoNotLoad); -} - -void JitArm::stfXX(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStoreFloatingOff); - - ARMReg RA; - - u32 a = inst.RA, b = inst.RB; - - s32 offset = inst.SIMM_16; - u32 flags = BackPatchInfo::FLAG_STORE; - bool update = false; - s32 offsetReg = -1; - - switch (inst.OPCD) - { - case 31: - switch (inst.SUBOP10) - { - case 663: // stfsx - flags |= BackPatchInfo::FLAG_SIZE_F32; - offsetReg = b; - break; - case 695: // stfsux - flags |= BackPatchInfo::FLAG_SIZE_F32; - offsetReg = b; - break; - case 727: // stfdx - flags |= BackPatchInfo::FLAG_SIZE_F64; - offsetReg = b; - break; - case 759: // stfdux - flags |= BackPatchInfo::FLAG_SIZE_F64; - update = true; - offsetReg = b; - break; - } - break; - case 53: // stfsu - flags |= BackPatchInfo::FLAG_SIZE_F32; - update = true; - break; - case 52: // stfs - flags |= BackPatchInfo::FLAG_SIZE_F32; - break; - case 55: // stfdu - flags |= BackPatchInfo::FLAG_SIZE_F64; - update = true; - break; - case 54: // stfd - flags |= BackPatchInfo::FLAG_SIZE_F64; - break; - } - - ARMReg v0 = fpr.R0(inst.FS); - - ARMReg rA = R11; - ARMReg addr = R12; - - u32 imm_addr = 0; - bool is_immediate = false; - if (update) - { - // Always uses RA - if (gpr.IsImm(a) && offsetReg == -1) - { - is_immediate = true; - imm_addr = offset + gpr.GetImm(a); - } - else if (gpr.IsImm(a) && offsetReg != -1 && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); - } - else - { - if (offsetReg == -1) - { - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - ADD(addr, gpr.R(a), off); - } - else - { - MOVI2R(addr, offset); - ADD(addr, addr, gpr.R(a)); - } - } - else - { - ADD(addr, gpr.R(offsetReg), gpr.R(a)); - } - } - } - else - { - if (offsetReg == -1) - { - if (a && gpr.IsImm(a)) - { - is_immediate = true; - imm_addr = gpr.GetImm(a) + offset; - } - else if (a) - { - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - ADD(addr, gpr.R(a), off); - } - else - { - MOVI2R(addr, offset); - ADD(addr, addr, gpr.R(a)); - } - } - else - { - is_immediate = true; - imm_addr = offset; - } - } - else - { - if (a && gpr.IsImm(a) && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(a) + gpr.GetImm(offsetReg); - } - else if (!a && gpr.IsImm(offsetReg)) - { - is_immediate = true; - imm_addr = gpr.GetImm(offsetReg); - } - else if (a) - { - ADD(addr, gpr.R(a), gpr.R(offsetReg)); - } - else - { - MOV(addr, gpr.R(offsetReg)); - } - } - } - - if (is_immediate) - MOVI2R(addr, imm_addr); - - if (update) - { - RA = gpr.R(a); - LDR(rA, R9, PPCSTATE_OFF(Exceptions)); - CMP(rA, EXCEPTION_DSI); - - SetCC(CC_NEQ); - MOV(RA, addr); - SetCC(); - } - - if (is_immediate) - { - if (jit->jo.optimizeGatherPipe && PowerPC::IsOptimizableGatherPipeWrite(imm_addr)) - { - int accessSize; - if (flags & BackPatchInfo::FLAG_SIZE_F64) - accessSize = 64; - else - accessSize = 32; - - MOVI2R(R14, (u32)&GPFifo::m_gatherPipeCount); - MOVI2R(R10, (u32)GPFifo::m_gatherPipe); - LDR(R11, R14); - ADD(R10, R10, R11); - NEONXEmitter nemit(this); - if (accessSize == 64) - { - PUSH(2, R0, R1); - nemit.VREV64(I_8, D0, v0); - VMOV(R0, D0); - STR(R0, R10, 0); - STR(R1, R10, 4); - POP(2, R0, R1); - } - else if (accessSize == 32) - { - VCVT(S0, v0, 0); - nemit.VREV32(I_8, D0, D0); - VMOV(addr, S0); - STR(addr, R10); - } - ADD(R11, R11, accessSize >> 3); - STR(R11, R14); - jit->js.fifoBytesThisBlock += accessSize >> 3; - - } - else if (PowerPC::IsOptimizableRAMAddress(imm_addr)) - { - MOVI2R(addr, imm_addr); - EmitBackpatchRoutine(this, flags, jo.fastmem, false, v0); - } - else - { - MOVI2R(addr, imm_addr); - EmitBackpatchRoutine(this, flags, false, false, v0); - } - } - else - { - EmitBackpatchRoutine(this, flags, jo.fastmem, true, v0); - } -} - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp deleted file mode 100644 index 5b815525c9..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_LoadStorePaired.cpp +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -void JitArm::psq_l(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStorePairedOff); - - // R12 contains scale - // R11 contains type - // R10 is the ADDR - FALLBACK_IF(jo.memcheck || !jo.fastmem); - - bool update = inst.OPCD == 57; - s32 offset = inst.SIMM_12; - - LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); - UBFX(R12, R11, 16, 3); // Type - LSL(R12, R12, 2); - UBFX(R11, R11, 24, 6); // Scale - LSL(R11, R11, 3); - - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - if (inst.RA || update) - ADD(R10, gpr.R(inst.RA), off); - else - MOV(R10, off); - } - else - { - MOVI2R(R10, (u32)offset); - if (inst.RA || update) // Always uses the register on update - ADD(R10, R10, gpr.R(inst.RA)); - } - - if (update) - MOV(gpr.R(inst.RA), R10); - MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized); - ADD(R14, R14, R12); - LDR(R14, R14, inst.W ? 8 * 4 : 0); - - // Values returned in S0, S1 - BL(R14); // Jump to the quantizer Load - - ARMReg vD0 = fpr.R0(inst.RS, false); - ARMReg vD1 = fpr.R1(inst.RS, false); - VCVT(vD0, S0, 0); - if (!inst.W) - VCVT(vD1, S1, 0); - else - MOVI2F(vD1, 1.0f, INVALID_REG); // No need for temp reg with 1.0f -} - -void JitArm::psq_lx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStorePairedOff); - - // R12 contains scale - // R11 contains type - // R10 is the ADDR - FALLBACK_IF(jo.memcheck || !jo.fastmem); - - bool update = inst.SUBOP10 == 38; - - LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.Ix])); - UBFX(R12, R11, 16, 3); // Type - LSL(R12, R12, 2); - UBFX(R11, R11, 24, 6); // Scale - LSL(R11, R11, 3); - - if (inst.RA || update) // Always uses the register on update - { - ADD(R10, gpr.R(inst.RB), gpr.R(inst.RA)); - } - else - { - MOV(R10, gpr.R(inst.RB)); - } - - if (update) - MOV(gpr.R(inst.RA), R10); - - MOVI2R(R14, (u32)asm_routines.pairedLoadQuantized); - ADD(R14, R14, R12); - LDR(R14, R14, inst.Wx ? 8 * 4 : 0); - - // Values returned in S0, S1 - BL(R14); // Jump to the quantizer Load - - ARMReg vD0 = fpr.R0(inst.RS, false); - ARMReg vD1 = fpr.R1(inst.RS, false); - LDR(R14, R9, PPCSTATE_OFF(Exceptions)); - CMP(R14, EXCEPTION_DSI); - SetCC(CC_NEQ); - - VCVT(vD0, S0, 0); - if (!inst.Wx) - VCVT(vD1, S1, 0); - else - MOVI2F(vD1, 1.0f, INVALID_REG); // No need for temp reg with 1.0f - SetCC(); -} - -void JitArm::psq_st(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStorePairedOff); - - // R12 contains scale - // R11 contains type - // R10 is the ADDR - FALLBACK_IF(jo.memcheck || !jo.fastmem); - - bool update = inst.OPCD == 61; - s32 offset = inst.SIMM_12; - - LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); - UBFX(R12, R11, 0, 3); // Type - LSL(R12, R12, 2); - UBFX(R11, R11, 8, 6); // Scale - LSL(R11, R11, 3); - - Operand2 off; - if (TryMakeOperand2(offset, off)) - { - if (inst.RA || update) - ADD(R10, gpr.R(inst.RA), off); - else - MOV(R10, off); - } - else - { - MOVI2R(R10, (u32)offset); - if (inst.RA || update) // Always uses the register on update - ADD(R10, R10, gpr.R(inst.RA)); - } - - if (update) - MOV(gpr.R(inst.RA), R10); - MOVI2R(R14, (u32)asm_routines.pairedStoreQuantized); - ADD(R14, R14, R12); - LDR(R14, R14, inst.W ? 8 * 4 : 0); - - ARMReg vD0 = fpr.R0(inst.RS); - VCVT(S0, vD0, 0); - - if (!inst.W) - { - ARMReg vD1 = fpr.R1(inst.RS); - VCVT(S1, vD1, 0); - } - // floats passed through D0 - BL(R14); // Jump to the quantizer Store -} - -void JitArm::psq_stx(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITLoadStorePairedOff); - - // R12 contains scale - // R11 contains type - // R10 is the ADDR - FALLBACK_IF(jo.memcheck || !jo.fastmem); - - bool update = inst.SUBOP10 == 39; - - LDR(R11, R9, PPCSTATE_OFF(spr[SPR_GQR0 + inst.I])); - UBFX(R12, R11, 0, 3); // Type - LSL(R12, R12, 2); - UBFX(R11, R11, 8, 6); // Scale - LSL(R11, R11, 3); - - if (inst.RA || update) // Always uses the register on update - { - ADD(R10, gpr.R(inst.RA), gpr.R(inst.RB)); - } - else - { - MOV(R10, gpr.R(inst.RB)); - } - - if (update) - MOV(gpr.R(inst.RA), R10); - - MOVI2R(R14, (u32)asm_routines.pairedStoreQuantized); - ADD(R14, R14, R12); - LDR(R14, R14, inst.W ? 8 * 4 : 0); - - ARMReg vD0 = fpr.R0(inst.RS); - VCVT(S0, vD0, 0); - - if (!inst.W) - { - ARMReg vD1 = fpr.R1(inst.RS); - VCVT(S1, vD1, 0); - } - // floats passed through D0 - BL(R14); // Jump to the quantizer Store -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp deleted file mode 100644 index f6092451c5..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Paired.cpp +++ /dev/null @@ -1,618 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitArm_FPUtils.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -void JitArm::ps_rsqrte(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(true); - - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - ARMReg fpscrReg = gpr.GetReg(); - ARMReg V0 = D1; - ARMReg rA = gpr.GetReg(); - - MOVI2R(fpscrReg, (u32)&PPC_NAN); - VLDR(V0, fpscrReg, 0); - LDR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - - VCMP(vB0); - VMRS(_PC); - FixupBranch Less0 = B_CC(CC_LT); - VMOV(vD0, V0); - SetFPException(fpscrReg, FPSCR_VXSQRT); - FixupBranch SkipOrr0 = B(); - SetJumpTarget(Less0); - SetCC(CC_EQ); - ORR(rA, rA, 1); - SetCC(); - SetJumpTarget(SkipOrr0); - - VCMP(vB1); - VMRS(_PC); - FixupBranch Less1 = B_CC(CC_LT); - VMOV(vD1, V0); - SetFPException(fpscrReg, FPSCR_VXSQRT); - FixupBranch SkipOrr1 = B(); - SetJumpTarget(Less1); - SetCC(CC_EQ); - ORR(rA, rA, 2); - SetCC(); - SetJumpTarget(SkipOrr1); - - CMP(rA, 0); - FixupBranch noException = B_CC(CC_EQ); - SetFPException(fpscrReg, FPSCR_ZX); - SetJumpTarget(noException); - - VCVT(S0, vB0, 0); - VCVT(S1, vB1, 0); - - NEONXEmitter nemit(this); - nemit.VRSQRTE(F_32, D0, D0); - VCVT(vD0, S0, 0); - VCVT(vD1, S1, 0); - - STR(fpscrReg, R9, PPCSTATE_OFF(fpscr)); - gpr.Unlock(fpscrReg, rA); -} - -void JitArm::ps_sel(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VCMP(vA0); - VMRS(_PC); - - FixupBranch GT0 = B_CC(CC_GE); - VMOV(vD0, vB0); - FixupBranch EQ0 = B(); - SetJumpTarget(GT0); - VMOV(vD0, vC0); - SetJumpTarget(EQ0); - - VCMP(vA1); - VMRS(_PC); - FixupBranch GT1 = B_CC(CC_GE); - VMOV(vD1, vB1); - FixupBranch EQ1 = B(); - SetJumpTarget(GT1); - VMOV(vD1, vC1); - SetJumpTarget(EQ1); -} - -void JitArm::ps_add(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VADD(vD0, vA0, vB0); - VADD(vD1, vA1, vB1); -} - -void JitArm::ps_div(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VDIV(vD0, vA0, vB0); - VDIV(vD1, vA1, vB1); -} - -void JitArm::ps_res(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - ARMReg V0 = fpr.GetReg(); - MOVI2R(V0, 1.0, INVALID_REG); // temp reg not needed for 1.0 - - VDIV(vD0, V0, vB0); - VDIV(vD1, V0, vB1); - fpr.Unlock(V0); -} - -void JitArm::ps_nmadd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - VMUL(V0, vA0, vC0); - VMUL(V1, vA1, vC1); - VADD(vD0, V0, vB0); - VADD(vD1, V1, vB1); - VNEG(vD0, vD0); - VNEG(vD1, vD1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_madd(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - VMUL(V0, vA0, vC0); - VMUL(V1, vA1, vC1); - VADD(vD0, V0, vB0); - VADD(vD1, V1, vB1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_nmsub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - VMUL(V0, vA0, vC0); - VMUL(V1, vA1, vC1); - VSUB(vD0, V0, vB0); - VSUB(vD1, V1, vB1); - VNEG(vD0, vD0); - VNEG(vD1, vD1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_msub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - VMUL(V0, vA0, vC0); - VMUL(V1, vA1, vC1); - VSUB(vD0, V0, vB0); - VSUB(vD1, V1, vB1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_madds0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - VMUL(V0, vA0, vC0); - VMUL(V1, vA1, vC0); - - VADD(vD0, V0, vB0); - VADD(vD1, V1, vB1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_madds1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - VMUL(V0, vA0, vC1); - VMUL(V1, vA1, vC1); - VADD(vD0, V0, vB0); - VADD(vD1, V1, vB1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} -void JitArm::ps_sum0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB1 = fpr.R1(b); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VADD(vD0, vA0, vB1); - VMOV(vD1, vC1); - -} - -void JitArm::ps_sum1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB1 = fpr.R1(b); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VMOV(vD0, vC0); - VADD(vD1, vA0, vB1); -} - - -void JitArm::ps_sub(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VSUB(vD0, vA0, vB0); - VSUB(vD1, vA1, vB1); -} - -void JitArm::ps_mul(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vC0 = fpr.R0(c); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VMUL(vD0, vA0, vC0); - VMUL(vD1, vA1, vC1); -} - -void JitArm::ps_muls0(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vC0 = fpr.R0(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - - VMUL(V0, vA0, vC0); - VMUL(V1, vA1, vC0); - VMOV(vD0, V0); - VMOV(vD1, V1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_muls1(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, c = inst.FC, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vA1 = fpr.R1(a); - ARMReg vC1 = fpr.R1(c); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - ARMReg V0 = fpr.GetReg(); - ARMReg V1 = fpr.GetReg(); - - - VMUL(V0, vA0, vC1); - VMUL(V1, vA1, vC1); - VMOV(vD0, V0); - VMOV(vD1, V1); - - fpr.Unlock(V0); - fpr.Unlock(V1); -} - -void JitArm::ps_merge00(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VMOV(vD1, vB0); - VMOV(vD0, vA0); -} - -void JitArm::ps_merge01(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA0 = fpr.R0(a); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - VMOV(vD0, vA0); - VMOV(vD1, vB1); -} - -void JitArm::ps_merge10(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA1 = fpr.R1(a); - ARMReg vB0 = fpr.R0(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - ARMReg V0 = fpr.GetReg(); - - VMOV(V0, vB0); - VMOV(vD0, vA1); - VMOV(vD1, V0); - - fpr.Unlock(V0); -} - -void JitArm::ps_merge11(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 a = inst.FA, b = inst.FB, d = inst.FD; - - ARMReg vA1 = fpr.R1(a); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - VMOV(vD0, vA1); - VMOV(vD1, vB1); -} - -void JitArm::ps_mr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - VMOV(vD0, vB0); - VMOV(vD1, vB1); -} - -void JitArm::ps_neg(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - VNEG(vD0, vB0); - VNEG(vD1, vB1); -} - -void JitArm::ps_abs(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - VABS(vD0, vB0); - VABS(vD1, vB1); -} - -void JitArm::ps_nabs(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITPairedOff); - FALLBACK_IF(inst.Rc); - - u32 b = inst.FB, d = inst.FD; - - ARMReg vB0 = fpr.R0(b); - ARMReg vB1 = fpr.R1(b); - ARMReg vD0 = fpr.R0(d, false); - ARMReg vD1 = fpr.R1(d, false); - - VABS(vD0, vB0); - VNEG(vD0, vD0); - VABS(vD1, vB1); - VNEG(vD1, vD1); -} - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp deleted file mode 100644 index e8cdbdc8d4..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/CommonTypes.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/PPCTables.h" - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -FixupBranch JitArm::JumpIfCRFieldBit(int field, int bit, bool jump_if_set) -{ - ARMReg RA = gpr.GetReg(); - - Operand2 SOBit(2, 2); // 0x10000000 - Operand2 LTBit(1, 1); // 0x80000000 - - FixupBranch branch; - switch (bit) - { - case CR_SO_BIT: // check bit 61 set - LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); - TST(RA, SOBit); - branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ); - break; - case CR_EQ_BIT: // check bits 31-0 == 0 - LDR(RA, R9, PPCSTATE_OFF(cr_val[field])); - CMP(RA, 0); - branch = B_CC(jump_if_set ? CC_EQ : CC_NEQ); - break; - case CR_GT_BIT: // check val > 0 - LDR(RA, R9, PPCSTATE_OFF(cr_val[field])); - CMP(RA, 1); - LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); - SBCS(RA, RA, 0); - branch = B_CC(jump_if_set ? CC_GE : CC_LT); - break; - case CR_LT_BIT: // check bit 62 set - LDR(RA, R9, PPCSTATE_OFF(cr_val[field]) + sizeof(u32)); - TST(RA, LTBit); - branch = B_CC(jump_if_set ? CC_NEQ : CC_EQ); - break; - default: - _assert_msg_(DYNA_REC, false, "Invalid CR bit"); - } - - gpr.Unlock(RA); - return branch; -} - -void JitArm::mtspr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - - switch (iIndex) - { - - case SPR_DMAU: - - case SPR_SPRG0: - case SPR_SPRG1: - case SPR_SPRG2: - case SPR_SPRG3: - - case SPR_SRR0: - case SPR_SRR1: - // These are safe to do the easy way, see the bottom of this function. - break; - - case SPR_LR: - case SPR_CTR: - case SPR_GQR0: - case SPR_GQR0 + 1: - case SPR_GQR0 + 2: - case SPR_GQR0 + 3: - case SPR_GQR0 + 4: - case SPR_GQR0 + 5: - case SPR_GQR0 + 6: - case SPR_GQR0 + 7: - // These are safe to do the easy way, see the bottom of this function. - break; - case SPR_XER: - { - ARMReg RD = gpr.R(inst.RD); - ARMReg tmp = gpr.GetReg(); - ARMReg mask = gpr.GetReg(); - MOVI2R(mask, 0xFF7F); - AND(tmp, RD, mask); - STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl)); - LSR(tmp, RD, XER_CA_SHIFT); - AND(tmp, tmp, 1); - STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - LSR(tmp, RD, XER_OV_SHIFT); - STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov)); - gpr.Unlock(tmp, mask); - } - break; - default: - FALLBACK_IF(true); - } - - // OK, this is easy. - ARMReg RD = gpr.R(inst.RD); - STR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); -} - -void JitArm::mftb(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - mfspr(inst); -} - -void JitArm::mfspr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); - switch (iIndex) - { - case SPR_XER: - { - gpr.BindToRegister(inst.RD, false); - ARMReg RD = gpr.R(inst.RD); - ARMReg tmp = gpr.GetReg(); - LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl)); - LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); - LSL(tmp, tmp, XER_CA_SHIFT); - ORR(RD, RD, tmp); - LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov)); - LSL(tmp, tmp, XER_OV_SHIFT); - ORR(RD, RD, tmp); - gpr.Unlock(tmp); - } - break; - case SPR_WPAR: - case SPR_DEC: - case SPR_TL: - case SPR_TU: - FALLBACK_IF(true); - default: - gpr.BindToRegister(inst.RD, false); - ARMReg RD = gpr.R(inst.RD); - LDR(RD, R9, PPCSTATE_OFF(spr) + iIndex * 4); - break; - } -} - -void JitArm::mtsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - STR(gpr.R(inst.RS), R9, PPCSTATE_OFF(sr[inst.SR])); -} - -void JitArm::mfsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - gpr.BindToRegister(inst.RD, false); - LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(sr[inst.SR])); -} - -void JitArm::mtmsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - // Don't interpret this, if we do we get thrown out - //JITDISABLE(bJITSystemRegistersOff); - - STR(gpr.R(inst.RS), R9, PPCSTATE_OFF(msr)); - - gpr.Flush(); - fpr.Flush(); - - WriteExit(js.compilerPC + 4); -} - -void JitArm::mfmsr(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - gpr.BindToRegister(inst.RD, false); - LDR(gpr.R(inst.RD), R9, PPCSTATE_OFF(msr)); -} - -void JitArm::mcrf(UGeckoInstruction inst) -{ - INSTRUCTION_START - JITDISABLE(bJITSystemRegistersOff); - - ARMReg rA = gpr.GetReg(); - - if (inst.CRFS != inst.CRFD) - { - LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS])); - STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD])); - LDR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFS]) + sizeof(u32)); - STR(rA, R9, PPCSTATE_OFF(cr_val[inst.CRFD]) + sizeof(u32)); - } - gpr.Unlock(rA); -} - diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp deleted file mode 100644 index 50975b69e6..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.cpp +++ /dev/null @@ -1,483 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitInterface.h" -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitArm_Tables.h" - -// Should be moved in to the Jit class -typedef void (JitArm::*_Instruction) (UGeckoInstruction instCode); - -static _Instruction dynaOpTable[64]; -static _Instruction dynaOpTable4[1024]; -static _Instruction dynaOpTable19[1024]; -static _Instruction dynaOpTable31[1024]; -static _Instruction dynaOpTable59[32]; -static _Instruction dynaOpTable63[1024]; - -void JitArm::DynaRunTable4(UGeckoInstruction _inst) {(this->*dynaOpTable4 [_inst.SUBOP10])(_inst);} -void JitArm::DynaRunTable19(UGeckoInstruction _inst) {(this->*dynaOpTable19[_inst.SUBOP10])(_inst);} -void JitArm::DynaRunTable31(UGeckoInstruction _inst) {(this->*dynaOpTable31[_inst.SUBOP10])(_inst);} -void JitArm::DynaRunTable59(UGeckoInstruction _inst) {(this->*dynaOpTable59[_inst.SUBOP5 ])(_inst);} -void JitArm::DynaRunTable63(UGeckoInstruction _inst) {(this->*dynaOpTable63[_inst.SUBOP10])(_inst);} - -struct GekkoOPTemplate -{ - int opcode; - _Instruction Inst; - //GekkoOPInfo opinfo; // Doesn't need opinfo, Interpreter fills it out -}; - -static GekkoOPTemplate primarytable[] = -{ - {4, &JitArm::DynaRunTable4}, // RunTable4 - {19, &JitArm::DynaRunTable19}, // RunTable19 - {31, &JitArm::DynaRunTable31}, // RunTable31 - {59, &JitArm::DynaRunTable59}, // RunTable59 - {63, &JitArm::DynaRunTable63}, // RunTable63 - - {16, &JitArm::bcx}, // bcx - {18, &JitArm::bx}, // bx - - {3, &JitArm::twx}, // twi - {17, &JitArm::sc}, // sc - - {7, &JitArm::arith}, // mulli - {8, &JitArm::subfic}, // subfic - {10, &JitArm::cmpli}, // cmpli - {11, &JitArm::cmpi}, // cmpi - {12, &JitArm::arith}, // addic - {13, &JitArm::arith}, // addic_rc - {14, &JitArm::arith}, // addi - {15, &JitArm::arith}, // addis - - {20, &JitArm::rlwimix}, // rlwimix - {21, &JitArm::rlwinmx}, // rlwinmx - {23, &JitArm::rlwnmx}, // rlwnmx - - {24, &JitArm::arith}, // ori - {25, &JitArm::arith}, // oris - {26, &JitArm::arith}, // xori - {27, &JitArm::arith}, // xoris - {28, &JitArm::arith}, // andi_rc - {29, &JitArm::arith}, // andis_rc - - {32, &JitArm::lXX}, // lwz - {33, &JitArm::lXX}, // lwzu - {34, &JitArm::lXX}, // lbz - {35, &JitArm::lXX}, // lbzu - {40, &JitArm::lXX}, // lhz - {41, &JitArm::lXX}, // lhzu - {42, &JitArm::lXX}, // lha - {43, &JitArm::lXX}, // lhau - - {44, &JitArm::stX}, // sth - {45, &JitArm::stX}, // sthu - {36, &JitArm::stX}, // stw - {37, &JitArm::stX}, // stwu - {38, &JitArm::stX}, // stb - {39, &JitArm::stX}, // stbu - - {46, &JitArm::lmw}, // lmw - {47, &JitArm::stmw}, // stmw - - {48, &JitArm::lfXX}, // lfs - {49, &JitArm::lfXX}, // lfsu - {50, &JitArm::lfXX}, // lfd - {51, &JitArm::lfXX}, // lfdu - - {52, &JitArm::stfXX}, // stfs - {53, &JitArm::stfXX}, // stfsu - {54, &JitArm::stfXX}, // stfd - {55, &JitArm::stfXX}, // stfdu - - {56, &JitArm::psq_l}, // psq_l - {57, &JitArm::psq_l}, // psq_lu - {60, &JitArm::psq_st}, // psq_st - {61, &JitArm::psq_st}, // psq_stu - - //missing: 0, 1, 2, 5, 6, 9, 22, 30, 62, 58 -}; - -static GekkoOPTemplate table4[] = -{ //SUBOP10 - {0, &JitArm::FallBackToInterpreter}, // ps_cmpu0 - {32, &JitArm::FallBackToInterpreter}, // ps_cmpo0 - {40, &JitArm::ps_neg}, // ps_neg - {136, &JitArm::ps_nabs}, // ps_nabs - {264, &JitArm::ps_abs}, // ps_abs - {64, &JitArm::FallBackToInterpreter}, // ps_cmpu1 - {72, &JitArm::ps_mr}, // ps_mr - {96, &JitArm::FallBackToInterpreter}, // ps_cmpo1 - {528, &JitArm::ps_merge00}, // ps_merge00 - {560, &JitArm::ps_merge01}, // ps_merge01 - {592, &JitArm::ps_merge10}, // ps_merge10 - {624, &JitArm::ps_merge11}, // ps_merge11 - - {1014, &JitArm::FallBackToInterpreter}, // dcbz_l -}; - -static GekkoOPTemplate table4_2[] = -{ - {10, &JitArm::ps_sum0}, // ps_sum0 - {11, &JitArm::ps_sum1}, // ps_sum1 - {12, &JitArm::ps_muls0}, // ps_muls0 - {13, &JitArm::ps_muls1}, // ps_muls1 - {14, &JitArm::ps_madds0}, // ps_madds0 - {15, &JitArm::ps_madds1}, // ps_madds1 - {18, &JitArm::ps_div}, // ps_div - {20, &JitArm::ps_sub}, // ps_sub - {21, &JitArm::ps_add}, // ps_add - {23, &JitArm::ps_sel}, // ps_sel - {24, &JitArm::ps_res}, // ps_res - {25, &JitArm::ps_mul}, // ps_mul - {26, &JitArm::ps_rsqrte}, // ps_rsqrte - {28, &JitArm::ps_msub}, // ps_msub - {29, &JitArm::ps_madd}, // ps_madd - {30, &JitArm::ps_nmsub}, // ps_nmsub - {31, &JitArm::ps_nmadd}, // ps_nmadd -}; - - -static GekkoOPTemplate table4_3[] = -{ - {6, &JitArm::psq_lx}, // psq_lx - {7, &JitArm::psq_stx}, // psq_stx - {38, &JitArm::psq_lx}, // psq_lux - {39, &JitArm::psq_stx}, // psq_stux -}; - -static GekkoOPTemplate table19[] = -{ - {528, &JitArm::bcctrx}, // bcctrx - {16, &JitArm::bclrx}, // bclrx - {257, &JitArm::FallBackToInterpreter}, // crand - {129, &JitArm::FallBackToInterpreter}, // crandc - {289, &JitArm::FallBackToInterpreter}, // creqv - {225, &JitArm::FallBackToInterpreter}, // crnand - {33, &JitArm::FallBackToInterpreter}, // crnor - {449, &JitArm::FallBackToInterpreter}, // cror - {417, &JitArm::FallBackToInterpreter}, // crorc - {193, &JitArm::FallBackToInterpreter}, // crxor - - {150, &JitArm::DoNothing}, // isync - {0, &JitArm::mcrf}, // mcrf - - {50, &JitArm::rfi}, // rfi - {18, &JitArm::Break}, // rfid -}; - - -static GekkoOPTemplate table31[] = -{ - {266, &JitArm::arith}, // addx - {778, &JitArm::arith}, // addox - {10, &JitArm::arith}, // addcx - {522, &JitArm::arith}, // addcox - {138, &JitArm::addex}, // addex - {650, &JitArm::addex}, // addeox - {234, &JitArm::FallBackToInterpreter}, // addmex - {746, &JitArm::FallBackToInterpreter}, // addmeox - {202, &JitArm::FallBackToInterpreter}, // addzex - {714, &JitArm::FallBackToInterpreter}, // addzeox - {491, &JitArm::FallBackToInterpreter}, // divwx - {1003, &JitArm::FallBackToInterpreter}, // divwox - {459, &JitArm::FallBackToInterpreter}, // divwux - {971, &JitArm::FallBackToInterpreter}, // divwuox - {75, &JitArm::FallBackToInterpreter}, // mulhwx - {11, &JitArm::mulhwux}, // mulhwux - {235, &JitArm::arith}, // mullwx - {747, &JitArm::arith}, // mullwox - {104, &JitArm::negx}, // negx - {616, &JitArm::negx}, // negox - {40, &JitArm::arith}, // subfx - {552, &JitArm::arith}, // subfox - {8, &JitArm::FallBackToInterpreter}, // subfcx - {520, &JitArm::FallBackToInterpreter}, // subfcox - {136, &JitArm::FallBackToInterpreter}, // subfex - {648, &JitArm::FallBackToInterpreter}, // subfeox - {232, &JitArm::FallBackToInterpreter}, // subfmex - {744, &JitArm::FallBackToInterpreter}, // subfmeox - {200, &JitArm::FallBackToInterpreter}, // subfzex - {712, &JitArm::FallBackToInterpreter}, // subfzeox - - {28, &JitArm::arith}, // andx - {60, &JitArm::arith}, // andcx - {444, &JitArm::arith}, // orx - {124, &JitArm::arith}, // norx - {316, &JitArm::arith}, // xorx - {412, &JitArm::arith}, // orcx - {476, &JitArm::arith}, // nandx - {284, &JitArm::arith}, // eqvx - {0, &JitArm::cmp}, // cmp - {32, &JitArm::cmpl}, // cmpl - {26, &JitArm::cntlzwx}, // cntlzwx - {922, &JitArm::extshx}, // extshx - {954, &JitArm::extsbx}, // extsbx - {536, &JitArm::arith}, // srwx - {792, &JitArm::arith}, // srawx - {824, &JitArm::srawix}, // srawix - {24, &JitArm::arith}, // slwx - - {54, &JitArm::dcbst}, // dcbst - {86, &JitArm::FallBackToInterpreter}, // dcbf - {246, &JitArm::DoNothing}, // dcbtst - {278, &JitArm::DoNothing}, // dcbt - {470, &JitArm::FallBackToInterpreter}, // dcbi - {758, &JitArm::DoNothing}, // dcba - {1014, &JitArm::FallBackToInterpreter}, // dcbz - - //load word - {23, &JitArm::lXX}, // lwzx - {55, &JitArm::FallBackToInterpreter}, // lwzux - - //load halfword - {279, &JitArm::lXX}, // lhzx - {311, &JitArm::lXX}, // lhzux - - //load halfword signextend - {343, &JitArm::lXX}, // lhax - {375, &JitArm::lXX}, // lhaux - - //load byte - {87, &JitArm::lXX}, // lbzx - {119, &JitArm::lXX}, // lbzux - - //load byte reverse - {534, &JitArm::lXX}, // lwbrx - {790, &JitArm::lXX}, // lhbrx - - // Conditional load/store (Wii SMP) - {150, &JitArm::FallBackToInterpreter}, // stwcxd - {20, &JitArm::FallBackToInterpreter}, // lwarx - - //load string (interpret these) - {533, &JitArm::FallBackToInterpreter}, // lswx - {597, &JitArm::FallBackToInterpreter}, // lswi - - //store word - {151, &JitArm::stX}, // stwx - {183, &JitArm::stX}, // stwux - - //store halfword - {407, &JitArm::stX}, // sthx - {439, &JitArm::stX}, // sthux - - //store byte - {215, &JitArm::stX}, // stbx - {247, &JitArm::stX}, // stbux - - //store bytereverse - {662, &JitArm::FallBackToInterpreter}, // stwbrx - {918, &JitArm::FallBackToInterpreter}, // sthbrx - - {661, &JitArm::FallBackToInterpreter}, // stswx - {725, &JitArm::FallBackToInterpreter}, // stswi - - // fp load/store - {535, &JitArm::lfXX}, // lfsx - {567, &JitArm::lfXX}, // lfsux - {599, &JitArm::lfXX}, // lfdx - {631, &JitArm::lfXX}, // lfdux - - {663, &JitArm::stfXX}, // stfsx - {695, &JitArm::stfXX}, // stfsux - {727, &JitArm::stfXX}, // stfdx - {759, &JitArm::stfXX}, // stfdux - {983, &JitArm::FallBackToInterpreter}, // stfiwx - - {19, &JitArm::FallBackToInterpreter}, // mfcr - {83, &JitArm::mfmsr}, // mfmsr - {144, &JitArm::FallBackToInterpreter}, // mtcrf - {146, &JitArm::mtmsr}, // mtmsr - {210, &JitArm::mtsr}, // mtsr - {242, &JitArm::FallBackToInterpreter}, // mtsrin - {339, &JitArm::mfspr}, // mfspr - {467, &JitArm::mtspr}, // mtspr - {371, &JitArm::mftb}, // mftb - {512, &JitArm::FallBackToInterpreter}, // mcrxr - {595, &JitArm::mfsr}, // mfsr - {659, &JitArm::FallBackToInterpreter}, // mfsrin - - {4, &JitArm::twx}, // tw - {598, &JitArm::DoNothing}, // sync - {982, &JitArm::icbi}, // icbi - - // Unused instructions on GC - {310, &JitArm::FallBackToInterpreter}, // eciwx - {438, &JitArm::FallBackToInterpreter}, // ecowx - {854, &JitArm::DoNothing}, // eieio - {306, &JitArm::FallBackToInterpreter}, // tlbie - {370, &JitArm::FallBackToInterpreter}, // tlbia - {566, &JitArm::DoNothing}, // tlbsync -}; - -static GekkoOPTemplate table59[] = -{ - {18, &JitArm::FallBackToInterpreter}, // fdivsx - {20, &JitArm::fsubsx}, // fsubsx - {21, &JitArm::faddsx}, // faddsx -// {22, &JitArm::FallBackToInterpreter}, // fsqrtsx - {24, &JitArm::fresx}, // fresx - {25, &JitArm::fmulsx}, // fmulsx - {28, &JitArm::FallBackToInterpreter}, // fmsubsx - {29, &JitArm::fmaddsx}, // fmaddsx - {30, &JitArm::FallBackToInterpreter}, // fnmsubsx - {31, &JitArm::fnmaddsx}, // fnmaddsx -}; - -static GekkoOPTemplate table63[] = -{ - {264, &JitArm::fabsx}, // fabsx - {32, &JitArm::FallBackToInterpreter}, // fcmpo - {0, &JitArm::FallBackToInterpreter}, // fcmpu - {14, &JitArm::fctiwx}, // fctiwx - {15, &JitArm::fctiwzx}, // fctiwzx - {72, &JitArm::fmrx}, // fmrx - {136, &JitArm::fnabsx}, // fnabsx - {40, &JitArm::fnegx}, // fnegx - {12, &JitArm::FallBackToInterpreter}, // frspx - - {64, &JitArm::FallBackToInterpreter}, // mcrfs - {583, &JitArm::FallBackToInterpreter}, // mffsx - {70, &JitArm::FallBackToInterpreter}, // mtfsb0x - {38, &JitArm::FallBackToInterpreter}, // mtfsb1x - {134, &JitArm::FallBackToInterpreter}, // mtfsfix - {711, &JitArm::FallBackToInterpreter}, // mtfsfx -}; - -static GekkoOPTemplate table63_2[] = -{ - {18, &JitArm::FallBackToInterpreter}, // fdivx - {20, &JitArm::fsubx}, // fsubx - {21, &JitArm::faddx}, // faddx - {22, &JitArm::FallBackToInterpreter}, // fsqrtx - {23, &JitArm::fselx}, // fselx - {25, &JitArm::fmulx}, // fmulx - {26, &JitArm::frsqrtex}, // frsqrtex - {28, &JitArm::FallBackToInterpreter}, // fmsubx - {29, &JitArm::fmaddx}, // fmaddx - {30, &JitArm::FallBackToInterpreter}, // fnmsubx - {31, &JitArm::fnmaddx}, // fnmaddx -}; - - -namespace JitArmTables -{ - -void CompileInstruction(PPCAnalyst::CodeOp & op) -{ - JitArm *jitarm = (JitArm *)jit; - (jitarm->*dynaOpTable[op.inst.OPCD])(op.inst); - GekkoOPInfo *info = op.opinfo; - - if (info) - { -#ifdef OPLOG - if (!strcmp(info->opname, OP_TO_LOG)) // "mcrfs" - { - rsplocations.push_back(jit.js.compilerPC); - } -#endif - info->compileCount++; - info->lastUse = jit->js.compilerPC; - } -} - -void InitTables() -{ - // once initialized, tables are read-only - static bool initialized = false; - if (initialized) - return; - - //clear - for (auto& tpl : dynaOpTable) - { - tpl = &JitArm::FallBackToInterpreter; - } - - for (int i = 0; i < 32; i++) - { - dynaOpTable59[i] = &JitArm::FallBackToInterpreter; - } - - for (int i = 0; i < 1024; i++) - { - dynaOpTable4 [i] = &JitArm::FallBackToInterpreter; - dynaOpTable19[i] = &JitArm::FallBackToInterpreter; - dynaOpTable31[i] = &JitArm::FallBackToInterpreter; - dynaOpTable63[i] = &JitArm::FallBackToInterpreter; - } - - for (int i = 0; i < (int)(sizeof(primarytable) / sizeof(GekkoOPTemplate)); i++) - { - dynaOpTable[primarytable[i].opcode] = primarytable[i].Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (int j = 0; j < (int)(sizeof(table4_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill+table4_2[j].opcode; - dynaOpTable4[op] = table4_2[j].Inst; - } - } - - for (int i = 0; i < 16; i++) - { - int fill = i << 6; - for (int j = 0; j < (int)(sizeof(table4_3) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill+table4_3[j].opcode; - dynaOpTable4[op] = table4_3[j].Inst; - } - } - - for (int i = 0; i < (int)(sizeof(table4) / sizeof(GekkoOPTemplate)); i++) - { - int op = table4[i].opcode; - dynaOpTable4[op] = table4[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table31) / sizeof(GekkoOPTemplate)); i++) - { - int op = table31[i].opcode; - dynaOpTable31[op] = table31[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table19) / sizeof(GekkoOPTemplate)); i++) - { - int op = table19[i].opcode; - dynaOpTable19[op] = table19[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table59) / sizeof(GekkoOPTemplate)); i++) - { - int op = table59[i].opcode; - dynaOpTable59[op] = table59[i].Inst; - } - - for (int i = 0; i < (int)(sizeof(table63) / sizeof(GekkoOPTemplate)); i++) - { - int op = table63[i].opcode; - dynaOpTable63[op] = table63[i].Inst; - } - - for (int i = 0; i < 32; i++) - { - int fill = i << 5; - for (int j = 0; j < (int)(sizeof(table63_2) / sizeof(GekkoOPTemplate)); j++) - { - int op = fill + table63_2[j].opcode; - dynaOpTable63[op] = table63_2[j].Inst; - } - } - - initialized = true; - -} - -} // namespace diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.h b/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.h deleted file mode 100644 index 4b49c4b9f4..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Tables.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PPCTables.h" - -namespace JitArmTables -{ - void CompileInstruction(PPCAnalyst::CodeOp & op); - void InitTables(); -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp deleted file mode 100644 index 5f1e19f1a7..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp +++ /dev/null @@ -1,659 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Common/ArmEmitter.h" -#include "Common/MemoryUtil.h" - -#include "Core/Core.h" -#include "Core/CoreTiming.h" -#include "Core/HW/GPFifo.h" -#include "Core/HW/Memmap.h" - -#include "Core/PowerPC/PowerPC.h" -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitAsm.h" -#include "Core/PowerPC/JitCommon/JitCache.h" - - -using namespace ArmGen; - -//TODO - make an option -//#if _DEBUG -// bool enableDebug = false; -//#else -// bool enableDebug = false; -//#endif - -JitArmAsmRoutineManager asm_routines; - -static void WriteDual8(u32 val1, u32 val2, u32 addr) -{ - PowerPC::Write_U16(((u16)(u8)val1 << 8) | (u16)(u8)val2, addr); -} - -static void WriteDual16(u32 val1, u32 val2, u32 addr) -{ - PowerPC::Write_U32(((u32)(u16)val1 << 16) | (u32)(u16)val2, addr); -} - -static void WriteDual32(u32 val1, u32 val2, u32 addr) -{ - PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr); -} - -void JitArmAsmRoutineManager::Generate() -{ - enterCode = GetCodePtr(); - PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR); - // Take care to 8-byte align stack for function calls. - // We are misaligned here because of an odd number of args for PUSH. - // It's not like x86 where you need to account for an extra 4 bytes - // consumed by CALL. - SUB(_SP, _SP, 4); - - MOVI2R(R9, (u32)&PowerPC::ppcState.spr[0]); - MOVI2R(R8, (u32)Memory::physical_base); - - FixupBranch skipToRealDispatcher = B(); - dispatcher = GetCodePtr(); - printf("Dispatcher is %p\n", dispatcher); - - // Downcount Check - // The result of slice decrementation should be in flags if somebody jumped here - // IMPORTANT - We jump on negative, not carry!!! - FixupBranch bail = B_CC(CC_MI); - - FixupBranch dbg_exit; - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) - { - MOVI2R(R0, (u32)PowerPC::GetStatePtr()); - LDR(R0, R0); - TST(R0, PowerPC::CPU_STEPPING); - FixupBranch not_stepping = B_CC(CC_EQ); - // XXX: Check for breakpoints - dbg_exit = B(); - SetJumpTarget(not_stepping); - } - - SetJumpTarget(skipToRealDispatcher); - dispatcherNoCheck = GetCodePtr(); - - // This block of code gets the address of the compiled block of code - // It runs though to the compiling portion if it isn't found - LDR(R12, R9, PPCSTATE_OFF(pc));// Load the current PC into R12 - - Operand2 iCacheMask = Operand2(0xE, 2); // JIT_ICACHE_MASK - BIC(R12, R12, iCacheMask); // R12 contains PC & JIT_ICACHE_MASK here. - - MOVI2R(R14, (u32)jit->GetBlockCache()->iCache.data()); - - LDR(R12, R14, R12); // R12 contains iCache[PC & JIT_ICACHE_MASK] here - // R12 Confirmed this is the correct iCache Location loaded. - TST(R12, 0x80); // Test to see if it is a JIT block. - - FixupBranch no_block = B_CC(CC_NEQ); - // Success, it is our Jitblock. - MOVI2R(R14, (u32)jit->GetBlockCache()->GetCodePointers()); - // LDR R14 right here to get CodePointers()[0] pointer. - LSL(R12, R12, 2); // Multiply by four because address locations are u32 in size - LDR(R14, R14, R12); // Load the block address in to R14 - - B(R14); - // No need to jump anywhere after here, the block will go back to dispatcher start - SetJumpTarget(no_block); - - // If we get to this point, that means that we don't have the block cached to execute - // So call ArmJit to compile the block and then execute it. - MOVI2R(R14, (u32)&Jit); - BL(R14); - - B(dispatcherNoCheck); - - SetJumpTarget(bail); - doTiming = GetCodePtr(); - // XXX: In JIT64, Advance() gets called /after/ the exception checking - // once it jumps back to the start of outerLoop - QuickCallFunction(R14, (void*)&CoreTiming::Advance); - - // Does exception checking - LDR(R0, R9, PPCSTATE_OFF(pc)); - STR(R0, R9, PPCSTATE_OFF(npc)); - QuickCallFunction(R14, (void*)&PowerPC::CheckExceptions); - LDR(R0, R9, PPCSTATE_OFF(npc)); - STR(R0, R9, PPCSTATE_OFF(pc)); - // Check the state pointer to see if we are exiting - // Gets checked on every exception check - MOVI2R(R0, (u32)PowerPC::GetStatePtr()); - MVN(R1, 0); - LDR(R0, R0); - TST(R0, R1); - FixupBranch Exit = B_CC(CC_NEQ); - - B(dispatcher); - - SetJumpTarget(Exit); - if (SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging) - SetJumpTarget(dbg_exit); - - // Let the waiting thread know we are done leaving - MOVI2R(R0, (u32)&PowerPC::FinishStateMove); - BL(R0); - - ADD(_SP, _SP, 4); - - POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns - - GenerateCommon(); - - FlushIcache(); -} - -void JitArmAsmRoutineManager::GenerateCommon() -{ - // R14 is LR - // R12 is scratch - // R11 is scale - // R10 is the address - Operand2 mask(3, 1); // ~(Memory::MEMVIEW32_MASK) - Operand2 arghmask(3, 3); // 0x0C000000 - NEONXEmitter nemit(this); - - const u8* loadPairedIllegal = GetCodePtr(); - BKPT(0x10); - - const u8* loadPairedFloatTwo = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - nemit.VLD1(I_32, D0, R10); - nemit.VREV32(I_8, D0, D0); - - MOV(_PC, _LR); - } - const u8* loadPairedFloatOne = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - nemit.VLD1(I_32, D0, R10); - nemit.VREV32(I_8, D0, D0); - - MOV(_PC, _LR); - } - const u8* loadPairedU8Two = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRB(R12, R10); - VMOV(S0, R12); - - LDRB(R12, R10, 1); - VMOV(S1, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT); - VCVT(S1, S1, TO_FLOAT); - - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - MOV(_PC, _LR); - } - const u8* loadPairedU8One = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRB(R12, R10); - VMOV(S0, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT); - - VMUL(S0, S0, S2); - - MOV(_PC, _LR); - } - const u8* loadPairedS8Two = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRSB(R12, R10); - VMOV(S0, R12); - - LDRSB(R12, R10, 1); - VMOV(S1, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT | IS_SIGNED); - VCVT(S1, S1, TO_FLOAT | IS_SIGNED); - - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - MOV(_PC, _LR); - } - const u8* loadPairedS8One = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRSB(R12, R10); - VMOV(S0, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT | IS_SIGNED); - - VMUL(S0, S0, S2); - - MOV(_PC, _LR); - } - const u8* loadPairedU16Two = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRH(R12, R10); - REV16(R12, R12); - VMOV(S0, R12); - - LDRH(R12, R10, 2); - REV16(R12, R12); - VMOV(S1, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT); - VCVT(S1, S1, TO_FLOAT); - - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - MOV(_PC, _LR); - } - const u8* loadPairedU16One = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRH(R12, R10); - REV16(R12, R12); - VMOV(S0, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT); - - VMUL(S0, S0, S2); - MOV(_PC, _LR); - } - const u8* loadPairedS16Two = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRH(R12, R10); - REV16(R12, R12); - SXTH(R12, R12); - VMOV(S0, R12); - - LDRH(R12, R10, 2); - REV16(R12, R12); - SXTH(R12, R12); - VMOV(S1, R12); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - VCVT(S0, S0, TO_FLOAT | IS_SIGNED); - VCVT(S1, S1, TO_FLOAT | IS_SIGNED); - - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - MOV(_PC, _LR); - } - const u8* loadPairedS16One = GetCodePtr(); - { - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - LDRH(R12, R10); - - MOVI2R(R10, (u32)&m_dequantizeTableS); - ADD(R10, R10, R11); - VLDR(S2, R10, 0); - - REV16(R12, R12); - SXTH(R12, R12); - VMOV(S0, R12); - VCVT(S0, S0, TO_FLOAT | IS_SIGNED); - - VMUL(S0, S0, S2); - MOV(_PC, _LR); - } - - pairedLoadQuantized = reinterpret_cast(const_cast(AlignCode16())); - ReserveCodeSpace(16 * sizeof(u8*)); - - pairedLoadQuantized[0] = loadPairedFloatTwo; - pairedLoadQuantized[1] = loadPairedIllegal; - pairedLoadQuantized[2] = loadPairedIllegal; - pairedLoadQuantized[3] = loadPairedIllegal; - pairedLoadQuantized[4] = loadPairedU8Two; - pairedLoadQuantized[5] = loadPairedU16Two; - pairedLoadQuantized[6] = loadPairedS8Two; - pairedLoadQuantized[7] = loadPairedS16Two; - - pairedLoadQuantized[8] = loadPairedFloatOne; - pairedLoadQuantized[9] = loadPairedIllegal; - pairedLoadQuantized[10] = loadPairedIllegal; - pairedLoadQuantized[11] = loadPairedIllegal; - pairedLoadQuantized[12] = loadPairedU8One; - pairedLoadQuantized[13] = loadPairedU16One; - pairedLoadQuantized[14] = loadPairedS8One; - pairedLoadQuantized[15] = loadPairedS16One; - - // Stores - const u8* storePairedIllegal = GetCodePtr(); - BKPT(0x21); - const u8* storePairedFloat = GetCodePtr(); - { - TST(R10, arghmask); - FixupBranch argh = B_CC(CC_NEQ); - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - nemit.VREV32(I_8, D0, D0); - nemit.VST1(I_32, D0, R10); - MOV(_PC, _LR); - - SetJumpTarget(argh); - - PUSH(5, R0, R1, R2, R3, _LR); - VMOV(R0, S0); - VMOV(R1, S1); - MOV(R2, R10); - MOVI2R(R12, (u32)&WriteDual32); - BL(R12); - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storePairedU8 = GetCodePtr(); - { - // R10 is the addr - // R11 is the scale - // R12 is scratch - // S0, S1 is the values - PUSH(5, R0, R1, R2, R3, _LR); - - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO); - VCVT(S1, S1, TO_INT | ROUND_TO_ZERO); - - VMOV(R0, S0); - VMOV(R1, S1); - MOV(R2, R10); - MOVI2R(R12, (u32)&WriteDual8); - BL(R12); - - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storePairedS8 = GetCodePtr(); - { - // R10 is the addr - // R11 is the scale - // R12 is scratch - // S0, S1 is the values - PUSH(5, R0, R1, R2, R3, _LR); - - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED); - VCVT(S1, S1, TO_INT | ROUND_TO_ZERO | IS_SIGNED); - - VMOV(R0, S0); - VMOV(R1, S1); - MOV(R2, R10); - MOVI2R(R12, (u32)&WriteDual8); - BL(R12); - - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storePairedU16 = GetCodePtr(); - { - PUSH(5, R0, R1, R2, R3, _LR); - - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO); - VCVT(S1, S1, TO_INT | ROUND_TO_ZERO); - - VMOV(R0, S0); - VMOV(R1, S1); - MOV(R2, R10); - MOVI2R(R12, (u32)&WriteDual16); - BL(R12); - - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storePairedS16 = GetCodePtr(); - { - PUSH(5, R0, R1, R2, R3, _LR); - - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - VMUL(S1, S1, S2); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED); - VCVT(S1, S1, TO_INT | ROUND_TO_ZERO | IS_SIGNED); - - VMOV(R0, S0); - VMOV(R1, S1); - MOV(R2, R10); - MOVI2R(R12, (u32)&WriteDual16); - BL(R12); - - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storeSingleIllegal = GetCodePtr(); - BKPT(0x27); - const u8* storeSingleFloat = GetCodePtr(); - { - TST(R10, arghmask); - FixupBranch argh = B_CC(CC_NEQ); - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - VMOV(R12, S0); - REV(R12, R12); - STR(R12, R10); - MOV(_PC, _LR); - - SetJumpTarget(argh); - - PUSH(5, R0, R1, R2, R3, _LR); - VMOV(R0, S0); - MOV(R1, R10); - MOVI2R(R10, (u32)&PowerPC::Write_U32); - BL(R10); - - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storeSingleU8 = GetCodePtr(); // Used by MKWii - { - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - - TST(R10, arghmask); - FixupBranch argh = B_CC(CC_NEQ); - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO); - VMOV(R12, S0); - STRB(R12, R10); - MOV(_PC, _LR); - - SetJumpTarget(argh); - - PUSH(5, R0, R1, R2, R3, _LR); - VMOV(R0, S0); - MOV(R1, R10); - MOVI2R(R10, (u32)&PowerPC::Write_U8); - BL(R10); - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storeSingleS8 = GetCodePtr(); - { - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - - TST(R10, arghmask); - FixupBranch argh = B_CC(CC_NEQ); - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED); - VMOV(R12, S0); - STRB(R12, R10); - MOV(_PC, _LR); - - SetJumpTarget(argh); - - PUSH(5, R0, R1, R2, R3, _LR); - VMOV(R0, S0); - MOV(R1, R10); - MOVI2R(R10, (u32)&PowerPC::Write_U8); - BL(R10); - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storeSingleU16 = GetCodePtr(); // Used by MKWii - { - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - - TST(R10, arghmask); - FixupBranch argh = B_CC(CC_NEQ); - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO); - VMOV(R12, S0); - REV16(R12, R12); - STRH(R12, R10); - MOV(_PC, _LR); - - SetJumpTarget(argh); - - PUSH(5, R0, R1, R2, R3, _LR); - VMOV(R0, S0); - MOV(R1, R10); - MOVI2R(R10, (u32)&PowerPC::Write_U16); - BL(R10); - - POP(5, R0, R1, R2, R3, _PC); - } - const u8* storeSingleS16 = GetCodePtr(); - { - MOVI2R(R12, (u32)&m_quantizeTableS); - ADD(R12, R12, R11); - VLDR(S2, R12, 0); - VMUL(S0, S0, S2); - - TST(R10, arghmask); - FixupBranch argh = B_CC(CC_NEQ); - BIC(R10, R10, mask); - ADD(R10, R10, R8); - - VCVT(S0, S0, TO_INT | ROUND_TO_ZERO | IS_SIGNED); - VMOV(R12, S0); - REV16(R12, R12); - STRH(R12, R10); - MOV(_PC, _LR); - - SetJumpTarget(argh); - - PUSH(5, R0, R1, R2, R3, _LR); - VMOV(R0, S0); - MOV(R1, R10); - MOVI2R(R10, (u32)&PowerPC::Write_U16); - BL(R10); - - POP(5, R0, R1, R2, R3, _PC); - } - - pairedStoreQuantized = reinterpret_cast(const_cast(AlignCode16())); - ReserveCodeSpace(16 * sizeof(u8*)); - - pairedStoreQuantized[0] = storePairedFloat; - pairedStoreQuantized[1] = storePairedIllegal; - pairedStoreQuantized[2] = storePairedIllegal; - pairedStoreQuantized[3] = storePairedIllegal; - pairedStoreQuantized[4] = storePairedU8; - pairedStoreQuantized[5] = storePairedU16; - pairedStoreQuantized[6] = storePairedS8; - pairedStoreQuantized[7] = storePairedS16; - - pairedStoreQuantized[8] = storeSingleFloat; - pairedStoreQuantized[9] = storeSingleIllegal; - pairedStoreQuantized[10] = storeSingleIllegal; - pairedStoreQuantized[11] = storeSingleIllegal; - pairedStoreQuantized[12] = storeSingleU8; - pairedStoreQuantized[13] = storeSingleU16; - pairedStoreQuantized[14] = storeSingleS8; - pairedStoreQuantized[15] = storeSingleS16; - - m_increment_profile_counter = AlignCode16(); - - nemit.VLD1(I_64, D0, R0); // Start - ADD(R0, R0, 8); - nemit.VLD1(I_64, D1, R0); // End - ADD(R0, R0, 8); - nemit.VLD1(I_64, D2, R0); // Counter - nemit.VSUB(I_64, D1, D1, D0); - nemit.VADD(I_64, D2, D2, D1); - nemit.VST1(I_64, D2, R0); - MOV(_PC, _LR); -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitAsm.h b/Source/Core/Core/PowerPC/JitArm32/JitAsm.h deleted file mode 100644 index bcdca0ab39..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.h +++ /dev/null @@ -1,32 +0,0 @@ -// Copyright 2008 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/JitCommon/JitAsmCommon.h" - -class JitArmAsmRoutineManager : public CommonAsmRoutinesBase, public ArmGen::ARMCodeBlock -{ -private: - void Generate(); - void GenerateCommon(); - -public: - const u8* m_increment_profile_counter; - - void Init() - { - AllocCodeSpace(8192); - Generate(); - WriteProtect(); - } - - void Shutdown() - { - FreeCodeSpace(); - } -}; - -extern JitArmAsmRoutineManager asm_routines; diff --git a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp deleted file mode 100644 index 800c729e93..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.cpp +++ /dev/null @@ -1,252 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitFPRCache.h" - -using namespace ArmGen; - -ArmFPRCache::ArmFPRCache() -{ - emit = nullptr; -} - -void ArmFPRCache::Init(ARMXEmitter *emitter) -{ - emit = emitter; - ARMReg *PPCRegs = GetPPCAllocationOrder(NUMPPCREG); - ARMReg *Regs = GetAllocationOrder(NUMARMREG); - - for (u8 a = 0; a < NUMPPCREG; ++a) - { - ArmCRegs[a].PPCReg = 33; - ArmCRegs[a].Reg = PPCRegs[a]; - ArmCRegs[a].LastLoad = 0; - ArmCRegs[a].PS1 = false; - } - for (u8 a = 0; a < NUMARMREG; ++a) - { - ArmRegs[a].Reg = Regs[a]; - ArmRegs[a].free = true; - } -} - -void ArmFPRCache::Start(PPCAnalyst::BlockRegStats &stats) -{ - // Make sure the state is wiped on Start - // There is a potential for the state remaining dirty from the previous block - // This is due to conditional branches not clearing the register cache state - - for (u8 a = 0; a < 32; ++a) - { - if (_regs[a][0].GetType() != REG_NOTLOADED) - { - u32 regindex = _regs[a][0].GetRegIndex(); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[a][0].Flush(); - } - if (_regs[a][1].GetType() != REG_NOTLOADED) - { - u32 regindex = _regs[a][1].GetRegIndex(); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[a][1].Flush(); - } - } -} - -ARMReg *ArmFPRCache::GetPPCAllocationOrder(int &count) -{ - // This will return us the allocation order of the registers we can use on - // the ppc side. - static ARMReg allocationOrder[] = - { - D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, - D14, D15, D16, D17, D18, D19, D20, D21, D22, - D23, D24, D25, D26, D27, D28, D29, D30, D31 - }; - count = sizeof(allocationOrder) / sizeof(const int); - return allocationOrder; -} -ARMReg *ArmFPRCache::GetAllocationOrder(int &count) -{ - // This will return us the allocation order of the registers we can use on - // the host side. - static ARMReg allocationOrder[] = - { - D0, D1, D2, D3 - }; - count = sizeof(allocationOrder) / sizeof(const int); - return allocationOrder; -} - -ARMReg ArmFPRCache::GetReg(bool AutoLock) -{ - for (u8 a = 0; a < NUMARMREG; ++a) - { - if (ArmRegs[a].free) - { - // Alright, this one is free - if (AutoLock) - ArmRegs[a].free = false; - return ArmRegs[a].Reg; - } - } - - // Uh Oh, we have all them locked.... - _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb"); - return D31; -} -void ArmFPRCache::Unlock(ARMReg V0) -{ - for (u8 RegNum = 0; RegNum < NUMARMREG; ++RegNum) - { - if (ArmRegs[RegNum].Reg == V0) - { - _assert_msg_(_DYNA_REC, !ArmRegs[RegNum].free, "This register is already unlocked"); - ArmRegs[RegNum].free = true; - } - } -} -u32 ArmFPRCache::GetLeastUsedRegister(bool increment) -{ - u32 HighestUsed = 0; - u8 lastRegIndex = 0; - for (u8 a = 0; a < NUMPPCREG; ++a) - { - if (increment) - ++ArmCRegs[a].LastLoad; - - if (ArmCRegs[a].LastLoad > HighestUsed) - { - HighestUsed = ArmCRegs[a].LastLoad; - lastRegIndex = a; - } - } - return lastRegIndex; -} -bool ArmFPRCache::FindFreeRegister(u32 ®index) -{ - for (u8 a = 0; a < NUMPPCREG; ++a) - { - if (ArmCRegs[a].PPCReg == 33) - { - regindex = a; - return true; - } - } - return false; -} - -ARMReg ArmFPRCache::GetPPCReg(u32 preg, bool PS1, bool preLoad) -{ - u32 lastRegIndex = GetLeastUsedRegister(true); - - if (_regs[preg][PS1].GetType() != REG_NOTLOADED) - { - u8 a = _regs[preg][PS1].GetRegIndex(); - ArmCRegs[a].LastLoad = 0; - return ArmCRegs[a].Reg; - } - - u32 regindex; - if (FindFreeRegister(regindex)) - { - s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); - - ArmCRegs[regindex].PPCReg = preg; - ArmCRegs[regindex].LastLoad = 0; - ArmCRegs[regindex].PS1 = PS1; - - _regs[preg][PS1].LoadToReg(regindex); - if (preLoad) - emit->VLDR(ArmCRegs[regindex].Reg, R9, offset); - return ArmCRegs[regindex].Reg; - } - - // Alright, we couldn't get a free space, dump that least used register - s16 offsetOld = PPCSTATE_OFF(ps) + (ArmCRegs[lastRegIndex].PPCReg * 16) + (ArmCRegs[lastRegIndex].PS1 ? 8 : 0); - s16 offsetNew = PPCSTATE_OFF(ps) + (preg * 16) + (PS1 ? 8 : 0); - - emit->VSTR(ArmCRegs[lastRegIndex].Reg, R9, offsetOld); - - _regs[ArmCRegs[lastRegIndex].PPCReg][ArmCRegs[lastRegIndex].PS1].Flush(); - - ArmCRegs[lastRegIndex].PPCReg = preg; - ArmCRegs[lastRegIndex].LastLoad = 0; - ArmCRegs[lastRegIndex].PS1 = PS1; - - _regs[preg][PS1].LoadToReg(lastRegIndex); - if (preLoad) - emit->VLDR(ArmCRegs[lastRegIndex].Reg, R9, offsetNew); - return ArmCRegs[lastRegIndex].Reg; -} - -ARMReg ArmFPRCache::R0(u32 preg, bool preLoad) -{ - return GetPPCReg(preg, false, preLoad); -} - -ARMReg ArmFPRCache::R1(u32 preg, bool preLoad) -{ - return GetPPCReg(preg, true, preLoad); -} - -void ArmFPRCache::Flush(FlushMode mode) -{ - for (u8 a = 0; a < 32; ++a) - { - if (_regs[a][0].GetType() != REG_NOTLOADED) - { - s16 offset = PPCSTATE_OFF(ps) + (a * 16); - u32 regindex = _regs[a][0].GetRegIndex(); - emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); - - if (mode == FLUSH_ALL) - { - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[a][0].Flush(); - } - } - if (_regs[a][1].GetType() != REG_NOTLOADED) - { - s16 offset = PPCSTATE_OFF(ps) + (a * 16) + 8; - u32 regindex = _regs[a][1].GetRegIndex(); - emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); - - if (mode == FLUSH_ALL) - { - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[a][1].Flush(); - } - } - } -} - -void ArmFPRCache::StoreFromRegister(u32 preg) -{ - if (_regs[preg][0].GetType() != REG_NOTLOADED) - { - s16 offset = PPCSTATE_OFF(ps) + (preg * 16); - u32 regindex = _regs[preg][0].GetRegIndex(); - emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); - - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[preg][0].Flush(); - } - if (_regs[preg][1].GetType() != REG_NOTLOADED) - { - s16 offset = PPCSTATE_OFF(ps) + (preg * 16) + 8; - u32 regindex = _regs[preg][1].GetRegIndex(); - emit->VSTR(ArmCRegs[regindex].Reg, R9, offset); - - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - _regs[preg][1].Flush(); - } -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h b/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h deleted file mode 100644 index 9103089b1f..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitFPRCache.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PPCAnalyst.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -#define ARMFPUREGS 32 - -class ArmFPRCache -{ -private: - OpArg _regs[32][2]; // One for each FPR reg - JRCPPC ArmCRegs[ARMFPUREGS]; - JRCReg ArmRegs[ARMFPUREGS]; - - int NUMPPCREG; - int NUMARMREG; - - ArmGen::ARMReg *GetAllocationOrder(int &count); - ArmGen::ARMReg *GetPPCAllocationOrder(int &count); - - ArmGen::ARMReg GetPPCReg(u32 preg, bool PS1, bool preLoad); - - u32 GetLeastUsedRegister(bool increment); - bool FindFreeRegister(u32 ®index); -protected: - ArmGen::ARMXEmitter *emit; - -public: - ArmFPRCache(); - ~ArmFPRCache() {} - - void Init(ArmGen::ARMXEmitter *emitter); - void Start(PPCAnalyst::BlockRegStats &stats); - - void SetEmitter(ArmGen::ARMXEmitter *emitter) {emit = emitter;} - - ArmGen::ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. - void Unlock(ArmGen::ARMReg V0); - void Flush(FlushMode mode = FLUSH_ALL); - ArmGen::ARMReg R0(u32 preg, bool preLoad = true); // Returns a cached register - ArmGen::ARMReg R1(u32 preg, bool preLoad = true); - - void StoreFromRegister(u32 preg); -}; diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp deleted file mode 100644 index e14c09ee8b..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.cpp +++ /dev/null @@ -1,319 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitRegCache.h" - -using namespace ArmGen; - -ArmRegCache::ArmRegCache() -{ - emit = nullptr; -} - -void ArmRegCache::Init(ARMXEmitter *emitter) -{ - emit = emitter; - ARMReg *PPCRegs = GetPPCAllocationOrder(NUMPPCREG); - ARMReg *Regs = GetAllocationOrder(NUMARMREG); - - for (u8 a = 0; a < NUMPPCREG; ++a) - { - ArmCRegs[a].PPCReg = 33; - ArmCRegs[a].Reg = PPCRegs[a]; - ArmCRegs[a].LastLoad = 0; - } - for (u8 a = 0; a < NUMARMREG; ++a) - { - ArmRegs[a].Reg = Regs[a]; - ArmRegs[a].free = true; - } -} - -void ArmRegCache::Start(PPCAnalyst::BlockRegStats &stats) -{ - // Make sure the state is wiped on Start - // There is a potential for the state remaining dirty from the previous block - // This is due to conditional branches not clearing the register cache state - for (u8 a = 0; a < 32; ++a) - { - if (regs[a].GetType() == REG_REG) - { - u32 regindex = regs[a].GetRegIndex(); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - } - regs[a].Flush(); - } -} - -ARMReg *ArmRegCache::GetPPCAllocationOrder(int &count) -{ - // This will return us the allocation order of the registers we can use on - // the ppc side. - static ARMReg allocationOrder[] = - { - R0, R1, R2, R3, R4, R5, R6, R7 - }; - count = sizeof(allocationOrder) / sizeof(const int); - return allocationOrder; -} -ARMReg *ArmRegCache::GetAllocationOrder(int &count) -{ - // This will return us the allocation order of the registers we can use on - // the host side. - static ARMReg allocationOrder[] = - { - R14, R12, R11, R10 - }; - count = sizeof(allocationOrder) / sizeof(const int); - return allocationOrder; -} - -ARMReg ArmRegCache::GetReg(bool AutoLock) -{ - for (u8 a = 0; a < NUMARMREG; ++a) - { - if (ArmRegs[a].free) - { - // Alright, this one is free - if (AutoLock) - ArmRegs[a].free = false; - return ArmRegs[a].Reg; - } - } - - // Uh Oh, we have all them locked.... - _assert_msg_(_DYNA_REC_, false, "All available registers are locked dumb dumb"); - return R0; -} - -void ArmRegCache::Unlock(ARMReg R0, ARMReg R1, ARMReg R2, ARMReg R3) -{ - for (u8 RegNum = 0; RegNum < NUMARMREG; ++RegNum) - { - if (ArmRegs[RegNum].Reg == R0) - { - _assert_msg_(_DYNA_REC, !ArmRegs[RegNum].free, "This register is already unlocked"); - ArmRegs[RegNum].free = true; - } - - if (R1 != INVALID_REG && ArmRegs[RegNum].Reg == R1) - ArmRegs[RegNum].free = true; - - if (R2 != INVALID_REG && ArmRegs[RegNum].Reg == R2) - ArmRegs[RegNum].free = true; - - if (R3 != INVALID_REG && ArmRegs[RegNum].Reg == R3) - ArmRegs[RegNum].free = true; - } -} - -u32 ArmRegCache::GetLeastUsedRegister(bool increment) -{ - u32 HighestUsed = 0; - u8 lastRegIndex = 0; - for (u8 a = 0; a < NUMPPCREG; ++a) - { - if (increment) - ++ArmCRegs[a].LastLoad; - if (ArmCRegs[a].LastLoad > HighestUsed) - { - HighestUsed = ArmCRegs[a].LastLoad; - lastRegIndex = a; - } - } - return lastRegIndex; -} - -bool ArmRegCache::FindFreeRegister(u32 ®index) -{ - for (u8 a = 0; a < NUMPPCREG; ++a) - { - if (ArmCRegs[a].PPCReg == 33) - { - regindex = a; - return true; - } - } - return false; -} - -ARMReg ArmRegCache::R(u32 preg) -{ - if (regs[preg].GetType() == REG_IMM) - return BindToRegister(preg, true, true); - - u32 lastRegIndex = GetLeastUsedRegister(true); - - // Check if already Loaded - if (regs[preg].GetType() == REG_REG) - { - u8 a = regs[preg].GetRegIndex(); - ArmCRegs[a].LastLoad = 0; - return ArmCRegs[a].Reg; - } - - // Check if we have a free register - u32 regindex; - if (FindFreeRegister(regindex)) - { - emit->LDR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); - ArmCRegs[regindex].PPCReg = preg; - ArmCRegs[regindex].LastLoad = 0; - - regs[preg].LoadToReg(regindex); - return ArmCRegs[regindex].Reg; - } - - // Alright, we couldn't get a free space, dump that least used register - emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4); - emit->LDR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); - - regs[ArmCRegs[lastRegIndex].PPCReg].Flush(); - - ArmCRegs[lastRegIndex].PPCReg = preg; - ArmCRegs[lastRegIndex].LastLoad = 0; - - regs[preg].LoadToReg(lastRegIndex); - - return ArmCRegs[lastRegIndex].Reg; -} - -void ArmRegCache::BindToRegister(u32 preg, bool doLoad) -{ - BindToRegister(preg, doLoad, false); -} - -ARMReg ArmRegCache::BindToRegister(u32 preg, bool doLoad, bool kill_imm) -{ - u32 lastRegIndex = GetLeastUsedRegister(false); - u32 freeRegIndex; - bool found_free = FindFreeRegister(freeRegIndex); - if (regs[preg].GetType() == REG_IMM) - { - if (!kill_imm) - return INVALID_REG; - if (found_free) - { - if (doLoad) - emit->MOVI2R(ArmCRegs[freeRegIndex].Reg, regs[preg].GetImm()); - ArmCRegs[freeRegIndex].PPCReg = preg; - ArmCRegs[freeRegIndex].LastLoad = 0; - regs[preg].LoadToReg(freeRegIndex); - return ArmCRegs[freeRegIndex].Reg; - } - else - { - emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4); - if (doLoad) - emit->MOVI2R(ArmCRegs[lastRegIndex].Reg, regs[preg].GetImm()); - - regs[ArmCRegs[lastRegIndex].PPCReg].Flush(); - - ArmCRegs[lastRegIndex].PPCReg = preg; - ArmCRegs[lastRegIndex].LastLoad = 0; - - regs[preg].LoadToReg(lastRegIndex); - return ArmCRegs[lastRegIndex].Reg; - } - } - else if (regs[preg].GetType() == REG_NOTLOADED) - { - if (found_free) - { - if (doLoad) - emit->LDR(ArmCRegs[freeRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); - - ArmCRegs[freeRegIndex].PPCReg = preg; - ArmCRegs[freeRegIndex].LastLoad = 0; - regs[preg].LoadToReg(freeRegIndex); - return ArmCRegs[freeRegIndex].Reg; - } - else - { - emit->STR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + ArmCRegs[lastRegIndex].PPCReg * 4); - - if (doLoad) - emit->LDR(ArmCRegs[lastRegIndex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); - - regs[ArmCRegs[lastRegIndex].PPCReg].Flush(); - - ArmCRegs[lastRegIndex].PPCReg = preg; - ArmCRegs[lastRegIndex].LastLoad = 0; - - regs[preg].LoadToReg(lastRegIndex); - return ArmCRegs[lastRegIndex].Reg; - } - } - else - { - u8 a = regs[preg].GetRegIndex(); - ArmCRegs[a].LastLoad = 0; - return ArmCRegs[a].Reg; - } -} - -void ArmRegCache::SetImmediate(u32 preg, u32 imm) -{ - if (regs[preg].GetType() == REG_REG) - { - // Dump real reg at this point - u32 regindex = regs[preg].GetRegIndex(); - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - } - regs[preg].LoadToImm(imm); -} - -void ArmRegCache::Flush(FlushMode mode) -{ - for (u8 a = 0; a < 32; ++a) - { - if (regs[a].GetType() == REG_IMM) - { - if (mode == FLUSH_ALL) - { - // This changes the type over to a REG_REG and gets caught below. - BindToRegister(a, true, true); - } - else - { - ARMReg tmp = GetReg(); - emit->MOVI2R(tmp, regs[a].GetImm()); - emit->STR(tmp, R9, PPCSTATE_OFF(gpr) + a * 4); - Unlock(tmp); - } - } - if (regs[a].GetType() == REG_REG) - { - u32 regindex = regs[a].GetRegIndex(); - emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + a * 4); - if (mode == FLUSH_ALL) - { - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - regs[a].Flush(); - } - } - } -} - -void ArmRegCache::StoreFromRegister(u32 preg) -{ - if (regs[preg].GetType() == REG_IMM) - { - // This changes the type over to a REG_REG and gets caught below. - BindToRegister(preg, true, true); - } - if (regs[preg].GetType() == REG_REG) - { - u32 regindex = regs[preg].GetRegIndex(); - emit->STR(ArmCRegs[regindex].Reg, R9, PPCSTATE_OFF(gpr) + preg * 4); - - ArmCRegs[regindex].PPCReg = 33; - ArmCRegs[regindex].LastLoad = 0; - regs[preg].Flush(); - } -} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h b/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h deleted file mode 100644 index 75056d2fde..0000000000 --- a/Source/Core/Core/PowerPC/JitArm32/JitRegCache.h +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2013 Dolphin Emulator Project -// Licensed under GPLv2+ -// Refer to the license.txt file included. - -#pragma once - -#include "Common/ArmEmitter.h" -#include "Core/PowerPC/Gekko.h" -#include "Core/PowerPC/PPCAnalyst.h" - -// This ARM Register cache actually pre loads the most used registers before -// the block to increase speed since every memory load requires two -// instructions to load it. We are going to use R0-RMAX as registers for the -// use of PPC Registers. -// Allocation order as follows -#define ARMREGS 16 -// Allocate R0 to R9 for PPC first. -// For General registers on the host side, start with R14 and go down as we go -// R13 is reserved for our stack pointer, don't ever use that. Unless you save -// it -// So we have R14, R12, R11, R10 to work with instructions - -enum RegType -{ - REG_NOTLOADED = 0, - REG_REG, // Reg type is register - REG_IMM, // Reg is really a IMM - REG_AWAY, // Bound to a register, but not preloaded -}; - -enum FlushMode -{ - FLUSH_ALL = 0, - FLUSH_MAINTAIN_STATE, -}; - -class OpArg -{ - private: - RegType m_type; // store type - u8 m_reg; // index to register - u32 m_value; // IMM value - - public: - OpArg() - { - m_type = REG_NOTLOADED; - m_reg = 33; - m_value = 0; - } - - RegType GetType() - { - return m_type; - } - - u8 GetRegIndex() - { - return m_reg; - } - u32 GetImm() - { - return m_value; - } - void LoadToAway(u8 reg) - { - m_type = REG_AWAY; - m_reg = reg; - } - void LoadToReg(u8 reg) - { - m_type = REG_REG; - m_reg = reg; - } - void LoadToImm(u32 imm) - { - m_type = REG_IMM; - m_value = imm; - } - void Flush() - { - m_type = REG_NOTLOADED; - } -}; - -struct JRCPPC -{ - u32 PPCReg; // Tied to which PPC Register - bool PS1; - ArmGen::ARMReg Reg; // Tied to which ARM Register - u32 LastLoad; -}; -struct JRCReg -{ - ArmGen::ARMReg Reg; // Which reg this is. - bool free; -}; -class ArmRegCache -{ -private: - OpArg regs[32]; - JRCPPC ArmCRegs[ARMREGS]; - JRCReg ArmRegs[ARMREGS]; // Four registers remaining - - int NUMPPCREG; - int NUMARMREG; - - ArmGen::ARMReg *GetAllocationOrder(int &count); - ArmGen::ARMReg *GetPPCAllocationOrder(int &count); - - u32 GetLeastUsedRegister(bool increment); - bool FindFreeRegister(u32 ®index); - - // Private function can kill immediates - ArmGen::ARMReg BindToRegister(u32 preg, bool doLoad, bool kill_imm); - -protected: - ArmGen::ARMXEmitter *emit; - -public: - ArmRegCache(); - ~ArmRegCache() {} - - void Init(ArmGen::ARMXEmitter *emitter); - void Start(PPCAnalyst::BlockRegStats &stats); - - ArmGen::ARMReg GetReg(bool AutoLock = true); // Return a ARM register we can use. - void Unlock(ArmGen::ARMReg R0, ArmGen::ARMReg R1 = ArmGen::INVALID_REG, ArmGen::ARMReg R2 = ArmGen::INVALID_REG, ArmGen::ARMReg R3 = ArmGen::INVALID_REG); - void Flush(FlushMode mode = FLUSH_ALL); - ArmGen::ARMReg R(u32 preg); // Returns a cached register - bool IsImm(u32 preg) { return regs[preg].GetType() == REG_IMM; } - u32 GetImm(u32 preg) { return regs[preg].GetImm(); } - void SetImmediate(u32 preg, u32 imm); - - // Public function doesn't kill immediates - // In reality when you call R(u32) it'll bind an immediate there - void BindToRegister(u32 preg, bool doLoad = true); - - void StoreFromRegister(u32 preg); -}; diff --git a/Source/Core/Core/PowerPC/JitInterface.cpp b/Source/Core/Core/PowerPC/JitInterface.cpp index f76011ec8a..e1d5415ee5 100644 --- a/Source/Core/Core/PowerPC/JitInterface.cpp +++ b/Source/Core/Core/PowerPC/JitInterface.cpp @@ -27,11 +27,6 @@ #include "Core/PowerPC/Jit64IL/JitIL_Tables.h" #endif -#if _M_ARM_32 -#include "Core/PowerPC/JitArm32/Jit.h" -#include "Core/PowerPC/JitArm32/JitArm_Tables.h" -#endif - #if _M_ARM_64 #include "Core/PowerPC/JitArm64/Jit.h" #include "Core/PowerPC/JitArm64/JitArm64_Tables.h" @@ -63,11 +58,6 @@ namespace JitInterface ptr = new JitIL(); break; #endif - #if _M_ARM_32 - case PowerPC::CORE_JITARM: - ptr = new JitArm(); - break; - #endif #if _M_ARM_64 case PowerPC::CORE_JITARM64: ptr = new JitArm64(); @@ -94,11 +84,6 @@ namespace JitInterface JitILTables::InitTables(); break; #endif - #if _M_ARM_32 - case PowerPC::CORE_JITARM: - JitArmTables::InitTables(); - break; - #endif #if _M_ARM_64 case PowerPC::CORE_JITARM64: JitArm64Tables::InitTables(); diff --git a/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp b/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp index 5c4a85fc3e..8e8fd41de9 100644 --- a/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp +++ b/Source/Core/DolphinWX/Config/GeneralConfigPane.cpp @@ -26,8 +26,6 @@ GeneralConfigPane::GeneralConfigPane(wxWindow* parent, wxWindowID id) #ifdef _M_X86_64 { 1, _("JIT Recompiler (recommended)") }, { 2, _("JITIL Recompiler (slower, experimental)") }, -#elif defined(_M_ARM_32) - { 3, _("Arm JIT (experimental)") }, #elif defined(_M_ARM_64) { 4, _("Arm64 JIT (experimental)") }, #endif diff --git a/Source/Core/DolphinWX/Debugger/JitWindow.cpp b/Source/Core/DolphinWX/Debugger/JitWindow.cpp index bf13cd7f08..a86afee4f9 100644 --- a/Source/Core/DolphinWX/Debugger/JitWindow.cpp +++ b/Source/Core/DolphinWX/Debugger/JitWindow.cpp @@ -50,8 +50,6 @@ CJitWindow::CJitWindow(wxWindow* parent, wxWindowID id, const wxPoint& pos, m_disassembler.reset(GetNewDisassembler("x86")); #elif defined(_M_ARM_64) m_disassembler.reset(GetNewDisassembler("aarch64")); -#elif defined(_M_ARM_32) - m_disassembler.reset(GetNewDisassembler("armv7")); #else m_disassembler.reset(GetNewDisassembler("UNK")); #endif diff --git a/Source/UnitTests/Core/PageFaultTest.cpp b/Source/UnitTests/Core/PageFaultTest.cpp index d2f97fe119..002b5ec866 100644 --- a/Source/UnitTests/Core/PageFaultTest.cpp +++ b/Source/UnitTests/Core/PageFaultTest.cpp @@ -12,7 +12,7 @@ // include order is important #include // NOLINT -#if _M_X86_64 || _M_ARM_32 +#if _M_X86_64 enum { #ifdef _WIN32