From 2f6b46fd4f4eb593746391131e2523f5252d0ea4 Mon Sep 17 00:00:00 2001 From: RSDuck Date: Tue, 25 Jun 2019 17:09:27 +0200 Subject: [PATCH] JIT: implemented most ALU instructions --- src/ARM.cpp | 18 +- src/ARMJIT.cpp | 16 +- src/ARMJIT.h | 25 +- src/ARMJIT_RegCache.h | 136 +++++++ src/ARMJIT_x64/ARMJIT_ALU.cpp | 546 +++++++++++++++++++++++++++++ src/ARMJIT_x64/ARMJIT_Compiler.cpp | 245 ++++++------- src/ARMJIT_x64/ARMJIT_Compiler.h | 60 +++- src/CMakeLists.txt | 1 + 8 files changed, 881 insertions(+), 166 deletions(-) create mode 100644 src/ARMJIT_RegCache.h create mode 100644 src/ARMJIT_x64/ARMJIT_ALU.cpp diff --git a/src/ARM.cpp b/src/ARM.cpp index f2b92b4a..eadedc7d 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -560,10 +560,10 @@ void ARMv5::Execute() AddCycles_C(); }*/ - if (!ARMJIT::IsMapped(Num, R[15] - ((CPSR&0x20)?2:4))) - printf("aaarg ungempappter raum %x\n", R[15]); + /*if (!ARMJIT::IsMapped(0, R[15] - ((CPSR&0x20)?2:4))) + printf("aaarg ungempappter raum %x\n", R[15]);*/ - ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(Num, R[15] - ((CPSR&0x20)?2:4)); + ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4)); if (block == NULL) block = ARMJIT::CompileBlock(this); Cycles += block(); @@ -615,7 +615,7 @@ void ARMv4::Execute() while (NDS::ARM7Timestamp < NDS::ARM7Target) { - if (CPSR & 0x20) // THUMB + /*if (CPSR & 0x20) // THUMB { // prefetch R[15] += 2; @@ -643,7 +643,15 @@ void ARMv4::Execute() } else AddCycles_C(); - } + }*/ + + /*if (!ARMJIT::IsMapped(1, R[15] - ((CPSR&0x20)?2:4))) + printf("aaarg ungempappter raum %x\n", R[15]);*/ + + ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4)); + if (block == NULL) + block = ARMJIT::CompileBlock(this); + Cycles += block(); // TODO optimize this shit!!! 
if (Halted) diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 489cdcfc..74e154b8 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -1,5 +1,7 @@ #include "ARMJIT.h" +#include <string.h> + +#include "ARMJIT_x64/ARMJIT_Compiler.h" namespace ARMJIT @@ -8,7 +10,6 @@ namespace ARMJIT Compiler* compiler; BlockCache cache; - #define DUP2(x) x, x static ptrdiff_t JIT_MEM[2][32] = { @@ -174,4 +175,17 @@ CompiledBlock CompileBlock(ARM* cpu) return block; } +void ResetBlocks() +{ + memset(cache.MainRAM, 0, sizeof(cache.MainRAM)); + memset(cache.SWRAM, 0, sizeof(cache.SWRAM)); + memset(cache.ARM9_BIOS, 0, sizeof(cache.ARM9_BIOS)); + memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM)); + memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC)); + memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS)); + memset(cache.ARM7_WIRAM, 0, sizeof(cache.ARM7_WIRAM)); + memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM)); + memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM)); +} + } \ No newline at end of file diff --git a/src/ARMJIT.h b/src/ARMJIT.h index d718295b..2ca29e8e 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -3,8 +3,6 @@ #include "types.h" -#include <string.h> - #include "ARM.h" #include "ARM_InstrInfo.h" @@ -13,14 +11,6 @@ namespace ARMJIT typedef u32 (*CompiledBlock)(); -class RegCache -{ - -static const int NativeRegAllocOrder[]; -static const int NativeRegsCount; - -}; - struct FetchedInstr { u32 A_Reg(int pos) const @@ -117,24 +107,13 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func) cache.AddrMapping[num][(addr & 0xFFFFFFF) >> 14][(addr & 0x3FFF) >> 1] = func; } -inline void ResetBlocks() -{ - memset(cache.MainRAM, 0, sizeof(cache.MainRAM)); - memset(cache.SWRAM, 0, sizeof(cache.SWRAM)); - memset(cache.ARM9_BIOS, 0, sizeof(cache.ARM9_BIOS)); - memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM)); - memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC)); - memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS)); - memset(cache.ARM7_WIRAM, 0, sizeof(cache.ARM7_WIRAM)); - memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM)); - memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM)); -} - void Init(); void DeInit(); CompiledBlock CompileBlock(ARM* cpu); +void ResetBlocks(); + } #endif \ No newline at end of file diff --git a/src/ARMJIT_RegCache.h b/src/ARMJIT_RegCache.h new file mode 100644 index 00000000..e18d50f4 --- /dev/null +++ b/src/ARMJIT_RegCache.h @@ -0,0 +1,136 @@ +#ifndef ARMJIT_REGCACHE_H +#define ARMJIT_REGCACHE_H + +#include "ARMJIT.h" + +// TODO: replace this in the future +#include "dolphin/BitSet.h" + +#include <assert.h> + +namespace ARMJIT +{ + +template <typename T, typename Reg> +class RegCache +{ +public: + RegCache() + {} + + RegCache(T* compiler, FetchedInstr instrs[], int instrsCount) + : Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount) + { + for (int i = 0; i < 16; i++) + Mapping[i] = (Reg)-1; + } + + void UnloadRegister(int reg) + { + assert(Mapping[reg] != -1); + + if (DirtyRegs & (1 << reg)) + Compiler->UnloadReg(reg, Mapping[reg]); + + DirtyRegs &= ~(1 << reg); + LoadedRegs &= ~(1 << reg); + NativeRegsUsed &= ~(1 << (int)Mapping[reg]); + Mapping[reg] = (Reg)-1; + } + + void LoadRegister(int reg) + { + assert(Mapping[reg] == -1); + for (int i = 0; i < NativeRegsAvailable; i++) + { + Reg nativeReg = NativeRegAllocOrder[i]; + if (!(NativeRegsUsed & (1 << nativeReg))) + { + Mapping[reg] = nativeReg; + NativeRegsUsed |= 1 << (int)nativeReg; + LoadedRegs |= 1 << reg; + + Compiler->LoadReg(reg, nativeReg); + + return; + } + } + + assert(false && "Welp!"); // Prepare() must free a host register before loading + } + + void Flush() + { + BitSet16 loadedSet(LoadedRegs); + for (int reg :
loadedSet) + UnloadRegister(reg); + } + + void Prepare(int i) + { + u16 futureNeeded = 0; + int ranking[16]; + for (int j = 0; j < 16; j++) + ranking[j] = 0; + for (int j = i; j < InstrsCount; j++) + { + BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs); + futureNeeded |= regsNeeded.m_val; + for (int reg : regsNeeded) + ranking[reg]++; + } + + // we'll unload all registers which are never used again + BitSet16 neverNeededAgain(LoadedRegs & ~futureNeeded); + for (int reg : neverNeededAgain) + UnloadRegister(reg); + + FetchedInstr Instr = Instrs[i]; + u16 necessaryRegs = (Instr.Info.SrcRegs & ~(1 << 15)) | Instr.Info.DstRegs; + BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs); + if (needToBeLoaded != BitSet16(0)) + { + int neededCount = needToBeLoaded.Count(); + BitSet16 loadedSet(LoadedRegs); + while (loadedSet.Count() + neededCount > NativeRegsAvailable) + { + int leastReg = -1; + int rank = 1000; + for (int reg : loadedSet) + { + if (!((1 << reg) & necessaryRegs) && ranking[reg] < rank) + { + leastReg = reg; + rank = ranking[reg]; + } + } + + assert(leastReg != -1); + UnloadRegister(leastReg); + + loadedSet.m_val = LoadedRegs; + } + + for (int reg : needToBeLoaded) + LoadRegister(reg); + } + DirtyRegs |= Instr.Info.DstRegs; + } + + static const Reg NativeRegAllocOrder[]; + static const int NativeRegsAvailable; + + Reg Mapping[16]; + u32 NativeRegsUsed = 0; + u16 LoadedRegs = 0; + u16 DirtyRegs = 0; + + T* Compiler; + + FetchedInstr* Instrs; + int InstrsCount; +}; + +} + +#endif \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp new file mode 100644 index 00000000..d06c99c4 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -0,0 +1,546 @@ +#include "ARMJIT_Compiler.h" + +using namespace Gen; + +namespace ARMJIT +{ + +// uses RSCRATCH3 +void Compiler::Comp_ArithTriOp(void (Compiler::*op)(int, const OpArg&, const OpArg&), + OpArg rd, OpArg rn, OpArg op2, bool carryUsed, int opFlags) +{ + if (opFlags & opSyncCarry) + { + BT(32, R(RCPSR), Imm8(29)); + if (opFlags & opInvertCarry) + CMC(); + } + + if (rd == rn && !(opFlags & opInvertOp2)) + (this->*op)(32, rd, op2); + else if (opFlags & opSymmetric && op2 == R(RSCRATCH)) + { + if (opFlags & opInvertOp2) + NOT(32, op2); + (this->*op)(32, op2, rn); + MOV(32, rd, op2); + } + else + { + if (opFlags & opInvertOp2) + { + if (op2 != R(RSCRATCH)) + { + MOV(32, R(RSCRATCH), op2); + op2 = R(RSCRATCH); + } + NOT(32, op2); + } + MOV(32, R(RSCRATCH3), rn); + (this->*op)(32, R(RSCRATCH3), op2); + MOV(32, rd, R(RSCRATCH3)); + } + + if (opFlags & opSetsFlags) + Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed); +} + +void Compiler::Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), + Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags) +{ + if (opFlags & opSyncCarry) + { + BT(32, R(RCPSR), Imm8(29)); + if (opFlags & opInvertCarry) + CMC(); + } + + if (op2 != R(RSCRATCH)) + { + MOV(32, R(RSCRATCH), op2); + op2 = R(RSCRATCH); + } + (this->*op)(32, op2, rn); + MOV(32, rd, op2); + + if (opFlags & opSetsFlags) + Comp_RetriveFlags(opFlags & opInvertCarry, opFlags & opRetriveCV, carryUsed); +} + +void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed) +{ + switch (op) + { + case 0: // TST + if (rn.IsImm()) + { + MOV(32, R(RSCRATCH3), rn); + rn = R(RSCRATCH3); + } + TEST(32, rn, op2); + break; + case 1: // TEQ + MOV(32, R(RSCRATCH3), rn); + XOR(32, R(RSCRATCH3), op2); 
+ break; + case 2: // CMP + if (rn.IsImm()) + { + MOV(32, R(RSCRATCH3), rn); + rn = R(RSCRATCH3); + } + CMP(32, rn, op2); + break; + case 3: // CMN + MOV(32, R(RSCRATCH3), rn); + ADD(32, R(RSCRATCH3), op2); + break; + } + + Comp_RetriveFlags(op == 2, op >= 2, carryUsed); +} + +// also calculates cycles +OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) +{ + if (CurrentInstr.Instr & (1 << 25)) + { + Comp_AddCycles_C(); + carryUsed = false; + return Imm32(ROR(CurrentInstr.Instr & 0xFF, (CurrentInstr.Instr >> 7) & 0x1E)); + } + else + { + int op = (CurrentInstr.Instr >> 5) & 0x3; + if (CurrentInstr.Instr & (1 << 4)) + { + Comp_AddCycles_CI(1); + OpArg rm = MapReg(CurrentInstr.A_Reg(0)); + if (rm.IsImm() && CurrentInstr.A_Reg(0) == 15) + rm = Imm32(rm.Imm32() + 4); + return Comp_RegShiftReg(op, MapReg(CurrentInstr.A_Reg(8)), rm, S, carryUsed); + } + else + { + Comp_AddCycles_C(); + return Comp_RegShiftImm(op, (CurrentInstr.Instr >> 7) & 0x1F, + MapReg(CurrentInstr.A_Reg(0)), S, carryUsed); + } + } +} + +void Compiler::A_Comp_CmpOp() +{ + u32 op = (CurrentInstr.Instr >> 21) & 0xF; + + bool carryUsed; + OpArg rn = MapReg(CurrentInstr.A_Reg(16)); + OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed); + + Comp_CmpOp(op - 0x8, rn, op2, carryUsed); +} + +void Compiler::A_Comp_Arith() +{ + bool S = CurrentInstr.Instr & (1 << 20); + u32 op = (CurrentInstr.Instr >> 21) & 0xF; + + bool carryUsed; + OpArg rn = MapReg(CurrentInstr.A_Reg(16)); + OpArg rd = MapReg(CurrentInstr.A_Reg(12)); + OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed); + + u32 sFlag = S ? opSetsFlags : 0; + switch (op) + { + case 0x0: // AND + Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, opSymmetric|sFlag); + return; + case 0x1: // EOR + Comp_ArithTriOp(XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag); + return; + case 0x2: // SUB + Comp_ArithTriOp(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); + return; + case 0x3: // RSB + if (op2.IsZero()) + { + if (rd != rn) + MOV(32, rd, rn); + NEG(32, rd); + if (S) + Comp_RetriveFlags(true, true, false); + } + else + Comp_ArithTriOpReverse(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); + return; + case 0x4: // ADD + Comp_ArithTriOp(ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV); + return; + case 0x5: // ADC + Comp_ArithTriOp(ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry); + return; + case 0x6: // SBC (SBB is not commutative, so no opSymmetric here) + Comp_ArithTriOp(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opSyncCarry|opInvertCarry); + return; + case 0x7: // RSC + Comp_ArithTriOpReverse(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry); + return; + case 0xC: // ORR + Comp_ArithTriOp(OR, rd, rn, op2, carryUsed, opSymmetric|sFlag); + return; + case 0xE: // BIC + Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2); + return; + default: + assert(false && "unimplemented"); + } +} + +void Compiler::A_Comp_MovOp() +{ + bool carryUsed; + bool S = CurrentInstr.Instr & (1 << 20); + OpArg op2 = A_Comp_GetALUOp2(S, carryUsed); + OpArg rd = MapReg(CurrentInstr.A_Reg(12)); + + if (rd != op2) + MOV(32, rd, op2); + + if (((CurrentInstr.Instr >> 21) & 0xF) == 0xF) + NOT(32, rd); + + if (S) + { + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, carryUsed); + } +} + +void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) +{ + CPSRDirty = true; + + bool carryOnly = !retriveCV && carryUsed; + if (retriveCV) + { + SETcc(CC_O, R(RSCRATCH)); + SETcc(sign ?
CC_NC : CC_C, R(RSCRATCH3)); + LEA(32, RSCRATCH2, MComplex(RSCRATCH, RSCRATCH3, SCALE_2, 0)); + } + + if (carryUsed == 983298) + printf("something is rotten in the state of Denmark %x\n", CurrentInstr.Instr); + + SETcc(CC_S, R(RSCRATCH)); + SETcc(CC_Z, R(RSCRATCH3)); + LEA(32, RSCRATCH, MComplex(RSCRATCH3, RSCRATCH, SCALE_2, 0)); + int shiftAmount = 30; + if (retriveCV || carryUsed) + { + LEA(32, RSCRATCH, MComplex(RSCRATCH2, RSCRATCH, carryOnly ? SCALE_2 : SCALE_4, 0)); + shiftAmount = carryOnly ? 29 : 28; + } + SHL(32, R(RSCRATCH), Imm8(shiftAmount)); + + AND(32, R(RCPSR), Imm32(0x3FFFFFFF & ~(carryUsed << 29) & ~((retriveCV ? 3 : 0) << 28))); + OR(32, R(RCPSR), R(RSCRATCH)); +} + +// always uses RSCRATCH, RSCRATCH2 only if S == true +OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed) +{ + carryUsed = S; + + if (S) + { + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); + BT(32, R(RCPSR), Imm8(29)); + SETcc(CC_C, R(RSCRATCH2)); + } + + MOV(32, R(RSCRATCH), rm); + static_assert(RSCRATCH3 == ECX); + MOV(32, R(ECX), rs); + AND(32, R(ECX), Imm32(0xFF)); + + FixupBranch zero = J_CC(CC_Z); + if (op < 3) + { + void (Compiler::*shiftOp)(int, const OpArg&, const OpArg&) = NULL; + if (op == 0) + shiftOp = SHL; + else if (op == 1) + shiftOp = SHR; + else if (op == 2) + shiftOp = SAR; + + CMP(32, R(ECX), Imm8(32)); + FixupBranch lt32 = J_CC(CC_L); + FixupBranch done1; + if (op < 2) + { + FixupBranch eq32 = J_CC(CC_E); + XOR(32, R(RSCRATCH), R(RSCRATCH)); + if (S) + XOR(32, R(RSCRATCH2), R(RSCRATCH2)); + done1 = J(); + SetJumpTarget(eq32); + } + (this->*shiftOp)(32, R(RSCRATCH), Imm8(31)); + (this->*shiftOp)(32, R(RSCRATCH), Imm8(1)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + + FixupBranch done2 = J(); + + SetJumpTarget(lt32); + (this->*shiftOp)(32, R(RSCRATCH), R(ECX)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + + if (op < 2) + SetJumpTarget(done1); + SetJumpTarget(done2); + + } + else if (op == 3) + { + if (S) + BT(32, R(RSCRATCH), Imm8(31)); + ROR_(32, R(RSCRATCH), R(ECX)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + } + SetJumpTarget(zero); + + return R(RSCRATCH); +} + +// may use RSCRATCH for op2 and RSCRATCH2 for the carryValue +OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& carryUsed) +{ + carryUsed = true; + + switch (op) + { + case 0: // LSL + if (amount > 0) + { + MOV(32, R(RSCRATCH), rm); + SHL(32, R(RSCRATCH), Imm8(amount)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + + return R(RSCRATCH); + } + else + { + carryUsed = false; + return rm; + } + case 1: // LSR + if (amount > 0) + { + MOV(32, R(RSCRATCH), rm); + SHR(32, R(RSCRATCH), Imm8(amount)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); + return R(RSCRATCH); + } + else + { + if (S) + { + MOV(32, R(RSCRATCH2), rm); + SHR(32, R(RSCRATCH2), Imm8(31)); + } + return Imm32(0); + } + case 2: // ASR + MOV(32, R(RSCRATCH), rm); + SAR(32, R(RSCRATCH), Imm8(amount ?
amount : 31)); + if (S) + { + if (amount == 0) + BT(32, rm, Imm8(31)); + SETcc(CC_C, R(RSCRATCH2)); + } + return R(RSCRATCH); + case 3: // ROR + MOV(32, R(RSCRATCH), rm); + if (amount > 0) + ROR_(32, R(RSCRATCH), Imm8(amount)); + else + { + BT(32, R(RCPSR), Imm8(29)); + RCR(32, R(RSCRATCH), Imm8(1)); + } + if (S) + SETcc(CC_C, R(RSCRATCH2)); + return R(RSCRATCH); + } + + assert(false); +} + +void Compiler::T_Comp_ShiftImm() +{ + OpArg rd = MapReg(CurrentInstr.T_Reg(0)); + OpArg rs = MapReg(CurrentInstr.T_Reg(3)); + + int op = (CurrentInstr.Instr >> 11) & 0x3; + int amount = (CurrentInstr.Instr >> 6) & 0x1F; + + Comp_AddCycles_C(); + + bool carryUsed; + OpArg shifted = Comp_RegShiftImm(op, amount, rs, true, carryUsed); + + if (shifted != rd) + MOV(32, rd, shifted); + + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, carryUsed); +} + +void Compiler::T_Comp_AddSub_() +{ + OpArg rd = MapReg(CurrentInstr.T_Reg(0)); + OpArg rs = MapReg(CurrentInstr.T_Reg(3)); + + int op = (CurrentInstr.Instr >> 9) & 0x3; + + OpArg rn = op >= 2 ? Imm32((CurrentInstr.Instr >> 6) & 0x7) : MapReg(CurrentInstr.T_Reg(6)); + + Comp_AddCycles_C(); + + if (op & 1) + Comp_ArithTriOp(SUB, rd, rs, rn, false, opSetsFlags|opInvertCarry|opRetriveCV); + else + Comp_ArithTriOp(ADD, rd, rs, rn, false, opSetsFlags|opSymmetric|opRetriveCV); +} + +void Compiler::T_Comp_ALU_Imm8() +{ + OpArg rd = MapReg(CurrentInstr.T_Reg(8)); + + u32 op = (CurrentInstr.Instr >> 11) & 0x3; + OpArg imm = Imm32(CurrentInstr.Instr & 0xFF); + + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: + MOV(32, rd, imm); + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, false); + return; + case 0x1: + Comp_CmpOp(2, rd, imm, false); + return; + case 0x2: + Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV); + return; + case 0x3: + Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV); + return; + } +} + +void Compiler::T_Comp_ALU() +{ + OpArg rd = MapReg(CurrentInstr.T_Reg(0)); + OpArg rs = MapReg(CurrentInstr.T_Reg(3)); + + u32 op = (CurrentInstr.Instr >> 6) & 0xF; + + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: // AND + Comp_ArithTriOp(AND, rd, rd, rs, false, opSetsFlags|opSymmetric); + return; + case 0x1: // EOR + Comp_ArithTriOp(XOR, rd, rd, rs, false, opSetsFlags|opSymmetric); + return; + case 0x2: + case 0x3: + case 0x4: + case 0x7: + { + int shiftOp = op == 7 ? 
3 : op - 0x2; + bool carryUsed; + OpArg shifted = Comp_RegShiftReg(shiftOp, rs, rd, true, carryUsed); + TEST(32, shifted, shifted); + MOV(32, rd, shifted); + Comp_RetriveFlags(false, false, true); + } + return; + case 0x5: // ADC + Comp_ArithTriOp(ADC, rd, rd, rs, false, opSetsFlags|opSymmetric|opSyncCarry|opRetriveCV); + return; + case 0x6: // SBC + Comp_ArithTriOp(SBB, rd, rd, rs, false, opSetsFlags|opSyncCarry|opInvertCarry|opRetriveCV); + return; + case 0x8: // TST + Comp_CmpOp(0, rd, rs, false); + return; + case 0x9: // NEG + if (rd != rs) + MOV(32, rd, rs); + NEG(32, rd); + Comp_RetriveFlags(true, true, false); + return; + case 0xA: // CMP + Comp_CmpOp(2, rd, rs, false); + return; + case 0xB: // CMN + Comp_CmpOp(3, rd, rs, false); + return; + case 0xC: // ORR + Comp_ArithTriOp(OR, rd, rd, rs, false, opSetsFlags|opSymmetric); + return; + case 0xE: // BIC + Comp_ArithTriOp(AND, rd, rd, rs, false, opSetsFlags|opSymmetric|opInvertOp2); + return; + case 0xF: // MVN + if (rd != rs) + MOV(32, rd, rs); + NOT(32, rd); + Comp_RetriveFlags(false, false, false); + return; + default: + break; + } +} + +void Compiler::T_Comp_ALU_HiReg() +{ + OpArg rd = MapReg(((CurrentInstr.Instr & 0x7) | ((CurrentInstr.Instr >> 4) & 0x8))); + OpArg rs = MapReg((CurrentInstr.Instr >> 3) & 0xF); + + u32 op = (CurrentInstr.Instr >> 8) & 0x3; + + Comp_AddCycles_C(); + + switch (op) + { + case 0x0: // ADD + Comp_ArithTriOp(ADD, rd, rd, rs, false, opSymmetric|opRetriveCV); + return; + case 0x1: // CMP + Comp_CmpOp(2, rd, rs, false); + return; + case 0x2: // MOV + if (rd != rs) + MOV(32, rd, rs); + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, false); + return; + } +} + +} \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index fb2fda82..f51d4d99 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -8,18 +8,16 @@ using namespace Gen; namespace ARMJIT { - -const int RegCache::NativeRegAllocOrder[] = {(int)RBX, (int)RSI, (int)RDI, (int)R12, (int)R13}; -const int RegCache::NativeRegsCount = 5; +template <> +const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] = {RBX, RSI, RDI, R12, R13}; +template <> +const int RegCache<Compiler, X64Reg>::NativeRegsAvailable = 5; Compiler::Compiler() { AllocCodeSpace(1024 * 1024 * 4); } -typedef void (Compiler::*CompileFunc)(); -typedef void (*InterpretFunc)(ARM*); - void Compiler::LoadCPSR() { assert(!CPSRDirty); @@ -36,6 +34,19 @@ void Compiler::SaveCPSR() } } +void Compiler::LoadReg(int reg, X64Reg nativeReg) +{ + if (reg != 15) + MOV(32, R(nativeReg), MDisp(RCPU, offsetof(ARM, R[reg]))); + else + MOV(32, R(nativeReg), Imm32(R15)); +} + +void Compiler::UnloadReg(int reg, X64Reg nativeReg) +{ + MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg)); +} + CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount) { if (IsAlmostFull()) @@ -58,12 +69,18 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs LoadCPSR(); + // TODO: this is ugly as a whole, do better + RegCache = ARMJIT::RegCache<Compiler, X64Reg>(this, instrs, instrsCount); + for (int i = 0; i < instrsCount; i++) { R15 += Thumb ?
2 : 4; CurrentInstr = instrs[i]; - CompileFunc comp = NULL; + CompileFunc comp = GetCompFunc(CurrentInstr.Info.Kind); + + if (CurrentInstr.Info.Branches()) + comp = NULL; if (comp == NULL || i == instrsCount - 1) { @@ -79,6 +96,11 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs SaveCPSR(); } + if (comp != NULL) + RegCache.Prepare(i); + else + RegCache.Flush(); + if (Thumb) { if (comp == NULL) { @@ -89,8 +111,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]); } else - { - } + (this->*comp)(); } else { @@ -101,7 +122,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs ABI_CallFunction(ARMInterpreter::A_BLX_IMM); } else if (cond == 0xF) - AddCycles_C(); + Comp_AddCycles_C(); else { FixupBranch skipExecute; @@ -115,17 +136,17 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs MOV(32, R(RSCRATCH), Imm32(1)); SHL(32, R(RSCRATCH), R(RSCRATCH3)); TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond])); - + skipExecute = J_CC(CC_Z); } else { // could have used a LUT, but then where would be the fun? BT(32, R(RCPSR), Imm8(28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))); - + skipExecute = J_CC(cond & 1 ? CC_C : CC_NC); } - + } if (comp == NULL) { @@ -136,8 +157,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]); } else - { - } + (this->*comp)(); FixupBranch skipFailed; if (CurrentInstr.Cond() < 0xE) { skipFailed = J(); SetJumpTarget(skipExecute); - AddCycles_C(); + Comp_AddCycles_C(); SetJumpTarget(skipFailed); } @@ -155,13 +175,14 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs /* we don't need to collect the interpreted cycles, since all functions only add to it, the dispatcher - can take care of it. + takes care of it. */ if (comp == NULL && i != instrsCount - 1) LoadCPSR(); } + RegCache.Flush(); SaveCPSR(); LEA(32, RAX, MDisp(RCycles, ConstantCycles)); @@ -172,42 +193,57 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs return res; } -void Compiler::Compile(RegCache& regs, const FetchedInstr& instr) +CompileFunc Compiler::GetCompFunc(int kind) { + // this might look like a waste of space, so many repetitions, but it's invaluable for debugging.
+ // see ARMInstrInfo.h for the order const CompileFunc A_Comp[ARMInstrInfo::ak_Count] = { - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + // AND + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // EOR + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // SUB + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // RSB + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // ADD + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // ADC + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // SBC + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // RSC 
+ A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // ORR + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // MOV + A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, + A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, + // BIC + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, A_Comp_Arith, + // MVN + A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, + A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, A_Comp_MovOp, + // TST + A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, + // TEQ + A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, + // CMP + A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, + // CMN + A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -227,21 +263,34 @@ void Compiler::Compile(RegCache& regs, const FetchedInstr& instr) }; const CompileFunc T_Comp[ARMInstrInfo::tk_Count] = { - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL, + // Shift imm + T_Comp_ShiftImm, T_Comp_ShiftImm, T_Comp_ShiftImm, + // Three operand ADD/SUB + T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, + // 8 bit imm + T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, + // general ALU + T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, + T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, + T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, + T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU, + // hi reg + T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, + // pc/sp relative + NULL, NULL, NULL, + // mem... + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL }; + + return Thumb ? T_Comp[kind] : A_Comp[kind]; } -void Compiler::AddCycles_C() +void Compiler::Comp_AddCycles_C() { s32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 
1 : 3] @@ -253,80 +302,16 @@ void Compiler::AddCycles_C() ConstantCycles += cycles; } -// may uses RSCRATCH for op2 and RSCRATCH2 for the carryValue -OpArg Compiler::Comp_ShiftRegImm(int op, int amount, Gen::X64Reg rm, bool S, bool& carryUsed) -{ - carryUsed = true; - - switch (op) - { - case 0: // LSL - if (amount > 0) - { - MOV(32, R(RSCRATCH), R(rm)); - SHL(32, R(RSCRATCH), Imm8(amount)); - if (S) - SETcc(CC_C, R(RSCRATCH2)); - - return R(RSCRATCH); - } - else - { - carryUsed = false; - return R(rm); - } - case 1: // LSR - if (amount > 0) - { - MOV(32, R(RSCRATCH), R(rm)); - SHR(32, R(RSCRATCH), Imm8(amount)); - if (S) - SETcc(CC_C, R(RSCRATCH2)); - return R(RSCRATCH); - } - else - { - if (S) - { - MOV(32, R(RSCRATCH2), R(rm)); - SHR(32, R(RSCRATCH2), Imm8(31)); - } - return Imm32(0); - } - case 2: // ASR - MOV(32, R(RSCRATCH), R(rm)); - SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31)); - if (S) - { - if (amount == 0) - { - MOV(32, R(RSCRATCH2), R(rm)); - SHR(32, R(RSCRATCH2), Imm8(31)); - } - else - SETcc(CC_C, R(RSCRATCH2)); - } - return R(RSCRATCH); - case 3: // ROR - if (amount > 0) - { - MOV(32, R(RSCRATCH), R(rm)); - ROR_(32, R(RSCRATCH), Imm8(amount)); - } - else - { - BT(32, R(RCPSR), Imm8(29)); - MOV(32, R(RSCRATCH), R(rm)); - RCR(32, R(RSCRATCH), Imm8(1)); - } - if (S) - SETcc(CC_C, R(RSCRATCH2)); - return R(RSCRATCH); - } -} - -void Compiler::A_Comp_ALU(const FetchedInstr& instr) +void Compiler::Comp_AddCycles_CI(u32 i) { + s32 cycles = (Num ? + NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles)) + i; + + if (CurrentInstr.Cond() < 0xE) + ADD(32, R(RCycles), Imm8(cycles)); + else + ConstantCycles += cycles; } } \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 8e1d100e..9b454f43 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -4,7 +4,7 @@ #include "../dolphin/x64Emitter.h" #include "../ARMJIT.h" - +#include "../ARMJIT_RegCache.h" namespace ARMJIT { @@ -17,6 +17,10 @@ const Gen::X64Reg RSCRATCH = Gen::EAX; const Gen::X64Reg RSCRATCH2 = Gen::EDX; const Gen::X64Reg RSCRATCH3 = Gen::ECX; +class Compiler; + +typedef void (Compiler::*CompileFunc)(); + class Compiler : public Gen::X64CodeBlock { public: @@ -24,24 +28,66 @@ public: CompiledBlock CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount); - void StartBlock(ARM* cpu); - CompiledBlock FinaliseBlock(); + void LoadReg(int reg, Gen::X64Reg nativeReg); + void UnloadReg(int reg, Gen::X64Reg nativeReg); - void Compile(RegCache& regs, const FetchedInstr& instr); private: - void AddCycles_C(); + CompileFunc GetCompFunc(int kind); - Gen::OpArg Comp_ShiftRegImm(int op, int amount, Gen::X64Reg rm, bool S, bool& carryUsed); + void Comp_AddCycles_C(); + void Comp_AddCycles_CI(u32 i); - void A_Comp_ALU(const FetchedInstr& instr); + enum + { + opSetsFlags = 1 << 0, + opSymmetric = 1 << 1, + opRetriveCV = 1 << 2, + opInvertCarry = 1 << 3, + opSyncCarry = 1 << 4, + opInvertOp2 = 1 << 5, + }; + + void A_Comp_Arith(); + void A_Comp_MovOp(); + void A_Comp_CmpOp(); + + void T_Comp_ShiftImm(); + void T_Comp_AddSub_(); + void T_Comp_ALU_Imm8(); + void T_Comp_ALU(); + void T_Comp_ALU_HiReg(); + + void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), + Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags); + void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), + Gen::OpArg rd, Gen::OpArg rn, 
Gen::OpArg op2, bool carryUsed, int opFlags); + void Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed); + + void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed); + + Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed); + Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed); + + Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed); void LoadCPSR(); void SaveCPSR(); + Gen::OpArg MapReg(int reg) + { + if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG) + return Gen::Imm32(R15); + + assert(RegCache.Mapping[reg] != Gen::INVALID_REG); + return Gen::R(RegCache.Mapping[reg]); + } + bool CPSRDirty = false; FetchedInstr CurrentInstr; + RegCache<Compiler, Gen::X64Reg> RegCache; + bool Thumb; u32 Num; u32 R15; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a6011e1b..0faa57a4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -52,6 +52,7 @@ add_library(core STATIC ARMJIT.cpp ARMJIT_x64/ARMJIT_Compiler.cpp + ARMJIT_x64/ARMJIT_ALU.cpp dolphin/CommonFuncs.cpp dolphin/x64ABI.cpp
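
A note on the register allocator introduced in ARMJIT_RegCache.h: Prepare() counts, for each guest register, how often it is still read or written in the rest of the block. Guest registers with no future use are unloaded outright; if the current instruction still needs more host registers than are free, the loaded guest register with the lowest future-use count is spilled first. A minimal standalone sketch of that victim choice (the helper name and signature are illustrative, not part of the patch):

    #include <limits.h>
    #include <stdint.h>

    // pick which loaded guest register to spill: never one the current
    // instruction needs, otherwise the one with the fewest future uses
    int PickVictim(uint16_t loadedRegs, uint16_t neededNow, const int ranking[16])
    {
        int victim = -1, bestRank = INT_MAX;
        for (int reg = 0; reg < 16; reg++)
        {
            if (!(loadedRegs & (1 << reg)))
                continue; // not currently in a host register
            if (neededNow & (1 << reg))
                continue; // required by the instruction being compiled
            if (ranking[reg] < bestRank)
            {
                bestRank = ranking[reg];
                victim = reg;
            }
        }
        return victim; // -1 only if everything loaded is needed right now
    }

Prepare() repeats this choice until enough host registers are free, which is what the while loop over loadedSet in the patch does.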
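On A_Comp_GetALUOp2: when bit 25 of the instruction is set, the ARM-mode operand is an 8-bit value rotated right by twice the 4-bit rotate field, so the JIT can fold it to a constant at compile time; (Instr >> 7) & 0x1E extracts the rotate field already doubled. A sketch of the decode (hypothetical helper; the patch itself uses melonDS's ROR macro):

    #include <stdint.h>

    uint32_t DecodeALUImmediate(uint32_t instr)
    {
        uint32_t imm = instr & 0xFF;        // bits 0-7: 8-bit immediate
        uint32_t rot = (instr >> 7) & 0x1E; // bits 8-11: rotate amount, times two
        return (imm >> rot) | (imm << ((32 - rot) & 0x1F)); // rotate right
    }

The & 0x1F keeps the left shift well defined when rot is 0.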
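On opInvertCarry and opSyncCarry in Comp_ArithTriOp: ARM defines the carry flag after a subtraction as NOT borrow (C = 1 when a >= b), while x86 sets CF = borrow. That is why the subtraction-type ops pass opInvertCarry, making Comp_RetriveFlags capture CC_NC instead of CC_C, and why opSyncCarry loads the guest carry with BT and then flips it with CMC before an SBB. In plain C++ terms (illustrative only):

    #include <stdint.h>

    // ARM SBC computes a - b - (1 - C); x86 SBB computes a - b - CF.
    // Loading CF = !C (BT followed by CMC) makes SBB produce the ARM result.
    uint32_t ARM_SBC(uint32_t a, uint32_t b, int armCarry)
    {
        int cf = !armCarry; // what CMC leaves in the host carry flag
        return a - b - cf;
    }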
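On Comp_RetriveFlags: rather than testing each host flag and ORing bits into RCPSR one at a time, the emitted code materializes the flags with SETcc and packs them pairwise with LEA (dest = a + b*scale), so the whole NZCV nibble is assembled with three LEAs and one shift. The equivalent computation written out as plain C++ (names are illustrative):

    #include <stdint.h>

    uint32_t PackNZCV(uint32_t cpsr, int n, int z, int c, int v)
    {
        uint32_t vc = v | (c << 1);                // LEA: V + C*2
        uint32_t nz = z | (n << 1);                // LEA: Z + N*2
        uint32_t nzcv = vc | (nz << 2);            // LEA: VC + NZ*4
        return (cpsr & 0x0FFFFFFF) | (nzcv << 28); // SHL by 28, then AND/OR
    }

When only the shifter carry is retrieved (the carryOnly case), the same trick stacks NZ on top of the carry byte and shifts by 29 instead of 28.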
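On the condition check in CompileBlock: for condition codes below 8, each pair of conditions tests a single CPSR flag, so instead of the ConditionTable lookup the compiler computes the flag's bit index and tests it with BT (this is what the "could have used a LUT" comment refers to). The BT bit-index expression, unpacked into a hypothetical helper:

    // EQ/NE -> Z (CPSR bit 30), CS/CC -> C (bit 29),
    // MI/PL -> N (bit 31), VS/VC -> V (bit 28)
    int FlagBitForCond(int cond) // only valid for cond < 8
    {
        int pair = cond >> 1;                    // which flag pair
        int hi = (~pair & 1) << 1;               // 2 for the Z/N pairs, 0 for C/V
        int lo = ((cond >> 2) & 1) ^ (pair & 1); // picks Z vs N, C vs V
        return 28 + (hi | lo);
    }

BT copies the selected CPSR bit into the host carry flag, and J_CC(cond & 1 ? CC_C : CC_NC) then skips the instruction when the condition fails, since odd condition codes are the negations of their even counterparts.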