From 2c44bf927c230efbbd1b27920de062ddcc631fcf Mon Sep 17 00:00:00 2001 From: RSDuck Date: Sat, 6 Jul 2019 01:48:42 +0200 Subject: [PATCH] JIT: most mem instructions working + branching --- src/ARM.cpp | 10 +- src/ARMJIT.cpp | 7 +- src/ARMJIT.h | 2 +- src/ARMJIT_RegCache.h | 2 +- src/ARMJIT_x64/ARMJIT_ALU.cpp | 326 ++++++----- src/ARMJIT_x64/ARMJIT_Compiler.cpp | 145 +++-- src/ARMJIT_x64/ARMJIT_Compiler.h | 42 +- src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 833 ++++++++++++---------------- src/ARM_InstrInfo.cpp | 2 +- src/NDS.cpp | 2 + 10 files changed, 669 insertions(+), 702 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 420257a9..f7ca26d4 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -522,8 +522,9 @@ void ARMv5::Execute() ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4)); if (block == NULL) - block = ARMJIT::CompileBlock(this); - Cycles += block(); + ARMJIT::CompileBlock(this); + else + Cycles += block(); // TODO optimize this shit!!! if (Halted) @@ -607,8 +608,9 @@ void ARMv4::Execute() ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4)); if (block == NULL) - block = ARMJIT::CompileBlock(this); - Cycles += block(); + ARMJIT::CompileBlock(this); + else + Cycles += block(); // TODO optimize this shit!!! if (Halted) diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 4da781c2..6afa967a 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -121,12 +121,13 @@ void DeInit() delete compiler; } -CompiledBlock CompileBlock(ARM* cpu) +void CompileBlock(ARM* cpu) { bool thumb = cpu->CPSR & 0x20; FetchedInstr instrs[12]; int i = 0; + u32 r15Initial = cpu->R[15]; u32 r15 = cpu->R[15]; u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]}; //printf("block %x %d\n", r15, thumb); @@ -169,9 +170,7 @@ CompiledBlock CompileBlock(ARM* cpu) CompiledBlock block = compiler->CompileBlock(cpu, instrs, i); - InsertBlock(cpu->Num, cpu->R[15] - (thumb ? 2 : 4), block); - - return block; + InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block); } void ResetBlocks() diff --git a/src/ARMJIT.h b/src/ARMJIT.h index 45bb4ed7..71188f93 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func) void Init(); void DeInit(); -CompiledBlock CompileBlock(ARM* cpu); +void CompileBlock(ARM* cpu); void ResetBlocks(); diff --git a/src/ARMJIT_RegCache.h b/src/ARMJIT_RegCache.h index ea9fb303..556d27b4 100644 --- a/src/ARMJIT_RegCache.h +++ b/src/ARMJIT_RegCache.h @@ -114,7 +114,7 @@ public: for (int reg : needToBeLoaded) LoadRegister(reg); } - DirtyRegs |= Instr.Info.DstRegs; + DirtyRegs |= Instr.Info.DstRegs & ~(1 << 15); } static const Reg NativeRegAllocOrder[]; diff --git a/src/ARMJIT_x64/ARMJIT_ALU.cpp b/src/ARMJIT_x64/ARMJIT_ALU.cpp index 6294e1d4..c22751e4 100644 --- a/src/ARMJIT_x64/ARMJIT_ALU.cpp +++ b/src/ARMJIT_x64/ARMJIT_ALU.cpp @@ -71,30 +71,30 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed) { switch (op) { - case 0: // TST - if (rn.IsImm()) - { - MOV(32, R(RSCRATCH3), rn); - rn = R(RSCRATCH3); - } - TEST(32, rn, op2); - break; - case 1: // TEQ + case 0: // TST + if (rn.IsImm()) + { MOV(32, R(RSCRATCH3), rn); - XOR(32, R(RSCRATCH3), op2); - break; - case 2: // CMP - if (rn.IsImm()) - { - MOV(32, R(RSCRATCH3), rn); - rn = R(RSCRATCH3); - } - CMP(32, rn, op2); - break; - case 3: // CMN + rn = R(RSCRATCH3); + } + TEST(32, rn, op2); + break; + case 1: // TEQ + MOV(32, R(RSCRATCH3), rn); + XOR(32, R(RSCRATCH3), op2); + break; + case 2: // CMP + if (rn.IsImm()) + { MOV(32, R(RSCRATCH3), rn); - ADD(32, R(RSCRATCH3), op2); - break; + rn = R(RSCRATCH3); + } + CMP(32, rn, op2); + break; + case 3: // CMN + MOV(32, R(RSCRATCH3), rn); + ADD(32, R(RSCRATCH3), op2); + break; } Comp_RetriveFlags(op == 2, op >= 2, carryUsed); @@ -103,38 +103,38 @@ void Compiler::Comp_CmpOp(int op, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed) // also calculates cycles OpArg Compiler::A_Comp_GetALUOp2(bool S, bool& carryUsed) { - if (CurrentInstr.Instr & (1 << 25)) + if (CurInstr.Instr & (1 << 25)) { Comp_AddCycles_C(); carryUsed = false; - return Imm32(ROR(CurrentInstr.Instr & 0xFF, (CurrentInstr.Instr >> 7) & 0x1E)); + return Imm32(ROR(CurInstr.Instr & 0xFF, (CurInstr.Instr >> 7) & 0x1E)); } else { - int op = (CurrentInstr.Instr >> 5) & 0x3; - if (CurrentInstr.Instr & (1 << 4)) + int op = (CurInstr.Instr >> 5) & 0x3; + if (CurInstr.Instr & (1 << 4)) { Comp_AddCycles_CI(1); - OpArg rm = MapReg(CurrentInstr.A_Reg(0)); - if (rm.IsImm() && CurrentInstr.A_Reg(0) == 15) + OpArg rm = MapReg(CurInstr.A_Reg(0)); + if (rm.IsImm() && CurInstr.A_Reg(0) == 15) rm = Imm32(rm.Imm32() + 4); - return Comp_RegShiftReg(op, MapReg(CurrentInstr.A_Reg(8)), rm, S, carryUsed); + return Comp_RegShiftReg(op, MapReg(CurInstr.A_Reg(8)), rm, S, carryUsed); } else { Comp_AddCycles_C(); - return Comp_RegShiftImm(op, (CurrentInstr.Instr >> 7) & 0x1F, - MapReg(CurrentInstr.A_Reg(0)), S, carryUsed); + return Comp_RegShiftImm(op, (CurInstr.Instr >> 7) & 0x1F, + MapReg(CurInstr.A_Reg(0)), S, carryUsed); } } } void Compiler::A_Comp_CmpOp() { - u32 op = (CurrentInstr.Instr >> 21) & 0xF; + u32 op = (CurInstr.Instr >> 21) & 0xF; bool carryUsed; - OpArg rn = MapReg(CurrentInstr.A_Reg(16)); + OpArg rn = MapReg(CurInstr.A_Reg(16)); OpArg op2 = A_Comp_GetALUOp2((1 << op) & 0xF303, carryUsed); Comp_CmpOp(op - 0x8, rn, op2, carryUsed); @@ -142,12 +142,12 @@ void Compiler::A_Comp_CmpOp() void Compiler::A_Comp_Arith() { - bool S = CurrentInstr.Instr & (1 << 20); - u32 op = (CurrentInstr.Instr >> 21) & 0xF; + bool S = CurInstr.Instr & (1 << 20); + u32 op = (CurInstr.Instr >> 21) & 0xF; bool carryUsed; - OpArg rn = MapReg(CurrentInstr.A_Reg(16)); - OpArg rd = MapReg(CurrentInstr.A_Reg(12)); + OpArg rn = MapReg(CurInstr.A_Reg(16)); + OpArg rd = MapReg(CurInstr.A_Reg(12)); OpArg op2 = A_Comp_GetALUOp2(S && (1 << op) & 0xF303, carryUsed); u32 sFlag = S ? opSetsFlags : 0; @@ -155,13 +155,13 @@ void Compiler::A_Comp_Arith() { case 0x0: // AND Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, opSymmetric|sFlag); - return; + break; case 0x1: // EOR Comp_ArithTriOp(XOR, rd, rn, op2, carryUsed, opSymmetric|sFlag); - return; + break; case 0x2: // SUB Comp_ArithTriOp(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); - return; + break; case 0x3: // RSB if (op2.IsZero()) { @@ -173,41 +173,44 @@ void Compiler::A_Comp_Arith() } else Comp_ArithTriOpReverse(SUB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry); - return; + break; case 0x4: // ADD Comp_ArithTriOp(ADD, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV); - return; + break; case 0x5: // ADC Comp_ArithTriOp(ADC, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry); - return; + break; case 0x6: // SBC Comp_ArithTriOp(SBB, rd, rn, op2, carryUsed, opSymmetric|sFlag|opRetriveCV|opSyncCarry|opInvertCarry); - return; + break; case 0x7: // RSC Comp_ArithTriOpReverse(SBB, rd, rn, op2, carryUsed, sFlag|opRetriveCV|opInvertCarry|opSyncCarry); - return; + break; case 0xC: // ORR Comp_ArithTriOp(OR, rd, rn, op2, carryUsed, opSymmetric|sFlag); - return; + break; case 0xE: // BIC Comp_ArithTriOp(AND, rd, rn, op2, carryUsed, sFlag|opSymmetric|opInvertOp2); - return; + break; default: assert("unimplemented"); } + + if (CurInstr.A_Reg(12) == 15) + Comp_JumpTo(rd.GetSimpleReg(), S); } void Compiler::A_Comp_MovOp() { bool carryUsed; - bool S = CurrentInstr.Instr & (1 << 20); + bool S = CurInstr.Instr & (1 << 20); OpArg op2 = A_Comp_GetALUOp2(S, carryUsed); - OpArg rd = MapReg(CurrentInstr.A_Reg(12)); + OpArg rd = MapReg(CurInstr.A_Reg(12)); if (rd != op2) MOV(32, rd, op2); - if (((CurrentInstr.Instr >> 21) & 0xF) == 0xF) + if (((CurInstr.Instr >> 21) & 0xF) == 0xF) NOT(32, rd); if (S) @@ -215,6 +218,9 @@ void Compiler::A_Comp_MovOp() TEST(32, rd, rd); Comp_RetriveFlags(false, false, carryUsed); } + + if (CurInstr.A_Reg(12) == 15) + Comp_JumpTo(rd.GetSimpleReg(), S); } void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) @@ -230,7 +236,7 @@ void Compiler::Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed) } if (carryUsed == 983298) - printf("etwas ist faul im lande daenemark %x\n", CurrentInstr.Instr); + printf("etwas ist faul im lande daenemark %x\n", CurInstr.Instr); SETcc(CC_S, R(RSCRATCH)); SETcc(CC_Z, R(RSCRATCH3)); @@ -324,61 +330,61 @@ OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& car switch (op) { - case 0: // LSL - if (amount > 0) - { - MOV(32, R(RSCRATCH), rm); - SHL(32, R(RSCRATCH), Imm8(amount)); - if (S) - SETcc(CC_C, R(RSCRATCH2)); + case 0: // LSL + if (amount > 0) + { + MOV(32, R(RSCRATCH), rm); + SHL(32, R(RSCRATCH), Imm8(amount)); + if (S) + SETcc(CC_C, R(RSCRATCH2)); - return R(RSCRATCH); - } - else - { - carryUsed = false; - return rm; - } - case 1: // LSR - if (amount > 0) - { - MOV(32, R(RSCRATCH), rm); - SHR(32, R(RSCRATCH), Imm8(amount)); - if (S) - SETcc(CC_C, R(RSCRATCH2)); - return R(RSCRATCH); - } - else - { - if (S) - { - MOV(32, R(RSCRATCH2), rm); - SHR(32, R(RSCRATCH2), Imm8(31)); - } - return Imm32(0); - } - case 2: // ASR - MOV(32, R(RSCRATCH), rm); - SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31)); - if (S) - { - if (amount == 0) - BT(32, rm, Imm8(31)); - SETcc(CC_C, R(RSCRATCH2)); - } return R(RSCRATCH); - case 3: // ROR + } + else + { + carryUsed = false; + return rm; + } + case 1: // LSR + if (amount > 0) + { MOV(32, R(RSCRATCH), rm); - if (amount > 0) - ROR_(32, R(RSCRATCH), Imm8(amount)); - else - { - BT(32, R(RCPSR), Imm8(29)); - RCR(32, R(RSCRATCH), Imm8(1)); - } + SHR(32, R(RSCRATCH), Imm8(amount)); if (S) SETcc(CC_C, R(RSCRATCH2)); return R(RSCRATCH); + } + else + { + if (S) + { + MOV(32, R(RSCRATCH2), rm); + SHR(32, R(RSCRATCH2), Imm8(31)); + } + return Imm32(0); + } + case 2: // ASR + MOV(32, R(RSCRATCH), rm); + SAR(32, R(RSCRATCH), Imm8(amount ? amount : 31)); + if (S) + { + if (amount == 0) + BT(32, rm, Imm8(31)); + SETcc(CC_C, R(RSCRATCH2)); + } + return R(RSCRATCH); + case 3: // ROR + MOV(32, R(RSCRATCH), rm); + if (amount > 0) + ROR_(32, R(RSCRATCH), Imm8(amount)); + else + { + BT(32, R(RCPSR), Imm8(29)); + RCR(32, R(RSCRATCH), Imm8(1)); + } + if (S) + SETcc(CC_C, R(RSCRATCH2)); + return R(RSCRATCH); } assert(false); @@ -386,11 +392,11 @@ OpArg Compiler::Comp_RegShiftImm(int op, int amount, OpArg rm, bool S, bool& car void Compiler::T_Comp_ShiftImm() { - OpArg rd = MapReg(CurrentInstr.T_Reg(0)); - OpArg rs = MapReg(CurrentInstr.T_Reg(3)); + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); - int op = (CurrentInstr.Instr >> 11) & 0x3; - int amount = (CurrentInstr.Instr >> 6) & 0x1F; + int op = (CurInstr.Instr >> 11) & 0x3; + int amount = (CurInstr.Instr >> 6) & 0x1F; Comp_AddCycles_C(); @@ -406,12 +412,12 @@ void Compiler::T_Comp_ShiftImm() void Compiler::T_Comp_AddSub_() { - OpArg rd = MapReg(CurrentInstr.T_Reg(0)); - OpArg rs = MapReg(CurrentInstr.T_Reg(3)); + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); - int op = (CurrentInstr.Instr >> 9) & 0x3; + int op = (CurInstr.Instr >> 9) & 0x3; - OpArg rn = op >= 2 ? Imm32((CurrentInstr.Instr >> 6) & 0x7) : MapReg(CurrentInstr.T_Reg(6)); + OpArg rn = op >= 2 ? Imm32((CurInstr.Instr >> 6) & 0x7) : MapReg(CurInstr.T_Reg(6)); Comp_AddCycles_C(); @@ -423,38 +429,38 @@ void Compiler::T_Comp_AddSub_() void Compiler::T_Comp_ALU_Imm8() { - OpArg rd = MapReg(CurrentInstr.T_Reg(8)); + OpArg rd = MapReg(CurInstr.T_Reg(8)); - u32 op = (CurrentInstr.Instr >> 11) & 0x3; - OpArg imm = Imm32(CurrentInstr.Instr & 0xFF); + u32 op = (CurInstr.Instr >> 11) & 0x3; + OpArg imm = Imm32(CurInstr.Instr & 0xFF); Comp_AddCycles_C(); switch (op) { - case 0x0: - MOV(32, rd, imm); - TEST(32, rd, rd); - Comp_RetriveFlags(false, false, false); - return; - case 0x1: - Comp_CmpOp(2, rd, imm, false); - return; - case 0x2: - Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV); - return; - case 0x3: - Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV); - return; + case 0x0: + MOV(32, rd, imm); + TEST(32, rd, rd); + Comp_RetriveFlags(false, false, false); + return; + case 0x1: + Comp_CmpOp(2, rd, imm, false); + return; + case 0x2: + Comp_ArithTriOp(ADD, rd, rd, imm, false, opSetsFlags|opSymmetric|opRetriveCV); + return; + case 0x3: + Comp_ArithTriOp(SUB, rd, rd, imm, false, opSetsFlags|opInvertCarry|opRetriveCV); + return; } } void Compiler::T_Comp_ALU() { - OpArg rd = MapReg(CurrentInstr.T_Reg(0)); - OpArg rs = MapReg(CurrentInstr.T_Reg(3)); + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rs = MapReg(CurInstr.T_Reg(3)); - u32 op = (CurrentInstr.Instr >> 6) & 0xF; + u32 op = (CurInstr.Instr >> 6) & 0xF; if ((op >= 0x2 && op < 0x4) || op == 0x7) Comp_AddCycles_CI(1); @@ -522,28 +528,62 @@ void Compiler::T_Comp_ALU() void Compiler::T_Comp_ALU_HiReg() { - OpArg rd = MapReg(((CurrentInstr.Instr & 0x7) | ((CurrentInstr.Instr >> 4) & 0x8))); - OpArg rs = MapReg((CurrentInstr.Instr >> 3) & 0xF); + u32 rd = ((CurInstr.Instr & 0x7) | ((CurInstr.Instr >> 4) & 0x8)); + OpArg rdMapped = MapReg(rd); + OpArg rs = MapReg((CurInstr.Instr >> 3) & 0xF); - u32 op = (CurrentInstr.Instr >> 8) & 0x3; + u32 op = (CurInstr.Instr >> 8) & 0x3; Comp_AddCycles_C(); switch (op) { - case 0x0: // ADD - Comp_ArithTriOp(ADD, rd, rd, rs, false, opSymmetric|opRetriveCV); - return; - case 0x1: // CMP - Comp_CmpOp(2, rd, rs, false); - return; - case 0x2: // MOV - if (rd != rs) - MOV(32, rd, rs); - TEST(32, rd, rd); - Comp_RetriveFlags(false, false, false); - return; + case 0x0: // ADD + Comp_ArithTriOp(ADD, rdMapped, rdMapped, rs, false, opSymmetric|opRetriveCV); + break; + case 0x1: // CMP + Comp_CmpOp(2, rdMapped, rs, false); + return; // this is on purpose + case 0x2: // MOV + if (rdMapped != rs) + MOV(32, rdMapped, rs); + TEST(32, rdMapped, rdMapped); + Comp_RetriveFlags(false, false, false); + break; + } + + if (rd == 15) + { + OR(32, rdMapped, Imm8(1)); + Comp_JumpTo(rdMapped.GetSimpleReg()); } } +void Compiler::T_Comp_AddSP() +{ + Comp_AddCycles_C(); + + OpArg sp = MapReg(13); + OpArg offset = Imm32((CurInstr.Instr & 0x7F) << 2); + if (CurInstr.Instr & (1 << 7)) + SUB(32, sp, offset); + else + ADD(32, sp, offset); +} + +void Compiler::T_Comp_RelAddr() +{ + Comp_AddCycles_C(); + + OpArg rd = MapReg(CurInstr.T_Reg(8)); + u32 offset = (CurInstr.Instr & 0xFF) << 2; + if (CurInstr.Instr & (1 << 11)) + { + OpArg sp = MapReg(13); + LEA(32, rd.GetSimpleReg(), MDisp(sp.GetSimpleReg(), offset)); + } + else + MOV(32, rd, Imm32((R15 & ~2) + offset)); +} + } \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 90963976..b7358a22 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -9,7 +9,7 @@ using namespace Gen; namespace ARMJIT { template <> -const X64Reg RegCache::NativeRegAllocOrder[] = +const X64Reg RegCache::NativeRegAllocOrder[] = { #ifdef _WIN32 RBX, RSI, RDI, R12, R13 @@ -18,7 +18,7 @@ const X64Reg RegCache::NativeRegAllocOrder[] = #endif }; template <> -const int RegCache::NativeRegsAvailable = +const int RegCache::NativeRegsAvailable = #ifdef _WIN32 5 #else @@ -30,24 +30,33 @@ Compiler::Compiler() { AllocCodeSpace(1024 * 1024 * 16); - for (int i = 0; i < 15; i++) + for (int i = 0; i < 3; i++) { - ReadMemFuncs9[i] = Gen_MemoryRoutine9(false, 32, 0x1000000 * i); - WriteMemFuncs9[i] = Gen_MemoryRoutine9(true, 32, 0x1000000 * i); for (int j = 0; j < 2; j++) { - ReadMemFuncs7[j][i] = Gen_MemoryRoutine7(false, 32, j, 0x1000000 * i); - WriteMemFuncs7[j][i] = Gen_MemoryRoutine7(true, 32, j, 0x1000000 * i); + MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i); + MemoryFuncs7[i][j][0] = Gen_MemoryRoutine7(j, false, 8 << i); + MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i); } } - ReadMemFuncs9[15] = Gen_MemoryRoutine9(false, 32, 0xFF000000); - WriteMemFuncs9[15] = Gen_MemoryRoutine9(true, 32, 0xFF000000); - ReadMemFuncs7[15][0] = ReadMemFuncs7[15][1] = Gen_MemoryRoutine7(false, 32, false, 0xFF000000); - WriteMemFuncs7[15][0] = WriteMemFuncs7[15][1] = Gen_MemoryRoutine7(true, 32, false, 0xFF000000); ResetStart = GetWritableCodePtr(); } +DataRegion Compiler::ClassifyAddress(u32 addr) +{ + if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase) + return dataRegionDTCM; + switch (addr & 0xFF000000) + { + case 0x02000000: return dataRegionMainRAM; + case 0x03000000: return Num == 1 && (addr & 0xF00000) == 0x800000 ? dataRegionWRAM7 : dataRegionSWRAM; + case 0x04000000: return dataRegionIO; + case 0x06000000: return dataRegionVRAM; + } + return dataRegionGeneric; +} + void Compiler::LoadCPSR() { assert(!CPSRDirty); @@ -92,6 +101,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs Num = cpu->Num; R15 = cpu->R[15]; CodeRegion = cpu->CodeRegion; + CurCPU = cpu; ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16); @@ -106,27 +116,32 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs for (int i = 0; i < instrsCount; i++) { R15 += Thumb ? 2 : 4; - CurrentInstr = instrs[i]; + CurInstr = instrs[i]; - CompileFunc comp = GetCompFunc(CurrentInstr.Info.Kind); - - if (CurrentInstr.Info.Branches()) - comp = NULL; + CompileFunc comp = GetCompFunc(CurInstr.Info.Kind); if (comp == NULL || i == instrsCount - 1) { MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15)); - MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurrentInstr.CodeCycles)); - MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurrentInstr.Instr)); + MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles)); + MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr)); if (i == instrsCount - 1) { - MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurrentInstr.NextInstr[0])); - MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurrentInstr.NextInstr[1])); + MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurInstr.NextInstr[0])); + MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1])); } - SaveCPSR(); + if (comp == NULL || CurInstr.Info.Branches()) + SaveCPSR(); } + // run interpreter + cpu->CodeCycles = CurInstr.CodeCycles; + cpu->R[15] = R15; + cpu->CurInstr = CurInstr.Instr; + cpu->NextInstr[0] = CurInstr.NextInstr[0]; + cpu->NextInstr[1] = CurInstr.NextInstr[1]; + if (comp != NULL) RegCache.Prepare(i); else @@ -134,26 +149,33 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs if (Thumb) { + u32 icode = (CurInstr.Instr >> 6) & 0x3FF; if (comp == NULL) { MOV(64, R(ABI_PARAM1), R(RCPU)); - u32 icode = (CurrentInstr.Instr >> 6) & 0x3FF; ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]); } else (this->*comp)(); + + ARMInterpreter::THUMBInstrTable[icode](cpu); } else { - u32 cond = CurrentInstr.Cond(); - if (CurrentInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) + u32 cond = CurInstr.Cond(); + if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) { MOV(64, R(ABI_PARAM1), R(RCPU)); ABI_CallFunction(ARMInterpreter::A_BLX_IMM); + + ARMInterpreter::A_BLX_IMM(cpu); } else if (cond == 0xF) + { Comp_AddCycles_C(); + cpu->AddCycles_C(); + } else { FixupBranch skipExecute; @@ -180,18 +202,18 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs } + u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0); if (comp == NULL) { MOV(64, R(ABI_PARAM1), R(RCPU)); - u32 icode = ((CurrentInstr.Instr >> 4) & 0xF) | ((CurrentInstr.Instr >> 16) & 0xFF0); ABI_CallFunction(ARMInterpreter::ARMInstrTable[icode]); } else (this->*comp)(); FixupBranch skipFailed; - if (CurrentInstr.Cond() < 0xE) + if (CurInstr.Cond() < 0xE) { skipFailed = J(); SetJumpTarget(skipExecute); @@ -200,13 +222,17 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs SetJumpTarget(skipFailed); } + + if (cpu->CheckCondition(cond)) + ARMInterpreter::ARMInstrTable[icode](cpu); + else + cpu->AddCycles_C(); } } /* we don't need to collect the interpreted cycles, - since all functions only add to it, the dispatcher - takes care of it. + since cpu->Cycles is taken into account by the dispatcher. */ if (comp == NULL && i != instrsCount - 1) @@ -277,29 +303,29 @@ CompileFunc Compiler::GetCompFunc(int kind) // Mul NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, // ARMv5 stuff - NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, // STR A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, // STRB - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, // LDR A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, // LDRB - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, // STRH - NULL, NULL, NULL, NULL, + A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, // LDRD NULL, NULL, NULL, NULL, // STRD NULL, NULL, NULL, NULL, // LDRH - NULL, NULL, NULL, NULL, + A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, // LDRSB - NULL, NULL, NULL, NULL, + A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, // LDRSH - NULL, NULL, NULL, NULL, + A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, // swap - NULL, NULL, + NULL, NULL, // LDM/STM NULL, NULL, // Branch @@ -314,26 +340,26 @@ CompileFunc Compiler::GetCompFunc(int kind) // Three operand ADD/SUB T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, T_Comp_AddSub_, // 8 bit imm - T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, + T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, T_Comp_ALU_Imm8, // general ALU - T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, - T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, + T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, + T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, T_Comp_ALU, NULL, T_Comp_ALU, T_Comp_ALU, // hi reg T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, // pc/sp relative - NULL, NULL, NULL, + T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP, // LDR pcrel - NULL, + NULL, // LDR/STR reg offset - T_Comp_MemReg, NULL, T_Comp_MemReg, NULL, - // LDR/STR sign extended, half - NULL, NULL, NULL, NULL, + T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, + // LDR/STR sign extended, half + T_Comp_MemRegHalf, T_Comp_MemRegHalf, T_Comp_MemRegHalf, T_Comp_MemRegHalf, // LDR/STR imm offset - T_Comp_MemImm, T_Comp_MemImm, NULL, NULL, + T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm, // LDR/STR half imm offset - NULL, NULL, + T_Comp_MemImmHalf, T_Comp_MemImmHalf, // branch, etc. NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -346,10 +372,10 @@ CompileFunc Compiler::GetCompFunc(int kind) void Compiler::Comp_AddCycles_C() { s32 cycles = Num ? - NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 1 : 3] - : ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles); + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); - if (CurrentInstr.Cond() < 0xE) + if (CurInstr.Cond() < 0xE) ADD(32, R(RCycles), Imm8(cycles)); else ConstantCycles += cycles; @@ -358,13 +384,26 @@ void Compiler::Comp_AddCycles_C() void Compiler::Comp_AddCycles_CI(u32 i) { s32 cycles = (Num ? - NDS::ARM7MemTimings[CurrentInstr.CodeCycles][Thumb ? 0 : 2] - : ((R15 & 0x2) ? 0 : CurrentInstr.CodeCycles)) + i; - - if (CurrentInstr.Cond() < 0xE) + NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; + + if (CurInstr.Cond() < 0xE) ADD(32, R(RCycles), Imm8(cycles)); else ConstantCycles += cycles; } +void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) +{ + SaveCPSR(); + + MOV(64, R(ABI_PARAM1), R(RCPU)); + MOV(32, R(ABI_PARAM2), R(addr)); + MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR)); + if (Num == 0) + CALL((void*)&ARMv5::JumpTo); + else + CALL((void*)&ARMv4::JumpTo); +} + } \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index 7ab9b256..9395a299 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -6,6 +6,8 @@ #include "../ARMJIT.h" #include "../ARMJIT_RegCache.h" +#include + namespace ARMJIT { @@ -21,6 +23,19 @@ class Compiler; typedef void (Compiler::*CompileFunc)(); +enum DataRegion +{ + dataRegionGeneric, // hey, that's me! + dataRegionMainRAM, + dataRegionSWRAM, + dataRegionVRAM, + dataRegionIO, + dataRegionExclusive, + dataRegionsCount, + dataRegionDTCM = dataRegionExclusive, + dataRegionWRAM7 = dataRegionExclusive, +}; + class Compiler : public Gen::X64CodeBlock { public: @@ -34,6 +49,8 @@ public: private: CompileFunc GetCompFunc(int kind); + void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false); + void Comp_AddCycles_C(); void Comp_AddCycles_CI(u32 i); @@ -47,11 +64,14 @@ private: opInvertOp2 = 1 << 5, }; + DataRegion ClassifyAddress(u32 addr); + void A_Comp_Arith(); void A_Comp_MovOp(); void A_Comp_CmpOp(); void A_Comp_MemWB(); + void A_Comp_MemHalf(); void T_Comp_ShiftImm(); void T_Comp_AddSub_(); @@ -59,8 +79,15 @@ private: void T_Comp_ALU(); void T_Comp_ALU_HiReg(); + void T_Comp_RelAddr(); + void T_Comp_AddSP(); + void T_Comp_MemReg(); void T_Comp_MemImm(); + void T_Comp_MemRegHalf(); + void T_Comp_MemImmHalf(); + + void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size); void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&), Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags); @@ -70,8 +97,8 @@ private: void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed); - void* Gen_MemoryRoutine9(bool store, int size, u32 region); - void* Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region); + void* Gen_MemoryRoutine9(bool store, int size); + void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size); Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed); Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed); @@ -92,10 +119,12 @@ private: } void* ResetStart; + void* MemoryFuncs9[3][2]; + void* MemoryFuncs7[3][2][2]; bool CPSRDirty = false; - FetchedInstr CurrentInstr; + FetchedInstr CurInstr; RegCache RegCache; @@ -105,12 +134,9 @@ private: u32 CodeRegion; u32 ConstantCycles; -}; -extern void* ReadMemFuncs9[16]; -extern void* ReadMemFuncs7[2][16]; -extern void* WriteMemFuncs9[16]; -extern void* WriteMemFuncs7[2][16]; + ARM* CurCPU; +}; } diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index d5342692..69746e29 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -5,7 +5,6 @@ namespace NDS { -#define MAIN_RAM_SIZE 0x400000 extern u8* SWRAM_ARM9; extern u32 SWRAM_ARM9Mask; extern u8* SWRAM_ARM7; @@ -19,11 +18,6 @@ using namespace Gen; namespace ARMJIT { -void* ReadMemFuncs9[16]; -void* ReadMemFuncs7[2][16]; -void* WriteMemFuncs9[16]; -void* WriteMemFuncs7[2][16]; - template int squeezePointer(T* ptr) { @@ -32,569 +26,434 @@ int squeezePointer(T* ptr) return truncated; } -u32 ReadVRAM9(u32 addr) -{ - switch (addr & 0x00E00000) - { - case 0x00000000: return GPU::ReadVRAM_ABG(addr); - case 0x00200000: return GPU::ReadVRAM_BBG(addr); - case 0x00400000: return GPU::ReadVRAM_AOBJ(addr); - case 0x00600000: return GPU::ReadVRAM_BOBJ(addr); - default: return GPU::ReadVRAM_LCDC(addr); - } -} +/* + According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number) + of all memory load and store instructions always access addresses in the same region as + during the their first execution. -void WriteVRAM9(u32 addr, u32 val) -{ - switch (addr & 0x00E00000) - { - case 0x00000000: GPU::WriteVRAM_ABG(addr, val); return; - case 0x00200000: GPU::WriteVRAM_BBG(addr, val); return; - case 0x00400000: GPU::WriteVRAM_AOBJ(addr, val); return; - case 0x00600000: GPU::WriteVRAM_BOBJ(addr, val); return; - default: GPU::WriteVRAM_LCDC(addr, val); return; - } -} + I tried multiple optimisations, which would benefit from this behaviour + (having fast paths for the first region, …), though none of them yielded a measureable + improvement. +*/ /* - R11 - data to write (store only) - RSCRATCH2 - address - RSCRATCH3 - code cycles + address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows) + store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows) + code cycles - ABI_PARAM3 */ -void* Compiler::Gen_MemoryRoutine9(bool store, int size, u32 region) +void* Compiler::Gen_MemoryRoutine9(bool store, int size) { + u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0)); AlignCode4(); - void* res = (void*)GetWritableCodePtr(); + void* res = GetWritableCodePtr(); - if (!store) - { - MOV(32, R(RSCRATCH), R(RSCRATCH2)); - AND(32, R(RSCRATCH), Imm8(0x3)); - SHL(32, R(RSCRATCH), Imm8(3)); - // enter the shadow realm! - MOV(32, MDisp(RSP, 8), R(RSCRATCH)); - } + MOV(32, R(RSCRATCH), R(ABI_PARAM1)); + SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase))); + CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize))); + FixupBranch insideDTCM = J_CC(CC_B); + + CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize))); + FixupBranch insideITCM = J_CC(CC_B); // cycle counting! - // this is AddCycles_CDI - MOV(32, R(R10), R(RSCRATCH2)); - SHR(32, R(R10), Imm8(12)); - MOVZX(32, 8, R10, MComplex(RCPU, R10, SCALE_1, offsetof(ARMv5, MemTimings) + 2)); - LEA(32, RSCRATCH, MComplex(RSCRATCH3, R10, SCALE_1, -6)); - CMP(32, R(R10), R(RSCRATCH3)); - CMOVcc(32, RSCRATCH3, R(R10), CC_G); - CMP(32, R(RSCRATCH), R(RSCRATCH3)); - CMOVcc(32, RSCRATCH3, R(RSCRATCH), CC_G); - ADD(32, R(RCycles), R(RSCRATCH3)); - - if (!store) - XOR(32, R(RSCRATCH), R(RSCRATCH)); - AND(32, R(RSCRATCH2), Imm32(~3)); + MOV(32, R(RSCRATCH), R(ABI_PARAM1)); + SHR(32, R(RSCRATCH), Imm8(12)); + MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 0))); + LEA(32, ABI_PARAM4, MComplex(RSCRATCH, ABI_PARAM3, SCALE_1, -6)); + CMP(32, R(ABI_PARAM3), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G); + CMP(32, R(ABI_PARAM4), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(ABI_PARAM4), CC_G); + ADD(32, R(RCycles), R(RSCRATCH)); + if (store) { - MOV(32, R(RSCRATCH3), R(RSCRATCH2)); - SUB(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMBase))); - CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMSize))); - FixupBranch outsideDTCM = J_CC(CC_AE); - AND(32, R(RSCRATCH2), Imm32(0x3FFF)); - if (!store) + if (size > 8) + AND(32, R(ABI_PARAM1), Imm32(addressMask)); + switch (size) { - MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM))); - MOV(32, R(ECX), MDisp(RSP, 8)); + case 32: JMP((u8*)NDS::ARM9Write32, true); break; + case 16: JMP((u8*)NDS::ARM9Write16, true); break; + case 8: JMP((u8*)NDS::ARM9Write8, true); break; + } + } + else + { + if (size == 32) + { + ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8); + AND(32, R(ABI_PARAM1), Imm32(addressMask)); + // everything's already in the appropriate register + ABI_CallFunction(NDS::ARM9Read32); + ABI_PopRegistersAndAdjustStack({ECX}, 8); + AND(32, R(ECX), Imm8(3)); + SHL(32, R(ECX), Imm8(3)); + ROR_(32, R(RSCRATCH), R(ECX)); + RET(); + } + else if (size == 16) + { + AND(32, R(ABI_PARAM1), Imm32(addressMask)); + JMP((u8*)NDS::ARM9Read16, true); + } + else + JMP((u8*)NDS::ARM9Read8, true); + } + + SetJumpTarget(insideDTCM); + ADD(32, R(RCycles), R(ABI_PARAM3)); + AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask)); + if (store) + MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2)); + else + { + MOVZX(32, size, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM))); + if (size == 32) + { + if (ABI_PARAM1 != ECX) + MOV(32, R(ECX), R(ABI_PARAM1)); + AND(32, R(ECX), Imm8(3)); + SHL(32, R(ECX), Imm8(3)); ROR_(32, R(RSCRATCH), R(ECX)); } - else - MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM)), R(R11)); - RET(); - SetJumpTarget(outsideDTCM); - MOV(32, R(RSCRATCH2), R(RSCRATCH3)); } - - switch (region) - { - case 0x00000000: - case 0x01000000: - { - CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, ITCMSize))); - FixupBranch insideITCM = J_CC(CC_B); - RET(); - SetJumpTarget(insideITCM); - AND(32, R(RSCRATCH2), Imm32(0x7FFF)); - if (!store) - MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM))); - else - { - MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)), R(R11)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM)), Imm32(0)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), Imm32(0)); - } - } - break; - case 0x02000000: - AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1)); - if (!store) - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM))); - else - { - MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0)); - } - break; - case 0x03000000: - { - MOV(64, R(RSCRATCH3), M(&NDS::SWRAM_ARM9)); - TEST(64, R(RSCRATCH3), R(RSCRATCH3)); - FixupBranch notMapped = J_CC(CC_Z); - AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM9Mask)); - if (!store) - MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH3)); - else - { - MOV(32, MRegSum(RSCRATCH2, RSCRATCH3), R(R11)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0)); - } - SetJumpTarget(notMapped); - } - break; - case 0x04000000: - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - if (!store) - { - ABI_PushRegistersAndAdjustStack({}, 8, 0); - ABI_CallFunction(NDS::ARM9IORead32); - ABI_PopRegistersAndAdjustStack({}, 8, 0); - } - else - { - MOV(32, R(ABI_PARAM2), R(R11)); - JMP((u8*)NDS::ARM9IOWrite32, true); - } - break; - case 0x05000000: - { - MOV(32, R(RSCRATCH), Imm32(1<<1)); - MOV(32, R(RSCRATCH3), Imm32(1<<9)); - TEST(32, R(RSCRATCH2), Imm32(0x400)); - CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ); - TEST(16, R(RSCRATCH), M(&NDS::PowerControl9)); - FixupBranch available = J_CC(CC_NZ); - RET(); - SetJumpTarget(available); - AND(32, R(RSCRATCH2), Imm32(0x7FF)); - if (!store) - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::Palette))); - else - MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::Palette)), R(R11)); - } - break; - case 0x06000000: - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - if (!store) - { - ABI_PushRegistersAndAdjustStack({}, 8); - ABI_CallFunction(ReadVRAM9); - ABI_PopRegistersAndAdjustStack({}, 8); - } - else - { - MOV(32, R(ABI_PARAM2), R(R11)); - JMP((u8*)WriteVRAM9, true); - } - break; - case 0x07000000: - { - MOV(32, R(RSCRATCH), Imm32(1<<1)); - MOV(32, R(RSCRATCH3), Imm32(1<<9)); - TEST(32, R(RSCRATCH2), Imm32(0x400)); - CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ); - TEST(16, R(RSCRATCH), M(&NDS::PowerControl9)); - FixupBranch available = J_CC(CC_NZ); - RET(); - SetJumpTarget(available); - AND(32, R(RSCRATCH2), Imm32(0x7FF)); - if (!store) - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::OAM))); - else - MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::OAM)), R(R11)); - } - break; - case 0x08000000: - case 0x09000000: - case 0x0A000000: - if (!store) - MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF)); - break; - case 0xFF000000: - if (!store) - { - AND(32, R(RSCRATCH2), Imm32(0xFFF)); - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM9BIOS))); - } - break; - default: - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - if (!store) - { - ABI_PushRegistersAndAdjustStack({}, 8, 0); - ABI_CallFunction(NDS::ARM9Read32); - ABI_PopRegistersAndAdjustStack({}, 8, 0); - } - else - { - MOV(32, R(ABI_PARAM2), R(R11)); - JMP((u8*)NDS::ARM9Write32, true); - } - break; - } - - if (!store) - { - MOV(32, R(ECX), MDisp(RSP, 8)); - ROR_(32, R(RSCRATCH), R(ECX)); - } - RET(); + SetJumpTarget(insideITCM); + ADD(32, R(RCycles), R(ABI_PARAM3)); + MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX + AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask)); + if (store) + { + MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2)); + XOR(32, R(RSCRATCH), R(RSCRATCH)); + MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(RSCRATCH)); + if (size == 32) + MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(RSCRATCH)); + } + else + { + MOVZX(32, size, RSCRATCH, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM))); + if (size == 32) + { + if (ABI_PARAM1 != ECX) + MOV(32, R(ECX), R(ABI_PARAM1)); + AND(32, R(ECX), Imm8(3)); + SHL(32, R(ECX), Imm8(3)); + ROR_(32, R(RSCRATCH), R(ECX)); + } + } + RET(); + + static_assert(RSCRATCH == EAX); + return res; } -void* Compiler::Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region) +void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size) { + u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0)); AlignCode4(); void* res = GetWritableCodePtr(); - if (!store) - { - MOV(32, R(RSCRATCH), R(RSCRATCH2)); - AND(32, R(RSCRATCH), Imm8(0x3)); - SHL(32, R(RSCRATCH), Imm8(3)); - // enter the shadow realm! - MOV(32, MDisp(RSP, 8), R(RSCRATCH)); - } - - // AddCycles_CDI - MOV(32, R(RSCRATCH), R(RSCRATCH2)); + MOV(32, R(RSCRATCH), R(ABI_PARAM1)); SHR(32, R(RSCRATCH), Imm8(15)); - MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(NDS::ARM7MemTimings + 2))); - if ((region == 0x02000000 && mainRAMCode) || (region != 0x02000000 && !mainRAMCode)) + MOVZX(32, 8, ABI_PARAM4, MDisp(RSCRATCH, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings))); + + MOV(32, R(RSCRATCH), R(ABI_PARAM1)); + AND(32, R(RSCRATCH), Imm32(0xFF000000)); + CMP(32, R(RSCRATCH), Imm32(0x02000000)); + FixupBranch outsideMainRAM = J_CC(CC_NE); + if (codeMainRAM) { - if (!store && region != 0x02000000) - LEA(32, RSCRATCH3, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, 1)); - ADD(32, R(RCycles), R(RSCRATCH3)); + LEA(32, RSCRATCH, MRegSum(ABI_PARAM4, ABI_PARAM3)); + ADD(32, R(RCycles), R(RSCRATCH)); } else { if (!store) - ADD(32, R(region == 0x02000000 ? RSCRATCH2 : RSCRATCH), Imm8(1)); - LEA(32, R10, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, -3)); - CMP(32, R(RSCRATCH3), R(RSCRATCH)); - CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_G); - CMP(32, R(R10), R(RSCRATCH)); - CMOVcc(32, RSCRATCH, R(R10), CC_G); + ADD(32, R(ABI_PARAM3), Imm8(1)); + LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3)); + CMP(32, R(ABI_PARAM4), R(ABI_PARAM3)); + CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G); + CMP(32, R(ABI_PARAM3), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G); ADD(32, R(RCycles), R(RSCRATCH)); } - - if (!store) + MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); + AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask)); + if (store) + { + MOV(size, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM)), R(ABI_PARAM2)); XOR(32, R(RSCRATCH), R(RSCRATCH)); - AND(32, R(RSCRATCH2), Imm32(~3)); - - switch (region) - { - case 0x00000000: - if (!store) { - CMP(32, R(RSCRATCH2), Imm32(0x4000)); - FixupBranch outsideBIOS1 = J_CC(CC_AE); - - MOV(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARM, R[15]))); - CMP(32, R(RSCRATCH), Imm32(0x4000)); - FixupBranch outsideBIOS2 = J_CC(CC_AE); - MOV(32, R(RSCRATCH3), M(&NDS::ARM7BIOSProt)); - CMP(32, R(RSCRATCH2), R(RSCRATCH3)); - FixupBranch notDenied1 = J_CC(CC_AE); - CMP(32, R(RSCRATCH), R(RSCRATCH3)); - FixupBranch notDenied2 = J_CC(CC_B); - SetJumpTarget(outsideBIOS2); - MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF)); - RET(); - - SetJumpTarget(notDenied1); - SetJumpTarget(notDenied2); - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7BIOS))); - MOV(32, R(ECX), MDisp(RSP, 8)); - ROR_(32, R(RSCRATCH), R(ECX)); - RET(); - - SetJumpTarget(outsideBIOS1); - } - break; - case 0x02000000: - AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1)); - if (!store) - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM))); - else - { - MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0)); - } - break; - case 0x03000000: - { - TEST(32, R(RSCRATCH2), Imm32(0x800000)); - FixupBranch region = J_CC(CC_NZ); - MOV(64, R(RSCRATCH), M(&NDS::SWRAM_ARM7)); - TEST(64, R(RSCRATCH), R(RSCRATCH)); - FixupBranch notMapped = J_CC(CC_Z); - AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM7Mask)); - if (!store) - { - MOV(32, R(RSCRATCH), MRegSum(RSCRATCH, RSCRATCH2)); - MOV(32, R(ECX), MDisp(RSP, 8)); - ROR_(32, R(RSCRATCH), R(ECX)); - } - else - { - MOV(32, MRegSum(RSCRATCH, RSCRATCH2), R(R11)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0)); - } - RET(); - SetJumpTarget(region); - SetJumpTarget(notMapped); - AND(32, R(RSCRATCH2), Imm32(0xFFFF)); - if (!store) - MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM))); - else - { - MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)), R(R11)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM)), Imm32(0)); - MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM) + 8), Imm32(0)); - } - } - break; - case 0x04000000: - { - TEST(32, R(RSCRATCH2), Imm32(0x800000)); - FixupBranch region = J_CC(CC_NZ); - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - if (!store) - { - ABI_PushRegistersAndAdjustStack({}, 8); - ABI_CallFunction(NDS::ARM7IORead32); - ABI_PopRegistersAndAdjustStack({}, 8); - - MOV(32, R(ECX), MDisp(RSP, 8)); - ROR_(32, R(RSCRATCH), R(ECX)); - RET(); - } - else - { - MOV(32, R(ABI_PARAM2), R(R11)); - JMP((u8*)NDS::ARM7IOWrite32, true); - } - SetJumpTarget(region); - - if (!store) - { - ABI_PushRegistersAndAdjustStack({RSCRATCH2}, 8); - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - ABI_CallFunction(Wifi::Read); - ABI_PopRegistersAndAdjustStack({RSCRATCH2}, 8); - - ADD(32, R(RSCRATCH2), Imm8(2)); - ABI_PushRegistersAndAdjustStack({EAX}, 8); - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - ABI_CallFunction(Wifi::Read); - MOV(32, R(RSCRATCH2), R(EAX)); - SHL(32, R(RSCRATCH2), Imm8(16)); - ABI_PopRegistersAndAdjustStack({EAX}, 8); - OR(32, R(EAX), R(RSCRATCH2)); - } - else - { - ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8); - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - MOVZX(32, 16, ABI_PARAM2, R(R11)); - ABI_CallFunction(Wifi::Write); - ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8); - SHR(32, R(R11), Imm8(16)); - ADD(32, R(RSCRATCH2), Imm8(2)); - ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8); - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - MOVZX(32, 16, ABI_PARAM2, R(R11)); - ABI_CallFunction(Wifi::Write); - ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8); - } - } - break; - case 0x06000000: - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - if (!store) - { - ABI_PushRegistersAndAdjustStack({}, 8); - ABI_CallFunction(GPU::ReadVRAM_ARM7); - ABI_PopRegistersAndAdjustStack({}, 8); - } - else - { - AND(32, R(ABI_PARAM1), Imm32(0x40000 - 1)); - MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM)), Imm32(0)); - MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM) + 8), Imm32(0)); - MOV(32, R(ABI_PARAM2), R(R11)); - JMP((u8*)GPU::WriteVRAM_ARM7, true); - } - break; - case 0x08000000: - case 0x09000000: - case 0x0A000000: - if (!store) - MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF)); - break; - /*default: - ABI_PushRegistersAndAdjustStack({}, 8, 0); - MOV(32, R(ABI_PARAM1), R(RSCRATCH2)); - ABI_CallFunction(NDS::ARM7Read32); - ABI_PopRegistersAndAdjustStack({}, 8, 0); - break;*/ + MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM)), R(RSCRATCH)); + if (size == 32) + MOV(64, MScaled(ABI_PARAM3, SCALE_4, squeezePointer(cache.MainRAM) + 8), R(RSCRATCH)); } - - if (!store) + else { - MOV(32, R(ECX), MDisp(RSP, 8)); - ROR_(32, R(RSCRATCH), R(ECX)); + MOVZX(32, size, RSCRATCH, MDisp(ABI_PARAM3, squeezePointer(NDS::MainRAM))); + if (size == 32) + { + if (ABI_PARAM1 != ECX) + MOV(32, R(ECX), R(ABI_PARAM1)); + AND(32, R(ECX), Imm8(3)); + SHL(32, R(ECX), Imm8(3)); + ROR_(32, R(RSCRATCH), R(ECX)); + } } - RET(); + SetJumpTarget(outsideMainRAM); + if (codeMainRAM) + { + if (!store) + ADD(32, R(ABI_PARAM4), Imm8(1)); + LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3)); + CMP(32, R(ABI_PARAM4), R(ABI_PARAM3)); + CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G); + CMP(32, R(ABI_PARAM3), R(RSCRATCH)); + CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G); + ADD(32, R(RCycles), R(RSCRATCH)); + } + else + { + LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, store ? 0 : 1)); + ADD(32, R(RCycles), R(RSCRATCH)); + } + if (store) + { + if (size > 8) + AND(32, R(ABI_PARAM1), Imm32(addressMask)); + switch (size) + { + case 32: JMP((u8*)NDS::ARM7Write32, true); break; + case 16: JMP((u8*)NDS::ARM7Write16, true); break; + case 8: JMP((u8*)NDS::ARM7Write8, true); break; + } + } + else + { + if (size == 32) + { + ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8); + AND(32, R(ABI_PARAM1), Imm32(addressMask)); + ABI_CallFunction(NDS::ARM7Read32); + ABI_PopRegistersAndAdjustStack({ECX}, 8); + AND(32, R(ECX), Imm8(3)); + SHL(32, R(ECX), Imm8(3)); + ROR_(32, R(RSCRATCH), R(ECX)); + RET(); + } + else if (size == 16) + { + AND(32, R(ABI_PARAM1), Imm32(addressMask)); + JMP((u8*)NDS::ARM7Read16, true); + } + else + JMP((u8*)NDS::ARM7Read8, true); + } + return res; } +void Compiler::Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size) +{ + if (store) + MOV(32, R(ABI_PARAM2), rd); + u32 cycles = Num + ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] + : (R15 & 0x2 ? 0 : CurInstr.CodeCycles); + MOV(32, R(ABI_PARAM3), Imm32(cycles)); + CALL(Num == 0 + ? MemoryFuncs9[size >> 4][store] + : MemoryFuncs7[size >> 4][store][CodeRegion == 0x02]); + + if (!store) + { + if (signExtend) + MOVSX(32, size, rd.GetSimpleReg(), R(RSCRATCH)); + else + MOVZX(32, size, rd.GetSimpleReg(), R(RSCRATCH)); + } +} + OpArg Compiler::A_Comp_GetMemWBOffset() { - if (!(CurrentInstr.Instr & (1 << 25))) - return Imm32(CurrentInstr.Instr & 0xFFF); + if (!(CurInstr.Instr & (1 << 25))) + { + u32 imm = CurInstr.Instr & 0xFFF; + return Imm32(imm); + } else { - int op = (CurrentInstr.Instr >> 5) & 0x3; - int amount = (CurrentInstr.Instr >> 7) & 0x1F; - OpArg rm = MapReg(CurrentInstr.A_Reg(0)); + int op = (CurInstr.Instr >> 5) & 0x3; + int amount = (CurInstr.Instr >> 7) & 0x1F; + OpArg rm = MapReg(CurInstr.A_Reg(0)); bool carryUsed; + return Comp_RegShiftImm(op, amount, rm, false, carryUsed); } } void Compiler::A_Comp_MemWB() -{ - OpArg rn = MapReg(CurrentInstr.A_Reg(16)); - OpArg rd = MapReg(CurrentInstr.A_Reg(12)); - bool load = CurrentInstr.Instr & (1 << 20); +{ + OpArg rn = MapReg(CurInstr.A_Reg(16)); + OpArg rd = MapReg(CurInstr.A_Reg(12)); + bool load = CurInstr.Instr & (1 << 20); + bool byte = CurInstr.Instr & (1 << 22); + int size = byte ? 8 : 32; - MOV(32, R(RSCRATCH2), rn); - if (CurrentInstr.Instr & (1 << 24)) + if (CurInstr.Instr & (1 << 24)) { OpArg offset = A_Comp_GetMemWBOffset(); - if (CurrentInstr.Instr & (1 << 23)) - ADD(32, R(RSCRATCH2), offset); + if (CurInstr.Instr & (1 << 23)) + MOV_sum(32, ABI_PARAM1, rn, offset); else - SUB(32, R(RSCRATCH2), offset); + { + MOV(32, R(ABI_PARAM1), rn); + SUB(32, R(ABI_PARAM1), offset); + } - if (CurrentInstr.Instr & (1 << 21)) - MOV(32, rn, R(RSCRATCH2)); + if (CurInstr.Instr & (1 << 21)) + MOV(32, rn, R(ABI_PARAM1)); } - - u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][2] : CurrentInstr.CodeCycles; - MOV(32, R(RSCRATCH3), Imm32(cycles)); - MOV(32, R(RSCRATCH), R(RSCRATCH2)); - SHR(32, R(RSCRATCH), Imm8(24)); - AND(32, R(RSCRATCH), Imm8(0xF)); - void** funcArray; - if (load) - funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9; else - { - funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9; - MOV(32, R(R11), rd); - } - CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray))); + MOV(32, R(ABI_PARAM1), rn); - if (load) - MOV(32, R(RSCRATCH2), R(RSCRATCH)); - - if (!(CurrentInstr.Instr & (1 << 24))) + if (!(CurInstr.Instr & (1 << 24))) { OpArg offset = A_Comp_GetMemWBOffset(); - if (CurrentInstr.Instr & (1 << 23)) + if (CurInstr.Instr & (1 << 23)) ADD(32, rn, offset); else SUB(32, rn, offset); } - if (load) - MOV(32, rd, R(RSCRATCH2)); + Comp_MemAccess(rd, false, !load, byte ? 8 : 32); + if (load && CurInstr.A_Reg(12) == 15) + { + if (byte) + printf("!!! LDRB PC %08X\n", R15); + else + { + if (Num == 1) + AND(32, rd, Imm8(0xFE)); // immediate is sign extended + Comp_JumpTo(rd.GetSimpleReg()); + } + } +} + +void Compiler::A_Comp_MemHalf() +{ + OpArg rn = MapReg(CurInstr.A_Reg(16)); + OpArg rd = MapReg(CurInstr.A_Reg(12)); + + OpArg offset = CurInstr.Instr & (1 << 22) + ? Imm32(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0)) + : MapReg(CurInstr.A_Reg(0)); + + if (CurInstr.Instr & (1 << 24)) + { + if (CurInstr.Instr & (1 << 23)) + MOV_sum(32, ABI_PARAM1, rn, offset); + else + { + MOV(32, R(ABI_PARAM1), rn); + SUB(32, R(ABI_PARAM1), offset); + } + + if (CurInstr.Instr & (1 << 21)) + MOV(32, rn, R(ABI_PARAM1)); + } + else + MOV(32, R(ABI_PARAM1), rn); + + int op = (CurInstr.Instr >> 5) & 0x3; + bool load = CurInstr.Instr & (1 << 20); + + bool signExtend = false; + int size; + if (!load && op == 1) + size = 16; + else if (load) + { + size = op == 2 ? 8 : 16; + signExtend = op > 1; + } + + if (!(CurInstr.Instr & (1 << 24))) + { + if (CurInstr.Instr & (1 << 23)) + ADD(32, rn, offset); + else + SUB(32, rn, offset); + } + + Comp_MemAccess(rd, signExtend, !load, size); + + if (load && CurInstr.A_Reg(12) == 15) + printf("!!! MemHalf op PC %08X\n", R15);; } void Compiler::T_Comp_MemReg() { - OpArg rd = MapReg(CurrentInstr.T_Reg(0)); - OpArg rb = MapReg(CurrentInstr.T_Reg(3)); - OpArg ro = MapReg(CurrentInstr.T_Reg(6)); + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rb = MapReg(CurInstr.T_Reg(3)); + OpArg ro = MapReg(CurInstr.T_Reg(6)); - int op = (CurrentInstr.Instr >> 10) & 0x3; + int op = (CurInstr.Instr >> 10) & 0x3; bool load = op & 0x2; - - MOV(32, R(RSCRATCH2), rb); - ADD(32, R(RSCRATCH2), ro); + bool byte = op & 0x1; - u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles); - MOV(32, R(RSCRATCH3), Imm32(cycles)); - MOV(32, R(RSCRATCH), R(RSCRATCH2)); - SHR(32, R(RSCRATCH), Imm8(24)); - AND(32, R(RSCRATCH), Imm8(0xF)); - void** funcArray; - if (load) - funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9; - else - { - funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9; - MOV(32, R(R11), rd); - } - CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray))); + MOV_sum(32, ABI_PARAM1, rb, ro); - if (load) - MOV(32, rd, R(RSCRATCH)); + Comp_MemAccess(rd, false, !load, byte ? 8 : 32); } void Compiler::T_Comp_MemImm() { - // TODO: aufräumen!!! - OpArg rd = MapReg(CurrentInstr.T_Reg(0)); - OpArg rb = MapReg(CurrentInstr.T_Reg(3)); - - int op = (CurrentInstr.Instr >> 11) & 0x3; - u32 offset = ((CurrentInstr.Instr >> 6) & 0x1F) * 4; + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rb = MapReg(CurInstr.T_Reg(3)); + + int op = (CurInstr.Instr >> 11) & 0x3; bool load = op & 0x1; + bool byte = op & 0x2; + u32 offset = ((CurInstr.Instr >> 6) & 0x1F) * (byte ? 1 : 4); - LEA(32, RSCRATCH2, MDisp(rb.GetSimpleReg(), offset)); - u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles); - MOV(32, R(RSCRATCH3), Imm32(cycles)); - MOV(32, R(RSCRATCH), R(RSCRATCH2)); - SHR(32, R(RSCRATCH), Imm8(24)); - AND(32, R(RSCRATCH), Imm8(0xF)); - void** funcArray; - if (load) - funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9; - else - { - funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9; - MOV(32, R(R11), rd); - } - CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray))); + LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset)); - if (load) - MOV(32, rd, R(RSCRATCH)); + Comp_MemAccess(rd, false, !load, byte ? 8 : 32); +} + +void Compiler::T_Comp_MemRegHalf() +{ + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rb = MapReg(CurInstr.T_Reg(3)); + OpArg ro = MapReg(CurInstr.T_Reg(6)); + + int op = (CurInstr.Instr >> 10) & 0x3; + bool load = op != 0; + int size = op != 1 ? 16 : 8; + bool signExtend = op & 1; + + MOV_sum(32, ABI_PARAM1, rb, ro); + + Comp_MemAccess(rd, signExtend, !load, size); +} + +void Compiler::T_Comp_MemImmHalf() +{ + OpArg rd = MapReg(CurInstr.T_Reg(0)); + OpArg rb = MapReg(CurInstr.T_Reg(3)); + + u32 offset = (CurInstr.Instr >> 5) & 0x3E; + bool load = CurInstr.Instr & (1 << 11); + + LEA(32, ABI_PARAM1, MDisp(rb.GetSimpleReg(), offset)); + + Comp_MemAccess(rd, false, !load, 16); } } \ No newline at end of file diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index 41c46e1d..32a96451 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -317,7 +317,7 @@ Info Decode(bool thumb, u32 num, u32 instr) else { u32 data = ARMInstrTable[((instr >> 4) & 0xF) | ((instr >> 16) & 0xFF0)]; - if ((instr & 0xFE000000) == 0xFA000000) + if (num == 0 && (instr & 0xFE000000) == 0xFA000000) data = A_BLX_IMM; if (data & A_ARM9Only && num != 0) diff --git a/src/NDS.cpp b/src/NDS.cpp index b8fd8cbd..baa5e0de 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -524,6 +524,8 @@ void Reset() KeyCnt = 0; RCnt = 0; + ARMJIT::ResetBlocks(); + NDSCart::Reset(); GBACart::Reset(); GPU::Reset();