From c58fdbd66bab9f1b97e9522afa5436f212540b6d Mon Sep 17 00:00:00 2001 From: RSDuck Date: Thu, 11 Jul 2019 16:22:47 +0200 Subject: [PATCH] jit: branch instructions --- src/ARM.cpp | 12 +- src/ARMJIT.cpp | 4 +- src/ARMJIT.h | 2 +- src/ARMJIT_x64/ARMJIT_Branch.cpp | 267 ++++++++++++++++++++++++++++ src/ARMJIT_x64/ARMJIT_Compiler.cpp | 187 +++++++------------ src/ARMJIT_x64/ARMJIT_Compiler.h | 30 ++-- src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 42 +---- src/ARM_InstrInfo.cpp | 6 +- src/ARM_InstrInfo.h | 1 + src/CMakeLists.txt | 1 + 10 files changed, 364 insertions(+), 188 deletions(-) create mode 100644 src/ARMJIT_x64/ARMJIT_Branch.cpp diff --git a/src/ARM.cpp b/src/ARM.cpp index f7ca26d4..aca876d4 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -521,11 +521,8 @@ void ARMv5::Execute() printf("aaarg ungempappter raum %x\n", R[15]);*/ ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4)); - if (block == NULL) - ARMJIT::CompileBlock(this); - else - Cycles += block(); - + Cycles += (block ? block : ARMJIT::CompileBlock(this))(); + // TODO optimize this shit!!! if (Halted) { @@ -607,10 +604,7 @@ void ARMv4::Execute() printf("aaarg ungempappter raum %x\n", R[15]);*/ ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4)); - if (block == NULL) - ARMJIT::CompileBlock(this); - else - Cycles += block(); + Cycles += (block ? block : ARMJIT::CompileBlock(this))(); // TODO optimize this shit!!! if (Halted) diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 6afa967a..47b425f0 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -121,7 +121,7 @@ void DeInit() delete compiler; } -void CompileBlock(ARM* cpu) +CompiledBlock CompileBlock(ARM* cpu) { bool thumb = cpu->CPSR & 0x20; @@ -171,6 +171,8 @@ void CompileBlock(ARM* cpu) CompiledBlock block = compiler->CompileBlock(cpu, instrs, i); InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block); + + return block; } void ResetBlocks() diff --git a/src/ARMJIT.h b/src/ARMJIT.h index 71188f93..45bb4ed7 100644 --- a/src/ARMJIT.h +++ b/src/ARMJIT.h @@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func) void Init(); void DeInit(); -void CompileBlock(ARM* cpu); +CompiledBlock CompileBlock(ARM* cpu); void ResetBlocks(); diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp new file mode 100644 index 00000000..fb2acba8 --- /dev/null +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -0,0 +1,267 @@ +#include "ARMJIT_Compiler.h" + +using namespace Gen; + +namespace ARMJIT +{ + +void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) +{ + // we can simplify constant branches by a lot + // it's not completely safe to assume stuff like, which instructions to preload + // we'll see how it works out + + u32 newPC; + u32 nextInstr[2]; + u32 cycles = 0; + bool setupRegion = false; + + if (addr & 0x1 && !Thumb) + { + CPSRDirty = true; + OR(32, R(RCPSR), Imm8(0x20)); + } + else if (!(addr & 0x1) && Thumb) + { + CPSRDirty = true; + AND(32, R(RCPSR), Imm32(~0x20)); + } + + if (Num == 0) + { + ARMv5* cpu9 = (ARMv5*)CurCPU; + + u32 oldregion = R15 >> 24; + u32 newregion = addr >> 24; + + u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0]; + cpu9->RegionCodeCycles = regionCodeCycles; + + MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles)); + + setupRegion = newregion != oldregion; + if (setupRegion) + cpu9->SetupCodeMem(addr); + + if (addr & 0x1) + { + addr &= ~0x1; + newPC = addr+2; + + // two-opcodes-at-once fetch + // doesn't matter if we put garbage in the MSbs there + if (addr & 0x2) + { + nextInstr[0] = cpu9->CodeRead32(addr-2, true) >> 16; + cycles += CurCPU->CodeCycles; + nextInstr[1] = cpu9->CodeRead32(addr+2, false); + cycles += CurCPU->CodeCycles; + } + else + { + nextInstr[0] = cpu9->CodeRead32(addr, true); + nextInstr[1] = nextInstr[0] >> 16; + cycles += CurCPU->CodeCycles; + } + } + else + { + addr &= ~0x3; + newPC = addr+4; + + nextInstr[0] = cpu9->CodeRead32(addr, true); + cycles += cpu9->CodeCycles; + nextInstr[1] = cpu9->CodeRead32(addr+4, false); + cycles += cpu9->CodeCycles; + } + } + else + { + ARMv4* cpu7 = (ARMv4*)CurCPU; + + u32 codeRegion = addr >> 24; + u32 codeCycles = addr >> 15; // cheato + + cpu7->CodeRegion = codeRegion; + cpu7->CodeCycles = codeCycles; + + MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion)); + MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeCycles)); + + if (addr & 0x1) + { + addr &= ~0x1; + newPC = addr+2; + + nextInstr[0] = ((ARMv4*)CurCPU)->CodeRead16(addr); + nextInstr[1] = ((ARMv4*)CurCPU)->CodeRead16(addr+2); + cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1]; + } + else + { + addr &= ~0x3; + newPC = addr+4; + + nextInstr[0] = cpu7->CodeRead32(addr); + nextInstr[1] = cpu7->CodeRead32(addr+4); + cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3]; + } + } + + MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC)); + MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(nextInstr[0])); + MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(nextInstr[1])); + if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles) + ConstantCycles += cycles; + else + ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); + + if (setupRegion) + { + MOV(32, R(ABI_PARAM1), R(RCPU)); + MOV(32, R(ABI_PARAM2), Imm32(newPC)); + CALL((void*)&ARMv5::SetupCodeMem); + } +} + +void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) +{ + BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFFFF0000); + bool previouslyDirty = CPSRDirty; + SaveCPSR(); + + if (restoreCPSR) + { + if (Thumb || CurInstr.Cond() >= 0xE) + { + for (int reg : hiRegsLoaded) + RegCache.UnloadRegister(reg); + } + else + { + // the ugly way... + // we only save them, to load and save them again + for (int reg : hiRegsLoaded) + SaveReg(reg, RegCache.Mapping[reg]); + } + } + + MOV(64, R(ABI_PARAM1), R(RCPU)); + MOV(32, R(ABI_PARAM2), R(addr)); + if (!restoreCPSR) + XOR(32, R(ABI_PARAM3), R(ABI_PARAM3)); + else + MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR)); + if (Num == 0) + CALL((void*)&ARMv5::JumpTo); + else + CALL((void*)&ARMv4::JumpTo); + + if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE) + { + for (int reg : hiRegsLoaded) + LoadReg(reg, RegCache.Mapping[reg]); + } + + if (previouslyDirty) + LoadCPSR(); + CPSRDirty = previouslyDirty; +} + +void Compiler::A_Comp_BranchImm() +{ + int op = (CurInstr.Instr >> 24) & 1; + s32 offset = (s32)(CurInstr.Instr << 8) >> 6; + u32 target = R15 + offset; + bool link = op; + + if (CurInstr.Cond() == 0xF) // BLX_imm + { + target += (op << 1) + 1; + link = true; + } + + if (link) + MOV(32, MapReg(14), Imm32(R15 - 4)); + + Comp_JumpTo(target); +} + +void Compiler::A_Comp_BranchXchangeReg() +{ + OpArg rn = MapReg(CurInstr.A_Reg(0)); + if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg + MOV(32, MapReg(14), Imm32(R15 - 4)); + Comp_JumpTo(rn.GetSimpleReg()); +} + +void Compiler::T_Comp_BCOND() +{ + u32 cond = (CurInstr.Instr >> 8) & 0xF; + FixupBranch skipExecute = CheckCondition(cond); + + s32 offset = (s32)(CurInstr.Instr << 24) >> 23; + Comp_JumpTo(R15 + offset + 1, true); + + FixupBranch skipFailed = J(); + SetJumpTarget(skipExecute); + Comp_AddCycles_C(true); + SetJumpTarget(skipFailed); +} + +void Compiler::T_Comp_B() +{ + s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20; + Comp_JumpTo(R15 + offset + 1); +} + +void Compiler::T_Comp_BranchXchangeReg() +{ + bool link = CurInstr.Instr & (1 << 7); + if (link && Num == 1) + { + printf("BLX unsupported on ARM7!!!\n"); + return; + } + + OpArg rn = MapReg(CurInstr.A_Reg(3)); + if (link) + MOV(32, MapReg(14), Imm32(R15 - 1)); + Comp_JumpTo(rn.GetSimpleReg()); +} + +void Compiler::T_Comp_BL_LONG_1() +{ + s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9; + MOV(32, MapReg(14), Imm32(R15 + offset)); + Comp_AddCycles_C(); +} + +void Compiler::T_Comp_BL_LONG_2() +{ + OpArg lr = MapReg(14); + s32 offset = (CurInstr.Instr & 0x7FF) << 1; + LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset)); + MOV(32, lr, Imm32((R15 - 2) | 1)); + if (Num == 1 || CurInstr.Instr & (1 << 12)) + OR(32, R(RSCRATCH), Imm8(1)); + Comp_JumpTo(RSCRATCH); +} + +void Compiler::T_Comp_BL_Merged(FetchedInstr part1) +{ + assert(part1.Info.Kind == ARMInstrInfo::tk_BL_LONG_1); + Comp_AddCycles_C(); + + u32 target = (R15 - 2) + ((s32)((part1.Instr & 0x7FF) << 21) >> 9); + target += (CurInstr.Instr & 0x7FF) << 1; + + if (Num == 1 || CurInstr.Instr & (1 << 12)) + target |= 1; + + MOV(32, MapReg(14), Imm32((R15 - 2) | 1)); + + Comp_JumpTo(target); +} + +} \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.cpp b/src/ARMJIT_x64/ARMJIT_Compiler.cpp index 4fe0c703..6799a90b 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.cpp +++ b/src/ARMJIT_x64/ARMJIT_Compiler.cpp @@ -50,50 +50,6 @@ Compiler::Compiler() ResetStart = GetWritableCodePtr(); } -void* Compiler::Gen_ChangeCPSRRoutine() -{ - void* res = (void*)GetWritableCodePtr(); - - MOV(32, R(RSCRATCH), R(RCPSR)); - AND(32, R(RSCRATCH), Imm8(0x1F)); - CMP(32, R(RSCRATCH), Imm8(0x11)); - FixupBranch fiq = J_CC(CC_E); - CMP(32, R(RSCRATCH), Imm8(0x12)); - FixupBranch irq = J_CC(CC_E); - CMP(32, R(RSCRATCH), Imm8(0x13)); - FixupBranch svc = J_CC(CC_E); - CMP(32, R(RSCRATCH), Imm8(0x17)); - FixupBranch abt = J_CC(CC_E); - CMP(32, R(RSCRATCH), Imm8(0x1B)); - FixupBranch und = J_CC(CC_E); - - SetJumpTarget(fiq); - - SetJumpTarget(irq); - - SetJumpTarget(svc); - - SetJumpTarget(abt); - - SetJumpTarget(und); - - return res; -} - -DataRegion Compiler::ClassifyAddress(u32 addr) -{ - if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase) - return dataRegionDTCM; - switch (addr & 0xFF000000) - { - case 0x02000000: return dataRegionMainRAM; - case 0x03000000: return Num == 1 && (addr & 0xF00000) == 0x800000 ? dataRegionWRAM7 : dataRegionSWRAM; - case 0x04000000: return dataRegionIO; - case 0x06000000: return dataRegionVRAM; - } - return dataRegionGeneric; -} - void Compiler::LoadCPSR() { assert(!CPSRDirty); @@ -123,6 +79,29 @@ void Compiler::SaveReg(int reg, X64Reg nativeReg) MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg)); } +// invalidates RSCRATCH and RSCRATCH3 +Gen::FixupBranch Compiler::CheckCondition(u32 cond) +{ + if (cond >= 0x8) + { + static_assert(RSCRATCH3 == ECX); + MOV(32, R(RSCRATCH3), R(RCPSR)); + SHR(32, R(RSCRATCH3), Imm8(28)); + MOV(32, R(RSCRATCH), Imm32(1)); + SHL(32, R(RSCRATCH), R(RSCRATCH3)); + TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond])); + + return J_CC(CC_Z); + } + else + { + // could have used a LUT, but then where would be the fun? + TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))))); + + return J_CC(cond & 1 ? CC_NZ : CC_Z); + } +} + CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount) { if (IsAlmostFull()) @@ -140,6 +119,8 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs CodeRegion = cpu->CodeRegion; CurCPU = cpu; + bool mergedThumbBL = false; + ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16); MOV(64, R(RCPU), ImmPtr(cpu)); @@ -167,17 +148,10 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1])); } - if (comp == NULL || CurInstr.Info.Branches()) + if (comp == NULL) SaveCPSR(); } - - // run interpreter - cpu->CodeCycles = CurInstr.CodeCycles; - cpu->R[15] = R15; - cpu->CurInstr = CurInstr.Instr; - cpu->NextInstr[0] = CurInstr.NextInstr[0]; - cpu->NextInstr[1] = CurInstr.NextInstr[1]; - + if (comp != NULL) RegCache.Prepare(i); else @@ -185,58 +159,44 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs if (Thumb) { - u32 icode = (CurInstr.Instr >> 6) & 0x3FF; - if (comp == NULL) - { - MOV(64, R(ABI_PARAM1), R(RCPU)); - - ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]); - } + if (i < instrsCount - 1 && CurInstr.Info.Kind == ARMInstrInfo::tk_BL_LONG_1 + && instrs[i + 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_2) + mergedThumbBL = true; else - (this->*comp)(); + { + u32 icode = (CurInstr.Instr >> 6) & 0x3FF; + if (comp == NULL) + { + MOV(64, R(ABI_PARAM1), R(RCPU)); - ARMInterpreter::THUMBInstrTable[icode](cpu); + ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]); + } + else if (mergedThumbBL) + T_Comp_BL_Merged(instrs[i - 1]); + else + (this->*comp)(); + } } else { u32 cond = CurInstr.Cond(); if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM) { - MOV(64, R(ABI_PARAM1), R(RCPU)); - ABI_CallFunction(ARMInterpreter::A_BLX_IMM); - - ARMInterpreter::A_BLX_IMM(cpu); + if (comp) + (this->*comp)(); + else + { + MOV(64, R(ABI_PARAM1), R(RCPU)); + ABI_CallFunction(ARMInterpreter::A_BLX_IMM); + } } else if (cond == 0xF) - { Comp_AddCycles_C(); - cpu->AddCycles_C(); - } else { FixupBranch skipExecute; if (cond < 0xE) - { - if (cond >= 0x8) - { - static_assert(RSCRATCH3 == ECX); - MOV(32, R(RSCRATCH3), R(RCPSR)); - SHR(32, R(RSCRATCH3), Imm8(28)); - MOV(32, R(RSCRATCH), Imm32(1)); - SHL(32, R(RSCRATCH), R(RSCRATCH3)); - TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond])); - - skipExecute = J_CC(CC_Z); - } - else - { - // could have used a LUT, but then where would be the fun? - TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))))); - - skipExecute = J_CC(cond & 1 ? CC_NZ : CC_Z); - } - - } + skipExecute = CheckCondition(cond); u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0); if (comp == NULL) @@ -258,19 +218,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs SetJumpTarget(skipFailed); } - - if (cpu->CheckCondition(cond)) - ARMInterpreter::ARMInstrTable[icode](cpu); - else - cpu->AddCycles_C(); } } - /* - we don't need to collect the interpreted cycles, - since cpu->Cycles is taken into account by the dispatcher. - */ - if (comp == NULL && i != instrsCount - 1) LoadCPSR(); } @@ -367,7 +317,7 @@ CompileFunc Compiler::GetCompFunc(int kind) // LDM/STM NULL, NULL, // Branch - NULL, NULL, NULL, NULL, NULL, + A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchXchangeReg, A_Comp_BranchXchangeReg, // system stuff NULL, NULL, NULL, NULL, NULL, NULL, NULL, }; @@ -389,7 +339,7 @@ CompileFunc Compiler::GetCompFunc(int kind) // pc/sp relative T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP, // LDR pcrel - NULL, + T_Comp_LoadPCRel, // LDR/STR reg offset T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, // LDR/STR sign extended, half @@ -399,25 +349,27 @@ CompileFunc Compiler::GetCompFunc(int kind) // LDR/STR half imm offset T_Comp_MemImmHalf, T_Comp_MemImmHalf, // LDR/STR sp rel - NULL, NULL, + T_Comp_MemSPRel, T_Comp_MemSPRel, // PUSH/POP - NULL, NULL, + T_Comp_PUSH_POP, T_Comp_PUSH_POP, // LDMIA, STMIA - NULL, NULL, - NULL, NULL, - NULL, NULL, NULL, NULL, NULL, NULL + T_Comp_LDMIA_STMIA, T_Comp_LDMIA_STMIA, + // Branch + T_Comp_BCOND, T_Comp_BranchXchangeReg, T_Comp_BranchXchangeReg, T_Comp_B, T_Comp_BL_LONG_1, T_Comp_BL_LONG_2, + // Unk, SVC + NULL, NULL }; return Thumb ? T_Comp[kind] : A_Comp[kind]; } -void Compiler::Comp_AddCycles_C() +void Compiler::Comp_AddCycles_C(bool forceNonConstant) { s32 cycles = Num ? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles); - if (CurInstr.Cond() < 0xE) + if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant) ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; @@ -429,25 +381,10 @@ void Compiler::Comp_AddCycles_CI(u32 i) NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2] : ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i; - if (CurInstr.Cond() < 0xE) + if (!Thumb && CurInstr.Cond() < 0xE) ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles)); else ConstantCycles += cycles; } -void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR) -{ - // potentieller Bug: falls ein Register das noch gecacht ist, beim Modeswitch gespeichert - // wird der alte Wert gespeichert - SaveCPSR(); - - MOV(64, R(ABI_PARAM1), R(RCPU)); - MOV(32, R(ABI_PARAM2), R(addr)); - MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR)); - if (Num == 0) - CALL((void*)&ARMv5::JumpTo); - else - CALL((void*)&ARMv4::JumpTo); -} - } \ No newline at end of file diff --git a/src/ARMJIT_x64/ARMJIT_Compiler.h b/src/ARMJIT_x64/ARMJIT_Compiler.h index a751737c..45b488a6 100644 --- a/src/ARMJIT_x64/ARMJIT_Compiler.h +++ b/src/ARMJIT_x64/ARMJIT_Compiler.h @@ -22,19 +22,6 @@ class Compiler; typedef void (Compiler::*CompileFunc)(); -enum DataRegion -{ - dataRegionGeneric, // hey, that's me! - dataRegionMainRAM, - dataRegionSWRAM, - dataRegionVRAM, - dataRegionIO, - dataRegionExclusive, - dataRegionsCount, - dataRegionDTCM = dataRegionExclusive, - dataRegionWRAM7 = dataRegionExclusive, -}; - class Compiler : public Gen::X64CodeBlock { public: @@ -49,8 +36,9 @@ private: CompileFunc GetCompFunc(int kind); void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false); + void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false); - void Comp_AddCycles_C(); + void Comp_AddCycles_C(bool forceNonConstant = false); void Comp_AddCycles_CI(u32 i); enum @@ -63,8 +51,6 @@ private: opInvertOp2 = 1 << 5, }; - DataRegion ClassifyAddress(u32 addr); - void A_Comp_Arith(); void A_Comp_MovOp(); void A_Comp_CmpOp(); @@ -73,6 +59,9 @@ private: void A_Comp_MemHalf(); void A_Comp_LDM_STM(); + void A_Comp_BranchImm(); + void A_Comp_BranchXchangeReg(); + void T_Comp_ShiftImm(); void T_Comp_AddSub_(); void T_Comp_ALU_Imm8(); @@ -91,6 +80,13 @@ private: void T_Comp_PUSH_POP(); void T_Comp_LDMIA_STMIA(); + void T_Comp_BCOND(); + void T_Comp_B(); + void T_Comp_BranchXchangeReg(); + void T_Comp_BL_LONG_1(); + void T_Comp_BL_LONG_2(); + void T_Comp_BL_Merged(FetchedInstr prefix); + void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size); s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode); @@ -119,6 +115,8 @@ private: void LoadCPSR(); void SaveCPSR(); + Gen::FixupBranch CheckCondition(u32 cond); + Gen::OpArg MapReg(int reg) { if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG) diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 20e18931..69b324ce 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -462,38 +462,10 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei { int regsCount = regs.Count(); - const u8 userModeOffsets[] = - { - offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]), - offsetof(ARM, R[12]), offsetof(ARM, R[13]), offsetof(ARM, R[14]), 0, - - offsetof(ARM, R_FIQ[0]), offsetof(ARM, R_FIQ[1]), offsetof(ARM, R_FIQ[2]), offsetof(ARM, R_FIQ[3]), - offsetof(ARM, R_FIQ[4]), offsetof(ARM, R_FIQ[5]), offsetof(ARM, R_FIQ[6]), 0, - - offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]), - offsetof(ARM, R[12]), offsetof(ARM, R_IRQ[13]), offsetof(ARM, R_IRQ[14]), 0, - - offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]), - offsetof(ARM, R[12]), offsetof(ARM, R_SVC[13]), offsetof(ARM, R_SVC[14]), 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]), - offsetof(ARM, R[12]), offsetof(ARM, R_ABT[13]), offsetof(ARM, R_ABT[14]), 0, - - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - - offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]), - offsetof(ARM, R[12]), offsetof(ARM, R_UND[13]), offsetof(ARM, R_UND[14]), 0, - }; - if (decrement) { MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4)); - preinc = !preinc; + preinc ^= true; } else MOV(32, R(ABI_PARAM1), rb); @@ -516,16 +488,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei { if (regs[reg]) { - if (usermode && reg >= 8 && reg < 15) + /*if (usermode && reg >= 8 && reg < 15) { MOV(32, R(RSCRATCH2), R(RCPSR)); AND(32, R(RSCRATCH2), Imm8(0x1F)); // (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great! - MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8))); + MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8))); POP(RSCRATCH); MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH)); } - else if (RegCache.Mapping[reg] == INVALID_REG) + else */if (RegCache.Mapping[reg] == INVALID_REG) { assert(reg != 15); @@ -552,16 +524,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei { for (int reg : regs) { - if (usermode && reg >= 8 && reg < 15) + /*if (usermode && reg >= 8 && reg < 15) { MOV(32, R(RSCRATCH), R(RCPSR)); AND(32, R(RSCRATCH), Imm8(0x1F)); // (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great! - MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8))); + MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8))); MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH)); PUSH(RSCRATCH); } - else if (RegCache.Mapping[reg] == INVALID_REG) + else */if (RegCache.Mapping[reg] == INVALID_REG) { LoadReg(reg, RSCRATCH); PUSH(RSCRATCH); diff --git a/src/ARM_InstrInfo.cpp b/src/ARM_InstrInfo.cpp index c5192299..b8dff00b 100644 --- a/src/ARM_InstrInfo.cpp +++ b/src/ARM_InstrInfo.cpp @@ -255,7 +255,7 @@ const u32 T_STMIA = T_Read8 | T_Write8 | tk(tk_STMIA); const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND); const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX); -const u32 T_BLX_REG = T_BranchAlways | T_ReadR15 | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG); +const u32 T_BLX_REG = T_BranchAlways | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG); const u32 T_B = T_BranchAlways | tk(tk_B); const u32 T_BL_LONG_1 = T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_1); const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_2); @@ -301,6 +301,10 @@ Info Decode(bool thumb, u32 num, u32 instr) res.DstRegs |= (1 << 13); if (data & T_ReadR15) res.SrcRegs |= (1 << 15); + if (data & T_WriteR14) + res.DstRegs |= (1 << 14); + if (data & T_ReadR14) + res.SrcRegs |= (1 << 14); if (data & T_BranchAlways) res.DstRegs |= (1 << 15); diff --git a/src/ARM_InstrInfo.h b/src/ARM_InstrInfo.h index dcd938bc..51dcfa25 100644 --- a/src/ARM_InstrInfo.h +++ b/src/ARM_InstrInfo.h @@ -202,6 +202,7 @@ enum tk_POP, tk_LDMIA, tk_STMIA, + tk_BCOND, tk_BX, tk_BLX_REG, diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 662ed5c7..94012208 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -35,6 +35,7 @@ add_library(core STATIC ARMJIT_x64/ARMJIT_Compiler.cpp ARMJIT_x64/ARMJIT_ALU.cpp ARMJIT_x64/ARMJIT_LoadStore.cpp + ARMJIT_x64/ARMJIT_Branch.cpp dolphin/CommonFuncs.cpp dolphin/x64ABI.cpp