// melonDS/src/ARMJIT_A64/ARMJIT_Branch.cpp

#include "ARMJIT_Compiler.h"
using namespace Arm64Gen;
// hack
const int kCodeCacheTiming = 3;
namespace ARMJIT
{
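// thin wrapper so the JITed code can reach the (member) ARM::JumpTo of either
// core type through a plain function pointer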
template <typename T>
void jumpToTrampoline(T* cpu, u32 addr, bool changeCPSR)
{
    cpu->JumpTo(addr, changeCPSR);
}

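// branch to a compile-time constant target: fix up the CPSR T bit if the mode
// changes and precompute the code fetch timings for the target, so the cycle
// cost can usually be folded into ConstantCycles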
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
    // we can simplify constant branches by a lot
    // it's not completely safe to assume stuff like which instructions to preload
    // we'll see how it works out
    IrregularCycles = true;

    u32 newPC;
    u32 cycles = 0;
    bool setupRegion = false;

    if (addr & 0x1 && !Thumb)
    {
        CPSRDirty = true;
        ORRI2R(RCPSR, RCPSR, 0x20);
    }
    else if (!(addr & 0x1) && Thumb)
    {
        CPSRDirty = true;
        ANDI2R(RCPSR, RCPSR, ~0x20);
    }

    if (Num == 0)
    {
        ARMv5* cpu9 = (ARMv5*)CurCPU;

        u32 oldregion = R15 >> 24;
        u32 newregion = addr >> 24;

        u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
        u32 compileTimeCodeCycles = cpu9->RegionCodeCycles;
        cpu9->RegionCodeCycles = regionCodeCycles;

        MOVI2R(W0, regionCodeCycles);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, RegionCodeCycles));

        setupRegion = newregion != oldregion;
        if (setupRegion)
            cpu9->SetupCodeMem(addr);

        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            // two-opcodes-at-once fetch
            // doesn't matter if we put garbage in the MSbs there
            if (addr & 0x2)
            {
                cpu9->CodeRead32(addr-2, true) >> 16;
                cycles += cpu9->CodeCycles;
                cpu9->CodeRead32(addr+2, false);
                cycles += CurCPU->CodeCycles;
            }
            else
            {
                cpu9->CodeRead32(addr, true);
                cycles += cpu9->CodeCycles;
            }
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            cpu9->CodeRead32(addr, true);
            cycles += cpu9->CodeCycles;
            cpu9->CodeRead32(addr+4, false);
            cycles += cpu9->CodeCycles;
        }

        cpu9->RegionCodeCycles = compileTimeCodeCycles;
        if (setupRegion)
            cpu9->SetupCodeMem(R15);
    }
    else
    {
        ARMv4* cpu7 = (ARMv4*)CurCPU;

        u32 codeRegion = addr >> 24;
        u32 codeCycles = addr >> 15; // cheato

        cpu7->CodeRegion = codeRegion;
        cpu7->CodeCycles = codeCycles;

        MOVI2R(W0, codeRegion);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeRegion));
        MOVI2R(W0, codeCycles);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, CodeCycles));

        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            // this is necessary because of the ARM7 BIOS protection
            u32 compileTimePC = CurCPU->R[15];
            CurCPU->R[15] = newPC;

            cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];

            CurCPU->R[15] = compileTimePC;
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            u32 compileTimePC = CurCPU->R[15];
            CurCPU->R[15] = newPC;

            cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];

            CurCPU->R[15] = compileTimePC;
        }

        cpu7->CodeRegion = R15 >> 24;
        cpu7->CodeCycles = addr >> 15;
    }

    if (Exit)
    {
        MOVI2R(W0, newPC);
        STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, R[15]));
    }

    if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
        ConstantCycles += cycles;
    else
        ADD(RCycles, RCycles, cycles);
}

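// emits the runtime branch trampoline for the ARM9: target address in W0,
// looks up RegionCodeCycles in MemTimings, checks the target against ITCMSize,
// updates R15 and the CPSR T bit and adds the fetch cycles to RCycles.
// kind: 0 = pick ARM/Thumb from bit 0 at runtime, 1 = ARM target, 2 = Thumb target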
void* Compiler::Gen_JumpTo9(int kind)
{
    AlignCode16();
    void* res = GetRXPtr();

    MOVI2R(W2, kCodeCacheTiming);
    // W1 - code cycles non branch
    // W2 - branch code cycles
    LSR(W1, W0, 12);
    LSL(W1, W1, 2);
    ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);

    LDRB(W1, RCPU, W1);

    LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));

    STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));

    CMP(W0, W3);
    FixupBranch outsideITCM = B(CC_LO);
    MOVI2R(W1, 1);
    MOVI2R(W2, 1);
    SetJumpTarget(outsideITCM);

    FixupBranch switchToThumb;
    if (kind == 0)
        switchToThumb = TBNZ(W0, 0);

    if (kind == 0 || kind == 1)
    {
        ANDI2R(W0, W0, ~3);
        if (kind == 0)
            ANDI2R(RCPSR, RCPSR, ~0x20);

        ADD(W3, W0, 4);
        STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));

        ADD(W1, W1, W2);
        ADD(RCycles, RCycles, W1);

        RET();
    }

    if (kind == 0 || kind == 2)
    {
        if (kind == 0)
        {
            SetJumpTarget(switchToThumb);
            ORRI2R(RCPSR, RCPSR, 0x20);
        }

        ANDI2R(W0, W0, ~1);
        ADD(W3, W0, 2);
        STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));

        FixupBranch halfwordLoc = TBZ(W0, 1);
        ADD(W1, W1, W2);
        ADD(RCycles, RCycles, W1);
        RET();

        SetJumpTarget(halfwordLoc);
        ADD(RCycles, RCycles, W2);
        RET();
    }

    return res;
}

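// ARM7 counterpart: derives CodeRegion/CodeCycles from the target address in W0,
// pulls the two fetch timings out of NDS::ARM7MemTimings and updates R15, the
// CPSR T bit and RCycles.
// kind: 0 = pick ARM/Thumb from bit 0 at runtime, 1 = ARM target, 2 = Thumb target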
void* Compiler::Gen_JumpTo7(int kind)
{
    void* res = GetRXPtr();

    LSR(W1, W0, 24);
    STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeRegion));
    LSR(W1, W0, 15);
    STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARM, CodeCycles));

    MOVP2R(X2, NDS::ARM7MemTimings);
    LDR(W3, X2, ArithOption(W1, true));

    FixupBranch switchToThumb;
    if (kind == 0)
        switchToThumb = TBNZ(W0, 0);

    if (kind == 0 || kind == 1)
    {
        UBFX(W2, W3, 0, 8);
        UBFX(W3, W3, 8, 8);
        ADD(W2, W3, W2);
        ADD(RCycles, RCycles, W2);

        ANDI2R(W0, W0, ~3);

        if (kind == 0)
            ANDI2R(RCPSR, RCPSR, ~0x20);

        ADD(W3, W0, 4);
        STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));

        RET();
    }

    if (kind == 0 || kind == 2)
    {
        if (kind == 0)
        {
            SetJumpTarget(switchToThumb);
            ORRI2R(RCPSR, RCPSR, 0x20);
        }

        UBFX(W2, W3, 16, 8);
        UBFX(W3, W3, 24, 8);
        ADD(W2, W3, W2);
        ADD(RCycles, RCycles, W2);

        ANDI2R(W0, W0, ~1);
        ADD(W3, W0, 2);
        STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));

        RET();
    }

    return res;
}

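// branch to a target held in a register: without restoreCPSR this goes through
// the pre-generated JumpToFuncs7/9 trampolines; with restoreCPSR it spills the
// relevant state and falls back to calling the CPU's own JumpTo via
// jumpToTrampoline (presumably slower, but it handles the CPSR restore)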
void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool restoreCPSR)
{
    IrregularCycles = true;

    if (!restoreCPSR)
    {
        if (switchThumb)
            CPSRDirty = true;
        MOV(W0, addr);
        BL((Num ? JumpToFuncs7 : JumpToFuncs9)[switchThumb ? 0 : (Thumb + 1)]);
    }
    else
    {
        BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
        bool previouslyDirty = CPSRDirty;
        SaveCPSR();

        if (restoreCPSR)
        {
            if (Thumb || CurInstr.Cond() >= 0xE)
                RegCache.Flush();
            else
            {
                // the ugly way...
                // we only save them, to load and save them again
                for (int reg : hiRegsLoaded)
                    SaveReg(reg, RegCache.Mapping[reg]);
            }
        }

        if (switchThumb)
            MOV(W1, addr);
        else
        {
            if (Thumb)
                ORRI2R(W1, addr, 1);
            else
                ANDI2R(W1, addr, ~1);
        }

        MOV(X0, RCPU);
        MOVI2R(W2, restoreCPSR);
        if (Num == 0)
            QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
        else
            QuickCallFunction(X3, jumpToTrampoline<ARMv4>);

        if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
        {
            for (int reg : hiRegsLoaded)
                LoadReg(reg, RegCache.Mapping[reg]);
        }

        if (previouslyDirty)
            LoadCPSR();
        CPSRDirty = previouslyDirty;
    }
}

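// ARM B/BL, and BLX_imm when the condition field is 0xF: sign-extended 24-bit
// offset times four; for BLX the H bit adds 2 and the target is marked as Thumb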
void Compiler::A_Comp_BranchImm()
{
    int op = (CurInstr.Instr >> 24) & 1;
    s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
    u32 target = R15 + offset;
    bool link = op;

    if (CurInstr.Cond() == 0xF) // BLX_imm
    {
        target += (op << 1) + 1;
        link = true;
    }

    if (link)
        MOVI2R(MapReg(14), R15 - 4);

    Comp_JumpTo(target);
}

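// ARM BX/BLX (register): jump to Rn, switching ARM/Thumb based on bit 0;
// BLX also writes the return address into LR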
void Compiler::A_Comp_BranchXchangeReg()
{
    ARM64Reg rn = MapReg(CurInstr.A_Reg(0));
    MOV(W0, rn);
    if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
        MOVI2R(MapReg(14), R15 - 4);
    Comp_JumpTo(W0, true);
}

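// Thumb conditional branch: wraps the constant jump in a condition check and,
// if this block follows the condition-taken path, emits a block exit on the
// not-taken side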
void Compiler::T_Comp_BCOND()
{
    u32 cond = (CurInstr.Instr >> 8) & 0xF;
    FixupBranch skipExecute = CheckCondition(cond);

    s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
    Comp_JumpTo(R15 + offset + 1, true);

    Comp_BranchSpecialBehaviour();

    FixupBranch skipFailed = B();
    SetJumpTarget(skipExecute);
    Comp_AddCycles_C(true);

    if (CurInstr.BranchFlags & branch_FollowCondTaken)
    {
        SaveCPSR(false);
        RegCache.PrepareExit();

        ADD(W0, RCycles, ConstantCycles);
        ABI_PopRegisters(SavedRegs);
        RET();
    }

    SetJumpTarget(skipFailed);
}

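// Thumb unconditional branch: 11-bit signed offset, scaled by two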
void Compiler::T_Comp_B()
{
    s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
    Comp_JumpTo(R15 + offset + 1);
}

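// Thumb BX/BLX (register); BLX (register) only exists on the ARM9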
void Compiler::T_Comp_BranchXchangeReg()
{
    bool link = CurInstr.Instr & (1 << 7);

    if (link)
    {
        if (Num == 1)
        {
            printf("BLX unsupported on ARM7!!!\n");
            return;
        }
        MOV(W0, MapReg(CurInstr.A_Reg(3)));
        MOVI2R(MapReg(14), R15 - 1);
        Comp_JumpTo(W0, true);
    }
    else
    {
        ARM64Reg rn = MapReg(CurInstr.A_Reg(3));
        Comp_JumpTo(rn, true);
    }
}

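// first half of a split Thumb BL/BLX: LR temporarily holds PC plus the
// sign-extended upper offset (shifted left by 12)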
void Compiler::T_Comp_BL_LONG_1()
{
    s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
    MOVI2R(MapReg(14), R15 + offset);
    Comp_AddCycles_C();
}

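// second half: add the lower offset to the value left in LR, set LR to the
// return address (with the Thumb bit) and jump; on the ARM9 a cleared bit 12
// means BLX, i.e. a switch to ARM mode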
void Compiler::T_Comp_BL_LONG_2()
{
    ARM64Reg lr = MapReg(14);

    s32 offset = (CurInstr.Instr & 0x7FF) << 1;
    ADD(W0, lr, offset);
    MOVI2R(lr, (R15 - 2) | 1);

    Comp_JumpTo(W0, Num == 0 && !(CurInstr.Instr & (1 << 12)));
}

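// both halves of BL/BLX landed in the same block: the full target is known at
// compile time, so the branch can take the constant Comp_JumpTo path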
void Compiler::T_Comp_BL_Merged()
{
    Comp_AddCycles_C();

    R15 += 2;

    u32 upperPart = CurInstr.Instr >> 16;
    u32 target = (R15 - 2) + ((s32)((CurInstr.Instr & 0x7FF) << 21) >> 9);
    target += (upperPart & 0x7FF) << 1;

    if (Num == 1 || upperPart & (1 << 12))
        target |= 1;

    MOVI2R(MapReg(14), (R15 - 2) | 1);

    Comp_JumpTo(target);
}

}