jit: branch instructions

This commit is contained in:
RSDuck 2019-07-11 16:22:47 +02:00
parent 27cbc821b1
commit 83bd863361
10 changed files with 364 additions and 188 deletions

View File

@@ -564,11 +564,8 @@ void ARMv5::Execute()
printf("aaarg ungempappter raum %x\n", R[15]);*/
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
ARMJIT::CompileBlock(this);
else
Cycles += block();
Cycles += (block ? block : ARMJIT::CompileBlock(this))();
// TODO optimize this shit!!!
if (Halted)
{
@@ -650,10 +647,7 @@ void ARMv4::Execute()
printf("aaarg ungempappter raum %x\n", R[15]);*/
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4));
if (block == NULL)
ARMJIT::CompileBlock(this);
else
Cycles += block();
Cycles += (block ? block : ARMJIT::CompileBlock(this))();
// TODO optimize this shit!!!
if (Halted)
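
Both Execute loops collapse the old lookup/compile branch into one expression; this works because CompileBlock now returns the freshly compiled block (see the next file), so a block runs on the same pass that compiles it. Expanded for readability (a sketch, with instrAddr standing in for R[15] minus the pipeline offset):

    ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, instrAddr);
    if (!block)
        block = ARMJIT::CompileBlock(this); // now returns the block instead of void
    Cycles += block();                      // executed right away either way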

View File

@@ -121,7 +121,7 @@ void DeInit()
delete compiler;
}
void CompileBlock(ARM* cpu)
CompiledBlock CompileBlock(ARM* cpu)
{
bool thumb = cpu->CPSR & 0x20;
@@ -171,6 +171,8 @@ void CompileBlock(ARM* cpu)
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block);
return block;
}
void ResetBlocks()

View File

@@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
void Init();
void DeInit();
void CompileBlock(ARM* cpu);
CompiledBlock CompileBlock(ARM* cpu);
void ResetBlocks();

View File

@@ -0,0 +1,267 @@
#include "ARMJIT_Compiler.h"
using namespace Gen;
namespace ARMJIT
{
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
// we can simplify constant branches a lot
// it's not completely safe to assume things like which instructions to preload,
// but we'll see how it works out
u32 newPC;
u32 nextInstr[2];
u32 cycles = 0;
bool setupRegion = false;
if (addr & 0x1 && !Thumb)
{
CPSRDirty = true;
OR(32, R(RCPSR), Imm8(0x20));
}
else if (!(addr & 0x1) && Thumb)
{
CPSRDirty = true;
AND(32, R(RCPSR), Imm32(~0x20));
}
if (Num == 0)
{
ARMv5* cpu9 = (ARMv5*)CurCPU;
u32 oldregion = R15 >> 24;
u32 newregion = addr >> 24;
u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
cpu9->RegionCodeCycles = regionCodeCycles;
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
setupRegion = newregion != oldregion;
if (setupRegion)
cpu9->SetupCodeMem(addr);
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
nextInstr[0] = cpu9->CodeRead32(addr-2, true) >> 16;
cycles += CurCPU->CodeCycles;
nextInstr[1] = cpu9->CodeRead32(addr+2, false);
cycles += CurCPU->CodeCycles;
}
else
{
nextInstr[0] = cpu9->CodeRead32(addr, true);
nextInstr[1] = nextInstr[0] >> 16;
cycles += CurCPU->CodeCycles;
}
}
else
{
addr &= ~0x3;
newPC = addr+4;
nextInstr[0] = cpu9->CodeRead32(addr, true);
cycles += cpu9->CodeCycles;
nextInstr[1] = cpu9->CodeRead32(addr+4, false);
cycles += cpu9->CodeCycles;
}
}
else
{
ARMv4* cpu7 = (ARMv4*)CurCPU;
u32 codeRegion = addr >> 24;
u32 codeCycles = addr >> 15; // cheato
cpu7->CodeRegion = codeRegion;
cpu7->CodeCycles = codeCycles;
MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));
if (addr & 0x1)
{
addr &= ~0x1;
newPC = addr+2;
nextInstr[0] = ((ARMv4*)CurCPU)->CodeRead16(addr);
nextInstr[1] = ((ARMv4*)CurCPU)->CodeRead16(addr+2);
cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
}
else
{
addr &= ~0x3;
newPC = addr+4;
nextInstr[0] = cpu7->CodeRead32(addr);
nextInstr[1] = cpu7->CodeRead32(addr+4);
cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
}
}
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(nextInstr[0]));
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(nextInstr[1]));
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
if (setupRegion)
{
MOV(32, R(ABI_PARAM1), R(RCPU));
MOV(32, R(ABI_PARAM2), Imm32(newPC));
CALL((void*)&ARMv5::SetupCodeMem);
}
}
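
Since the branch target is a compile-time constant here, the whole pipeline reload is precomputed: newPC, both prefetched opcodes, and the fetch timing are evaluated during compilation and only written back with immediate MOVs. What the emitted code performs at runtime, as a standalone sketch (CpuState is a hypothetical stand-in for the ARM object):

    #include <cstdint>
    struct CpuState { uint32_t R15; uint32_t NextInstr[2]; int32_t Cycles; };

    // all four values below were computed while compiling; at runtime they are immediates
    void jumpToConstant(CpuState& cpu, uint32_t newPC, uint32_t ni0, uint32_t ni1, int32_t cycles)
    {
        cpu.R15 = newPC;
        cpu.NextInstr[0] = ni0;
        cpu.NextInstr[1] = ni1;
        cpu.Cycles += cycles; // omitted when folded into ConstantCycles instead
    }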
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFFFF0000);
bool previouslyDirty = CPSRDirty;
SaveCPSR();
if (restoreCPSR)
{
if (Thumb || CurInstr.Cond() >= 0xE)
{
for (int reg : hiRegsLoaded)
RegCache.UnloadRegister(reg);
}
else
{
// the ugly way...
// we save them now, only to load and save them again afterwards
for (int reg : hiRegsLoaded)
SaveReg(reg, RegCache.Mapping[reg]);
}
}
MOV(64, R(ABI_PARAM1), R(RCPU));
MOV(32, R(ABI_PARAM2), R(addr));
if (!restoreCPSR)
XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
else
MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
if (Num == 0)
CALL((void*)&ARMv5::JumpTo);
else
CALL((void*)&ARMv4::JumpTo);
if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
{
for (int reg : hiRegsLoaded)
LoadReg(reg, RegCache.Mapping[reg]);
}
if (previouslyDirty)
LoadCPSR();
CPSRDirty = previouslyDirty;
}
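
For targets only known at runtime the compiler defers to the interpreter: the CPU pointer, the target, and the restoreCPSR flag go into the ABI argument registers and the matching JumpTo is called, selected statically through Num rather than via the vtable. In plain C++ the emitted call amounts to (a sketch):

    ((ARMv5*)cpu)->JumpTo(addr, restoreCPSR); // Num == 0; ARMv4::JumpTo when Num == 1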
void Compiler::A_Comp_BranchImm()
{
int op = (CurInstr.Instr >> 24) & 1;
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
u32 target = R15 + offset;
bool link = op;
if (CurInstr.Cond() == 0xF) // BLX_imm
{
target += (op << 1) + 1;
link = true;
}
if (link)
MOV(32, MapReg(14), Imm32(R15 - 4));
Comp_JumpTo(target);
}
void Compiler::A_Comp_BranchXchangeReg()
{
OpArg rn = MapReg(CurInstr.A_Reg(0));
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
MOV(32, MapReg(14), Imm32(R15 - 4));
Comp_JumpTo(rn.GetSimpleReg());
}
void Compiler::T_Comp_BCOND()
{
u32 cond = (CurInstr.Instr >> 8) & 0xF;
FixupBranch skipExecute = CheckCondition(cond);
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
FixupBranch skipFailed = J();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
SetJumpTarget(skipFailed);
}
void Compiler::T_Comp_B()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
Comp_JumpTo(R15 + offset + 1);
}
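
The offset extraction in T_Comp_B (and the similar shifts in A_Comp_BranchImm and T_Comp_BCOND) is the usual left-align/arithmetic-right-shift sign extension, with the right shift stopped one bit short so the halfword offset comes out scaled to bytes. A standalone illustration (a sketch, not part of the commit):

    #include <cstdint>

    // signed 11-bit offset of a Thumb B, scaled to bytes
    int32_t thumbBOffset(uint32_t instr)
    {
        // push bits 10..0 to the top, shift back down arithmetically,
        // leaving one bit of the left shift in place (halfwords -> bytes)
        return (int32_t)((instr & 0x7FF) << 21) >> 20;
    }
    // e.g. an encoded offset field of 0x7FF (-1 halfword) yields -2 bytes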
void Compiler::T_Comp_BranchXchangeReg()
{
bool link = CurInstr.Instr & (1 << 7);
if (link && Num == 1)
{
printf("BLX unsupported on ARM7!!!\n");
return;
}
OpArg rn = MapReg(CurInstr.A_Reg(3));
if (link)
MOV(32, MapReg(14), Imm32(R15 - 1));
Comp_JumpTo(rn.GetSimpleReg());
}
void Compiler::T_Comp_BL_LONG_1()
{
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
MOV(32, MapReg(14), Imm32(R15 + offset));
Comp_AddCycles_C();
}
void Compiler::T_Comp_BL_LONG_2()
{
OpArg lr = MapReg(14);
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset));
MOV(32, lr, Imm32((R15 - 2) | 1));
if (Num == 1 || CurInstr.Instr & (1 << 12))
OR(32, R(RSCRATCH), Imm8(1));
Comp_JumpTo(RSCRATCH);
}
void Compiler::T_Comp_BL_Merged(FetchedInstr part1)
{
assert(part1.Info.Kind == ARMInstrInfo::tk_BL_LONG_1);
Comp_AddCycles_C();
u32 target = (R15 - 2) + ((s32)((part1.Instr & 0x7FF) << 21) >> 9);
target += (CurInstr.Instr & 0x7FF) << 1;
if (Num == 1 || CurInstr.Instr & (1 << 12))
target |= 1;
MOV(32, MapReg(14), Imm32((R15 - 2) | 1));
Comp_JumpTo(target);
}
}
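
T_Comp_BL_Merged makes the split Thumb BL encoding explicit: the first halfword contributes the sign-extended high bits of the offset, the second the low bits. As a standalone computation (a sketch; pc corresponds to R15 - 2 in the handler above):

    #include <cstdint>

    uint32_t thumbBLTarget(uint32_t pc, uint16_t part1, uint16_t part2)
    {
        int32_t hi = (int32_t)((part1 & 0x7FFu) << 21) >> 9; // offset bits 22..12, sign-extended
        return pc + hi + ((part2 & 0x7FF) << 1);             // plus offset bits 11..1
    }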

View File

@@ -50,50 +50,6 @@ Compiler::Compiler()
ResetStart = GetWritableCodePtr();
}
void* Compiler::Gen_ChangeCPSRRoutine()
{
void* res = (void*)GetWritableCodePtr();
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
CMP(32, R(RSCRATCH), Imm8(0x11));
FixupBranch fiq = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x12));
FixupBranch irq = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x13));
FixupBranch svc = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x17));
FixupBranch abt = J_CC(CC_E);
CMP(32, R(RSCRATCH), Imm8(0x1B));
FixupBranch und = J_CC(CC_E);
SetJumpTarget(fiq);
SetJumpTarget(irq);
SetJumpTarget(svc);
SetJumpTarget(abt);
SetJumpTarget(und);
return res;
}
DataRegion Compiler::ClassifyAddress(u32 addr)
{
if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase + ((ARMv5*)CurCPU)->DTCMSize)
return dataRegionDTCM;
switch (addr & 0xFF000000)
{
case 0x02000000: return dataRegionMainRAM;
case 0x03000000: return Num == 1 && (addr & 0xF00000) == 0x800000 ? dataRegionWRAM7 : dataRegionSWRAM;
case 0x04000000: return dataRegionIO;
case 0x06000000: return dataRegionVRAM;
}
return dataRegionGeneric;
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
@@ -123,6 +79,29 @@ void Compiler::SaveReg(int reg, X64Reg nativeReg)
MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
}
// invalidates RSCRATCH and RSCRATCH3
Gen::FixupBranch Compiler::CheckCondition(u32 cond)
{
if (cond >= 0x8)
{
static_assert(RSCRATCH3 == ECX);
MOV(32, R(RSCRATCH3), R(RCPSR));
SHR(32, R(RSCRATCH3), Imm8(28));
MOV(32, R(RSCRATCH), Imm32(1));
SHL(32, R(RSCRATCH), R(RSCRATCH3));
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
return J_CC(CC_Z);
}
else
{
// could have used a LUT, but then where would be the fun?
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
return J_CC(cond & 1 ? CC_NZ : CC_Z);
}
}
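
For cond >= 8 the helper builds a one-hot mask from the CPSR flag nibble and tests it against ARM::ConditionTable[cond]. For cond < 8 each pair of condition codes examines exactly one flag, with the low bit of cond choosing the polarity, so a single TEST suffices; the bit arithmetic selects the flag position. A standalone check of that expression (a sketch):

    // CPSR flag bits: N=31, Z=30, C=29, V=28
    // EQ/NE (0,1) -> Z, CS/CC (2,3) -> C, MI/PL (4,5) -> N, VS/VC (6,7) -> V
    int flagBit(unsigned cond)
    {
        return 28 + ((((~(cond >> 1)) & 1) << 1) | (((cond >> 2) & 1) ^ ((cond >> 1) & 1)));
    }
    // flagBit(0) == 30, flagBit(2) == 29, flagBit(4) == 31, flagBit(6) == 28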
CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
{
if (IsAlmostFull())
@@ -140,6 +119,8 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
CodeRegion = cpu->CodeRegion;
CurCPU = cpu;
bool mergedThumbBL = false;
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
MOV(64, R(RCPU), ImmPtr(cpu));
@@ -167,17 +148,10 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1]));
}
if (comp == NULL || CurInstr.Info.Branches())
if (comp == NULL)
SaveCPSR();
}
// run interpreter
cpu->CodeCycles = CurInstr.CodeCycles;
cpu->R[15] = R15;
cpu->CurInstr = CurInstr.Instr;
cpu->NextInstr[0] = CurInstr.NextInstr[0];
cpu->NextInstr[1] = CurInstr.NextInstr[1];
if (comp != NULL)
RegCache.Prepare(i);
else
@@ -185,58 +159,44 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
if (Thumb)
{
u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
}
if (i < instrsCount - 1 && CurInstr.Info.Kind == ARMInstrInfo::tk_BL_LONG_1
&& instrs[i + 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_2)
mergedThumbBL = true;
else
(this->*comp)();
{
u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
if (comp == NULL)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ARMInterpreter::THUMBInstrTable[icode](cpu);
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
}
else if (mergedThumbBL)
T_Comp_BL_Merged(instrs[i - 1]);
else
(this->*comp)();
}
}
else
{
u32 cond = CurInstr.Cond();
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
ARMInterpreter::A_BLX_IMM(cpu);
if (comp)
(this->*comp)();
else
{
MOV(64, R(ABI_PARAM1), R(RCPU));
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
}
}
else if (cond == 0xF)
{
Comp_AddCycles_C();
cpu->AddCycles_C();
}
else
{
FixupBranch skipExecute;
if (cond < 0xE)
{
if (cond >= 0x8)
{
static_assert(RSCRATCH3 == ECX);
MOV(32, R(RSCRATCH3), R(RCPSR));
SHR(32, R(RSCRATCH3), Imm8(28));
MOV(32, R(RSCRATCH), Imm32(1));
SHL(32, R(RSCRATCH), R(RSCRATCH3));
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
skipExecute = J_CC(CC_Z);
}
else
{
// could have used a LUT, but then where would be the fun?
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
skipExecute = J_CC(cond & 1 ? CC_NZ : CC_Z);
}
}
skipExecute = CheckCondition(cond);
u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
if (comp == NULL)
@@ -258,19 +218,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
SetJumpTarget(skipFailed);
}
if (cpu->CheckCondition(cond))
ARMInterpreter::ARMInstrTable[icode](cpu);
else
cpu->AddCycles_C();
}
}
/*
we don't need to collect the interpreted cycles,
since cpu->Cycles is taken into account by the dispatcher.
*/
if (comp == NULL && i != instrsCount - 1)
LoadCPSR();
}
@@ -367,7 +317,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
// LDM/STM
NULL, NULL,
// Branch
NULL, NULL, NULL, NULL, NULL,
A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchXchangeReg, A_Comp_BranchXchangeReg,
// system stuff
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
@@ -389,7 +339,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
// pc/sp relative
T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP,
// LDR pcrel
NULL,
T_Comp_LoadPCRel,
// LDR/STR reg offset
T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg,
// LDR/STR sign extended, half
@@ -399,25 +349,27 @@ CompileFunc Compiler::GetCompFunc(int kind)
// LDR/STR half imm offset
T_Comp_MemImmHalf, T_Comp_MemImmHalf,
// LDR/STR sp rel
NULL, NULL,
T_Comp_MemSPRel, T_Comp_MemSPRel,
// PUSH/POP
NULL, NULL,
T_Comp_PUSH_POP, T_Comp_PUSH_POP,
// LDMIA, STMIA
NULL, NULL,
NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL
T_Comp_LDMIA_STMIA, T_Comp_LDMIA_STMIA,
// Branch
T_Comp_BCOND, T_Comp_BranchXchangeReg, T_Comp_BranchXchangeReg, T_Comp_B, T_Comp_BL_LONG_1, T_Comp_BL_LONG_2,
// Unk, SVC
NULL, NULL
};
return Thumb ? T_Comp[kind] : A_Comp[kind];
}
void Compiler::Comp_AddCycles_C()
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (CurInstr.Cond() < 0xE)
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
@@ -429,25 +381,10 @@ void Compiler::Comp_AddCycles_CI(u32 i)
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
if (CurInstr.Cond() < 0xE)
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
else
ConstantCycles += cycles;
}
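
Both cycle helpers implement the same split: cycles of instructions that are certain to execute are summed into ConstantCycles at compile time and paid with one ADD when the block returns, while conditionally executed instructions get an inline ADD because whether they run is only known at runtime. A minimal standalone model (a sketch; the names are hypothetical):

    struct CycleModel
    {
        int ConstantCycles = 0; // accumulated while compiling, added once per block
        void Add(int cycles, bool alwaysExecuted)
        {
            if (alwaysExecuted)
                ConstantCycles += cycles;
            else
                EmitRuntimeAdd(cycles); // stands in for ADD [cpu->Cycles], imm
        }
        void EmitRuntimeAdd(int cycles) { /* emitter call in the real code */ }
    };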
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
// potential bug: if a register that is still cached gets saved on a mode switch,
// the old value is stored
SaveCPSR();
MOV(64, R(ABI_PARAM1), R(RCPU));
MOV(32, R(ABI_PARAM2), R(addr));
MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
if (Num == 0)
CALL((void*)&ARMv5::JumpTo);
else
CALL((void*)&ARMv4::JumpTo);
}
}

View File

@@ -22,19 +22,6 @@ class Compiler;
typedef void (Compiler::*CompileFunc)();
enum DataRegion
{
dataRegionGeneric, // hey, that's me!
dataRegionMainRAM,
dataRegionSWRAM,
dataRegionVRAM,
dataRegionIO,
dataRegionExclusive,
dataRegionsCount,
dataRegionDTCM = dataRegionExclusive,
dataRegionWRAM7 = dataRegionExclusive,
};
class Compiler : public Gen::X64CodeBlock
{
public:
@@ -49,8 +36,9 @@ private:
CompileFunc GetCompFunc(int kind);
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
void Comp_AddCycles_C();
void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 i);
enum
@@ -63,8 +51,6 @@ private:
opInvertOp2 = 1 << 5,
};
DataRegion ClassifyAddress(u32 addr);
void A_Comp_Arith();
void A_Comp_MovOp();
void A_Comp_CmpOp();
@@ -73,6 +59,9 @@ private:
void A_Comp_MemHalf();
void A_Comp_LDM_STM();
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
@@ -91,6 +80,13 @@ private:
void T_Comp_PUSH_POP();
void T_Comp_LDMIA_STMIA();
void T_Comp_BCOND();
void T_Comp_B();
void T_Comp_BranchXchangeReg();
void T_Comp_BL_LONG_1();
void T_Comp_BL_LONG_2();
void T_Comp_BL_Merged(FetchedInstr prefix);
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
@@ -119,6 +115,8 @@ private:
void LoadCPSR();
void SaveCPSR();
Gen::FixupBranch CheckCondition(u32 cond);
Gen::OpArg MapReg(int reg)
{
if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)

View File

@@ -462,38 +462,10 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
{
int regsCount = regs.Count();
const u8 userModeOffsets[] =
{
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
offsetof(ARM, R[12]), offsetof(ARM, R[13]), offsetof(ARM, R[14]), 0,
offsetof(ARM, R_FIQ[0]), offsetof(ARM, R_FIQ[1]), offsetof(ARM, R_FIQ[2]), offsetof(ARM, R_FIQ[3]),
offsetof(ARM, R_FIQ[4]), offsetof(ARM, R_FIQ[5]), offsetof(ARM, R_FIQ[6]), 0,
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
offsetof(ARM, R[12]), offsetof(ARM, R_IRQ[13]), offsetof(ARM, R_IRQ[14]), 0,
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
offsetof(ARM, R[12]), offsetof(ARM, R_SVC[13]), offsetof(ARM, R_SVC[14]), 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
offsetof(ARM, R[12]), offsetof(ARM, R_ABT[13]), offsetof(ARM, R_ABT[14]), 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0,
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
offsetof(ARM, R[12]), offsetof(ARM, R_UND[13]), offsetof(ARM, R_UND[14]), 0,
};
if (decrement)
{
MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4));
preinc = !preinc;
preinc ^= true;
}
else
MOV(32, R(ABI_PARAM1), rb);
@@ -516,16 +488,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
{
if (regs[reg])
{
if (usermode && reg >= 8 && reg < 15)
/*if (usermode && reg >= 8 && reg < 15)
{
MOV(32, R(RSCRATCH2), R(RCPSR));
AND(32, R(RSCRATCH2), Imm8(0x1F));
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
POP(RSCRATCH);
MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH));
}
else if (RegCache.Mapping[reg] == INVALID_REG)
else */if (RegCache.Mapping[reg] == INVALID_REG)
{
assert(reg != 15);
@@ -552,16 +524,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
{
for (int reg : regs)
{
if (usermode && reg >= 8 && reg < 15)
/*if (usermode && reg >= 8 && reg < 15)
{
MOV(32, R(RSCRATCH), R(RCPSR));
AND(32, R(RSCRATCH), Imm8(0x1F));
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH));
PUSH(RSCRATCH);
}
else if (RegCache.Mapping[reg] == INVALID_REG)
else */if (RegCache.Mapping[reg] == INVALID_REG)
{
LoadReg(reg, RSCRATCH);
PUSH(RSCRATCH);

View File

@@ -255,7 +255,7 @@ const u32 T_STMIA = T_Read8 | T_Write8 | tk(tk_STMIA);
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
const u32 T_BLX_REG = T_BranchAlways | T_ReadR15 | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
const u32 T_BLX_REG = T_BranchAlways | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
const u32 T_B = T_BranchAlways | tk(tk_B);
const u32 T_BL_LONG_1 = T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_1);
const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_2);
@@ -301,6 +301,10 @@ Info Decode(bool thumb, u32 num, u32 instr)
res.DstRegs |= (1 << 13);
if (data & T_ReadR15)
res.SrcRegs |= (1 << 15);
if (data & T_WriteR14)
res.DstRegs |= (1 << 14);
if (data & T_ReadR14)
res.SrcRegs |= (1 << 14);
if (data & T_BranchAlways)
res.DstRegs |= (1 << 15);

View File

@@ -202,6 +202,7 @@ enum
tk_POP,
tk_LDMIA,
tk_STMIA,
tk_BCOND,
tk_BX,
tk_BLX_REG,

View File

@@ -54,6 +54,7 @@ add_library(core STATIC
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
ARMJIT_x64/ARMJIT_Branch.cpp
dolphin/CommonFuncs.cpp
dolphin/x64ABI.cpp