jit: branch instructions
This commit is contained in:
parent
ff97211114
commit
c58fdbd66b
10
src/ARM.cpp
10
src/ARM.cpp
|
@ -521,10 +521,7 @@ void ARMv5::Execute()
|
|||
printf("aaarg ungempappter raum %x\n", R[15]);*/
|
||||
|
||||
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(0, R[15] - ((CPSR&0x20)?2:4));
|
||||
if (block == NULL)
|
||||
ARMJIT::CompileBlock(this);
|
||||
else
|
||||
Cycles += block();
|
||||
Cycles += (block ? block : ARMJIT::CompileBlock(this))();
|
||||
|
||||
// TODO optimize this shit!!!
|
||||
if (Halted)
|
||||
|
@ -607,10 +604,7 @@ void ARMv4::Execute()
|
|||
printf("aaarg ungempappter raum %x\n", R[15]);*/
|
||||
|
||||
ARMJIT::CompiledBlock block = ARMJIT::LookUpBlock(1, R[15] - ((CPSR&0x20)?2:4));
|
||||
if (block == NULL)
|
||||
ARMJIT::CompileBlock(this);
|
||||
else
|
||||
Cycles += block();
|
||||
Cycles += (block ? block : ARMJIT::CompileBlock(this))();
|
||||
|
||||
// TODO optimize this shit!!!
|
||||
if (Halted)
|
||||
|
|
|
@ -121,7 +121,7 @@ void DeInit()
|
|||
delete compiler;
|
||||
}
|
||||
|
||||
void CompileBlock(ARM* cpu)
|
||||
CompiledBlock CompileBlock(ARM* cpu)
|
||||
{
|
||||
bool thumb = cpu->CPSR & 0x20;
|
||||
|
||||
|
@ -171,6 +171,8 @@ void CompileBlock(ARM* cpu)
|
|||
CompiledBlock block = compiler->CompileBlock(cpu, instrs, i);
|
||||
|
||||
InsertBlock(cpu->Num, r15Initial - (thumb ? 2 : 4), block);
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
void ResetBlocks()
|
||||
|
|
|
@ -109,7 +109,7 @@ inline void InsertBlock(u32 num, u32 addr, CompiledBlock func)
|
|||
void Init();
|
||||
void DeInit();
|
||||
|
||||
void CompileBlock(ARM* cpu);
|
||||
CompiledBlock CompileBlock(ARM* cpu);
|
||||
|
||||
void ResetBlocks();
|
||||
|
||||
|
|
|
@ -0,0 +1,267 @@
|
|||
#include "ARMJIT_Compiler.h"
|
||||
|
||||
using namespace Gen;
|
||||
|
||||
namespace ARMJIT
|
||||
{
|
||||
|
||||
// Emits a branch to a target address that is known at compile time.
//
// addr: branch target. Bit 0 selects the instruction set of the target
//       (set = Thumb, clear = ARM) and is masked off before the address is used.
// forceNonConstantCycles: when true, the fetch cycles are always added by an
//       emitted ADD on ARM::Cycles instead of being folded into ConstantCycles.
//
// The pipeline refill is performed at compile time: the two opcodes following
// the target are read through CurCPU right here and written into the block as
// immediates, together with the new R15 and the code timing state.
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
{
    // we can simplify constant branches by a lot
    // it's not completely safe to assume stuff like, which instructions to preload
    // we'll see how it works out

    u32 newPC;
    u32 nextInstr[2];
    u32 cycles = 0;
    bool setupRegion = false;

    // toggle the T bit (0x20) of the cached CPSR when the instruction set changes
    if (addr & 0x1 && !Thumb)
    {
        CPSRDirty = true;
        OR(32, R(RCPSR), Imm8(0x20));
    }
    else if (!(addr & 0x1) && Thumb)
    {
        CPSRDirty = true;
        AND(32, R(RCPSR), Imm32(~0x20));
    }

    if (Num == 0)
    {
        ARMv5* cpu9 = (ARMv5*)CurCPU;

        u32 oldregion = R15 >> 24;
        u32 newregion = addr >> 24;

        // update the timing state both on the CPU object (needed for the
        // compile time reads below) and in the emitted code
        u32 regionCodeCycles = cpu9->MemTimings[addr >> 12][0];
        cpu9->RegionCodeCycles = regionCodeCycles;

        MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));

        setupRegion = newregion != oldregion;
        if (setupRegion)
            cpu9->SetupCodeMem(addr);

        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            // two-opcodes-at-once fetch
            // doesn't matter if we put garbage in the MSbs there
            if (addr & 0x2)
            {
                nextInstr[0] = cpu9->CodeRead32(addr-2, true) >> 16;
                cycles += cpu9->CodeCycles;
                nextInstr[1] = cpu9->CodeRead32(addr+2, false);
                cycles += cpu9->CodeCycles;
            }
            else
            {
                nextInstr[0] = cpu9->CodeRead32(addr, true);
                nextInstr[1] = nextInstr[0] >> 16;
                cycles += cpu9->CodeCycles;
            }
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            nextInstr[0] = cpu9->CodeRead32(addr, true);
            cycles += cpu9->CodeCycles;
            nextInstr[1] = cpu9->CodeRead32(addr+4, false);
            cycles += cpu9->CodeCycles;
        }
    }
    else
    {
        ARMv4* cpu7 = (ARMv4*)CurCPU;

        u32 codeRegion = addr >> 24;
        u32 codeCycles = addr >> 15; // cheato

        cpu7->CodeRegion = codeRegion;
        cpu7->CodeCycles = codeCycles;

        // mirror both fields in the emitted code; the second store has to go
        // to CodeCycles, not to CodeRegion a second time
        MOV(32, MDisp(RCPU, offsetof(ARM, CodeRegion)), Imm32(codeRegion));
        MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(codeCycles));

        if (addr & 0x1)
        {
            addr &= ~0x1;
            newPC = addr+2;

            nextInstr[0] = cpu7->CodeRead16(addr);
            nextInstr[1] = cpu7->CodeRead16(addr+2);
            cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
        }
        else
        {
            addr &= ~0x3;
            newPC = addr+4;

            nextInstr[0] = cpu7->CodeRead32(addr);
            nextInstr[1] = cpu7->CodeRead32(addr+4);
            cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
        }
    }

    MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
    MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(nextInstr[0]));
    MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(nextInstr[1]));

    // unconditional instructions can have their cycles accumulated at compile time
    if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
        ConstantCycles += cycles;
    else
        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));

    if (setupRegion)
    {
        // RCPU holds a host pointer, so the full 64 bits have to be passed
        MOV(64, R(ABI_PARAM1), R(RCPU));
        MOV(32, R(ABI_PARAM2), Imm32(newPC));
        CALL((void*)&ARMv5::SetupCodeMem);
    }
}
|
||||
|
||||
// Emits a branch to an address that is only known at run time by calling the
// CPU's JumpTo (ARMv5 for Num == 0, ARMv4 otherwise).
//
// addr: host register holding the branch target.
// restoreCPSR: forwarded as the third argument of JumpTo. Presumably this makes
//       JumpTo restore CPSR and thereby possibly switch the register bank, which
//       is why dirty high registers are written back first - TODO confirm
//       against ARM::JumpTo.
//
// The cached CPSR is written back before the call and reloaded afterwards if
// it was dirty on entry.
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
    // R8-R15 are bits 8-15 of the 16 bit register mask. A mask above bit 15
    // would always produce an empty set and silently disable the loops below.
    BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
    bool previouslyDirty = CPSRDirty;
    SaveCPSR();

    if (restoreCPSR)
    {
        if (Thumb || CurInstr.Cond() >= 0xE)
        {
            for (int reg : hiRegsLoaded)
                RegCache.UnloadRegister(reg);
        }
        else
        {
            // the ugly way...
            // we only save them, to load and save them again
            for (int reg : hiRegsLoaded)
                SaveReg(reg, RegCache.Mapping[reg]);
        }
    }

    MOV(64, R(ABI_PARAM1), R(RCPU));
    MOV(32, R(ABI_PARAM2), R(addr));
    if (!restoreCPSR)
        XOR(32, R(ABI_PARAM3), R(ABI_PARAM3));
    else
        MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
    if (Num == 0)
        CALL((void*)&ARMv5::JumpTo);
    else
        CALL((void*)&ARMv4::JumpTo);

    // conditional ARM instruction: the registers stay mapped for the
    // not-taken path, so bring their values back after the call
    if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
    {
        for (int reg : hiRegsLoaded)
            LoadReg(reg, RegCache.Mapping[reg]);
    }

    if (previouslyDirty)
        LoadCPSR();
    CPSRDirty = previouslyDirty;
}
|
||||
|
||||
void Compiler::A_Comp_BranchImm()
|
||||
{
|
||||
int op = (CurInstr.Instr >> 24) & 1;
|
||||
s32 offset = (s32)(CurInstr.Instr << 8) >> 6;
|
||||
u32 target = R15 + offset;
|
||||
bool link = op;
|
||||
|
||||
if (CurInstr.Cond() == 0xF) // BLX_imm
|
||||
{
|
||||
target += (op << 1) + 1;
|
||||
link = true;
|
||||
}
|
||||
|
||||
if (link)
|
||||
MOV(32, MapReg(14), Imm32(R15 - 4));
|
||||
|
||||
Comp_JumpTo(target);
|
||||
}
|
||||
|
||||
void Compiler::A_Comp_BranchXchangeReg()
|
||||
{
|
||||
OpArg rn = MapReg(CurInstr.A_Reg(0));
|
||||
if ((CurInstr.Instr & 0xF0) == 0x30) // BLX_reg
|
||||
MOV(32, MapReg(14), Imm32(R15 - 4));
|
||||
Comp_JumpTo(rn.GetSimpleReg());
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BCOND()
|
||||
{
|
||||
u32 cond = (CurInstr.Instr >> 8) & 0xF;
|
||||
FixupBranch skipExecute = CheckCondition(cond);
|
||||
|
||||
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
|
||||
Comp_JumpTo(R15 + offset + 1, true);
|
||||
|
||||
FixupBranch skipFailed = J();
|
||||
SetJumpTarget(skipExecute);
|
||||
Comp_AddCycles_C(true);
|
||||
SetJumpTarget(skipFailed);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_B()
|
||||
{
|
||||
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 20;
|
||||
Comp_JumpTo(R15 + offset + 1);
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BranchXchangeReg()
|
||||
{
|
||||
bool link = CurInstr.Instr & (1 << 7);
|
||||
if (link && Num == 1)
|
||||
{
|
||||
printf("BLX unsupported on ARM7!!!\n");
|
||||
return;
|
||||
}
|
||||
|
||||
OpArg rn = MapReg(CurInstr.A_Reg(3));
|
||||
if (link)
|
||||
MOV(32, MapReg(14), Imm32(R15 - 1));
|
||||
Comp_JumpTo(rn.GetSimpleReg());
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BL_LONG_1()
|
||||
{
|
||||
s32 offset = (s32)((CurInstr.Instr & 0x7FF) << 21) >> 9;
|
||||
MOV(32, MapReg(14), Imm32(R15 + offset));
|
||||
Comp_AddCycles_C();
|
||||
}
|
||||
|
||||
void Compiler::T_Comp_BL_LONG_2()
|
||||
{
|
||||
OpArg lr = MapReg(14);
|
||||
s32 offset = (CurInstr.Instr & 0x7FF) << 1;
|
||||
LEA(32, RSCRATCH, MDisp(lr.GetSimpleReg(), offset));
|
||||
MOV(32, lr, Imm32((R15 - 2) | 1));
|
||||
if (Num == 1 || CurInstr.Instr & (1 << 12))
|
||||
OR(32, R(RSCRATCH), Imm8(1));
|
||||
Comp_JumpTo(RSCRATCH);
|
||||
}
|
||||
|
||||
// Compiles both halves of a Thumb long branch with link as one constant jump.
// CurInstr is the second half; part1 is the first half (must be BL_LONG_1).
void Compiler::T_Comp_BL_Merged(FetchedInstr part1)
{
    assert(part1.Info.Kind == ARMInstrInfo::tk_BL_LONG_1);
    Comp_AddCycles_C();

    // R15 - 2 is the R15 value the first half would have seen
    const u32 base = R15 - 2;
    const s32 upperOffset = (s32)((part1.Instr & 0x7FF) << 21) >> 9;
    const u32 lowerOffset = (CurInstr.Instr & 0x7FF) << 1;

    u32 destination = base + upperOffset + lowerOffset;
    const bool forceThumbBit = Num == 1 || CurInstr.Instr & (1 << 12);
    if (forceThumbBit)
        destination |= 1;

    MOV(32, MapReg(14), Imm32((R15 - 2) | 1));

    Comp_JumpTo(destination);
}
|
||||
|
||||
}
|
|
@ -50,50 +50,6 @@ Compiler::Compiler()
|
|||
ResetStart = GetWritableCodePtr();
|
||||
}
|
||||
|
||||
void* Compiler::Gen_ChangeCPSRRoutine()
|
||||
{
|
||||
void* res = (void*)GetWritableCodePtr();
|
||||
|
||||
MOV(32, R(RSCRATCH), R(RCPSR));
|
||||
AND(32, R(RSCRATCH), Imm8(0x1F));
|
||||
CMP(32, R(RSCRATCH), Imm8(0x11));
|
||||
FixupBranch fiq = J_CC(CC_E);
|
||||
CMP(32, R(RSCRATCH), Imm8(0x12));
|
||||
FixupBranch irq = J_CC(CC_E);
|
||||
CMP(32, R(RSCRATCH), Imm8(0x13));
|
||||
FixupBranch svc = J_CC(CC_E);
|
||||
CMP(32, R(RSCRATCH), Imm8(0x17));
|
||||
FixupBranch abt = J_CC(CC_E);
|
||||
CMP(32, R(RSCRATCH), Imm8(0x1B));
|
||||
FixupBranch und = J_CC(CC_E);
|
||||
|
||||
SetJumpTarget(fiq);
|
||||
|
||||
SetJumpTarget(irq);
|
||||
|
||||
SetJumpTarget(svc);
|
||||
|
||||
SetJumpTarget(abt);
|
||||
|
||||
SetJumpTarget(und);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
// Maps a guest address to the data region it falls into.
//
// On the ARM9 (Num == 0) the DTCM window is checked first. Otherwise the top
// byte of the address decides; anything not listed is dataRegionGeneric.
// For 0x03xxxxxx the ARM7 (Num == 1) gets dataRegionWRAM7 when bits 20-23
// equal 0x8, everything else there is dataRegionSWRAM.
DataRegion Compiler::ClassifyAddress(u32 addr)
{
    if (Num == 0)
    {
        ARMv5* cpu9 = (ARMv5*)CurCPU;
        // The window is [DTCMBase, DTCMBase + DTCMSize). The unsigned
        // subtraction wraps for addr < DTCMBase, so one compare covers both
        // bounds and cannot overflow at the top of the address space.
        if (addr - cpu9->DTCMBase < cpu9->DTCMSize)
            return dataRegionDTCM;
    }

    switch (addr & 0xFF000000)
    {
    case 0x02000000: return dataRegionMainRAM;
    case 0x03000000: return Num == 1 && (addr & 0xF00000) == 0x800000 ? dataRegionWRAM7 : dataRegionSWRAM;
    case 0x04000000: return dataRegionIO;
    case 0x06000000: return dataRegionVRAM;
    }
    return dataRegionGeneric;
}
|
||||
|
||||
void Compiler::LoadCPSR()
|
||||
{
|
||||
assert(!CPSRDirty);
|
||||
|
@ -123,6 +79,29 @@ void Compiler::SaveReg(int reg, X64Reg nativeReg)
|
|||
MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
|
||||
}
|
||||
|
||||
// invalidates RSCRATCH and RSCRATCH3
|
||||
Gen::FixupBranch Compiler::CheckCondition(u32 cond)
|
||||
{
|
||||
if (cond >= 0x8)
|
||||
{
|
||||
static_assert(RSCRATCH3 == ECX);
|
||||
MOV(32, R(RSCRATCH3), R(RCPSR));
|
||||
SHR(32, R(RSCRATCH3), Imm8(28));
|
||||
MOV(32, R(RSCRATCH), Imm32(1));
|
||||
SHL(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
|
||||
|
||||
return J_CC(CC_Z);
|
||||
}
|
||||
else
|
||||
{
|
||||
// could have used a LUT, but then where would be the fun?
|
||||
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
|
||||
|
||||
return J_CC(cond & 1 ? CC_NZ : CC_Z);
|
||||
}
|
||||
}
|
||||
|
||||
CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount)
|
||||
{
|
||||
if (IsAlmostFull())
|
||||
|
@ -140,6 +119,8 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
|||
CodeRegion = cpu->CodeRegion;
|
||||
CurCPU = cpu;
|
||||
|
||||
bool mergedThumbBL = false;
|
||||
|
||||
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
|
||||
|
||||
MOV(64, R(RCPU), ImmPtr(cpu));
|
||||
|
@ -167,23 +148,21 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
|||
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1]));
|
||||
}
|
||||
|
||||
if (comp == NULL || CurInstr.Info.Branches())
|
||||
if (comp == NULL)
|
||||
SaveCPSR();
|
||||
}
|
||||
|
||||
// run interpreter
|
||||
cpu->CodeCycles = CurInstr.CodeCycles;
|
||||
cpu->R[15] = R15;
|
||||
cpu->CurInstr = CurInstr.Instr;
|
||||
cpu->NextInstr[0] = CurInstr.NextInstr[0];
|
||||
cpu->NextInstr[1] = CurInstr.NextInstr[1];
|
||||
|
||||
if (comp != NULL)
|
||||
RegCache.Prepare(i);
|
||||
else
|
||||
RegCache.Flush();
|
||||
|
||||
if (Thumb)
|
||||
{
|
||||
if (i < instrsCount - 1 && CurInstr.Info.Kind == ARMInstrInfo::tk_BL_LONG_1
|
||||
&& instrs[i + 1].Info.Kind == ARMInstrInfo::tk_BL_LONG_2)
|
||||
mergedThumbBL = true;
|
||||
else
|
||||
{
|
||||
u32 icode = (CurInstr.Instr >> 6) & 0x3FF;
|
||||
if (comp == NULL)
|
||||
|
@ -192,51 +171,32 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
|||
|
||||
ABI_CallFunction(ARMInterpreter::THUMBInstrTable[icode]);
|
||||
}
|
||||
else if (mergedThumbBL)
|
||||
T_Comp_BL_Merged(instrs[i - 1]);
|
||||
else
|
||||
(this->*comp)();
|
||||
|
||||
ARMInterpreter::THUMBInstrTable[icode](cpu);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 cond = CurInstr.Cond();
|
||||
if (CurInstr.Info.Kind == ARMInstrInfo::ak_BLX_IMM)
|
||||
{
|
||||
if (comp)
|
||||
(this->*comp)();
|
||||
else
|
||||
{
|
||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||
ABI_CallFunction(ARMInterpreter::A_BLX_IMM);
|
||||
|
||||
ARMInterpreter::A_BLX_IMM(cpu);
|
||||
}
|
||||
}
|
||||
else if (cond == 0xF)
|
||||
{
|
||||
Comp_AddCycles_C();
|
||||
cpu->AddCycles_C();
|
||||
}
|
||||
else
|
||||
{
|
||||
FixupBranch skipExecute;
|
||||
if (cond < 0xE)
|
||||
{
|
||||
if (cond >= 0x8)
|
||||
{
|
||||
static_assert(RSCRATCH3 == ECX);
|
||||
MOV(32, R(RSCRATCH3), R(RCPSR));
|
||||
SHR(32, R(RSCRATCH3), Imm8(28));
|
||||
MOV(32, R(RSCRATCH), Imm32(1));
|
||||
SHL(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
TEST(32, R(RSCRATCH), Imm32(ARM::ConditionTable[cond]));
|
||||
|
||||
skipExecute = J_CC(CC_Z);
|
||||
}
|
||||
else
|
||||
{
|
||||
// could have used a LUT, but then where would be the fun?
|
||||
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
|
||||
|
||||
skipExecute = J_CC(cond & 1 ? CC_NZ : CC_Z);
|
||||
}
|
||||
|
||||
}
|
||||
skipExecute = CheckCondition(cond);
|
||||
|
||||
u32 icode = ((CurInstr.Instr >> 4) & 0xF) | ((CurInstr.Instr >> 16) & 0xFF0);
|
||||
if (comp == NULL)
|
||||
|
@ -258,19 +218,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
|||
|
||||
SetJumpTarget(skipFailed);
|
||||
}
|
||||
|
||||
if (cpu->CheckCondition(cond))
|
||||
ARMInterpreter::ARMInstrTable[icode](cpu);
|
||||
else
|
||||
cpu->AddCycles_C();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
we don't need to collect the interpreted cycles,
|
||||
since cpu->Cycles is taken into account by the dispatcher.
|
||||
*/
|
||||
|
||||
if (comp == NULL && i != instrsCount - 1)
|
||||
LoadCPSR();
|
||||
}
|
||||
|
@ -367,7 +317,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
|
|||
// LDM/STM
|
||||
NULL, NULL,
|
||||
// Branch
|
||||
NULL, NULL, NULL, NULL, NULL,
|
||||
A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchImm, A_Comp_BranchXchangeReg, A_Comp_BranchXchangeReg,
|
||||
// system stuff
|
||||
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||
};
|
||||
|
@ -389,7 +339,7 @@ CompileFunc Compiler::GetCompFunc(int kind)
|
|||
// pc/sp relative
|
||||
T_Comp_RelAddr, T_Comp_RelAddr, T_Comp_AddSP,
|
||||
// LDR pcrel
|
||||
NULL,
|
||||
T_Comp_LoadPCRel,
|
||||
// LDR/STR reg offset
|
||||
T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg, T_Comp_MemReg,
|
||||
// LDR/STR sign extended, half
|
||||
|
@ -399,25 +349,27 @@ CompileFunc Compiler::GetCompFunc(int kind)
|
|||
// LDR/STR half imm offset
|
||||
T_Comp_MemImmHalf, T_Comp_MemImmHalf,
|
||||
// LDR/STR sp rel
|
||||
NULL, NULL,
|
||||
T_Comp_MemSPRel, T_Comp_MemSPRel,
|
||||
// PUSH/POP
|
||||
NULL, NULL,
|
||||
T_Comp_PUSH_POP, T_Comp_PUSH_POP,
|
||||
// LDMIA, STMIA
|
||||
NULL, NULL,
|
||||
NULL, NULL,
|
||||
NULL, NULL, NULL, NULL, NULL, NULL
|
||||
T_Comp_LDMIA_STMIA, T_Comp_LDMIA_STMIA,
|
||||
// Branch
|
||||
T_Comp_BCOND, T_Comp_BranchXchangeReg, T_Comp_BranchXchangeReg, T_Comp_B, T_Comp_BL_LONG_1, T_Comp_BL_LONG_2,
|
||||
// Unk, SVC
|
||||
NULL, NULL
|
||||
};
|
||||
|
||||
return Thumb ? T_Comp[kind] : A_Comp[kind];
|
||||
}
|
||||
|
||||
void Compiler::Comp_AddCycles_C()
|
||||
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
|
||||
{
|
||||
s32 cycles = Num ?
|
||||
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
|
||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
|
||||
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
if ((!Thumb && CurInstr.Cond() < 0xE) || forceNonConstant)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
|
@ -429,25 +381,10 @@ void Compiler::Comp_AddCycles_CI(u32 i)
|
|||
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
|
||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
|
||||
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||
else
|
||||
ConstantCycles += cycles;
|
||||
}
|
||||
|
||||
// Emits a call to the CPU's JumpTo with the target taken from a host register.
// The cached CPSR is written back first; restoreCPSR is passed through as the
// third argument.
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
{
    // potential bug: if a register that is still cached gets saved during the
    // mode switch, it is the old value that ends up being saved
    SaveCPSR();

    // ARM9 (Num == 0) and ARM7 have separate JumpTo implementations
    void* jumpToFunc = Num == 0 ? (void*)&ARMv5::JumpTo : (void*)&ARMv4::JumpTo;

    MOV(64, R(ABI_PARAM1), R(RCPU));
    MOV(32, R(ABI_PARAM2), R(addr));
    MOV(32, R(ABI_PARAM3), Imm32(restoreCPSR));
    CALL(jumpToFunc);
}
|
||||
|
||||
}
|
|
@ -22,19 +22,6 @@ class Compiler;
|
|||
|
||||
typedef void (Compiler::*CompileFunc)();
|
||||
|
||||
// Memory regions a data access can be classified into.
// DTCM and WRAM7 share the value of dataRegionExclusive, so they do not
// contribute to dataRegionsCount.
enum DataRegion
{
    dataRegionGeneric   = 0, // hey, that's me!
    dataRegionMainRAM   = 1,
    dataRegionSWRAM     = 2,
    dataRegionVRAM      = 3,
    dataRegionIO        = 4,
    dataRegionExclusive = 5,
    dataRegionsCount    = 6,

    // aliases of the exclusive slot
    dataRegionDTCM  = dataRegionExclusive,
    dataRegionWRAM7 = dataRegionExclusive,
};
|
||||
|
||||
class Compiler : public Gen::X64CodeBlock
|
||||
{
|
||||
public:
|
||||
|
@ -49,8 +36,9 @@ private:
|
|||
CompileFunc GetCompFunc(int kind);
|
||||
|
||||
void Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR = false);
|
||||
void Comp_JumpTo(u32 addr, bool forceNonConstantCycles = false);
|
||||
|
||||
void Comp_AddCycles_C();
|
||||
void Comp_AddCycles_C(bool forceNonConstant = false);
|
||||
void Comp_AddCycles_CI(u32 i);
|
||||
|
||||
enum
|
||||
|
@ -63,8 +51,6 @@ private:
|
|||
opInvertOp2 = 1 << 5,
|
||||
};
|
||||
|
||||
DataRegion ClassifyAddress(u32 addr);
|
||||
|
||||
void A_Comp_Arith();
|
||||
void A_Comp_MovOp();
|
||||
void A_Comp_CmpOp();
|
||||
|
@ -73,6 +59,9 @@ private:
|
|||
void A_Comp_MemHalf();
|
||||
void A_Comp_LDM_STM();
|
||||
|
||||
void A_Comp_BranchImm();
|
||||
void A_Comp_BranchXchangeReg();
|
||||
|
||||
void T_Comp_ShiftImm();
|
||||
void T_Comp_AddSub_();
|
||||
void T_Comp_ALU_Imm8();
|
||||
|
@ -91,6 +80,13 @@ private:
|
|||
void T_Comp_PUSH_POP();
|
||||
void T_Comp_LDMIA_STMIA();
|
||||
|
||||
void T_Comp_BCOND();
|
||||
void T_Comp_B();
|
||||
void T_Comp_BranchXchangeReg();
|
||||
void T_Comp_BL_LONG_1();
|
||||
void T_Comp_BL_LONG_2();
|
||||
void T_Comp_BL_Merged(FetchedInstr prefix);
|
||||
|
||||
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
|
||||
s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
|
||||
|
||||
|
@ -119,6 +115,8 @@ private:
|
|||
void LoadCPSR();
|
||||
void SaveCPSR();
|
||||
|
||||
Gen::FixupBranch CheckCondition(u32 cond);
|
||||
|
||||
Gen::OpArg MapReg(int reg)
|
||||
{
|
||||
if (reg == 15 && RegCache.Mapping[reg] == Gen::INVALID_REG)
|
||||
|
|
|
@ -462,38 +462,10 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
|
|||
{
|
||||
int regsCount = regs.Count();
|
||||
|
||||
const u8 userModeOffsets[] =
|
||||
{
|
||||
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||
offsetof(ARM, R[12]), offsetof(ARM, R[13]), offsetof(ARM, R[14]), 0,
|
||||
|
||||
offsetof(ARM, R_FIQ[0]), offsetof(ARM, R_FIQ[1]), offsetof(ARM, R_FIQ[2]), offsetof(ARM, R_FIQ[3]),
|
||||
offsetof(ARM, R_FIQ[4]), offsetof(ARM, R_FIQ[5]), offsetof(ARM, R_FIQ[6]), 0,
|
||||
|
||||
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||
offsetof(ARM, R[12]), offsetof(ARM, R_IRQ[13]), offsetof(ARM, R_IRQ[14]), 0,
|
||||
|
||||
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||
offsetof(ARM, R[12]), offsetof(ARM, R_SVC[13]), offsetof(ARM, R_SVC[14]), 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||
offsetof(ARM, R[12]), offsetof(ARM, R_ABT[13]), offsetof(ARM, R_ABT[14]), 0,
|
||||
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
|
||||
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||
offsetof(ARM, R[12]), offsetof(ARM, R_UND[13]), offsetof(ARM, R_UND[14]), 0,
|
||||
};
|
||||
|
||||
if (decrement)
|
||||
{
|
||||
MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4));
|
||||
preinc = !preinc;
|
||||
preinc ^= true;
|
||||
}
|
||||
else
|
||||
MOV(32, R(ABI_PARAM1), rb);
|
||||
|
@ -516,16 +488,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
|
|||
{
|
||||
if (regs[reg])
|
||||
{
|
||||
if (usermode && reg >= 8 && reg < 15)
|
||||
/*if (usermode && reg >= 8 && reg < 15)
|
||||
{
|
||||
MOV(32, R(RSCRATCH2), R(RCPSR));
|
||||
AND(32, R(RSCRATCH2), Imm8(0x1F));
|
||||
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
|
||||
MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
|
||||
MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
|
||||
POP(RSCRATCH);
|
||||
MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH));
|
||||
}
|
||||
else if (RegCache.Mapping[reg] == INVALID_REG)
|
||||
else */if (RegCache.Mapping[reg] == INVALID_REG)
|
||||
{
|
||||
assert(reg != 15);
|
||||
|
||||
|
@ -552,16 +524,16 @@ s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool prei
|
|||
{
|
||||
for (int reg : regs)
|
||||
{
|
||||
if (usermode && reg >= 8 && reg < 15)
|
||||
/*if (usermode && reg >= 8 && reg < 15)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), R(RCPSR));
|
||||
AND(32, R(RSCRATCH), Imm8(0x1F));
|
||||
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
|
||||
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
|
||||
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x10 * 8 + (reg - 8)));
|
||||
MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH));
|
||||
PUSH(RSCRATCH);
|
||||
}
|
||||
else if (RegCache.Mapping[reg] == INVALID_REG)
|
||||
else */if (RegCache.Mapping[reg] == INVALID_REG)
|
||||
{
|
||||
LoadReg(reg, RSCRATCH);
|
||||
PUSH(RSCRATCH);
|
||||
|
|
|
@ -255,7 +255,7 @@ const u32 T_STMIA = T_Read8 | T_Write8 | tk(tk_STMIA);
|
|||
|
||||
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
|
||||
const u32 T_BX = T_BranchAlways | T_ReadHi3 | tk(tk_BX);
|
||||
const u32 T_BLX_REG = T_BranchAlways | T_ReadR15 | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
|
||||
const u32 T_BLX_REG = T_BranchAlways | T_WriteR14 | T_ReadHi3 | tk(tk_BLX_REG);
|
||||
const u32 T_B = T_BranchAlways | tk(tk_B);
|
||||
const u32 T_BL_LONG_1 = T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_1);
|
||||
const u32 T_BL_LONG_2 = T_BranchAlways | T_ReadR14 | T_WriteR14 | T_ReadR15 | tk(tk_BL_LONG_2);
|
||||
|
@ -301,6 +301,10 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
|||
res.DstRegs |= (1 << 13);
|
||||
if (data & T_ReadR15)
|
||||
res.SrcRegs |= (1 << 15);
|
||||
if (data & T_WriteR14)
|
||||
res.DstRegs |= (1 << 14);
|
||||
if (data & T_ReadR14)
|
||||
res.SrcRegs |= (1 << 14);
|
||||
|
||||
if (data & T_BranchAlways)
|
||||
res.DstRegs |= (1 << 15);
|
||||
|
|
|
@ -202,6 +202,7 @@ enum
|
|||
tk_POP,
|
||||
tk_LDMIA,
|
||||
tk_STMIA,
|
||||
|
||||
tk_BCOND,
|
||||
tk_BX,
|
||||
tk_BLX_REG,
|
||||
|
|
|
@ -35,6 +35,7 @@ add_library(core STATIC
|
|||
ARMJIT_x64/ARMJIT_Compiler.cpp
|
||||
ARMJIT_x64/ARMJIT_ALU.cpp
|
||||
ARMJIT_x64/ARMJIT_LoadStore.cpp
|
||||
ARMJIT_x64/ARMJIT_Branch.cpp
|
||||
|
||||
dolphin/CommonFuncs.cpp
|
||||
dolphin/x64ABI.cpp
|
||||
|
|
Loading…
Reference in New Issue