integrate changes from ARM64 backend and more
- better handle LDM/STM in reg alloc
- unify Halted and IRQ in anticipation of branch inlining
- literal optimisations can be disabled in the GUI
- JIT blocks follow simple returns
- fix idle loop detection
- break JIT blocks on IRQ (fixes saving in Pokemon White)
This commit is contained in:
parent
aa23f21b8d
commit
81f38c14be
40
src/ARM.cpp
40
src/ARM.cpp
|
@ -113,7 +113,7 @@ void ARM::DoSavestate(Savestate* file)
|
||||||
|
|
||||||
file->Var32((u32*)&Cycles);
|
file->Var32((u32*)&Cycles);
|
||||||
//file->Var32((u32*)&CyclesToRun);
|
//file->Var32((u32*)&CyclesToRun);
|
||||||
file->Var32(&Halted);
|
file->Var32(&StopExecution);
|
||||||
|
|
||||||
file->VarArray(R, 16*sizeof(u32));
|
file->VarArray(R, 16*sizeof(u32));
|
||||||
file->Var32(&CPSR);
|
file->Var32(&CPSR);
|
||||||
|
@ -589,16 +589,21 @@ void ARMv5::ExecuteJIT()
|
||||||
NDS::ARM9Timestamp += Cycles;
|
NDS::ARM9Timestamp += Cycles;
|
||||||
Cycles = 0;
|
Cycles = 0;
|
||||||
|
|
||||||
if (IRQ) TriggerIRQ();
|
if (StopExecution)
|
||||||
if (Halted)
|
|
||||||
{
|
{
|
||||||
bool idleLoop = Halted & 0x20;
|
if (IRQ)
|
||||||
Halted &= ~0x20;
|
TriggerIRQ();
|
||||||
if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
|
|
||||||
|
if (Halted || IdleLoop)
|
||||||
{
|
{
|
||||||
NDS::ARM9Timestamp = NDS::ARM9Target;
|
bool idleLoop = IdleLoop;
|
||||||
|
IdleLoop = 0;
|
||||||
|
if ((Halted == 1 || idleLoop) && NDS::ARM9Timestamp < NDS::ARM9Target)
|
||||||
|
{
|
||||||
|
NDS::ARM9Timestamp = NDS::ARM9Target;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -726,16 +731,21 @@ void ARMv4::ExecuteJIT()
|
||||||
Cycles = 0;
|
Cycles = 0;
|
||||||
|
|
||||||
// TODO optimize this shit!!!
|
// TODO optimize this shit!!!
|
||||||
if (IRQ) TriggerIRQ();
|
if (StopExecution)
|
||||||
if (Halted)
|
|
||||||
{
|
{
|
||||||
bool idleLoop = Halted & 0x20;
|
if (IRQ)
|
||||||
Halted &= ~0x20;
|
TriggerIRQ();
|
||||||
if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
|
|
||||||
|
if (Halted || IdleLoop)
|
||||||
{
|
{
|
||||||
NDS::ARM7Timestamp = NDS::ARM7Target;
|
bool idleLoop = IdleLoop;
|
||||||
|
IdleLoop = 0;
|
||||||
|
if ((Halted == 1 || idleLoop) && NDS::ARM7Timestamp < NDS::ARM7Target)
|
||||||
|
{
|
||||||
|
NDS::ARM7Timestamp = NDS::ARM7Target;
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
13
src/ARM.h
13
src/ARM.h
|
@ -112,9 +112,16 @@ public:
|
||||||
u32 Num;
|
u32 Num;
|
||||||
|
|
||||||
s32 Cycles;
|
s32 Cycles;
|
||||||
u32 Halted;
|
union
|
||||||
|
{
|
||||||
u32 IRQ; // nonzero to trigger IRQ
|
struct
|
||||||
|
{
|
||||||
|
u8 Halted;
|
||||||
|
u8 IRQ; // nonzero to trigger IRQ
|
||||||
|
u8 IdleLoop;
|
||||||
|
};
|
||||||
|
u32 StopExecution;
|
||||||
|
};
|
||||||
|
|
||||||
u32 CodeRegion;
|
u32 CodeRegion;
|
||||||
s32 CodeCycles;
|
s32 CodeCycles;
|
||||||
|
|
|
@ -16,11 +16,13 @@
|
||||||
#include "GPU3D.h"
|
#include "GPU3D.h"
|
||||||
#include "SPU.h"
|
#include "SPU.h"
|
||||||
#include "Wifi.h"
|
#include "Wifi.h"
|
||||||
|
#include "NDSCart.h"
|
||||||
|
|
||||||
namespace ARMJIT
|
namespace ARMJIT
|
||||||
{
|
{
|
||||||
|
|
||||||
#define JIT_DEBUGPRINT(msg, ...)
|
#define JIT_DEBUGPRINT(msg, ...)
|
||||||
|
//#define JIT_DEBUGPRINT(msg, ...) printf(msg, ## __VA_ARGS__)
|
||||||
|
|
||||||
Compiler* compiler;
|
Compiler* compiler;
|
||||||
|
|
||||||
|
@ -159,13 +161,17 @@ void FloodFillSetFlags(FetchedInstr instrs[], int start, u8 flags)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetAddr)
|
bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, bool hasLink, u32 lr, bool& link,
|
||||||
|
u32& linkAddr, u32& targetAddr)
|
||||||
{
|
{
|
||||||
if (thumb)
|
if (thumb)
|
||||||
{
|
{
|
||||||
u32 r15 = instr.Addr + 4;
|
u32 r15 = instr.Addr + 4;
|
||||||
cond = 0xE;
|
cond = 0xE;
|
||||||
|
|
||||||
|
link = instr.Info.Kind == ARMInstrInfo::tk_BL_LONG;
|
||||||
|
linkAddr = instr.Addr + 4;
|
||||||
|
|
||||||
if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12)))
|
if (instr.Info.Kind == ARMInstrInfo::tk_BL_LONG && !(instr.Instr & (1 << 12)))
|
||||||
{
|
{
|
||||||
targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9);
|
targetAddr = r15 + ((s32)((instr.Instr & 0x7FF) << 21) >> 9);
|
||||||
|
@ -185,9 +191,18 @@ bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetA
|
||||||
targetAddr = r15 + offset;
|
targetAddr = r15 + offset;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
else if (hasLink && instr.Info.Kind == ARMInstrInfo::tk_BX && instr.A_Reg(3) == 14)
|
||||||
|
{
|
||||||
|
JIT_DEBUGPRINT("returning!\n");
|
||||||
|
targetAddr = lr;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
link = instr.Info.Kind == ARMInstrInfo::ak_BL;
|
||||||
|
linkAddr = instr.Addr + 4;
|
||||||
|
|
||||||
cond = instr.Cond();
|
cond = instr.Cond();
|
||||||
if (instr.Info.Kind == ARMInstrInfo::ak_BL
|
if (instr.Info.Kind == ARMInstrInfo::ak_BL
|
||||||
|| instr.Info.Kind == ARMInstrInfo::ak_B)
|
|| instr.Info.Kind == ARMInstrInfo::ak_B)
|
||||||
|
@ -197,6 +212,12 @@ bool DecodeBranch(bool thumb, const FetchedInstr& instr, u32& cond, u32& targetA
|
||||||
targetAddr = r15 + offset;
|
targetAddr = r15 + offset;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
else if (hasLink && instr.Info.Kind == ARMInstrInfo::ak_BX && instr.A_Reg(0) == 14)
|
||||||
|
{
|
||||||
|
JIT_DEBUGPRINT("returning!\n");
|
||||||
|
targetAddr = lr;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -351,6 +372,8 @@ void CompileBlock(ARM* cpu)
|
||||||
CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);
|
CodeRanges[pseudoPhysicalAddr / 512].TimesInvalidated);
|
||||||
|
|
||||||
u32 lastSegmentStart = blockAddr;
|
u32 lastSegmentStart = blockAddr;
|
||||||
|
u32 lr;
|
||||||
|
bool hasLink = false;
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -413,6 +436,9 @@ void CompileBlock(ARM* cpu)
|
||||||
cpu->CurInstr = instrs[i].Instr;
|
cpu->CurInstr = instrs[i].Instr;
|
||||||
cpu->CodeCycles = instrs[i].CodeCycles;
|
cpu->CodeCycles = instrs[i].CodeCycles;
|
||||||
|
|
||||||
|
if (instrs[i].Info.DstRegs & (1 << 14))
|
||||||
|
hasLink = false;
|
||||||
|
|
||||||
if (thumb)
|
if (thumb)
|
||||||
{
|
{
|
||||||
InterpretTHUMB[instrs[i].Info.Kind](cpu);
|
InterpretTHUMB[instrs[i].Info.Kind](cpu);
|
||||||
|
@ -452,8 +478,9 @@ void CompileBlock(ARM* cpu)
|
||||||
{
|
{
|
||||||
bool hasBranched = cpu->R[15] != r15;
|
bool hasBranched = cpu->R[15] != r15;
|
||||||
|
|
||||||
u32 cond, target;
|
bool link;
|
||||||
bool staticBranch = DecodeBranch(thumb, instrs[i], cond, target);
|
u32 cond, target, linkAddr;
|
||||||
|
bool staticBranch = DecodeBranch(thumb, instrs[i], cond, hasLink, lr, link, linkAddr, target);
|
||||||
JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched);
|
JIT_DEBUGPRINT("branch cond %x target %x (%d)\n", cond, target, hasBranched);
|
||||||
|
|
||||||
if (staticBranch)
|
if (staticBranch)
|
||||||
|
@ -474,18 +501,24 @@ void CompileBlock(ARM* cpu)
|
||||||
if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart)
|
if (cond < 0xE && target < instrs[i].Addr && target >= lastSegmentStart)
|
||||||
{
|
{
|
||||||
// we might have an idle loop
|
// we might have an idle loop
|
||||||
u32 offset = (target - blockAddr) / (thumb ? 2 : 4);
|
u32 backwardsOffset = (instrs[i].Addr - target) / (thumb ? 2 : 4);
|
||||||
if (IsIdleLoop(instrs + offset, i - offset + 1))
|
if (IsIdleLoop(&instrs[i - backwardsOffset], backwardsOffset + 1))
|
||||||
{
|
{
|
||||||
instrs[i].BranchFlags |= branch_IdleBranch;
|
instrs[i].BranchFlags |= branch_IdleBranch;
|
||||||
JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr);
|
JIT_DEBUGPRINT("found %s idle loop %d in block %x\n", thumb ? "thumb" : "arm", cpu->Num, blockAddr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (hasBranched && (!thumb || cond == 0xE) && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
|
else if (hasBranched && !isBackJump && i + 1 < Config::JIT_MaxBlockSize)
|
||||||
{
|
{
|
||||||
u32 targetPseudoPhysical = cpu->Num == 0
|
u32 targetPseudoPhysical = cpu->Num == 0
|
||||||
? TranslateAddr<0>(target)
|
? TranslateAddr<0>(target)
|
||||||
: TranslateAddr<1>(target);
|
: TranslateAddr<1>(target);
|
||||||
|
|
||||||
|
if (link)
|
||||||
|
{
|
||||||
|
lr = linkAddr;
|
||||||
|
hasLink = true;
|
||||||
|
}
|
||||||
|
|
||||||
r15 = target + (thumb ? 2 : 4);
|
r15 = target + (thumb ? 2 : 4);
|
||||||
assert(r15 == cpu->R[15]);
|
assert(r15 == cpu->R[15]);
|
||||||
|
@ -520,7 +553,7 @@ void CompileBlock(ARM* cpu)
|
||||||
bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken));
|
bool secondaryFlagReadCond = !canCompile || (instrs[i - 1].BranchFlags & (branch_FollowCondTaken | branch_FollowCondNotTaken));
|
||||||
if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond)
|
if (instrs[i - 1].Info.ReadFlags != 0 || secondaryFlagReadCond)
|
||||||
FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
|
FloodFillSetFlags(instrs, i - 2, !secondaryFlagReadCond ? instrs[i - 1].Info.ReadFlags : 0xF);
|
||||||
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted);
|
} while(!instrs[i - 1].Info.EndBlock && i < Config::JIT_MaxBlockSize && !cpu->Halted && (!cpu->IRQ || (cpu->CPSR & 0x80)));
|
||||||
|
|
||||||
u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr);
|
u32 restoreSlot = HashRestoreCandidate(pseudoPhysicalAddr);
|
||||||
JitBlock* prevBlock = RestoreCandidates[restoreSlot];
|
JitBlock* prevBlock = RestoreCandidates[restoreSlot];
|
||||||
|
@ -713,6 +746,9 @@ void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
|
||||||
{
|
{
|
||||||
if ((addr & 0xFF000000) == 0x04000000)
|
if ((addr & 0xFF000000) == 0x04000000)
|
||||||
{
|
{
|
||||||
|
if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11))
|
||||||
|
return (void*)NDSCart::ReadROMData;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
unfortunately we can't map GPU2D this way
|
unfortunately we can't map GPU2D this way
|
||||||
since it's hidden inside an object
|
since it's hidden inside an object
|
||||||
|
|
|
@ -93,10 +93,12 @@ public:
|
||||||
|
|
||||||
void Prepare(bool thumb, int i)
|
void Prepare(bool thumb, int i)
|
||||||
{
|
{
|
||||||
|
FetchedInstr instr = Instrs[i];
|
||||||
|
|
||||||
if (LoadedRegs & (1 << 15))
|
if (LoadedRegs & (1 << 15))
|
||||||
UnloadRegister(15);
|
UnloadRegister(15);
|
||||||
|
|
||||||
BitSet16 invalidedLiterals(LiteralsLoaded & Instrs[i].Info.DstRegs);
|
BitSet16 invalidedLiterals(LiteralsLoaded & instr.Info.DstRegs);
|
||||||
for (int reg : invalidedLiterals)
|
for (int reg : invalidedLiterals)
|
||||||
UnloadLiteral(reg);
|
UnloadLiteral(reg);
|
||||||
|
|
||||||
|
@ -108,6 +110,7 @@ public:
|
||||||
{
|
{
|
||||||
BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs);
|
BitSet16 regsNeeded((Instrs[j].Info.SrcRegs & ~(1 << 15)) | Instrs[j].Info.DstRegs);
|
||||||
futureNeeded |= regsNeeded.m_val;
|
futureNeeded |= regsNeeded.m_val;
|
||||||
|
regsNeeded &= BitSet16(~Instrs[j].Info.NotStrictlyNeeded);
|
||||||
for (int reg : regsNeeded)
|
for (int reg : regsNeeded)
|
||||||
ranking[reg]++;
|
ranking[reg]++;
|
||||||
}
|
}
|
||||||
|
@ -117,8 +120,8 @@ public:
|
||||||
for (int reg : neverNeededAgain)
|
for (int reg : neverNeededAgain)
|
||||||
UnloadRegister(reg);
|
UnloadRegister(reg);
|
||||||
|
|
||||||
FetchedInstr Instr = Instrs[i];
|
u16 necessaryRegs = ((instr.Info.SrcRegs & ~(1 << 15)) | instr.Info.DstRegs) & ~instr.Info.NotStrictlyNeeded;
|
||||||
u16 necessaryRegs = (Instr.Info.SrcRegs & ~(1 << 15)) | Instr.Info.DstRegs;
|
u16 writeRegs = instr.Info.DstRegs & ~instr.Info.NotStrictlyNeeded;
|
||||||
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
|
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
|
||||||
if (needToBeLoaded != BitSet16(0))
|
if (needToBeLoaded != BitSet16(0))
|
||||||
{
|
{
|
||||||
|
@ -143,13 +146,31 @@ public:
|
||||||
loadedSet.m_val = LoadedRegs;
|
loadedSet.m_val = LoadedRegs;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// we don't need to load a value which is always going to be overwritten
|
||||||
BitSet16 needValueLoaded(needToBeLoaded);
|
BitSet16 needValueLoaded(needToBeLoaded);
|
||||||
if (thumb || Instr.Cond() >= 0xE)
|
if (thumb || instr.Cond() >= 0xE)
|
||||||
needValueLoaded = BitSet16(Instr.Info.SrcRegs);
|
needValueLoaded = BitSet16(instr.Info.SrcRegs);
|
||||||
for (int reg : needToBeLoaded)
|
for (int reg : needToBeLoaded)
|
||||||
LoadRegister(reg, needValueLoaded[reg]);
|
LoadRegister(reg, needValueLoaded[reg]);
|
||||||
|
}
|
||||||
|
{
|
||||||
|
BitSet16 loadedSet(LoadedRegs);
|
||||||
|
BitSet16 loadRegs(instr.Info.NotStrictlyNeeded & futureNeeded & ~LoadedRegs);
|
||||||
|
if (loadRegs && loadedSet.Count() < NativeRegsAvailable)
|
||||||
|
{
|
||||||
|
int left = NativeRegsAvailable - loadedSet.Count();
|
||||||
|
for (int reg : loadRegs)
|
||||||
|
{
|
||||||
|
if (left-- == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
writeRegs |= (1 << reg) & instr.Info.DstRegs;
|
||||||
|
LoadRegister(reg, !(thumb || instr.Cond() >= 0xE) || (1 << reg) & instr.Info.SrcRegs);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
DirtyRegs |= Instr.Info.DstRegs & ~(1 << 15);
|
|
||||||
|
DirtyRegs |= writeRegs & ~(1 << 15);
|
||||||
}
|
}
|
||||||
|
|
||||||
static const Reg NativeRegAllocOrder[];
|
static const Reg NativeRegAllocOrder[];
|
||||||
|
|
|
@ -364,7 +364,7 @@ void Compiler::Reset()
|
||||||
void Compiler::Comp_SpecialBranchBehaviour()
|
void Compiler::Comp_SpecialBranchBehaviour()
|
||||||
{
|
{
|
||||||
if (CurInstr.BranchFlags & branch_IdleBranch)
|
if (CurInstr.BranchFlags & branch_IdleBranch)
|
||||||
OR(32, MDisp(RCPU, offsetof(ARM, Halted)), Imm8(0x20));
|
OR(32, MDisp(RCPU, offsetof(ARM, IdleLoop)), Imm8(0x1));
|
||||||
|
|
||||||
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
|
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
|
||||||
{
|
{
|
||||||
|
@ -402,6 +402,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||||
{
|
{
|
||||||
CurInstr = instrs[i];
|
CurInstr = instrs[i];
|
||||||
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
|
R15 = CurInstr.Addr + (Thumb ? 4 : 8);
|
||||||
|
CodeRegion = R15 >> 24;
|
||||||
|
|
||||||
Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
|
Exit = i == instrsCount - 1 || (CurInstr.BranchFlags & branch_FollowCondNotTaken);
|
||||||
|
|
||||||
|
@ -571,8 +572,6 @@ void Compiler::Comp_AddCycles_CDI()
|
||||||
Comp_AddCycles_CD();
|
Comp_AddCycles_CD();
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
IrregularCycles = true;
|
|
||||||
|
|
||||||
s32 cycles;
|
s32 cycles;
|
||||||
|
|
||||||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||||
|
@ -642,7 +641,7 @@ void Compiler::Comp_AddCycles_CD()
|
||||||
IrregularCycles = true;
|
IrregularCycles = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!Thumb && CurInstr.Cond() < 0xE)
|
if (IrregularCycles && !Thumb && CurInstr.Cond() < 0xE)
|
||||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||||
else
|
else
|
||||||
ConstantCycles += cycles;
|
ConstantCycles += cycles;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include "ARMJIT_Compiler.h"
|
#include "ARMJIT_Compiler.h"
|
||||||
|
|
||||||
|
#include "../Config.h"
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
|
@ -290,7 +291,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||||
if (size == 16)
|
if (size == 16)
|
||||||
addressMask = ~1;
|
addressMask = ~1;
|
||||||
|
|
||||||
if (rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
|
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
|
||||||
{
|
{
|
||||||
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||||
Comp_MemLoadLiteral(size, rd, addr);
|
Comp_MemLoadLiteral(size, rd, addr);
|
||||||
|
@ -309,6 +310,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||||
|
|
||||||
OpArg rdMapped = MapReg(rd);
|
OpArg rdMapped = MapReg(rd);
|
||||||
OpArg rnMapped = MapReg(rn);
|
OpArg rnMapped = MapReg(rn);
|
||||||
|
if (Thumb && rn == 15)
|
||||||
|
rnMapped = Imm32(R15 & ~0x2);
|
||||||
|
|
||||||
bool inlinePreparation = Num == 1;
|
bool inlinePreparation = Num == 1;
|
||||||
u32 constLocalROR32 = 4;
|
u32 constLocalROR32 = 4;
|
||||||
|
@ -317,7 +320,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
||||||
? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
|
? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
|
||||||
: MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
|
: MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
|
||||||
|
|
||||||
if ((rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
|
if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
|
||||||
{
|
{
|
||||||
u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||||
|
|
||||||
|
@ -749,9 +752,12 @@ void Compiler::T_Comp_MemImmHalf()
|
||||||
|
|
||||||
void Compiler::T_Comp_LoadPCRel()
|
void Compiler::T_Comp_LoadPCRel()
|
||||||
{
|
{
|
||||||
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
|
u32 offset = (CurInstr.Instr & 0xFF) << 2;
|
||||||
|
u32 addr = (R15 & ~0x2) + offset;
|
||||||
Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
|
if (Config::JIT_LiteralOptimisations)
|
||||||
|
Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
|
||||||
|
else
|
||||||
|
Comp_MemAccess(CurInstr.T_Reg(8), 15, ComplexOperand(offset), 32, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compiler::T_Comp_MemSPRel()
|
void Compiler::T_Comp_MemSPRel()
|
||||||
|
|
|
@ -365,6 +365,21 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
||||||
if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
|
if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
|
||||||
res.SpecialKind = special_LoadLiteral;
|
res.SpecialKind = special_LoadLiteral;
|
||||||
|
|
||||||
|
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
|
||||||
|
{
|
||||||
|
u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
|
||||||
|
res.NotStrictlyNeeded |= set;
|
||||||
|
res.DstRegs |= set;
|
||||||
|
}
|
||||||
|
if (res.Kind == tk_STMIA || res.Kind == tk_PUSH)
|
||||||
|
{
|
||||||
|
u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
|
||||||
|
if (res.Kind == tk_PUSH && instr & (1 << 8))
|
||||||
|
set |= (1 << 14);
|
||||||
|
res.NotStrictlyNeeded |= set;
|
||||||
|
res.SrcRegs |= set;
|
||||||
|
}
|
||||||
|
|
||||||
res.EndBlock |= res.Branches();
|
res.EndBlock |= res.Branches();
|
||||||
|
|
||||||
if (res.Kind == tk_BCOND)
|
if (res.Kind == tk_BCOND)
|
||||||
|
@ -466,6 +481,19 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
||||||
|
|
||||||
if ((data & A_LoadMem) && res.SrcRegs == (1 << 15))
|
if ((data & A_LoadMem) && res.SrcRegs == (1 << 15))
|
||||||
res.SpecialKind = special_LoadLiteral;
|
res.SpecialKind = special_LoadLiteral;
|
||||||
|
|
||||||
|
if (res.Kind == ak_LDM)
|
||||||
|
{
|
||||||
|
u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
|
||||||
|
res.DstRegs |= set;
|
||||||
|
res.NotStrictlyNeeded |= set;
|
||||||
|
}
|
||||||
|
if (res.Kind == ak_STM)
|
||||||
|
{
|
||||||
|
u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
|
||||||
|
res.SrcRegs |= set;
|
||||||
|
res.NotStrictlyNeeded |= set;
|
||||||
|
}
|
||||||
|
|
||||||
if ((instr >> 28) < 0xE)
|
if ((instr >> 28) < 0xE)
|
||||||
{
|
{
|
||||||
|
|
|
@ -236,7 +236,7 @@ enum
|
||||||
|
|
||||||
struct Info
|
struct Info
|
||||||
{
|
{
|
||||||
u16 DstRegs, SrcRegs;
|
u16 DstRegs, SrcRegs, NotStrictlyNeeded;
|
||||||
u16 Kind;
|
u16 Kind;
|
||||||
|
|
||||||
u8 SpecialKind;
|
u8 SpecialKind;
|
||||||
|
|
|
@ -38,6 +38,7 @@ int GL_Antialias;
|
||||||
bool JIT_Enable = false;
|
bool JIT_Enable = false;
|
||||||
int JIT_MaxBlockSize = 12;
|
int JIT_MaxBlockSize = 12;
|
||||||
bool JIT_BrancheOptimisations = true;
|
bool JIT_BrancheOptimisations = true;
|
||||||
|
bool JIT_LiteralOptimisations = true;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ConfigEntry ConfigFile[] =
|
ConfigEntry ConfigFile[] =
|
||||||
|
@ -52,6 +53,7 @@ ConfigEntry ConfigFile[] =
|
||||||
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
|
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
|
||||||
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0},
|
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 10, NULL, 0},
|
||||||
{"JIT_BrancheOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
|
{"JIT_BrancheOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
|
||||||
|
{"JIT_BrancheOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
{"", -1, NULL, 0, NULL, 0}
|
{"", -1, NULL, 0, NULL, 0}
|
||||||
|
|
|
@ -50,6 +50,7 @@ extern int GL_Antialias;
|
||||||
extern bool JIT_Enable;
|
extern bool JIT_Enable;
|
||||||
extern int JIT_MaxBlockSize;
|
extern int JIT_MaxBlockSize;
|
||||||
extern bool JIT_BrancheOptimisations;
|
extern bool JIT_BrancheOptimisations;
|
||||||
|
extern bool JIT_LiteralOptimisations;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1142,7 +1142,7 @@ void UpdateIRQ(u32 cpu)
|
||||||
|
|
||||||
if (IME[cpu] & 0x1)
|
if (IME[cpu] & 0x1)
|
||||||
{
|
{
|
||||||
arm->IRQ = IE[cpu] & IF[cpu];
|
arm->IRQ = !!(IE[cpu] & IF[cpu]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -43,6 +43,7 @@ uiCheckbox* cbDirectBoot;
|
||||||
uiCheckbox* cbJITEnabled;
|
uiCheckbox* cbJITEnabled;
|
||||||
uiEntry* enJITMaxBlockSize;
|
uiEntry* enJITMaxBlockSize;
|
||||||
uiCheckbox* cbJITBranchOptimisations;
|
uiCheckbox* cbJITBranchOptimisations;
|
||||||
|
uiCheckbox* cbJITLiteralOptimisations;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int OnCloseWindow(uiWindow* window, void* blarg)
|
int OnCloseWindow(uiWindow* window, void* blarg)
|
||||||
|
@ -66,14 +67,16 @@ void OnOk(uiButton* btn, void* blarg)
|
||||||
char* maxBlockSizeStr = uiEntryText(enJITMaxBlockSize);
|
char* maxBlockSizeStr = uiEntryText(enJITMaxBlockSize);
|
||||||
long blockSize = strtol(maxBlockSizeStr, NULL, 10);
|
long blockSize = strtol(maxBlockSizeStr, NULL, 10);
|
||||||
bool branchOptimisations = uiCheckboxChecked(cbJITBranchOptimisations);
|
bool branchOptimisations = uiCheckboxChecked(cbJITBranchOptimisations);
|
||||||
|
bool literalOptimisations = uiCheckboxChecked(cbJITLiteralOptimisations);
|
||||||
uiFreeText(maxBlockSizeStr);
|
uiFreeText(maxBlockSizeStr);
|
||||||
if (blockSize < 1)
|
if (blockSize < 1)
|
||||||
blockSize = 1;
|
blockSize = 1;
|
||||||
if (blockSize > 32)
|
if (blockSize > 32)
|
||||||
blockSize = 32;
|
blockSize = 32;
|
||||||
|
|
||||||
if (enableJit != Config::JIT_Enable || blockSize != Config::JIT_MaxBlockSize ||
|
if (enableJit != Config::JIT_Enable || blockSize != Config::JIT_MaxBlockSize
|
||||||
branchOptimisations != Config::JIT_BrancheOptimisations)
|
|| branchOptimisations != Config::JIT_BrancheOptimisations
|
||||||
|
|| literalOptimisations != Config::JIT_LiteralOptimisations)
|
||||||
{
|
{
|
||||||
if (RunningSomething &&
|
if (RunningSomething &&
|
||||||
!uiMsgBoxConfirm(win, "Reset emulator",
|
!uiMsgBoxConfirm(win, "Reset emulator",
|
||||||
|
@ -82,7 +85,8 @@ void OnOk(uiButton* btn, void* blarg)
|
||||||
|
|
||||||
Config::JIT_Enable = enableJit;
|
Config::JIT_Enable = enableJit;
|
||||||
Config::JIT_MaxBlockSize = blockSize;
|
Config::JIT_MaxBlockSize = blockSize;
|
||||||
Config::JIT_BrancheOptimisations = uiCheckboxChecked(cbJITBranchOptimisations);
|
Config::JIT_BrancheOptimisations = branchOptimisations;
|
||||||
|
Config::JIT_LiteralOptimisations = literalOptimisations;
|
||||||
|
|
||||||
restart = true;
|
restart = true;
|
||||||
}
|
}
|
||||||
|
@ -108,11 +112,13 @@ void OnJITStateChanged(uiCheckbox* cb, void* blarg)
|
||||||
{
|
{
|
||||||
uiControlEnable(uiControl(enJITMaxBlockSize));
|
uiControlEnable(uiControl(enJITMaxBlockSize));
|
||||||
uiControlEnable(uiControl(cbJITBranchOptimisations));
|
uiControlEnable(uiControl(cbJITBranchOptimisations));
|
||||||
|
uiControlEnable(uiControl(cbJITLiteralOptimisations));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
uiControlDisable(uiControl(enJITMaxBlockSize));
|
uiControlDisable(uiControl(enJITMaxBlockSize));
|
||||||
uiControlDisable(uiControl(cbJITBranchOptimisations));
|
uiControlDisable(uiControl(cbJITBranchOptimisations));
|
||||||
|
uiControlDisable(uiControl(cbJITLiteralOptimisations));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -174,9 +180,25 @@ void Open()
|
||||||
uiBox* row = uiNewHorizontalBox();
|
uiBox* row = uiNewHorizontalBox();
|
||||||
uiBoxAppend(in_ctrl, uiControl(row), 0);
|
uiBoxAppend(in_ctrl, uiControl(row), 0);
|
||||||
|
|
||||||
cbJITBranchOptimisations = uiNewCheckbox("Branch optimisations (breaks in rare cases games!)");
|
uiLabel* lbl = uiNewLabel("If you experience problems with a certain game, you can try disabling these options:");
|
||||||
|
uiBoxAppend(row, uiControl(lbl), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
uiBox* row = uiNewHorizontalBox();
|
||||||
|
uiBoxAppend(in_ctrl, uiControl(row), 0);
|
||||||
|
|
||||||
|
cbJITBranchOptimisations = uiNewCheckbox("Branch optimisations");
|
||||||
uiBoxAppend(row, uiControl(cbJITBranchOptimisations), 0);
|
uiBoxAppend(row, uiControl(cbJITBranchOptimisations), 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
uiBox* row = uiNewHorizontalBox();
|
||||||
|
uiBoxAppend(in_ctrl, uiControl(row), 0);
|
||||||
|
|
||||||
|
cbJITLiteralOptimisations = uiNewCheckbox("Literal optimisations");
|
||||||
|
uiBoxAppend(row, uiControl(cbJITLiteralOptimisations), 0);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -214,6 +236,7 @@ void Open()
|
||||||
OnJITStateChanged(cbJITEnabled, NULL);
|
OnJITStateChanged(cbJITEnabled, NULL);
|
||||||
|
|
||||||
uiCheckboxSetChecked(cbJITBranchOptimisations, Config::JIT_BrancheOptimisations);
|
uiCheckboxSetChecked(cbJITBranchOptimisations, Config::JIT_BrancheOptimisations);
|
||||||
|
uiCheckboxSetChecked(cbJITLiteralOptimisations, Config::JIT_LiteralOptimisations);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
uiControlShow(uiControl(win));
|
uiControlShow(uiControl(win));
|
||||||
|
|
|
@ -2675,8 +2675,6 @@ void RecreateMainWindow(bool opengl)
|
||||||
|
|
||||||
int main(int argc, char** argv)
|
int main(int argc, char** argv)
|
||||||
{
|
{
|
||||||
freopen("miauz.txt", "w", stdout);
|
|
||||||
|
|
||||||
srand(time(NULL));
|
srand(time(NULL));
|
||||||
|
|
||||||
printf("melonDS " MELONDS_VERSION "\n");
|
printf("melonDS " MELONDS_VERSION "\n");
|
||||||
|
|
Loading…
Reference in New Issue