abandon pipelining on jit
Fixes Golden Sun Dawn. This makes the CPU state incompatible between the interpreter and the JIT. That's why switching the CPU mode requires a restart (not requiring one is stupid anyway) and why the pipeline is manually filled when making a save state.
This commit is contained in:
parent
26ecf6bb3c
commit
316378092a
44
src/ARM.cpp
44
src/ARM.cpp
|
@ -23,6 +23,7 @@
|
||||||
#include "ARMInterpreter.h"
|
#include "ARMInterpreter.h"
|
||||||
#include "AREngine.h"
|
#include "AREngine.h"
|
||||||
#include "ARMJIT.h"
|
#include "ARMJIT.h"
|
||||||
|
#include "Config.h"
|
||||||
|
|
||||||
|
|
||||||
// instruction timing notes
|
// instruction timing notes
|
||||||
|
@ -168,6 +169,13 @@ void ARM::DoSavestate(Savestate* file)
|
||||||
file->VarArray(R_IRQ, 3*sizeof(u32));
|
file->VarArray(R_IRQ, 3*sizeof(u32));
|
||||||
file->VarArray(R_UND, 3*sizeof(u32));
|
file->VarArray(R_UND, 3*sizeof(u32));
|
||||||
file->Var32(&CurInstr);
|
file->Var32(&CurInstr);
|
||||||
|
if (!file->Saving && Config::JIT_Enable)
|
||||||
|
{
|
||||||
|
// hack, the JIT doesn't really pipeline
|
||||||
|
// but we still want JIT save states to be
|
||||||
|
// loaded while running the interpreter
|
||||||
|
FillPipeline();
|
||||||
|
}
|
||||||
file->VarArray(NextInstr, 2*sizeof(u32));
|
file->VarArray(NextInstr, 2*sizeof(u32));
|
||||||
|
|
||||||
file->Var32(&ExceptionBase);
|
file->Var32(&ExceptionBase);
|
||||||
|
@ -768,3 +776,39 @@ void ARMv4::ExecuteJIT()
|
||||||
Halted = 0;
|
Halted = 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
void ARMv5::FillPipeline()
|
||||||
|
{
|
||||||
|
if (CPSR & 0x20)
|
||||||
|
{
|
||||||
|
if ((R[15] - 2) & 0x2)
|
||||||
|
{
|
||||||
|
NextInstr[0] = CodeRead32(R[15] - 4, false) >> 16;
|
||||||
|
NextInstr[1] = CodeRead32(R[15], false);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
NextInstr[0] = CodeRead32(R[15] - 2, false);
|
||||||
|
NextInstr[1] = NextInstr[0] >> 16;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
NextInstr[0] = CodeRead32(R[15] - 4, false);
|
||||||
|
NextInstr[1] = CodeRead32(R[15], false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ARMv4::FillPipeline()
|
||||||
|
{
|
||||||
|
if (CPSR & 0x20)
|
||||||
|
{
|
||||||
|
NextInstr[0] = CodeRead16(R[15] - 2);
|
||||||
|
NextInstr[1] = CodeRead16(R[15]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
NextInstr[0] = CodeRead32(R[15] - 4);
|
||||||
|
NextInstr[1] = CodeRead32(R[15]);
|
||||||
|
}
|
||||||
|
}
|
|
@ -42,6 +42,8 @@ public:
|
||||||
|
|
||||||
virtual void DoSavestate(Savestate* file);
|
virtual void DoSavestate(Savestate* file);
|
||||||
|
|
||||||
|
virtual void FillPipeline() = 0;
|
||||||
|
|
||||||
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
|
virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0;
|
||||||
void RestoreCPSR();
|
void RestoreCPSR();
|
||||||
|
|
||||||
|
@ -156,6 +158,8 @@ public:
|
||||||
|
|
||||||
void UpdateRegionTimings(u32 addrstart, u32 addrend);
|
void UpdateRegionTimings(u32 addrstart, u32 addrend);
|
||||||
|
|
||||||
|
void FillPipeline();
|
||||||
|
|
||||||
void JumpTo(u32 addr, bool restorecpsr = false);
|
void JumpTo(u32 addr, bool restorecpsr = false);
|
||||||
|
|
||||||
void PrefetchAbort();
|
void PrefetchAbort();
|
||||||
|
@ -284,6 +288,8 @@ public:
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
|
void FillPipeline();
|
||||||
|
|
||||||
void JumpTo(u32 addr, bool restorecpsr = false);
|
void JumpTo(u32 addr, bool restorecpsr = false);
|
||||||
|
|
||||||
void Execute();
|
void Execute();
|
||||||
|
|
|
@ -139,6 +139,7 @@ CompiledBlock CompileBlock(ARM* cpu)
|
||||||
int i = 0;
|
int i = 0;
|
||||||
u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4);
|
u32 blockAddr = cpu->R[15] - (thumb ? 2 : 4);
|
||||||
u32 r15 = cpu->R[15];
|
u32 r15 = cpu->R[15];
|
||||||
|
cpu->FillPipeline();
|
||||||
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
|
u32 nextInstr[2] = {cpu->NextInstr[0], cpu->NextInstr[1]};
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
|
|
@ -5,6 +5,14 @@ using namespace Gen;
|
||||||
namespace ARMJIT
|
namespace ARMJIT
|
||||||
{
|
{
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
int squeezePointer(T* ptr)
|
||||||
|
{
|
||||||
|
int truncated = (int)((u64)ptr);
|
||||||
|
assert((T*)((u64)truncated) == ptr);
|
||||||
|
return truncated;
|
||||||
|
}
|
||||||
|
|
||||||
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
{
|
{
|
||||||
// we can simplify constant branches by a lot
|
// we can simplify constant branches by a lot
|
||||||
|
@ -12,9 +20,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
// we'll see how it works out
|
// we'll see how it works out
|
||||||
|
|
||||||
u32 newPC;
|
u32 newPC;
|
||||||
u32 nextInstr[2];
|
|
||||||
u32 cycles = 0;
|
u32 cycles = 0;
|
||||||
bool setupRegion = false;
|
|
||||||
|
|
||||||
if (addr & 0x1 && !Thumb)
|
if (addr & 0x1 && !Thumb)
|
||||||
{
|
{
|
||||||
|
@ -40,7 +46,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
|
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
|
MOV(32, MDisp(RCPU, offsetof(ARMv5, RegionCodeCycles)), Imm32(regionCodeCycles));
|
||||||
|
|
||||||
setupRegion = newregion != oldregion;
|
bool setupRegion = newregion != oldregion;
|
||||||
if (setupRegion)
|
if (setupRegion)
|
||||||
cpu9->SetupCodeMem(addr);
|
cpu9->SetupCodeMem(addr);
|
||||||
|
|
||||||
|
@ -53,15 +59,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
// doesn't matter if we put garbage in the MSbs there
|
// doesn't matter if we put garbage in the MSbs there
|
||||||
if (addr & 0x2)
|
if (addr & 0x2)
|
||||||
{
|
{
|
||||||
nextInstr[0] = cpu9->CodeRead32(addr-2, true) >> 16;
|
cpu9->CodeRead32(addr-2, true);
|
||||||
cycles += cpu9->CodeCycles;
|
cycles += cpu9->CodeCycles;
|
||||||
nextInstr[1] = cpu9->CodeRead32(addr+2, false);
|
cpu9->CodeRead32(addr+2, false);
|
||||||
cycles += CurCPU->CodeCycles;
|
cycles += CurCPU->CodeCycles;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
nextInstr[0] = cpu9->CodeRead32(addr, true);
|
cpu9->CodeRead32(addr, true);
|
||||||
nextInstr[1] = nextInstr[0] >> 16;
|
|
||||||
cycles += cpu9->CodeCycles;
|
cycles += cpu9->CodeCycles;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -70,12 +75,15 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
addr &= ~0x3;
|
addr &= ~0x3;
|
||||||
newPC = addr+4;
|
newPC = addr+4;
|
||||||
|
|
||||||
nextInstr[0] = cpu9->CodeRead32(addr, true);
|
cpu9->CodeRead32(addr, true);
|
||||||
cycles += cpu9->CodeCycles;
|
cycles += cpu9->CodeCycles;
|
||||||
nextInstr[1] = cpu9->CodeRead32(addr+4, false);
|
cpu9->CodeRead32(addr+4, false);
|
||||||
cycles += cpu9->CodeCycles;
|
cycles += cpu9->CodeCycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MOV(64, MDisp(RCPU, offsetof(ARM, CodeMem.Mem)), Imm32(squeezePointer(cpu9->CodeMem.Mem)));
|
||||||
|
MOV(32, MDisp(RCPU, offsetof(ARM, CodeMem.Mask)), Imm32(cpu9->CodeMem.Mask));
|
||||||
|
|
||||||
cpu9->RegionCodeCycles = compileTimeCodeCycles;
|
cpu9->RegionCodeCycles = compileTimeCodeCycles;
|
||||||
if (setupRegion)
|
if (setupRegion)
|
||||||
cpu9->SetupCodeMem(R15);
|
cpu9->SetupCodeMem(R15);
|
||||||
|
@ -102,8 +110,6 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
u32 compileTimePC = CurCPU->R[15];
|
u32 compileTimePC = CurCPU->R[15];
|
||||||
CurCPU->R[15] = newPC;
|
CurCPU->R[15] = newPC;
|
||||||
|
|
||||||
nextInstr[0] = ((ARMv4*)CurCPU)->CodeRead16(addr);
|
|
||||||
nextInstr[1] = ((ARMv4*)CurCPU)->CodeRead16(addr+2);
|
|
||||||
cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
|
cycles += NDS::ARM7MemTimings[codeCycles][0] + NDS::ARM7MemTimings[codeCycles][1];
|
||||||
|
|
||||||
CurCPU->R[15] = compileTimePC;
|
CurCPU->R[15] = compileTimePC;
|
||||||
|
@ -116,8 +122,6 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
u32 compileTimePC = CurCPU->R[15];
|
u32 compileTimePC = CurCPU->R[15];
|
||||||
CurCPU->R[15] = newPC;
|
CurCPU->R[15] = newPC;
|
||||||
|
|
||||||
nextInstr[0] = cpu7->CodeRead32(addr);
|
|
||||||
nextInstr[1] = cpu7->CodeRead32(addr+4);
|
|
||||||
cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
|
cycles += NDS::ARM7MemTimings[codeCycles][2] + NDS::ARM7MemTimings[codeCycles][3];
|
||||||
|
|
||||||
CurCPU->R[15] = compileTimePC;
|
CurCPU->R[15] = compileTimePC;
|
||||||
|
@ -128,19 +132,10 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
|
||||||
}
|
}
|
||||||
|
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
|
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(newPC));
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(nextInstr[0]));
|
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(nextInstr[1]));
|
|
||||||
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
|
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
|
||||||
ConstantCycles += cycles;
|
ConstantCycles += cycles;
|
||||||
else
|
else
|
||||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||||
|
|
||||||
if (setupRegion)
|
|
||||||
{
|
|
||||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
|
||||||
MOV(32, R(ABI_PARAM2), Imm32(newPC));
|
|
||||||
CALL((void*)&ARMv5::SetupCodeMem);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
||||||
|
|
|
@ -395,11 +395,6 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
|
MOV(32, MDisp(RCPU, offsetof(ARM, R[15])), Imm32(R15));
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
|
MOV(32, MDisp(RCPU, offsetof(ARM, CodeCycles)), Imm32(CurInstr.CodeCycles));
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
|
MOV(32, MDisp(RCPU, offsetof(ARM, CurInstr)), Imm32(CurInstr.Instr));
|
||||||
if (i == instrsCount - 1)
|
|
||||||
{
|
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[0])), Imm32(CurInstr.NextInstr[0]));
|
|
||||||
MOV(32, MDisp(RCPU, offsetof(ARM, NextInstr[1])), Imm32(CurInstr.NextInstr[1]));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (comp == NULL)
|
if (comp == NULL)
|
||||||
SaveCPSR();
|
SaveCPSR();
|
||||||
|
|
|
@ -457,11 +457,6 @@ void Compiler::Comp_MemAccess(OpArg rd, bool signExtend, bool store, int size)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Debug aid: prints both arguments to stdout in hexadecimal, prefixed
// with "b".
void printStuff2(u32 a, u32 b)
{
    const u32 first = a;
    const u32 second = b;

    printf("b %x %x\n", first, second);
}
|
|
||||||
|
|
||||||
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
|
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
|
||||||
{
|
{
|
||||||
int regsCount = regs.Count();
|
int regsCount = regs.Count();
|
||||||
|
|
Loading…
Reference in New Issue