first steps in bringing over the JIT refactor/fastmem

This commit is contained in:
RSDuck 2020-06-14 21:04:25 +02:00
parent fea9f95bba
commit e335a8ca76
25 changed files with 2368 additions and 1624 deletions

View File

@ -21,6 +21,8 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMInterpreter.h"
#include "ARMJIT.h"
#include "Config.h"
#include "AREngine.h"
#include "ARMJIT.h"
#include "Config.h"
@ -74,7 +76,9 @@ ARM::~ARM()
// Constructs the ARM9 core (CPU number 0).
//
// In builds without JIT_ENABLED this object owns the DTCM backing store and
// allocates it here; the matching release is in ~ARMv5().
// NOTE(review): with JIT_ENABLED the DTCM pointer is not assigned in this
// constructor; presumably it is pointed at JIT-managed memory elsewhere - confirm.
ARMv5::ARMv5() : ARM(0)
{
#ifndef JIT_ENABLED
    // Size the buffer by the fixed physical capacity. DTCMSize is an ordinary
    // member that has not been assigned yet when the constructor body runs,
    // so using it as the array length would read an indeterminate value.
    DTCM = new u8[DTCMPhysicalSize];
#endif
}
ARMv4::ARMv4() : ARM(1)
@ -82,6 +86,13 @@ ARMv4::ARMv4() : ARM(1)
//
}
// Releases the DTCM buffer, but only in builds where the constructor was the
// one that allocated it (i.e. JIT_ENABLED is not defined).
ARMv5::~ARMv5()
{
#if !defined(JIT_ENABLED)
    delete[] DTCM;
#endif
}
void ARM::Reset()
{
Cycles = 0;
@ -622,24 +633,26 @@ void ARMv5::ExecuteJIT()
while (NDS::ARM9Timestamp < NDS::ARM9Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
u32 translatedAddr = ARMJIT::TranslateAddr9(instrAddr);
if (!translatedAddr)
// hack so Cycles <= 0 becomes Cycles < 0
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
&& !ARMJIT::SetupExecutableRegion(0, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM9Timestamp = NDS::ARM9Target;
printf("ARMv5 PC in non executable region %08X\n", R[15]);
return;
}
// hack so Cycles <= 0 becomes Cycles < 0
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<0>(translatedAddr);
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(0, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM9Timestamp = NDS::ARM9Target - (Cycles + 1);
NDS::ARM9Timestamp = NDS::ARM9Target - Cycles - 1;
if (StopExecution)
{
@ -766,23 +779,25 @@ void ARMv4::ExecuteJIT()
while (NDS::ARM7Timestamp < NDS::ARM7Target)
{
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
u32 translatedAddr = ARMJIT::TranslateAddr7(instrAddr);
if (!translatedAddr)
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
if ((instrAddr < FastBlockLookupStart || instrAddr >= (FastBlockLookupStart + FastBlockLookupSize))
&& !ARMJIT::SetupExecutableRegion(1, instrAddr, FastBlockLookup, FastBlockLookupStart, FastBlockLookupSize))
{
NDS::ARM7Timestamp = NDS::ARM7Target;
printf("ARMv4 PC in non executable region %08X\n", R[15]);
return;
}
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<1>(translatedAddr);
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlock(1, FastBlockLookup,
instrAddr - FastBlockLookupStart, instrAddr);
if (block)
ARM_Dispatch(this, block);
else
ARMJIT::CompileBlock(this);
NDS::ARM7Timestamp = NDS::ARM7Target - (Cycles + 1);
NDS::ARM7Timestamp = NDS::ARM7Target - Cycles - 1;
// TODO optimize this shit!!!
if (StopExecution)

View File

@ -32,11 +32,14 @@ enum
RWFlags_ForceUser = (1<<21),
};
const u32 ITCMPhysicalSize = 0x8000;
const u32 DTCMPhysicalSize = 0x4000;
class ARM
{
public:
ARM(u32 num);
~ARM(); // destroy shit
virtual ~ARM(); // destroy shit
virtual void Reset();
@ -143,6 +146,11 @@ public:
NDS::MemRegion CodeMem;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart = 0, FastBlockLookupSize = 0;
u64* FastBlockLookup;
#endif
static u32 ConditionTable[16];
protected:
@ -158,6 +166,7 @@ class ARMv5 : public ARM
{
public:
ARMv5();
~ARMv5();
void Reset();
@ -260,8 +269,8 @@ public:
u32 DTCMBase, DTCMSize;
s32 RegionCodeCycles;
u8 ITCM[0x8000];
u8 DTCM[0x4000];
u8 ITCM[ITCMPhysicalSize];
u8* DTCM;
u8 ICache[0x2000];
u32 ICacheTags[64*4];

File diff suppressed because it is too large Load Diff

View File

@ -9,32 +9,7 @@
namespace ARMJIT
{
enum ExeMemKind
{
exeMem_Unmapped = 0,
exeMem_ITCM,
exeMem_MainRAM,
exeMem_SWRAM,
exeMem_LCDC,
exeMem_ARM9_BIOS,
exeMem_ARM7_BIOS,
exeMem_ARM7_WRAM,
exeMem_ARM7_WVRAM,
exeMem_Count
};
extern const u32 ExeMemRegionOffsets[];
extern const u32 ExeMemRegionSizes[];
typedef u32 (*JitBlockEntry)();
const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
u32 TranslateAddr9(u32 addr);
u32 TranslateAddr7(u32 addr);
template <u32 Num>
JitBlockEntry LookUpBlockEntry(u32 addr);
typedef void (*JitBlockEntry)();
void Init();
void DeInit();
@ -43,44 +18,15 @@ void Reset();
void InvalidateByAddr(u32 pseudoPhysical);
void InvalidateRegionIfNecessary(u32 addr);
inline void InvalidateMainRAMIfNecessary(u32 addr)
{
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_MainRAM] + (addr & (MAIN_RAM_SIZE - 1)));
}
inline void InvalidateITCMIfNecessary(u32 addr)
{
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ITCM] + (addr & 0x7FFF));
}
inline void InvalidateLCDCIfNecessary(u32 addr)
{
if (addr < 0x68A3FFF)
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_LCDC] + (addr - 0x6800000));
}
inline void InvalidateSWRAM7IfNecessary(u32 addr)
{
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM7 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM7Mask));
}
inline void InvalidateSWRAM9IfNecessary(u32 addr)
{
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + (NDS::SWRAM_ARM9 - NDS::SharedWRAM) + (addr & NDS::SWRAM_ARM9Mask));
}
inline void InvalidateARM7WRAMIfNecessary(u32 addr)
{
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WRAM] + (addr & 0xFFFF));
}
inline void InvalidateARM7WVRAMIfNecessary(u32 addr)
{
InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WVRAM] + (addr & 0x1FFFF));
}
template <u32 num, int region>
void CheckAndInvalidate(u32 addr);
void CompileBlock(ARM* cpu);
void ResetBlockCache();
void UpdateMemoryStatus9(u32 start, u32 end);
void UpdateMemoryStatus7(u32 start, u32 end);
JitBlockEntry LookUpBlock(u32 num, u64* entries, u32 offset, u32 addr);
bool SetupExecutableRegion(u32 num, u32 blockAddr, u64*& entry, u32& start, u32& size);
}

View File

@ -243,7 +243,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
if (S && !CurInstr.SetFlags)
S = false;
bool CVInGP = false;
bool CVInGPR = false;
switch (op)
{
case 0x2: // SUB
@ -306,7 +306,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
UBFX(W2, RCPSR, 29, 1);
if (S)
{
CVInGP = true;
CVInGPR = true;
ADDS(W1, rn, W2);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
@ -335,7 +335,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
ORN(W1, WZR, op2.Reg.Rm, op2.ToArithOption());
if (S)
{
CVInGP = true;
CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
@ -355,7 +355,7 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
MVN(W1, rn);
if (S)
{
CVInGP = true;
CVInGPR = true;
ADDS(W1, W2, W1);
CSET(W2, CC_CS);
CSET(W3, CC_VS);
@ -379,12 +379,12 @@ void Compiler::Comp_Arithmetic(int op, bool S, ARM64Reg rd, ARM64Reg rn, Op2 op2
if (S)
{
if (CVInGP)
if (CVInGPR)
{
BFI(RCPSR, W2, 29, 1);
BFI(RCPSR, W3, 28, 1);
}
Comp_RetriveFlags(!CVInGP);
Comp_RetriveFlags(!CVInGPR);
}
}
@ -501,7 +501,23 @@ void Compiler::A_Comp_ALUMovOp()
MOVI2R(rd, op2.Imm);
}
else
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
{
// MOV with a shifted operand is an ORR with a shifted register, which has extra cycles of latency; emit a plain shift instead
if (op2.Reg.ShiftAmount > 0)
{
switch (op2.Reg.ShiftType)
{
case ST_LSL: LSL(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
case ST_LSR: LSR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
case ST_ASR: ASR(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
case ST_ROR: ROR_(rd, op2.Reg.Rm, op2.Reg.ShiftAmount); break;
}
}
else
{
MOV(rd, op2.Reg.Rm, op2.ToArithOption());
}
}
}
if (S)
@ -558,10 +574,7 @@ void Compiler::Comp_Mul_Mla(bool S, bool mla, ARM64Reg rd, ARM64Reg rm, ARM64Reg
}
else
{
CLZ(W0, rs);
CLS(W1, rs);
CMP(W0, W1);
CSEL(W0, W0, W1, CC_GT);
CLS(W0, rs);
Comp_AddCycles_CI(mla ? 1 : 0, W0, ArithOption(W0, ST_LSR, 3));
}
@ -594,10 +607,10 @@ void Compiler::A_Comp_Mul_Long()
}
else
{
CLZ(W0, rs);
CLS(W1, rs);
CMP(W0, W1);
CSEL(W0, W0, W1, CC_GT);
if (sign)
CLS(W0, rs);
else
CLZ(W0, rs);
Comp_AddCycles_CI(0, W0, ArithOption(W0, ST_LSR, 3));
}
@ -628,6 +641,86 @@ void Compiler::A_Comp_Mul_Long()
Comp_RetriveFlags(false);
}
void Compiler::A_Comp_Mul_Short()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));
ARM64Reg rm = MapReg(CurInstr.A_Reg(0));
ARM64Reg rs = MapReg(CurInstr.A_Reg(8));
u32 op = (CurInstr.Instr >> 21) & 0xF;
bool x = CurInstr.Instr & (1 << 5);
bool y = CurInstr.Instr & (1 << 6);
SBFX(W1, rs, y ? 16 : 0, 16);
if (op == 0b1000)
{
// SMLAxy
SBFX(W0, rm, x ? 16 : 0, 16);
MUL(W0, W0, W1);
ORRI2R(W1, RCPSR, 0x08000000);
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
ADDS(rd, W0, rn);
CSEL(RCPSR, W1, RCPSR, CC_VS);
CPSRDirty = true;
Comp_AddCycles_C();
}
else if (op == 0b1011)
{
// SMULxy
SBFX(W0, rm, x ? 16 : 0, 16);
MUL(rd, W0, W1);
Comp_AddCycles_C();
}
else if (op == 0b1010)
{
// SMLALxy
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
MOV(W2, rn);
BFI(X2, rd, 32, 32);
SBFX(W0, rm, x ? 16 : 0, 16);
SMADDL(EncodeRegTo64(rn), W0, W1, X2);
UBFX(EncodeRegTo64(rd), EncodeRegTo64(rn), 32, 32);
Comp_AddCycles_CI(1);
}
else if (op == 0b1001)
{
// SMLAWy/SMULWy
SMULL(X0, rm, W1);
ASR(x ? EncodeRegTo64(rd) : X0, X0, 16);
if (!x)
{
ORRI2R(W1, RCPSR, 0x08000000);
ARM64Reg rn = MapReg(CurInstr.A_Reg(12));
ADDS(rd, W0, rn);
CSEL(RCPSR, W1, RCPSR, CC_VS);
CPSRDirty = true;
}
Comp_AddCycles_C();
}
}
void Compiler::A_Comp_Mul()
{
ARM64Reg rd = MapReg(CurInstr.A_Reg(16));

View File

@ -143,7 +143,7 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles)
if ((Thumb || CurInstr.Cond() >= 0xE) && !forceNonConstantCycles)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
SUB(RCycles, RCycles, cycles);
}
@ -152,23 +152,19 @@ void* Compiler::Gen_JumpTo9(int kind)
AlignCode16();
void* res = GetRXPtr();
MOVI2R(W2, kCodeCacheTiming);
// W1 - code cycles non branch
// W2 - branch code cycles
LSR(W1, W0, 12);
LSL(W1, W1, 2);
ADDI2R(W1, W1, offsetof(ARMv5, MemTimings), W2);
LDRB(W1, RCPU, W1);
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
LDR(INDEX_UNSIGNED, W2, RCPU, offsetof(ARMv5, ITCMSize));
STR(INDEX_UNSIGNED, W1, RCPU, offsetof(ARMv5, RegionCodeCycles));
CMP(W0, W3);
FixupBranch outsideITCM = B(CC_LO);
MOVI2R(W1, 1);
MOVI2R(W2, 1);
SetJumpTarget(outsideITCM);
CMP(W1, 0xFF);
MOVI2R(W3, kCodeCacheTiming);
CSEL(W1, W3, W1, CC_EQ);
CMP(W0, W2);
CSINC(W1, W1, WZR, CC_HS);
FixupBranch switchToThumb;
if (kind == 0)
@ -176,40 +172,36 @@ void* Compiler::Gen_JumpTo9(int kind)
if (kind == 0 || kind == 1)
{
ANDI2R(W0, W0, ~3);
// ARM
if (kind == 0)
ANDI2R(RCPSR, RCPSR, ~0x20);
ADD(W3, W0, 4);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
ADD(W1, W1, W2);
ADD(RCycles, RCycles, W1);
ANDI2R(W0, W0, ~3);
ADD(W0, W0, 4);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
ADD(W1, W1, W1);
SUB(RCycles, RCycles, W1);
RET();
}
if (kind == 0 || kind == 2)
{
// Thumb
if (kind == 0)
{
SetJumpTarget(switchToThumb);
ORRI2R(RCPSR, RCPSR, 0x20);
}
ANDI2R(W0, W0, ~1);
ADD(W0, W0, 2);
STR(INDEX_UNSIGNED, W0, RCPU, offsetof(ARMv5, R[15]));
ADD(W3, W0, 2);
STR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARM, R[15]));
FixupBranch halfwordLoc = TBZ(W0, 1);
ADD(W1, W1, W2);
ADD(RCycles, RCycles, W1);
RET();
SetJumpTarget(halfwordLoc);
ADD(RCycles, RCycles, W2);
ADD(W2, W1, W1);
TSTI2R(W0, 0x2);
CSEL(W1, W1, W2, CC_EQ);
SUB(RCycles, RCycles, W1);
RET();
}
@ -237,7 +229,7 @@ void* Compiler::Gen_JumpTo7(int kind)
UBFX(W2, W3, 0, 8);
UBFX(W3, W3, 8, 8);
ADD(W2, W3, W2);
ADD(RCycles, RCycles, W2);
SUB(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~3);
@ -261,7 +253,7 @@ void* Compiler::Gen_JumpTo7(int kind)
UBFX(W2, W3, 16, 8);
UBFX(W3, W3, 24, 8);
ADD(W2, W3, W2);
ADD(RCycles, RCycles, W2);
SUB(RCycles, RCycles, W2);
ANDI2R(W0, W0, ~1);
@ -287,22 +279,11 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
}
else
{
BitSet16 hiRegsLoaded(RegCache.DirtyRegs & 0xFF00);
bool previouslyDirty = CPSRDirty;
bool cpsrDirty = CPSRDirty;
SaveCPSR();
if (restoreCPSR)
{
if (Thumb || CurInstr.Cond() >= 0xE)
RegCache.Flush();
else
{
// the ugly way...
// we only save them, to load and save them again
for (int reg : hiRegsLoaded)
SaveReg(reg, RegCache.Mapping[reg]);
}
}
SaveCycles();
PushRegs(restoreCPSR);
if (switchThumb)
MOV(W1, addr);
@ -319,16 +300,12 @@ void Compiler::Comp_JumpTo(Arm64Gen::ARM64Reg addr, bool switchThumb, bool resto
QuickCallFunction(X3, jumpToTrampoline<ARMv5>);
else
QuickCallFunction(X3, jumpToTrampoline<ARMv4>);
if (!Thumb && restoreCPSR && CurInstr.Cond() < 0xE)
{
for (int reg : hiRegsLoaded)
LoadReg(reg, RegCache.Mapping[reg]);
}
if (previouslyDirty)
LoadCPSR();
CPSRDirty = previouslyDirty;
PopRegs(restoreCPSR);
LoadCycles();
LoadCPSR();
if (CurInstr.Cond() < 0xE)
CPSRDirty = cpsrDirty;
}
}
@ -368,21 +345,13 @@ void Compiler::T_Comp_BCOND()
s32 offset = (s32)(CurInstr.Instr << 24) >> 23;
Comp_JumpTo(R15 + offset + 1, true);
Comp_BranchSpecialBehaviour();
Comp_BranchSpecialBehaviour(true);
FixupBranch skipFailed = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C(true);
if (CurInstr.BranchFlags & branch_FollowCondTaken)
{
SaveCPSR(false);
RegCache.PrepareExit();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
RET();
}
Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipFailed);
}

View File

@ -1,9 +1,3 @@
#include "ARMJIT_Compiler.h"
#include "../ARMInterpreter.h"
#include "../ARMJIT_Internal.h"
#ifdef __SWITCH__
#include "../switch/compat_switch.h"
@ -13,10 +7,17 @@ extern char __start__;
#include <unistd.h>
#endif
#include "ARMJIT_Compiler.h"
#include "../ARMJIT_Internal.h"
#include "../ARMInterpreter.h"
#include "../Config.h"
#include <malloc.h>
using namespace Arm64Gen;
extern "C" void ARM_Ret();
namespace ARMJIT
{
@ -28,7 +29,10 @@ namespace ARMJIT
like x64. At one hand you can translate a lot of instructions directly.
But at the same time, there are a ton of exceptions, like for
example ADD and SUB can't have a RORed second operand on ARMv8.
*/
When writing a JIT, if one instruction is recompiled into multiple ones, take care
not to write back until you've read all the other operands!
*/
template <>
const ARM64Reg RegisterCache<Compiler, ARM64Reg>::NativeRegAllocOrder[] =
@ -46,6 +50,132 @@ void Compiler::MovePC()
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
}
void Compiler::A_Comp_MRS()
{
Comp_AddCycles_C();
ARM64Reg rd = MapReg(CurInstr.A_Reg(12));
if (CurInstr.Instr & (1 << 22))
{
ANDI2R(W5, RCPSR, 0x1F);
MOVI2R(W3, 0);
MOVI2R(W1, 15 - 8);
BL(ReadBanked);
MOV(rd, W3);
}
else
MOV(rd, RCPSR);
}
void Compiler::A_Comp_MSR()
{
Comp_AddCycles_C();
ARM64Reg val;
if (CurInstr.Instr & (1 << 25))
{
val = W0;
MOVI2R(val, ROR((CurInstr.Instr & 0xFF), ((CurInstr.Instr >> 7) & 0x1E)));
}
else
{
val = MapReg(CurInstr.A_Reg(0));
}
u32 mask = 0;
if (CurInstr.Instr & (1<<16)) mask |= 0x000000FF;
if (CurInstr.Instr & (1<<17)) mask |= 0x0000FF00;
if (CurInstr.Instr & (1<<18)) mask |= 0x00FF0000;
if (CurInstr.Instr & (1<<19)) mask |= 0xFF000000;
if (CurInstr.Instr & (1 << 22))
{
ANDI2R(W5, RCPSR, 0x1F);
MOVI2R(W3, 0);
MOVI2R(W1, 15 - 8);
BL(ReadBanked);
MOVI2R(W1, mask);
MOVI2R(W2, mask & 0xFFFFFF00);
ANDI2R(W5, RCPSR, 0x1F);
CMP(W5, 0x10);
CSEL(W1, W2, W1, CC_EQ);
BIC(W3, W3, W1);
AND(W0, val, W1);
ORR(W3, W3, W0);
MOVI2R(W1, 15 - 8);
BL(WriteBanked);
}
else
{
mask &= 0xFFFFFFDF;
CPSRDirty = true;
if ((mask & 0xFF) == 0)
{
ANDI2R(RCPSR, RCPSR, ~mask);
ANDI2R(W0, val, mask);
ORR(RCPSR, RCPSR, W0);
}
else
{
MOVI2R(W2, mask);
MOVI2R(W3, mask & 0xFFFFFF00);
ANDI2R(W1, RCPSR, 0x1F);
// W1 = first argument
CMP(W1, 0x10);
CSEL(W2, W3, W2, CC_EQ);
BIC(RCPSR, RCPSR, W2);
AND(W0, val, W2);
ORR(RCPSR, RCPSR, W0);
MOV(W2, RCPSR);
MOV(X0, RCPU);
PushRegs(true);
QuickCallFunction(X3, (void*)&ARM::UpdateMode);
PopRegs(true);
}
}
}
void Compiler::PushRegs(bool saveHiRegs)
{
if (saveHiRegs)
{
if (Thumb || CurInstr.Cond() == 0xE)
{
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
RegCache.UnloadRegister(reg);
}
else
{
BitSet16 hiRegsDirty(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsDirty)
SaveReg(reg, RegCache.Mapping[reg]);
}
}
}
void Compiler::PopRegs(bool saveHiRegs)
{
if (saveHiRegs)
{
BitSet16 hiRegsLoaded(RegCache.LoadedRegs & 0x7F00);
for (int reg : hiRegsLoaded)
LoadReg(reg, RegCache.Mapping[reg]);
}
}
Compiler::Compiler()
{
#ifdef __SWITCH__
@ -80,8 +210,7 @@ Compiler::Compiler()
assert(succeded);
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
JitMemUseableSize = JitMemSize;
Reset();
JitMemMainSize = JitMemSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
@ -90,31 +219,8 @@ Compiler::Compiler()
SetCodeBase(pageAligned, pageAligned);
JitMemUseableSize = alignedSize;
Reset();
#endif
for (int i = 0; i < 3; i++)
{
for (int j = 0; j < 2; j++)
{
MemFunc9[i][j] = Gen_MemoryRoutine9(8 << i, j);
}
}
MemFunc7[0][0] = (void*)NDS::ARM7Read8;
MemFunc7[1][0] = (void*)NDS::ARM7Read16;
MemFunc7[2][0] = (void*)NDS::ARM7Read32;
MemFunc7[0][1] = (void*)NDS::ARM7Write8;
MemFunc7[1][1] = (void*)NDS::ARM7Write16;
MemFunc7[2][1] = (void*)NDS::ARM7Write32;
for (int i = 0; i < 2; i++)
{
for (int j = 0; j < 2; j++)
{
MemFuncsSeq9[i][j] = Gen_MemoryRoutine9Seq(i, j);
MemFuncsSeq7[i][j] = Gen_MemoryRoutine7Seq(i, j);
}
}
SetCodePtr(0);
for (int i = 0; i < 3; i++)
{
@ -123,26 +229,26 @@ Compiler::Compiler()
}
/*
W0 - mode
W5 - mode
W1 - reg num
W3 - in/out value of reg
*/
{
ReadBanked = GetRXPtr();
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
CMP(W0, 0x11);
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
CMP(W0, 0x12);
CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
CMP(W0, 0x13);
CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
CMP(W0, 0x17);
CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
CMP(W0, 0x1B);
CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
RET();
@ -166,19 +272,19 @@ Compiler::Compiler()
{
WriteBanked = GetRXPtr();
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
CMP(W0, 0x11);
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
CMP(W5, 0x11);
FixupBranch fiq = B(CC_EQ);
SUBS(W1, W1, 13 - 8);
ADD(X2, RCPU, X1, ArithOption(X1, ST_LSL, 2));
ADD(X2, RCPU, X1, ArithOption(X2, ST_LSL, 2));
FixupBranch notEverything = B(CC_LT);
CMP(W0, 0x12);
CMP(W5, 0x12);
FixupBranch irq = B(CC_EQ);
CMP(W0, 0x13);
CMP(W5, 0x13);
FixupBranch svc = B(CC_EQ);
CMP(W0, 0x17);
CMP(W5, 0x17);
FixupBranch abt = B(CC_EQ);
CMP(W0, 0x1B);
CMP(W5, 0x1B);
FixupBranch und = B(CC_EQ);
SetJumpTarget(notEverything);
MOVI2R(W4, 0);
@ -206,9 +312,71 @@ Compiler::Compiler()
RET();
}
//FlushIcache();
for (int num = 0; num < 2; num++)
{
for (int size = 0; size < 3; size++)
{
for (int reg = 0; reg < 8; reg++)
{
ARM64Reg rdMapped = (ARM64Reg)(W19 + reg);
PatchedStoreFuncs[num][size][reg] = GetRXPtr();
if (num == 0)
{
MOV(X1, RCPU);
MOV(W2, rdMapped);
}
else
{
MOV(W1, rdMapped);
}
ABI_PushRegisters({30});
switch ((8 << size) | num)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
case 33: QuickCallFunction(X3, SlowWrite7<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
case 17: QuickCallFunction(X3, SlowWrite7<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
case 9: QuickCallFunction(X3, SlowWrite7<u8>); break;
}
ABI_PopRegisters({30});
RET();
for (int signextend = 0; signextend < 2; signextend++)
{
PatchedLoadFuncs[num][size][signextend][reg] = GetRXPtr();
if (num == 0)
MOV(X1, RCPU);
ABI_PushRegisters({30});
switch ((8 << size) | num)
{
case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
case 33: QuickCallFunction(X3, SlowRead7<u32>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
case 17: QuickCallFunction(X3, SlowRead7<u16>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
case 9: QuickCallFunction(X3, SlowRead7<u8>); break;
}
ABI_PopRegisters({30});
if (size == 32)
MOV(rdMapped, W0);
else if (signextend)
SBFX(rdMapped, W0, 0, 8 << size);
else
UBFX(rdMapped, W0, 0, 8 << size);
RET();
}
}
}
}
FlushIcache();
JitMemSecondarySize = 1024*1024*4;
JitMemMainSize -= GetCodeOffset();
JitMemMainSize -= JitMemSecondarySize;
JitMemUseableSize -= GetCodeOffset();
SetCodeBase((u8*)GetRWPtr(), (u8*)GetRXPtr());
}
@ -227,6 +395,16 @@ Compiler::~Compiler()
#endif
}
// Emits a load of the CPU object's Cycles field into the RCycles host register.
void Compiler::LoadCycles()
{
    const auto cyclesField = offsetof(ARM, Cycles);
    LDR(INDEX_UNSIGNED, RCycles, RCPU, cyclesField);
}
// Emits a store of the RCycles host register into the CPU object's Cycles field.
void Compiler::SaveCycles()
{
    const auto cyclesField = offsetof(ARM, Cycles);
    STR(INDEX_UNSIGNED, RCycles, RCPU, cyclesField);
}
void Compiler::LoadReg(int reg, ARM64Reg nativeReg)
{
if (reg == 15)
@ -325,7 +503,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
// CMN
F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp), F(ALUCmpOp),
// Mul
F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), NULL, NULL, NULL, NULL, NULL,
F(Mul), F(Mul), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Long), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short), F(Mul_Short),
// ARMv5 exclusives
F(Clz), NULL, NULL, NULL, NULL,
@ -356,7 +534,7 @@ const Compiler::CompileFunc A_Comp[ARMInstrInfo::ak_Count] =
// Branch
F(BranchImm), F(BranchImm), F(BranchImm), F(BranchXchangeReg), F(BranchXchangeReg),
// Special
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, F(MSR), F(MSR), F(MRS), NULL, NULL, NULL,
&Compiler::Nop
};
#undef F
@ -404,29 +582,34 @@ bool Compiler::CanCompile(bool thumb, u16 kind)
return (thumb ? T_Comp[kind] : A_Comp[kind]) != NULL;
}
void Compiler::Comp_BranchSpecialBehaviour()
void Compiler::Comp_BranchSpecialBehaviour(bool taken)
{
if (CurInstr.BranchFlags & branch_IdleBranch)
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
{
MOVI2R(W0, 1);
STRB(INDEX_UNSIGNED, W0, RCPU, offsetof(ARM, IdleLoop));
}
if (CurInstr.BranchFlags & branch_FollowCondNotTaken)
if ((CurInstr.BranchFlags & branch_FollowCondNotTaken && taken)
|| (CurInstr.BranchFlags & branch_FollowCondTaken && !taken))
{
SaveCPSR(false);
RegCache.PrepareExit();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
RET();
SUB(RCycles, RCycles, ConstantCycles);
QuickTailCall(X0, ARM_Ret);
}
}
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
if (JitMemUseableSize - GetCodeOffset() < 1024 * 16)
if (JitMemMainSize - GetCodeOffset() < 1024 * 16)
{
printf("JIT memory full, resetting...\n");
printf("JIT near memory full, resetting...\n");
ResetBlockCache();
}
if ((JitMemMainSize + JitMemSecondarySize) - OtherCodeRegion < 1024 * 8)
{
printf("JIT far memory full, resetting...\n");
ResetBlockCache();
}
@ -437,21 +620,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
CurCPU = cpu;
ConstantCycles = 0;
RegCache = RegisterCache<Compiler, ARM64Reg>(this, instrs, instrsCount, true);
//printf("compiling block at %x\n", R15 - (Thumb ? 2 : 4));
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
SavedRegs = BitSet32((RegCache.GetPushRegs() | BitSet32(0x78000000)) & BitSet32(ALL_CALLEE_SAVED));
//if (Num == 1)
{
ABI_PushRegisters(SavedRegs);
MOVP2R(RCPU, CurCPU);
MOVI2R(RCycles, 0);
LoadCPSR();
}
CPSRDirty = false;
for (int i = 0; i < instrsCount; i++)
{
@ -486,6 +655,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
if (comp == NULL)
{
SaveCycles();
SaveCPSR();
RegCache.Flush();
}
@ -535,25 +705,18 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
(this->*comp)();
}
Comp_BranchSpecialBehaviour();
Comp_BranchSpecialBehaviour(true);
if (cond < 0xE)
{
if (IrregularCycles)
if (IrregularCycles || (CurInstr.BranchFlags & branch_FollowCondTaken))
{
FixupBranch skipNop = B();
SetJumpTarget(skipExecute);
Comp_AddCycles_C();
if (CurInstr.BranchFlags & branch_FollowCondTaken)
{
SaveCPSR(false);
RegCache.PrepareExit();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
RET();
}
Comp_BranchSpecialBehaviour(false);
SetJumpTarget(skipNop);
}
@ -565,76 +728,74 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
}
if (comp == NULL)
{
LoadCycles();
LoadCPSR();
}
}
RegCache.Flush();
//if (Num == 1)
{
SaveCPSR();
ADD(W0, RCycles, ConstantCycles);
ABI_PopRegisters(SavedRegs);
}
//else
// ADD(RCycles, RCycles, ConstantCycles);
RET();
SUB(RCycles, RCycles, ConstantCycles);
QuickTailCall(X0, ARM_Ret);
FlushIcache();
//printf("finished\n");
return res;
}
void Compiler::Reset()
{
LoadStorePatches.clear();
SetCodePtr(0);
OtherCodeRegion = JitMemMainSize;
const u32 brk_0 = 0xD4200000;
for (int i = 0; i < JitMemUseableSize / 4; i++)
for (int i = 0; i < (JitMemMainSize + JitMemSecondarySize) / 4; i++)
*(((u32*)GetRWPtr()) + i) = brk_0;
}
void Compiler::Comp_AddCycles_C(bool nonConst)
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 1 : 3]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
if (!nonConst && !CurInstr.Info.Branches())
if (forceNonConstant)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 numI)
{
IrregularCycles = true;
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + numI;
if (Thumb || CurInstr.Cond() >= 0xE)
if (Thumb || CurInstr.Cond() == 0xE)
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CI(u32 c, ARM64Reg numI, ArithOption shift)
{
IrregularCycles = true;
s32 cycles = (Num ?
NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + c;
ADD(RCycles, RCycles, numI, shift);
SUB(RCycles, RCycles, cycles);
if (Thumb || CurInstr.Cond() >= 0xE)
ConstantCycles += c;
ConstantCycles += cycles;
else
ADD(RCycles, RCycles, cycles);
SUB(RCycles, RCycles, cycles);
}
void Compiler::Comp_AddCycles_CDI()
@ -671,7 +832,7 @@ void Compiler::Comp_AddCycles_CDI()
}
if (!Thumb && CurInstr.Cond() < 0xE)
ADD(RCycles, RCycles, cycles);
SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}
@ -715,7 +876,7 @@ void Compiler::Comp_AddCycles_CD()
}
if ((!Thumb && CurInstr.Cond() < 0xE) && IrregularCycles)
ADD(RCycles, RCycles, cycles);
SUB(RCycles, RCycles, cycles);
else
ConstantCycles += cycles;
}

View File

@ -9,6 +9,8 @@
#include "../ARMJIT_Internal.h"
#include "../ARMJIT_RegisterCache.h"
#include <unordered_map>
namespace ARMJIT
{
@ -64,7 +66,14 @@ struct Op2
};
};
class Compiler : Arm64Gen::ARM64XEmitter
// Record describing one patchable load/store site in generated code.
// The Compiler keeps these in LoadStorePatches, keyed by a ptrdiff_t.
// NOTE(review): field meanings below are inferred from the names only - confirm
// against the code that fills and consumes the map.
struct LoadStorePatch
{
// presumably the routine the site is redirected to when it gets patched
void* PatchFunc;
// presumably a signed offset locating the region to rewrite - TODO confirm what it is relative to
s32 PatchOffset;
// presumably the extent of the region to rewrite - TODO confirm the unit
u32 PatchSize;
};
class Compiler : public Arm64Gen::ARM64XEmitter
{
public:
typedef void (Compiler::*CompileFunc)();
@ -72,6 +81,9 @@ public:
Compiler();
~Compiler();
void PushRegs(bool saveHiRegs);
void PopRegs(bool saveHiRegs);
Arm64Gen::ARM64Reg MapReg(int reg)
{
assert(RegCache.Mapping[reg] != Arm64Gen::INVALID_REG);
@ -89,7 +101,7 @@ public:
void Reset();
void Comp_AddCycles_C(bool forceNonConst = false);
void Comp_AddCycles_C(bool forceNonConstant = false);
void Comp_AddCycles_CI(u32 numI);
void Comp_AddCycles_CI(u32 c, Arm64Gen::ARM64Reg numI, Arm64Gen::ArithOption shift);
void Comp_AddCycles_CD();
@ -103,6 +115,9 @@ public:
void LoadCPSR();
void SaveCPSR(bool markClean = true);
void LoadCycles();
void SaveCycles();
void Nop() {}
void A_Comp_ALUTriOp();
@ -111,6 +126,7 @@ public:
void A_Comp_Mul();
void A_Comp_Mul_Long();
void A_Comp_Mul_Short();
void A_Comp_Clz();
@ -122,6 +138,8 @@ public:
void A_Comp_BranchImm();
void A_Comp_BranchXchangeReg();
void A_Comp_MRS();
void A_Comp_MSR();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
@ -168,7 +186,7 @@ public:
void Comp_RegShiftImm(int op, int amount, bool S, Op2& op2, Arm64Gen::ARM64Reg tmp = Arm64Gen::W0);
void Comp_RegShiftReg(int op, bool S, Op2& op2, Arm64Gen::ARM64Reg rs);
void Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
bool Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr);
enum
{
memop_Writeback = 1 << 0,
@ -179,16 +197,33 @@ public:
};
void Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags);
void* Gen_MemoryRoutine9(int size, bool store);
void* Gen_MemoryRoutine9Seq(bool store, bool preinc);
void* Gen_MemoryRoutine7Seq(bool store, bool preinc);
// 0 = switch mode, 1 = stay arm, 2 = stay thumb
void* Gen_JumpTo9(int kind);
void* Gen_JumpTo7(int kind);
void Comp_BranchSpecialBehaviour();
void Comp_BranchSpecialBehaviour(bool taken);
// Converts an offset relative to the executable (RX) code base into a block entry pointer.
JitBlockEntry AddEntryOffset(u32 offset)
{
    auto entryAddr = GetRXBase() + offset;
    return (JitBlockEntry)entryAddr;
}
// Inverse of AddEntryOffset: the distance of a block entry from the executable (RX) code base.
u32 SubEntryOffset(JitBlockEntry entry)
{
    u8* entryAddr = (u8*)entry;
    return entryAddr - GetRXBase();
}
bool IsJITFault(u64 pc);
s64 RewriteMemAccess(u64 pc);
// Exchanges the emitter's current write position with the one remembered in
// OtherCodeRegion, so that calling it twice returns to the original position.
void SwapCodeRegion()
{
    const ptrdiff_t previousOffset = GetCodeOffset();
    SetCodePtrUnsafe(OtherCodeRegion);
    OtherCodeRegion = previousOffset;
}
ptrdiff_t OtherCodeRegion;
bool Exit;
@ -202,22 +237,20 @@ public:
BitSet32 SavedRegs;
u32 JitMemUseableSize;
u32 JitMemSecondarySize;
u32 JitMemMainSize;
void* ReadBanked, *WriteBanked;
// [size][store]
void* MemFunc9[3][2];
void* MemFunc7[3][2];
// [store][pre increment]
void* MemFuncsSeq9[2][2];
// "[code in main ram]
void* MemFuncsSeq7[2][2];
void* JumpToFuncs9[3];
void* JumpToFuncs7[3];
std::unordered_map<ptrdiff_t, LoadStorePatch> LoadStorePatches;
// [Num][Size][Sign Extend][Output register]
void* PatchedLoadFuncs[2][3][2][8];
void* PatchedStoreFuncs[2][3][8];
RegisterCache<Compiler, Arm64Gen::ARM64Reg> RegCache;
bool CPSRDirty = false;

View File

@ -0,0 +1,68 @@
#include "../ARMJIT_x64/ARMJIT_Offsets.h"
.text
#define RCPSR W27
#define RCycles W28
#define RCPU X29
.p2align 4,,15
.global ARM_Dispatch
// Entry trampoline into a compiled block.
// In:  x0 = pointer to the CPU object, x1 = address of the block to run.
// Saves x19-x30 in a 96 byte stack frame, sets up the fixed registers
// (RCPU, RCycles, RCPSR) and jumps to the block. The frame is not released
// here; ARM_Ret restores the registers and pops it.
ARM_Dispatch:
// allocate the frame and spill x19-x30 pairwise
stp x19, x20, [sp, #-96]!
stp x21, x22, [sp, #16]
stp x23, x24, [sp, #32]
stp x25, x26, [sp, #48]
stp x27, x28, [sp, #64]
stp x29, x30, [sp, #80]
// RCPU (x29) now holds the CPU object pointer
mov RCPU, x0
// load the cycle counter and CPSR from the CPU object
ldr RCycles, [RCPU, ARM_Cycles_offset]
ldr RCPSR, [RCPU, ARM_CPSR_offset]
// jump (no link) into the block; x30 still holds the return address of ARM_Dispatch's caller
br x1
.p2align 4,,15
.global ARM_Ret
// Exit path matching ARM_Dispatch.
// Writes RCycles and RCPSR back into the CPU object, reloads x19-x30 from
// the 96 byte frame laid out by ARM_Dispatch, pops the frame and returns
// through the restored x30.
ARM_Ret:
// write the cycle counter and CPSR back to the CPU object
str RCycles, [RCPU, ARM_Cycles_offset]
str RCPSR, [RCPU, ARM_CPSR_offset]
// reload the saved registers in reverse order of the spills
ldp x29, x30, [sp, #80]
ldp x27, x28, [sp, #64]
ldp x25, x26, [sp, #48]
ldp x23, x24, [sp, #32]
ldp x21, x22, [sp, #16]
// the last reload also releases the frame
ldp x19, x20, [sp], #96
ret
.p2align 4,,15
.global ARM_RestoreContext
// Reloads the general purpose registers from a memory block and jumps away.
// In: x0 = address of the block. x0-x30 are read from byte offsets 0-240,
// followed by two more 8 byte values at offsets 248 and 256 that become the
// new stack pointer and the branch target.
// NOTE(review): the layout presumably matches a saved signal/exception
// context (x0-x30, sp, pc) - confirm against the caller.
// x17 and x18 do not end up with their saved values: they are reused to
// carry the new sp and the branch target.
ARM_RestoreContext:
// use sp as the base pointer into the block while reloading
mov sp, x0
ldp x0, x1, [sp]
ldp x2, x3, [sp, #16]
ldp x4, x5, [sp, #32]
ldp x6, x7, [sp, #48]
ldp x8, x9, [sp, #64]
ldp x10, x11, [sp, #80]
ldp x12, x13, [sp, #96]
ldp x14, x15, [sp, #112]
ldp x16, x17, [sp, #128]
ldp x18, x19, [sp, #144]
ldp x20, x21, [sp, #160]
ldp x22, x23, [sp, #176]
ldp x24, x25, [sp, #192]
ldp x26, x27, [sp, #208]
ldp x28, x29, [sp, #224]
ldr x30, [sp, #240]
// x17 = value for sp, x18 = address to continue at
ldp x17, x18, [sp, #248]
mov sp, x17
br x18

View File

@ -2,286 +2,62 @@
#include "../Config.h"
#include "../ARMJIT_Memory.h"
using namespace Arm64Gen;
namespace ARMJIT
{
// W0 - address
// (if store) W1 - value to store
// W2 - code cycles
void* Compiler::Gen_MemoryRoutine9(int size, bool store)
bool Compiler::IsJITFault(u64 pc)
{
AlignCode16();
void* res = GetRXPtr();
u32 addressMask;
switch (size)
{
case 32: addressMask = ~3; break;
case 16: addressMask = ~1; break;
case 8: addressMask = ~0; break;
}
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, DTCMBase));
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMSize));
SUB(W3, W0, W3);
CMP(W3, W4);
FixupBranch insideDTCM = B(CC_LO);
UBFX(W4, W0, 24, 8);
CMP(W4, 0x02);
FixupBranch outsideMainRAM = B(CC_NEQ);
ANDI2R(W3, W0, addressMask & (MAIN_RAM_SIZE - 1));
MOVP2R(X4, NDS::MainRAM);
if (!store && size == 32)
{
LDR(W3, X3, X4);
ANDI2R(W0, W0, 3);
LSL(W0, W0, 3);
RORV(W0, W3, W0);
}
else if (store)
STRGeneric(size, W1, X3, X4);
else
LDRGeneric(size, false, W0, X3, X4);
RET();
SetJumpTarget(outsideMainRAM);
LDR(INDEX_UNSIGNED, W3, RCPU, offsetof(ARMv5, ITCMSize));
CMP(W0, W3);
FixupBranch insideITCM = B(CC_LO);
if (store)
{
if (size > 8)
ANDI2R(W0, W0, addressMask);
switch (size)
{
case 32: QuickTailCall(X4, NDS::ARM9Write32); break;
case 16: QuickTailCall(X4, NDS::ARM9Write16); break;
case 8: QuickTailCall(X4, NDS::ARM9Write8); break;
}
}
else
{
if (size == 32)
ABI_PushRegisters({0, 30});
if (size > 8)
ANDI2R(W0, W0, addressMask);
switch (size)
{
case 32: QuickCallFunction(X4, NDS::ARM9Read32); break;
case 16: QuickTailCall (X4, NDS::ARM9Read16); break;
case 8: QuickTailCall (X4, NDS::ARM9Read8 ); break;
}
if (size == 32)
{
ABI_PopRegisters({1, 30});
ANDI2R(W1, W1, 3);
LSL(W1, W1, 3);
RORV(W0, W0, W1);
RET();
}
}
SetJumpTarget(insideDTCM);
ANDI2R(W3, W3, 0x3FFF & addressMask);
ADDI2R(W3, W3, offsetof(ARMv5, DTCM), W4);
if (!store && size == 32)
{
ANDI2R(W4, W0, 3);
LDR(W0, RCPU, W3);
LSL(W4, W4, 3);
RORV(W0, W0, W4);
}
else if (store)
STRGeneric(size, W1, RCPU, W3);
else
LDRGeneric(size, false, W0, RCPU, W3);
RET();
SetJumpTarget(insideITCM);
ANDI2R(W3, W0, 0x7FFF & addressMask);
if (store)
{
ADDI2R(W0, W3, ExeMemRegionOffsets[exeMem_ITCM], W4);
LSR(W5, W0, 9);
MOVP2R(X4, CodeRanges);
ADD(X4, X4, X5, ArithOption(X5, ST_LSL, 4));
static_assert(sizeof(AddressRange) == 16);
LDRH(INDEX_UNSIGNED, W4, X4, offsetof(AddressRange, Blocks.Length));
FixupBranch null = CBZ(W4);
ABI_PushRegisters({1, 3, 30});
QuickCallFunction(X4, InvalidateByAddr);
ABI_PopRegisters({1, 3, 30});
SetJumpTarget(null);
}
ADDI2R(W3, W3, offsetof(ARMv5, ITCM), W4);
if (!store && size == 32)
{
ANDI2R(W4, W0, 3);
LDR(W0, RCPU, W3);
LSL(W4, W4, 3);
RORV(W0, W0, W4);
}
else if (store)
STRGeneric(size, W1, RCPU, W3);
else
LDRGeneric(size, false, W0, RCPU, W3);
RET();
return res;
return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
/*
W0 - base address
X1 - stack space
W2 - values count
*/
void* Compiler::Gen_MemoryRoutine9Seq(bool store, bool preinc)
s64 Compiler::RewriteMemAccess(u64 pc)
{
AlignCode16();
void* res = GetRXPtr();
void* loopStart = GetRXPtr();
SUB(W2, W2, 1);
ptrdiff_t pcOffset = pc - (u64)GetRXBase();
if (preinc)
ADD(W0, W0, 4);
auto it = LoadStorePatches.find(pcOffset);
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, DTCMBase));
LDR(INDEX_UNSIGNED, W5, RCPU, offsetof(ARMv5, DTCMSize));
SUB(W4, W0, W4);
CMP(W4, W5);
FixupBranch insideDTCM = B(CC_LO);
LDR(INDEX_UNSIGNED, W4, RCPU, offsetof(ARMv5, ITCMSize));
CMP(W0, W4);
FixupBranch insideITCM = B(CC_LO);
ABI_PushRegisters({0, 1, 2, 30}); // TODO: move SP only once
if (store)
if (it != LoadStorePatches.end())
{
LDR(X1, X1, ArithOption(X2, true));
QuickCallFunction(X4, NDS::ARM9Write32);
LoadStorePatch patch = it->second;
ABI_PopRegisters({0, 1, 2, 30});
ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc);
for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP);
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset);
LoadStorePatches.erase(it);
return patch.PatchOffset;
}
else
{
QuickCallFunction(X4, NDS::ARM9Read32);
MOV(W4, W0);
ABI_PopRegisters({0, 1, 2, 30});
STR(X4, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
SetJumpTarget(insideDTCM);
ANDI2R(W4, W4, ~3 & 0x3FFF);
ADDI2R(X4, X4, offsetof(ARMv5, DTCM));
if (store)
{
LDR(X5, X1, ArithOption(X2, true));
STR(W5, RCPU, X4);
}
else
{
LDR(W5, RCPU, X4);
STR(X5, X1, ArithOption(X2, true));
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
SetJumpTarget(insideITCM);
ANDI2R(W4, W0, ~3 & 0x7FFF);
ADDI2R(W6, W4, offsetof(ARMv5, ITCM), W5);
if (store)
{
LDR(X5, X1, ArithOption(X2, true));
STR(W5, RCPU, X6);
}
else
{
LDR(W5, RCPU, X6);
STR(X5, X1, ArithOption(X2, true));
}
if (store)
{
ADDI2R(W4, W4, ExeMemRegionOffsets[exeMem_ITCM], W5);
LSR(W6, W4, 9);
MOVP2R(X5, CodeRanges);
ADD(X5, X5, X6, ArithOption(X6, ST_LSL, 4));
static_assert(sizeof(AddressRange) == 16);
LDRH(INDEX_UNSIGNED, W5, X5, offsetof(AddressRange, Blocks.Length));
FixupBranch null = CBZ(W5);
ABI_PushRegisters({0, 1, 2, 4, 30});
MOV(W0, W4);
QuickCallFunction(X5, InvalidateByAddr);
ABI_PopRegisters({0, 1, 2, 4, 30});
SetJumpTarget(null);
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
return res;
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
assert(false);
}
void* Compiler::Gen_MemoryRoutine7Seq(bool store, bool preinc)
bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
AlignCode16();
void* res = GetRXPtr();
u32 localAddr = LocaliseCodeAddress(Num, addr);
void* loopStart = GetRXPtr();
SUB(W2, W2, 1);
if (preinc)
ADD(W0, W0, 4);
ABI_PushRegisters({0, 1, 2, 30});
if (store)
int invalidLiteralIdx = InvalidLiterals.Find(localAddr);
if (invalidLiteralIdx != -1)
{
LDR(X1, X1, ArithOption(X2, true));
QuickCallFunction(X4, NDS::ARM7Write32);
ABI_PopRegisters({0, 1, 2, 30});
}
else
{
QuickCallFunction(X4, NDS::ARM7Read32);
MOV(W4, W0);
ABI_PopRegisters({0, 1, 2, 30});
STR(X4, X1, ArithOption(X2, true));
InvalidLiterals.Remove(invalidLiteralIdx);
return false;
}
if (!preinc)
ADD(W0, W0, 4);
CBNZ(W2, loopStart);
RET();
Comp_AddCycles_CDI();
return res;
}
void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
{
u32 val;
// make sure arm7 bios is accessible
u32 tmpR15 = CurCPU->R[15];
@ -309,6 +85,8 @@ void Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr)
if (Thumb || CurInstr.Cond() == 0xE)
RegCache.PutLiteral(rd, val);
return true;
}
void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
@ -318,162 +96,208 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
addressMask = ~3;
if (size == 16)
addressMask = ~1;
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
if (Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr))
return;
}
if (flags & memop_Store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && offset.IsImm && !(flags & (memop_Post|memop_Store|memop_Writeback)))
{
u32 addr = R15 + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
{
Comp_MemLoadLiteral(size, flags & memop_SignExtend, rd, addr);
return;
}
if (Thumb && rn == 15)
{
ANDI2R(W3, rnMapped, ~2);
rnMapped = W3;
}
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
ARM64Reg rdMapped = MapReg(rd);
ARM64Reg rnMapped = MapReg(rn);
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
bool inlinePreparation = Num == 1;
u32 constLocalROR32 = 4;
bool addrIsStatic = Config::JIT_LiteralOptimisations
&& RegCache.IsLiteral(rn) && offset.IsImm && !(flags & (memop_Writeback|memop_Post));
u32 staticAddress;
if (addrIsStatic)
staticAddress = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
void* memFunc = Num == 0
? MemFunc9[size >> 4][!!(flags & memop_Store)]
: MemFunc7[size >> 4][!!((flags & memop_Store))];
if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && offset.IsImm && RegCache.IsLiteral(rn))
{
u32 addr = RegCache.LiteralValues[rn] + offset.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
NDS::MemRegion region;
region.Mem = NULL;
if (Num == 0)
{
ARMv5* cpu5 = (ARMv5*)CurCPU;
// stupid dtcm...
if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
{
region.Mem = cpu5->DTCM;
region.Mask = 0x3FFF;
}
else
{
NDS::ARM9GetMemRegion(addr, flags & memop_Store, &region);
}
}
else
NDS::ARM7GetMemRegion(addr, flags & memop_Store, &region);
if (region.Mem != NULL)
{
void* ptr = &region.Mem[addr & addressMask & region.Mask];
MOVP2R(X0, ptr);
if (flags & memop_Store)
STRGeneric(size, INDEX_UNSIGNED, rdMapped, X0, 0);
else
{
LDRGeneric(size, flags & memop_SignExtend, INDEX_UNSIGNED, rdMapped, X0, 0);
if (size == 32 && addr & ~0x3)
ROR_(rdMapped, rdMapped, (addr & 0x3) << 3);
}
return;
}
void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
if (specialFunc)
{
memFunc = specialFunc;
inlinePreparation = true;
constLocalROR32 = addr & 0x3;
}
}
ARM64Reg finalAddr = W0;
if (flags & memop_Post)
{
finalAddr = rnMapped;
MOV(W0, rnMapped);
}
if (flags & memop_Store)
MOV(W1, rdMapped);
if (!offset.IsImm)
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
// offset might become an immediate
if (offset.IsImm)
if (!offset.IsImm)
Comp_RegShiftImm(offset.Reg.ShiftType, offset.Reg.ShiftAmount, false, offset, W2);
// offset might have become an immediate
if (offset.IsImm)
{
if (offset.Imm)
{
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Imm);
else
ADD(finalAddr, rnMapped, offset.Imm);
}
else if (finalAddr != rnMapped)
MOV(finalAddr, rnMapped);
}
else
{
if (offset.Reg.ShiftType == ST_ROR)
{
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
u32 expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
if (Config::JIT_FastMemory && ((!Thumb && CurInstr.Cond() != 0xE) || ARMJIT_Memory::IsMappable(expectedTarget)))
{
ptrdiff_t memopStart = GetCodeOffset();
LoadStorePatch patch;
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[Num][__builtin_ctz(size) - 3][rdMapped - W19]
: PatchedLoadFuncs[Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped - W19];
assert(rdMapped - W19 >= 0 && rdMapped - W19 < 8);
MOVP2R(X7, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
// take a chance at fastmem
if (size > 8)
ANDI2R(W1, W0, addressMask);
ptrdiff_t loadStorePosition = GetCodeOffset();
if (flags & memop_Store)
{
STRGeneric(size, rdMapped, size > 8 ? X1 : X0, X7);
}
else
{
if (offset.Reg.ShiftType == ST_ROR)
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
if (size == 32)
{
ROR_(W0, offset.Reg.Rm, offset.Reg.ShiftAmount);
offset = Op2(W0);
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
}
if (flags & memop_SubtractOffset)
SUB(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
else
ADD(finalAddr, rnMapped, offset.Reg.Rm, offset.ToArithOption());
}
if (!(flags & memop_Post) && (flags & memop_Writeback))
MOV(rnMapped, W0);
patch.PatchOffset = memopStart - loadStorePosition;
patch.PatchSize = GetCodeOffset() - memopStart;
LoadStorePatches[loadStorePosition] = patch;
}
else
{
void* func = NULL;
if (addrIsStatic)
func = ARMJIT_Memory::GetFuncForAddr(CurCPU, staticAddress, flags & memop_Store, size);
if (inlinePreparation)
if (func)
{
if (size == 32 && !(flags & memop_Store) && constLocalROR32 == 4)
ANDI2R(rdMapped, W0, 3);
if (size > 8)
ANDI2R(W0, W0, addressMask);
}
QuickCallFunction(X2, memFunc);
if (!(flags & memop_Store))
{
if (inlinePreparation && !(flags & memop_Store) && size == 32)
if (flags & memop_Store)
MOV(W1, rdMapped);
QuickCallFunction(X2, (void (*)())func);
if (!(flags & memop_Store))
{
if (constLocalROR32 == 4)
if (size == 32)
{
LSL(rdMapped, rdMapped, 3);
RORV(rdMapped, W0, rdMapped);
if (staticAddress & 0x3)
ROR_(rdMapped, W0, (staticAddress & 0x3) << 3);
else
MOV(rdMapped, W0);
}
else if (constLocalROR32 > 0)
ROR_(rdMapped, W0, constLocalROR32 << 3);
else
MOV(rdMapped, W0);
}
else if (flags & memop_SignExtend)
{
if (size == 16)
SXTH(rdMapped, W0);
else if (size == 8)
SXTB(rdMapped, W0);
else
assert("What's wrong with you?");
}
else
MOV(rdMapped, W0);
if (CurInstr.Info.Branches())
{
if (size < 32)
printf("LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
{
if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
else
{
if (Num == 0)
{
MOV(X1, RCPU);
if (flags & memop_Store)
{
MOV(W2, rdMapped);
switch (size)
{
case 32: QuickCallFunction(X3, SlowWrite9<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite9<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite9<u8>); break;
}
}
else
{
switch (size)
{
case 32: QuickCallFunction(X3, SlowRead9<u32>); break;
case 16: QuickCallFunction(X3, SlowRead9<u16>); break;
case 8: QuickCallFunction(X3, SlowRead9<u8>); break;
}
}
}
else
{
if (flags & memop_Store)
{
MOV(W1, rdMapped);
switch (size)
{
case 32: QuickCallFunction(X3, SlowWrite7<u32>); break;
case 16: QuickCallFunction(X3, SlowWrite7<u16>); break;
case 8: QuickCallFunction(X3, SlowWrite7<u8>); break;
}
}
else
{
switch (size)
{
case 32: QuickCallFunction(X3, SlowRead7<u32>); break;
case 16: QuickCallFunction(X3, SlowRead7<u16>); break;
case 8: QuickCallFunction(X3, SlowRead7<u8>); break;
}
}
}
if (!(flags & memop_Store))
{
if (size == 32)
MOV(rdMapped, W0);
else if (flags & memop_SignExtend)
SBFX(rdMapped, W0, 0, size);
else
UBFX(rdMapped, W0, 0, size);
}
}
}
if (CurInstr.Info.Branches())
{
if (size < 32)
printf("LDR size < 32 branching?\n");
Comp_JumpTo(rdMapped, Num == 0, false);
}
}
@ -589,19 +413,11 @@ void Compiler::T_Comp_MemImmHalf()
void Compiler::T_Comp_LoadPCRel()
{
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
u32 offset = ((CurInstr.Instr & 0xFF) << 2);
u32 addr = (R15 & ~0x2) + offset;
if (Config::JIT_LiteralOptimisations)
{
Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr);
Comp_AddCycles_CDI();
}
else
{
bool negative = addr < R15;
u32 abs = negative ? R15 - addr : addr - R15;
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(abs), 32, negative ? memop_SubtractOffset : 0);
}
if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, false, CurInstr.T_Reg(8), addr))
Comp_MemAccess(CurInstr.T_Reg(8), 15, Op2(offset), 32, 0);
}
void Compiler::T_Comp_MemSPRel()
@ -621,15 +437,138 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (regsCount == 0)
return 0; // actually not the right behaviour TODO: fix me
if (regsCount == 1 && !usermode && RegCache.LoadedRegs & (1 << *regs.begin()))
{
int flags = 0;
if (store)
flags |= memop_Store;
if (decrement)
flags |= memop_SubtractOffset;
Op2 offset = preinc ? Op2(4) : Op2(0);
Comp_MemAccess(*regs.begin(), rn, offset, 32, flags);
return decrement ? -4 : 4;
}
if (store)
Comp_AddCycles_CD();
else
Comp_AddCycles_CDI();
int expectedTarget = Num == 0
? ARMJIT_Memory::ClassifyAddress9(CurInstr.DataRegion)
: ARMJIT_Memory::ClassifyAddress7(CurInstr.DataRegion);
bool compileFastPath = Config::JIT_FastMemory
&& store && !usermode && (CurInstr.Cond() < 0xE || ARMJIT_Memory::IsMappable(expectedTarget));
if (decrement)
{
SUB(W0, MapReg(rn), regsCount * 4);
ANDI2R(W0, W0, ~3);
preinc ^= true;
}
else
{
ANDI2R(W0, MapReg(rn), ~3);
}
LoadStorePatch patch;
if (compileFastPath)
{
ptrdiff_t fastPathStart = GetCodeOffset();
ptrdiff_t firstLoadStoreOffset;
bool firstLoadStore = true;
MOVP2R(X1, Num == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
ADD(X1, X1, X0);
u32 offset = preinc ? 4 : 0;
BitSet16::Iterator it = regs.begin();
if (regsCount & 1)
{
int reg = *it;
it++;
ARM64Reg first = W3;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
if (firstLoadStore)
{
firstLoadStoreOffset = GetCodeOffset();
firstLoadStore = false;
}
if (store)
STR(INDEX_UNSIGNED, first, X1, offset);
else
LDR(INDEX_UNSIGNED, first, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
SaveReg(reg, first);
offset += 4;
}
while (it != regs.end())
{
int reg = *it;
it++;
int nextReg = *it;
it++;
ARM64Reg first = W3, second = W4;
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else if (store)
LoadReg(reg, first);
if (RegCache.LoadedRegs & (1 << nextReg))
second = MapReg(nextReg);
else if (store)
LoadReg(nextReg, second);
if (firstLoadStore)
{
firstLoadStoreOffset = GetCodeOffset();
firstLoadStore = false;
}
if (store)
STP(INDEX_SIGNED, first, second, X1, offset);
else
LDP(INDEX_SIGNED, first, second, X1, offset);
if (!(RegCache.LoadedRegs & (1 << reg)) && !store)
SaveReg(reg, first);
if (!(RegCache.LoadedRegs & (1 << nextReg)) && !store)
SaveReg(nextReg, second);
offset += 8;
}
patch.PatchSize = GetCodeOffset() - fastPathStart;
patch.PatchOffset = fastPathStart - firstLoadStoreOffset;
SwapCodeRegion();
patch.PatchFunc = GetRXPtr();
LoadStorePatches[firstLoadStoreOffset] = patch;
ABI_PushRegisters({30});
}
int i = 0;
SUB(SP, SP, ((regsCount + 1) & ~1) * 8);
if (store)
{
Comp_AddCycles_CD();
if (usermode && (regs & BitSet16(0x7f00)))
UBFX(W0, RCPSR, 0, 5);
int i = regsCount - 1;
UBFX(W5, RCPSR, 0, 5);
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
@ -641,7 +580,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
if (usermode && reg >= 8 && reg < 15)
{
if (RegCache.Mapping[reg] != INVALID_REG)
if (RegCache.LoadedRegs & (1 << reg))
MOV(W3, MapReg(reg));
else
LoadReg(reg, W3);
@ -651,55 +590,67 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
else if (!usermode && nextReg != regs.end())
{
ARM64Reg first = W3;
ARM64Reg second = W4;
ARM64Reg first = W3, second = W4;
if (RegCache.Mapping[reg] != INVALID_REG)
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
else
LoadReg(reg, W3);
if (RegCache.Mapping[*nextReg] != INVALID_REG)
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
else
LoadReg(*nextReg, W4);
STP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
STP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
i--;
i++;
it++;
}
else if (RegCache.Mapping[reg] != INVALID_REG)
else if (RegCache.LoadedRegs & (1 << reg))
{
STR(INDEX_UNSIGNED, MapReg(reg), SP, i * 8);
}
else
{
LoadReg(reg, W3);
STR(INDEX_UNSIGNED, W3, SP, i * 8);
}
i--;
i++;
it++;
}
}
if (decrement)
{
SUB(W0, MapReg(rn), regsCount * 4);
preinc ^= true;
}
else
MOV(W0, MapReg(rn));
ADD(X1, SP, 0);
MOVI2R(W2, regsCount);
BL(Num ? MemFuncsSeq7[store][preinc] : MemFuncsSeq9[store][preinc]);
if (Num == 0)
{
MOV(X3, RCPU);
switch (preinc * 2 | store)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer9<false, false>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer9<false, true>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer9<true, false>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer9<true, true>); break;
}
}
else
{
switch (preinc * 2 | store)
{
case 0: QuickCallFunction(X4, SlowBlockTransfer7<false, false>); break;
case 1: QuickCallFunction(X4, SlowBlockTransfer7<false, true>); break;
case 2: QuickCallFunction(X4, SlowBlockTransfer7<true, false>); break;
case 3: QuickCallFunction(X4, SlowBlockTransfer7<true, true>); break;
}
}
if (!store)
{
Comp_AddCycles_CDI();
if (usermode && !regs[15] && (regs & BitSet16(0x7f00)))
UBFX(W0, RCPSR, 0, 5);
UBFX(W5, RCPSR, 0, 5);
int i = regsCount - 1;
BitSet16::Iterator it = regs.begin();
while (it != regs.end())
{
@ -714,11 +665,8 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
MOVI2R(W1, reg - 8);
BL(WriteBanked);
FixupBranch alreadyWritten = CBNZ(W4);
if (RegCache.Mapping[reg] != INVALID_REG)
{
if (RegCache.LoadedRegs & (1 << reg))
MOV(MapReg(reg), W3);
RegCache.DirtyRegs |= 1 << reg;
}
else
SaveReg(reg, W3);
SetJumpTarget(alreadyWritten);
@ -727,20 +675,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
{
ARM64Reg first = W3, second = W4;
if (RegCache.Mapping[reg] != INVALID_REG)
{
if (RegCache.LoadedRegs & (1 << reg))
first = MapReg(reg);
if (reg != 15)
RegCache.DirtyRegs |= 1 << reg;
}
if (RegCache.Mapping[*nextReg] != INVALID_REG)
{
if (RegCache.LoadedRegs & (1 << *nextReg))
second = MapReg(*nextReg);
if (*nextReg != 15)
RegCache.DirtyRegs |= 1 << *nextReg;
}
LDP(INDEX_SIGNED, EncodeRegTo64(second), EncodeRegTo64(first), SP, i * 8 - 8);
LDP(INDEX_SIGNED, EncodeRegTo64(first), EncodeRegTo64(second), SP, i * 8);
if (first == W3)
SaveReg(reg, W3);
@ -748,15 +688,12 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
SaveReg(*nextReg, W4);
it++;
i--;
i++;
}
else if (RegCache.Mapping[reg] != INVALID_REG)
else if (RegCache.LoadedRegs & (1 << reg))
{
ARM64Reg mapped = MapReg(reg);
LDR(INDEX_UNSIGNED, mapped, SP, i * 8);
if (reg != 15)
RegCache.DirtyRegs |= 1 << reg;
}
else
{
@ -765,11 +702,20 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
it++;
i--;
i++;
}
}
ADD(SP, SP, ((regsCount + 1) & ~1) * 8);
if (compileFastPath)
{
ABI_PopRegisters({30});
RET();
FlushIcacheSection((u8*)patch.PatchFunc, (u8*)GetRXPtr());
SwapCodeRegion();
}
if (!store && regs[15])
{
ARM64Reg mapped = MapReg(15);

12
src/ARMJIT_Compiler.h Normal file
View File

@ -0,0 +1,12 @@
// Pulls in the Compiler class of the JIT backend matching the target
// architecture. Building for any other architecture is a hard error.
#if defined(__x86_64__)
#include "ARMJIT_x64/ARMJIT_Compiler.h"
#elif defined(__aarch64__)
#include "ARMJIT_A64/ARMJIT_Compiler.h"
#else
#error "The current target platform doesn't have a JIT backend"
#endif
namespace ARMJIT
{
// The backend compiler instance shared across the JIT; defined in another translation unit.
extern Compiler* JITCompiler;
}

View File

@ -3,8 +3,11 @@
#include "types.h"
#include <stdint.h>
#include <string.h>
#include <assert.h>
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
// here lands everything which doesn't fit into ARMJIT.h
// where it would be included by pretty much everything
@ -160,8 +163,8 @@ public:
Data.SetLength(numAddresses * 2 + numLiterals);
}
u32 PseudoPhysicalAddr;
u32 StartAddr;
u32 StartAddrLocal;
u32 InstrHash, LiteralHash;
u8 Num;
u16 NumAddresses;
@ -175,28 +178,8 @@ public:
{ return &Data[NumAddresses]; }
u32* Literals()
{ return &Data[NumAddresses * 2]; }
u32* Links()
{ return &Data[NumAddresses * 2 + NumLiterals]; }
u32 NumLinks()
{ return Data.Length - NumAddresses * 2 - NumLiterals; }
void AddLink(u32 link)
{
Data.Add(link);
}
void ResetLinks()
{
Data.SetLength(NumAddresses * 2 + NumLiterals);
}
private:
/*
0..<NumInstrs - the instructions of the block
NumInstrs..<(NumLinks + NumInstrs) - pseudo physical addresses where the block is located
(atleast one, the pseudo physical address of the block)
*/
TinyVector<u32> Data;
};
@ -207,45 +190,32 @@ struct __attribute__((packed)) AddressRange
u32 Code;
};
extern AddressRange CodeRanges[ExeMemSpaceSize / 512];
typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];
extern u8 MemoryStatus9[0x800000];
extern u8 MemoryStatus7[0x800000];
extern TinyVector<u32> InvalidLiterals;
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
extern AddressRange* const CodeMemRegions[ARMJIT_Memory::memregions_Count];
inline bool PageContainsCode(AddressRange* range)
{
for (int i = 0; i < 8; i++)
{
if (range[i].Blocks.Length > 0)
return true;
}
return false;
}
u32 LocaliseCodeAddress(u32 num, u32 addr);
template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
enum
{
memregion_Other = 0,
memregion_ITCM,
memregion_DTCM,
memregion_BIOS9,
memregion_MainRAM,
memregion_SWRAM9,
memregion_SWRAM7,
memregion_IO9,
memregion_VRAM,
memregion_BIOS7,
memregion_WRAM7,
memregion_IO7,
memregion_Wifi,
memregion_VWRAM,
};
int ClassifyAddress9(u32 addr);
int ClassifyAddress7(u32 addr);
template <typename T> T SlowRead9(ARMv5* cpu, u32 addr);
template <typename T> void SlowWrite9(ARMv5* cpu, u32 addr, T val);
template <typename T> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
template <typename T> T SlowRead7(u32 addr);
template <typename T> void SlowWrite7(u32 addr, T val);

822
src/ARMJIT_Memory.cpp Normal file
View File

@ -0,0 +1,822 @@
#ifdef __SWITCH__
#include "switch/compat_switch.h"
#endif
#include "ARMJIT_Memory.h"
#include "ARMJIT_Internal.h"
#include "ARMJIT_Compiler.h"
#include "GPU.h"
#include "GPU3D.h"
#include "Wifi.h"
#include "NDSCart.h"
#include "SPU.h"
#include <malloc.h>
/*
We're handling fastmem here.
Basically we're repurposing a big piece of virtual memory
and mapping the memory regions into it the way they're
laid out on the DS.
On most systems you have a single piece of main RAM,
maybe some video RAM and faster cache RAM and that's about it.
Here we not only have a lot more different memory regions,
but also two address spaces. On top of that they all have
mirrors (the worst case is the 16kb SWRAM which is mirrored 1024x).
We handle this by only mapping those regions which are actually
used and by praying the games don't go wild.
Beware, this file is full of platform specific code.
*/
namespace ARMJIT_Memory
{
#ifdef __aarch64__
// Snapshot of the CPU state at the moment a memory access faulted (AArch64).
struct FaultDescription
{
    // Slots 0..30 hold x0..x30, slot 31 holds sp and slot 32 holds pc.
    // ARM_RestoreContext reads this array directly, so the layout is fixed.
    u64 IntegerRegisters[33];
    // Host address whose access faulted.
    u64 FaultAddr;

    // Emulated address of the faulting access, taken from the low half of x0.
    // NOTE(review): relies on the JIT emitting its fastmem accesses with the
    // guest address in W0 - confirm against the backend before changing either side.
    u32 GetEmulatedAddr()
    {
        const u64 x0 = IntegerRegisters[0];
        return static_cast<u32>(x0);
    }

    // Host address whose access faulted.
    u64 RealAddr()
    {
        return FaultAddr;
    }

    // Host program counter of the faulting instruction.
    u64 GetPC()
    {
        const u64 pc = IntegerRegisters[32];
        return pc;
    }

    // Adjusts the saved pc by offset and resumes execution from the snapshot.
    void RestoreAndRepeat(s64 offset);
};
#else
// Placeholder for hosts without fault handling support: every query yields 0.
struct FaultDescription
{
    u64 GetPC()
    {
        return 0;
    }

    u32 GetEmulatedAddr()
    {
        return 0;
    }

    u64 RealAddr()
    {
        return 0;
    }

    void RestoreAndRepeat(s64 offset);
};
#endif
void FaultHandler(FaultDescription* faultDesc);
}
#ifdef __aarch64__
extern "C" void ARM_RestoreContext(u64* registers) __attribute__((noreturn));
#endif
#ifdef __SWITCH__
// with LTO the symbols don't seem to be overridden properly
// if they're defined somewhere else
extern "C"
{
extern char __start__;
extern char __rodata_start;
alignas(16) u8 __nx_exception_stack[0x8000];
u64 __nx_exception_stack_size = 0x8000;
/*
    Exception hook called by libnx when the process faults.

    Copies the faulting thread's registers into a FaultDescription
    (x0..x28 from cpu_gprs, then fp, lr, sp, pc in slots 29..32) together
    with the faulting address and hands it to the JIT fault handler.

    Reaching the code after FaultHandler means the fault was presumably
    not one the JIT could recover from, so it is only reported.
*/
void __libnx_exception_handler(ThreadExceptionDump* ctx)
{
    ARMJIT_Memory::FaultDescription desc;
    memcpy(desc.IntegerRegisters, &ctx->cpu_gprs[0].x, 8*29);
    desc.IntegerRegisters[29] = ctx->fp.x;
    desc.IntegerRegisters[30] = ctx->lr.x;
    desc.IntegerRegisters[31] = ctx->sp.x;
    desc.IntegerRegisters[32] = ctx->pc.x;
    // RealAddr() reads this field, it must not be left uninitialised
    desc.FaultAddr = ctx->far.x;

    ARMJIT_Memory::FaultHandler(&desc);

    // the register values are 64 bit wide, print them as such instead of
    // passing them to a 32 bit conversion
    if (ctx->pc.x >= (u64)&__start__ && ctx->pc.x < (u64)&__rodata_start)
    {
        printf("non JIT fault in .text at 0x%llx (type %d) (trying to access 0x%llx?)\n",
            (unsigned long long)(ctx->pc.x - (u64)&__start__), ctx->error_desc,
            (unsigned long long)ctx->far.x);
    }
    else
    {
        printf("non JIT fault somewhere in deep (address) space at %llx (type %d)\n",
            (unsigned long long)ctx->pc.x, ctx->error_desc);
    }
}
}
#endif
namespace ARMJIT_Memory
{
// Moves the saved program counter by offset bytes and resumes execution from
// the saved register snapshot. On AArch64 this never returns to the caller;
// on other hosts it is a no-op.
void FaultDescription::RestoreAndRepeat(s64 offset)
{
#ifdef __aarch64__
    // slot 32 of the snapshot is the pc
    u64& resumePC = IntegerRegisters[32];
    resumePC += offset;
    ARM_RestoreContext(IntegerRegisters);
#endif
}
// Host base addresses of the two fastmem windows (ARM9 and ARM7). An emulated
// address is turned into a host address by adding it to the matching base.
void* FastMem9Start, *FastMem7Start;
// Combined size of all memories which live in the shared backing block.
const u32 MemoryTotalSize =
NDS::MainRAMSize
+ NDS::SharedWRAMSize
+ NDS::ARM7WRAMSize
+ DTCMPhysicalSize;
// Byte offsets of the individual memories inside the backing block,
// laid out back to back in the order main RAM, shared WRAM, ARM7 WRAM, DTCM.
const u32 MemBlockMainRAMOffset = 0;
const u32 MemBlockSWRAMOffset = NDS::MainRAMSize;
const u32 MemBlockARM7WRAMOffset = NDS::MainRAMSize + NDS::SharedWRAMSize;
const u32 MemBlockDTCMOffset = NDS::MainRAMSize + NDS::SharedWRAMSize + NDS::ARM7WRAMSize;
// Backing block offset for each memory region, indexed by region id.
// NOTE(review): the entry order has to match the memregion_* enum, which is
// declared elsewhere - verify when adding or reordering regions.
// UINT32_MAX presumably marks regions without storage in the backing block.
const u32 OffsetsPerRegion[memregions_Count] =
{
UINT32_MAX,
UINT32_MAX,
MemBlockDTCMOffset,
UINT32_MAX,
MemBlockMainRAMOffset,
MemBlockSWRAMOffset,
UINT32_MAX,
UINT32_MAX,
UINT32_MAX,
MemBlockARM7WRAMOffset,
UINT32_MAX,
UINT32_MAX,
UINT32_MAX,
};
// State of a single 4kb page of a fastmem window.
enum
{
memstate_Unmapped,
memstate_MappedRW,
// on switch this is unmapped as well
memstate_MappedProtected,
};
// One state byte per 4kb page of the 32 bit address space of each CPU,
// indexed by (address >> 12).
u8 MappingStatus9[1 << (32-12)];
u8 MappingStatus7[1 << (32-12)];
// Pointers to the backing block. On Switch a second pointer exists, which is
// the one handed to svcMapProcessMemory/svcUnmapProcessMemory as the source.
#ifdef __SWITCH__
u8* MemoryBase;
u8* MemoryBaseCodeMem;
#else
u8* MemoryBase;
#endif
/*
    Maps size bytes of the backing block, starting at offset, into the
    fastmem window of CPU num (0 = ARM9, otherwise ARM7) at emulated
    address addr.

    Returns true if the mapping was established. Only Switch has an
    implementation so far; on every other host this reports failure
    instead of falling off the end of a non-void function.
*/
bool MapIntoRange(u32 addr, u32 num, u32 offset, u32 size)
{
    u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
#ifdef __SWITCH__
    Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(),
        (u64)(MemoryBaseCodeMem + offset), size));
    return R_SUCCEEDED(r);
#else
    // no mapping backend for this host yet
    (void)dst;
    (void)offset;
    (void)size;
    return false;
#endif
}
/*
    Removes a mapping previously created by MapIntoRange. The arguments
    have to describe the same window: emulated address addr of CPU num
    (0 = ARM9, otherwise ARM7), backed by size bytes at offset of the
    backing block.

    Returns true on success. Only Switch has an implementation so far; on
    every other host this reports failure instead of falling off the end
    of a non-void function.
*/
bool UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size)
{
    u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
#ifdef __SWITCH__
    Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(),
        (u64)(MemoryBaseCodeMem + offset), size);
    printf("%x\n", r);
    return R_SUCCEEDED(r);
#else
    // no mapping backend for this host yet
    (void)dst;
    (void)offset;
    (void)size;
    return false;
#endif
}
struct Mapping
{
u32 Addr;
u32 Size, LocalOffset;
u32 Num;
void Unmap(int region)
{
bool skipDTCM = Num == 0 && region != memregion_DTCM;
u8* statuses = Num == 0 ? MappingStatus9 : MappingStatus7;
u32 offset = 0;
while (offset < Size)
{
if (skipDTCM && Addr + offset == NDS::ARM9->DTCMBase)
{
offset += NDS::ARM9->DTCMSize;
printf("%x skip\n", NDS::ARM9->DTCMSize);
}
else
{
u32 segmentOffset = offset;
u8 status = statuses[(Addr + offset) >> 12];
while (statuses[(Addr + offset) >> 12] == status
&& offset < Size
&& (!skipDTCM || Addr + offset != NDS::ARM9->DTCMBase))
{
assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
statuses[(Addr + offset) >> 12] = memstate_Unmapped;
offset += 0x1000;
}
if (status == memstate_MappedRW)
{
u32 segmentSize = offset - segmentOffset;
printf("unmapping %x %x %x %x\n", Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize);
bool success = UnmapFromRange(Addr + segmentOffset, Num, segmentOffset + LocalOffset + OffsetsPerRegion[region], segmentSize);
assert(success);
}
}
}
}
};
ARMJIT::TinyVector<Mapping> Mappings[memregions_Count];
/*
    Switches the 4kb page containing offset (relative to the start of
    region) between read/write and protected in every mapping of that
    region which covers the page.

    protect = true:  the page has to be read/write and becomes protected
                     (its host mapping is removed)
    protect = false: the page has to be protected and becomes read/write
                     again (its host mapping is re-established)
*/
void SetCodeProtection(int region, u32 offset, bool protect)
{
    offset &= ~0xFFF;
    printf("set code protection %d %x %d\n", region, offset, protect);

    for (int i = 0; i < Mappings[region].Length; i++)
    {
        Mapping& mapping = Mappings[region][i];

        // A mapping only covers [LocalOffset, LocalOffset + Size) of the region.
        // For a page outside of that slice the address calculation below
        // would wrap around and hit a page this mapping doesn't own.
        if (offset < mapping.LocalOffset || offset - mapping.LocalOffset >= mapping.Size)
            continue;

        u32 effectiveAddr = mapping.Addr + (offset - mapping.LocalOffset);

        // the part of an ARM9 mapping shadowed by DTCM isn't mapped in the first place
        if (mapping.Num == 0
            && region != memregion_DTCM
            && effectiveAddr >= NDS::ARM9->DTCMBase
            && effectiveAddr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
            continue;

        u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7);

        printf("%d %x %d\n", states[effectiveAddr >> 12], effectiveAddr, mapping.Num);
        assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected));
        states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW;

        bool success;
        if (protect)
            success = UnmapFromRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
        else
            success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
        assert(success);
    }
}
/*
    Prepares the fastmem windows for DTCM moving to [newBase, newBase + newSize).

    Has to run while NDS::ARM9 still holds the old DTCM base and size.
    Every ARM9 mapping of another region which intersects the old or the new
    DTCM range is torn down, as are all mappings of DTCM itself. Nothing is
    mapped again here.
*/
void RemapDTCM(u32 newBase, u32 newSize)
{
    // this first part could be made more efficient
    // by unmapping DTCM first and then map the holes
    u32 oldDTCMBase = NDS::ARM9->DTCMBase;
    u32 oldDTCMEnd = oldDTCMBase + NDS::ARM9->DTCMSize;

    u32 newEnd = newBase + newSize;

    printf("remapping DTCM %x %x %x %x\n", newBase, newEnd, oldDTCMBase, oldDTCMEnd);
    // unmap all regions containing the old or the current DTCM mapping
    for (int region = 0; region < memregions_Count; region++)
    {
        if (region == memregion_DTCM)
            continue;

        for (int i = 0; i < Mappings[region].Length;)
        {
            Mapping& mapping = Mappings[region][i];

            u32 start = mapping.Addr;
            u32 end = mapping.Addr + mapping.Size;

            printf("mapping %d %x %x %x %x\n", region, mapping.Addr, mapping.Size, mapping.Num, mapping.LocalOffset);

            // Half open interval intersection. Checking only whether one of the
            // DTCM end points lies inside the mapping misses a DTCM range which
            // encloses the mapping completely and flags ranges which merely touch it.
            bool oldOverlap = NDS::ARM9->DTCMSize > 0 && oldDTCMBase < end && oldDTCMEnd > start;
            bool newOverlap = newSize > 0 && newBase < end && newEnd > start;

            if (mapping.Num == 0 && (oldOverlap || newOverlap))
            {
                mapping.Unmap(region);
                // the next mapping moves into slot i, so don't advance
                Mappings[region].Remove(i);
            }
            else
            {
                i++;
            }
        }
    }

    for (int i = 0; i < Mappings[memregion_DTCM].Length; i++)
    {
        Mappings[memregion_DTCM][i].Unmap(memregion_DTCM);
    }
    Mappings[memregion_DTCM].Clear();
}
// Throws away all mappings of shared WRAM and of ARM7 WRAM
// (in that order). Nothing is mapped again here.
void RemapSWRAM()
{
    printf("remapping SWRAM\n");

    const int affectedRegions[] = {memregion_SWRAM, memregion_WRAM7};
    for (int region : affectedRegions)
    {
        auto& list = Mappings[region];
        for (int idx = 0; idx < list.Length; idx++)
            list[idx].Unmap(region);
        list.Clear();
    }
}
// Maps the mirror of the memory region which contains `addr` for the CPU
// selected by NDS::CurCPU.
//
// Returns false if the address belongs to a region which can't be mapped or
// which isn't visible to that CPU at the moment. Otherwise the whole mirror is
// processed page by page: pages holding JIT compiled code are only marked as
// memstate_MappedProtected (they stay unmapped), all other pages are mapped
// and marked memstate_MappedRW. On the ARM9 the pages shadowed by DTCM are left
// out for all regions but DTCM itself. The mirror is then recorded in Mappings.
bool MapAtAddress(u32 addr)
{
    u32 num = NDS::CurCPU;

    int region = num == 0
        ? ClassifyAddress9(addr)
        : ClassifyAddress7(addr);

    if (!IsMappable(region))
        return false;

    u32 mappingStart, mappingSize, memoryOffset, memorySize;
    bool isMapped = GetRegionMapping(region, num, mappingStart, mappingSize, memoryOffset, memorySize);

    if (!isMapped)
        return false;

    // this calculation even works with DTCM
    // which doesn't have to be aligned to its own size
    u32 mirrorStart = (addr - mappingStart) / memorySize * memorySize + mappingStart;

    u8* states = num == 0 ? MappingStatus9 : MappingStatus7;
    printf("trying to create mapping %08x %d %x %d %x\n", addr, num, memorySize, region, memoryOffset);
    bool isExecutable = ARMJIT::CodeMemRegions[region] != NULL;

    // The code ranges are indexed in units of 512 bytes (see range[offset / 512]
    // below), memoryOffset is in bytes and has to be converted accordingly.
    // Regions without code ranges never dereference this pointer.
    ARMJIT::AddressRange* range = isExecutable
        ? ARMJIT::CodeMemRegions[region] + memoryOffset / 512
        : NULL;

    // this overcomplicated piece of code basically just finds whole pieces of code memory
    // which can be mapped
    u32 offset = 0;
    bool skipDTCM = num == 0 && region != memregion_DTCM;
    while (offset < memorySize)
    {
        if (skipDTCM && mirrorStart + offset == NDS::ARM9->DTCMBase)
        {
            offset += NDS::ARM9->DTCMSize;
        }
        else
        {
            u32 sectionOffset = offset;
            bool hasCode = isExecutable && ARMJIT::PageContainsCode(&range[offset / 512]);
            // the bounds check has to come first, otherwise the code range
            // one past the end of the region would be inspected
            while (offset < memorySize
                && (!isExecutable || ARMJIT::PageContainsCode(&range[offset / 512]) == hasCode)
                && (!skipDTCM || mirrorStart + offset != NDS::ARM9->DTCMBase))
            {
                assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped);
                states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW;
                offset += 0x1000;
            }

            u32 sectionSize = offset - sectionOffset;

            // sections with code stay unmapped, so that accesses to them keep faulting
            if (!hasCode)
            {
                printf("trying to map %x (size: %x) from %x\n", mirrorStart + sectionOffset, sectionSize, sectionOffset + memoryOffset + OffsetsPerRegion[region]);
                bool succeded = MapIntoRange(mirrorStart + sectionOffset, num, sectionOffset + memoryOffset + OffsetsPerRegion[region], sectionSize);
                assert(succeded);
            }
        }
    }

    Mapping mapping{mirrorStart, memorySize, memoryOffset, num};
    Mappings[region].Add(mapping);

    printf("mapped mirror at %08x-%08x\n", mirrorStart, mirrorStart + memorySize - 1);

    return true;
}
// Reacts to a memory fault described by faultDesc.
//
// Faults whose PC isn't inside of JIT generated code are ignored. For the
// others: if the accessed page is still unmapped, mapping it is attempted;
// if that's not possible (or the page is in any other state) the faulting
// access is handed to RewriteMemAccess. Execution is then resumed via
// RestoreAndRepeat with the offset returned by the rewrite (0 if none happened).
void FaultHandler(FaultDescription* faultDesc)
{
    if (!ARMJIT::JITCompiler->IsJITFault(faultDesc->GetPC()))
        return;

    u32 faultAddr = faultDesc->GetEmulatedAddr();
    u8* pageStates = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7;

    bool needsSlowPath = true;
    if (pageStates[faultAddr >> 12] == memstate_Unmapped)
        needsSlowPath = !MapAtAddress(faultDesc->GetEmulatedAddr());

    s64 pcOffset = 0;
    if (needsSlowPath)
        pcOffset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->GetPC());

    faultDesc->RestoreAndRepeat(pcOffset);
}
void Init()
{
#if defined(__SWITCH__)
MemoryBase = (u8*)memalign(0x1000, MemoryTotalSize);
MemoryBaseCodeMem = (u8*)virtmemReserve(MemoryTotalSize);
bool succeded = R_SUCCEEDED(svcMapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem,
(u64)MemoryBase, MemoryTotalSize));
assert(succeded);
succeded = R_SUCCEEDED(svcSetProcessMemoryPermission(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem,
MemoryTotalSize, Perm_Rw));
assert(succeded);
// 8 GB of address space, just don't ask...
FastMem9Start = virtmemReserve(0x100000000);
assert(FastMem9Start);
FastMem7Start = virtmemReserve(0x100000000);
assert(FastMem7Start);
NDS::MainRAM = MemoryBaseCodeMem + MemBlockMainRAMOffset;
NDS::SharedWRAM = MemoryBaseCodeMem + MemBlockSWRAMOffset;
NDS::ARM7WRAM = MemoryBaseCodeMem + MemBlockARM7WRAMOffset;
NDS::ARM9->DTCM = MemoryBaseCodeMem + MemBlockDTCMOffset;
#else
MemoryBase = new u8[MemoryTotalSize];
NDS::MainRAM = MemoryBase + MemBlockMainRAMOffset;
NDS::SharedWRAM = MemoryBase + MemBlockSWRAMOffset;
NDS::ARM7WRAM = MemoryBase + MemBlockARM7WRAMOffset;
NDS::ARM9->DTCM = MemoryBase + MemBlockDTCMOffset;
#endif
}
// Releases what Init() has set up.
// Does not reset the NDS::MainRAM/SharedWRAM/ARM7WRAM/DTCM pointers,
// which keep pointing into the freed block.
void DeInit()
{
#if defined(__SWITCH__)
// give back the two 4 GB reservations (same size as passed to virtmemReserve in Init)
virtmemFree(FastMem9Start, 0x100000000);
virtmemFree(FastMem7Start, 0x100000000);
// undo the code memory mapping before releasing its address range and the backing allocation
svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
// MemoryBase comes from memalign here, hence free
free(MemoryBase);
#else
// MemoryBase comes from new[] here
delete[] MemoryBase;
#endif
}
// Unmaps and forgets the mappings of all regions.
// In builds with assertions it is verified afterwards that no page of
// either CPU is left in a state other than memstate_Unmapped.
void Reset()
{
    for (int region = 0; region < memregions_Count; region++)
    {
        auto& list = Mappings[region];
        for (int idx = 0; idx < list.Length; idx++)
            list[idx].Unmap(region);
        list.Clear();
    }

    for (int page = 0; page < sizeof(MappingStatus9); page++)
    {
        assert(MappingStatus9[page] == memstate_Unmapped);
        assert(MappingStatus7[page] == memstate_Unmapped);
    }

    printf("done resetting jit mem\n");
}
// Tells whether `region` (a memregion_* value) can be mapped at all.
// An entry of UINT32_MAX in OffsetsPerRegion marks a region which can't.
bool IsMappable(int region)
{
    if (OffsetsPerRegion[region] == UINT32_MAX)
        return false;
    return true;
}
// Describes where `region` is visible in the address space of CPU `num`
// (0 = ARM9, 1 = ARM7).
//
// Outputs (only meaningful if true is returned):
//   mappingStart/mappingSize - address window in which the region appears
//   memoryOffset             - offset of the visible part inside of the region's
//                              memory (only non zero for shared WRAM)
//   memorySize               - size of the memory which is mirrored through the window
//
// Returns false if the region isn't visible to that CPU or isn't relevant
// for the JIT. memoryOffset is always reset to 0; for memregion_SWRAM
// mappingStart is written even if false is returned.
bool GetRegionMapping(int region, u32 num, u32& mappingStart, u32& mappingSize, u32& memoryOffset, u32& memorySize)
{
    memoryOffset = 0;

    const bool isARM9 = num == 0;
    const bool isARM7 = num == 1;

    switch (region)
    {
    case memregion_ITCM:
        if (!isARM9)
            return false;
        mappingStart = 0;
        mappingSize = NDS::ARM9->ITCMSize;
        memorySize = ITCMPhysicalSize;
        return true;

    case memregion_DTCM:
        if (!isARM9)
            return false;
        mappingStart = NDS::ARM9->DTCMBase;
        mappingSize = NDS::ARM9->DTCMSize;
        memorySize = DTCMPhysicalSize;
        return true;

    case memregion_BIOS9:
        if (!isARM9)
            return false;
        mappingStart = 0xFFFF0000;
        mappingSize = 0x10000;
        memorySize = 0x1000;
        return true;

    case memregion_MainRAM:
        // visible to both CPUs at the same place
        mappingStart = 0x2000000;
        mappingSize = 0x1000000;
        memorySize = NDS::MainRAMSize;
        return true;

    case memregion_SWRAM:
        mappingStart = 0x3000000;
        if (isARM9 && NDS::SWRAM_ARM9.Mem)
        {
            mappingSize = 0x1000000;
            memoryOffset = NDS::SWRAM_ARM9.Mem - NDS::SharedWRAM;
            memorySize = NDS::SWRAM_ARM9.Mask + 1;
            return true;
        }
        if (isARM7 && NDS::SWRAM_ARM7.Mem)
        {
            mappingSize = 0x800000;
            memoryOffset = NDS::SWRAM_ARM7.Mem - NDS::SharedWRAM;
            memorySize = NDS::SWRAM_ARM7.Mask + 1;
            return true;
        }
        return false;

    case memregion_VRAM:
        if (!isARM9)
            return false;
        // this is a gross simplification
        // mostly to make code on vram working
        // it doesn't take any of the actual VRAM mappings into account
        mappingStart = 0x6000000;
        mappingSize = 0x1000000;
        memorySize = 0x100000;
        return true;

    case memregion_BIOS7:
        if (!isARM7)
            return false;
        mappingStart = 0;
        mappingSize = 0x4000;
        memorySize = 0x4000;
        return true;

    case memregion_WRAM7:
        if (!isARM7)
            return false;
        // the lower half of the window is taken by shared WRAM
        // whenever some of it is assigned to the ARM7
        if (NDS::SWRAM_ARM7.Mem)
        {
            mappingStart = 0x3800000;
            mappingSize = 0x800000;
        }
        else
        {
            mappingStart = 0x3000000;
            mappingSize = 0x1000000;
        }
        memorySize = NDS::ARM7WRAMSize;
        return true;

    case memregion_VWRAM:
        if (!isARM7)
            return false;
        mappingStart = 0x6000000;
        mappingSize = 0x1000000;
        memorySize = 0x20000;
        return true;

    default:
        // for the JIT we don't care about the rest
        return false;
    }
}
// Returns the memregion_* value of the region an ARM9 address falls into.
// ITCM takes precedence over DTCM, both take precedence over everything else.
// Addresses which match none of the cases yield memregion_Other.
int ClassifyAddress9(u32 addr)
{
    if (addr < NDS::ARM9->ITCMSize)
        return memregion_ITCM;

    if (addr >= NDS::ARM9->DTCMBase && addr < (NDS::ARM9->DTCMBase + NDS::ARM9->DTCMSize))
        return memregion_DTCM;

    if ((addr & 0xFFFFF000) == 0xFFFF0000)
        return memregion_BIOS9;

    const u32 topByte = addr & 0xFF000000;

    if (topByte == 0x02000000)
        return memregion_MainRAM;
    if (topByte == 0x03000000)
    {
        // only counts as shared WRAM if some of it is assigned to the ARM9
        if (NDS::SWRAM_ARM9.Mem)
            return memregion_SWRAM;
        return memregion_Other;
    }
    if (topByte == 0x04000000)
        return memregion_IO9;
    if (topByte == 0x06000000)
        return memregion_VRAM;

    return memregion_Other;
}
// Returns the memregion_* value of the region an ARM7 address falls into.
// Apart from the BIOS the decision is made in 8 MB steps (bits 23-31).
// Addresses which match none of the cases yield memregion_Other.
int ClassifyAddress7(u32 addr)
{
    if (addr < 0x00004000)
        return memregion_BIOS7;

    const u32 bank = addr & 0xFF800000;

    if (bank == 0x02000000 || bank == 0x02800000)
        return memregion_MainRAM;
    if (bank == 0x03000000)
    {
        // shared WRAM if some of it is assigned to the ARM7,
        // otherwise ARM7 WRAM shows up here too
        if (NDS::SWRAM_ARM7.Mem)
            return memregion_SWRAM;
        return memregion_WRAM7;
    }
    if (bank == 0x03800000)
        return memregion_WRAM7;
    if (bank == 0x04000000)
        return memregion_IO7;
    if (bank == 0x04800000)
        return memregion_Wifi;
    if (bank == 0x06000000 || bank == 0x06800000)
        return memregion_VWRAM;

    return memregion_Other;
}
// 32 bit store to the wifi region, carried out as two Wifi::Write calls:
// the lower 16 bits go to addr, then the upper 16 bits to addr + 2.
void WifiWrite32(u32 addr, u32 val)
{
    const u32 lowerHalf = val & 0xFFFF;
    const u32 upperHalf = val >> 16;

    Wifi::Write(addr, lowerHalf);
    Wifi::Write(addr + 2, upperHalf);
}
// 32 bit load from the wifi region, assembled from two Wifi::Read calls:
// addr provides the lower 16 bits, addr + 2 the upper 16 bits.
//
// The reads are done in separate statements, so that the lower half is always
// read first (as operands of a single | expression the order of the two calls
// is unspecified). Widening to u32 before shifting also keeps the shift out of
// signed arithmetic, in case Wifi::Read returns a type which is promoted to int.
u32 WifiRead32(u32 addr)
{
    u32 lowerHalf = Wifi::Read(addr);
    u32 upperHalf = Wifi::Read(addr + 2);
    return lowerHalf | (upperHalf << 16);
}
// Forwards an ARM9 VRAM store of type T to the GPU write function
// selected by bits 21-23 of the address; everything from
// 0x00800000 upwards goes to the LCDC function.
template <typename T>
void VRAMWrite(u32 addr, T val)
{
    const u32 bank = addr & 0x00E00000;

    if (bank == 0x00000000)
        GPU::WriteVRAM_ABG<T>(addr, val);
    else if (bank == 0x00200000)
        GPU::WriteVRAM_BBG<T>(addr, val);
    else if (bank == 0x00400000)
        GPU::WriteVRAM_AOBJ<T>(addr, val);
    else if (bank == 0x00600000)
        GPU::WriteVRAM_BOBJ<T>(addr, val);
    else
        GPU::WriteVRAM_LCDC<T>(addr, val);
}
// Forwards an ARM9 VRAM load of type T to the GPU read function
// selected by bits 21-23 of the address; everything from
// 0x00800000 upwards goes to the LCDC function.
template <typename T>
T VRAMRead(u32 addr)
{
    const u32 bank = addr & 0x00E00000;

    if (bank == 0x00000000)
        return GPU::ReadVRAM_ABG<T>(addr);
    if (bank == 0x00200000)
        return GPU::ReadVRAM_BBG<T>(addr);
    if (bank == 0x00400000)
        return GPU::ReadVRAM_AOBJ<T>(addr);
    if (bank == 0x00600000)
        return GPU::ReadVRAM_BOBJ<T>(addr);

    return GPU::ReadVRAM_LCDC<T>(addr);
}
// Looks up a handler function for an access of the given kind to `addr`.
//
// cpu:   the CPU doing the access, only cpu->Num is used (0 takes the first branch, anything else the second)
// addr:  the accessed address
// store: true for a store, false for a load
// size:  access size in bits (8, 16 or 32)
//
// Returns the handler as an untyped pointer or NULL if there is none for this
// combination (presumably the caller has to fall back to the generic
// memory access functions then - TODO confirm against the callers).
//
// The inner switches use `size | store`: the sizes all have bit 0 clear,
// so loads yield 8/16/32 and stores yield 9/17/33.
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size)
{
if (cpu->Num == 0)
{
switch (addr & 0xFF000000)
{
case 0x04000000:
// 32 bit loads from 0x04100010 go straight to NDSCart::ReadROMData while bit 11 of ExMemCnt[0] is set
if (!store && size == 32 && addr == 0x04100010 && NDS::ExMemCnt[0] & (1<<11))
return (void*)NDSCart::ReadROMData;
/*
unfortunately we can't map GPU2D this way
since it's hidden inside an object
though GPU3D registers are accessed much more intensive
*/
if (addr >= 0x04000320 && addr < 0x040006A4)
{
switch (size | store)
{
case 8: return (void*)GPU3D::Read8;
case 9: return (void*)GPU3D::Write8;
case 16: return (void*)GPU3D::Read16;
case 17: return (void*)GPU3D::Write16;
case 32: return (void*)GPU3D::Read32;
case 33: return (void*)GPU3D::Write32;
}
}
// everything else in this range is handled by the generic ARM9 IO functions
switch (size | store)
{
case 8: return (void*)NDS::ARM9IORead8;
case 9: return (void*)NDS::ARM9IOWrite8;
case 16: return (void*)NDS::ARM9IORead16;
case 17: return (void*)NDS::ARM9IOWrite16;
case 32: return (void*)NDS::ARM9IORead32;
case 33: return (void*)NDS::ARM9IOWrite32;
}
break;
case 0x06000000:
switch (size | store)
{
case 8: return (void*)VRAMRead<u8>;
// no handler is provided for 8 bit stores to VRAM
case 9: return NULL;
case 16: return (void*)VRAMRead<u16>;
case 17: return (void*)VRAMWrite<u16>;
case 32: return (void*)VRAMRead<u32>;
case 33: return (void*)VRAMWrite<u32>;
}
break;
}
}
else
{
// the second branch distinguishes regions in 8 MB steps
switch (addr & 0xFF800000)
{
case 0x04000000:
// this address range is routed to the SPU functions
if (addr >= 0x04000400 && addr < 0x04000520)
{
switch (size | store)
{
case 8: return (void*)SPU::Read8;
case 9: return (void*)SPU::Write8;
case 16: return (void*)SPU::Read16;
case 17: return (void*)SPU::Write16;
case 32: return (void*)SPU::Read32;
case 33: return (void*)SPU::Write32;
}
}
// everything else in this range is handled by the generic ARM7 IO functions
switch (size | store)
{
case 8: return (void*)NDS::ARM7IORead8;
case 9: return (void*)NDS::ARM7IOWrite8;
case 16: return (void*)NDS::ARM7IORead16;
case 17: return (void*)NDS::ARM7IOWrite16;
case 32: return (void*)NDS::ARM7IORead32;
case 33: return (void*)NDS::ARM7IOWrite32;
}
break;
case 0x04800000:
// only 16 and 32 bit accesses below 0x04810000 get a wifi handler,
// 32 bit ones go through the WifiRead32/WifiWrite32 wrappers
if (addr < 0x04810000 && size >= 16)
{
switch (size | store)
{
case 16: return (void*)Wifi::Read;
case 17: return (void*)Wifi::Write;
case 32: return (void*)WifiRead32;
case 33: return (void*)WifiWrite32;
}
}
break;
case 0x06000000:
case 0x06800000:
switch (size | store)
{
case 8: return (void*)GPU::ReadVRAM_ARM7<u8>;
case 9: return (void*)GPU::WriteVRAM_ARM7<u8>;
case 16: return (void*)GPU::ReadVRAM_ARM7<u16>;
case 17: return (void*)GPU::WriteVRAM_ARM7<u16>;
case 32: return (void*)GPU::ReadVRAM_ARM7<u32>;
case 33: return (void*)GPU::WriteVRAM_ARM7<u32>;
}
}
}
// no handler for this kind of access
return NULL;
}
}

53
src/ARMJIT_Memory.h Normal file
View File

@ -0,0 +1,53 @@
// Interface of the JIT memory management: backing memory setup,
// classification of addresses into regions and handling of the mappings.
#ifndef ARMJIT_MEMORY
#define ARMJIT_MEMORY
#include "types.h"
#include "ARM.h"
namespace ARMJIT_Memory
{
// Start of the address ranges used for the ARM9 and the ARM7 respectively.
// Init() assigns them on Switch builds (a 4 GB reservation each).
extern void* FastMem9Start;
extern void* FastMem7Start;
// Allocates the backing memory block and points NDS::MainRAM, NDS::SharedWRAM,
// NDS::ARM7WRAM and the ARM9's DTCM into it.
void Init();
// Releases what Init() has allocated.
void DeInit();
// Unmaps and forgets all mappings.
void Reset();
// Identifiers of the memory regions, as returned by ClassifyAddress9/7.
// The values also serve as array indices, memregions_Count is the number of regions.
enum
{
memregion_Other = 0,
memregion_ITCM,
memregion_DTCM,
memregion_BIOS9,
memregion_MainRAM,
memregion_SWRAM,
memregion_IO9,
memregion_VRAM,
memregion_BIOS7,
memregion_WRAM7,
memregion_IO7,
memregion_Wifi,
memregion_VWRAM,
memregions_Count
};
// Return the memregion_* value of the region which an ARM9/ARM7 address falls into.
int ClassifyAddress9(u32 addr);
int ClassifyAddress7(u32 addr);
// Describes where `region` is visible for CPU `num` (0 = ARM9, 1 = ARM7).
// Returns false if it isn't; the outputs are only meaningful if true is returned.
bool GetRegionMapping(int region, u32 num, u32& mappingStart, u32& mappingSize, u32& memoryOffset, u32& memorySize);
// Whether the region can be mapped at all.
bool IsMappable(int region);
// Drops the mappings affected by a change of the DTCM window. Reads the old
// window from NDS::ARM9, so it has to be called before DTCMBase/DTCMSize are updated.
void RemapDTCM(u32 newBase, u32 newSize);
// Drops all mappings of shared WRAM and ARM7 WRAM.
void RemapSWRAM();
// Protects (protect = true) or unprotects the 4 KB page at `offset`
// (relative to the region's memory) in the mappings of `region`.
void SetCodeProtection(int region, u32 offset, bool protect);
// Returns a handler function for an access of `size` bits (load or store)
// to `addr` by `cpu`, or NULL if there is none.
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
}
#endif

View File

@ -301,24 +301,6 @@ Compiler::Compiler()
RET();
}
{
CPSRDirty = true;
BranchStub[0] = GetWritableCodePtr();
SaveCPSR();
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((u8*)ARMJIT::LinkBlock<0>);
LoadCPSR();
JMP((u8*)ARM_Ret, true);
CPSRDirty = true;
BranchStub[1] = GetWritableCodePtr();
SaveCPSR();
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((u8*)ARMJIT::LinkBlock<1>);
LoadCPSR();
JMP((u8*)ARM_Ret, true);
}
// move the region forward to prevent overwriting the generated functions
CodeMemSize -= GetWritableCodePtr() - ResetStart;
ResetStart = GetWritableCodePtr();
@ -520,6 +502,11 @@ void Compiler::Reset()
FarCode = FarStart;
}
// Tells whether `addr` (a host address, e.g. the PC of a fault)
// lies inside of the CodeMemory buffer.
bool Compiler::IsJITFault(u64 addr)
{
    const u64 codeBegin = (u64)CodeMemory;
    const u64 codeEnd = codeBegin + sizeof(CodeMemory);

    return codeBegin <= addr && addr < codeEnd;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
{
if (taken && CurInstr.BranchFlags & branch_IdleBranch)
@ -531,32 +518,11 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
RegCache.PrepareExit();
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
if (Config::JIT_BrancheOptimisations == 2 && !(CurInstr.BranchFlags & branch_IdleBranch)
&& (!taken || (CurInstr.BranchFlags & branch_StaticTarget)))
{
FixupBranch ret = J_CC(CC_S);
CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
FixupBranch ret2 = J_CC(CC_NZ);
u8* rewritePart = GetWritableCodePtr();
NOP(5);
MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
JMP((u8*)BranchStub[Num], true);
SetJumpTarget(ret);
SetJumpTarget(ret2);
JMP((u8*)ARM_Ret, true);
}
else
{
JMP((u8*)&ARM_Ret, true);
}
JMP((u8*)&ARM_Ret, true);
}
}
JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
{
if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
{
@ -575,7 +541,7 @@ JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, F
CodeRegion = instrs[0].Addr >> 24;
CurCPU = cpu;
// CPSR might have been modified in a previous block
CPSRDirty = Config::JIT_BrancheOptimisations == 2;
CPSRDirty = false;
JitBlockEntry res = (JitBlockEntry)GetWritableCodePtr();
@ -685,31 +651,7 @@ JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, F
RegCache.Flush();
SUB(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
if (Config::JIT_BrancheOptimisations == 2
&& !(instrs[instrsCount - 1].BranchFlags & branch_IdleBranch)
&& (!instrs[instrsCount - 1].Info.Branches()
|| instrs[instrsCount - 1].BranchFlags & branch_FollowCondNotTaken
|| (instrs[instrsCount - 1].BranchFlags & branch_FollowCondTaken && instrs[instrsCount - 1].BranchFlags & branch_StaticTarget)))
{
FixupBranch ret = J_CC(CC_S);
CMP(32, MDisp(RCPU, offsetof(ARM, StopExecution)), Imm8(0));
FixupBranch ret2 = J_CC(CC_NZ);
u8* rewritePart = GetWritableCodePtr();
NOP(5);
MOV(32, R(ABI_PARAM2), Imm32(rewritePart - ResetStart));
JMP((u8*)BranchStub[Num], true);
SetJumpTarget(ret);
SetJumpTarget(ret2);
JMP((u8*)ARM_Ret, true);
}
else
{
JMP((u8*)ARM_Ret, true);
}
JMP((u8*)ARM_Ret, true);
/*FILE* codeout = fopen("codeout", "a");
fprintf(codeout, "beginning block argargarg__ %x!!!", instrs[0].Addr);
@ -720,22 +662,6 @@ JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, F
return res;
}
void Compiler::LinkBlock(u32 offset, JitBlockEntry entry)
{
u8* curPtr = GetWritableCodePtr();
SetCodePtr(ResetStart + offset);
JMP((u8*)entry, true);
SetCodePtr(curPtr);
}
void Compiler::UnlinkBlock(u32 offset)
{
u8* curPtr = GetWritableCodePtr();
SetCodePtr(ResetStart + offset);
NOP(5);
SetCodePtr(curPtr);
}
void Compiler::Comp_AddCycles_C(bool forceNonConstant)
{
s32 cycles = Num ?

View File

@ -52,10 +52,7 @@ public:
void Reset();
void LinkBlock(u32 offset, JitBlockEntry entry);
void UnlinkBlock(u32 offset);
JitBlockEntry CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
JitBlockEntry CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount);
void LoadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
@ -202,6 +199,10 @@ public:
SetCodePtr(FarCode);
}
bool IsJITFault(u64 addr);
s32 RewriteMemAccess(u64 pc);
u8* FarCode;
u8* NearCode;
u32 FarSize;
@ -216,8 +217,6 @@ public:
bool Exit;
bool IrregularCycles;
void* BranchStub[2];
void* ReadBanked;
void* WriteBanked;

View File

@ -15,6 +15,11 @@ int squeezePointer(T* ptr)
return truncated;
}
// Placeholder: ignores `pc`, rewrites nothing and always returns 0.
// NOTE(review): the fault handler passes the return value on to RestoreAndRepeat,
// presumably as an adjustment of the PC to resume at - confirm once this is implemented.
s32 Compiler::RewriteMemAccess(u64 pc)
{
return 0;
}
/*
According to DeSmuME and my own research, approx. 99% (seriously, that's an empirical number)
of all memory load and store instructions always access addresses in the same region as
@ -27,14 +32,15 @@ int squeezePointer(T* ptr)
bool Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
{
u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
return false;
//u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
/*int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
if (invalidLiteralIdx != -1)
{
InvalidLiterals.Remove(invalidLiteralIdx);
return false;
}
}*/
u32 val;
// make sure arm7 bios is accessible
@ -95,7 +101,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
OpArg rdMapped = MapReg(rd);
if (!addrIsStatic)
if (true)
{
OpArg rnMapped = MapReg(rn);
if (Thumb && rn == 15)
@ -145,7 +151,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
MOV(32, rnMapped, R(finalAddr));
}
int expectedTarget = Num == 0
/*int expectedTarget = Num == 0
? ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
: ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
if (CurInstr.Cond() < 0xE)
@ -184,8 +190,8 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
if (addrIsStatic && compileSlowPath)
MOV(32, R(RSCRATCH3), Imm32(staticAddress));
if (compileFastPath)
*/
/*if (compileFastPath)
{
FixupBranch slowPath;
if (compileSlowPath)
@ -357,15 +363,16 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
SetJumpTarget(slowPath);
}
}
if (compileSlowPath)
*/
if (true)
{
PushRegs(false);
if (Num == 0)
{
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
MOV(64, R(ABI_PARAM2), R(RCPU));
if (ABI_PARAM1 != RSCRATCH3)
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
if (flags & memop_Store)
{
MOV(32, R(ABI_PARAM3), rdMapped);
@ -423,13 +430,13 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
}
}
/*
if (compileFastPath && compileSlowPath)
{
FixupBranch ret = J(true);
SwitchToNearCode();
SetJumpTarget(ret);
}
}*/
if (!(flags & memop_Store) && rd == 15)
{
@ -458,7 +465,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
#endif
u32 allocOffset = stackAlloc - regsCount * 8;
/*
int expectedTarget = Num == 0
? ClassifyAddress9(CurInstr.DataRegion)
: ClassifyAddress7(CurInstr.DataRegion);
@ -479,7 +486,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
default:
break;
}
*/
if (!store)
Comp_AddCycles_CDI();
else
@ -492,7 +499,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
}
else
MOV(32, R(RSCRATCH4), MapReg(rn));
/*
if (compileFastPath)
{
assert(!usermode);
@ -570,7 +577,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
SwitchToFarCode();
SetJumpTarget(slowPath);
}
}*/
if (!store)
{
@ -696,13 +703,13 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
PopRegs(false);
}
/*
if (compileFastPath)
{
FixupBranch ret = J(true);
SwitchToNearCode();
SetJumpTarget(ret);
}
}*/
if (!store && regs[15])
{

View File

@ -206,15 +206,14 @@ enum {
T_ReadR14 = 1 << 13,
T_WriteR14 = 1 << 14,
T_PopPC = 1 << 15,
T_SetNZ = 1 << 16,
T_SetCV = 1 << 17,
T_SetMaybeC = 1 << 18,
T_ReadC = 1 << 19,
T_SetC = 1 << 20,
T_SetNZ = 1 << 15,
T_SetCV = 1 << 16,
T_SetMaybeC = 1 << 17,
T_ReadC = 1 << 18,
T_SetC = 1 << 19,
T_WriteMem = 1 << 21,
T_WriteMem = 1 << 20,
T_LoadMem = 1 << 21,
};
const u32 T_LSL_IMM = T_SetNZ | T_SetMaybeC | T_Write0 | T_Read3 | tk(tk_LSL_IMM);
@ -256,31 +255,31 @@ const u32 T_ADD_PCREL = T_Write8 | tk(tk_ADD_PCREL);
const u32 T_ADD_SPREL = T_Write8 | T_ReadR13 | tk(tk_ADD_SPREL);
const u32 T_ADD_SP = T_WriteR13 | T_ReadR13 | tk(tk_ADD_SP);
const u32 T_LDR_PCREL = T_Write8 | tk(tk_LDR_PCREL);
const u32 T_LDR_PCREL = T_Write8 | T_LoadMem | tk(tk_LDR_PCREL);
const u32 T_STR_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STR_REG);
const u32 T_STRB_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRB_REG);
const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDR_REG);
const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRB_REG);
const u32 T_LDR_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDR_REG);
const u32 T_LDRB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRB_REG);
const u32 T_STRH_REG = T_Read0 | T_Read3 | T_Read6 | T_WriteMem | tk(tk_STRH_REG);
const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSB_REG);
const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRH_REG);
const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | tk(tk_LDRSH_REG);
const u32 T_LDRSB_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSB_REG);
const u32 T_LDRH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRH_REG);
const u32 T_LDRSH_REG = T_Write0 | T_Read3 | T_Read6 | T_LoadMem | tk(tk_LDRSH_REG);
const u32 T_STR_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STR_IMM);
const u32 T_LDR_IMM = T_Write0 | T_Read3 | tk(tk_LDR_IMM);
const u32 T_LDR_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDR_IMM);
const u32 T_STRB_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRB_IMM);
const u32 T_LDRB_IMM = T_Write0 | T_Read3 | tk(tk_LDRB_IMM);
const u32 T_LDRB_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRB_IMM);
const u32 T_STRH_IMM = T_Read0 | T_Read3 | T_WriteMem | tk(tk_STRH_IMM);
const u32 T_LDRH_IMM = T_Write0 | T_Read3 | tk(tk_LDRH_IMM);
const u32 T_LDRH_IMM = T_Write0 | T_Read3 | T_LoadMem | tk(tk_LDRH_IMM);
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | T_WriteMem | tk(tk_STR_SPREL);
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | tk(tk_LDR_SPREL);
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | T_LoadMem | tk(tk_LDR_SPREL);
const u32 T_PUSH = T_ReadR13 | T_WriteR13 | T_WriteMem | tk(tk_PUSH);
const u32 T_POP = T_PopPC | T_ReadR13 | T_WriteR13 | tk(tk_POP);
const u32 T_POP = T_ReadR13 | T_WriteR13 | T_LoadMem | tk(tk_POP);
const u32 T_LDMIA = T_Read8 | T_Write8 | tk(tk_LDMIA);
const u32 T_LDMIA = T_Read8 | T_Write8 | T_LoadMem | tk(tk_LDMIA);
const u32 T_STMIA = T_Read8 | T_Write8 | T_WriteMem | tk(tk_STMIA);
const u32 T_BCOND = T_BranchAlways | tk(tk_BCOND);
@ -347,7 +346,7 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & T_BranchAlways)
res.DstRegs |= (1 << 15);
if (data & T_PopPC && instr & (1 << 8))
if (res.Kind == tk_POP && instr & (1 << 8))
res.DstRegs |= 1 << 15;
if (data & T_SetNZ)
@ -364,11 +363,18 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & T_WriteMem)
res.SpecialKind = special_WriteMem;
if (res.Kind == ARMInstrInfo::tk_LDR_PCREL)
if (data & T_LoadMem)
{
if (!Config::JIT_LiteralOptimisations)
res.SrcRegs |= 1 << 15;
res.SpecialKind = special_LoadLiteral;
if (res.Kind == tk_LDR_PCREL)
{
if (!Config::JIT_LiteralOptimisations)
res.SrcRegs |= 1 << 15;
res.SpecialKind = special_LoadLiteral;
}
else
{
res.SpecialKind = special_LoadMem;
}
}
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
@ -401,11 +407,17 @@ Info Decode(bool thumb, u32 num, u32 instr)
else if ((instr >> 28) == 0xF)
data = ak(ak_Nop);
if (data & A_UnkOnARM7 && num != 0)
if (data & A_UnkOnARM7 && num == 1)
data = A_UNK;
res.Kind = (data >> 22) & 0x1FF;
if (res.Kind >= ak_SMLAxy && res.Kind <= ak_SMULxy && num == 1)
{
data = ak(ak_Nop);
res.Kind = ak_Nop;
}
if (res.Kind == ak_MCR)
{
u32 cn = (instr >> 16) & 0xF;
@ -490,8 +502,13 @@ Info Decode(bool thumb, u32 num, u32 instr)
if (data & A_WriteMem)
res.SpecialKind = special_WriteMem;
if ((data & A_LoadMem) && res.SrcRegs == (1 << 15))
res.SpecialKind = special_LoadLiteral;
if (data & A_LoadMem)
{
if (res.SrcRegs == (1 << 15))
res.SpecialKind = special_LoadLiteral;
else
res.SpecialKind = special_LoadMem;
}
if (res.Kind == ak_LDM)
{

View File

@ -232,6 +232,7 @@ enum
{
special_NotSpecialAtAll = 0,
special_WriteMem,
special_LoadMem,
special_WaitForInterrupt,
special_LoadLiteral
};

View File

@ -55,9 +55,11 @@ if (ENABLE_JIT)
enable_language(ASM)
target_sources(core PRIVATE
ARMJIT.cpp
ARM_InstrInfo.cpp
ARMJIT.cpp
ARMJIT_Memory.cpp
dolphin/CommonFuncs.cpp
)
@ -85,6 +87,8 @@ if (ENABLE_JIT)
ARMJIT_A64/ARMJIT_ALU.cpp
ARMJIT_A64/ARMJIT_LoadStore.cpp
ARMJIT_A64/ARMJIT_Branch.cpp
ARMJIT_A64/ARMJIT_Linkage.s
)
endif()
endif()

View File

@ -22,6 +22,7 @@
#include "DSi.h"
#include "ARM.h"
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
// access timing for cached regions
@ -42,8 +43,8 @@ void ARMv5::CP15Reset()
DTCMSetting = 0;
ITCMSetting = 0;
memset(ITCM, 0, 0x8000);
memset(DTCM, 0, 0x4000);
memset(ITCM, 0, ITCMPhysicalSize);
memset(DTCM, 0, DTCMPhysicalSize);
ITCMSize = 0;
DTCMBase = 0xFFFFFFFF;
@ -75,8 +76,8 @@ void ARMv5::CP15DoSavestate(Savestate* file)
file->Var32(&DTCMSetting);
file->Var32(&ITCMSetting);
file->VarArray(ITCM, 0x8000);
file->VarArray(DTCM, 0x4000);
file->VarArray(ITCM, ITCMPhysicalSize);
file->VarArray(DTCM, DTCMPhysicalSize);
file->Var32(&PU_CodeCacheable);
file->Var32(&PU_DataCacheable);
@ -98,36 +99,30 @@ void ARMv5::CP15DoSavestate(Savestate* file)
void ARMv5::UpdateDTCMSetting()
{
#ifdef JIT_ENABLED
u32 oldDTCMBase = DTCMBase;
u32 oldDTCMSize = DTCMSize;
#endif
u32 newDTCMBase;
u32 newDTCMSize;
if (CP15Control & (1<<16))
{
DTCMBase = DTCMSetting & 0xFFFFF000;
DTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F);
newDTCMBase = DTCMSetting & 0xFFFFF000;
newDTCMSize = 0x200 << ((DTCMSetting >> 1) & 0x1F);
//printf("DTCM [%08X] enabled at %08X, size %X\n", DTCMSetting, DTCMBase, DTCMSize);
}
else
{
DTCMBase = 0xFFFFFFFF;
DTCMSize = 0;
newDTCMBase = 0xFFFFFFFF;
newDTCMSize = 0;
//printf("DTCM disabled\n");
}
#ifdef JIT_ENABLED
if (oldDTCMBase != DTCMBase || oldDTCMSize != DTCMSize)
if (newDTCMBase != DTCMBase || newDTCMSize != DTCMSize)
{
ARMJIT::UpdateMemoryStatus9(oldDTCMBase, oldDTCMBase + oldDTCMSize);
ARMJIT::UpdateMemoryStatus9(DTCMBase, DTCMBase + DTCMSize);
ARMJIT_Memory::RemapDTCM(newDTCMBase, newDTCMSize);
DTCMBase = newDTCMBase;
DTCMSize = newDTCMSize;
}
#endif
}
void ARMv5::UpdateITCMSetting()
{
#ifdef JIT_ENABLED
u32 oldITCMSize = ITCMSize;
#endif
if (CP15Control & (1<<18))
{
ITCMSize = 0x200 << ((ITCMSetting >> 1) & 0x1F);
@ -138,10 +133,6 @@ void ARMv5::UpdateITCMSetting()
ITCMSize = 0;
//printf("ITCM disabled\n");
}
#ifdef JIT_ENABLED
if (oldITCMSize != ITCMSize)
ARMJIT::UpdateMemoryStatus9(0, std::max(oldITCMSize, ITCMSize));
#endif
}
@ -581,12 +572,15 @@ void ARMv5::CP15Write(u32 id, u32 val)
case 0x750:
ICacheInvalidateAll();
//Halt(255);
return;
case 0x751:
ICacheInvalidateByAddr(val);
//Halt(255);
return;
case 0x752:
printf("CP15: ICACHE INVALIDATE WEIRD. %08X\n", val);
//Halt(255);
return;
@ -723,7 +717,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if (addr < ITCMSize)
{
CodeCycles = 1;
return *(u32*)&ITCM[addr & 0x7FFF];
return *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
}
CodeCycles = RegionCodeCycles;
@ -750,13 +744,13 @@ void ARMv5::DataRead8(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u8*)&ITCM[addr & 0x7FFF];
*val = *(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*val = *(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u8*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -773,13 +767,13 @@ void ARMv5::DataRead16(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u16*)&ITCM[addr & 0x7FFF];
*val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*val = *(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u16*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -796,13 +790,13 @@ void ARMv5::DataRead32(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles = 1;
*val = *(u32*)&ITCM[addr & 0x7FFF];
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -817,13 +811,13 @@ void ARMv5::DataRead32S(u32 addr, u32* val)
if (addr < ITCMSize)
{
DataCycles += 1;
*val = *(u32*)&ITCM[addr & 0x7FFF];
*val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)];
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles += 1;
*val = *(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF];
*val = *(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)];
return;
}
@ -838,16 +832,16 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
if (addr < ITCMSize)
{
DataCycles = 1;
*(u8*)&ITCM[addr & 0x7FFF] = val;
*(u8*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::InvalidateITCMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*(u8*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u8*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@ -864,16 +858,16 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
if (addr < ITCMSize)
{
DataCycles = 1;
*(u16*)&ITCM[addr & 0x7FFF] = val;
*(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::InvalidateITCMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*(u16*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u16*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@ -890,16 +884,16 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
if (addr < ITCMSize)
{
DataCycles = 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::InvalidateITCMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles = 1;
*(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}
@ -914,16 +908,16 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
if (addr < ITCMSize)
{
DataCycles += 1;
*(u32*)&ITCM[addr & 0x7FFF] = val;
*(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)] = val;
#ifdef JIT_ENABLED
ARMJIT::InvalidateITCMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr);
#endif
return;
}
if (addr >= DTCMBase && addr < (DTCMBase + DTCMSize))
{
DataCycles += 1;
*(u32*)&DTCM[(addr - DTCMBase) & 0x3FFF] = val;
*(u32*)&DTCM[(addr - DTCMBase) & (DTCMPhysicalSize - 1)] = val;
return;
}

View File

@ -47,8 +47,9 @@ int JIT_LiteralOptimisations = true;
// Definitions and initial values of the JIT settings; compiled only when
// JIT_ENABLED is defined.
#ifdef JIT_ENABLED
int JIT_Enable = false;
int JIT_MaxBlockSize = 32;
// NOTE(review): JIT_BrancheOptimisations is defined twice below (= 2 and = true).
// This looks like the removed and the added side of a diff shown together —
// confirm which one is live; one translation unit cannot hold both definitions.
int JIT_BrancheOptimisations = 2;
int JIT_BrancheOptimisations = true;
int JIT_LiteralOptimisations = true;
// Listed in the ConfigFile table under the "JIT_FastMem" key.
int JIT_FastMemory = true;
#endif
ConfigEntry ConfigFile[] =
@ -72,8 +73,9 @@ ConfigEntry ConfigFile[] =
#ifdef JIT_ENABLED
{"JIT_Enable", 0, &JIT_Enable, 0, NULL, 0},
{"JIT_MaxBlockSize", 0, &JIT_MaxBlockSize, 32, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 2, NULL, 0},
{"JIT_BranchOptimisations", 0, &JIT_BrancheOptimisations, 1, NULL, 0},
{"JIT_LiteralOptimisations", 0, &JIT_LiteralOptimisations, 1, NULL, 0},
{"JIT_FastMem", 0, &JIT_FastMemory, 1, NULL, 0},
#endif
{"", -1, NULL, 0, NULL, 0}

View File

@ -63,6 +63,7 @@ extern int JIT_Enable;
// Declarations of the JIT settings; the ConfigFile table stores the address of
// each of these variables next to its key string.
extern int JIT_MaxBlockSize;
extern int JIT_BrancheOptimisations;
extern int JIT_LiteralOptimisations;
extern int JIT_FastMemory;
#endif
}

View File

@ -33,6 +33,7 @@
#include "AREngine.h"
#include "Platform.h"
#include "ARMJIT.h"
#include "ARMJIT_Memory.h"
#include "DSi.h"
#include "DSi_SPI_TSC.h"
@ -94,17 +95,17 @@ u32 CPUStop;
// Emulated memory backing stores and the shared-WRAM mapping state.
// NOTE(review): MainRAM, SharedWRAM, ARM7WRAM and SWRAM_ARM9/SWRAM_ARM7 each
// appear twice below with different types (fixed-size array vs pointer,
// pointer + separate mask vs MemRegion). This looks like the removed and the
// added side of a diff shown together — confirm which declarations are live.
u8 ARM9BIOS[0x1000];
u8 ARM7BIOS[0x4000];
u8 MainRAM[0x1000000];
u8* MainRAM;
// NOTE(review): the accessors in this file index MainRAM both with
// `addr & MainRAMMask` and with `addr & (MainRAMSize - 1)` — confirm the two
// masks always agree.
u32 MainRAMMask;
u8 SharedWRAM[0x8000];
u8* SharedWRAM;
// Last value applied by MapSharedWRAM(); its low two bits select the layout.
u8 WRAMCnt;
u8* SWRAM_ARM9;
u8* SWRAM_ARM7;
u32 SWRAM_ARM9Mask;
u32 SWRAM_ARM7Mask;
u8 ARM7WRAM[0x10000];
// putting them together so they're always next to each other
MemRegion SWRAM_ARM9;
MemRegion SWRAM_ARM7;
u8* ARM7WRAM;
u16 ExMemCnt[2];
@ -171,6 +172,10 @@ bool Init()
// With JIT_ENABLED defined, only ARMJIT::Init() runs here; otherwise the three
// RAM buffers are allocated on the heap at this point.
// NOTE(review): presumably ARMJIT::Init() sets up MainRAM/ARM7WRAM/SharedWRAM
// in JIT builds — confirm, since nothing visible here assigns them on that path.
#ifdef JIT_ENABLED
ARMJIT::Init();
#else
MainRAM = new u8[MainRAMSize];
ARM7WRAM = new u8[ARM7WRAMSize];
SharedWRAM = new u8[SharedWRAMSize];
#endif
DMAs[0] = new DMA(0, 0);
@ -485,6 +490,10 @@ void Reset()
printf("ARM7 BIOS loaded\n");
fclose(f);
}
#ifdef JIT_ENABLED
ARMJIT::Reset();
#endif
if (ConsoleType == 1)
{
@ -510,7 +519,7 @@ void Reset()
InitTimings();
memset(MainRAM, 0, 0x1000000);
memset(MainRAM, 0, MainRAMMask + 1);
memset(SharedWRAM, 0, 0x8000);
memset(ARM7WRAM, 0, 0x10000);
@ -587,10 +596,6 @@ void Reset()
}
AREngine::Reset();
#ifdef JIT_ENABLED
ARMJIT::Reset();
#endif
}
void Stop()
@ -705,7 +710,7 @@ bool DoSavestate(Savestate* file)
file->VarArray(MainRAM, 0x400000);
file->VarArray(SharedWRAM, 0x8000);
file->VarArray(ARM7WRAM, 0x10000);
file->VarArray(ARM7WRAM, ARM7WRAMSize);
file->VarArray(ExMemCnt, 2*sizeof(u16));
file->VarArray(ROMSeed0, 2*8);
@ -1128,43 +1133,40 @@ void MapSharedWRAM(u8 val)
// Body of MapSharedWRAM(u8 val) as shown in this hunk: returns early when the
// value is unchanged, otherwise stores it in WRAMCnt and re-points the ARM9 and
// ARM7 shared-WRAM windows into SharedWRAM.
// NOTE(review): every case below lists its assignments twice, once on separate
// pointer/mask variables (SWRAM_ARM9, SWRAM_ARM9Mask, ...) and once on MemRegion
// fields (SWRAM_ARM9.Mem, SWRAM_ARM9.Mask, ...). This looks like both sides of a
// diff interleaved — confirm which form is live.
if (val == WRAMCnt)
return;
// Called before WRAMCnt is overwritten with the new value.
// NOTE(review): unlike the ARMJIT:: calls at the end of this body, this call is
// not inside a JIT_ENABLED guard in the visible lines — confirm that
// ARMJIT_Memory::RemapSWRAM is available in builds without JIT_ENABLED.
ARMJIT_Memory::RemapSWRAM();
WRAMCnt = val;
// Only the low two bits select the layout.
switch (WRAMCnt & 0x3)
{
case 0:
// ARM9 window starts at SharedWRAM offset 0 with mask 0x7FFF; ARM7 gets no window.
SWRAM_ARM9 = &SharedWRAM[0];
SWRAM_ARM9Mask = 0x7FFF;
SWRAM_ARM7 = NULL;
SWRAM_ARM7Mask = 0;
SWRAM_ARM9.Mem = &SharedWRAM[0];
SWRAM_ARM9.Mask = 0x7FFF;
SWRAM_ARM7.Mem = NULL;
SWRAM_ARM7.Mask = 0;
break;
case 1:
// ARM9 window starts at offset 0x4000, ARM7 window at offset 0; both use mask 0x3FFF.
SWRAM_ARM9 = &SharedWRAM[0x4000];
SWRAM_ARM9Mask = 0x3FFF;
SWRAM_ARM7 = &SharedWRAM[0];
SWRAM_ARM7Mask = 0x3FFF;
SWRAM_ARM9.Mem = &SharedWRAM[0x4000];
SWRAM_ARM9.Mask = 0x3FFF;
SWRAM_ARM7.Mem = &SharedWRAM[0];
SWRAM_ARM7.Mask = 0x3FFF;
break;
case 2:
// Mirror image of case 1: ARM9 window at offset 0, ARM7 window at offset 0x4000.
SWRAM_ARM9 = &SharedWRAM[0];
SWRAM_ARM9Mask = 0x3FFF;
SWRAM_ARM7 = &SharedWRAM[0x4000];
SWRAM_ARM7Mask = 0x3FFF;
SWRAM_ARM9.Mem = &SharedWRAM[0];
SWRAM_ARM9.Mask = 0x3FFF;
SWRAM_ARM7.Mem = &SharedWRAM[0x4000];
SWRAM_ARM7.Mask = 0x3FFF;
break;
case 3:
// ARM9 gets no window; ARM7 window starts at offset 0 with mask 0x7FFF.
SWRAM_ARM9 = NULL;
SWRAM_ARM9Mask = 0;
SWRAM_ARM7 = &SharedWRAM[0];
SWRAM_ARM7Mask = 0x7FFF;
SWRAM_ARM9.Mem = NULL;
SWRAM_ARM9.Mask = 0;
SWRAM_ARM7.Mem = &SharedWRAM[0];
SWRAM_ARM7.Mask = 0x7FFF;
break;
}
// Reports the address range 0x3000000 .. 0x3000000 + 0x1000000 to the JIT for
// both CPUs; compiled only when JIT_ENABLED is defined.
#ifdef JIT_ENABLED
ARMJIT::UpdateMemoryStatus9(0x3000000, 0x3000000 + 0x1000000);
ARMJIT::UpdateMemoryStatus7(0x3000000, 0x3000000 + 0x1000000);
#endif
}
@ -1835,12 +1837,12 @@ u8 ARM9Read8(u32 addr)
switch (addr & 0xFF000000)
{
case 0x02000000:
return *(u8*)&MainRAM[addr & MainRAMMask];
return *(u8*)&MainRAM[addr & (MainRAMSize - 1)];
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
return *(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
return *(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@ -1900,12 +1902,12 @@ u16 ARM9Read16(u32 addr)
switch (addr & 0xFF000000)
{
case 0x02000000:
return *(u16*)&MainRAM[addr & MainRAMMask];
return *(u16*)&MainRAM[addr & (MainRAMSize - 1)];
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
return *(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
return *(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@ -1968,9 +1970,9 @@ u32 ARM9Read32(u32 addr)
return *(u32*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
return *(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask];
return *(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask];
}
else
{
@ -2026,7 +2028,7 @@ void ARM9Write8(u32 addr, u8 val)
{
case 0x02000000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateMainRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u8*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@ -2035,12 +2037,12 @@ void ARM9Write8(u32 addr, u8 val)
return;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateSWRAM9IfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
*(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
*(u8*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@ -2085,7 +2087,7 @@ void ARM9Write16(u32 addr, u16 val)
{
case 0x02000000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateMainRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u16*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@ -2094,12 +2096,12 @@ void ARM9Write16(u32 addr, u16 val)
return;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateSWRAM9IfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
*(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
*(u16*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@ -2113,18 +2115,16 @@ void ARM9Write16(u32 addr, u16 val)
return;
case 0x06000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr);
#endif
switch (addr & 0x00E00000)
{
case 0x00000000: GPU::WriteVRAM_ABG<u16>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return;
default:
#ifdef JIT_ENABLED
ARMJIT::InvalidateLCDCIfNecessary(addr);
#endif
GPU::WriteVRAM_LCDC<u16>(addr, val);
return;
default: GPU::WriteVRAM_LCDC<u16>(addr, val); return;
}
case 0x07000000:
@ -2165,7 +2165,7 @@ void ARM9Write32(u32 addr, u32 val)
{
case 0x02000000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateMainRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u32*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@ -2174,12 +2174,12 @@ void ARM9Write32(u32 addr, u32 val)
return ;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateSWRAM9IfNecessary(addr);
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
*(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
*(u32*)&SWRAM_ARM9.Mem[addr & SWRAM_ARM9.Mask] = val;
}
return;
@ -2193,18 +2193,16 @@ void ARM9Write32(u32 addr, u32 val)
return;
case 0x06000000:
#ifdef JIT_ENABLED
ARMJIT::CheckAndInvalidate<0, ARMJIT_Memory::memregion_VRAM>(addr);
#endif
switch (addr & 0x00E00000)
{
case 0x00000000: GPU::WriteVRAM_ABG<u32>(addr, val); return;
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
default:
#ifdef JIT_ENABLED
ARMJIT::InvalidateLCDCIfNecessary(addr);
#endif
GPU::WriteVRAM_LCDC<u32>(addr, val);
return;
default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
}
case 0x07000000:
@ -2250,10 +2248,10 @@ bool ARM9GetMemRegion(u32 addr, bool write, MemRegion* region)
return true;
case 0x03000000:
if (SWRAM_ARM9)
if (SWRAM_ARM9.Mem)
{
region->Mem = SWRAM_ARM9;
region->Mask = SWRAM_ARM9Mask;
region->Mem = SWRAM_ARM9.Mem;
region->Mask = SWRAM_ARM9.Mask;
return true;
}
break;
@ -2292,17 +2290,17 @@ u8 ARM7Read8(u32 addr)
return *(u8*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
return *(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
return *(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
return *(u8*)&ARM7WRAM[addr & 0xFFFF];
return *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
return *(u8*)&ARM7WRAM[addr & 0xFFFF];
return *(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead8(addr);
@ -2352,17 +2350,17 @@ u16 ARM7Read16(u32 addr)
return *(u16*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
return *(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
return *(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
return *(u16*)&ARM7WRAM[addr & 0xFFFF];
return *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
return *(u16*)&ARM7WRAM[addr & 0xFFFF];
return *(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead16(addr);
@ -2419,17 +2417,17 @@ u32 ARM7Read32(u32 addr)
return *(u32*)&MainRAM[addr & MainRAMMask];
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
return *(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask];
return *(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask];
}
else
{
return *(u32*)&ARM7WRAM[addr & 0xFFFF];
return *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
}
case 0x03800000:
return *(u32*)&ARM7WRAM[addr & 0xFFFF];
return *(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)];
case 0x04000000:
return ARM7IORead32(addr);
@ -2474,7 +2472,7 @@ void ARM7Write8(u32 addr, u8 val)
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateMainRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u8*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@ -2483,28 +2481,28 @@ void ARM7Write8(u32 addr, u8 val)
return;
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateSWRAM7IfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
*(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
*(u8*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
*(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
*(u8*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@ -2514,7 +2512,7 @@ void ARM7Write8(u32 addr, u8 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u8>(addr, val);
return;
@ -2551,7 +2549,7 @@ void ARM7Write16(u32 addr, u16 val)
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateMainRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u16*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@ -2560,28 +2558,28 @@ void ARM7Write16(u32 addr, u16 val)
return;
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateSWRAM7IfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
*(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
*(u16*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
*(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
*(u16*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@ -2599,7 +2597,7 @@ void ARM7Write16(u32 addr, u16 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u16>(addr, val);
return;
@ -2638,7 +2636,7 @@ void ARM7Write32(u32 addr, u32 val)
case 0x02000000:
case 0x02800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateMainRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_MainRAM>(addr);
#endif
*(u32*)&MainRAM[addr & MainRAMMask] = val;
#ifdef JIT_ENABLED
@ -2647,28 +2645,28 @@ void ARM7Write32(u32 addr, u32 val)
return;
case 0x03000000:
if (SWRAM_ARM7)
if (SWRAM_ARM7.Mem)
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateSWRAM7IfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_SWRAM>(addr);
#endif
*(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
*(u32*)&SWRAM_ARM7.Mem[addr & SWRAM_ARM7.Mask] = val;
return;
}
else
{
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
*(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
}
case 0x03800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_WRAM7>(addr);
#endif
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
*(u32*)&ARM7WRAM[addr & (ARM7WRAMSize - 1)] = val;
return;
case 0x04000000:
@ -2687,7 +2685,7 @@ void ARM7Write32(u32 addr, u32 val)
case 0x06000000:
case 0x06800000:
#ifdef JIT_ENABLED
ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
ARMJIT::CheckAndInvalidate<1, ARMJIT_Memory::memregion_VWRAM>(addr);
#endif
GPU::WriteVRAM_ARM7<u32>(addr, val);
return;
@ -2736,17 +2734,17 @@ bool ARM7GetMemRegion(u32 addr, bool write, MemRegion* region)
// then access all the WRAM as one contiguous block starting at 0x037F8000
// this case needs a bit of a hack to cover
// it's not really worth bothering anyway
if (!SWRAM_ARM7)
if (!SWRAM_ARM7.Mem)
{
region->Mem = ARM7WRAM;
region->Mask = 0xFFFF;
region->Mask = ARM7WRAMSize-1;
return true;
}
break;
case 0x03800000:
region->Mem = ARM7WRAM;
region->Mask = 0xFFFF;
region->Mask = ARM7WRAMSize-1;
return true;
}

View File

@ -134,6 +134,7 @@ typedef struct
} MemRegion;
extern int ConsoleType;
extern int CurCPU;
extern u8 ARM9MemTimings[0x40000][4];
extern u8 ARM7MemTimings[0x20000][4];
@ -161,20 +162,20 @@ extern u8 ARM9BIOS[0x1000];
// Declarations of the emulated memory buffers and shared-WRAM mapping state.
// NOTE(review): MainRAM, SharedWRAM, ARM7WRAM and SWRAM_ARM9/SWRAM_ARM7 are each
// declared twice below with conflicting types (array vs pointer, u8* vs
// MemRegion). This looks like the removed and the added side of a diff shown
// together — confirm which declarations are live.
extern u8 ARM7BIOS[0x4000];
extern u16 ARM7BIOSProt;
extern u8 MainRAM[0x1000000];
extern u8* MainRAM;
extern u32 MainRAMMask;
extern u8 SharedWRAM[0x8000];
extern u8* SWRAM_ARM9;
extern u8* SWRAM_ARM7;
extern u32 SWRAM_ARM9Mask;
extern u32 SWRAM_ARM7Mask;
extern u8 ARM7WRAM[0x10000];
// Same value as the bound of the array form of SharedWRAM (0x8000 bytes).
const u32 SharedWRAMSize = 0x8000;
extern u8* SharedWRAM;
extern MemRegion SWRAM_ARM9;
extern MemRegion SWRAM_ARM7;
extern u32 KeyInput;
// Same value as the bound of the array form of ARM7WRAM (0x10000 bytes).
const u32 ARM7WRAMSize = 0x10000;
extern u8* ARM7WRAM;
bool Init();
void DeInit();
void Reset();