rewrite JIT memory emulation
This commit is contained in:
parent
bcc4b5c8dd
commit
0f53a34551
10
src/ARM.cpp
10
src/ARM.cpp
|
@ -579,7 +579,8 @@ void ARMv5::ExecuteJIT()
|
|||
while (NDS::ARM9Timestamp < NDS::ARM9Target)
|
||||
{
|
||||
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
|
||||
if (!ARMJIT::IsMapped<0>(instrAddr))
|
||||
u32 translatedAddr = ARMJIT::TranslateAddr9(instrAddr);
|
||||
if (!translatedAddr)
|
||||
{
|
||||
NDS::ARM9Timestamp = NDS::ARM9Target;
|
||||
printf("ARMv5 PC in non executable region %08X\n", R[15]);
|
||||
|
@ -589,7 +590,7 @@ void ARMv5::ExecuteJIT()
|
|||
// hack so Cycles <= 0 becomes Cycles < 0
|
||||
Cycles = NDS::ARM9Target - NDS::ARM9Timestamp - 1;
|
||||
|
||||
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<0>(instrAddr));
|
||||
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<0>(translatedAddr);
|
||||
if (block)
|
||||
ARM_Dispatch(this, block);
|
||||
else
|
||||
|
@ -722,7 +723,8 @@ void ARMv4::ExecuteJIT()
|
|||
while (NDS::ARM7Timestamp < NDS::ARM7Target)
|
||||
{
|
||||
u32 instrAddr = R[15] - ((CPSR&0x20)?2:4);
|
||||
if (!ARMJIT::IsMapped<1>(instrAddr))
|
||||
u32 translatedAddr = ARMJIT::TranslateAddr7(instrAddr);
|
||||
if (!translatedAddr)
|
||||
{
|
||||
NDS::ARM7Timestamp = NDS::ARM7Target;
|
||||
printf("ARMv4 PC in non executable region %08X\n", R[15]);
|
||||
|
@ -731,7 +733,7 @@ void ARMv4::ExecuteJIT()
|
|||
|
||||
Cycles = NDS::ARM7Target - NDS::ARM7Timestamp - 1;
|
||||
|
||||
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry(ARMJIT::TranslateAddr<1>(instrAddr));
|
||||
ARMJIT::JitBlockEntry block = ARMJIT::LookUpBlockEntry<1>(translatedAddr);
|
||||
if (block)
|
||||
ARM_Dispatch(this, block);
|
||||
else
|
||||
|
|
24
src/ARM.h
24
src/ARM.h
|
@ -308,7 +308,7 @@ public:
|
|||
void DataRead8(u32 addr, u32* val)
|
||||
{
|
||||
*val = NDS::ARM7Read8(addr);
|
||||
DataRegion = addr >> 20;
|
||||
DataRegion = addr;
|
||||
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
|
||||
}
|
||||
|
||||
|
@ -317,7 +317,7 @@ public:
|
|||
addr &= ~1;
|
||||
|
||||
*val = NDS::ARM7Read16(addr);
|
||||
DataRegion = addr >> 20;
|
||||
DataRegion = addr;
|
||||
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
|
||||
}
|
||||
|
||||
|
@ -326,7 +326,7 @@ public:
|
|||
addr &= ~3;
|
||||
|
||||
*val = NDS::ARM7Read32(addr);
|
||||
DataRegion = addr >> 20;
|
||||
DataRegion = addr;
|
||||
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
|
||||
}
|
||||
|
||||
|
@ -341,7 +341,7 @@ public:
|
|||
void DataWrite8(u32 addr, u8 val)
|
||||
{
|
||||
NDS::ARM7Write8(addr, val);
|
||||
DataRegion = addr >> 20;
|
||||
DataRegion = addr;
|
||||
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
|
||||
}
|
||||
|
||||
|
@ -350,7 +350,7 @@ public:
|
|||
addr &= ~1;
|
||||
|
||||
NDS::ARM7Write16(addr, val);
|
||||
DataRegion = addr >> 20;
|
||||
DataRegion = addr;
|
||||
DataCycles = NDS::ARM7MemTimings[addr >> 15][0];
|
||||
}
|
||||
|
||||
|
@ -359,7 +359,7 @@ public:
|
|||
addr &= ~3;
|
||||
|
||||
NDS::ARM7Write32(addr, val);
|
||||
DataRegion = addr >> 20;
|
||||
DataRegion = addr;
|
||||
DataCycles = NDS::ARM7MemTimings[addr >> 15][2];
|
||||
}
|
||||
|
||||
|
@ -390,7 +390,7 @@ public:
|
|||
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
|
||||
s32 numD = DataCycles;
|
||||
|
||||
if ((DataRegion >> 4) == 0x02) // mainRAM
|
||||
if ((DataRegion >> 24) == 0x02) // mainRAM
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
Cycles -= numC + numD;
|
||||
|
@ -417,7 +417,7 @@ public:
|
|||
s32 numC = NDS::ARM7MemTimings[CodeCycles][(CPSR&0x20)?0:2];
|
||||
s32 numD = DataCycles;
|
||||
|
||||
if ((DataRegion >> 4) == 0x02)
|
||||
if ((DataRegion >> 24) == 0x02)
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
Cycles -= numC + numD;
|
||||
|
@ -443,4 +443,12 @@ void T_UNK(ARM* cpu);
|
|||
|
||||
}
|
||||
|
||||
namespace NDS
|
||||
{
|
||||
|
||||
extern ARMv5* ARM9;
|
||||
extern ARMv4* ARM7;
|
||||
|
||||
}
|
||||
|
||||
#endif // ARM_H
|
||||
|
|
899
src/ARMJIT.cpp
899
src/ARMJIT.cpp
File diff suppressed because it is too large
Load Diff
65
src/ARMJIT.h
65
src/ARMJIT.h
|
@ -28,45 +28,60 @@ extern const u32 ExeMemRegionSizes[];
|
|||
|
||||
typedef u32 (*JitBlockEntry)();
|
||||
|
||||
extern u32 AddrTranslate9[0x2000];
|
||||
extern u32 AddrTranslate7[0x4000];
|
||||
|
||||
const u32 ExeMemSpaceSize = 0x518000; // I hate you C++, sometimes I really hate you...
|
||||
|
||||
template <u32 num>
|
||||
inline bool IsMapped(u32 addr)
|
||||
{
|
||||
if (num == 0)
|
||||
return AddrTranslate9[(addr & 0xFFFFFFF) >> 15] >= ExeMemRegionSizes[exeMem_Unmapped];
|
||||
else
|
||||
return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] >= ExeMemRegionSizes[exeMem_Unmapped];
|
||||
}
|
||||
|
||||
template <u32 num>
|
||||
inline u32 TranslateAddr(u32 addr)
|
||||
{
|
||||
if (num == 0)
|
||||
return AddrTranslate9[(addr & 0xFFFFFFF) >> 15] + (addr & 0x7FFF);
|
||||
else
|
||||
return AddrTranslate7[(addr & 0xFFFFFFF) >> 14] + (addr & 0x3FFF);
|
||||
}
|
||||
u32 TranslateAddr9(u32 addr);
|
||||
u32 TranslateAddr7(u32 addr);
|
||||
|
||||
template <u32 Num>
|
||||
JitBlockEntry LookUpBlockEntry(u32 addr);
|
||||
|
||||
|
||||
void Init();
|
||||
void DeInit();
|
||||
|
||||
void InvalidateByAddr(u32 pseudoPhysical, bool mayRestore = true);
|
||||
void InvalidateAll();
|
||||
void Reset();
|
||||
|
||||
void InvalidateITCM(u32 addr);
|
||||
void InvalidateByAddr7(u32 addr);
|
||||
void InvalidateByAddr(u32 pseudoPhysical);
|
||||
|
||||
void InvalidateRegionIfNecessary(u32 addr);
|
||||
|
||||
// Maps `addr` into the exeMem_MainRAM region (wrapping it with
// MAIN_RAM_SIZE - 1) and forwards the resulting offset to
// InvalidateRegionIfNecessary.
inline void InvalidateMainRAMIfNecessary(u32 addr)
{
    const auto offsetInRegion = addr & (MAIN_RAM_SIZE - 1);
    InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_MainRAM] + offsetInRegion);
}
|
||||
// Maps `addr` into the exeMem_ITCM region (low 15 bits of the address) and
// forwards the resulting offset to InvalidateRegionIfNecessary.
inline void InvalidateITCMIfNecessary(u32 addr)
{
    const u32 offsetInRegion = addr & 0x7FFF;
    InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ITCM] + offsetInRegion);
}
|
||||
// Maps an LCDC-VRAM address into the exeMem_LCDC region and forwards the
// resulting offset to InvalidateRegionIfNecessary. Addresses above the LCDC
// window are ignored.
//
// The LCDC window spans 0x06800000..0x068A3FFF inclusive, so the upper bound
// has to accept 0x068A3FFF itself; a strict `<` against that value would skip
// byte writes to the very last address of the window.
// NOTE(review): callers are assumed to pass addr >= 0x6800000 (the subtraction
// below is unsigned) — confirm at the call sites.
inline void InvalidateLCDCIfNecessary(u32 addr)
{
    if (addr <= 0x68A3FFF)
        InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_LCDC] + (addr - 0x6800000));
}
|
||||
// Computes the exeMem_SWRAM offset for an ARM7-side shared-WRAM access and
// forwards it to InvalidateRegionIfNecessary. The offset is the distance of
// NDS::SWRAM_ARM7 from NDS::SharedWRAM plus `addr` masked with
// NDS::SWRAM_ARM7Mask.
inline void InvalidateSWRAM7IfNecessary(u32 addr)
{
    const auto bankOffset = NDS::SWRAM_ARM7 - NDS::SharedWRAM;
    const auto offsetInBank = addr & NDS::SWRAM_ARM7Mask;
    InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + bankOffset + offsetInBank);
}
|
||||
// Computes the exeMem_SWRAM offset for an ARM9-side shared-WRAM access and
// forwards it to InvalidateRegionIfNecessary. The offset is the distance of
// NDS::SWRAM_ARM9 from NDS::SharedWRAM plus `addr` masked with
// NDS::SWRAM_ARM9Mask.
inline void InvalidateSWRAM9IfNecessary(u32 addr)
{
    const auto bankOffset = NDS::SWRAM_ARM9 - NDS::SharedWRAM;
    const auto offsetInBank = addr & NDS::SWRAM_ARM9Mask;
    InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_SWRAM] + bankOffset + offsetInBank);
}
|
||||
// Maps `addr` into the exeMem_ARM7_WRAM region (low 16 bits of the address)
// and forwards the resulting offset to InvalidateRegionIfNecessary.
inline void InvalidateARM7WRAMIfNecessary(u32 addr)
{
    const u32 offsetInRegion = addr & 0xFFFF;
    InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WRAM] + offsetInRegion);
}
|
||||
// Maps `addr` into the exeMem_ARM7_WVRAM region (low 17 bits of the address)
// and forwards the resulting offset to InvalidateRegionIfNecessary.
inline void InvalidateARM7WVRAMIfNecessary(u32 addr)
{
    const u32 offsetInRegion = addr & 0x1FFFF;
    InvalidateRegionIfNecessary(ExeMemRegionOffsets[exeMem_ARM7_WVRAM] + offsetInRegion);
}
|
||||
|
||||
void CompileBlock(ARM* cpu);
|
||||
|
||||
void ResetBlockCache();
|
||||
|
||||
void UpdateMemoryStatus9(u32 start, u32 end);
|
||||
void UpdateMemoryStatus7(u32 start, u32 end);
|
||||
|
||||
}
|
||||
|
||||
extern "C" void ARM_Dispatch(ARM* cpu, ARMJIT::JitBlockEntry entry);
|
||||
|
|
|
@ -650,7 +650,7 @@ void Compiler::Comp_AddCycles_CDI()
|
|||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if ((CurInstr.DataRegion >> 4) == 0x02) // mainRAM
|
||||
if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles = numC + numD;
|
||||
|
@ -695,7 +695,7 @@ void Compiler::Comp_AddCycles_CD()
|
|||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if ((CurInstr.DataRegion >> 4) == 0x02)
|
||||
if ((CurInstr.DataRegion >> 24) == 0x02)
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles += numC + numD;
|
||||
|
|
|
@ -152,30 +152,34 @@ struct __attribute__((packed)) TinyVector
|
|||
class JitBlock
|
||||
{
|
||||
public:
|
||||
JitBlock(u32 numInstrs, u32 numAddresses)
|
||||
JitBlock(u32 num, u32 literalHash, u32 numAddresses, u32 numLiterals)
|
||||
{
|
||||
NumInstrs = numInstrs;
|
||||
Num = num;
|
||||
NumAddresses = numAddresses;
|
||||
Data.SetLength(numInstrs + numAddresses);
|
||||
NumLiterals = numLiterals;
|
||||
Data.SetLength(numAddresses * 2 + numLiterals);
|
||||
}
|
||||
|
||||
u32 StartAddr;
|
||||
u32 PseudoPhysicalAddr;
|
||||
|
||||
u32 NumInstrs;
|
||||
u32 NumAddresses;
|
||||
u32 InstrHash, LiteralHash;
|
||||
u8 Num;
|
||||
u16 NumAddresses;
|
||||
u16 NumLiterals;
|
||||
|
||||
JitBlockEntry EntryPoint;
|
||||
|
||||
u32* Instrs()
|
||||
{ return &Data[0]; }
|
||||
u32* AddressRanges()
|
||||
{ return &Data[NumInstrs]; }
|
||||
{ return &Data[0]; }
|
||||
u32* AddressMasks()
|
||||
{ return &Data[NumAddresses]; }
|
||||
u32* Literals()
|
||||
{ return &Data[NumAddresses * 2]; }
|
||||
u32* Links()
|
||||
{ return &Data[NumInstrs + NumAddresses]; }
|
||||
{ return &Data[NumAddresses * 2 + NumLiterals]; }
|
||||
|
||||
u32 NumLinks()
|
||||
{ return Data.Length - NumInstrs - NumAddresses; }
|
||||
{ return Data.Length - NumAddresses * 2 - NumLiterals; }
|
||||
|
||||
void AddLink(u32 link)
|
||||
{
|
||||
|
@ -184,7 +188,7 @@ public:
|
|||
|
||||
void ResetLinks()
|
||||
{
|
||||
Data.SetLength(NumInstrs + NumAddresses);
|
||||
Data.SetLength(NumAddresses * 2 + NumLiterals);
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -200,8 +204,7 @@ private:
|
|||
struct __attribute__((packed)) AddressRange
|
||||
{
|
||||
TinyVector<JitBlock*> Blocks;
|
||||
u16 InvalidLiterals;
|
||||
u16 TimesInvalidated;
|
||||
u32 Code;
|
||||
};
|
||||
|
||||
extern AddressRange CodeRanges[ExeMemSpaceSize / 512];
|
||||
|
@ -210,14 +213,45 @@ typedef void (*InterpreterFunc)(ARM* cpu);
|
|||
extern InterpreterFunc InterpretARM[];
|
||||
extern InterpreterFunc InterpretTHUMB[];
|
||||
|
||||
extern u8 MemRegion9[0x80000];
|
||||
extern u8 MemRegion7[0x80000];
|
||||
extern u8 MemoryStatus9[0x800000];
|
||||
extern u8 MemoryStatus7[0x800000];
|
||||
|
||||
extern TinyVector<u32> InvalidLiterals;
|
||||
|
||||
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size);
|
||||
|
||||
template <u32 Num>
|
||||
void LinkBlock(ARM* cpu, u32 codeOffset);
|
||||
|
||||
// Memory-region classes used by the JIT load/store compiler as the "expected
// target" of an access. The numeric values are spelled out because
// Comp_MemAccess emits them as 8-bit immediates that are compared against
// entries of MemoryStatus9 / MemoryStatus7.
// NOTE(review): the load path masks bit 0x80 off the status byte before
// comparing, so these values presumably have to stay below 0x80 — confirm.
enum
{
    // Catch-all class; Comp_MemAccess also forces it for conditional instructions.
    memregion_Other   = 0,
    memregion_ITCM    = 1,
    memregion_DTCM    = 2,
    memregion_BIOS9   = 3,
    memregion_MainRAM = 4,
    memregion_SWRAM9  = 5,
    memregion_SWRAM7  = 6,
    memregion_IO9     = 7,
    memregion_VRAM    = 8,
    memregion_BIOS7   = 9,
    memregion_WRAM7   = 10,
    memregion_IO7     = 11,
    memregion_Wifi    = 12,
    memregion_VWRAM   = 13,
};
|
||||
|
||||
int ClassifyAddress9(u32 addr);
|
||||
int ClassifyAddress7(u32 addr);
|
||||
|
||||
template <typename T> T SlowRead9(ARMv5* cpu, u32 addr);
|
||||
template <typename T> void SlowWrite9(ARMv5* cpu, u32 addr, T val);
|
||||
template <typename T> T SlowRead7(u32 addr);
|
||||
template <typename T> void SlowWrite7(u32 addr, T val);
|
||||
|
||||
template <bool PreInc, bool Write> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
|
||||
template <bool PreInc, bool Write> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -95,20 +95,6 @@ public:
|
|||
LiteralsLoaded = 0;
|
||||
}
|
||||
|
||||
// Builds the set of native registers to push for the current block.
//
// The count of distinct guest registers read or written by any instruction in
// Instrs[0..InstrsCount) is capped at NativeRegsAvailable; that many entries
// are then taken from the front of NativeRegAllocOrder and returned as a bit
// mask (bit N set == native register N).
BitSet32 GetPushRegs()
{
    BitSet16 touched;
    for (int idx = 0; idx < InstrsCount; idx++)
    {
        const auto& info = Instrs[idx].Info;
        touched |= BitSet16(info.SrcRegs | info.DstRegs);
    }

    u32 nativeCount = std::min((int)touched.Count(), NativeRegsAvailable);

    BitSet32 pushed;
    for (int slot = 0; slot < nativeCount; slot++)
        pushed |= BitSet32(1 << (int)NativeRegAllocOrder[slot]);

    return pushed;
}
|
||||
|
||||
void Prepare(bool thumb, int i)
|
||||
{
|
||||
FetchedInstr instr = Instrs[i];
|
||||
|
@ -139,7 +125,6 @@ public:
|
|||
UnloadRegister(reg);
|
||||
|
||||
u16 necessaryRegs = ((instr.Info.SrcRegs & PCAllocatableAsSrc) | instr.Info.DstRegs) & ~instr.Info.NotStrictlyNeeded;
|
||||
u16 writeRegs = instr.Info.DstRegs & ~instr.Info.NotStrictlyNeeded;
|
||||
BitSet16 needToBeLoaded(necessaryRegs & ~LoadedRegs);
|
||||
if (needToBeLoaded != BitSet16(0))
|
||||
{
|
||||
|
@ -182,13 +167,12 @@ public:
|
|||
if (left-- == 0)
|
||||
break;
|
||||
|
||||
writeRegs |= (1 << reg) & instr.Info.DstRegs;
|
||||
LoadRegister(reg, !(thumb || instr.Cond() >= 0xE) || (1 << reg) & instr.Info.SrcRegs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DirtyRegs |= writeRegs & ~(1 << 15);
|
||||
DirtyRegs |= (LoadedRegs & instr.Info.DstRegs) & ~(1 << 15);
|
||||
}
|
||||
|
||||
static const Reg NativeRegAllocOrder[];
|
||||
|
|
|
@ -195,26 +195,6 @@ Compiler::Compiler()
|
|||
|
||||
Reset();
|
||||
|
||||
for (int i = 0; i < 3; i++)
|
||||
{
|
||||
for (int j = 0; j < 2; j++)
|
||||
MemoryFuncs9[i][j] = Gen_MemoryRoutine9(j, 8 << i);
|
||||
}
|
||||
MemoryFuncs7[0][0] = (void*)NDS::ARM7Read8;
|
||||
MemoryFuncs7[0][1] = (void*)NDS::ARM7Write8;
|
||||
MemoryFuncs7[1][0] = (void*)NDS::ARM7Read16;
|
||||
MemoryFuncs7[1][1] = (void*)NDS::ARM7Write16;
|
||||
MemoryFuncs7[2][0] = (void*)NDS::ARM7Read32;
|
||||
MemoryFuncs7[2][1] = (void*)NDS::ARM7Write32;
|
||||
|
||||
for (int i = 0; i < 2; i++)
|
||||
for (int j = 0; j < 2; j++)
|
||||
{
|
||||
MemoryFuncsSeq9[i][j] = Gen_MemoryRoutineSeq9(i, j);
|
||||
MemoryFuncsSeq7[i][j][0] = Gen_MemoryRoutineSeq7(i, j, false);
|
||||
MemoryFuncsSeq7[i][j][1] = Gen_MemoryRoutineSeq7(i, j, true);
|
||||
}
|
||||
|
||||
{
|
||||
// RSCRATCH mode
|
||||
// RSCRATCH2 reg number
|
||||
|
@ -317,6 +297,12 @@ Compiler::Compiler()
|
|||
// move the region forward to prevent overwriting the generated functions
|
||||
CodeMemSize -= GetWritableCodePtr() - ResetStart;
|
||||
ResetStart = GetWritableCodePtr();
|
||||
|
||||
NearStart = ResetStart;
|
||||
FarStart = ResetStart + 1024*1024*24;
|
||||
|
||||
NearSize = FarStart - ResetStart;
|
||||
FarSize = (ResetStart + CodeMemSize) - FarStart;
|
||||
}
|
||||
|
||||
void Compiler::LoadCPSR()
|
||||
|
@ -504,6 +490,9 @@ void Compiler::Reset()
|
|||
{
|
||||
memset(ResetStart, 0xcc, CodeMemSize);
|
||||
SetCodePtr(ResetStart);
|
||||
|
||||
NearCode = NearStart;
|
||||
FarCode = FarStart;
|
||||
}
|
||||
|
||||
void Compiler::Comp_SpecialBranchBehaviour(bool taken)
|
||||
|
@ -544,8 +533,16 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
|
|||
|
||||
JitBlockEntry Compiler::CompileBlock(u32 translatedAddr, ARM* cpu, bool thumb, FetchedInstr instrs[], int instrsCount)
|
||||
{
|
||||
if (CodeMemSize - (GetWritableCodePtr() - ResetStart) < 1024 * 32) // guess...
|
||||
if (NearSize - (NearCode - NearStart) < 1024 * 32) // guess...
|
||||
{
|
||||
printf("near reset\n");
|
||||
ResetBlockCache();
|
||||
}
|
||||
if (FarSize - (FarCode - FarStart) < 1024 * 32) // guess...
|
||||
{
|
||||
printf("far reset\n");
|
||||
ResetBlockCache();
|
||||
}
|
||||
|
||||
ConstantCycles = 0;
|
||||
Thumb = thumb;
|
||||
|
@ -762,12 +759,14 @@ void Compiler::Comp_AddCycles_CDI()
|
|||
Comp_AddCycles_CD();
|
||||
else
|
||||
{
|
||||
IrregularCycles = true;
|
||||
|
||||
s32 cycles;
|
||||
|
||||
s32 numC = NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2];
|
||||
s32 numD = CurInstr.DataCycles;
|
||||
|
||||
if ((CurInstr.DataRegion >> 4) == 0x02) // mainRAM
|
||||
if ((CurInstr.DataRegion >> 24) == 0x02) // mainRAM
|
||||
{
|
||||
if (CodeRegion == 0x02)
|
||||
cycles = numC + numD;
|
||||
|
|
|
@ -140,7 +140,7 @@ public:
|
|||
};
|
||||
void Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags);
|
||||
s32 Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
|
||||
void Comp_MemLoadLiteral(int size, int rd, u32 addr);
|
||||
bool Comp_MemLoadLiteral(int size, int rd, u32 addr);
|
||||
|
||||
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
|
||||
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
|
||||
|
@ -154,12 +154,6 @@ public:
|
|||
|
||||
void Comp_SpecialBranchBehaviour(bool taken);
|
||||
|
||||
void* Gen_MemoryRoutine9(bool store, int size);
|
||||
|
||||
void* Gen_MemoryRoutineSeq9(bool store, bool preinc);
|
||||
void* Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM);
|
||||
|
||||
void* Gen_ChangeCPSRRoutine();
|
||||
|
||||
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
|
||||
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
|
||||
|
@ -193,6 +187,26 @@ public:
|
|||
return (u8*)entry - ResetStart;
|
||||
}
|
||||
|
||||
void SwitchToNearCode()
|
||||
{
|
||||
FarCode = GetWritableCodePtr();
|
||||
SetCodePtr(NearCode);
|
||||
}
|
||||
|
||||
void SwitchToFarCode()
|
||||
{
|
||||
NearCode = GetWritableCodePtr();
|
||||
SetCodePtr(FarCode);
|
||||
}
|
||||
|
||||
u8* FarCode;
|
||||
u8* NearCode;
|
||||
u32 FarSize;
|
||||
u32 NearSize;
|
||||
|
||||
u8* NearStart;
|
||||
u8* FarStart;
|
||||
|
||||
u8* ResetStart;
|
||||
u32 CodeMemSize;
|
||||
|
||||
|
@ -201,12 +215,6 @@ public:
|
|||
|
||||
void* BranchStub[2];
|
||||
|
||||
void* MemoryFuncs9[3][2];
|
||||
void* MemoryFuncs7[3][2];
|
||||
|
||||
void* MemoryFuncsSeq9[2][2];
|
||||
void* MemoryFuncsSeq7[2][2][2];
|
||||
|
||||
void* ReadBanked;
|
||||
void* WriteBanked;
|
||||
|
||||
|
|
|
@ -25,236 +25,17 @@ int squeezePointer(T* ptr)
|
|||
improvement.
|
||||
*/
|
||||
|
||||
/*
|
||||
address - ABI_PARAM1 (a.k.a. ECX = RSCRATCH3 on Windows)
|
||||
store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
|
||||
*/
|
||||
void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||
bool Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
|
||||
{
|
||||
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
|
||||
AlignCode4();
|
||||
void* res = GetWritableCodePtr();
|
||||
u32 translatedAddr = Num == 0 ? TranslateAddr9(addr) : TranslateAddr7(addr);
|
||||
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
|
||||
CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
|
||||
FixupBranch insideDTCM = J_CC(CC_B);
|
||||
|
||||
CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
|
||||
FixupBranch insideITCM = J_CC(CC_B);
|
||||
|
||||
if (store)
|
||||
int invalidLiteralIdx = InvalidLiterals.Find(translatedAddr);
|
||||
if (invalidLiteralIdx != -1)
|
||||
{
|
||||
if (size > 8)
|
||||
AND(32, R(ABI_PARAM1), Imm32(addressMask));
|
||||
switch (size)
|
||||
{
|
||||
case 32: JMP((u8*)NDS::ARM9Write32, true); break;
|
||||
case 16: JMP((u8*)NDS::ARM9Write16, true); break;
|
||||
case 8: JMP((u8*)NDS::ARM9Write8, true); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (size == 32)
|
||||
{
|
||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1}, 8);
|
||||
AND(32, R(ABI_PARAM1), Imm32(addressMask));
|
||||
// everything's already in the appropriate register
|
||||
ABI_CallFunction(NDS::ARM9Read32);
|
||||
ABI_PopRegistersAndAdjustStack({ECX}, 8);
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
RET();
|
||||
}
|
||||
else if (size == 16)
|
||||
{
|
||||
AND(32, R(ABI_PARAM1), Imm32(addressMask));
|
||||
JMP((u8*)NDS::ARM9Read16, true);
|
||||
}
|
||||
else
|
||||
JMP((u8*)NDS::ARM9Read8, true);
|
||||
InvalidLiterals.Remove(invalidLiteralIdx);
|
||||
return false;
|
||||
}
|
||||
|
||||
SetJumpTarget(insideDTCM);
|
||||
AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
|
||||
if (store)
|
||||
MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
|
||||
else
|
||||
{
|
||||
MOVZX(32, size, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
|
||||
if (size == 32)
|
||||
{
|
||||
if (ABI_PARAM1 != ECX)
|
||||
MOV(32, R(ECX), R(ABI_PARAM1));
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
}
|
||||
}
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideITCM);
|
||||
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
|
||||
AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
|
||||
if (store)
|
||||
{
|
||||
MOV(size, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM2));
|
||||
|
||||
// if CodeRanges[pseudoPhysical/256].Blocks.Length > 0 we're writing into code!
|
||||
static_assert(sizeof(AddressRange) == 16);
|
||||
LEA(32, ABI_PARAM1, MDisp(ABI_PARAM3, ExeMemRegionOffsets[exeMem_ITCM]));
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SHR(32, R(RSCRATCH), Imm8(9));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
FixupBranch noCode = J_CC(CC_Z);
|
||||
JMP((u8*)InvalidateByAddr, true);
|
||||
SetJumpTarget(noCode);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVZX(32, size, RSCRATCH, MComplex(RCPU, ABI_PARAM3, SCALE_1, offsetof(ARMv5, ITCM)));
|
||||
if (size == 32)
|
||||
{
|
||||
if (ABI_PARAM1 != ECX)
|
||||
MOV(32, R(ECX), R(ABI_PARAM1));
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
}
|
||||
}
|
||||
RET();
|
||||
|
||||
static_assert(RSCRATCH == EAX, "Someone changed RSCRATCH!");
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#define MEMORY_SEQ_WHILE_COND \
|
||||
if (!store) \
|
||||
MOV(32, currentElement, R(EAX));\
|
||||
if (!preinc) \
|
||||
ADD(32, R(ABI_PARAM1), Imm8(4)); \
|
||||
\
|
||||
SUB(32, R(ABI_PARAM3), Imm8(1)); \
|
||||
J_CC(CC_NZ, repeat);
|
||||
|
||||
/*
|
||||
ABI_PARAM1 address
|
||||
ABI_PARAM2 address where registers are stored
|
||||
ABI_PARAM3 how many values to read/write
|
||||
|
||||
Dolphin x64CodeEmitter is my favourite assembler
|
||||
*/
|
||||
void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||
{
|
||||
void* res = (void*)GetWritableCodePtr();
|
||||
|
||||
const u8* repeat = GetCodePtr();
|
||||
|
||||
if (preinc)
|
||||
ADD(32, R(ABI_PARAM1), Imm8(4));
|
||||
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
|
||||
CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
|
||||
FixupBranch insideDTCM = J_CC(CC_B);
|
||||
|
||||
CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
|
||||
FixupBranch insideITCM = J_CC(CC_B);
|
||||
|
||||
OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8); // wasting stack space like a gangster
|
||||
|
||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
AND(32, R(ABI_PARAM1), Imm8(~3));
|
||||
if (store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM2), currentElement);
|
||||
CALL((void*)NDS::ARM9Write32);
|
||||
}
|
||||
else
|
||||
CALL((void*)NDS::ARM9Read32);
|
||||
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideDTCM);
|
||||
AND(32, R(RSCRATCH), Imm32(0x3FFF & ~3));
|
||||
if (store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM4), currentElement);
|
||||
MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM4));
|
||||
}
|
||||
else
|
||||
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
RET();
|
||||
|
||||
SetJumpTarget(insideITCM);
|
||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||
AND(32, R(RSCRATCH), Imm32(0x7FFF & ~3));
|
||||
if (store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM4), currentElement);
|
||||
MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM4));
|
||||
|
||||
ADD(32, R(RSCRATCH), Imm32(ExeMemRegionOffsets[exeMem_ITCM]));
|
||||
MOV(32, R(ABI_PARAM4), R(RSCRATCH));
|
||||
SHR(32, R(RSCRATCH), Imm8(9));
|
||||
SHL(32, R(RSCRATCH), Imm8(4));
|
||||
CMP(16, MDisp(RSCRATCH, squeezePointer(CodeRanges) + offsetof(AddressRange, Blocks.Length)), Imm8(0));
|
||||
FixupBranch noCode = J_CC(CC_Z);
|
||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
MOV(32, R(ABI_PARAM1), R(ABI_PARAM4));
|
||||
CALL((u8*)InvalidateByAddr);
|
||||
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
SetJumpTarget(noCode);
|
||||
}
|
||||
else
|
||||
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)));
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
RET();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
|
||||
{
|
||||
void* res = (void*)GetWritableCodePtr();
|
||||
|
||||
const u8* repeat = GetCodePtr();
|
||||
|
||||
if (preinc)
|
||||
ADD(32, R(ABI_PARAM1), Imm8(4));
|
||||
|
||||
OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8);
|
||||
|
||||
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
AND(32, R(ABI_PARAM1), Imm8(~3));
|
||||
if (store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM2), currentElement);
|
||||
CALL((void*)NDS::ARM7Write32);
|
||||
}
|
||||
else
|
||||
CALL((void*)NDS::ARM7Read32);
|
||||
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||
|
||||
MEMORY_SEQ_WHILE_COND
|
||||
RET();
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
#undef MEMORY_SEQ_WHILE_COND
|
||||
|
||||
void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
|
||||
{
|
||||
u32 val;
|
||||
// make sure arm7 bios is accessible
|
||||
u32 tmpR15 = CurCPU->R[15];
|
||||
|
@ -276,12 +57,10 @@ void Compiler::Comp_MemLoadLiteral(int size, int rd, u32 addr)
|
|||
RegCache.PutLiteral(rd, val);
|
||||
|
||||
Comp_AddCycles_CDI();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/*void fault(u32 a, u32 b, u32 c, u32 d)
|
||||
{
|
||||
printf("actually not static! %x %x %x %x\n", a, b, c, d);
|
||||
}*/
|
||||
|
||||
void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int size, int flags)
|
||||
{
|
||||
|
@ -291,18 +70,13 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
|||
if (size == 16)
|
||||
addressMask = ~1;
|
||||
|
||||
//bool check = false;
|
||||
if (Config::JIT_LiteralOptimisations && rn == 15 && rd != 15 && op2.IsImm && !(flags & (memop_SignExtend|memop_Post|memop_Store|memop_Writeback)))
|
||||
{
|
||||
u32 addr = R15 + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
u32 translatedAddr = Num == 0 ? TranslateAddr<0>(addr) : TranslateAddr<1>(addr);
|
||||
|
||||
if (!(CodeRanges[translatedAddr / 512].InvalidLiterals & (1 << ((translatedAddr & 0x1FF) / 16))))
|
||||
{
|
||||
Comp_MemLoadLiteral(size, rd, addr);
|
||||
if (Comp_MemLoadLiteral(size, rd, addr))
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
if (flags & memop_Store)
|
||||
|
@ -314,89 +88,23 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
|||
Comp_AddCycles_CDI();
|
||||
}
|
||||
|
||||
bool addrIsStatic = Config::JIT_LiteralOptimisations
|
||||
&& RegCache.IsLiteral(rn) && op2.IsImm && !(flags & (memop_Writeback|memop_Post));
|
||||
u32 staticAddress;
|
||||
if (addrIsStatic)
|
||||
staticAddress = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
OpArg rdMapped = MapReg(rd);
|
||||
|
||||
if (!addrIsStatic)
|
||||
{
|
||||
OpArg rnMapped = MapReg(rn);
|
||||
if (Thumb && rn == 15)
|
||||
rnMapped = Imm32(R15 & ~0x2);
|
||||
|
||||
bool inlinePreparation = Num == 1;
|
||||
u32 constLocalROR32 = 4;
|
||||
|
||||
void* memoryFunc = Num == 0
|
||||
? MemoryFuncs9[size >> 4][!!(flags & memop_Store)]
|
||||
: MemoryFuncs7[size >> 4][!!((flags & memop_Store))];
|
||||
|
||||
if (Config::JIT_LiteralOptimisations && (rd != 15 || (flags & memop_Store)) && op2.IsImm && RegCache.IsLiteral(rn))
|
||||
{
|
||||
u32 addr = RegCache.LiteralValues[rn] + op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1);
|
||||
|
||||
/*MOV(32, R(ABI_PARAM1), Imm32(CurInstr.Instr));
|
||||
MOV(32, R(ABI_PARAM1), Imm32(R15));
|
||||
MOV_sum(32, RSCRATCH, rnMapped, Imm32(op2.Imm * ((flags & memop_SubtractOffset) ? -1 : 1)));
|
||||
CMP(32, R(RSCRATCH), Imm32(addr));
|
||||
FixupBranch eq = J_CC(CC_E);
|
||||
CALL((void*)fault);
|
||||
SetJumpTarget(eq);*/
|
||||
|
||||
NDS::MemRegion region;
|
||||
region.Mem = NULL;
|
||||
if (Num == 0)
|
||||
{
|
||||
ARMv5* cpu5 = (ARMv5*)CurCPU;
|
||||
|
||||
// stupid dtcm...
|
||||
if (addr >= cpu5->DTCMBase && addr < (cpu5->DTCMBase + cpu5->DTCMSize))
|
||||
{
|
||||
// disable this for now as DTCM is located in heap
|
||||
// which might excced the RIP-addressable range
|
||||
//region.Mem = cpu5->DTCM;
|
||||
//region.Mask = 0x3FFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
NDS::ARM9GetMemRegion(addr, flags & memop_Store, ®ion);
|
||||
}
|
||||
}
|
||||
else
|
||||
NDS::ARM7GetMemRegion(addr, flags & memop_Store, ®ion);
|
||||
|
||||
if (region.Mem != NULL)
|
||||
{
|
||||
void* ptr = ®ion.Mem[addr & addressMask & region.Mask];
|
||||
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
MOV(size, M(ptr), MapReg(rd));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), M(ptr));
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), M(ptr));
|
||||
|
||||
if (size == 32 && addr & ~0x3)
|
||||
{
|
||||
ROR_(32, rdMapped, Imm8((addr & 0x3) << 3));
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void* specialFunc = GetFuncForAddr(CurCPU, addr, flags & memop_Store, size);
|
||||
if (specialFunc)
|
||||
{
|
||||
memoryFunc = specialFunc;
|
||||
inlinePreparation = true;
|
||||
constLocalROR32 = addr & 0x3;
|
||||
}
|
||||
}
|
||||
|
||||
X64Reg finalAddr = ABI_PARAM1;
|
||||
X64Reg finalAddr = RSCRATCH3;
|
||||
if (flags & memop_Post)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), rnMapped);
|
||||
MOV(32, R(RSCRATCH3), rnMapped);
|
||||
|
||||
finalAddr = rnMapped.GetSimpleReg();
|
||||
}
|
||||
|
@ -435,53 +143,280 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
|||
|
||||
if ((flags & memop_Writeback) && !(flags & memop_Post))
|
||||
MOV(32, rnMapped, R(finalAddr));
|
||||
}
|
||||
|
||||
int expectedTarget = Num == 0
|
||||
? ClassifyAddress9(addrIsStatic ? staticAddress : CurInstr.DataRegion)
|
||||
: ClassifyAddress7(addrIsStatic ? staticAddress : CurInstr.DataRegion);
|
||||
if (CurInstr.Cond() < 0xE)
|
||||
expectedTarget = memregion_Other;
|
||||
|
||||
bool compileFastPath = false, compileSlowPath = !addrIsStatic || (flags & memop_Store);
|
||||
|
||||
switch (expectedTarget)
|
||||
{
|
||||
case memregion_MainRAM:
|
||||
case memregion_DTCM:
|
||||
case memregion_WRAM7:
|
||||
case memregion_SWRAM9:
|
||||
case memregion_SWRAM7:
|
||||
case memregion_IO9:
|
||||
case memregion_IO7:
|
||||
case memregion_VWRAM:
|
||||
compileFastPath = true;
|
||||
break;
|
||||
case memregion_Wifi:
|
||||
compileFastPath = size >= 16;
|
||||
break;
|
||||
case memregion_VRAM:
|
||||
compileFastPath = !(flags & memop_Store) || size >= 16;
|
||||
case memregion_BIOS9:
|
||||
compileFastPath = !(flags & memop_Store);
|
||||
break;
|
||||
default: break;
|
||||
}
|
||||
|
||||
if (addrIsStatic && !compileFastPath)
|
||||
{
|
||||
compileFastPath = false;
|
||||
compileSlowPath = true;
|
||||
}
|
||||
|
||||
if (addrIsStatic && compileSlowPath)
|
||||
MOV(32, R(RSCRATCH3), Imm32(staticAddress));
|
||||
|
||||
if (compileFastPath)
|
||||
{
|
||||
FixupBranch slowPath;
|
||||
if (compileSlowPath)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
SHR(32, R(RSCRATCH), Imm8(9));
|
||||
if (flags & memop_Store)
|
||||
MOV(32, R(ABI_PARAM2), rdMapped);
|
||||
{
|
||||
CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
|
||||
AND(32, R(RSCRATCH), Imm8(~0x80));
|
||||
CMP(32, R(RSCRATCH), Imm8(expectedTarget));
|
||||
}
|
||||
|
||||
if (!(flags & memop_Store) && inlinePreparation && constLocalROR32 == 4 && size == 32)
|
||||
MOV(32, rdMapped, R(ABI_PARAM1));
|
||||
slowPath = J_CC(CC_NE, true);
|
||||
}
|
||||
|
||||
if (inlinePreparation && size > 8)
|
||||
if (expectedTarget == memregion_MainRAM || expectedTarget == memregion_WRAM7
|
||||
|| expectedTarget == memregion_BIOS9)
|
||||
{
|
||||
u8* data;
|
||||
u32 mask;
|
||||
if (expectedTarget == memregion_MainRAM)
|
||||
{
|
||||
data = NDS::MainRAM;
|
||||
mask = MAIN_RAM_SIZE - 1;
|
||||
}
|
||||
else if (expectedTarget == memregion_BIOS9)
|
||||
{
|
||||
data = NDS::ARM9BIOS;
|
||||
mask = 0xFFF;
|
||||
}
|
||||
else
|
||||
{
|
||||
data = NDS::ARM7WRAM;
|
||||
mask = 0xFFFF;
|
||||
}
|
||||
OpArg memLoc;
|
||||
if (addrIsStatic)
|
||||
{
|
||||
memLoc = M(data + ((staticAddress & mask & addressMask)));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
AND(32, R(RSCRATCH), Imm32(mask & addressMask));
|
||||
memLoc = MDisp(RSCRATCH, squeezePointer(data));
|
||||
}
|
||||
if (flags & memop_Store)
|
||||
MOV(size, memLoc, rdMapped);
|
||||
else if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
|
||||
}
|
||||
else if (expectedTarget == memregion_DTCM)
|
||||
{
|
||||
if (addrIsStatic)
|
||||
MOV(32, R(RSCRATCH), Imm32(staticAddress));
|
||||
else
|
||||
MOV(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
|
||||
AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
|
||||
OpArg memLoc = MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM));
|
||||
if (flags & memop_Store)
|
||||
MOV(size, memLoc, rdMapped);
|
||||
else if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
|
||||
}
|
||||
else if (expectedTarget == memregion_SWRAM9 || expectedTarget == memregion_SWRAM7)
|
||||
{
|
||||
MOV(64, R(RSCRATCH2), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
|
||||
if (addrIsStatic)
|
||||
{
|
||||
MOV(32, R(RSCRATCH), Imm32(staticAddress & addressMask));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH), R(RSCRATCH3));
|
||||
AND(32, R(RSCRATCH), Imm8(addressMask));
|
||||
}
|
||||
AND(32, R(RSCRATCH), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
|
||||
OpArg memLoc = MRegSum(RSCRATCH, RSCRATCH2);
|
||||
if (flags & memop_Store)
|
||||
MOV(size, memLoc, rdMapped);
|
||||
else if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), memLoc);
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), memLoc);
|
||||
}
|
||||
else
|
||||
{
|
||||
u32 maskedDataRegion;
|
||||
|
||||
if (addrIsStatic)
|
||||
{
|
||||
maskedDataRegion = staticAddress;
|
||||
MOV(32, R(ABI_PARAM1), Imm32(staticAddress));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ABI_PARAM1 != RSCRATCH3)
|
||||
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
|
||||
AND(32, R(ABI_PARAM1), Imm8(addressMask));
|
||||
|
||||
CALL(memoryFunc);
|
||||
|
||||
/*if (Num == 0 && check)
|
||||
{
|
||||
CMP(32, R(EAX), rdMapped);
|
||||
FixupBranch notEqual = J_CC(CC_E);
|
||||
ABI_PushRegistersAndAdjustStack({RSCRATCH}, 0);
|
||||
MOV(32, R(ABI_PARAM1), Imm32(R15 - (Thumb ? 4 : 8)));
|
||||
MOV(32, R(ABI_PARAM2), R(EAX));
|
||||
MOV(32, R(ABI_PARAM3), rdMapped);
|
||||
MOV(32, R(ABI_PARAM4), Imm32(CurInstr.Instr));
|
||||
CALL((u8*)fault);
|
||||
ABI_PopRegistersAndAdjustStack({RSCRATCH}, 0);
|
||||
SetJumpTarget(notEqual);
|
||||
}*/
|
||||
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (inlinePreparation && size == 32)
|
||||
{
|
||||
if (constLocalROR32 == 4)
|
||||
{
|
||||
static_assert(RSCRATCH3 == ECX);
|
||||
MOV(32, R(ECX), rdMapped);
|
||||
AND(32, R(ECX), Imm8(3));
|
||||
SHL(32, R(ECX), Imm8(3));
|
||||
ROR_(32, R(RSCRATCH), R(ECX));
|
||||
maskedDataRegion = CurInstr.DataRegion;
|
||||
if (Num == 0)
|
||||
maskedDataRegion &= ~0xFFFFFF;
|
||||
else
|
||||
maskedDataRegion &= ~0x7FFFFF;
|
||||
}
|
||||
else if (constLocalROR32 != 0)
|
||||
ROR_(32, R(RSCRATCH), Imm8(constLocalROR32 << 3));
|
||||
|
||||
void* func = GetFuncForAddr(CurCPU, maskedDataRegion, flags & memop_Store, size);
|
||||
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM2), rdMapped);
|
||||
|
||||
ABI_CallFunction((void(*)())func);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!addrIsStatic)
|
||||
MOV(32, rdMapped, R(RSCRATCH3));
|
||||
|
||||
ABI_CallFunction((void(*)())func);
|
||||
|
||||
if (!addrIsStatic)
|
||||
MOV(32, R(RSCRATCH3), rdMapped);
|
||||
|
||||
if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
|
||||
if ((size == 32 && !(flags & memop_Store)))
|
||||
{
|
||||
if (addrIsStatic)
|
||||
{
|
||||
if (staticAddress & 0x3)
|
||||
ROR_(32, rdMapped, Imm8((staticAddress & 0x3) * 8));
|
||||
}
|
||||
else
|
||||
{
|
||||
AND(32, R(RSCRATCH3), Imm8(0x3));
|
||||
SHL(32, R(RSCRATCH3), Imm8(3));
|
||||
ROR_(32, rdMapped, R(RSCRATCH3));
|
||||
}
|
||||
}
|
||||
|
||||
if (compileSlowPath)
|
||||
{
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(slowPath);
|
||||
}
|
||||
}
|
||||
|
||||
if (compileSlowPath)
|
||||
{
|
||||
if (Num == 0)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
|
||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM3), rdMapped);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 32: CALL((void*)&SlowWrite9<u32>); break;
|
||||
case 16: CALL((void*)&SlowWrite9<u16>); break;
|
||||
case 8: CALL((void*)&SlowWrite9<u8>); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (size)
|
||||
{
|
||||
case 32: CALL((void*)&SlowRead9<u32>); break;
|
||||
case 16: CALL((void*)&SlowRead9<u16>); break;
|
||||
case 8: CALL((void*)&SlowRead9<u8>); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ABI_PARAM1 != RSCRATCH3)
|
||||
MOV(32, R(ABI_PARAM1), R(RSCRATCH3));
|
||||
if (flags & memop_Store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM2), rdMapped);
|
||||
|
||||
switch (size)
|
||||
{
|
||||
case 32: CALL((void*)&SlowWrite7<u32>); break;
|
||||
case 16: CALL((void*)&SlowWrite7<u16>); break;
|
||||
case 8: CALL((void*)&SlowWrite7<u8>); break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
switch (size)
|
||||
{
|
||||
case 32: CALL((void*)&SlowRead7<u32>); break;
|
||||
case 16: CALL((void*)&SlowRead7<u16>); break;
|
||||
case 8: CALL((void*)&SlowRead7<u8>); break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!(flags & memop_Store))
|
||||
{
|
||||
if (flags & memop_SignExtend)
|
||||
MOVSX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
|
||||
else
|
||||
MOVZX(32, size, rdMapped.GetSimpleReg(), R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
|
||||
if (compileFastPath && compileSlowPath)
|
||||
{
|
||||
FixupBranch ret = J(true);
|
||||
SwitchToNearCode();
|
||||
SetJumpTarget(ret);
|
||||
}
|
||||
|
||||
if (!(flags & memop_Store) && rd == 15)
|
||||
{
|
||||
|
@ -498,39 +433,158 @@ void Compiler::Comp_MemAccess(int rd, int rn, const ComplexOperand& op2, int siz
|
|||
|
||||
s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
|
||||
{
|
||||
IrregularCycles = true;
|
||||
|
||||
int regsCount = regs.Count();
|
||||
|
||||
s32 offset = (regsCount * 4) * (decrement ? -1 : 1);
|
||||
|
||||
// we need to make sure that the stack stays aligned to 16 bytes
|
||||
#ifdef _WIN32
|
||||
// include shadow
|
||||
u32 stackAlloc = ((regsCount + 4 + 1) & ~1) * 8;
|
||||
#else
|
||||
u32 stackAlloc = ((regsCount + 1) & ~1) * 8;
|
||||
#endif
|
||||
u32 allocOffset = stackAlloc - regsCount * 8;
|
||||
|
||||
int expectedTarget = Num == 0
|
||||
? ClassifyAddress9(CurInstr.DataRegion)
|
||||
: ClassifyAddress7(CurInstr.DataRegion);
|
||||
if (usermode || CurInstr.Cond() < 0xE)
|
||||
expectedTarget = memregion_Other;
|
||||
|
||||
bool compileFastPath = false;
|
||||
|
||||
switch (expectedTarget)
|
||||
{
|
||||
case memregion_DTCM:
|
||||
case memregion_MainRAM:
|
||||
case memregion_SWRAM9:
|
||||
case memregion_SWRAM7:
|
||||
case memregion_WRAM7:
|
||||
compileFastPath = true;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (!store)
|
||||
{
|
||||
Comp_AddCycles_CDI();
|
||||
else
|
||||
Comp_AddCycles_CD();
|
||||
|
||||
if (decrement)
|
||||
{
|
||||
MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
|
||||
MOV_sum(32, RSCRATCH4, MapReg(rn), Imm32(-regsCount * 4));
|
||||
preinc ^= true;
|
||||
}
|
||||
else
|
||||
MOV(32, R(ABI_PARAM1), MapReg(rn));
|
||||
MOV(32, R(RSCRATCH4), MapReg(rn));
|
||||
|
||||
if (compileFastPath)
|
||||
{
|
||||
assert(!usermode);
|
||||
|
||||
MOV(32, R(RSCRATCH), R(RSCRATCH4));
|
||||
SHR(32, R(RSCRATCH), Imm8(9));
|
||||
|
||||
if (store)
|
||||
{
|
||||
CMP(8, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)), Imm8(expectedTarget));
|
||||
}
|
||||
else
|
||||
{
|
||||
MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(Num == 0 ? MemoryStatus9 : MemoryStatus7)));
|
||||
AND(32, R(RSCRATCH), Imm8(~0x80));
|
||||
CMP(32, R(RSCRATCH), Imm8(expectedTarget));
|
||||
}
|
||||
FixupBranch slowPath = J_CC(CC_NE, true);
|
||||
|
||||
if (expectedTarget == memregion_DTCM)
|
||||
{
|
||||
SUB(32, R(RSCRATCH4), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
|
||||
AND(32, R(RSCRATCH4), Imm32(0x3FFF & ~3));
|
||||
LEA(64, RSCRATCH4, MComplex(RCPU, RSCRATCH4, 1, offsetof(ARMv5, DTCM)));
|
||||
}
|
||||
else if (expectedTarget == memregion_MainRAM)
|
||||
{
|
||||
AND(32, R(RSCRATCH4), Imm32((MAIN_RAM_SIZE - 1) & ~3));
|
||||
ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::MainRAM)));
|
||||
}
|
||||
else if (expectedTarget == memregion_WRAM7)
|
||||
{
|
||||
AND(32, R(RSCRATCH4), Imm32(0xFFFF & ~3));
|
||||
ADD(64, R(RSCRATCH4), Imm32(squeezePointer(NDS::ARM7WRAM)));
|
||||
}
|
||||
else // SWRAM
|
||||
{
|
||||
AND(32, R(RSCRATCH4), Imm8(~3));
|
||||
AND(32, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9Mask : &NDS::SWRAM_ARM7Mask));
|
||||
ADD(64, R(RSCRATCH4), M(expectedTarget == memregion_SWRAM9 ? &NDS::SWRAM_ARM9 : &NDS::SWRAM_ARM7));
|
||||
}
|
||||
u32 offset = 0;
|
||||
for (int reg : regs)
|
||||
{
|
||||
if (preinc)
|
||||
offset += 4;
|
||||
OpArg mem = MDisp(RSCRATCH4, offset);
|
||||
if (store)
|
||||
{
|
||||
if (RegCache.LoadedRegs & (1 << reg))
|
||||
{
|
||||
MOV(32, mem, MapReg(reg));
|
||||
}
|
||||
else
|
||||
{
|
||||
LoadReg(reg, RSCRATCH);
|
||||
MOV(32, mem, R(RSCRATCH));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (RegCache.LoadedRegs & (1 << reg))
|
||||
{
|
||||
MOV(32, MapReg(reg), mem);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(32, R(RSCRATCH), mem);
|
||||
SaveReg(reg, RSCRATCH);
|
||||
}
|
||||
}
|
||||
if (!preinc)
|
||||
offset += 4;
|
||||
}
|
||||
|
||||
SwitchToFarCode();
|
||||
SetJumpTarget(slowPath);
|
||||
}
|
||||
|
||||
if (!store)
|
||||
{
|
||||
MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
|
||||
SUB(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
|
||||
if (allocOffset == 0)
|
||||
MOV(64, R(ABI_PARAM2), R(RSP));
|
||||
else
|
||||
LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
|
||||
|
||||
CALL(Num == 0
|
||||
? MemoryFuncsSeq9[0][preinc]
|
||||
: MemoryFuncsSeq7[0][preinc][CodeRegion == 0x02]);
|
||||
if (Num == 0)
|
||||
MOV(64, R(ABI_PARAM4), R(RCPU));
|
||||
|
||||
switch (Num * 2 | preinc)
|
||||
{
|
||||
case 0: CALL((void*)&SlowBlockTransfer9<false, false>); break;
|
||||
case 1: CALL((void*)&SlowBlockTransfer9<true, false>); break;
|
||||
case 2: CALL((void*)&SlowBlockTransfer7<false, false>); break;
|
||||
case 3: CALL((void*)&SlowBlockTransfer7<true, false>); break;
|
||||
}
|
||||
|
||||
if (allocOffset)
|
||||
ADD(64, R(RSP), Imm8(allocOffset));
|
||||
|
||||
bool firstUserMode = true;
|
||||
for (int reg = 15; reg >= 0; reg--)
|
||||
{
|
||||
if (regs[reg])
|
||||
for (int reg : regs)
|
||||
{
|
||||
if (usermode && !regs[15] && reg >= 8 && reg < 15)
|
||||
{
|
||||
|
@ -544,13 +598,13 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
|||
POP(RSCRATCH3);
|
||||
CALL(WriteBanked);
|
||||
FixupBranch sucessfulWritten = J_CC(CC_NC);
|
||||
if (RegCache.Mapping[reg] != INVALID_REG)
|
||||
if (RegCache.LoadedRegs & (1 << reg))
|
||||
MOV(32, R(RegCache.Mapping[reg]), R(RSCRATCH3));
|
||||
else
|
||||
SaveReg(reg, RSCRATCH3);
|
||||
SetJumpTarget(sucessfulWritten);
|
||||
}
|
||||
else if (RegCache.Mapping[reg] == INVALID_REG)
|
||||
else if (!(RegCache.LoadedRegs & (1 << reg)))
|
||||
{
|
||||
assert(reg != 15);
|
||||
|
||||
|
@ -559,37 +613,16 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
|||
}
|
||||
else
|
||||
{
|
||||
if (reg != 15)
|
||||
RegCache.DirtyRegs |= (1 << reg);
|
||||
POP(MapReg(reg).GetSimpleReg());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (regsCount & 1)
|
||||
POP(RSCRATCH);
|
||||
|
||||
if (regs[15])
|
||||
{
|
||||
if (Num == 1)
|
||||
{
|
||||
if (Thumb)
|
||||
OR(32, MapReg(15), Imm8(1));
|
||||
else
|
||||
AND(32, MapReg(15), Imm8(0xFE));
|
||||
}
|
||||
Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Comp_AddCycles_CD();
|
||||
|
||||
if (regsCount & 1)
|
||||
PUSH(RSCRATCH);
|
||||
|
||||
bool firstUserMode = true;
|
||||
for (int reg : regs)
|
||||
for (int reg = 15; reg >= 0; reg--)
|
||||
{
|
||||
if (regs[reg])
|
||||
{
|
||||
if (usermode && reg >= 8 && reg < 15)
|
||||
{
|
||||
|
@ -607,7 +640,7 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
|||
CALL(ReadBanked);
|
||||
PUSH(RSCRATCH3);
|
||||
}
|
||||
else if (RegCache.Mapping[reg] == INVALID_REG)
|
||||
else if (!(RegCache.LoadedRegs & (1 << reg)))
|
||||
{
|
||||
LoadReg(reg, RSCRATCH);
|
||||
PUSH(RSCRATCH);
|
||||
|
@ -617,25 +650,51 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
|||
PUSH(MapReg(reg).GetSimpleReg());
|
||||
}
|
||||
}
|
||||
|
||||
if (decrement)
|
||||
{
|
||||
MOV_sum(32, ABI_PARAM1, MapReg(rn), Imm32(-regsCount * 4));
|
||||
preinc ^= true;
|
||||
}
|
||||
|
||||
if (allocOffset)
|
||||
SUB(64, R(RSP), Imm8(allocOffset));
|
||||
|
||||
MOV(32, R(ABI_PARAM1), R(RSCRATCH4));
|
||||
if (allocOffset)
|
||||
LEA(64, ABI_PARAM2, MDisp(RSP, allocOffset));
|
||||
else
|
||||
MOV(32, R(ABI_PARAM1), MapReg(rn));
|
||||
|
||||
MOV(64, R(ABI_PARAM2), R(RSP));
|
||||
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
|
||||
|
||||
CALL(Num == 0
|
||||
? MemoryFuncsSeq9[1][preinc]
|
||||
: MemoryFuncsSeq7[1][preinc][CodeRegion == 0x02]);
|
||||
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
|
||||
if (Num == 0)
|
||||
MOV(64, R(ABI_PARAM4), R(RCPU));
|
||||
|
||||
switch (Num * 2 | preinc)
|
||||
{
|
||||
case 0: CALL((void*)&SlowBlockTransfer9<false, true>); break;
|
||||
case 1: CALL((void*)&SlowBlockTransfer9<true, true>); break;
|
||||
case 2: CALL((void*)&SlowBlockTransfer7<false, true>); break;
|
||||
case 3: CALL((void*)&SlowBlockTransfer7<true, true>); break;
|
||||
}
|
||||
|
||||
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
|
||||
}
|
||||
|
||||
if (compileFastPath)
|
||||
{
|
||||
FixupBranch ret = J(true);
|
||||
SwitchToNearCode();
|
||||
SetJumpTarget(ret);
|
||||
}
|
||||
|
||||
if (!store && regs[15])
|
||||
{
|
||||
if (Num == 1)
|
||||
{
|
||||
if (Thumb)
|
||||
OR(32, MapReg(15), Imm8(1));
|
||||
else
|
||||
AND(32, MapReg(15), Imm8(0xFE));
|
||||
}
|
||||
Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
|
||||
}
|
||||
|
||||
return offset;
|
||||
}
|
||||
|
||||
|
@ -786,9 +845,7 @@ void Compiler::T_Comp_LoadPCRel()
|
|||
{
|
||||
u32 offset = (CurInstr.Instr & 0xFF) << 2;
|
||||
u32 addr = (R15 & ~0x2) + offset;
|
||||
if (Config::JIT_LiteralOptimisations)
|
||||
Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr);
|
||||
else
|
||||
if (!Config::JIT_LiteralOptimisations || !Comp_MemLoadLiteral(32, CurInstr.T_Reg(8), addr))
|
||||
Comp_MemAccess(CurInstr.T_Reg(8), 15, ComplexOperand(offset), 32, 0);
|
||||
}
|
||||
|
||||
|
|
|
@ -373,16 +373,16 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
|||
|
||||
if (res.Kind == tk_LDMIA || res.Kind == tk_POP)
|
||||
{
|
||||
u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
|
||||
res.NotStrictlyNeeded |= set;
|
||||
u32 set = (instr & 0xFF);
|
||||
res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
|
||||
res.DstRegs |= set;
|
||||
}
|
||||
if (res.Kind == tk_STMIA || res.Kind == tk_PUSH)
|
||||
{
|
||||
u32 set = (instr & 0xFF) & ~(res.DstRegs|res.SrcRegs);
|
||||
u32 set = (instr & 0xFF);
|
||||
if (res.Kind == tk_PUSH && instr & (1 << 8))
|
||||
set |= (1 << 14);
|
||||
res.NotStrictlyNeeded |= set;
|
||||
res.NotStrictlyNeeded |= set & ~(res.DstRegs|res.SrcRegs);
|
||||
res.SrcRegs |= set;
|
||||
}
|
||||
|
||||
|
@ -495,15 +495,15 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
|||
|
||||
if (res.Kind == ak_LDM)
|
||||
{
|
||||
u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
|
||||
u16 set = (instr & 0xFFFF);
|
||||
res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
|
||||
res.DstRegs |= set;
|
||||
res.NotStrictlyNeeded |= set;
|
||||
}
|
||||
if (res.Kind == ak_STM)
|
||||
{
|
||||
u16 set = (instr & 0xFFFF) & ~(res.SrcRegs|res.DstRegs|(1<<15));
|
||||
u16 set = (instr & 0xFFFF);
|
||||
res.NotStrictlyNeeded |= set & ~(res.SrcRegs|res.DstRegs|(1<<15));
|
||||
res.SrcRegs |= set;
|
||||
res.NotStrictlyNeeded |= set;
|
||||
}
|
||||
|
||||
if ((instr >> 28) < 0xE)
|
||||
|
|
44
src/CP15.cpp
44
src/CP15.cpp
|
@ -97,6 +97,10 @@ void ARMv5::CP15DoSavestate(Savestate* file)
|
|||
|
||||
void ARMv5::UpdateDTCMSetting()
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
u32 oldDTCMBase = DTCMBase;
|
||||
u32 oldDTCMSize = DTCMSize;
|
||||
#endif
|
||||
if (CP15Control & (1<<16))
|
||||
{
|
||||
DTCMBase = DTCMSetting & 0xFFFFF000;
|
||||
|
@ -109,10 +113,20 @@ void ARMv5::UpdateDTCMSetting()
|
|||
DTCMSize = 0;
|
||||
//printf("DTCM disabled\n");
|
||||
}
|
||||
#ifdef JIT_ENABLED
|
||||
if (oldDTCMBase != DTCMBase || oldDTCMSize != DTCMSize)
|
||||
{
|
||||
ARMJIT::UpdateMemoryStatus9(oldDTCMBase, oldDTCMBase + oldDTCMSize);
|
||||
ARMJIT::UpdateMemoryStatus9(DTCMBase, DTCMBase + DTCMSize);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void ARMv5::UpdateITCMSetting()
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
u32 oldITCMSize = ITCMSize;
|
||||
#endif
|
||||
if (CP15Control & (1<<18))
|
||||
{
|
||||
ITCMSize = 0x200 << ((ITCMSetting >> 1) & 0x1F);
|
||||
|
@ -123,6 +137,10 @@ void ARMv5::UpdateITCMSetting()
|
|||
ITCMSize = 0;
|
||||
//printf("ITCM disabled\n");
|
||||
}
|
||||
#ifdef JIT_ENABLED
|
||||
if (oldITCMSize != ITCMSize)
|
||||
ARMJIT::UpdateMemoryStatus9(0, std::max(oldITCMSize, ITCMSize));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -561,15 +579,9 @@ void ARMv5::CP15Write(u32 id, u32 val)
|
|||
|
||||
|
||||
case 0x750:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateAll();
|
||||
#endif
|
||||
ICacheInvalidateAll();
|
||||
return;
|
||||
case 0x751:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateByAddr(ARMJIT::TranslateAddr<0>(val));
|
||||
#endif
|
||||
ICacheInvalidateByAddr(val);
|
||||
return;
|
||||
case 0x752:
|
||||
|
@ -732,7 +744,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
|
|||
|
||||
void ARMv5::DataRead8(u32 addr, u32* val)
|
||||
{
|
||||
DataRegion = addr >> 12;
|
||||
DataRegion = addr;
|
||||
|
||||
if (addr < ITCMSize)
|
||||
{
|
||||
|
@ -753,7 +765,7 @@ void ARMv5::DataRead8(u32 addr, u32* val)
|
|||
|
||||
void ARMv5::DataRead16(u32 addr, u32* val)
|
||||
{
|
||||
DataRegion = addr >> 12;
|
||||
DataRegion = addr;
|
||||
|
||||
addr &= ~1;
|
||||
|
||||
|
@ -776,7 +788,7 @@ void ARMv5::DataRead16(u32 addr, u32* val)
|
|||
|
||||
void ARMv5::DataRead32(u32 addr, u32* val)
|
||||
{
|
||||
DataRegion = addr >> 12;
|
||||
DataRegion = addr;
|
||||
|
||||
addr &= ~3;
|
||||
|
||||
|
@ -820,14 +832,14 @@ void ARMv5::DataRead32S(u32 addr, u32* val)
|
|||
|
||||
void ARMv5::DataWrite8(u32 addr, u8 val)
|
||||
{
|
||||
DataRegion = addr >> 12;
|
||||
DataRegion = addr;
|
||||
|
||||
if (addr < ITCMSize)
|
||||
{
|
||||
DataCycles = 1;
|
||||
*(u8*)&ITCM[addr & 0x7FFF] = val;
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateITCM(addr & 0x7FFF);
|
||||
ARMJIT::InvalidateITCMIfNecessary(addr);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
@ -844,7 +856,7 @@ void ARMv5::DataWrite8(u32 addr, u8 val)
|
|||
|
||||
void ARMv5::DataWrite16(u32 addr, u16 val)
|
||||
{
|
||||
DataRegion = addr >> 12;
|
||||
DataRegion = addr;
|
||||
|
||||
addr &= ~1;
|
||||
|
||||
|
@ -853,7 +865,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
|
|||
DataCycles = 1;
|
||||
*(u16*)&ITCM[addr & 0x7FFF] = val;
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateITCM(addr & 0x7FFF);
|
||||
ARMJIT::InvalidateITCMIfNecessary(addr);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
@ -870,7 +882,7 @@ void ARMv5::DataWrite16(u32 addr, u16 val)
|
|||
|
||||
void ARMv5::DataWrite32(u32 addr, u32 val)
|
||||
{
|
||||
DataRegion = addr >> 12;
|
||||
DataRegion = addr;
|
||||
|
||||
addr &= ~3;
|
||||
|
||||
|
@ -879,7 +891,7 @@ void ARMv5::DataWrite32(u32 addr, u32 val)
|
|||
DataCycles = 1;
|
||||
*(u32*)&ITCM[addr & 0x7FFF] = val;
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateITCM(addr & 0x7FFF);
|
||||
ARMJIT::InvalidateITCMIfNecessary(addr);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
@ -903,7 +915,7 @@ void ARMv5::DataWrite32S(u32 addr, u32 val)
|
|||
DataCycles += 1;
|
||||
*(u32*)&ITCM[addr & 0x7FFF] = val;
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateITCM(addr & 0x7FFF);
|
||||
ARMJIT::InvalidateITCMIfNecessary(addr);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
|
105
src/NDS.cpp
105
src/NDS.cpp
|
@ -535,10 +535,6 @@ void Reset()
|
|||
KeyCnt = 0;
|
||||
RCnt = 0;
|
||||
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::ResetBlockCache();
|
||||
#endif
|
||||
|
||||
NDSCart::Reset();
|
||||
GBACart::Reset();
|
||||
GPU::Reset();
|
||||
|
@ -548,6 +544,10 @@ void Reset()
|
|||
Wifi::Reset();
|
||||
|
||||
AREngine::Reset();
|
||||
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::Reset();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Stop()
|
||||
|
@ -1058,6 +1058,9 @@ void Halt()
|
|||
|
||||
void MapSharedWRAM(u8 val)
|
||||
{
|
||||
if (val == WRAMCnt)
|
||||
return;
|
||||
|
||||
WRAMCnt = val;
|
||||
|
||||
switch (WRAMCnt & 0x3)
|
||||
|
@ -1090,6 +1093,11 @@ void MapSharedWRAM(u8 val)
|
|||
SWRAM_ARM7Mask = 0x7FFF;
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::UpdateMemoryStatus9(0x3000000, 0x3000000 + 0x1000000);
|
||||
ARMJIT::UpdateMemoryStatus7(0x3000000, 0x3000000 + 0x1000000);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -1873,12 +1881,18 @@ void ARM9Write8(u32 addr, u8 val)
|
|||
switch (addr & 0xFF000000)
|
||||
{
|
||||
case 0x02000000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateMainRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
|
||||
return;
|
||||
|
||||
case 0x03000000:
|
||||
if (SWRAM_ARM9)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateSWRAM9IfNecessary(addr);
|
||||
#endif
|
||||
*(u8*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
|
||||
}
|
||||
return;
|
||||
|
@ -1923,12 +1937,18 @@ void ARM9Write16(u32 addr, u16 val)
|
|||
switch (addr & 0xFF000000)
|
||||
{
|
||||
case 0x02000000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateMainRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
|
||||
return;
|
||||
|
||||
case 0x03000000:
|
||||
if (SWRAM_ARM9)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateSWRAM9IfNecessary(addr);
|
||||
#endif
|
||||
*(u16*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
|
||||
}
|
||||
return;
|
||||
|
@ -1949,7 +1969,12 @@ void ARM9Write16(u32 addr, u16 val)
|
|||
case 0x00200000: GPU::WriteVRAM_BBG<u16>(addr, val); return;
|
||||
case 0x00400000: GPU::WriteVRAM_AOBJ<u16>(addr, val); return;
|
||||
case 0x00600000: GPU::WriteVRAM_BOBJ<u16>(addr, val); return;
|
||||
default: GPU::WriteVRAM_LCDC<u16>(addr, val); return;
|
||||
default:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateLCDCIfNecessary(addr);
|
||||
#endif
|
||||
GPU::WriteVRAM_LCDC<u16>(addr, val);
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x07000000:
|
||||
|
@ -1989,12 +2014,18 @@ void ARM9Write32(u32 addr, u32 val)
|
|||
switch (addr & 0xFF000000)
|
||||
{
|
||||
case 0x02000000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateMainRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
|
||||
return ;
|
||||
|
||||
case 0x03000000:
|
||||
if (SWRAM_ARM9)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateSWRAM9IfNecessary(addr);
|
||||
#endif
|
||||
*(u32*)&SWRAM_ARM9[addr & SWRAM_ARM9Mask] = val;
|
||||
}
|
||||
return;
|
||||
|
@ -2015,7 +2046,12 @@ void ARM9Write32(u32 addr, u32 val)
|
|||
case 0x00200000: GPU::WriteVRAM_BBG<u32>(addr, val); return;
|
||||
case 0x00400000: GPU::WriteVRAM_AOBJ<u32>(addr, val); return;
|
||||
case 0x00600000: GPU::WriteVRAM_BOBJ<u32>(addr, val); return;
|
||||
default: GPU::WriteVRAM_LCDC<u32>(addr, val); return;
|
||||
default:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateLCDCIfNecessary(addr);
|
||||
#endif
|
||||
GPU::WriteVRAM_LCDC<u32>(addr, val);
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x07000000:
|
||||
|
@ -2279,30 +2315,38 @@ u32 ARM7Read32(u32 addr)
|
|||
|
||||
void ARM7Write8(u32 addr, u8 val)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateByAddr7(addr);
|
||||
#endif
|
||||
|
||||
switch (addr & 0xFF800000)
|
||||
{
|
||||
case 0x02000000:
|
||||
case 0x02800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateMainRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u8*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
|
||||
return;
|
||||
|
||||
case 0x03000000:
|
||||
if (SWRAM_ARM7)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateSWRAM7IfNecessary(addr);
|
||||
#endif
|
||||
*(u8*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x03800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u8*)&ARM7WRAM[addr & 0xFFFF] = val;
|
||||
return;
|
||||
|
||||
|
@ -2312,6 +2356,9 @@ void ARM7Write8(u32 addr, u8 val)
|
|||
|
||||
case 0x06000000:
|
||||
case 0x06800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
|
||||
#endif
|
||||
GPU::WriteVRAM_ARM7<u8>(addr, val);
|
||||
return;
|
||||
|
||||
|
@ -2342,30 +2389,38 @@ void ARM7Write8(u32 addr, u8 val)
|
|||
|
||||
void ARM7Write16(u32 addr, u16 val)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateByAddr7(addr);
|
||||
#endif
|
||||
|
||||
switch (addr & 0xFF800000)
|
||||
{
|
||||
case 0x02000000:
|
||||
case 0x02800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateMainRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u16*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
|
||||
return;
|
||||
|
||||
case 0x03000000:
|
||||
if (SWRAM_ARM7)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateSWRAM7IfNecessary(addr);
|
||||
#endif
|
||||
*(u16*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x03800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u16*)&ARM7WRAM[addr & 0xFFFF] = val;
|
||||
return;
|
||||
|
||||
|
@ -2383,6 +2438,9 @@ void ARM7Write16(u32 addr, u16 val)
|
|||
|
||||
case 0x06000000:
|
||||
case 0x06800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
|
||||
#endif
|
||||
GPU::WriteVRAM_ARM7<u16>(addr, val);
|
||||
return;
|
||||
|
||||
|
@ -2415,30 +2473,38 @@ void ARM7Write16(u32 addr, u16 val)
|
|||
|
||||
void ARM7Write32(u32 addr, u32 val)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateByAddr7(addr);
|
||||
#endif
|
||||
|
||||
switch (addr & 0xFF800000)
|
||||
{
|
||||
case 0x02000000:
|
||||
case 0x02800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateMainRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u32*)&MainRAM[addr & (MAIN_RAM_SIZE - 1)] = val;
|
||||
return;
|
||||
|
||||
case 0x03000000:
|
||||
if (SWRAM_ARM7)
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateSWRAM7IfNecessary(addr);
|
||||
#endif
|
||||
*(u32*)&SWRAM_ARM7[addr & SWRAM_ARM7Mask] = val;
|
||||
return;
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
|
||||
return;
|
||||
}
|
||||
|
||||
case 0x03800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WRAMIfNecessary(addr);
|
||||
#endif
|
||||
*(u32*)&ARM7WRAM[addr & 0xFFFF] = val;
|
||||
return;
|
||||
|
||||
|
@ -2457,6 +2523,9 @@ void ARM7Write32(u32 addr, u32 val)
|
|||
|
||||
case 0x06000000:
|
||||
case 0x06800000:
|
||||
#ifdef JIT_ENABLED
|
||||
ARMJIT::InvalidateARM7WVRAMIfNecessary(addr);
|
||||
#endif
|
||||
GPU::WriteVRAM_ARM7<u32>(addr, val);
|
||||
return;
|
||||
|
||||
|
|
|
@ -120,6 +120,14 @@ extern u8 ROMSeed1[2*8];
|
|||
extern u8 ARM9BIOS[0x1000];
|
||||
extern u8 ARM7BIOS[0x4000];
|
||||
|
||||
extern u8 SharedWRAM[0x8000];
|
||||
extern u8* SWRAM_ARM9;
|
||||
extern u8* SWRAM_ARM7;
|
||||
extern u32 SWRAM_ARM9Mask;
|
||||
extern u32 SWRAM_ARM7Mask;
|
||||
|
||||
extern u8 ARM7WRAM[0x10000];
|
||||
|
||||
#define MAIN_RAM_SIZE 0x400000
|
||||
|
||||
extern u8 MainRAM[MAIN_RAM_SIZE];
|
||||
|
|
Loading…
Reference in New Issue