JIT: compilation of word load and store

This commit is contained in:
RSDuck 2019-06-30 13:35:03 +02:00
parent ea98a44e1e
commit 550e6b86d2
10 changed files with 712 additions and 43 deletions

View File

@ -40,8 +40,7 @@ static ptrdiff_t JIT_MEM[2][32] = {
/* 2X*/ DUP2(offsetof(BlockCache, MainRAM)),
/* 3X*/ offsetof(BlockCache, SWRAM),
offsetof(BlockCache, ARM7_WRAM),
/* 4X*/ -1,
offsetof(BlockCache, ARM7_WIRAM),
/* 4X*/ DUP2(-1),
/* 5X*/ DUP2(-1),
/* 6X*/ DUP2(offsetof(BlockCache, ARM7_WVRAM)), /* contrary to Gbatek, melonDS and itself,
DeSmuME doesn't mirror the 64 MB region at 0x6800000 */
@ -183,7 +182,6 @@ void ResetBlocks()
memset(cache.ARM9_ITCM, 0, sizeof(cache.ARM9_ITCM));
memset(cache.ARM9_LCDC, 0, sizeof(cache.ARM9_LCDC));
memset(cache.ARM7_BIOS, 0, sizeof(cache.ARM7_BIOS));
memset(cache.ARM7_WIRAM, 0, sizeof(cache.ARM7_WIRAM));
memset(cache.ARM7_WRAM, 0, sizeof(cache.ARM7_WRAM));
memset(cache.ARM7_WVRAM, 0, sizeof(cache.ARM7_WVRAM));
}

View File

@ -63,14 +63,13 @@ struct BlockCache
{
CompiledBlock* AddrMapping[2][0x4000] = {0};
CompiledBlock MainRAM[16*1024*1024/2];
CompiledBlock MainRAM[4*1024*1024/2];
CompiledBlock SWRAM[0x8000/2]; // Shared working RAM
CompiledBlock ARM9_ITCM[0x8000/2];
CompiledBlock ARM9_LCDC[0xA4000/2];
CompiledBlock ARM9_BIOS[0x8000/2];
CompiledBlock ARM7_BIOS[0x4000/2];
CompiledBlock ARM7_WRAM[0x10000/2]; // dedicated ARM7 WRAM
CompiledBlock ARM7_WIRAM[0x10000/2]; // Wifi
CompiledBlock ARM7_WVRAM[0x40000/2]; // VRAM allocated as Working RAM
};

View File

@ -30,7 +30,7 @@ public:
assert(Mapping[reg] != -1);
if (DirtyRegs & (1 << reg))
Compiler->UnloadReg(reg, Mapping[reg]);
Compiler->SaveReg(reg, Mapping[reg]);
DirtyRegs &= ~(1 << reg);
LoadedRegs &= ~(1 << reg);

View File

@ -255,8 +255,8 @@ OpArg Compiler::Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, b
if (S)
{
XOR(32, R(RSCRATCH2), R(RSCRATCH2));
BT(32, R(RCPSR), Imm8(29));
SETcc(CC_C, R(RSCRATCH2));
TEST(32, R(RCPSR), Imm32(1 << 29));
SETcc(CC_NZ, R(RSCRATCH2));
}
MOV(32, R(RSCRATCH), rm);

View File

@ -9,13 +9,43 @@ using namespace Gen;
namespace ARMJIT
{
template <>
const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] = {RBX, RSI, RDI, R12, R13};
const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
{
#ifdef _WIN32
RBX, RSI, RDI, R12, R13
#else
RBX, R12, R13
#endif
};
template <>
const int RegCache<Compiler, X64Reg>::NativeRegsAvailable = 5;
const int RegCache<Compiler, X64Reg>::NativeRegsAvailable =
#ifdef _WIN32
5
#else
3
#endif
;
// Allocates the code buffer and pre-generates the 32-bit memory access
// routines that compiled blocks call through the Read/WriteMemFuncs tables.
Compiler::Compiler()
{
    AllocCodeSpace(1024 * 1024 * 4);
    AllocCodeSpace(1024 * 1024 * 16);

    // Regions 0x00-0x0E: one routine per 16 MB region, for both CPUs.
    for (int i = 0; i < 15; i++)
    {
        ReadMemFuncs9[i] = Gen_MemoryRoutine9(false, 32, 0x1000000 * i);
        WriteMemFuncs9[i] = Gen_MemoryRoutine9(true, 32, 0x1000000 * i);
        // the ARM7 tables are indexed [mainRAMCode][region]
        for (int j = 0; j < 2; j++)
        {
            ReadMemFuncs7[j][i] = Gen_MemoryRoutine7(false, 32, j, 0x1000000 * i);
            WriteMemFuncs7[j][i] = Gen_MemoryRoutine7(true, 32, j, 0x1000000 * i);
        }
    }

    // Table slot 15 is generated for region 0xFF000000.
    ReadMemFuncs9[15] = Gen_MemoryRoutine9(false, 32, 0xFF000000);
    WriteMemFuncs9[15] = Gen_MemoryRoutine9(true, 32, 0xFF000000);
    // Both ARM7 rows share a single routine for this slot. The tables are
    // declared [2][16], so the row index comes first; indexing [15][x] would
    // write past the end of the array and leave slot 15 unset.
    ReadMemFuncs7[0][15] = ReadMemFuncs7[1][15] = Gen_MemoryRoutine7(false, 32, false, 0xFF000000);
    WriteMemFuncs7[0][15] = WriteMemFuncs7[1][15] = Gen_MemoryRoutine7(true, 32, false, 0xFF000000);

    // Everything emitted so far must survive a code cache reset; CompileBlock
    // rewinds the code pointer to this position instead of the buffer start.
    ResetStart = GetWritableCodePtr();
}
void Compiler::LoadCPSR()
@ -42,7 +72,7 @@ void Compiler::LoadReg(int reg, X64Reg nativeReg)
MOV(32, R(nativeReg), Imm32(R15));
}
void Compiler::UnloadReg(int reg, X64Reg nativeReg)
void Compiler::SaveReg(int reg, X64Reg nativeReg)
{
MOV(32, MDisp(RCPU, offsetof(ARM, R[reg])), R(nativeReg));
}
@ -52,7 +82,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
if (IsAlmostFull())
{
ResetBlocks();
ResetCodePtr();
SetCodePtr((u8*)ResetStart);
}
CompiledBlock res = (CompiledBlock)GetWritableCodePtr();
@ -61,8 +91,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
Thumb = cpu->CPSR & 0x20;
Num = cpu->Num;
R15 = cpu->R[15];
CodeRegion = cpu->CodeRegion;
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED}, 8, 0);
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
MOV(64, R(RCPU), ImmPtr(cpu));
XOR(32, R(RCycles), R(RCycles));
@ -142,9 +173,9 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
else
{
// could have used a LUT, but then where would be the fun?
BT(32, R(RCPSR), Imm8(28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1))));
TEST(32, R(RCPSR), Imm32(1 << (28 + ((~(cond >> 1) & 1) << 1 | (cond >> 2 & 1) ^ (cond >> 1 & 1)))));
skipExecute = J_CC(cond & 1 ? CC_C : CC_NC);
skipExecute = J_CC(cond & 1 ? CC_NZ : CC_Z);
}
}
@ -187,7 +218,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
LEA(32, RAX, MDisp(RCycles, ConstantCycles));
ABI_PopRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED}, 8, 0);
ABI_PopRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
RET();
return res;
@ -243,23 +274,38 @@ CompileFunc Compiler::GetCompFunc(int kind)
A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
// CMN
A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp, A_Comp_CmpOp,
// Mul
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// ARMv5 stuff
NULL, NULL, NULL, NULL, NULL,
// STR
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// STRB
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// LDR
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
// LDRB
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// STRH
NULL, NULL, NULL, NULL,
// LDRD
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
// STRD
NULL, NULL, NULL, NULL,
// LDRH
NULL, NULL, NULL, NULL,
// LDRSB
NULL, NULL, NULL, NULL,
// LDRSH
NULL, NULL, NULL, NULL,
// swap
NULL, NULL,
// LDM/STM
NULL, NULL,
// Branch
NULL, NULL, NULL, NULL, NULL,
// system stuff
NULL, NULL, NULL, NULL, NULL, NULL, NULL,
};
const CompileFunc T_Comp[ARMInstrInfo::tk_Count] = {
@ -278,10 +324,17 @@ CompileFunc Compiler::GetCompFunc(int kind)
T_Comp_ALU_HiReg, T_Comp_ALU_HiReg, T_Comp_ALU_HiReg,
// pc/sp relative
NULL, NULL, NULL,
// mem...
NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
// LDR pcrel
NULL,
// LDR/STR reg offset
T_Comp_MemReg, NULL, T_Comp_MemReg, NULL,
// LDR/STR sign extended, half
NULL, NULL, NULL, NULL,
// LDR/STR imm offset
T_Comp_MemImm, T_Comp_MemImm, NULL, NULL,
// LDR/STR half imm offset
NULL, NULL,
// branch, etc.
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL,
NULL, NULL

View File

@ -29,7 +29,7 @@ public:
CompiledBlock CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrsCount);
void LoadReg(int reg, Gen::X64Reg nativeReg);
void UnloadReg(int reg, Gen::X64Reg nativeReg);
void SaveReg(int reg, Gen::X64Reg nativeReg);
private:
CompileFunc GetCompFunc(int kind);
@ -51,12 +51,17 @@ private:
void A_Comp_MovOp();
void A_Comp_CmpOp();
void A_Comp_MemWB();
void T_Comp_ShiftImm();
void T_Comp_AddSub_();
void T_Comp_ALU_Imm8();
void T_Comp_ALU();
void T_Comp_ALU_HiReg();
void T_Comp_MemReg();
void T_Comp_MemImm();
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
void Comp_ArithTriOpReverse(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
@ -65,10 +70,14 @@ private:
void Comp_RetriveFlags(bool sign, bool retriveCV, bool carryUsed);
void* Gen_MemoryRoutine9(bool store, int size, u32 region);
void* Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region);
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
Gen::OpArg A_Comp_GetALUOp2(bool S, bool& carryUsed);
Gen::OpArg A_Comp_GetMemWBOffset();
void LoadCPSR();
void SaveCPSR();
@ -82,6 +91,8 @@ private:
return Gen::R(RegCache.Mapping[reg]);
}
void* ResetStart;
bool CPSRDirty = false;
FetchedInstr CurrentInstr;
@ -91,10 +102,16 @@ private:
bool Thumb;
u32 Num;
u32 R15;
u32 CodeRegion;
u32 ConstantCycles;
};
extern void* ReadMemFuncs9[16];
extern void* ReadMemFuncs7[2][16];
extern void* WriteMemFuncs9[16];
extern void* WriteMemFuncs7[2][16];
}
#endif

View File

@ -0,0 +1,600 @@
#include "ARMJIT_Compiler.h"
#include "../GPU.h"
#include "../Wifi.h"
namespace NDS
{
#define MAIN_RAM_SIZE 0x400000
extern u8* SWRAM_ARM9;
extern u32 SWRAM_ARM9Mask;
extern u8* SWRAM_ARM7;
extern u32 SWRAM_ARM7Mask;
extern u8 ARM7WRAM[];
extern u16 ARM7BIOSProt;
}
using namespace Gen;
namespace ARMJIT
{
void* ReadMemFuncs9[16];
void* ReadMemFuncs7[2][16];
void* WriteMemFuncs9[16];
void* WriteMemFuncs7[2][16];
template <typename T>
int squeezePointer(T* ptr)
{
int truncated = (int)((u64)ptr);
assert((T*)((u64)truncated) == ptr);
return truncated;
}
// 32-bit VRAM read for the ARM9: dispatches on address bits 21-23 to the
// matching GPU read handler; anything not listed goes to the LCDC handler.
u32 ReadVRAM9(u32 addr)
{
    const u32 bank = addr & 0x00E00000;

    if (bank == 0x00000000)
        return GPU::ReadVRAM_ABG<u32>(addr);
    if (bank == 0x00200000)
        return GPU::ReadVRAM_BBG<u32>(addr);
    if (bank == 0x00400000)
        return GPU::ReadVRAM_AOBJ<u32>(addr);
    if (bank == 0x00600000)
        return GPU::ReadVRAM_BOBJ<u32>(addr);

    return GPU::ReadVRAM_LCDC<u32>(addr);
}
// 32-bit VRAM write for the ARM9: dispatches on address bits 21-23 to the
// matching GPU write handler; anything not listed goes to the LCDC handler.
void WriteVRAM9(u32 addr, u32 val)
{
    const u32 bank = addr & 0x00E00000;

    if (bank == 0x00000000)
        GPU::WriteVRAM_ABG<u32>(addr, val);
    else if (bank == 0x00200000)
        GPU::WriteVRAM_BBG<u32>(addr, val);
    else if (bank == 0x00400000)
        GPU::WriteVRAM_AOBJ<u32>(addr, val);
    else if (bank == 0x00600000)
        GPU::WriteVRAM_BOBJ<u32>(addr, val);
    else
        GPU::WriteVRAM_LCDC<u32>(addr, val);
}
/*
    Emits the ARM9 word load/store routine for one 16 MB address region and
    returns its entry point.

    Register contract of the generated routine:
        R11       - data to write (store only)
        RSCRATCH2 - address
        RSCRATCH3 - code cycles
        RSCRATCH  - loaded value on return (load only)

    store  - true emits a store routine, false a load routine
    size   - access width in bits; not used yet, every routine is a word access
    region - base address of the region (address with the low 24 bits cleared)
*/
void* Compiler::Gen_MemoryRoutine9(bool store, int size, u32 region)
{
    AlignCode4();
    void* res = (void*)GetWritableCodePtr();

    if (!store)
    {
        // rotation for unaligned loads: (addr & 3) * 8
        MOV(32, R(RSCRATCH), R(RSCRATCH2));
        AND(32, R(RSCRATCH), Imm8(0x3));
        SHL(32, R(RSCRATCH), Imm8(3));
        // enter the shadow realm!
        // (the rotation is parked in the stack slot right above the return address)
        MOV(32, MDisp(RSP, 8), R(RSCRATCH));
    }

    // cycle counting!
    // this is AddCycles_CDI:
    // RCycles += max(code, data, code + data - 6), with data read from MemTimings
    MOV(32, R(R10), R(RSCRATCH2));
    SHR(32, R(R10), Imm8(12));
    MOVZX(32, 8, R10, MComplex(RCPU, R10, SCALE_1, offsetof(ARMv5, MemTimings) + 2));
    LEA(32, RSCRATCH, MComplex(RSCRATCH3, R10, SCALE_1, -6));
    CMP(32, R(R10), R(RSCRATCH3));
    CMOVcc(32, RSCRATCH3, R(R10), CC_G);
    CMP(32, R(RSCRATCH), R(RSCRATCH3));
    CMOVcc(32, RSCRATCH3, R(RSCRATCH), CC_G);
    ADD(32, R(RCycles), R(RSCRATCH3));

    // default result of a load that hits nothing
    if (!store)
        XOR(32, R(RSCRATCH), R(RSCRATCH));
    AND(32, R(RSCRATCH2), Imm32(~3));

    {
        // DTCM is checked first, for every region; RSCRATCH3 keeps the address
        // while RSCRATCH2 is turned into the offset from DTCMBase
        MOV(32, R(RSCRATCH3), R(RSCRATCH2));
        SUB(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
        CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
        FixupBranch outsideDTCM = J_CC(CC_AE);
        AND(32, R(RSCRATCH2), Imm32(0x3FFF));
        if (!store)
        {
            MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM)));
            MOV(32, R(ECX), MDisp(RSP, 8));
            ROR_(32, R(RSCRATCH), R(ECX));
        }
        else
            MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, DTCM)), R(R11));
        RET();
        SetJumpTarget(outsideDTCM);
        MOV(32, R(RSCRATCH2), R(RSCRATCH3));
    }

    switch (region)
    {
    case 0x00000000:
    case 0x01000000:
        {
            CMP(32, R(RSCRATCH2), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
            FixupBranch insideITCM = J_CC(CC_B);
            RET();
            SetJumpTarget(insideITCM);
            AND(32, R(RSCRATCH2), Imm32(0x7FFF));
            if (!store)
                MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)));
            else
            {
                MOV(32, MComplex(RCPU, RSCRATCH2, SCALE_1, offsetof(ARMv5, ITCM)), R(R11));
                // clear the two block cache entries covering the written word
                MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM)), Imm32(0));
                MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), Imm32(0));
            }
        }
        break;
    case 0x02000000:
        AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1));
        if (!store)
            MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)));
        else
        {
            MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11));
            MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0));
            MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0));
        }
        break;
    case 0x03000000:
        {
            // a null SWRAM_ARM9 pointer means nothing is mapped here
            MOV(64, R(RSCRATCH3), M(&NDS::SWRAM_ARM9));
            TEST(64, R(RSCRATCH3), R(RSCRATCH3));
            FixupBranch notMapped = J_CC(CC_Z);
            AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM9Mask));
            if (!store)
                MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH3));
            else
            {
                MOV(32, MRegSum(RSCRATCH2, RSCRATCH3), R(R11));
                MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0));
                MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0));
            }
            SetJumpTarget(notMapped);
        }
        break;
    case 0x04000000:
        MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
        if (!store)
        {
            ABI_PushRegistersAndAdjustStack({}, 8, 0);
            ABI_CallFunction(NDS::ARM9IORead32);
            ABI_PopRegistersAndAdjustStack({}, 8, 0);
        }
        else
        {
            // tail call: the handler returns straight to the compiled block
            MOV(32, R(ABI_PARAM2), R(R11));
            JMP((u8*)NDS::ARM9IOWrite32, true);
        }
        break;
    case 0x05000000:
        {
            // pick the PowerControl9 bit to test: bit 9 if address bit 10 is set, else bit 1
            MOV(32, R(RSCRATCH), Imm32(1<<1));
            MOV(32, R(RSCRATCH3), Imm32(1<<9));
            TEST(32, R(RSCRATCH2), Imm32(0x400));
            CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ);
            TEST(16, R(RSCRATCH), M(&NDS::PowerControl9));
            FixupBranch available = J_CC(CC_NZ);
            // RSCRATCH still holds the bit mask here; a load must not return it
            if (!store)
                XOR(32, R(RSCRATCH), R(RSCRATCH));
            RET();
            SetJumpTarget(available);
            AND(32, R(RSCRATCH2), Imm32(0x7FF));
            if (!store)
                MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::Palette)));
            else
                MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::Palette)), R(R11));
        }
        break;
    case 0x06000000:
        MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
        if (!store)
        {
            ABI_PushRegistersAndAdjustStack({}, 8);
            ABI_CallFunction(ReadVRAM9);
            ABI_PopRegistersAndAdjustStack({}, 8);
        }
        else
        {
            MOV(32, R(ABI_PARAM2), R(R11));
            JMP((u8*)WriteVRAM9, true);
        }
        break;
    case 0x07000000:
        {
            // same PowerControl9 gating as the palette case
            MOV(32, R(RSCRATCH), Imm32(1<<1));
            MOV(32, R(RSCRATCH3), Imm32(1<<9));
            TEST(32, R(RSCRATCH2), Imm32(0x400));
            CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_NZ);
            TEST(16, R(RSCRATCH), M(&NDS::PowerControl9));
            FixupBranch available = J_CC(CC_NZ);
            // RSCRATCH still holds the bit mask here; a load must not return it
            if (!store)
                XOR(32, R(RSCRATCH), R(RSCRATCH));
            RET();
            SetJumpTarget(available);
            AND(32, R(RSCRATCH2), Imm32(0x7FF));
            if (!store)
                MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(GPU::OAM)));
            else
                MOV(32, MDisp(RSCRATCH2, squeezePointer(GPU::OAM)), R(R11));
        }
        break;
    case 0x08000000:
    case 0x09000000:
    case 0x0A000000:
        // loads return all ones, stores are dropped
        if (!store)
            MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
        break;
    case 0xFF000000:
        if (!store)
        {
            AND(32, R(RSCRATCH2), Imm32(0xFFF));
            MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM9BIOS)));
        }
        break;
    default:
        // everything else goes through the generic handlers
        MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
        if (!store)
        {
            ABI_PushRegistersAndAdjustStack({}, 8, 0);
            ABI_CallFunction(NDS::ARM9Read32);
            ABI_PopRegistersAndAdjustStack({}, 8, 0);
        }
        else
        {
            MOV(32, R(ABI_PARAM2), R(R11));
            JMP((u8*)NDS::ARM9Write32, true);
        }
        break;
    }

    if (!store)
    {
        // apply the rotation saved at entry
        MOV(32, R(ECX), MDisp(RSP, 8));
        ROR_(32, R(RSCRATCH), R(ECX));
    }

    RET();

    return res;
}
void* Compiler::Gen_MemoryRoutine7(bool store, int size, bool mainRAMCode, u32 region)
{
AlignCode4();
void* res = GetWritableCodePtr();
if (!store)
{
MOV(32, R(RSCRATCH), R(RSCRATCH2));
AND(32, R(RSCRATCH), Imm8(0x3));
SHL(32, R(RSCRATCH), Imm8(3));
// enter the shadow realm!
MOV(32, MDisp(RSP, 8), R(RSCRATCH));
}
// AddCycles_CDI
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(15));
MOVZX(32, 8, RSCRATCH, MDisp(RSCRATCH, squeezePointer(NDS::ARM7MemTimings + 2)));
if ((region == 0x02000000 && mainRAMCode) || (region != 0x02000000 && !mainRAMCode))
{
if (!store && region != 0x02000000)
LEA(32, RSCRATCH3, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, 1));
ADD(32, R(RCycles), R(RSCRATCH3));
}
else
{
if (!store)
ADD(32, R(region == 0x02000000 ? RSCRATCH2 : RSCRATCH), Imm8(1));
LEA(32, R10, MComplex(RSCRATCH, RSCRATCH3, SCALE_1, -3));
CMP(32, R(RSCRATCH3), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(RSCRATCH3), CC_G);
CMP(32, R(R10), R(RSCRATCH));
CMOVcc(32, RSCRATCH, R(R10), CC_G);
ADD(32, R(RCycles), R(RSCRATCH));
}
if (!store)
XOR(32, R(RSCRATCH), R(RSCRATCH));
AND(32, R(RSCRATCH2), Imm32(~3));
switch (region)
{
case 0x00000000:
if (!store) {
CMP(32, R(RSCRATCH2), Imm32(0x4000));
FixupBranch outsideBIOS1 = J_CC(CC_AE);
MOV(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARM, R[15])));
CMP(32, R(RSCRATCH), Imm32(0x4000));
FixupBranch outsideBIOS2 = J_CC(CC_AE);
MOV(32, R(RSCRATCH3), M(&NDS::ARM7BIOSProt));
CMP(32, R(RSCRATCH2), R(RSCRATCH3));
FixupBranch notDenied1 = J_CC(CC_AE);
CMP(32, R(RSCRATCH), R(RSCRATCH3));
FixupBranch notDenied2 = J_CC(CC_B);
SetJumpTarget(outsideBIOS2);
MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
RET();
SetJumpTarget(notDenied1);
SetJumpTarget(notDenied2);
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7BIOS)));
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
RET();
SetJumpTarget(outsideBIOS1);
}
break;
case 0x02000000:
AND(32, R(RSCRATCH2), Imm32(MAIN_RAM_SIZE - 1));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)));
else
{
MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::MainRAM)), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.MainRAM) + 8), Imm32(0));
}
break;
case 0x03000000:
{
TEST(32, R(RSCRATCH2), Imm32(0x800000));
FixupBranch region = J_CC(CC_NZ);
MOV(64, R(RSCRATCH), M(&NDS::SWRAM_ARM7));
TEST(64, R(RSCRATCH), R(RSCRATCH));
FixupBranch notMapped = J_CC(CC_Z);
AND(32, R(RSCRATCH2), M(&NDS::SWRAM_ARM7Mask));
if (!store)
{
MOV(32, R(RSCRATCH), MRegSum(RSCRATCH, RSCRATCH2));
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
}
else
{
MOV(32, MRegSum(RSCRATCH, RSCRATCH2), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.SWRAM) + 8), Imm32(0));
}
RET();
SetJumpTarget(region);
SetJumpTarget(notMapped);
AND(32, R(RSCRATCH2), Imm32(0xFFFF));
if (!store)
MOV(32, R(RSCRATCH), MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)));
else
{
MOV(32, MDisp(RSCRATCH2, squeezePointer(NDS::ARM7WRAM)), R(R11));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM)), Imm32(0));
MOV(64, MScaled(RSCRATCH2, SCALE_4, squeezePointer(cache.ARM7_WRAM) + 8), Imm32(0));
}
}
break;
case 0x04000000:
{
TEST(32, R(RSCRATCH2), Imm32(0x800000));
FixupBranch region = J_CC(CC_NZ);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8);
ABI_CallFunction(NDS::ARM7IORead32);
ABI_PopRegistersAndAdjustStack({}, 8);
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
RET();
}
else
{
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)NDS::ARM7IOWrite32, true);
}
SetJumpTarget(region);
if (!store)
{
ABI_PushRegistersAndAdjustStack({RSCRATCH2}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
ABI_CallFunction(Wifi::Read);
ABI_PopRegistersAndAdjustStack({RSCRATCH2}, 8);
ADD(32, R(RSCRATCH2), Imm8(2));
ABI_PushRegistersAndAdjustStack({EAX}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
ABI_CallFunction(Wifi::Read);
MOV(32, R(RSCRATCH2), R(EAX));
SHL(32, R(RSCRATCH2), Imm8(16));
ABI_PopRegistersAndAdjustStack({EAX}, 8);
OR(32, R(EAX), R(RSCRATCH2));
}
else
{
ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
MOVZX(32, 16, ABI_PARAM2, R(R11));
ABI_CallFunction(Wifi::Write);
ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
SHR(32, R(R11), Imm8(16));
ADD(32, R(RSCRATCH2), Imm8(2));
ABI_PushRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
MOVZX(32, 16, ABI_PARAM2, R(R11));
ABI_CallFunction(Wifi::Write);
ABI_PopRegistersAndAdjustStack({RSCRATCH2, R11}, 8);
}
}
break;
case 0x06000000:
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
if (!store)
{
ABI_PushRegistersAndAdjustStack({}, 8);
ABI_CallFunction(GPU::ReadVRAM_ARM7<u32>);
ABI_PopRegistersAndAdjustStack({}, 8);
}
else
{
AND(32, R(ABI_PARAM1), Imm32(0x40000 - 1));
MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM)), Imm32(0));
MOV(64, MScaled(ABI_PARAM1, SCALE_4, squeezePointer(cache.ARM7_WVRAM) + 8), Imm32(0));
MOV(32, R(ABI_PARAM2), R(R11));
JMP((u8*)GPU::WriteVRAM_ARM7<u32>, true);
}
break;
case 0x08000000:
case 0x09000000:
case 0x0A000000:
if (!store)
MOV(32, R(RSCRATCH), Imm32(0xFFFFFFFF));
break;
/*default:
ABI_PushRegistersAndAdjustStack({}, 8, 0);
MOV(32, R(ABI_PARAM1), R(RSCRATCH2));
ABI_CallFunction(NDS::ARM7Read32);
ABI_PopRegistersAndAdjustStack({}, 8, 0);
break;*/
}
if (!store)
{
MOV(32, R(ECX), MDisp(RSP, 8));
ROR_(32, R(RSCRATCH), R(ECX));
}
RET();
return res;
}
// Builds the offset operand of the current ARM word load/store:
// a 12-bit immediate when bit 25 is clear, otherwise register Rm
// shifted by an immediate amount.
OpArg Compiler::A_Comp_GetMemWBOffset()
{
    const bool registerOffset = CurrentInstr.Instr & (1 << 25);
    if (registerOffset)
    {
        int shiftType = (CurrentInstr.Instr >> 5) & 0x3;
        int shiftAmount = (CurrentInstr.Instr >> 7) & 0x1F;
        OpArg rm = MapReg(CurrentInstr.A_Reg(0));
        // required by the helper's signature, the result is not needed here
        bool carryUsed;
        return Comp_RegShiftImm(shiftType, shiftAmount, rm, false, carryUsed);
    }

    return Imm32(CurrentInstr.Instr & 0xFFF);
}
void Compiler::A_Comp_MemWB()
{
OpArg rn = MapReg(CurrentInstr.A_Reg(16));
OpArg rd = MapReg(CurrentInstr.A_Reg(12));
bool load = CurrentInstr.Instr & (1 << 20);
MOV(32, R(RSCRATCH2), rn);
if (CurrentInstr.Instr & (1 << 24))
{
OpArg offset = A_Comp_GetMemWBOffset();
if (CurrentInstr.Instr & (1 << 23))
ADD(32, R(RSCRATCH2), offset);
else
SUB(32, R(RSCRATCH2), offset);
if (CurrentInstr.Instr & (1 << 21))
MOV(32, rn, R(RSCRATCH2));
}
u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][2] : CurrentInstr.CodeCycles;
MOV(32, R(RSCRATCH3), Imm32(cycles));
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(24));
AND(32, R(RSCRATCH), Imm8(0xF));
void** funcArray;
if (load)
funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else
{
funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
MOV(32, R(R11), rd);
}
CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
if (load)
MOV(32, R(RSCRATCH2), R(RSCRATCH));
if (!(CurrentInstr.Instr & (1 << 24)))
{
OpArg offset = A_Comp_GetMemWBOffset();
if (CurrentInstr.Instr & (1 << 23))
ADD(32, rn, offset);
else
SUB(32, rn, offset);
}
if (load)
MOV(32, rd, R(RSCRATCH2));
}
void Compiler::T_Comp_MemReg()
{
OpArg rd = MapReg(CurrentInstr.T_Reg(0));
OpArg rb = MapReg(CurrentInstr.T_Reg(3));
OpArg ro = MapReg(CurrentInstr.T_Reg(6));
int op = (CurrentInstr.Instr >> 10) & 0x3;
bool load = op & 0x2;
MOV(32, R(RSCRATCH2), rb);
ADD(32, R(RSCRATCH2), ro);
u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles);
MOV(32, R(RSCRATCH3), Imm32(cycles));
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(24));
AND(32, R(RSCRATCH), Imm8(0xF));
void** funcArray;
if (load)
funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else
{
funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
MOV(32, R(R11), rd);
}
CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
if (load)
MOV(32, rd, R(RSCRATCH));
}
void Compiler::T_Comp_MemImm()
{
// TODO: aufräumen!!!
OpArg rd = MapReg(CurrentInstr.T_Reg(0));
OpArg rb = MapReg(CurrentInstr.T_Reg(3));
int op = (CurrentInstr.Instr >> 11) & 0x3;
u32 offset = ((CurrentInstr.Instr >> 6) & 0x1F) * 4;
bool load = op & 0x1;
LEA(32, RSCRATCH2, MDisp(rb.GetSimpleReg(), offset));
u32 cycles = Num ? NDS::ARM7MemTimings[CurrentInstr.CodeCycles][0] : (R15 & 0x2 ? 0 : CurrentInstr.CodeCycles);
MOV(32, R(RSCRATCH3), Imm32(cycles));
MOV(32, R(RSCRATCH), R(RSCRATCH2));
SHR(32, R(RSCRATCH), Imm8(24));
AND(32, R(RSCRATCH), Imm8(0xF));
void** funcArray;
if (load)
funcArray = Num ? ReadMemFuncs7[CodeRegion == 0x02] : ReadMemFuncs9;
else
{
funcArray = Num ? WriteMemFuncs7[CodeRegion == 0x02] : WriteMemFuncs9;
MOV(32, R(R11), rd);
}
CALLptr(MScaled(RSCRATCH, SCALE_8, squeezePointer(funcArray)));
if (load)
MOV(32, rd, R(RSCRATCH));
}
}

View File

@ -83,10 +83,10 @@ enum
ak_ALU(BIC),
ak_ALU(MVN),
ak_ALU(TST),
ak_ALU(TEQ),
ak_ALU(CMP),
ak_ALU(CMN),
ak_Test(TST),
ak_Test(TEQ),
ak_Test(CMP),
ak_Test(CMN),
ak_MUL,
ak_MLA,

View File

@ -53,6 +53,7 @@ add_library(core STATIC
ARMJIT.cpp
ARMJIT_x64/ARMJIT_Compiler.cpp
ARMJIT_x64/ARMJIT_ALU.cpp
ARMJIT_x64/ARMJIT_LoadStore.cpp
dolphin/CommonFuncs.cpp
dolphin/x64ABI.cpp

View File

@ -37,7 +37,8 @@
// xmm0-xmm15 use the upper 16 bits in the functions that push/pop registers.
#define ABI_ALL_CALLER_SAVED \
(BitSet32{RAX, RCX, RDX, R8, R9, R10, R11})
(BitSet32{RAX, RCX, RDX, R8, R9, R10, R11, XMM0 + 16, XMM1 + 16, XMM2 + 16, XMM3 + 16, \
XMM4 + 16, XMM5 + 16})
#else // 64-bit Unix / OS X
#define ABI_PARAM1 RDI