jit: thumb block transfer working
also pc and sp relative loads and some refactoring
This commit is contained in:
parent
2c44bf927c
commit
ff97211114
|
@ -12,13 +12,13 @@ namespace ARMJIT
|
||||||
{
|
{
|
||||||
|
|
||||||
template <typename T, typename Reg>
|
template <typename T, typename Reg>
|
||||||
class RegCache
|
class RegisterCache
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
RegCache()
|
RegisterCache()
|
||||||
{}
|
{}
|
||||||
|
|
||||||
RegCache(T* compiler, FetchedInstr instrs[], int instrsCount)
|
RegisterCache(T* compiler, FetchedInstr instrs[], int instrsCount)
|
||||||
: Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount)
|
: Compiler(compiler), Instrs(instrs), InstrsCount(instrsCount)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 16; i++)
|
for (int i = 0; i < 16; i++)
|
|
@ -9,20 +9,20 @@ using namespace Gen;
|
||||||
namespace ARMJIT
|
namespace ARMJIT
|
||||||
{
|
{
|
||||||
template <>
|
template <>
|
||||||
const X64Reg RegCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
|
const X64Reg RegisterCache<Compiler, X64Reg>::NativeRegAllocOrder[] =
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
RBX, RSI, RDI, R12, R13
|
RBX, RSI, RDI, R12, R13, R14
|
||||||
#else
|
#else
|
||||||
RBX, R12, R13
|
RBX, R12, R13, R14 // this is sad
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
template <>
|
template <>
|
||||||
const int RegCache<Compiler, X64Reg>::NativeRegsAvailable =
|
const int RegisterCache<Compiler, X64Reg>::NativeRegsAvailable =
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
5
|
6
|
||||||
#else
|
#else
|
||||||
3
|
4
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
|
@ -39,10 +39,47 @@ Compiler::Compiler()
|
||||||
MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i);
|
MemoryFuncs7[i][j][1] = Gen_MemoryRoutine7(j, true, 8 << i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (int i = 0; i < 2; i++)
|
||||||
|
for (int j = 0; j < 2; j++)
|
||||||
|
{
|
||||||
|
MemoryFuncsSeq9[i][j] = Gen_MemoryRoutineSeq9(i, j);
|
||||||
|
MemoryFuncsSeq7[i][j][0] = Gen_MemoryRoutineSeq7(i, j, false);
|
||||||
|
MemoryFuncsSeq7[i][j][1] = Gen_MemoryRoutineSeq7(i, j, true);
|
||||||
|
}
|
||||||
|
|
||||||
ResetStart = GetWritableCodePtr();
|
ResetStart = GetWritableCodePtr();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void* Compiler::Gen_ChangeCPSRRoutine()
|
||||||
|
{
|
||||||
|
void* res = (void*)GetWritableCodePtr();
|
||||||
|
|
||||||
|
MOV(32, R(RSCRATCH), R(RCPSR));
|
||||||
|
AND(32, R(RSCRATCH), Imm8(0x1F));
|
||||||
|
CMP(32, R(RSCRATCH), Imm8(0x11));
|
||||||
|
FixupBranch fiq = J_CC(CC_E);
|
||||||
|
CMP(32, R(RSCRATCH), Imm8(0x12));
|
||||||
|
FixupBranch irq = J_CC(CC_E);
|
||||||
|
CMP(32, R(RSCRATCH), Imm8(0x13));
|
||||||
|
FixupBranch svc = J_CC(CC_E);
|
||||||
|
CMP(32, R(RSCRATCH), Imm8(0x17));
|
||||||
|
FixupBranch abt = J_CC(CC_E);
|
||||||
|
CMP(32, R(RSCRATCH), Imm8(0x1B));
|
||||||
|
FixupBranch und = J_CC(CC_E);
|
||||||
|
|
||||||
|
SetJumpTarget(fiq);
|
||||||
|
|
||||||
|
SetJumpTarget(irq);
|
||||||
|
|
||||||
|
SetJumpTarget(svc);
|
||||||
|
|
||||||
|
SetJumpTarget(abt);
|
||||||
|
|
||||||
|
SetJumpTarget(und);
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
DataRegion Compiler::ClassifyAddress(u32 addr)
|
DataRegion Compiler::ClassifyAddress(u32 addr)
|
||||||
{
|
{
|
||||||
if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase)
|
if (Num == 0 && addr >= ((ARMv5*)CurCPU)->DTCMBase && addr < ((ARMv5*)CurCPU)->DTCMBase)
|
||||||
|
@ -106,12 +143,11 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||||
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
|
ABI_PushRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
|
||||||
|
|
||||||
MOV(64, R(RCPU), ImmPtr(cpu));
|
MOV(64, R(RCPU), ImmPtr(cpu));
|
||||||
XOR(32, R(RCycles), R(RCycles));
|
|
||||||
|
|
||||||
LoadCPSR();
|
LoadCPSR();
|
||||||
|
|
||||||
// TODO: this is ugly as a whole, do better
|
// TODO: this is ugly as a whole, do better
|
||||||
RegCache = ARMJIT::RegCache<Compiler, X64Reg>(this, instrs, instrsCount);
|
RegCache = RegisterCache<Compiler, X64Reg>(this, instrs, instrsCount);
|
||||||
|
|
||||||
for (int i = 0; i < instrsCount; i++)
|
for (int i = 0; i < instrsCount; i++)
|
||||||
{
|
{
|
||||||
|
@ -242,7 +278,7 @@ CompiledBlock Compiler::CompileBlock(ARM* cpu, FetchedInstr instrs[], int instrs
|
||||||
RegCache.Flush();
|
RegCache.Flush();
|
||||||
SaveCPSR();
|
SaveCPSR();
|
||||||
|
|
||||||
LEA(32, RAX, MDisp(RCycles, ConstantCycles));
|
MOV(32, R(RAX), Imm32(ConstantCycles));
|
||||||
|
|
||||||
ABI_PopRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
|
ABI_PopRegistersAndAdjustStack({ABI_ALL_CALLEE_SAVED & ABI_ALL_GPRS}, 8, 16);
|
||||||
RET();
|
RET();
|
||||||
|
@ -306,18 +342,20 @@ CompileFunc Compiler::GetCompFunc(int kind)
|
||||||
NULL, NULL, NULL, NULL, NULL,
|
NULL, NULL, NULL, NULL, NULL,
|
||||||
// STR
|
// STR
|
||||||
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
||||||
|
//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
// STRB
|
// STRB
|
||||||
|
//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
||||||
// LDR
|
// LDR
|
||||||
|
//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
||||||
// LDRB
|
// LDRB
|
||||||
|
//NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB, A_Comp_MemWB,
|
||||||
// STRH
|
// STRH
|
||||||
A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
|
A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
|
||||||
// LDRD
|
// LDRD, STRD never used by anything so they stay interpreted (by anything I mean the 5 games I checked)
|
||||||
NULL, NULL, NULL, NULL,
|
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
|
||||||
// STRD
|
|
||||||
NULL, NULL, NULL, NULL,
|
|
||||||
// LDRH
|
// LDRH
|
||||||
A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
|
A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf, A_Comp_MemHalf,
|
||||||
// LDRSB
|
// LDRSB
|
||||||
|
@ -360,10 +398,14 @@ CompileFunc Compiler::GetCompFunc(int kind)
|
||||||
T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm,
|
T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm, T_Comp_MemImm,
|
||||||
// LDR/STR half imm offset
|
// LDR/STR half imm offset
|
||||||
T_Comp_MemImmHalf, T_Comp_MemImmHalf,
|
T_Comp_MemImmHalf, T_Comp_MemImmHalf,
|
||||||
// branch, etc.
|
// LDR/STR sp rel
|
||||||
NULL, NULL, NULL, NULL, NULL, NULL,
|
NULL, NULL,
|
||||||
NULL, NULL, NULL, NULL, NULL, NULL,
|
// PUSH/POP
|
||||||
NULL, NULL
|
NULL, NULL,
|
||||||
|
// LDMIA, STMIA
|
||||||
|
NULL, NULL,
|
||||||
|
NULL, NULL,
|
||||||
|
NULL, NULL, NULL, NULL, NULL, NULL
|
||||||
};
|
};
|
||||||
|
|
||||||
return Thumb ? T_Comp[kind] : A_Comp[kind];
|
return Thumb ? T_Comp[kind] : A_Comp[kind];
|
||||||
|
@ -376,7 +418,7 @@ void Compiler::Comp_AddCycles_C()
|
||||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
|
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles);
|
||||||
|
|
||||||
if (CurInstr.Cond() < 0xE)
|
if (CurInstr.Cond() < 0xE)
|
||||||
ADD(32, R(RCycles), Imm8(cycles));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||||
else
|
else
|
||||||
ConstantCycles += cycles;
|
ConstantCycles += cycles;
|
||||||
}
|
}
|
||||||
|
@ -388,13 +430,15 @@ void Compiler::Comp_AddCycles_CI(u32 i)
|
||||||
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
|
: ((R15 & 0x2) ? 0 : CurInstr.CodeCycles)) + i;
|
||||||
|
|
||||||
if (CurInstr.Cond() < 0xE)
|
if (CurInstr.Cond() < 0xE)
|
||||||
ADD(32, R(RCycles), Imm8(cycles));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm8(cycles));
|
||||||
else
|
else
|
||||||
ConstantCycles += cycles;
|
ConstantCycles += cycles;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
||||||
{
|
{
|
||||||
|
// potential bug: if a register that is still cached gets saved during the mode switch,
|
||||||
|
// the old (stale) value is what gets stored
|
||||||
SaveCPSR();
|
SaveCPSR();
|
||||||
|
|
||||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
#include "../dolphin/x64Emitter.h"
|
#include "../dolphin/x64Emitter.h"
|
||||||
|
|
||||||
#include "../ARMJIT.h"
|
#include "../ARMJIT.h"
|
||||||
#include "../ARMJIT_RegCache.h"
|
#include "../ARMJIT_RegisterCache.h"
|
||||||
|
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@ namespace ARMJIT
|
||||||
{
|
{
|
||||||
|
|
||||||
const Gen::X64Reg RCPU = Gen::RBP;
|
const Gen::X64Reg RCPU = Gen::RBP;
|
||||||
const Gen::X64Reg RCycles = Gen::R14;
|
|
||||||
const Gen::X64Reg RCPSR = Gen::R15;
|
const Gen::X64Reg RCPSR = Gen::R15;
|
||||||
|
|
||||||
const Gen::X64Reg RSCRATCH = Gen::EAX;
|
const Gen::X64Reg RSCRATCH = Gen::EAX;
|
||||||
|
@ -72,6 +71,7 @@ private:
|
||||||
|
|
||||||
void A_Comp_MemWB();
|
void A_Comp_MemWB();
|
||||||
void A_Comp_MemHalf();
|
void A_Comp_MemHalf();
|
||||||
|
void A_Comp_LDM_STM();
|
||||||
|
|
||||||
void T_Comp_ShiftImm();
|
void T_Comp_ShiftImm();
|
||||||
void T_Comp_AddSub_();
|
void T_Comp_AddSub_();
|
||||||
|
@ -86,8 +86,13 @@ private:
|
||||||
void T_Comp_MemImm();
|
void T_Comp_MemImm();
|
||||||
void T_Comp_MemRegHalf();
|
void T_Comp_MemRegHalf();
|
||||||
void T_Comp_MemImmHalf();
|
void T_Comp_MemImmHalf();
|
||||||
|
void T_Comp_LoadPCRel();
|
||||||
|
void T_Comp_MemSPRel();
|
||||||
|
void T_Comp_PUSH_POP();
|
||||||
|
void T_Comp_LDMIA_STMIA();
|
||||||
|
|
||||||
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
|
void Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size);
|
||||||
|
s32 Comp_MemAccessBlock(Gen::OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode);
|
||||||
|
|
||||||
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
|
void Comp_ArithTriOp(void (Compiler::*op)(int, const Gen::OpArg&, const Gen::OpArg&),
|
||||||
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
|
Gen::OpArg rd, Gen::OpArg rn, Gen::OpArg op2, bool carryUsed, int opFlags);
|
||||||
|
@ -100,6 +105,11 @@ private:
|
||||||
void* Gen_MemoryRoutine9(bool store, int size);
|
void* Gen_MemoryRoutine9(bool store, int size);
|
||||||
void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size);
|
void* Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size);
|
||||||
|
|
||||||
|
void* Gen_MemoryRoutineSeq9(bool store, bool preinc);
|
||||||
|
void* Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM);
|
||||||
|
|
||||||
|
void* Gen_ChangeCPSRRoutine();
|
||||||
|
|
||||||
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
|
Gen::OpArg Comp_RegShiftImm(int op, int amount, Gen::OpArg rm, bool S, bool& carryUsed);
|
||||||
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
|
Gen::OpArg Comp_RegShiftReg(int op, Gen::OpArg rs, Gen::OpArg rm, bool S, bool& carryUsed);
|
||||||
|
|
||||||
|
@ -122,11 +132,14 @@ private:
|
||||||
void* MemoryFuncs9[3][2];
|
void* MemoryFuncs9[3][2];
|
||||||
void* MemoryFuncs7[3][2][2];
|
void* MemoryFuncs7[3][2][2];
|
||||||
|
|
||||||
|
void* MemoryFuncsSeq9[2][2];
|
||||||
|
void* MemoryFuncsSeq7[2][2][2];
|
||||||
|
|
||||||
bool CPSRDirty = false;
|
bool CPSRDirty = false;
|
||||||
|
|
||||||
FetchedInstr CurInstr;
|
FetchedInstr CurInstr;
|
||||||
|
|
||||||
RegCache<Compiler, Gen::X64Reg> RegCache;
|
RegisterCache<Compiler, Gen::X64Reg> RegCache;
|
||||||
|
|
||||||
bool Thumb;
|
bool Thumb;
|
||||||
u32 Num;
|
u32 Num;
|
||||||
|
|
|
@ -3,16 +3,6 @@
|
||||||
#include "../GPU.h"
|
#include "../GPU.h"
|
||||||
#include "../Wifi.h"
|
#include "../Wifi.h"
|
||||||
|
|
||||||
namespace NDS
|
|
||||||
{
|
|
||||||
extern u8* SWRAM_ARM9;
|
|
||||||
extern u32 SWRAM_ARM9Mask;
|
|
||||||
extern u8* SWRAM_ARM7;
|
|
||||||
extern u32 SWRAM_ARM7Mask;
|
|
||||||
extern u8 ARM7WRAM[];
|
|
||||||
extern u16 ARM7BIOSProt;
|
|
||||||
}
|
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
|
|
||||||
namespace ARMJIT
|
namespace ARMJIT
|
||||||
|
@ -41,6 +31,49 @@ int squeezePointer(T* ptr)
|
||||||
store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
|
store value - ABI_PARAM2 (a.k.a. RDX = RSCRATCH2 on Windows)
|
||||||
code cycles - ABI_PARAM3
|
code cycles - ABI_PARAM3
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define CALC_CYCLES_9(numC, numD, scratch) \
|
||||||
|
LEA(32, scratch, MComplex(numD, numC, SCALE_1, -6)); \
|
||||||
|
CMP(32, R(numC), R(numD)); \
|
||||||
|
CMOVcc(32, numD, R(numC), CC_G); \
|
||||||
|
CMP(32, R(numD), R(scratch)); \
|
||||||
|
CMOVcc(32, scratch, R(numD), CC_G); \
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch));
|
||||||
|
#define CALC_CYCLES_7_DATA_MAIN_RAM(numC, numD, scratch) \
|
||||||
|
if (codeMainRAM) \
|
||||||
|
{ \
|
||||||
|
LEA(32, scratch, MRegSum(numD, numC)); \
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
if (!store) \
|
||||||
|
ADD(32, R(numC), Imm8(1)); \
|
||||||
|
LEA(32, scratch, MComplex(numD, numC, SCALE_1, -3)); \
|
||||||
|
CMP(32, R(numD), R(numC)); \
|
||||||
|
CMOVcc(32, numC, R(numD), CC_G); \
|
||||||
|
CMP(32, R(numC), R(scratch)); \
|
||||||
|
CMOVcc(32, scratch, R(numC), CC_G); \
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||||
|
}
|
||||||
|
#define CALC_CYCLES_7_DATA_NON_MAIN_RAM(numC, numD, scratch) \
|
||||||
|
if (codeMainRAM) \
|
||||||
|
{ \
|
||||||
|
if (!store) \
|
||||||
|
ADD(32, R(numD), Imm8(1)); \
|
||||||
|
LEA(32, scratch, MComplex(numD, numC, SCALE_1, -3)); \
|
||||||
|
CMP(32, R(numD), R(numC)); \
|
||||||
|
CMOVcc(32, numC, R(numD), CC_G); \
|
||||||
|
CMP(32, R(numC), R(scratch)); \
|
||||||
|
CMOVcc(32, scratch, R(numC), CC_G); \
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||||
|
} \
|
||||||
|
else \
|
||||||
|
{ \
|
||||||
|
LEA(32, scratch, MComplex(numD, numC, SCALE_1, store ? 0 : 1)); \
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(scratch)); \
|
||||||
|
}
|
||||||
|
|
||||||
void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||||
{
|
{
|
||||||
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
|
u32 addressMask = ~(size == 32 ? 3 : (size == 16 ? 1 : 0));
|
||||||
|
@ -56,15 +89,10 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||||
FixupBranch insideITCM = J_CC(CC_B);
|
FixupBranch insideITCM = J_CC(CC_B);
|
||||||
|
|
||||||
// cycle counting!
|
// cycle counting!
|
||||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
MOV(32, R(ABI_PARAM4), R(ABI_PARAM1));
|
||||||
SHR(32, R(RSCRATCH), Imm8(12));
|
SHR(32, R(ABI_PARAM4), Imm8(12));
|
||||||
MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 0)));
|
MOVZX(32, 8, ABI_PARAM4, MComplex(RCPU, ABI_PARAM4, SCALE_4, offsetof(ARMv5, MemTimings) + (size == 32 ? 2 : 1)));
|
||||||
LEA(32, ABI_PARAM4, MComplex(RSCRATCH, ABI_PARAM3, SCALE_1, -6));
|
CALC_CYCLES_9(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
|
||||||
CMP(32, R(ABI_PARAM3), R(RSCRATCH));
|
|
||||||
CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
|
|
||||||
CMP(32, R(ABI_PARAM4), R(RSCRATCH));
|
|
||||||
CMOVcc(32, RSCRATCH, R(ABI_PARAM4), CC_G);
|
|
||||||
ADD(32, R(RCycles), R(RSCRATCH));
|
|
||||||
|
|
||||||
if (store)
|
if (store)
|
||||||
{
|
{
|
||||||
|
@ -101,7 +129,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||||
}
|
}
|
||||||
|
|
||||||
SetJumpTarget(insideDTCM);
|
SetJumpTarget(insideDTCM);
|
||||||
ADD(32, R(RCycles), R(ABI_PARAM3));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM3));
|
||||||
AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
|
AND(32, R(RSCRATCH), Imm32(0x3FFF & addressMask));
|
||||||
if (store)
|
if (store)
|
||||||
MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
|
MOV(size, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM2));
|
||||||
|
@ -120,7 +148,7 @@ void* Compiler::Gen_MemoryRoutine9(bool store, int size)
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
SetJumpTarget(insideITCM);
|
SetJumpTarget(insideITCM);
|
||||||
ADD(32, R(RCycles), R(ABI_PARAM3));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM3));
|
||||||
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
|
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1)); // free up ECX
|
||||||
AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
|
AND(32, R(ABI_PARAM3), Imm32(0x7FFF & addressMask));
|
||||||
if (store)
|
if (store)
|
||||||
|
@ -158,28 +186,13 @@ void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
|
||||||
|
|
||||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
SHR(32, R(RSCRATCH), Imm8(15));
|
SHR(32, R(RSCRATCH), Imm8(15));
|
||||||
MOVZX(32, 8, ABI_PARAM4, MDisp(RSCRATCH, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings)));
|
MOVZX(32, 8, ABI_PARAM4, MScaled(RSCRATCH, SCALE_4, (size == 32 ? 2 : 0) + squeezePointer(NDS::ARM7MemTimings)));
|
||||||
|
|
||||||
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
AND(32, R(RSCRATCH), Imm32(0xFF000000));
|
AND(32, R(RSCRATCH), Imm32(0xFF000000));
|
||||||
CMP(32, R(RSCRATCH), Imm32(0x02000000));
|
CMP(32, R(RSCRATCH), Imm32(0x02000000));
|
||||||
FixupBranch outsideMainRAM = J_CC(CC_NE);
|
FixupBranch outsideMainRAM = J_CC(CC_NE);
|
||||||
if (codeMainRAM)
|
CALC_CYCLES_7_DATA_MAIN_RAM(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
|
||||||
{
|
|
||||||
LEA(32, RSCRATCH, MRegSum(ABI_PARAM4, ABI_PARAM3));
|
|
||||||
ADD(32, R(RCycles), R(RSCRATCH));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (!store)
|
|
||||||
ADD(32, R(ABI_PARAM3), Imm8(1));
|
|
||||||
LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3));
|
|
||||||
CMP(32, R(ABI_PARAM4), R(ABI_PARAM3));
|
|
||||||
CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G);
|
|
||||||
CMP(32, R(ABI_PARAM3), R(RSCRATCH));
|
|
||||||
CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
|
|
||||||
ADD(32, R(RCycles), R(RSCRATCH));
|
|
||||||
}
|
|
||||||
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1));
|
MOV(32, R(ABI_PARAM3), R(ABI_PARAM1));
|
||||||
AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask));
|
AND(32, R(ABI_PARAM3), Imm32((MAIN_RAM_SIZE - 1) & addressMask));
|
||||||
if (store)
|
if (store)
|
||||||
|
@ -205,22 +218,7 @@ void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
|
||||||
RET();
|
RET();
|
||||||
|
|
||||||
SetJumpTarget(outsideMainRAM);
|
SetJumpTarget(outsideMainRAM);
|
||||||
if (codeMainRAM)
|
CALC_CYCLES_7_DATA_NON_MAIN_RAM(ABI_PARAM3, ABI_PARAM4, RSCRATCH)
|
||||||
{
|
|
||||||
if (!store)
|
|
||||||
ADD(32, R(ABI_PARAM4), Imm8(1));
|
|
||||||
LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, -3));
|
|
||||||
CMP(32, R(ABI_PARAM4), R(ABI_PARAM3));
|
|
||||||
CMOVcc(32, ABI_PARAM3, R(ABI_PARAM4), CC_G);
|
|
||||||
CMP(32, R(ABI_PARAM3), R(RSCRATCH));
|
|
||||||
CMOVcc(32, RSCRATCH, R(ABI_PARAM3), CC_G);
|
|
||||||
ADD(32, R(RCycles), R(RSCRATCH));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
LEA(32, RSCRATCH, MComplex(ABI_PARAM4, ABI_PARAM3, SCALE_1, store ? 0 : 1));
|
|
||||||
ADD(32, R(RCycles), R(RSCRATCH));
|
|
||||||
}
|
|
||||||
if (store)
|
if (store)
|
||||||
{
|
{
|
||||||
if (size > 8)
|
if (size > 8)
|
||||||
|
@ -257,7 +255,189 @@ void* Compiler::Gen_MemoryRoutine7(bool store, bool codeMainRAM, int size)
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compiler::Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int size)
|
#define MEMORY_SEQ_WHILE_COND \
|
||||||
|
if (!store) \
|
||||||
|
MOV(32, currentElement, R(EAX));\
|
||||||
|
if (!preinc) \
|
||||||
|
ADD(32, R(ABI_PARAM1), Imm8(4)); \
|
||||||
|
\
|
||||||
|
SUB(32, R(ABI_PARAM3), Imm8(1)); \
|
||||||
|
J_CC(CC_NZ, repeat);
|
||||||
|
|
||||||
|
/*
|
||||||
|
ABI_PARAM1 address
|
||||||
|
ABI_PARAM2 address where registers are stored
|
||||||
|
ABI_PARAM3 how many values to read/write
|
||||||
|
ABI_PARAM4 code cycles
|
||||||
|
|
||||||
|
Dolphin x64CodeEmitter is my favourite assembler
|
||||||
|
*/
|
||||||
|
void* Compiler::Gen_MemoryRoutineSeq9(bool store, bool preinc)
|
||||||
|
{
|
||||||
|
const u8* zero = GetCodePtr();
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM4));
|
||||||
|
RET();
|
||||||
|
|
||||||
|
void* res = (void*)GetWritableCodePtr();
|
||||||
|
|
||||||
|
TEST(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||||
|
J_CC(CC_Z, zero);
|
||||||
|
|
||||||
|
PUSH(ABI_PARAM3);
|
||||||
|
PUSH(ABI_PARAM4); // we need you later
|
||||||
|
|
||||||
|
const u8* repeat = GetCodePtr();
|
||||||
|
|
||||||
|
if (preinc)
|
||||||
|
ADD(32, R(ABI_PARAM1), Imm8(4));
|
||||||
|
|
||||||
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
|
SUB(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMBase)));
|
||||||
|
CMP(32, R(RSCRATCH), MDisp(RCPU, offsetof(ARMv5, DTCMSize)));
|
||||||
|
FixupBranch insideDTCM = J_CC(CC_B);
|
||||||
|
|
||||||
|
CMP(32, R(ABI_PARAM1), MDisp(RCPU, offsetof(ARMv5, ITCMSize)));
|
||||||
|
FixupBranch insideITCM = J_CC(CC_B);
|
||||||
|
|
||||||
|
OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8); // wasting stack space like a gangster
|
||||||
|
|
||||||
|
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||||
|
AND(32, R(ABI_PARAM1), Imm8(~3));
|
||||||
|
if (store)
|
||||||
|
{
|
||||||
|
MOV(32, R(ABI_PARAM2), currentElement);
|
||||||
|
CALL((void*)NDS::ARM9Write32);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
CALL((void*)NDS::ARM9Read32);
|
||||||
|
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||||
|
|
||||||
|
MEMORY_SEQ_WHILE_COND
|
||||||
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
|
SHR(32, R(RSCRATCH), Imm8(12));
|
||||||
|
MOVZX(32, 8, ABI_PARAM2, MComplex(RCPU, RSCRATCH, SCALE_4, 2 + offsetof(ARMv5, MemTimings)));
|
||||||
|
MOVZX(32, 8, RSCRATCH, MComplex(RCPU, RSCRATCH, SCALE_4, 3 + offsetof(ARMv5, MemTimings)));
|
||||||
|
|
||||||
|
FixupBranch finishIt1 = J();
|
||||||
|
|
||||||
|
SetJumpTarget(insideDTCM);
|
||||||
|
AND(32, R(RSCRATCH), Imm32(0x3FFF & ~3));
|
||||||
|
if (store)
|
||||||
|
{
|
||||||
|
MOV(32, R(ABI_PARAM4), currentElement);
|
||||||
|
MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)), R(ABI_PARAM4));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, DTCM)));
|
||||||
|
|
||||||
|
MEMORY_SEQ_WHILE_COND
|
||||||
|
MOV(32, R(RSCRATCH), Imm32(1)); // sequential access time
|
||||||
|
MOV(32, R(ABI_PARAM2), Imm32(1)); // non sequential
|
||||||
|
FixupBranch finishIt2 = J();
|
||||||
|
|
||||||
|
SetJumpTarget(insideITCM);
|
||||||
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
|
AND(32, R(RSCRATCH), Imm32(0x7FFF & ~3));
|
||||||
|
if (store)
|
||||||
|
{
|
||||||
|
MOV(32, R(ABI_PARAM4), currentElement);
|
||||||
|
MOV(32, MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)), R(ABI_PARAM4));
|
||||||
|
XOR(32, R(ABI_PARAM4), R(ABI_PARAM4));
|
||||||
|
MOV(64, MScaled(RSCRATCH, SCALE_4, squeezePointer(cache.ARM9_ITCM)), R(ABI_PARAM4));
|
||||||
|
MOV(64, MScaled(RSCRATCH, SCALE_4, squeezePointer(cache.ARM9_ITCM) + 8), R(ABI_PARAM4));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
MOV(32, R(RSCRATCH), MComplex(RCPU, RSCRATCH, SCALE_1, offsetof(ARMv5, ITCM)));
|
||||||
|
|
||||||
|
MEMORY_SEQ_WHILE_COND
|
||||||
|
MOV(32, R(RSCRATCH), Imm32(1));
|
||||||
|
MOV(32, R(ABI_PARAM2), Imm32(1));
|
||||||
|
|
||||||
|
SetJumpTarget(finishIt1);
|
||||||
|
SetJumpTarget(finishIt2);
|
||||||
|
|
||||||
|
POP(ABI_PARAM4);
|
||||||
|
POP(ABI_PARAM3);
|
||||||
|
|
||||||
|
CMP(32, R(ABI_PARAM3), Imm8(1));
|
||||||
|
FixupBranch skipSequential = J_CC(CC_E);
|
||||||
|
SUB(32, R(ABI_PARAM3), Imm8(1));
|
||||||
|
IMUL(32, R(ABI_PARAM3));
|
||||||
|
ADD(32, R(ABI_PARAM2), R(RSCRATCH));
|
||||||
|
SetJumpTarget(skipSequential);
|
||||||
|
|
||||||
|
CALC_CYCLES_9(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
|
||||||
|
RET();
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
void* Compiler::Gen_MemoryRoutineSeq7(bool store, bool preinc, bool codeMainRAM)
|
||||||
|
{
|
||||||
|
const u8* zero = GetCodePtr();
|
||||||
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), R(ABI_PARAM4));
|
||||||
|
RET();
|
||||||
|
|
||||||
|
void* res = (void*)GetWritableCodePtr();
|
||||||
|
|
||||||
|
TEST(32, R(ABI_PARAM3), R(ABI_PARAM3));
|
||||||
|
J_CC(CC_Z, zero);
|
||||||
|
|
||||||
|
PUSH(ABI_PARAM3);
|
||||||
|
PUSH(ABI_PARAM4); // we need you later
|
||||||
|
|
||||||
|
const u8* repeat = GetCodePtr();
|
||||||
|
|
||||||
|
if (preinc)
|
||||||
|
ADD(32, R(ABI_PARAM1), Imm8(4));
|
||||||
|
|
||||||
|
OpArg currentElement = MComplex(ABI_PARAM2, ABI_PARAM3, SCALE_8, -8);
|
||||||
|
|
||||||
|
ABI_PushRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||||
|
AND(32, R(ABI_PARAM1), Imm8(~3));
|
||||||
|
if (store)
|
||||||
|
{
|
||||||
|
MOV(32, R(ABI_PARAM2), currentElement);
|
||||||
|
CALL((void*)NDS::ARM7Write32);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
CALL((void*)NDS::ARM7Read32);
|
||||||
|
ABI_PopRegistersAndAdjustStack({ABI_PARAM1, ABI_PARAM2, ABI_PARAM3}, 8);
|
||||||
|
|
||||||
|
MEMORY_SEQ_WHILE_COND
|
||||||
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
|
SHR(32, R(RSCRATCH), Imm8(15));
|
||||||
|
MOVZX(32, 8, ABI_PARAM2, MScaled(RSCRATCH, SCALE_4, 2 + squeezePointer(NDS::ARM7MemTimings)));
|
||||||
|
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_4, 3 + squeezePointer(NDS::ARM7MemTimings)));
|
||||||
|
|
||||||
|
POP(ABI_PARAM4);
|
||||||
|
POP(ABI_PARAM3);
|
||||||
|
|
||||||
|
CMP(32, R(ABI_PARAM3), Imm8(1));
|
||||||
|
FixupBranch skipSequential = J_CC(CC_E);
|
||||||
|
SUB(32, R(ABI_PARAM3), Imm8(1));
|
||||||
|
IMUL(32, R(ABI_PARAM3));
|
||||||
|
ADD(32, R(ABI_PARAM2), R(RSCRATCH));
|
||||||
|
SetJumpTarget(skipSequential);
|
||||||
|
|
||||||
|
MOV(32, R(RSCRATCH), R(ABI_PARAM1));
|
||||||
|
AND(32, R(RSCRATCH), Imm32(0xFF000000));
|
||||||
|
CMP(32, R(RSCRATCH), Imm32(0x02000000));
|
||||||
|
FixupBranch outsideMainRAM = J_CC(CC_NE);
|
||||||
|
CALC_CYCLES_7_DATA_MAIN_RAM(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
|
||||||
|
RET();
|
||||||
|
|
||||||
|
SetJumpTarget(outsideMainRAM);
|
||||||
|
CALC_CYCLES_7_DATA_NON_MAIN_RAM(ABI_PARAM4, ABI_PARAM2, RSCRATCH)
|
||||||
|
RET();
|
||||||
|
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef CALC_CYCLES_9
|
||||||
|
#undef MEMORY_SEQ_WHILE_COND
|
||||||
|
|
||||||
|
void Compiler::Comp_MemAccess(OpArg rd, bool signExtend, bool store, int size)
|
||||||
{
|
{
|
||||||
if (store)
|
if (store)
|
||||||
MOV(32, R(ABI_PARAM2), rd);
|
MOV(32, R(ABI_PARAM2), rd);
|
||||||
|
@ -278,6 +458,129 @@ void Compiler::Comp_MemAccess(Gen::OpArg rd, bool signExtend, bool store, int si
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
s32 Compiler::Comp_MemAccessBlock(OpArg rb, BitSet16 regs, bool store, bool preinc, bool decrement, bool usermode)
|
||||||
|
{
|
||||||
|
int regsCount = regs.Count();
|
||||||
|
|
||||||
|
const u8 userModeOffsets[] =
|
||||||
|
{
|
||||||
|
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||||
|
offsetof(ARM, R[12]), offsetof(ARM, R[13]), offsetof(ARM, R[14]), 0,
|
||||||
|
|
||||||
|
offsetof(ARM, R_FIQ[0]), offsetof(ARM, R_FIQ[1]), offsetof(ARM, R_FIQ[2]), offsetof(ARM, R_FIQ[3]),
|
||||||
|
offsetof(ARM, R_FIQ[4]), offsetof(ARM, R_FIQ[5]), offsetof(ARM, R_FIQ[6]), 0,
|
||||||
|
|
||||||
|
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||||
|
offsetof(ARM, R[12]), offsetof(ARM, R_IRQ[13]), offsetof(ARM, R_IRQ[14]), 0,
|
||||||
|
|
||||||
|
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||||
|
offsetof(ARM, R[12]), offsetof(ARM, R_SVC[13]), offsetof(ARM, R_SVC[14]), 0,
|
||||||
|
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
|
||||||
|
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||||
|
offsetof(ARM, R[12]), offsetof(ARM, R_ABT[13]), offsetof(ARM, R_ABT[14]), 0,
|
||||||
|
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
|
||||||
|
offsetof(ARM, R[8]), offsetof(ARM, R[9]), offsetof(ARM, R[10]), offsetof(ARM, R[11]),
|
||||||
|
offsetof(ARM, R[12]), offsetof(ARM, R_UND[13]), offsetof(ARM, R_UND[14]), 0,
|
||||||
|
};
|
||||||
|
|
||||||
|
if (decrement)
|
||||||
|
{
|
||||||
|
MOV_sum(32, ABI_PARAM1, rb, Imm32(-regsCount * 4));
|
||||||
|
preinc = !preinc;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
MOV(32, R(ABI_PARAM1), rb);
|
||||||
|
|
||||||
|
MOV(32, R(ABI_PARAM3), Imm32(regsCount));
|
||||||
|
u32 cycles = Num
|
||||||
|
? NDS::ARM7MemTimings[CurInstr.CodeCycles][Thumb ? 0 : 2]
|
||||||
|
: (R15 & 0x2 ? 0 : CurInstr.CodeCycles);
|
||||||
|
MOV(32, R(ABI_PARAM4), Imm32(cycles));
|
||||||
|
if (!store)
|
||||||
|
{
|
||||||
|
SUB(32, R(RSP), regsCount < 16 ? Imm8(regsCount * 8) : Imm32(regsCount * 8));
|
||||||
|
MOV(64, R(ABI_PARAM2), R(RSP));
|
||||||
|
|
||||||
|
CALL(Num == 0
|
||||||
|
? MemoryFuncsSeq9[0][preinc]
|
||||||
|
: MemoryFuncsSeq7[0][preinc][CodeRegion == 0x02]);
|
||||||
|
|
||||||
|
for (int reg = 15; reg >= 0; reg--)
|
||||||
|
{
|
||||||
|
if (regs[reg])
|
||||||
|
{
|
||||||
|
if (usermode && reg >= 8 && reg < 15)
|
||||||
|
{
|
||||||
|
MOV(32, R(RSCRATCH2), R(RCPSR));
|
||||||
|
AND(32, R(RSCRATCH2), Imm8(0x1F));
|
||||||
|
// (RSCRATCH2 - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
|
||||||
|
MOVZX(32, 8, RSCRATCH2, MScaled(RSCRATCH2, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
|
||||||
|
POP(RSCRATCH);
|
||||||
|
MOV(32, MRegSum(RCPU, RSCRATCH2), R(RSCRATCH));
|
||||||
|
}
|
||||||
|
else if (RegCache.Mapping[reg] == INVALID_REG)
|
||||||
|
{
|
||||||
|
assert(reg != 15);
|
||||||
|
|
||||||
|
POP(RSCRATCH);
|
||||||
|
SaveReg(reg, RSCRATCH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (reg != 15)
|
||||||
|
RegCache.DirtyRegs |= (1 << reg);
|
||||||
|
POP(MapReg(reg).GetSimpleReg());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (regs[15])
|
||||||
|
{
|
||||||
|
if (Num == 1)
|
||||||
|
OR(32, MapReg(15), Imm8(1));
|
||||||
|
Comp_JumpTo(MapReg(15).GetSimpleReg(), usermode);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
for (int reg : regs)
|
||||||
|
{
|
||||||
|
if (usermode && reg >= 8 && reg < 15)
|
||||||
|
{
|
||||||
|
MOV(32, R(RSCRATCH), R(RCPSR));
|
||||||
|
AND(32, R(RSCRATCH), Imm8(0x1F));
|
||||||
|
// (RSCRATCH - 0x11) * 8 + squeezePointer(userModeOffsets) + (reg - 8), algebra is great!
|
||||||
|
MOVZX(32, 8, RSCRATCH, MScaled(RSCRATCH, SCALE_8, squeezePointer(userModeOffsets) - 0x11 * 8 + (reg - 8)));
|
||||||
|
MOV(32, R(RSCRATCH), MRegSum(RCPU, RSCRATCH));
|
||||||
|
PUSH(RSCRATCH);
|
||||||
|
}
|
||||||
|
else if (RegCache.Mapping[reg] == INVALID_REG)
|
||||||
|
{
|
||||||
|
LoadReg(reg, RSCRATCH);
|
||||||
|
PUSH(RSCRATCH);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
PUSH(MapReg(reg).GetSimpleReg());
|
||||||
|
}
|
||||||
|
MOV(64, R(ABI_PARAM2), R(RSP));
|
||||||
|
|
||||||
|
CALL(Num == 0
|
||||||
|
? MemoryFuncsSeq9[1][preinc]
|
||||||
|
: MemoryFuncsSeq7[1][preinc][CodeRegion == 0x02]);
|
||||||
|
|
||||||
|
ADD(32, R(RSP), regsCount < 16 ? Imm8(regsCount * 8) : Imm32(regsCount * 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
return (regsCount * 4) * (decrement ? -1 : 1);
|
||||||
|
}
|
||||||
|
|
||||||
OpArg Compiler::A_Comp_GetMemWBOffset()
|
OpArg Compiler::A_Comp_GetMemWBOffset()
|
||||||
{
|
{
|
||||||
if (!(CurInstr.Instr & (1 << 25)))
|
if (!(CurInstr.Instr & (1 << 25)))
|
||||||
|
@ -354,6 +657,25 @@ void Compiler::A_Comp_MemHalf()
|
||||||
? Imm32(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
|
? Imm32(CurInstr.Instr & 0xF | ((CurInstr.Instr >> 4) & 0xF0))
|
||||||
: MapReg(CurInstr.A_Reg(0));
|
: MapReg(CurInstr.A_Reg(0));
|
||||||
|
|
||||||
|
int op = (CurInstr.Instr >> 5) & 0x3;
|
||||||
|
bool load = CurInstr.Instr & (1 << 20);
|
||||||
|
|
||||||
|
bool signExtend = false;
|
||||||
|
int size;
|
||||||
|
if (!load)
|
||||||
|
{
|
||||||
|
size = op == 1 ? 16 : 32;
|
||||||
|
load = op == 2;
|
||||||
|
}
|
||||||
|
else if (load)
|
||||||
|
{
|
||||||
|
size = op == 2 ? 8 : 16;
|
||||||
|
signExtend = op > 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (size == 32 && Num == 1)
|
||||||
|
return; // NOP
|
||||||
|
|
||||||
if (CurInstr.Instr & (1 << 24))
|
if (CurInstr.Instr & (1 << 24))
|
||||||
{
|
{
|
||||||
if (CurInstr.Instr & (1 << 23))
|
if (CurInstr.Instr & (1 << 23))
|
||||||
|
@ -370,19 +692,6 @@ void Compiler::A_Comp_MemHalf()
|
||||||
else
|
else
|
||||||
MOV(32, R(ABI_PARAM1), rn);
|
MOV(32, R(ABI_PARAM1), rn);
|
||||||
|
|
||||||
int op = (CurInstr.Instr >> 5) & 0x3;
|
|
||||||
bool load = CurInstr.Instr & (1 << 20);
|
|
||||||
|
|
||||||
bool signExtend = false;
|
|
||||||
int size;
|
|
||||||
if (!load && op == 1)
|
|
||||||
size = 16;
|
|
||||||
else if (load)
|
|
||||||
{
|
|
||||||
size = op == 2 ? 8 : 16;
|
|
||||||
signExtend = op > 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!(CurInstr.Instr & (1 << 24)))
|
if (!(CurInstr.Instr & (1 << 24)))
|
||||||
{
|
{
|
||||||
if (CurInstr.Instr & (1 << 23))
|
if (CurInstr.Instr & (1 << 23))
|
||||||
|
@ -412,6 +721,24 @@ void Compiler::T_Comp_MemReg()
|
||||||
Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
|
Comp_MemAccess(rd, false, !load, byte ? 8 : 32);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Compiler::A_Comp_LDM_STM()
|
||||||
|
{
|
||||||
|
BitSet16 regs(CurInstr.Instr & 0xFFFF);
|
||||||
|
|
||||||
|
bool load = (CurInstr.Instr >> 20) & 1;
|
||||||
|
bool pre = (CurInstr.Instr >> 24) & 1;
|
||||||
|
bool add = (CurInstr.Instr >> 23) & 1;
|
||||||
|
bool writeback = (CurInstr.Instr >> 21) & 1;
|
||||||
|
bool usermode = (CurInstr.Instr >> 22) & 1;
|
||||||
|
|
||||||
|
OpArg rn = MapReg(CurInstr.A_Reg(16));
|
||||||
|
|
||||||
|
s32 offset = Comp_MemAccessBlock(rn, regs, !load, pre, !add, false);
|
||||||
|
|
||||||
|
if (writeback)
|
||||||
|
ADD(32, rn, offset >= INT8_MIN && offset < INT8_MAX ? Imm8(offset) : Imm32(offset));
|
||||||
|
}
|
||||||
|
|
||||||
void Compiler::T_Comp_MemImm()
|
void Compiler::T_Comp_MemImm()
|
||||||
{
|
{
|
||||||
OpArg rd = MapReg(CurInstr.T_Reg(0));
|
OpArg rd = MapReg(CurInstr.T_Reg(0));
|
||||||
|
@ -456,4 +783,56 @@ void Compiler::T_Comp_MemImmHalf()
|
||||||
Comp_MemAccess(rd, false, !load, 16);
|
Comp_MemAccess(rd, false, !load, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Compiler::T_Comp_LoadPCRel()
|
||||||
|
{
|
||||||
|
OpArg rd = MapReg(CurInstr.T_Reg(8));
|
||||||
|
u32 addr = (R15 & ~0x2) + ((CurInstr.Instr & 0xFF) << 2);
|
||||||
|
|
||||||
|
// hopefully this doesn't break
|
||||||
|
u32 val; CurCPU->DataRead32(addr, &val);
|
||||||
|
MOV(32, rd, Imm32(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compiler::T_Comp_MemSPRel()
|
||||||
|
{
|
||||||
|
u32 offset = (CurInstr.Instr & 0xFF) * 4;
|
||||||
|
OpArg rd = MapReg(CurInstr.T_Reg(8));
|
||||||
|
bool load = CurInstr.Instr & (1 << 11);
|
||||||
|
|
||||||
|
LEA(32, ABI_PARAM1, MDisp(MapReg(13).GetSimpleReg(), offset));
|
||||||
|
|
||||||
|
Comp_MemAccess(rd, false, !load, 32);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compiler::T_Comp_PUSH_POP()
|
||||||
|
{
|
||||||
|
bool load = CurInstr.Instr & (1 << 11);
|
||||||
|
BitSet16 regs(CurInstr.Instr & 0xFF);
|
||||||
|
if (CurInstr.Instr & (1 << 8))
|
||||||
|
{
|
||||||
|
if (load)
|
||||||
|
regs[15] = true;
|
||||||
|
else
|
||||||
|
regs[14] = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
OpArg sp = MapReg(13);
|
||||||
|
|
||||||
|
s32 offset = Comp_MemAccessBlock(sp, regs, !load, !load, !load, false);
|
||||||
|
|
||||||
|
ADD(32, sp, Imm8(offset)); // offset will be always be in range since PUSH accesses 9 regs max
|
||||||
|
}
|
||||||
|
|
||||||
|
void Compiler::T_Comp_LDMIA_STMIA()
|
||||||
|
{
|
||||||
|
BitSet16 regs(CurInstr.Instr & 0xFF);
|
||||||
|
OpArg rb = MapReg(CurInstr.T_Reg(8));
|
||||||
|
bool load = CurInstr.Instr & (1 << 11);
|
||||||
|
|
||||||
|
s32 offset = Comp_MemAccessBlock(rb, regs, !load, false, false, false);
|
||||||
|
|
||||||
|
if (!load || !regs[CurInstr.T_Reg(8)])
|
||||||
|
ADD(32, rb, Imm8(offset));
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
|
@ -25,9 +25,7 @@ enum {
|
||||||
|
|
||||||
A_Link = 1 << 10,
|
A_Link = 1 << 10,
|
||||||
|
|
||||||
A_LDMSTM = 1 << 11,
|
A_UnkOnARM7 = 1 << 11,
|
||||||
|
|
||||||
A_ARM9Only = 1 << 12,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#define A_BIOP A_Read16
|
#define A_BIOP A_Read16
|
||||||
|
@ -97,12 +95,12 @@ const u32 A_SMULWy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULWy);
|
||||||
const u32 A_SMLALxy = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLALxy);
|
const u32 A_SMLALxy = A_Write16 | A_Write12 | A_Read16 | A_Read12 | A_Read0 | A_Read8 | ak(ak_SMLALxy);
|
||||||
const u32 A_SMULxy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULxy);
|
const u32 A_SMULxy = A_Write16 | A_Read0 | A_Read8 | ak(ak_SMULxy);
|
||||||
|
|
||||||
const u32 A_CLZ = A_Write12 | A_Read0 | A_ARM9Only | ak(ak_CLZ);
|
const u32 A_CLZ = A_Write12 | A_Read0 | A_UnkOnARM7 | ak(ak_CLZ);
|
||||||
|
|
||||||
const u32 A_QADD = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QADD);
|
const u32 A_QADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QADD);
|
||||||
const u32 A_QSUB = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QSUB);
|
const u32 A_QSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QSUB);
|
||||||
const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QDADD);
|
const u32 A_QDADD = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDADD);
|
||||||
const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_ARM9Only | ak(ak_QDSUB);
|
const u32 A_QDSUB = A_Write12 | A_Read0 | A_Read16 | A_UnkOnARM7 | ak(ak_QDSUB);
|
||||||
|
|
||||||
#define A_LDR A_Write12
|
#define A_LDR A_Write12
|
||||||
#define A_STR A_Read12
|
#define A_STR A_Read12
|
||||||
|
@ -144,8 +142,8 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH,LDR)
|
||||||
const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | ak(ak_SWP);
|
const u32 A_SWP = A_Write12 | A_Read16 | A_Read0 | ak(ak_SWP);
|
||||||
const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | ak(ak_SWPB);
|
const u32 A_SWPB = A_Write12 | A_Read16 | A_Read0 | ak(ak_SWPB);
|
||||||
|
|
||||||
const u32 A_LDM = A_Read16 | A_LDMSTM | ak(ak_LDM);
|
const u32 A_LDM = A_Read16 | A_MemWriteback | ak(ak_LDM);
|
||||||
const u32 A_STM = A_Read16 | A_LDMSTM | ak(ak_STM);
|
const u32 A_STM = A_Read16 | A_MemWriteback | ak(ak_STM);
|
||||||
|
|
||||||
const u32 A_B = A_BranchAlways | ak(ak_B);
|
const u32 A_B = A_BranchAlways | ak(ak_B);
|
||||||
const u32 A_BL = A_BranchAlways | A_Link | ak(ak_BL);
|
const u32 A_BL = A_BranchAlways | A_Link | ak(ak_BL);
|
||||||
|
@ -154,11 +152,11 @@ const u32 A_BX = A_BranchAlways | A_Read0 | ak(ak_BX);
|
||||||
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
|
const u32 A_BLX_REG = A_BranchAlways | A_Link | A_Read0 | ak(ak_BLX_REG);
|
||||||
|
|
||||||
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
|
const u32 A_UNK = A_BranchAlways | A_Link | ak(ak_UNK);
|
||||||
const u32 A_MSR_IMM = A_ARM9Only | ak(ak_MSR_IMM);
|
const u32 A_MSR_IMM = A_UnkOnARM7 | ak(ak_MSR_IMM);
|
||||||
const u32 A_MSR_REG = A_Read0 | A_ARM9Only | ak(ak_MSR_REG);
|
const u32 A_MSR_REG = A_Read0 | A_UnkOnARM7 | ak(ak_MSR_REG);
|
||||||
const u32 A_MRS = A_Write12 | A_ARM9Only | ak(ak_MRS);
|
const u32 A_MRS = A_Write12 | A_UnkOnARM7 | ak(ak_MRS);
|
||||||
const u32 A_MCR = A_Read12 | A_ARM9Only | ak(ak_MCR);
|
const u32 A_MCR = A_Read12 | A_UnkOnARM7 | ak(ak_MCR);
|
||||||
const u32 A_MRC = A_Write12 | A_ARM9Only | ak(ak_MRC);
|
const u32 A_MRC = A_Write12 | A_UnkOnARM7 | ak(ak_MRC);
|
||||||
const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);
|
const u32 A_SVC = A_BranchAlways | A_Link | ak(ak_SVC);
|
||||||
|
|
||||||
// THUMB
|
// THUMB
|
||||||
|
@ -249,7 +247,7 @@ const u32 T_LDRH_IMM = T_Write0 | T_Read3 | tk(tk_LDRH_IMM);
|
||||||
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | tk(tk_STR_SPREL);
|
const u32 T_STR_SPREL = T_Read8 | T_ReadR13 | tk(tk_STR_SPREL);
|
||||||
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | tk(tk_LDR_SPREL);
|
const u32 T_LDR_SPREL = T_Write8 | T_ReadR13 | tk(tk_LDR_SPREL);
|
||||||
|
|
||||||
const u32 T_PUSH = T_ReadR15 | T_ReadR13 | T_WriteR13 | tk(tk_PUSH);
|
const u32 T_PUSH = T_ReadR13 | T_WriteR13 | tk(tk_PUSH);
|
||||||
const u32 T_POP = T_PopPC | T_ReadR13 | T_WriteR13 | tk(tk_POP);
|
const u32 T_POP = T_PopPC | T_ReadR13 | T_WriteR13 | tk(tk_POP);
|
||||||
|
|
||||||
const u32 T_LDMIA = T_Read8 | T_Write8 | tk(tk_LDMIA);
|
const u32 T_LDMIA = T_Read8 | T_Write8 | tk(tk_LDMIA);
|
||||||
|
@ -320,8 +318,10 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
||||||
if (num == 0 && (instr & 0xFE000000) == 0xFA000000)
|
if (num == 0 && (instr & 0xFE000000) == 0xFA000000)
|
||||||
data = A_BLX_IMM;
|
data = A_BLX_IMM;
|
||||||
|
|
||||||
if (data & A_ARM9Only && num != 0)
|
if (data & A_UnkOnARM7 && num != 0)
|
||||||
data |= A_BranchAlways | A_Link;
|
data = A_UNK;
|
||||||
|
|
||||||
|
res.Kind = (data >> 13) & 0x1FF;
|
||||||
|
|
||||||
if (data & A_Read0)
|
if (data & A_Read0)
|
||||||
res.SrcRegs |= 1 << (instr & 0xF);
|
res.SrcRegs |= 1 << (instr & 0xF);
|
||||||
|
@ -360,14 +360,8 @@ Info Decode(bool thumb, u32 num, u32 instr)
|
||||||
res.SrcRegs |= 1 << 15;
|
res.SrcRegs |= 1 << 15;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data & A_LDMSTM)
|
if (res.Kind == ak_LDM)
|
||||||
{
|
res.DstRegs |= instr & (1 << 15); // this is right
|
||||||
res.DstRegs |= instr & (!!(instr & (1 << 20)) << 15);
|
|
||||||
if (instr & (1 << 21))
|
|
||||||
res.DstRegs |= 1 << ((instr >> 16) & 0xF);
|
|
||||||
}
|
|
||||||
|
|
||||||
res.Kind = (data >> 13) & 0x1FF;
|
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue