re-add interlocks

Note: this breaks GCC debug builds for an as-yet-unknown reason.
This commit is contained in:
Jaklyy 2024-10-18 15:00:55 -04:00
parent d7212643f1
commit e2a810147f
6 changed files with 473 additions and 173 deletions

View File

@ -200,13 +200,11 @@ void ARM::Reset()
void ARMv5::Reset()
{
PU_Map = PU_PrivMap;
Store = false;
TimestampActual = 0;
InterlockMem = 16;
InterlockWBCur = 16;
InterlockWBPrev = 16;
Store = false;
InterlockMask = 0;
ILCurrReg = 16;
ILPrevReg = 16;
WBWritePointer = 16;
WBFillPointer = 0;
@ -1152,7 +1150,7 @@ u32 ARMv5::ReadMem(u32 addr, int size)
#endif
inline void ARMv5::CodeFetch()
void ARMv5::CodeFetch()
{
if (NullFetch)
{
@ -1181,6 +1179,36 @@ void ARMv5::AddCycles_MW(s32 numM)
if (numM > 0) NDS.ARM9Timestamp += numM;
}
// Resolves register interlocks before an instruction's execute-stage cycles are added.
//
// ilmask names the registers the instruction reads:
//   bitfield == true  -> bit N set means register N is read
//   bitfield == false -> ilmask is a single register number
//
// Two interlock slots are tracked: ILCurrReg/ILCurrTime (set by the instruction that
// ran just before this call) and ILPrevReg/ILPrevTime (the pair that was in the
// "current" slot one call earlier). Register value 16 is the "no register" sentinel:
// it is the reset value, and 1<<16 can never match a bit of the u16 mask.
template <bool bitfield>
void ARMv5::HandleInterlocksExecute(u16 ilmask)
{
    if ((bitfield && (ilmask & (1<<ILCurrReg))) || (!bitfield && (ilmask == ILCurrReg)))
    {
        // NOTE(review): this only ever moves ARM9Timestamp *down* to the interlock time.
        // A stall would normally be expected to move it up; confirm the comparison direction.
        if (NDS.ARM9Timestamp > ILCurrTime) NDS.ARM9Timestamp = ILCurrTime;

        // the hit consumes both slots
        ILCurrReg = 16;
        ILPrevReg = 16;
        return;
    }
    // Fix: the non-bitfield case must test ILPrevReg here. It previously re-tested
    // ILCurrReg, which the branch above has already ruled out, so single-register
    // callers could never hit the previous-slot interlock.
    else if ((bitfield && (ilmask & (1<<ILPrevReg))) || (!bitfield && (ilmask == ILPrevReg)))
    {
        if (NDS.ARM9Timestamp > ILPrevTime) NDS.ARM9Timestamp = ILPrevTime;
    }

    // age the slots: current becomes previous, and the current slot is emptied
    // until the calling instruction fills it in again
    ILPrevReg = ILCurrReg;
    ILPrevTime = ILCurrTime;
    ILCurrReg = 16;
}
// Explicit instantiations of both variants (bitmask and single-register); the template
// body is defined in this file, so these make both available to callers elsewhere.
template void ARMv5::HandleInterlocksExecute<true>(u16 ilmask);
template void ARMv5::HandleInterlocksExecute<false>(u16 ilmask);
// Resolves an interlock on a single register against the "previous" slot
// (ILPrevReg/ILPrevTime). Presumably used for operands that are only needed in the
// memory stage - TODO confirm against the callers.
//
// reg: register number being consumed.
void ARMv5::HandleInterlocksMemory(u8 reg)
{
    // nothing to do unless this is the interlocked register and the timestamp is past its time
    // NOTE(review): as in the execute-stage handler, this moves ARM9Timestamp down, not up;
    // confirm the comparison direction.
    if ((reg != ILPrevReg) || (NDS.ARM9Timestamp <= ILPrevTime)) return;

    NDS.ARM9Timestamp = ILPrevTime;

    // Fix: clear the slot by resetting the *register* to the "none" sentinel (16).
    // The original wrote 16 into ILPrevTime, leaving the slot armed with a bogus
    // timestamp of 16 that the next matching check would clamp ARM9Timestamp to.
    ILPrevReg = 16;
}
u16 ARMv4::CodeRead16(u32 addr)
{
if ((addr >> 24) == 0x02)

View File

@ -263,23 +263,6 @@ public:
bool DataWrite16(u32 addr, u16 val) override;
bool DataWrite32(u32 addr, u32 val) override;
bool DataWrite32S(u32 addr, u32 val) override;
// Records the registers read by the current instruction in InterlockMask, then
// adds the execute-stage cycles via AddCycles_C().
//
// nregs: how many of rn/rm are actually read (0, 1 or 2).
// rn, rm: register numbers; rm is ignored unless nregs == 2, and both are ignored
//         when nregs == 0 (InterlockMask is left untouched in that case).
template<u8 nregs>
void ExecuteStage(u8 rn, u8 rm)
{
    // Fix: the bound was `nregs < 2`, which rejected the nregs == 2 case that is
    // explicitly handled below.
    static_assert((nregs <= 2), "too many regs");

    if constexpr (nregs == 1)
    {
        InterlockMask = 1 << rn;
    }
    if constexpr (nregs == 2)
    {
        InterlockMask = (1 << rn) | (1 << rm);
    }

    AddCycles_C();
}
void CodeFetch();
@ -300,6 +283,10 @@ public:
AddCycles_MW(DataCycles);
DataCycles = 0;
}
template <bool bitfield>
void HandleInterlocksExecute(u16 ilmask);
void HandleInterlocksMemory(u8 reg);
void GetCodeMemRegion(u32 addr, MemRegion* region);
@ -371,14 +358,14 @@ public:
u64 ITCMTimestamp;
u64 TimestampActual;
u8 InterlockMem;
u8 InterlockWBCur;
u8 InterlockWBPrev;
bool Store;
u16 InterlockMask;
bool NullFetch;
u32 PC;
bool NullFetch;
bool Store;
u8 ILCurrReg;
u8 ILPrevReg;
u64 ILCurrTime;
u64 ILPrevTime;
u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing
u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing

View File

@ -153,6 +153,8 @@ void A_MSR_IMM(ARM* cpu)
void A_MSR_REG(ARM* cpu)
{
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
u32* psr;
if (cpu->CurInstr & (1<<22))
{
@ -275,6 +277,8 @@ void A_MCR(ARM* cpu)
u32 val = cpu->R[(cpu->CurInstr>>12)&0xF];
if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr>>12)&0xF);
if (cpu->Num==0 && cp==15)
{
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo, val);

View File

@ -152,22 +152,26 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry)
#define A_CALC_OP2_IMM \
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E);
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \
u16 ilmask = 0;
#define A_CALC_OP2_IMM_S \
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \
if ((cpu->CurInstr>>7)&0x1E) \
cpu->SetC(b & 0x80000000);
cpu->SetC(b & 0x80000000); \
u16 ilmask = 0;
#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
u32 s = (cpu->CurInstr>>7)&0x1F; \
shiftop(b, s);
shiftop(b, s); \
u16 ilmask = 1 << (cpu->CurInstr&0xF);
#define A_CALC_OP2_REG_SHIFT_REG(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \
if ((cpu->CurInstr&0xF)==15) b += 4; \
shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF));
shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); \
u16 ilmask = 1 << (cpu->CurInstr&0xF);
#define A_IMPLEMENT_ALU_OP(x,s) \
@ -377,6 +381,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
#define A_SUB(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -394,6 +399,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -410,6 +416,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
#define A_RSB(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -427,6 +434,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
!res, \
CarrySub(b, a), \
OverflowSub(b, a)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -443,6 +451,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
#define A_ADD(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -460,6 +469,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -476,6 +486,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
#define A_ADC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -495,6 +506,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
!res, \
CarryAdd(a, b) | CarryAdd(res_tmp, carry), \
OverflowAdc(a, b, carry)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -511,6 +523,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
#define A_SBC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -530,6 +543,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
!res, \
CarrySub(a, b) & CarrySub(res_tmp, carry), \
OverflowSbc(a, b, carry)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -546,6 +560,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
#define A_RSC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -565,6 +580,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
!res, \
CarrySub(b, a) & CarrySub(res_tmp, carry), \
OverflowSbc(b, a, carry)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -581,6 +597,8 @@ A_IMPLEMENT_ALU_OP(RSC,)
#define A_TST(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
@ -601,8 +619,7 @@ A_IMPLEMENT_ALU_OP(RSC,)
{ \
cpu->SetNZ(res & 0x80000000, \
!res); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
}
A_IMPLEMENT_ALU_TEST(TST,_S)
@ -610,6 +627,8 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
#define A_TEQ(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a ^ b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
@ -630,8 +649,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
{ \
cpu->SetNZ(res & 0x80000000, \
!res); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
}
A_IMPLEMENT_ALU_TEST(TEQ,_S)
@ -639,6 +657,8 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
#define A_CMP(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
@ -663,8 +683,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
!res, \
CarrySub(a, b), \
OverflowSub(a, b)); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
}
A_IMPLEMENT_ALU_TEST(CMP,)
@ -672,6 +691,8 @@ A_IMPLEMENT_ALU_TEST(CMP,)
#define A_CMN(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \
if (cpu->Num == 1) \
@ -696,8 +717,7 @@ A_IMPLEMENT_ALU_TEST(CMP,)
!res, \
CarryAdd(a, b), \
OverflowAdd(a, b)); \
} \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
}
A_IMPLEMENT_ALU_TEST(CMN,)
@ -705,6 +725,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
#define A_ORR(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a | b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -720,6 +741,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
u32 res = a | b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -734,6 +756,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S)
#define A_MOV(c) \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -747,6 +770,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S)
#define A_MOV_S(c) \
cpu->SetNZ(b & 0x80000000, \
!b); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -781,6 +805,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
#define A_BIC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & ~b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -796,6 +821,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
u32 res = a & ~b; \
cpu->SetNZ(res & 0x80000000, \
!res); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -811,6 +837,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S)
#define A_MVN(c) \
b = ~b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -825,6 +852,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S)
b = ~b; \
cpu->SetNZ(b & 0x80000000, \
!b); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \
@ -859,12 +887,17 @@ void A_MUL(ARM* cpu)
if (cpu->Num == 0)
{
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3);
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); // S
else
{
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else
@ -899,12 +932,18 @@ void A_MLA(ARM* cpu)
if (cpu->Num == 0)
{
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3);
else
{
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else
@ -938,18 +977,31 @@ void A_UMULL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
{
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
cpu->AddCycles_CI(cycles);
}
cpu->AddCycles_CI(cycles);
}
void A_UMLAL(ARM* cpu)
@ -974,18 +1026,33 @@ void A_UMLAL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
{
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)) |
(1 << ((cpu->CurInstr >> 16) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5;
cpu->AddCycles_CI(cycles);
}
cpu->AddCycles_CI(cycles);
}
void A_SMULL(ARM* cpu)
@ -1007,18 +1074,30 @@ void A_SMULL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
{
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
cpu->AddCycles_CI(cycles);
}
cpu->AddCycles_CI(cycles);
}
void A_SMLAL(ARM* cpu)
@ -1043,18 +1122,32 @@ void A_SMLAL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0);
}
u32 cycles;
if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2;
{
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)) |
(1 << ((cpu->CurInstr >> 16) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else
{
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5;
cpu->AddCycles_CI(cycles);
}
cpu->AddCycles_CI(cycles);
}
void A_SMLAxy(ARM* cpu)
@ -1078,8 +1171,17 @@ void A_SMLAxy(ARM* cpu)
if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_SMLAWy(ARM* cpu)
@ -1101,7 +1203,16 @@ void A_SMLAWy(ARM* cpu)
if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_SMULxy(ARM* cpu)
@ -1120,7 +1231,16 @@ void A_SMULxy(ARM* cpu)
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_SMULWy(ARM* cpu)
@ -1137,7 +1257,16 @@ void A_SMULWy(ARM* cpu)
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_SMLALxy(ARM* cpu)
@ -1162,10 +1291,18 @@ void A_SMLALxy(ARM* cpu)
if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)) |
(1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
@ -1192,6 +1329,8 @@ void A_CLZ(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1);
else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
cpu->AddCycles_C();
}
@ -1213,7 +1352,13 @@ void A_QADD(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_QSUB(ARM* cpu)
@ -1233,7 +1378,13 @@ void A_QSUB(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_QDADD(ARM* cpu)
@ -1261,7 +1412,13 @@ void A_QDADD(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
void A_QDSUB(ARM* cpu)
@ -1289,7 +1446,13 @@ void A_QDSUB(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
@ -1306,6 +1469,7 @@ void T_LSL_IMM(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = op;
cpu->SetNZ(op & 0x80000000,
!op);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1317,6 +1481,7 @@ void T_LSR_IMM(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = op;
cpu->SetNZ(op & 0x80000000,
!op);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1328,6 +1493,7 @@ void T_ASR_IMM(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = op;
cpu->SetNZ(op & 0x80000000,
!op);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1341,6 +1507,7 @@ void T_ADD_REG_(ARM* cpu)
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7)));
cpu->AddCycles_C();
}
@ -1354,6 +1521,7 @@ void T_SUB_REG_(ARM* cpu)
!res,
CarrySub(a, b),
OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7)));
cpu->AddCycles_C();
}
@ -1367,6 +1535,7 @@ void T_ADD_IMM_(ARM* cpu)
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1380,6 +1549,7 @@ void T_SUB_IMM_(ARM* cpu)
!res,
CarrySub(a, b),
OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1389,6 +1559,7 @@ void T_MOV_IMM(ARM* cpu)
cpu->R[(cpu->CurInstr >> 8) & 0x7] = b;
cpu->SetNZ(0,
!b);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C();
}
@ -1401,6 +1572,7 @@ void T_CMP_IMM(ARM* cpu)
!res,
CarrySub(a, b),
OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C();
}
@ -1414,6 +1586,7 @@ void T_ADD_IMM(ARM* cpu)
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C();
}
@ -1427,6 +1600,7 @@ void T_SUB_IMM(ARM* cpu)
!res,
CarrySub(a, b),
OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C();
}
@ -1439,6 +1613,7 @@ void T_AND_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1450,6 +1625,7 @@ void T_EOR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1461,6 +1637,7 @@ void T_LSL_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
!a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1);
}
@ -1472,6 +1649,7 @@ void T_LSR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
!a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1);
}
@ -1483,6 +1661,7 @@ void T_ASR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
!a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1);
}
@ -1498,6 +1677,7 @@ void T_ADC_REG(ARM* cpu)
!res,
CarryAdd(a, b) | CarryAdd(res_tmp, carry),
OverflowAdc(a, b, carry));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1513,6 +1693,7 @@ void T_SBC_REG(ARM* cpu)
!res,
CarrySub(a, b) & CarrySub(res_tmp, carry),
OverflowSbc(a, b, carry));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1524,6 +1705,7 @@ void T_ROR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000,
!a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1);
}
@ -1534,6 +1716,7 @@ void T_TST_REG(ARM* cpu)
u32 res = a & b;
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1546,6 +1729,7 @@ void T_NEG_REG(ARM* cpu)
!res,
CarrySub(0, b),
OverflowSub(0, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1558,6 +1742,7 @@ void T_CMP_REG(ARM* cpu)
!res,
CarrySub(a, b),
OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1570,6 +1755,7 @@ void T_CMN_REG(ARM* cpu)
!res,
CarryAdd(a, b),
OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1581,6 +1767,7 @@ void T_ORR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1597,6 +1784,7 @@ void T_MUL_REG(ARM* cpu)
if (cpu->Num == 0)
{
cycles += 3;
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
}
else
{
@ -1606,7 +1794,7 @@ void T_MUL_REG(ARM* cpu)
else if (a & 0x0000FF00) cycles += 2;
else cycles += 1;
}
cpu->AddCycles_CI(cycles);
cpu->AddCycles_CI(cycles); // implemented as S variant, doesn't interlock
}
void T_BIC_REG(ARM* cpu)
@ -1617,6 +1805,7 @@ void T_BIC_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C();
}
@ -1627,6 +1816,7 @@ void T_MVN_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000,
!res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C();
}
@ -1643,7 +1833,8 @@ void T_ADD_HIREG(ARM* cpu)
u32 b = cpu->R[rs];
cpu->AddCycles_C();
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
if (rd == 15)
{
cpu->JumpTo((a + b) | 1);
@ -1678,6 +1869,7 @@ void T_CMP_HIREG(ARM* cpu)
cpu->CPSR |= 0x20; // keep it from crashing the emulator at least
}
}
else if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
cpu->AddCycles_C();
}
@ -1687,6 +1879,7 @@ void T_MOV_HIREG(ARM* cpu)
u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8);
u32 rs = (cpu->CurInstr >> 3) & 0xF;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
cpu->AddCycles_C();
if (rd == 15)
@ -1717,6 +1910,8 @@ void T_ADD_PCREL(ARM* cpu)
u32 val = cpu->R[15] & ~2;
val += ((cpu->CurInstr & 0xFF) << 2);
cpu->R[(cpu->CurInstr >> 8) & 0x7] = val;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(15);
cpu->AddCycles_C();
}
@ -1725,6 +1920,8 @@ void T_ADD_SPREL(ARM* cpu)
u32 val = cpu->R[13];
val += ((cpu->CurInstr & 0xFF) << 2);
cpu->R[(cpu->CurInstr >> 8) & 0x7] = val;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(13);
cpu->AddCycles_C();
}
@ -1736,6 +1933,8 @@ void T_ADD_SP(ARM* cpu)
else
val += ((cpu->CurInstr & 0x7F) << 2);
cpu->R[13] = val;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(13);
cpu->AddCycles_C();
}

View File

@ -51,12 +51,14 @@ void A_BLX_IMM(ARM* cpu)
// ARM-state BX: branch to the address held in a register.
// The register index is taken from the low four bits of the current opcode.
void A_BX(ARM* cpu)
{
    const u32 target_reg = cpu->CurInstr & 0xF;

    // Core number 0 is the one downcast to ARMv5 here; only it gets the
    // execute-stage interlock check. The <false> form is given a single
    // register index rather than a register bitmask.
    if (cpu->Num == 0)
    {
        ((ARMv5*)cpu)->HandleInterlocksExecute<false>(target_reg);
    }

    cpu->AddCycles_C();

    // The register is read only after the interlock and cycle accounting
    // have run, matching the original statement order.
    cpu->JumpTo(cpu->R[target_reg]);
}
void A_BLX_REG(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
u32 lr = cpu->R[15] - 4;
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
@ -77,12 +79,14 @@ void T_BCOND(ARM* cpu)
// Thumb-state BX: branch to the address held in a register.
// The register index is the four-bit field at bits 3..6 of the current opcode.
void T_BX(ARM* cpu)
{
    const u32 target_reg = (cpu->CurInstr >> 3) & 0xF;

    // Core number 0 is the one downcast to ARMv5 here; only it gets the
    // execute-stage interlock check. The <false> form is given a single
    // register index rather than a register bitmask.
    if (cpu->Num == 0)
    {
        ((ARMv5*)cpu)->HandleInterlocksExecute<false>(target_reg);
    }

    cpu->AddCycles_C();

    // The register is read only after the interlock and cycle accounting
    // have run, matching the original statement order.
    cpu->JumpTo(cpu->R[target_reg]);
}
void T_BLX_REG(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
if (cpu->Num==1)
{

View File

@ -18,15 +18,20 @@
#include <stdio.h>
#include "ARM.h"
#include "NDS.h"
namespace melonDS::ARMInterpreter
{
void ExecuteStage(ARM* cpu)
template <bool bitfield>
inline void ExecuteStage(ARM* cpu, u16 ilmask)
{
if (cpu->Num == 0) cpu->AddCycles_C();
if (cpu->Num == 0)
{
((ARMv5*)cpu)->HandleInterlocksExecute<bitfield>(ilmask);
((ARMv5*)cpu)->AddCycles_C();
}
}
@ -56,13 +61,15 @@ void ExecuteStage(ARM* cpu)
#define A_WB_CALC_OFFSET_IMM \
u32 offset = (cpu->CurInstr & 0xFFF); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 0;
#define A_WB_CALC_OFFSET_REG(shiftop) \
u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
u32 shift = ((cpu->CurInstr>>7)&0x1F); \
shiftop(offset, shift); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 1 << (cpu->CurInstr & 0xF);
enum class Writeback
{
@ -72,11 +79,12 @@ enum class Writeback
Trans,
};
template<bool signror, int size, Writeback writeback>
void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
template<bool signror, int size, Writeback writeback, bool multireg>
void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset, u16 ilmask)
{
ExecuteStage(cpu);
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
u32 addr;
if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn];
@ -117,15 +125,25 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1;
cpu->JumpTo(val);
}
else cpu->R[rd] = val;
else
{
cpu->R[rd] = val;
if (cpu->Num == 0)
{
((ARMv5*)cpu)->ILCurrReg = rd;
bool extra = ((size < 32) || (signror && (addr&0x3)));
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles + extra;
}
}
}
template<int size, Writeback writeback>
void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
template<int size, Writeback writeback, bool multireg>
void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset, u16 ilmask)
{
ExecuteStage(cpu);
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
u32 addr;
if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn];
else addr = cpu->R[rn];
@ -139,6 +157,8 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap;
}
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksMemory(rd);
bool dabort;
if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval);
if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval);
@ -162,36 +182,36 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
#define A_STR \
if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<32, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STR_POST \
if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<32, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STRB \
if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else StoreSingle<8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STRB_POST \
if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDR \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<true, 32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 32, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDR_POST \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<true, 32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 32, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRB \
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<false, 8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<false, 8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRB_POST \
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<false, 8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<false, 8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
@ -266,63 +286,72 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
#define A_HD_CALC_OFFSET_IMM \
u32 offset = (cpu->CurInstr & 0xF) | ((cpu->CurInstr >> 4) & 0xF0); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 0;
#define A_HD_CALC_OFFSET_REG \
u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
if (!(cpu->CurInstr & (1<<23))) offset = -offset;
if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 1 << (cpu->CurInstr & 0xF);
#define A_STRH \
if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else StoreSingle<16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STRH_POST \
StoreSingle<16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
StoreSingle<16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
// TODO: CHECK LDRD/STRD TIMINGS!!
#define A_LDRD \
if (cpu->Num != 0) return; \
ExecuteStage(cpu); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \
u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \
cpu->AddCycles_CDI(); \
if (dabort) { \
cpu->AddCycles_CDI(); \
((ARMv5*)cpu)->DataAbort(); \
return; } \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \
cpu->AddCycles_CDI(); \
else { \
cpu->R[r+1] = val; \
if (cpu->Num == 0) { \
((ARMv5*)cpu)->ILCurrReg = r+1; \
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
#define A_LDRD_POST \
if (cpu->Num != 0) return; \
ExecuteStage(cpu); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \
u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \
cpu->AddCycles_CDI(); \
if (dabort) { \
cpu->AddCycles_CDI(); \
((ARMv5*)cpu)->DataAbort(); \
return; } \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \
cpu->AddCycles_CDI(); \
else { \
cpu->R[r+1] = val; \
if (cpu->Num == 0) { \
((ARMv5*)cpu)->ILCurrReg = r+1; \
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_STRD \
if (cpu->Num != 0) return; \
ExecuteStage(cpu); \
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \
u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \
dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \
@ -334,10 +363,11 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
#define A_STRD_POST \
if (cpu->Num != 0) return; \
ExecuteStage(cpu); \
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \
u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \
dabort |= !cpu->DataWrite32S (addr+4, storeval); \
@ -348,25 +378,25 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_LDRH \
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<false, 16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<false, 16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRH_POST \
LoadSingle<false, 16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
LoadSingle<false, 16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSB \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<true, 8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSB_POST \
LoadSingle<true, 8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
LoadSingle<true, 8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSH \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \
else LoadSingle<true, 16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSH_POST \
LoadSingle<true, 16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset);
LoadSingle<true, 16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_IMPLEMENT_HD_LDRSTR(x) \
@ -406,7 +436,7 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH)
template<bool byte>
inline void SWP(ARM* cpu)
{
ExecuteStage(cpu);
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 16) & 0xF));
u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->R[cpu->CurInstr & 0xF];
if ((cpu->CurInstr & 0xF) == 15) rm += 4;
@ -415,7 +445,7 @@ inline void SWP(ARM* cpu)
if ((byte ? cpu->DataRead8 (base, &val)
: cpu->DataRead32(base, &val))) [[likely]]
{
u32 numD = cpu->DataCycles;
cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme
if ((byte ? cpu->DataWrite8 (base, rm)
: cpu->DataWrite32(base, rm))) [[likely]]
@ -424,17 +454,27 @@ inline void SWP(ARM* cpu)
u32 rd = (cpu->CurInstr >> 12) & 0xF;
if constexpr (!byte) val = ROR(val, 8*(base&0x3));
cpu->AddCycles_CDI();
if (rd != 15) cpu->R[rd] = val;
if (rd != 15)
{
cpu->R[rd] = val;
if (cpu->Num == 0)
{
((ARMv5*)cpu)->ILCurrReg = rd;
bool extra = (byte || (base&0x3));
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles + extra;
}
}
else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9?
return;
}
else ((ARMv5*)cpu)->DataAbort();
cpu->DataCycles += numD;
}
else ((ARMv5*)cpu)->DataAbort();
// data abort handling
cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort();
}
void A_SWP(ARM* cpu)
@ -499,7 +539,6 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags)
void A_LDM(ARM* cpu)
{
ExecuteStage(cpu);
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
u32 base = cpu->R[baseid];
u32 wbbase;
@ -518,6 +557,8 @@ void A_LDM(ARM* cpu)
return;
}
ExecuteStage<false>(cpu, baseid);
if (!(cpu->CurInstr & (1<<23))) // decrement
{
// decrement is actually an increment starting from the end address
@ -573,11 +614,12 @@ void A_LDM(ARM* cpu)
// switch back to previous regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
cpu->AddCycles_CDI();
// handle data aborts
if (dabort) [[unlikely]]
{
cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort();
return;
}
@ -601,17 +643,20 @@ void A_LDM(ARM* cpu)
else
cpu->R[baseid] = wbbase;
}
// jump if pc got written
if (cpu->CurInstr & (1<<15))
cpu->JumpTo(pc, cpu->CurInstr & (1<<22));
cpu->AddCycles_CDI();
else if (cpu->Num == 0)
{
u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0x7FFF);
((ARMv5*)cpu)->ILCurrReg = lastreg;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
void A_STM(ARM* cpu)
{
ExecuteStage(cpu);
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
u32 base = cpu->R[baseid];
u32 oldbase = base;
@ -628,6 +673,8 @@ void A_STM(ARM* cpu)
(0 << 4))); // thumb
return;
}
ExecuteStage<false>(cpu, baseid);
if (!(cpu->CurInstr & (1<<23)))
{
@ -655,6 +702,8 @@ void A_STM(ARM* cpu)
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true);
}
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksMemory(__builtin_ctz(cpu->CurInstr));
for (u32 i = 0; i < 16; i++)
{
if (cpu->CurInstr & (1<<i))
@ -683,22 +732,20 @@ void A_STM(ARM* cpu)
if (cpu->CurInstr & (1<<22))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
cpu->AddCycles_CD();
// handle data aborts
if (dabort) [[unlikely]]
{
// restore original value of base
cpu->R[baseid] = oldbase;
cpu->AddCycles_CD();
((ARMv5*)cpu)->DataAbort();
return;
}
if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)))
cpu->R[baseid] = base;
cpu->AddCycles_CD();
}
@ -710,106 +757,108 @@ void A_STM(ARM* cpu)
void T_LDR_PCREL(ARM* cpu)
{
ExecuteStage(cpu);
ExecuteStage<false>(cpu, 15);
u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2);
bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]);
cpu->AddCycles_CDI();
if (dabort) [[unlikely]]
if (dabort) [[unlikely]] ((ARMv5*)cpu)->DataAbort();
else if (cpu->Num == 0)
{
((ARMv5*)cpu)->DataAbort();
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 8) & 0x7;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
void T_STR_REG(ARM* cpu)
{
StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
StoreSingle<32, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_STRB_REG(ARM* cpu)
{
StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
StoreSingle<8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_LDR_REG(ARM* cpu)
{
LoadSingle<true, 32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
LoadSingle<true, 32, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_LDRB_REG(ARM* cpu)
{
LoadSingle<false, 8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
LoadSingle<false, 8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_STRH_REG(ARM* cpu)
{
StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
StoreSingle<16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_LDRSB_REG(ARM* cpu)
{
LoadSingle<true, 8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
LoadSingle<true, 8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_LDRH_REG(ARM* cpu)
{
LoadSingle<false, 16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
LoadSingle<false, 16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_LDRSH_REG(ARM* cpu)
{
LoadSingle<true, 16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]);
LoadSingle<true, 16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
}
void T_STR_IMM(ARM* cpu)
{
StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C));
StoreSingle<32, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C), 0);
}
void T_LDR_IMM(ARM* cpu)
{
LoadSingle<true, 32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C));
LoadSingle<true, 32, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C), 0);
}
void T_STRB_IMM(ARM* cpu)
{
StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F));
StoreSingle<8, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F), 0);
}
void T_LDRB_IMM(ARM* cpu)
{
LoadSingle<false, 8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F));
LoadSingle<false, 8, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F), 0);
}
void T_STRH_IMM(ARM* cpu)
{
StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E));
StoreSingle<16, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E), 0);
}
void T_LDRH_IMM(ARM* cpu)
{
LoadSingle<false, 16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E));
LoadSingle<false, 16, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E), 0);
}
void T_STR_SPREL(ARM* cpu)
{
StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC));
StoreSingle<32, Writeback::None, false>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC), 0);
}
void T_LDR_SPREL(ARM* cpu)
{
LoadSingle<false, 32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC));
LoadSingle<false, 32, Writeback::None, false>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC), 0);
}
void T_PUSH(ARM* cpu)
{
ExecuteStage(cpu);
ExecuteStage<false>(cpu, 13);
int nregs = 0;
bool first = true;
bool dabort = false;
@ -833,6 +882,13 @@ void T_PUSH(ARM* cpu)
base -= (nregs<<2);
u32 wbbase = base;
if (cpu->Num == 0)
{
u8 firstreg = __builtin_ctz(cpu->CurInstr);
if (firstreg == 8) firstreg = 14;
((ARMv5*)cpu)->HandleInterlocksMemory(firstreg);
}
for (int i = 0; i < 8; i++)
{
if (cpu->CurInstr & (1<<i))
@ -851,25 +907,24 @@ void T_PUSH(ARM* cpu)
: cpu->DataWrite32S(base, cpu->R[14]));
}
cpu->AddCycles_CD();
if (dabort) [[unlikely]]
{
cpu->AddCycles_CD();
((ARMv5*)cpu)->DataAbort();
return;
}
cpu->R[13] = wbbase;
cpu->AddCycles_CD();
}
void T_POP(ARM* cpu)
{
ExecuteStage(cpu);
ExecuteStage<false>(cpu, 13);
u32 base = cpu->R[13];
bool first = true;
bool dabort = false;
if (!(cpu->CurInstr & 0x1FF)) [[unlikely]]
{
EmptyRListLDMSTM(cpu, 13, 0b00011);
@ -898,10 +953,23 @@ void T_POP(ARM* cpu)
: cpu->DataRead32S(base, &pc));
if (dabort) [[unlikely]] goto dataabort;
cpu->AddCycles_CDI();
if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1;
cpu->JumpTo(pc);
base += 4;
}
else
{
cpu->AddCycles_CDI();
if (cpu->Num == 0)
{
u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0xFF);
((ARMv5*)cpu)->ILCurrReg = lastreg;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
if (dabort) [[unlikely]]
{
@ -912,23 +980,27 @@ void T_POP(ARM* cpu)
}
cpu->R[13] = base;
cpu->AddCycles_CDI();
}
void T_STMIA(ARM* cpu)
{
ExecuteStage(cpu);
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7));
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true;
bool dabort = false;
if (!(cpu->CurInstr & 0xFF)) [[unlikely]]
{
EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b10010);
return;
}
if (cpu->Num == 0)
{
u8 firstreg = __builtin_ctz(cpu->CurInstr);
((ARMv5*)cpu)->HandleInterlocksMemory(firstreg);
}
for (int i = 0; i < 8; i++)
{
if (cpu->CurInstr & (1<<i))
@ -941,25 +1013,25 @@ void T_STMIA(ARM* cpu)
}
}
cpu->AddCycles_CD();
if (dabort) [[unlikely]]
{
cpu->AddCycles_CD();
((ARMv5*)cpu)->DataAbort();
return;
}
// TODO: check "Rb included in Rlist" case
cpu->R[(cpu->CurInstr >> 8) & 0x7] = base;
cpu->AddCycles_CD();
}
void T_LDMIA(ARM* cpu)
{
ExecuteStage(cpu);
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7));
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true;
bool dabort = false;
if (!(cpu->CurInstr & 0xFF)) [[unlikely]]
{
EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b00011);
@ -980,17 +1052,23 @@ void T_LDMIA(ARM* cpu)
}
}
cpu->AddCycles_CDI();
if (dabort) [[unlikely]]
{
cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort();
return;
}
if (cpu->Num == 0)
{
u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0xFF);
((ARMv5*)cpu)->ILCurrReg = lastreg;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7))))
cpu->R[(cpu->CurInstr >> 8) & 0x7] = base;
cpu->AddCycles_CDI();
}