Merge branch 'less-ambitious-timing-rework' into chemical-x

This commit is contained in:
Jaklyy 2024-10-18 15:01:14 -04:00
commit cc031cd4b8
6 changed files with 516 additions and 171 deletions

View File

@ -200,13 +200,11 @@ void ARM::Reset()
void ARMv5::Reset() void ARMv5::Reset()
{ {
PU_Map = PU_PrivMap; PU_Map = PU_PrivMap;
Store = false;
TimestampActual = 0; TimestampActual = 0;
InterlockMem = 16; ILCurrReg = 16;
InterlockWBCur = 16; ILPrevReg = 16;
InterlockWBPrev = 16;
Store = false;
InterlockMask = 0;
WBWritePointer = 16; WBWritePointer = 16;
WBFillPointer = 0; WBFillPointer = 0;
@ -527,6 +525,7 @@ void ARM::UpdateMode(u32 oldmode, u32 newmode, bool phony)
template <CPUExecuteMode mode> template <CPUExecuteMode mode>
void ARM::TriggerIRQ() void ARM::TriggerIRQ()
{ {
AddCycles_C();
if (CPSR & 0x80) if (CPSR & 0x80)
return; return;
@ -560,6 +559,7 @@ template void ARM::TriggerIRQ<CPUExecuteMode::JIT>();
void ARMv5::PrefetchAbort() void ARMv5::PrefetchAbort()
{ {
AddCycles_C();
Log(LogLevel::Warn, "ARM9: prefetch abort (%08X)\n", R[15]); Log(LogLevel::Warn, "ARM9: prefetch abort (%08X)\n", R[15]);
u32 oldcpsr = CPSR; u32 oldcpsr = CPSR;
@ -675,19 +675,11 @@ void ARMv5::Execute()
R[15] += 2; R[15] += 2;
CurInstr = NextInstr[0]; CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1]; NextInstr[0] = NextInstr[1];
if (R[15] & 0x2) // code fetch is done during the execute stage cycle handling
{ if (R[15] & 0x2) NullFetch = true;
// no fetch is performed. else NullFetch = false;
// unclear if it's a "1 cycle fetch" or a legitmately 0 cycle fetch stage? PC = R[15];
// in practice it doesn't matter though.
NextInstr[1] >>= 16;
NDS.ARM9Timestamp++;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null;
}
else NextInstr[1] = CodeRead32(R[15], false);
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>(); if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] // handle aborted instructions else if (!(PU_Map[(R[15]-4)>>12] & 0x04)) [[unlikely]] // handle aborted instructions
{ {
@ -708,8 +700,9 @@ void ARMv5::Execute()
R[15] += 4; R[15] += 4;
CurInstr = NextInstr[0]; CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1]; NextInstr[0] = NextInstr[1];
NextInstr[1] = CodeRead32(R[15], false); // code fetch is done during the execute stage cycle handling
NullFetch = false;
PC = R[15];
if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>(); if (IRQ && !(CPSR & 0x80)) TriggerIRQ<mode>();
else if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // handle aborted instructions else if (!(PU_Map[(R[15]-8)>>12] & 0x04)) [[unlikely]] // handle aborted instructions
@ -1157,8 +1150,23 @@ u32 ARMv5::ReadMem(u32 addr, int size)
#endif #endif
// Performs the instruction fetch that Execute() deferred to the execute-stage
// cycle handling (AddCycles_C / AddCycles_CI call this).
//
// Reads the NullFetch and PC members: when NullFetch is clear, a 32-bit code
// read at PC refills NextInstr[1]; otherwise no bus access is made.
void ARMv5::CodeFetch()
{
    if (!NullFetch)
    {
        NextInstr[1] = CodeRead32(PC, false);
        return;
    }

    // Null fetch: no code read is issued. The upper halfword of the word that
    // is already in NextInstr[1] becomes the next instruction.
    NextInstr[1] >>= 16;

    // The skipped fetch still advances time by one, and the timestamp is then
    // brought up to TimestampActual if it is still behind it.
    NDS.ARM9Timestamp++;
    if (NDS.ARM9Timestamp < TimestampActual)
        NDS.ARM9Timestamp = TimestampActual;

    DataRegion = Mem9_Null;
}
void ARMv5::AddCycles_CI(s32 numX) void ARMv5::AddCycles_CI(s32 numX)
{ {
CodeFetch();
NDS.ARM9Timestamp += numX; NDS.ARM9Timestamp += numX;
} }
@ -1171,6 +1179,36 @@ void ARMv5::AddCycles_MW(s32 numM)
if (numM > 0) NDS.ARM9Timestamp += numM; if (numM > 0) NDS.ARM9Timestamp += numM;
} }
// Resolves register interlocks for the execute stage of the current instruction.
//
// ilmask identifies the register(s) the instruction reads in its execute stage:
//   - bitfield == true:  ilmask is a bitmask, bit N set means register N is read.
//   - bitfield == false: ilmask is a single register number.
//
// ILCurrReg/ILCurrTime hold the most recently recorded late-result register and
// the timestamp at which it becomes available; ILPrevReg/ILPrevTime hold the
// entry recorded before that. A register value of 16 means "no pending result"
// (it can never match a 16-bit mask or a 4-bit register number).
//
// If a pending register is read before its result is ready, the CPU is stalled
// by advancing NDS.ARM9Timestamp to the ready time. Afterwards the tracking
// state is aged by one instruction.
template <bool bitfield>
void ARMv5::HandleInterlocksExecute(u16 ilmask)
{
    // True when the current instruction reads 'reg' in its execute stage.
    const auto readsReg = [ilmask](u8 reg) -> bool
    {
        if constexpr (bitfield) return (ilmask & (1 << reg)) != 0;
        else return ilmask == reg;
    };

    if (readsReg(ILCurrReg))
    {
        // Stall forward until the result is available; time must never be rewound.
        if (NDS.ARM9Timestamp < ILCurrTime) NDS.ARM9Timestamp = ILCurrTime;

        // Waiting on the newer entry drops both tracked entries.
        ILCurrReg = 16;
        ILPrevReg = 16;
        return;
    }

    // Fixed: the single-register form previously re-tested ILCurrReg here, which
    // made this branch unreachable for non-bitfield callers.
    if (readsReg(ILPrevReg))
    {
        if (NDS.ARM9Timestamp < ILPrevTime) NDS.ARM9Timestamp = ILPrevTime;
    }

    // Age the tracking state: the current entry becomes the previous one.
    ILPrevReg = ILCurrReg;
    ILPrevTime = ILCurrTime;
    ILCurrReg = 16;
}
template void ARMv5::HandleInterlocksExecute<true>(u16 ilmask);
template void ARMv5::HandleInterlocksExecute<false>(u16 ilmask);
// Resolves a register interlock for an operand that is consumed in the memory
// stage rather than in the execute stage.
//
// reg is the register number being read. If it matches the pending entry in
// ILPrevReg and its result is not yet available, the CPU is stalled by
// advancing NDS.ARM9Timestamp to ILPrevTime, and the entry is then retired.
// Nothing happens when the register does not match or the result is already
// available.
void ARMv5::HandleInterlocksMemory(u8 reg)
{
    // Fixed: the comparison was inverted, so the function returned exactly when
    // a stall was required and rewound the timestamp otherwise.
    if ((reg != ILPrevReg) || (NDS.ARM9Timestamp >= ILPrevTime)) return;

    NDS.ARM9Timestamp = ILPrevTime;

    // Fixed: retire the entry by resetting the register to the "none" sentinel
    // (16); the original wrote the sentinel into the timestamp field instead.
    ILPrevReg = 16;
}
u16 ARMv4::CodeRead16(u32 addr) u16 ARMv4::CodeRead16(u32 addr)
{ {
if ((addr >> 24) == 0x02) if ((addr >> 24) == 0x02)

View File

@ -264,26 +264,10 @@ public:
bool DataWrite16(u32 addr, u16 val) override; bool DataWrite16(u32 addr, u16 val) override;
bool DataWrite32(u32 addr, u32 val) override; bool DataWrite32(u32 addr, u32 val) override;
bool DataWrite32S(u32 addr, u32 val) override; bool DataWrite32S(u32 addr, u32 val) override;
template<u8 nregs>
void ExecuteStage(u8 rn, u8 rm)
{
static_assert((nregs < 2), "too many regs");
if constexpr (nregs == 1) void CodeFetch();
{
InterlockMask = 1 << rn;
}
if constexpr (nregs == 2)
{
InterlockMask = 1 << rn | 1 << rm;
}
AddCycles_C(); void AddCycles_C() override { CodeFetch(); }
}
void AddCycles_C() override {}
void AddCycles_CI(s32 numX) override; void AddCycles_CI(s32 numX) override;
@ -300,6 +284,10 @@ public:
AddCycles_MW(DataCycles); AddCycles_MW(DataCycles);
DataCycles = 0; DataCycles = 0;
} }
template <bool bitfield>
void HandleInterlocksExecute(u16 ilmask);
void HandleInterlocksMemory(u8 reg);
void GetCodeMemRegion(const u32 addr, MemRegion* region); void GetCodeMemRegion(const u32 addr, MemRegion* region);
@ -676,12 +664,14 @@ public:
u64 ITCMTimestamp; u64 ITCMTimestamp;
u64 TimestampActual; u64 TimestampActual;
u8 InterlockMem; u32 PC;
u8 InterlockWBCur; bool NullFetch;
u8 InterlockWBPrev;
bool Store; bool Store;
u16 InterlockMask;
u8 ILCurrReg;
u8 ILPrevReg;
u64 ILCurrTime;
u64 ILPrevTime;
u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing
u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing

View File

@ -36,6 +36,7 @@ namespace melonDS::ARMInterpreter
void A_UNK(ARM* cpu) void A_UNK(ARM* cpu)
{ {
cpu->AddCycles_C();
Log(LogLevel::Warn, "undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8); Log(LogLevel::Warn, "undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8);
#ifdef GDBSTUB_ENABLED #ifdef GDBSTUB_ENABLED
cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-8); cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-8);
@ -54,6 +55,7 @@ void A_UNK(ARM* cpu)
void T_UNK(ARM* cpu) void T_UNK(ARM* cpu)
{ {
cpu->AddCycles_C();
Log(LogLevel::Warn, "undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4); Log(LogLevel::Warn, "undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4);
#ifdef GDBSTUB_ENABLED #ifdef GDBSTUB_ENABLED
cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-4); cpu->GdbStub.Enter(cpu->GdbStub.IsConnected(), Gdb::TgtStatus::FaultInsn, cpu->R[15]-4);
@ -151,6 +153,8 @@ void A_MSR_IMM(ARM* cpu)
void A_MSR_REG(ARM* cpu) void A_MSR_REG(ARM* cpu)
{ {
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
u32* psr; u32* psr;
if (cpu->CurInstr & (1<<22)) if (cpu->CurInstr & (1<<22))
{ {
@ -273,6 +277,8 @@ void A_MCR(ARM* cpu)
u32 val = cpu->R[(cpu->CurInstr>>12)&0xF]; u32 val = cpu->R[(cpu->CurInstr>>12)&0xF];
if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4; if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr>>12)&0xF);
if (cpu->Num==0 && cp==15) if (cpu->Num==0 && cp==15)
{ {
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val);
@ -335,6 +341,7 @@ void A_MRC(ARM* cpu)
void A_SVC(ARM* cpu) // A_SWI void A_SVC(ARM* cpu) // A_SWI
{ {
cpu->AddCycles_C();
u32 oldcpsr = cpu->CPSR; u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF; cpu->CPSR &= ~0xBF;
cpu->CPSR |= 0x93; cpu->CPSR |= 0x93;
@ -347,6 +354,7 @@ void A_SVC(ARM* cpu) // A_SWI
void T_SVC(ARM* cpu) // T_SWI void T_SVC(ARM* cpu) // T_SWI
{ {
cpu->AddCycles_C();
u32 oldcpsr = cpu->CPSR; u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF; cpu->CPSR &= ~0xBF;
cpu->CPSR |= 0x93; cpu->CPSR |= 0x93;

View File

@ -152,22 +152,26 @@ inline bool OverflowSbc(u32 a, u32 b, u32 carry)
#define A_CALC_OP2_IMM \ #define A_CALC_OP2_IMM \
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \
u16 ilmask = 0;
#define A_CALC_OP2_IMM_S \ #define A_CALC_OP2_IMM_S \
u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \ u32 b = ROR(cpu->CurInstr&0xFF, (cpu->CurInstr>>7)&0x1E); \
if ((cpu->CurInstr>>7)&0x1E) \ if ((cpu->CurInstr>>7)&0x1E) \
cpu->SetC(b & 0x80000000); cpu->SetC(b & 0x80000000); \
u16 ilmask = 0;
#define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \ #define A_CALC_OP2_REG_SHIFT_IMM(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 b = cpu->R[cpu->CurInstr&0xF]; \
u32 s = (cpu->CurInstr>>7)&0x1F; \ u32 s = (cpu->CurInstr>>7)&0x1F; \
shiftop(b, s); shiftop(b, s); \
u16 ilmask = 1 << (cpu->CurInstr&0xF);
#define A_CALC_OP2_REG_SHIFT_REG(shiftop) \ #define A_CALC_OP2_REG_SHIFT_REG(shiftop) \
u32 b = cpu->R[cpu->CurInstr&0xF]; \ u32 b = cpu->R[cpu->CurInstr&0xF]; \
if ((cpu->CurInstr&0xF)==15) b += 4; \ if ((cpu->CurInstr&0xF)==15) b += 4; \
shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); shiftop(b, (cpu->R[(cpu->CurInstr>>8)&0xF] & 0xFF)); \
u16 ilmask = 1 << (cpu->CurInstr&0xF);
#define A_IMPLEMENT_ALU_OP(x,s) \ #define A_IMPLEMENT_ALU_OP(x,s) \
@ -377,6 +381,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
#define A_SUB(c) \ #define A_SUB(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \ u32 res = a - b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -394,6 +399,7 @@ A_IMPLEMENT_ALU_OP(EOR,_S)
!res, \ !res, \
CarrySub(a, b), \ CarrySub(a, b), \
OverflowSub(a, b)); \ OverflowSub(a, b)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -410,6 +416,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
#define A_RSB(c) \ #define A_RSB(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a; \ u32 res = b - a; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -427,6 +434,7 @@ A_IMPLEMENT_ALU_OP(SUB,)
!res, \ !res, \
CarrySub(b, a), \ CarrySub(b, a), \
OverflowSub(b, a)); \ OverflowSub(b, a)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -443,6 +451,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
#define A_ADD(c) \ #define A_ADD(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \ u32 res = a + b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -460,6 +469,7 @@ A_IMPLEMENT_ALU_OP(RSB,)
!res, \ !res, \
CarryAdd(a, b), \ CarryAdd(a, b), \
OverflowAdd(a, b)); \ OverflowAdd(a, b)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -476,6 +486,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
#define A_ADC(c) \ #define A_ADC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \ u32 res = a + b + (cpu->CPSR&0x20000000 ? 1:0); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -495,6 +506,7 @@ A_IMPLEMENT_ALU_OP(ADD,)
!res, \ !res, \
CarryAdd(a, b) | CarryAdd(res_tmp, carry), \ CarryAdd(a, b) | CarryAdd(res_tmp, carry), \
OverflowAdc(a, b, carry)); \ OverflowAdc(a, b, carry)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -511,6 +523,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
#define A_SBC(c) \ #define A_SBC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = a - b - (cpu->CPSR&0x20000000 ? 0:1); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -530,6 +543,7 @@ A_IMPLEMENT_ALU_OP(ADC,)
!res, \ !res, \
CarrySub(a, b) & CarrySub(res_tmp, carry), \ CarrySub(a, b) & CarrySub(res_tmp, carry), \
OverflowSbc(a, b, carry)); \ OverflowSbc(a, b, carry)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -546,6 +560,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
#define A_RSC(c) \ #define A_RSC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \ u32 res = b - a - (cpu->CPSR&0x20000000 ? 0:1); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -565,6 +580,7 @@ A_IMPLEMENT_ALU_OP(SBC,)
!res, \ !res, \
CarrySub(b, a) & CarrySub(res_tmp, carry), \ CarrySub(b, a) & CarrySub(res_tmp, carry), \
OverflowSbc(b, a, carry)); \ OverflowSbc(b, a, carry)); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -581,6 +597,8 @@ A_IMPLEMENT_ALU_OP(RSC,)
#define A_TST(c) \ #define A_TST(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & b; \ u32 res = a & b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \ { \
if (cpu->Num == 1) \ if (cpu->Num == 1) \
@ -601,8 +619,7 @@ A_IMPLEMENT_ALU_OP(RSC,)
{ \ { \
cpu->SetNZ(res & 0x80000000, \ cpu->SetNZ(res & 0x80000000, \
!res); \ !res); \
} \ }
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(TST,_S) A_IMPLEMENT_ALU_TEST(TST,_S)
@ -610,6 +627,8 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
#define A_TEQ(c) \ #define A_TEQ(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a ^ b; \ u32 res = a ^ b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \ { \
if (cpu->Num == 1) \ if (cpu->Num == 1) \
@ -630,8 +649,7 @@ A_IMPLEMENT_ALU_TEST(TST,_S)
{ \ { \
cpu->SetNZ(res & 0x80000000, \ cpu->SetNZ(res & 0x80000000, \
!res); \ !res); \
} \ }
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(TEQ,_S) A_IMPLEMENT_ALU_TEST(TEQ,_S)
@ -639,6 +657,8 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
#define A_CMP(c) \ #define A_CMP(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a - b; \ u32 res = a - b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \ { \
if (cpu->Num == 1) \ if (cpu->Num == 1) \
@ -663,8 +683,7 @@ A_IMPLEMENT_ALU_TEST(TEQ,_S)
!res, \ !res, \
CarrySub(a, b), \ CarrySub(a, b), \
OverflowSub(a, b)); \ OverflowSub(a, b)); \
} \ }
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(CMP,) A_IMPLEMENT_ALU_TEST(CMP,)
@ -672,6 +691,8 @@ A_IMPLEMENT_ALU_TEST(CMP,)
#define A_CMN(c) \ #define A_CMN(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a + b; \ u32 res = a + b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \ if (((cpu->CurInstr>>12) & 0xF) == 15) [[unlikely]] /* this seems to trigger alu rd==15 behavior for arm7 and legacy instruction behavior for arm9 */ \
{ \ { \
if (cpu->Num == 1) \ if (cpu->Num == 1) \
@ -696,8 +717,7 @@ A_IMPLEMENT_ALU_TEST(CMP,)
!res, \ !res, \
CarryAdd(a, b), \ CarryAdd(a, b), \
OverflowAdd(a, b)); \ OverflowAdd(a, b)); \
} \ }
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C();
A_IMPLEMENT_ALU_TEST(CMN,) A_IMPLEMENT_ALU_TEST(CMN,)
@ -705,6 +725,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
#define A_ORR(c) \ #define A_ORR(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a | b; \ u32 res = a | b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -720,6 +741,7 @@ A_IMPLEMENT_ALU_TEST(CMN,)
u32 res = a | b; \ u32 res = a | b; \
cpu->SetNZ(res & 0x80000000, \ cpu->SetNZ(res & 0x80000000, \
!res); \ !res); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -734,6 +756,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S)
#define A_MOV(c) \ #define A_MOV(c) \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -747,6 +770,7 @@ A_IMPLEMENT_ALU_OP(ORR,_S)
#define A_MOV_S(c) \ #define A_MOV_S(c) \
cpu->SetNZ(b & 0x80000000, \ cpu->SetNZ(b & 0x80000000, \
!b); \ !b); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -781,6 +805,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
#define A_BIC(c) \ #define A_BIC(c) \
u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 a = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 res = a & ~b; \ u32 res = a & ~b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -796,6 +821,7 @@ void A_MOV_REG_LSL_IMM_DBG(ARM* cpu)
u32 res = a & ~b; \ u32 res = a & ~b; \
cpu->SetNZ(res & 0x80000000, \ cpu->SetNZ(res & 0x80000000, \
!res); \ !res); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask | (1 <<((cpu->CurInstr>>16) & 0xF))); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -811,6 +837,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S)
#define A_MVN(c) \ #define A_MVN(c) \
b = ~b; \ b = ~b; \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -825,6 +852,7 @@ A_IMPLEMENT_ALU_OP(BIC,_S)
b = ~b; \ b = ~b; \
cpu->SetNZ(b & 0x80000000, \ cpu->SetNZ(b & 0x80000000, \
!b); \ !b); \
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>(ilmask); \
if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \ if (c) cpu->AddCycles_CI(c); else cpu->AddCycles_C(); \
if (((cpu->CurInstr>>12) & 0xF) == 15) \ if (((cpu->CurInstr>>12) & 0xF) == 15) \
{ \ { \
@ -859,12 +887,17 @@ void A_MUL(ARM* cpu)
if (cpu->Num == 0) if (cpu->Num == 0)
{ {
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); // S
else else
{ {
cpu->AddCycles_C(); // 1 X cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null; cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
} }
else else
@ -899,12 +932,18 @@ void A_MLA(ARM* cpu)
if (cpu->Num == 0) if (cpu->Num == 0)
{ {
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3); if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(3);
else else
{ {
cpu->AddCycles_C(); // 1 X cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null; cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
} }
else else
@ -938,18 +977,31 @@ void A_UMULL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0); if (cpu->Num==1) cpu->SetC(0);
} }
u32 cycles;
if (cpu->Num == 0) if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; {
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else else
{ {
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5; else cycles = 5;
cpu->AddCycles_CI(cycles);
} }
cpu->AddCycles_CI(cycles);
} }
void A_UMLAL(ARM* cpu) void A_UMLAL(ARM* cpu)
@ -974,18 +1026,33 @@ void A_UMLAL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0); if (cpu->Num==1) cpu->SetC(0);
} }
u32 cycles;
if (cpu->Num == 0) if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; {
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)) |
(1 << ((cpu->CurInstr >> 16) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else else
{ {
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2; if ((rs & 0xFFFFFF00) == 0x00000000) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3; else if ((rs & 0xFFFF0000) == 0x00000000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000) cycles = 4; else if ((rs & 0xFF000000) == 0x00000000) cycles = 4;
else cycles = 5; else cycles = 5;
cpu->AddCycles_CI(cycles);
} }
cpu->AddCycles_CI(cycles);
} }
void A_SMULL(ARM* cpu) void A_SMULL(ARM* cpu)
@ -1007,18 +1074,30 @@ void A_SMULL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0); if (cpu->Num==1) cpu->SetC(0);
} }
u32 cycles;
if (cpu->Num == 0) if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; {
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else else
{ {
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5; else cycles = 5;
cpu->AddCycles_CI(cycles);
} }
cpu->AddCycles_CI(cycles);
} }
void A_SMLAL(ARM* cpu) void A_SMLAL(ARM* cpu)
@ -1043,18 +1122,32 @@ void A_SMLAL(ARM* cpu)
if (cpu->Num==1) cpu->SetC(0); if (cpu->Num==1) cpu->SetC(0);
} }
u32 cycles;
if (cpu->Num == 0) if (cpu->Num == 0)
cycles = (cpu->CurInstr & (1<<20)) ? 4 : 2; {
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)) |
(1 << ((cpu->CurInstr >> 16) & 0xF)));
if (cpu->CurInstr & (1<<20)) cpu->AddCycles_CI(4);
else
{
cpu->AddCycles_CI(2);
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
else else
{ {
u32 cycles;
if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2; if ((rs & 0xFFFFFF00) == 0x00000000 || (rs & 0xFFFFFF00) == 0xFFFFFF00) cycles = 2;
else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3; else if ((rs & 0xFFFF0000) == 0x00000000 || (rs & 0xFFFF0000) == 0xFFFF0000) cycles = 3;
else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4; else if ((rs & 0xFF000000) == 0x00000000 || (rs & 0xFF000000) == 0xFF000000) cycles = 4;
else cycles = 5; else cycles = 5;
cpu->AddCycles_CI(cycles);
} }
cpu->AddCycles_CI(cycles);
} }
void A_SMLAxy(ARM* cpu) void A_SMLAxy(ARM* cpu)
@ -1078,8 +1171,17 @@ void A_SMLAxy(ARM* cpu)
if (OverflowAdd(res_mul, rn)) if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000; cpu->CPSR |= 0x08000000;
cpu->AddCycles_C(); // TODO: interlock?? ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_SMLAWy(ARM* cpu) void A_SMLAWy(ARM* cpu)
@ -1101,7 +1203,16 @@ void A_SMLAWy(ARM* cpu)
if (OverflowAdd(res_mul, rn)) if (OverflowAdd(res_mul, rn))
cpu->CPSR |= 0x08000000; cpu->CPSR |= 0x08000000;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_SMULxy(ARM* cpu) void A_SMULxy(ARM* cpu)
@ -1120,7 +1231,16 @@ void A_SMULxy(ARM* cpu)
if (((cpu->CurInstr >> 16) & 0xF) != 15) if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_SMULWy(ARM* cpu) void A_SMULWy(ARM* cpu)
@ -1137,7 +1257,16 @@ void A_SMULWy(ARM* cpu)
if (((cpu->CurInstr >> 16) & 0xF) != 15) if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = res; cpu->R[(cpu->CurInstr >> 16) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock??
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_SMLALxy(ARM* cpu) void A_SMLALxy(ARM* cpu)
@ -1162,10 +1291,18 @@ void A_SMLALxy(ARM* cpu)
if (((cpu->CurInstr >> 16) & 0xF) != 15) if (((cpu->CurInstr >> 16) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL); cpu->R[(cpu->CurInstr >> 16) & 0xF] = (u32)(res >> 32ULL);
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) |
(1 << ((cpu->CurInstr >> 8) & 0xF)) |
(1 << ((cpu->CurInstr >> 12) & 0xF)) |
(1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C(); // 1 X cpu->AddCycles_C(); // 1 X
cpu->DataRegion = Mem9_Null; cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(2); // 2 M ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
@ -1192,6 +1329,8 @@ void A_CLZ(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1);
else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1213,7 +1352,13 @@ void A_QADD(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15) if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock?? ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_QSUB(ARM* cpu) void A_QSUB(ARM* cpu)
@ -1233,7 +1378,13 @@ void A_QSUB(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15) if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock?? ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_QDADD(ARM* cpu) void A_QDADD(ARM* cpu)
@ -1261,7 +1412,13 @@ void A_QDADD(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15) if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock?? ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
void A_QDSUB(ARM* cpu) void A_QDSUB(ARM* cpu)
@ -1289,7 +1446,13 @@ void A_QDSUB(ARM* cpu)
if (((cpu->CurInstr >> 12) & 0xF) != 15) if (((cpu->CurInstr >> 12) & 0xF) != 15)
cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; cpu->R[(cpu->CurInstr >> 12) & 0xF] = res;
cpu->AddCycles_C(); // TODO: interlock?? ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF)));
cpu->AddCycles_C();
cpu->DataRegion = Mem9_Null;
((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling
((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
@ -1306,6 +1469,7 @@ void T_LSL_IMM(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = op; cpu->R[cpu->CurInstr & 0x7] = op;
cpu->SetNZ(op & 0x80000000, cpu->SetNZ(op & 0x80000000,
!op); !op);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1317,6 +1481,7 @@ void T_LSR_IMM(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = op; cpu->R[cpu->CurInstr & 0x7] = op;
cpu->SetNZ(op & 0x80000000, cpu->SetNZ(op & 0x80000000,
!op); !op);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1328,6 +1493,7 @@ void T_ASR_IMM(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = op; cpu->R[cpu->CurInstr & 0x7] = op;
cpu->SetNZ(op & 0x80000000, cpu->SetNZ(op & 0x80000000,
!op); !op);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1341,6 +1507,7 @@ void T_ADD_REG_(ARM* cpu)
!res, !res,
CarryAdd(a, b), CarryAdd(a, b),
OverflowAdd(a, b)); OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1354,6 +1521,7 @@ void T_SUB_REG_(ARM* cpu)
!res, !res,
CarrySub(a, b), CarrySub(a, b),
OverflowSub(a, b)); OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << ((cpu->CurInstr >> 3) & 0x7)) | (1 << ((cpu->CurInstr >> 6) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1367,6 +1535,7 @@ void T_ADD_IMM_(ARM* cpu)
!res, !res,
CarryAdd(a, b), CarryAdd(a, b),
OverflowAdd(a, b)); OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1380,6 +1549,7 @@ void T_SUB_IMM_(ARM* cpu)
!res, !res,
CarrySub(a, b), CarrySub(a, b),
OverflowSub(a, b)); OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1389,6 +1559,7 @@ void T_MOV_IMM(ARM* cpu)
cpu->R[(cpu->CurInstr >> 8) & 0x7] = b; cpu->R[(cpu->CurInstr >> 8) & 0x7] = b;
cpu->SetNZ(0, cpu->SetNZ(0,
!b); !b);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1401,6 +1572,7 @@ void T_CMP_IMM(ARM* cpu)
!res, !res,
CarrySub(a, b), CarrySub(a, b),
OverflowSub(a, b)); OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1414,6 +1586,7 @@ void T_ADD_IMM(ARM* cpu)
!res, !res,
CarryAdd(a, b), CarryAdd(a, b),
OverflowAdd(a, b)); OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1427,6 +1600,7 @@ void T_SUB_IMM(ARM* cpu)
!res, !res,
CarrySub(a, b), CarrySub(a, b),
OverflowSub(a, b)); OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 8) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1439,6 +1613,7 @@ void T_AND_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res; cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000, cpu->SetNZ(res & 0x80000000,
!res); !res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1450,6 +1625,7 @@ void T_EOR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res; cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000, cpu->SetNZ(res & 0x80000000,
!res); !res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1461,6 +1637,7 @@ void T_LSL_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a; cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000, cpu->SetNZ(a & 0x80000000,
!a); !a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1); cpu->AddCycles_CI(1);
} }
@ -1472,6 +1649,7 @@ void T_LSR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a; cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000, cpu->SetNZ(a & 0x80000000,
!a); !a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1); cpu->AddCycles_CI(1);
} }
@ -1483,6 +1661,7 @@ void T_ASR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a; cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000, cpu->SetNZ(a & 0x80000000,
!a); !a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1); cpu->AddCycles_CI(1);
} }
@ -1498,6 +1677,7 @@ void T_ADC_REG(ARM* cpu)
!res, !res,
CarryAdd(a, b) | CarryAdd(res_tmp, carry), CarryAdd(a, b) | CarryAdd(res_tmp, carry),
OverflowAdc(a, b, carry)); OverflowAdc(a, b, carry));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1513,6 +1693,7 @@ void T_SBC_REG(ARM* cpu)
!res, !res,
CarrySub(a, b) & CarrySub(res_tmp, carry), CarrySub(a, b) & CarrySub(res_tmp, carry),
OverflowSbc(a, b, carry)); OverflowSbc(a, b, carry));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1524,6 +1705,7 @@ void T_ROR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = a; cpu->R[cpu->CurInstr & 0x7] = a;
cpu->SetNZ(a & 0x80000000, cpu->SetNZ(a & 0x80000000,
!a); !a);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_CI(1); cpu->AddCycles_CI(1);
} }
@ -1534,6 +1716,7 @@ void T_TST_REG(ARM* cpu)
u32 res = a & b; u32 res = a & b;
cpu->SetNZ(res & 0x80000000, cpu->SetNZ(res & 0x80000000,
!res); !res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1546,6 +1729,7 @@ void T_NEG_REG(ARM* cpu)
!res, !res,
CarrySub(0, b), CarrySub(0, b),
OverflowSub(0, b)); OverflowSub(0, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1558,6 +1742,7 @@ void T_CMP_REG(ARM* cpu)
!res, !res,
CarrySub(a, b), CarrySub(a, b),
OverflowSub(a, b)); OverflowSub(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1570,6 +1755,7 @@ void T_CMN_REG(ARM* cpu)
!res, !res,
CarryAdd(a, b), CarryAdd(a, b),
OverflowAdd(a, b)); OverflowAdd(a, b));
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1581,6 +1767,7 @@ void T_ORR_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res; cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000, cpu->SetNZ(res & 0x80000000,
!res); !res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1597,6 +1784,7 @@ void T_MUL_REG(ARM* cpu)
if (cpu->Num == 0) if (cpu->Num == 0)
{ {
cycles += 3; cycles += 3;
((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
} }
else else
{ {
@ -1606,7 +1794,7 @@ void T_MUL_REG(ARM* cpu)
else if (a & 0x0000FF00) cycles += 2; else if (a & 0x0000FF00) cycles += 2;
else cycles += 1; else cycles += 1;
} }
cpu->AddCycles_CI(cycles); cpu->AddCycles_CI(cycles); // implemented as S variant, doesn't interlock
} }
void T_BIC_REG(ARM* cpu) void T_BIC_REG(ARM* cpu)
@ -1617,6 +1805,7 @@ void T_BIC_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res; cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000, cpu->SetNZ(res & 0x80000000,
!res); !res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << (cpu->CurInstr & 0x7)) | (1 << ((cpu->CurInstr >> 3) & 0x7)));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1627,6 +1816,7 @@ void T_MVN_REG(ARM* cpu)
cpu->R[cpu->CurInstr & 0x7] = res; cpu->R[cpu->CurInstr & 0x7] = res;
cpu->SetNZ(res & 0x80000000, cpu->SetNZ(res & 0x80000000,
!res); !res);
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0x7);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1643,7 +1833,8 @@ void T_ADD_HIREG(ARM* cpu)
u32 b = cpu->R[rs]; u32 b = cpu->R[rs];
cpu->AddCycles_C(); cpu->AddCycles_C();
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
if (rd == 15) if (rd == 15)
{ {
cpu->JumpTo((a + b) | 1); cpu->JumpTo((a + b) | 1);
@ -1678,6 +1869,7 @@ void T_CMP_HIREG(ARM* cpu)
cpu->CPSR |= 0x20; // keep it from crashing the emulator at least cpu->CPSR |= 0x20; // keep it from crashing the emulator at least
} }
} }
else if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1687,6 +1879,7 @@ void T_MOV_HIREG(ARM* cpu)
u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8); u32 rd = (cpu->CurInstr & 0x7) | ((cpu->CurInstr >> 4) & 0x8);
u32 rs = (cpu->CurInstr >> 3) & 0xF; u32 rs = (cpu->CurInstr >> 3) & 0xF;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<true>((1 << rd) | (1 << rs));
cpu->AddCycles_C(); cpu->AddCycles_C();
if (rd == 15) if (rd == 15)
@ -1717,6 +1910,8 @@ void T_ADD_PCREL(ARM* cpu)
u32 val = cpu->R[15] & ~2; u32 val = cpu->R[15] & ~2;
val += ((cpu->CurInstr & 0xFF) << 2); val += ((cpu->CurInstr & 0xFF) << 2);
cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; cpu->R[(cpu->CurInstr >> 8) & 0x7] = val;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(15);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1725,6 +1920,8 @@ void T_ADD_SPREL(ARM* cpu)
u32 val = cpu->R[13]; u32 val = cpu->R[13];
val += ((cpu->CurInstr & 0xFF) << 2); val += ((cpu->CurInstr & 0xFF) << 2);
cpu->R[(cpu->CurInstr >> 8) & 0x7] = val; cpu->R[(cpu->CurInstr >> 8) & 0x7] = val;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(13);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }
@ -1736,6 +1933,8 @@ void T_ADD_SP(ARM* cpu)
else else
val += ((cpu->CurInstr & 0x7F) << 2); val += ((cpu->CurInstr & 0x7F) << 2);
cpu->R[13] = val; cpu->R[13] = val;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(13);
cpu->AddCycles_C(); cpu->AddCycles_C();
} }

View File

@ -27,12 +27,14 @@ using Platform::LogLevel;
void A_B(ARM* cpu) void A_B(ARM* cpu)
{ {
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6; s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
cpu->JumpTo(cpu->R[15] + offset); cpu->JumpTo(cpu->R[15] + offset);
} }
void A_BL(ARM* cpu) void A_BL(ARM* cpu)
{ {
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6; s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
cpu->R[14] = cpu->R[15] - 4; cpu->R[14] = cpu->R[15] - 4;
cpu->JumpTo(cpu->R[15] + offset); cpu->JumpTo(cpu->R[15] + offset);
@ -40,6 +42,7 @@ void A_BL(ARM* cpu)
void A_BLX_IMM(ARM* cpu) void A_BLX_IMM(ARM* cpu)
{ {
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6; s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
if (cpu->CurInstr & 0x01000000) offset += 2; if (cpu->CurInstr & 0x01000000) offset += 2;
cpu->R[14] = cpu->R[15] - 4; cpu->R[14] = cpu->R[15] - 4;
@ -48,11 +51,15 @@ void A_BLX_IMM(ARM* cpu)
void A_BX(ARM* cpu) void A_BX(ARM* cpu)
{ {
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
} }
void A_BLX_REG(ARM* cpu) void A_BLX_REG(ARM* cpu)
{ {
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
u32 lr = cpu->R[15] - 4; u32 lr = cpu->R[15] - 4;
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]); cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
cpu->R[14] = lr; cpu->R[14] = lr;
@ -62,22 +69,25 @@ void A_BLX_REG(ARM* cpu)
void T_BCOND(ARM* cpu) void T_BCOND(ARM* cpu)
{ {
cpu->AddCycles_C();
if (cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF)) if (cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF))
{ {
s32 offset = (s32)(cpu->CurInstr << 24) >> 23; s32 offset = (s32)(cpu->CurInstr << 24) >> 23;
cpu->JumpTo(cpu->R[15] + offset + 1); cpu->JumpTo(cpu->R[15] + offset + 1);
} }
else
cpu->AddCycles_C();
} }
void T_BX(ARM* cpu) void T_BX(ARM* cpu)
{ {
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]); cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]);
} }
void T_BLX_REG(ARM* cpu) void T_BLX_REG(ARM* cpu)
{ {
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
if (cpu->Num==1) if (cpu->Num==1)
{ {
Log(LogLevel::Warn, "!! THUMB BLX_REG ON ARM7\n"); Log(LogLevel::Warn, "!! THUMB BLX_REG ON ARM7\n");
@ -91,6 +101,7 @@ void T_BLX_REG(ARM* cpu)
void T_B(ARM* cpu) void T_B(ARM* cpu)
{ {
cpu->AddCycles_C();
s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20; s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20;
cpu->JumpTo(cpu->R[15] + offset + 1); cpu->JumpTo(cpu->R[15] + offset + 1);
} }
@ -104,6 +115,7 @@ void T_BL_LONG_1(ARM* cpu)
void T_BL_LONG_2(ARM* cpu) void T_BL_LONG_2(ARM* cpu)
{ {
cpu->AddCycles_C();
s32 offset = (cpu->CurInstr & 0x7FF) << 1; s32 offset = (cpu->CurInstr & 0x7FF) << 1;
u32 pc = cpu->R[14] + offset; u32 pc = cpu->R[14] + offset;
cpu->R[14] = (cpu->R[15] - 2) | 1; cpu->R[14] = (cpu->R[15] - 2) | 1;

View File

@ -18,11 +18,22 @@
#include <stdio.h> #include <stdio.h>
#include "ARM.h" #include "ARM.h"
#include "NDS.h"
namespace melonDS::ARMInterpreter namespace melonDS::ARMInterpreter
{ {
template <bool bitfield>
inline void ExecuteStage(ARM* cpu, u16 ilmask)
{
if (cpu->Num == 0)
{
((ARMv5*)cpu)->HandleInterlocksExecute<bitfield>(ilmask);
((ARMv5*)cpu)->AddCycles_C();
}
}
// copypasta from ALU. bad // copypasta from ALU. bad
#define LSL_IMM(x, s) \ #define LSL_IMM(x, s) \
@ -50,13 +61,15 @@ namespace melonDS::ARMInterpreter
#define A_WB_CALC_OFFSET_IMM \ #define A_WB_CALC_OFFSET_IMM \
u32 offset = (cpu->CurInstr & 0xFFF); \ u32 offset = (cpu->CurInstr & 0xFFF); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset; if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 0;
#define A_WB_CALC_OFFSET_REG(shiftop) \ #define A_WB_CALC_OFFSET_REG(shiftop) \
u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
u32 shift = ((cpu->CurInstr>>7)&0x1F); \ u32 shift = ((cpu->CurInstr>>7)&0x1F); \
shiftop(offset, shift); \ shiftop(offset, shift); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset; if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 1 << (cpu->CurInstr & 0xF);
enum class Writeback enum class Writeback
{ {
@ -66,10 +79,12 @@ enum class Writeback
Trans, Trans,
}; };
template<bool signror, int size, Writeback writeback> template<bool signror, int size, Writeback writeback, bool multireg>
void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset, u16 ilmask)
{ {
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
u32 addr; u32 addr;
if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn]; if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn];
@ -110,14 +125,25 @@ void LoadSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1;
cpu->JumpTo(val); cpu->JumpTo(val);
} }
else cpu->R[rd] = val; else
{
cpu->R[rd] = val;
if (cpu->Num == 0)
{
((ARMv5*)cpu)->ILCurrReg = rd;
bool extra = ((size < 32) || (signror && (addr&0x3)));
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles + extra;
}
}
} }
template<int size, Writeback writeback> template<int size, Writeback writeback, bool multireg>
void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset) void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset, u16 ilmask)
{ {
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
u32 addr; u32 addr;
if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn]; if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn];
else addr = cpu->R[rn]; else addr = cpu->R[rn];
@ -131,6 +157,8 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap; ((ARMv5*)cpu)->PU_Map = ((ARMv5*)cpu)->PU_UserMap;
} }
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksMemory(rd);
bool dabort; bool dabort;
if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval); if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval);
if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval); if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval);
@ -154,36 +182,36 @@ void StoreSingle(ARM* cpu, u8 rd, u8 rn, s32 offset)
#define A_STR \ #define A_STR \
if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else StoreSingle<32, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STR_POST \ #define A_STR_POST \
if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) StoreSingle<32, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else StoreSingle<32, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STRB \ #define A_STRB \
if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else StoreSingle<8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STRB_POST \ #define A_STRB_POST \
if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) StoreSingle<8, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else StoreSingle<8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDR \ #define A_LDR \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 32, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<true, 32, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDR_POST \ #define A_LDR_POST \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<true, 32, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 32, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<true, 32, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRB \ #define A_LDRB \
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<false, 8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<false, 8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRB_POST \ #define A_LDRB_POST \
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Trans>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<false, 8, Writeback::Trans, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<false, 8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<false, 8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
@ -258,20 +286,22 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
#define A_HD_CALC_OFFSET_IMM \ #define A_HD_CALC_OFFSET_IMM \
u32 offset = (cpu->CurInstr & 0xF) | ((cpu->CurInstr >> 4) & 0xF0); \ u32 offset = (cpu->CurInstr & 0xF) | ((cpu->CurInstr >> 4) & 0xF0); \
if (!(cpu->CurInstr & (1<<23))) offset = -offset; if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 0;
#define A_HD_CALC_OFFSET_REG \ #define A_HD_CALC_OFFSET_REG \
u32 offset = cpu->R[cpu->CurInstr & 0xF]; \ u32 offset = cpu->R[cpu->CurInstr & 0xF]; \
if (!(cpu->CurInstr & (1<<23))) offset = -offset; if (!(cpu->CurInstr & (1<<23))) offset = -offset; \
u16 ilmask = 1 << (cpu->CurInstr & 0xF);
#define A_STRH \ #define A_STRH \
if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) StoreSingle<16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else StoreSingle<16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else StoreSingle<16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_STRH_POST \ #define A_STRH_POST \
StoreSingle<16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); StoreSingle<16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
// TODO: CHECK LDRD/STRD TIMINGS!! // TODO: CHECK LDRD/STRD TIMINGS!!
@ -280,15 +310,19 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \ u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \ if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \ bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \
u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \ u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \
cpu->AddCycles_CDI(); \
if (dabort) { \ if (dabort) { \
cpu->AddCycles_CDI(); \
((ARMv5*)cpu)->DataAbort(); \ ((ARMv5*)cpu)->DataAbort(); \
return; } \ return; } \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \ else { \
cpu->AddCycles_CDI(); \ cpu->R[r+1] = val; \
if (cpu->Num == 0) { \
((ARMv5*)cpu)->ILCurrReg = r+1; \
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } \
if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset; if (cpu->CurInstr & (1<<21)) cpu->R[(cpu->CurInstr>>16) & 0xF] = offset;
#define A_LDRD_POST \ #define A_LDRD_POST \
@ -296,15 +330,19 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \ u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \ if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \ bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \
u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \ u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \
cpu->AddCycles_CDI(); \
if (dabort) { \ if (dabort) { \
cpu->AddCycles_CDI(); \
((ARMv5*)cpu)->DataAbort(); \ ((ARMv5*)cpu)->DataAbort(); \
return; } \ return; } \
if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \ if (r == 14) cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); /* restores cpsr presumably due to shared dna with ldm */ \
else cpu->R[r+1] = val; \ else { \
cpu->AddCycles_CDI(); \ cpu->R[r+1] = val; \
if (cpu->Num == 0) { \
((ARMv5*)cpu)->ILCurrReg = r+1; \
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles; } } \
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_STRD \ #define A_STRD \
@ -312,6 +350,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \ u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \ if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \
u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \
dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \ dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \
@ -326,6 +366,8 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \ u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \ if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \ bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \
u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \ u32 storeval = cpu->R[r+1]; if (r == 14) storeval+=4; \
dabort |= !cpu->DataWrite32S (addr+4, storeval); \ dabort |= !cpu->DataWrite32S (addr+4, storeval); \
@ -336,25 +378,25 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
cpu->R[(cpu->CurInstr>>16) & 0xF] += offset; cpu->R[(cpu->CurInstr>>16) & 0xF] += offset;
#define A_LDRH \ #define A_LDRH \
if (cpu->CurInstr & (1<<21)) LoadSingle<false, 16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<false, 16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<false, 16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<false, 16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRH_POST \ #define A_LDRH_POST \
LoadSingle<false, 16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); LoadSingle<false, 16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSB \ #define A_LDRSB \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 8, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<true, 8, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 8, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<true, 8, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSB_POST \ #define A_LDRSB_POST \
LoadSingle<true, 8, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); LoadSingle<true, 8, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSH \ #define A_LDRSH \
if (cpu->CurInstr & (1<<21)) LoadSingle<true, 16, Writeback::Pre>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); \ if (cpu->CurInstr & (1<<21)) LoadSingle<true, 16, Writeback::Pre, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask); \
else LoadSingle<true, 16, Writeback::None>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); else LoadSingle<true, 16, Writeback::None, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_LDRSH_POST \ #define A_LDRSH_POST \
LoadSingle<true, 16, Writeback::Post>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset); LoadSingle<true, 16, Writeback::Post, true>(cpu, ((cpu->CurInstr>>12) & 0xF), ((cpu->CurInstr>>16) & 0xF), offset, ilmask);
#define A_IMPLEMENT_HD_LDRSTR(x) \ #define A_IMPLEMENT_HD_LDRSTR(x) \
@ -394,6 +436,7 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH)
template<bool byte> template<bool byte>
inline void SWP(ARM* cpu) inline void SWP(ARM* cpu)
{ {
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 16) & 0xF));
u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rm = cpu->R[cpu->CurInstr & 0xF]; u32 rm = cpu->R[cpu->CurInstr & 0xF];
if ((cpu->CurInstr & 0xF) == 15) rm += 4; if ((cpu->CurInstr & 0xF) == 15) rm += 4;
@ -402,7 +445,7 @@ inline void SWP(ARM* cpu)
if ((byte ? cpu->DataRead8 (base, &val) if ((byte ? cpu->DataRead8 (base, &val)
: cpu->DataRead32(base, &val))) [[likely]] : cpu->DataRead32(base, &val))) [[likely]]
{ {
u32 numD = cpu->DataCycles; cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme
if ((byte ? cpu->DataWrite8 (base, rm) if ((byte ? cpu->DataWrite8 (base, rm)
: cpu->DataWrite32(base, rm))) [[likely]] : cpu->DataWrite32(base, rm))) [[likely]]
@ -411,17 +454,27 @@ inline void SWP(ARM* cpu)
u32 rd = (cpu->CurInstr >> 12) & 0xF; u32 rd = (cpu->CurInstr >> 12) & 0xF;
if constexpr (!byte) val = ROR(val, 8*(base&0x3)); if constexpr (!byte) val = ROR(val, 8*(base&0x3));
cpu->AddCycles_CDI();
if (rd != 15) cpu->R[rd] = val; if (rd != 15)
{
cpu->R[rd] = val;
if (cpu->Num == 0)
{
((ARMv5*)cpu)->ILCurrReg = rd;
bool extra = (byte || (base&0x3));
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles + extra;
}
}
else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9? else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9?
return;
} }
else ((ARMv5*)cpu)->DataAbort();
cpu->DataCycles += numD;
} }
else ((ARMv5*)cpu)->DataAbort();
// data abort handling
cpu->AddCycles_CDI(); cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort();
} }
void A_SWP(ARM* cpu) void A_SWP(ARM* cpu)
@ -504,6 +557,8 @@ void A_LDM(ARM* cpu)
return; return;
} }
ExecuteStage<false>(cpu, baseid);
if (!(cpu->CurInstr & (1<<23))) // decrement if (!(cpu->CurInstr & (1<<23))) // decrement
{ {
// decrement is actually an increment starting from the end address // decrement is actually an increment starting from the end address
@ -559,11 +614,12 @@ void A_LDM(ARM* cpu)
// switch back to previous regs // switch back to previous regs
if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15)))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
cpu->AddCycles_CDI();
// handle data aborts // handle data aborts
if (dabort) [[unlikely]] if (dabort) [[unlikely]]
{ {
cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort(); ((ARMv5*)cpu)->DataAbort();
return; return;
} }
@ -587,12 +643,16 @@ void A_LDM(ARM* cpu)
else else
cpu->R[baseid] = wbbase; cpu->R[baseid] = wbbase;
} }
// jump if pc got written // jump if pc got written
if (cpu->CurInstr & (1<<15)) if (cpu->CurInstr & (1<<15))
cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); cpu->JumpTo(pc, cpu->CurInstr & (1<<22));
else if (cpu->Num == 0)
cpu->AddCycles_CDI(); {
u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0x7FFF);
((ARMv5*)cpu)->ILCurrReg = lastreg;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
} }
void A_STM(ARM* cpu) void A_STM(ARM* cpu)
@ -613,6 +673,8 @@ void A_STM(ARM* cpu)
(0 << 4))); // thumb (0 << 4))); // thumb
return; return;
} }
ExecuteStage<false>(cpu, baseid);
if (!(cpu->CurInstr & (1<<23))) if (!(cpu->CurInstr & (1<<23)))
{ {
@ -640,6 +702,8 @@ void A_STM(ARM* cpu)
cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true); cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true);
} }
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksMemory(__builtin_ctz(cpu->CurInstr));
for (u32 i = 0; i < 16; i++) for (u32 i = 0; i < 16; i++)
{ {
if (cpu->CurInstr & (1<<i)) if (cpu->CurInstr & (1<<i))
@ -668,22 +732,20 @@ void A_STM(ARM* cpu)
if (cpu->CurInstr & (1<<22)) if (cpu->CurInstr & (1<<22))
cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true);
cpu->AddCycles_CD();
// handle data aborts // handle data aborts
if (dabort) [[unlikely]] if (dabort) [[unlikely]]
{ {
// restore original value of base // restore original value of base
cpu->R[baseid] = oldbase; cpu->R[baseid] = oldbase;
cpu->AddCycles_CD();
((ARMv5*)cpu)->DataAbort(); ((ARMv5*)cpu)->DataAbort();
return; return;
} }
if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21))) if ((cpu->CurInstr & (1<<23)) && (cpu->CurInstr & (1<<21)))
cpu->R[baseid] = base; cpu->R[baseid] = base;
cpu->AddCycles_CD();
} }
@ -695,104 +757,108 @@ void A_STM(ARM* cpu)
void T_LDR_PCREL(ARM* cpu) void T_LDR_PCREL(ARM* cpu)
{ {
ExecuteStage<false>(cpu, 15);
u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2);
bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]);
cpu->AddCycles_CDI(); cpu->AddCycles_CDI();
if (dabort) [[unlikely]] if (dabort) [[unlikely]] ((ARMv5*)cpu)->DataAbort();
else if (cpu->Num == 0)
{ {
((ARMv5*)cpu)->DataAbort(); ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 8) & 0x7;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
} }
} }
void T_STR_REG(ARM* cpu) void T_STR_REG(ARM* cpu)
{ {
StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); StoreSingle<32, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_STRB_REG(ARM* cpu) void T_STRB_REG(ARM* cpu)
{ {
StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); StoreSingle<8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_LDR_REG(ARM* cpu) void T_LDR_REG(ARM* cpu)
{ {
LoadSingle<true, 32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); LoadSingle<true, 32, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_LDRB_REG(ARM* cpu) void T_LDRB_REG(ARM* cpu)
{ {
LoadSingle<false, 8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); LoadSingle<false, 8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_STRH_REG(ARM* cpu) void T_STRH_REG(ARM* cpu)
{ {
StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); StoreSingle<16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_LDRSB_REG(ARM* cpu) void T_LDRSB_REG(ARM* cpu)
{ {
LoadSingle<true, 8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); LoadSingle<true, 8, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_LDRH_REG(ARM* cpu) void T_LDRH_REG(ARM* cpu)
{ {
LoadSingle<false, 16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); LoadSingle<false, 16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_LDRSH_REG(ARM* cpu) void T_LDRSH_REG(ARM* cpu)
{ {
LoadSingle<true, 16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7]); LoadSingle<true, 16, Writeback::None, true>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), cpu->R[(cpu->CurInstr >> 6) & 0x7], (1 << ((cpu->CurInstr >> 6) & 0x7)));
} }
void T_STR_IMM(ARM* cpu) void T_STR_IMM(ARM* cpu)
{ {
StoreSingle<32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); StoreSingle<32, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C), 0);
} }
void T_LDR_IMM(ARM* cpu) void T_LDR_IMM(ARM* cpu)
{ {
LoadSingle<true, 32, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C)); LoadSingle<true, 32, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 4) & 0x7C), 0);
} }
void T_STRB_IMM(ARM* cpu) void T_STRB_IMM(ARM* cpu)
{ {
StoreSingle<8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); StoreSingle<8, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F), 0);
} }
void T_LDRB_IMM(ARM* cpu) void T_LDRB_IMM(ARM* cpu)
{ {
LoadSingle<false, 8, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F)); LoadSingle<false, 8, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 6) & 0x1F), 0);
} }
void T_STRH_IMM(ARM* cpu) void T_STRH_IMM(ARM* cpu)
{ {
StoreSingle<16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); StoreSingle<16, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E), 0);
} }
void T_LDRH_IMM(ARM* cpu) void T_LDRH_IMM(ARM* cpu)
{ {
LoadSingle<false, 16, Writeback::None>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E)); LoadSingle<false, 16, Writeback::None, false>(cpu, (cpu->CurInstr & 0x7), ((cpu->CurInstr >> 3) & 0x7), ((cpu->CurInstr >> 5) & 0x3E), 0);
} }
void T_STR_SPREL(ARM* cpu) void T_STR_SPREL(ARM* cpu)
{ {
StoreSingle<32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); StoreSingle<32, Writeback::None, false>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC), 0);
} }
void T_LDR_SPREL(ARM* cpu) void T_LDR_SPREL(ARM* cpu)
{ {
LoadSingle<false, 32, Writeback::None>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC)); LoadSingle<false, 32, Writeback::None, false>(cpu, ((cpu->CurInstr >> 8) & 0x7), 13, ((cpu->CurInstr << 2) & 0x3FC), 0);
} }
void T_PUSH(ARM* cpu) void T_PUSH(ARM* cpu)
{ {
ExecuteStage<false>(cpu, 13);
int nregs = 0; int nregs = 0;
bool first = true; bool first = true;
bool dabort = false; bool dabort = false;
@ -816,6 +882,13 @@ void T_PUSH(ARM* cpu)
base -= (nregs<<2); base -= (nregs<<2);
u32 wbbase = base; u32 wbbase = base;
if (cpu->Num == 0)
{
u8 firstreg = __builtin_ctz(cpu->CurInstr);
if (firstreg == 8) firstreg = 14;
((ARMv5*)cpu)->HandleInterlocksMemory(firstreg);
}
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
if (cpu->CurInstr & (1<<i)) if (cpu->CurInstr & (1<<i))
@ -834,24 +907,24 @@ void T_PUSH(ARM* cpu)
: cpu->DataWrite32S(base, cpu->R[14])); : cpu->DataWrite32S(base, cpu->R[14]));
} }
cpu->AddCycles_CD();
if (dabort) [[unlikely]] if (dabort) [[unlikely]]
{ {
cpu->AddCycles_CD();
((ARMv5*)cpu)->DataAbort(); ((ARMv5*)cpu)->DataAbort();
return; return;
} }
cpu->R[13] = wbbase; cpu->R[13] = wbbase;
cpu->AddCycles_CD();
} }
void T_POP(ARM* cpu) void T_POP(ARM* cpu)
{ {
ExecuteStage<false>(cpu, 13);
u32 base = cpu->R[13]; u32 base = cpu->R[13];
bool first = true; bool first = true;
bool dabort = false; bool dabort = false;
if (!(cpu->CurInstr & 0x1FF)) [[unlikely]] if (!(cpu->CurInstr & 0x1FF)) [[unlikely]]
{ {
EmptyRListLDMSTM(cpu, 13, 0b00011); EmptyRListLDMSTM(cpu, 13, 0b00011);
@ -880,10 +953,23 @@ void T_POP(ARM* cpu)
: cpu->DataRead32S(base, &pc)); : cpu->DataRead32S(base, &pc));
if (dabort) [[unlikely]] goto dataabort; if (dabort) [[unlikely]] goto dataabort;
cpu->AddCycles_CDI();
if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1;
cpu->JumpTo(pc); cpu->JumpTo(pc);
base += 4; base += 4;
} }
else
{
cpu->AddCycles_CDI();
if (cpu->Num == 0)
{
u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0xFF);
((ARMv5*)cpu)->ILCurrReg = lastreg;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
}
if (dabort) [[unlikely]] if (dabort) [[unlikely]]
{ {
@ -894,22 +980,27 @@ void T_POP(ARM* cpu)
} }
cpu->R[13] = base; cpu->R[13] = base;
cpu->AddCycles_CDI();
} }
void T_STMIA(ARM* cpu) void T_STMIA(ARM* cpu)
{ {
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7));
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true; bool first = true;
bool dabort = false; bool dabort = false;
if (!(cpu->CurInstr & 0xFF)) [[unlikely]] if (!(cpu->CurInstr & 0xFF)) [[unlikely]]
{ {
EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b10010); EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b10010);
return; return;
} }
if (cpu->Num == 0)
{
u8 firstreg = __builtin_ctz(cpu->CurInstr);
((ARMv5*)cpu)->HandleInterlocksMemory(firstreg);
}
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
if (cpu->CurInstr & (1<<i)) if (cpu->CurInstr & (1<<i))
@ -922,24 +1013,25 @@ void T_STMIA(ARM* cpu)
} }
} }
cpu->AddCycles_CD();
if (dabort) [[unlikely]] if (dabort) [[unlikely]]
{ {
cpu->AddCycles_CD();
((ARMv5*)cpu)->DataAbort(); ((ARMv5*)cpu)->DataAbort();
return; return;
} }
// TODO: check "Rb included in Rlist" case // TODO: check "Rb included in Rlist" case
cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; cpu->R[(cpu->CurInstr >> 8) & 0x7] = base;
cpu->AddCycles_CD();
} }
void T_LDMIA(ARM* cpu) void T_LDMIA(ARM* cpu)
{ {
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7));
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7]; u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true; bool first = true;
bool dabort = false; bool dabort = false;
if (!(cpu->CurInstr & 0xFF)) [[unlikely]] if (!(cpu->CurInstr & 0xFF)) [[unlikely]]
{ {
EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b00011); EmptyRListLDMSTM(cpu, (cpu->CurInstr >> 8) & 0x7, 0b00011);
@ -960,17 +1052,23 @@ void T_LDMIA(ARM* cpu)
} }
} }
cpu->AddCycles_CDI();
if (dabort) [[unlikely]] if (dabort) [[unlikely]]
{ {
cpu->AddCycles_CDI();
((ARMv5*)cpu)->DataAbort(); ((ARMv5*)cpu)->DataAbort();
return; return;
} }
if (cpu->Num == 0)
{
u8 lastreg = 31 - __builtin_clz(cpu->CurInstr & 0xFF);
((ARMv5*)cpu)->ILCurrReg = lastreg;
((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + ((ARMv5*)cpu)->DataCycles;
}
if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7)))) if (!(cpu->CurInstr & (1<<((cpu->CurInstr >> 8) & 0x7))))
cpu->R[(cpu->CurInstr >> 8) & 0x7] = base; cpu->R[(cpu->CurInstr >> 8) & 0x7] = base;
cpu->AddCycles_CDI();
} }