Clunky fix to get the order of interlocks and IRQs right

This commit is contained in:
Jaklyy 2024-12-26 16:32:12 -05:00
parent 3bd25ae58f
commit 21323bd1b7
7 changed files with 330 additions and 250 deletions

View File

@ -159,6 +159,7 @@ void ARM::Reset()
Cycles = 0;
Halted = 0;
DataCycles = 0;
CheckInterlock = false;
IRQ = 0;
IRQTimestamp = -1;
@ -701,11 +702,43 @@ void ARMv5::StartExecTHUMB()
R[15] += 2;
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
// code fetch is done during the execute stage cycle handling
if (R[15] & 0x2) NullFetch = true;
else NullFetch = false;
PC = R[15];
CheckInterlock = true;
// check for interlocks
if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions
{
// abt
}
else [[likely]] // actually execute
{
u32 icode = (CurInstr >> 6) & 0x3FF;
ARMInterpreter::THUMBInstrTable[icode](this);
}
if (R[15] & 0x2)
{
// the value we need is cached by the bus
// in practice we can treat this as a 1 cycle fetch, with no penalties
RetVal = NextInstr[1] >> 16;
NDS.ARM9Timestamp++;
if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory;
Store = false;
DataRegion = Mem9_Null;
QueueFunction(&ARMv5::ContExecTHUMB);
}
else
{
DelayedQueue = &ARMv5::ContExecTHUMB;
CodeRead32(R[15]);
}
}
void ARMv5::ContExecTHUMB()
{
NextInstr[1] = RetVal;
CheckInterlock = false;
if ((NDS.ARM9Timestamp >= IRQTimestamp) && !(CPSR & 0x80)) TriggerIRQ<CPUExecuteMode::Interpreter>();
else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions
{
@ -725,10 +758,36 @@ void ARMv5::StartExecARM()
R[15] += 4;
CurInstr = NextInstr[0];
NextInstr[0] = NextInstr[1];
// code fetch is done during the execute stage cycle handling
NullFetch = false;
PC = R[15];
CheckInterlock = true;
// check for interlocks
if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions
{
// abt
}
else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute
{
u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0);
ARMInterpreter::ARMInstrTable[icode](this);
}
else if ((CurInstr & 0xFE000000) == 0xFA000000)
{
ARMInterpreter::A_BLX_IMM(this);
}
else if ((CurInstr & 0x0FF000F0) == 0x01200070)
{
ARMInterpreter::A_BKPT(this); // always passes regardless of condition code
}
DelayedQueue = &ARMv5::ContExecARM;
CodeRead32(R[15]);
}
void ARMv5::ContExecARM()
{
NextInstr[1] = RetVal;
CheckInterlock = false;
if ((NDS.ARM9Timestamp >= IRQTimestamp) && !(CPSR & 0x80)) TriggerIRQ<CPUExecuteMode::Interpreter>();
else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions
{
@ -749,6 +808,7 @@ void ARMv5::StartExecARM()
}
else
AddCycles_C();
QueueFunction(&ARMv5::WBCheck_2);
}
@ -855,6 +915,7 @@ void ARMv5::Execute()
else
{
// we got a new addition to the list; redo the current entry and exit to resolve main ram
if (FuncQueueEnd < FuncQueueFill) FuncQueueEnd = FuncQueueFill;
FuncQueueFill = FuncQueueProg;
return;
}
@ -1357,28 +1418,10 @@ u32 ARMv5::ReadMem(u32 addr, int size)
// Obtains the next instruction word and arranges for AddExecute to run.
// Two paths are taken depending on NullFetch:
//  - NullFetch set: no bus read is issued; the value is derived from NextInstr[1].
//  - otherwise: a 32-bit code read is issued at PC.
void ARMv5::CodeFetch()
{
if (NullFetch)
{
// the value we need is cached by the bus
// in practice we can treat this as a 1 cycle fetch, with no penalties
RetVal = NextInstr[1] >> 16; // drop the low 16 bits of the previously fetched word
NDS.ARM9Timestamp++; // charge the single cycle
// keep the CPU timestamp from falling behind TimestampMemory
if (NDS.ARM9Timestamp < TimestampMemory) NDS.ARM9Timestamp = TimestampMemory;
Store = false;
DataRegion = Mem9_Null;
QueueFunction(&ARMv5::AddExecute);
}
else
{
// AddExecute is stored in DelayedQueue before the read is issued
// NOTE(review): presumably CodeRead32 queues DelayedQueue once the read is resolved — confirm
DelayedQueue = &ARMv5::AddExecute;
CodeRead32(PC);
}
}
// Queue function: latches the fetched value (RetVal) into NextInstr[1],
// then advances the ARM9 timestamp by ExecuteCycles.
void ARMv5::AddExecute()
{
NextInstr[1] = RetVal;
NDS.ARM9Timestamp += ExecuteCycles;
}

View File

@ -262,6 +262,7 @@ public:
u8 FuncQueueProg;
u8 ExecuteCycles;
bool FuncQueueActive;
bool CheckInterlock;
#ifdef JIT_ENABLED
u32 FastBlockLookupStart, FastBlockLookupSize;
@ -345,14 +346,14 @@ public:
// Override of AddCycles_C: as shown, clears ExecuteCycles and calls CodeFetch().
// NOTE(review): the same two statements appear both live and commented out, which looks
// like the before/after sides of a diff whose markers were stripped — confirm which
// pair is current before relying on this body.
void AddCycles_C() override
{
ExecuteCycles = 0;
CodeFetch();
//ExecuteCycles = 0;
//CodeFetch();
}
// Override of AddCycles_CI: records numX in ExecuteCycles.
// numX: cycle count stored into ExecuteCycles (a u8 member, so the s32 is narrowed on assignment).
// NOTE(review): CodeFetch() and QueueFunction(&ARMv5::AddExecute) both appear here; they look
// like the old and new form of the same step from a diff with markers stripped — confirm
// which one is live.
void AddCycles_CI(s32 numX) override
{
ExecuteCycles = numX;
CodeFetch();
QueueFunction(&ARMv5::AddExecute);
}
void AddCycles_MW(s32 numM)
@ -730,7 +731,9 @@ public:
// Queue Functions
void StartExecARM();
void ContExecARM();
void StartExecTHUMB();
void ContExecTHUMB();
void AddExecute();
void AddCycles_MW_2();
void DelayIfITCM_2();

View File

@ -36,6 +36,7 @@ namespace melonDS::ARMInterpreter
void A_UNK(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
Log(LogLevel::Warn, "undefined ARM%d instruction %08X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-8);
#ifdef GDBSTUB_ENABLED
@ -56,6 +57,7 @@ void A_UNK(ARM* cpu)
void T_UNK(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
Log(LogLevel::Warn, "undefined THUMB%d instruction %04X @ %08X\n", cpu->Num?7:9, cpu->CurInstr, cpu->R[15]-4);
#ifdef GDBSTUB_ENABLED
@ -75,6 +77,7 @@ void T_UNK(ARM* cpu)
void A_BKPT(ARM* cpu)
{
if (cpu->CheckInterlock) return;
if (cpu->Num == 1) return A_UNK(cpu); // checkme
Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message
@ -85,6 +88,7 @@ void A_BKPT(ARM* cpu)
void A_MSR_IMM(ARM* cpu)
{
if (cpu->CheckInterlock) return;
if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr
else cpu->AddCycles_C();
@ -150,7 +154,7 @@ void A_MSR_IMM(ARM* cpu)
void A_MSR_REG(ARM* cpu)
{
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr & 0xF);
if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr
else cpu->AddCycles_C();
@ -217,6 +221,7 @@ void A_MSR_REG(ARM* cpu)
void A_MRS(ARM* cpu)
{
if (cpu->CheckInterlock) return;
u32 psr;
if (cpu->CurInstr & (1<<22))
{
@ -257,6 +262,12 @@ void A_MRS(ARM* cpu)
void A_MCR(ARM* cpu)
{
if (cpu->CheckInterlock)
{
if (!((cpu->CPSR & 0x1F) == 0x10)) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr>>12)&0xF);
return;
}
if ((cpu->CPSR & 0x1F) == 0x10)
return A_UNK(cpu);
@ -268,8 +279,6 @@ void A_MCR(ARM* cpu)
u32 val = cpu->R[(cpu->CurInstr>>12)&0xF];
if (((cpu->CurInstr>>12) & 0xF) == 15) val += 4;
if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr>>12)&0xF);
if (cpu->Num==0 && cp==15)
{
((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT
@ -291,6 +300,12 @@ void A_MCR(ARM* cpu)
void A_MRC(ARM* cpu)
{
if (cpu->CheckInterlock)
{
if (!((cpu->CPSR & 0x1F) == 0x10)) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr>>12)&0xF);
return;
}
if ((cpu->CPSR & 0x1F) == 0x10)
return A_UNK(cpu);
@ -334,6 +349,7 @@ void A_MRC(ARM* cpu)
void A_SVC(ARM* cpu) // A_SWI
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;
@ -348,6 +364,7 @@ void A_SVC(ARM* cpu) // A_SWI
void T_SVC(ARM* cpu) // T_SWI
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
u32 oldcpsr = cpu->CPSR;
cpu->CPSR &= ~0xBF;

File diff suppressed because it is too large Load Diff

View File

@ -27,6 +27,7 @@ using Platform::LogLevel;
void A_B(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
cpu->JumpTo(cpu->R[15] + offset);
@ -34,6 +35,7 @@ void A_B(ARM* cpu)
void A_BL(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
cpu->R[14] = cpu->R[15] - 4;
@ -42,6 +44,7 @@ void A_BL(ARM* cpu)
void A_BLX_IMM(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
s32 offset = (s32)(cpu->CurInstr << 8) >> 6;
if (cpu->CurInstr & 0x01000000) offset += 2;
@ -51,14 +54,14 @@ void A_BLX_IMM(ARM* cpu)
// ARM BX handler: jumps to the address held in the register selected by bits 0-3 of CurInstr.
// NOTE(review): the two interlock lines below look like the old and new form of one statement
// (diff markers stripped) — confirm which is live before relying on this block.
void A_BX(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
// when CheckInterlock is set, only the interlock handling for the source register runs; the jump is skipped
if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
}
void A_BLX_REG(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute<false>(cpu->CurInstr&0xF);
cpu->AddCycles_C();
u32 lr = cpu->R[15] - 4;
cpu->JumpTo(cpu->R[cpu->CurInstr & 0xF]);
@ -69,6 +72,7 @@ void A_BLX_REG(ARM* cpu)
void T_BCOND(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
if (cpu->CheckCondition((cpu->CurInstr >> 8) & 0xF))
{
@ -79,14 +83,14 @@ void T_BCOND(ARM* cpu)
// THUMB BX handler: jumps to the address held in the register selected by bits 3-6 of CurInstr.
// NOTE(review): the two interlock lines below look like the old and new form of one statement
// (diff markers stripped) — confirm which is live before relying on this block.
void T_BX(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
// when CheckInterlock is set, only the interlock handling for the source register runs; the jump is skipped
if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
cpu->JumpTo(cpu->R[(cpu->CurInstr >> 3) & 0xF]);
}
void T_BLX_REG(ARM* cpu)
{
if (cpu->Num==0) ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
if (cpu->CheckInterlock) return ((ARMv5*)cpu)->HandleInterlocksExecute<false>((cpu->CurInstr >> 3) & 0xF);
cpu->AddCycles_C();
if (cpu->Num==1)
{
@ -101,6 +105,7 @@ void T_BLX_REG(ARM* cpu)
void T_B(ARM* cpu)
{
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 20;
cpu->JumpTo(cpu->R[15] + offset + 1);
@ -108,6 +113,7 @@ void T_B(ARM* cpu)
void T_BL_LONG_1(ARM* cpu)
{
if (cpu->CheckInterlock) return;
s32 offset = (s32)((cpu->CurInstr & 0x7FF) << 21) >> 9;
cpu->R[14] = cpu->R[15] + offset;
cpu->AddCycles_C();
@ -118,6 +124,8 @@ void T_BL_LONG_2(ARM* cpu)
if ((cpu->CurInstr & 0x1801) == 0x0801) // "BLX" with bit 0 set is an undefined instruction.
return T_UNK(cpu); // TODO: Check ARM7 for exceptions
if (cpu->CheckInterlock) return;
cpu->AddCycles_C();
s32 offset = (cpu->CurInstr & 0x7FF) << 1;
u32 pc = cpu->R[14] + offset;

View File

@ -25,13 +25,14 @@ namespace melonDS::ARMInterpreter
{
// Common execute-stage step used by the load/store handlers in this file.
// cpu:    the executing core; the body only acts when cpu->Num == 0, where cpu is cast to ARMv5.
// ilmask: forwarded unchanged to HandleInterlocksExecute<bitfield>.
// In the bool form: returns false after running the interlock handling while CheckInterlock
// is set; otherwise calls AddCycles_C() (for Num == 0) and returns true.
// NOTE(review): two signature lines (void / bool) and two HandleInterlocksExecute calls are
// shown; this looks like the before/after sides of a diff with markers stripped — confirm
// which lines are live.
template <bool bitfield>
inline void ExecuteStage(ARM* cpu, u16 ilmask)
inline bool ExecuteStage(ARM* cpu, u16 ilmask)
{
if (cpu->Num == 0)
{
((ARMv5*)cpu)->HandleInterlocksExecute<bitfield>(ilmask);
if (cpu->CheckInterlock) { ((ARMv5*)cpu)->HandleInterlocksExecute<bitfield>(ilmask); return false;}
((ARMv5*)cpu)->AddCycles_C();
}
return true;
}
@ -85,7 +86,7 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
cpu->LDRFailedRegs = 0;
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
if (!ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)))) return;
u32 addr;
if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn];
@ -182,7 +183,7 @@ void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16
{
static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!");
ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)));
if (!ExecuteStage<multireg>(cpu, (ilmask | (1<<rn)))) return;
u32 addr;
if constexpr (writeback < Writeback::Post) addr = offset + cpu->R[rn];
@ -363,7 +364,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
cpu->LDRFailedRegs = 0; \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
if (!ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \
bool dabort = !cpu->DataRead32(offset, r); \
u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(offset+4, r+1); \
((ARMv5*)cpu)->DelayIfITCM(2); \
@ -385,7 +386,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
cpu->LDRFailedRegs = 0; \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
if (!ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \
bool dabort = !cpu->DataRead32(addr, r); \
u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(addr+4, r+1); \
((ARMv5*)cpu)->DelayIfITCM(2); \
@ -406,7 +407,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
if (!ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(offset, cpu->R[r], r); \
u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \
@ -423,7 +424,7 @@ A_IMPLEMENT_WB_LDRSTR(LDRB)
u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \
u32 r = (cpu->CurInstr>>12) & 0xF; \
if (r&1) { A_UNK(cpu); return; } \
ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \
if (!ExecuteStage<true>(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF)))) return; \
((ARMv5*)cpu)->HandleInterlocksMemory(r); \
bool dabort = !cpu->DataWrite32(addr, cpu->R[r], r); \
u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \
@ -494,7 +495,7 @@ A_IMPLEMENT_HD_LDRSTR(LDRSH)
template<bool byte>
inline void SWP(ARM* cpu)
{
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 16) & 0xF));
if (!ExecuteStage<false>(cpu, ((cpu->CurInstr >> 16) & 0xF))) return;
cpu->LDRFailedRegs = 0;
u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF];
u32 rd = (cpu->CurInstr >> 12) & 0xF;
@ -603,8 +604,10 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags)
void A_LDM(ARM* cpu)
{
cpu->LDRFailedRegs = 0;
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
if (!ExecuteStage<false>(cpu, baseid)) return;
cpu->LDRFailedRegs = 0;
u32 base = cpu->R[baseid];
u32 wbbase;
u32 oldbase = base;
@ -622,8 +625,6 @@ void A_LDM(ARM* cpu)
return;
}
ExecuteStage<false>(cpu, baseid);
if (!(cpu->CurInstr & (1<<23))) // decrement
{
// decrement is actually an increment starting from the end address
@ -750,6 +751,8 @@ void A_LDM(ARM* cpu)
void A_STM(ARM* cpu)
{
u32 baseid = (cpu->CurInstr >> 16) & 0xF;
if (!ExecuteStage<false>(cpu, baseid)) return;
u32 base = cpu->R[baseid];
u32 oldbase = base;
u32 preinc = (cpu->CurInstr & (1<<24));
@ -765,8 +768,6 @@ void A_STM(ARM* cpu)
(0 << 4))); // thumb
return;
}
ExecuteStage<false>(cpu, baseid);
if (!(cpu->CurInstr & (1<<23)))
{
@ -860,7 +861,8 @@ void A_STM(ARM* cpu)
void T_LDR_PCREL(ARM* cpu)
{
ExecuteStage<false>(cpu, 15);
if (!ExecuteStage<false>(cpu, 15)) return;
cpu->LDRFailedRegs = 0;
u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2);
bool dabort = !cpu->DataRead32(addr, (cpu->CurInstr >> 8) & 0x7);
@ -961,7 +963,8 @@ void T_LDR_SPREL(ARM* cpu)
void T_PUSH(ARM* cpu)
{
ExecuteStage<false>(cpu, 13);
if (!ExecuteStage<false>(cpu, 13)) return;
int nregs = 0;
bool first = true;
bool dabort = false;
@ -1033,7 +1036,8 @@ void T_PUSH(ARM* cpu)
void T_POP(ARM* cpu)
{
ExecuteStage<false>(cpu, 13);
if (!ExecuteStage<false>(cpu, 13)) return;
cpu->LDRFailedRegs = 0;
u32 base = cpu->R[13];
bool first = true;
@ -1128,7 +1132,8 @@ void T_POP(ARM* cpu)
void T_STMIA(ARM* cpu)
{
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7));
if (!ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7))) return;
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true;
bool dabort = false;
@ -1181,7 +1186,8 @@ void T_STMIA(ARM* cpu)
void T_LDMIA(ARM* cpu)
{
ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7));
if (!ExecuteStage<false>(cpu, ((cpu->CurInstr >> 8) & 0x7))) return;
u32 base = cpu->R[(cpu->CurInstr >> 8) & 0x7];
bool first = true;
bool dabort = false;

View File

@ -1810,7 +1810,7 @@ u32 NDS::RunFrame()
}
}
//printf("A9 LOOP: 9 %lli %lli %08X %08llX %i 7 %lli %lli %08X %08llX %i\n", ARM9Timestamp, ARM9Target, ARM9.PC, ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7Target, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type);
//printf("A9 LOOP: 9 %lli %lli %08X %08llX %i 7 %lli %lli %08X %08llX %i\n", ARM9Timestamp, ARM9Target, ARM9.R[15], ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7Target, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type);
RunTimers(0);
GPU.GPU3D.Run();