From 5698cf18620278de245d11ddb568ee6f20db02cd Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Thu, 5 Dec 2024 13:29:32 -0500 Subject: [PATCH] lay ground work for main ram contention TAKE 2 alt title: ITS WORKING! ITS WORKING!!! --- src/ARM.cpp | 358 ++++++++++++----- src/ARM.h | 143 +++++-- src/ARMInterpreter.cpp | 61 +-- src/ARMInterpreter_ALU.cpp | 26 +- src/ARMInterpreter_LoadStore.cpp | 274 +++++++------ src/ARMJIT.cpp | 12 +- src/ARMJIT_A64/ARMJIT_Branch.cpp | 10 +- src/ARMJIT_A64/ARMJIT_LoadStore.cpp | 6 +- src/ARMJIT_x64/ARMJIT_Branch.cpp | 10 +- src/ARMJIT_x64/ARMJIT_LoadStore.cpp | 6 +- src/CP15.cpp | 599 +++++++++++++++++----------- src/CP15_Constants.h | 2 +- src/NDS.cpp | 117 +++++- src/NDS.h | 7 + 14 files changed, 1062 insertions(+), 569 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index 75dde763..0bc138c2 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -194,12 +194,22 @@ void ARM::Reset() MainRAMTimestamp = 0; + memset(&MRTrack, 0, sizeof(MRTrack)); + + FuncQueueFill = 0; + FuncQueueEnd = 0; + FuncQueueProg = 0; + FuncQueueActive = false; + ExecuteCycles = 0; + // zorp JumpTo(ExceptionBase); } void ARMv5::Reset() { + FuncQueue[0] = &ARMv5::StartExec; + PU_Map = PU_PrivMap; Store = false; @@ -208,8 +218,8 @@ void ARMv5::Reset() ILCurrReg = 16; ILPrevReg = 16; - ICacheFillPtr = 7; - DCacheFillPtr = 7; + ICacheStreamPtr = 7; + DCacheStreamPtr = 7; WBWritePointer = 16; WBFillPointer = 0; @@ -313,14 +323,33 @@ void ARM::SetupCodeMem(u32 addr) } } -void ARMv5::JumpTo(u32 addr, bool restorecpsr) +void ARMv5::JumpTo(u32 addr, bool restorecpsr, u8 R15) { - if (restorecpsr) + //printf("JUMP! %08X %i %i\n", addr, restorecpsr, R15); + NDS.MonitorARM9Jump(addr); + + BranchRestore = restorecpsr; + BranchUpdate = R15; + BranchAddr = addr; + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_2; + else JumpTo_2(); +} + +void ARMv5::JumpTo_2() +{ + if (CP15Control & (1<<15)) + { + if (BranchUpdate == 1) BranchAddr = R[15] & ~1; + else if (BranchUpdate == 2) BranchAddr = R[15] | 1; + } + else if (BranchUpdate) BranchAddr = R[15]; + + if (BranchRestore) { RestoreCPSR(); - if (CPSR & 0x20) addr |= 0x1; - else addr &= ~0x1; + if (CPSR & 0x20) BranchAddr |= 0x1; + else BranchAddr &= ~0x1; } // aging cart debug crap @@ -329,47 +358,81 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) // jumps count as nonsequential accesses on the instruction bus on the arm9 // thus it requires waiting for the current ICache line fill to complete before continuing - if (ICacheFillPtr < 7) + if (ICacheStreamPtr < 7) { - u64 fillend = ICacheFillTimes[6] + 1; + u64 fillend = ICacheStreamTimes[6] + 1; if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; - ICacheFillPtr = 7; + ICacheStreamPtr = 7; } - if (addr & 0x1) + if (BranchAddr & 0x1) { - addr &= ~0x1; - R[15] = addr+2; + BranchAddr &= ~0x1; + R[15] = BranchAddr+2; + + CPSR |= 0x20; // two-opcodes-at-once fetch // doesn't matter if we put garbage in the MSbs there - if (addr & 0x2) + if (BranchAddr & 0x2) { - NextInstr[0] = CodeRead32(addr-2, true) >> 16; - NextInstr[1] = CodeRead32(addr+2, false); + CodeRead32(BranchAddr-2); + + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_3A; + else JumpTo_3A(); } else { - NextInstr[0] = CodeRead32(addr, true); - NextInstr[1] = NextInstr[0] >> 16; - } + CodeRead32(BranchAddr); - CPSR |= 0x20; + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_3B; + else JumpTo_3B(); + } } else { - addr &= ~0x3; - R[15] = addr+4; - NextInstr[0] = CodeRead32(addr, true); - NextInstr[1] = CodeRead32(addr+4, false); + BranchAddr &= ~0x3; + R[15] = BranchAddr+4; CPSR &= ~0x20; - } - NDS.MonitorARM9Jump(addr); + CodeRead32(BranchAddr); + + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_3C; + else JumpTo_3C(); + } } -void ARMv4::JumpTo(u32 addr, bool restorecpsr) +void ARMv5::JumpTo_3A() +{ + NextInstr[0] = RetVal >> 16; + CodeRead32(BranchAddr+2); + + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_4; + else JumpTo_4(); +} + +void ARMv5::JumpTo_3B() +{ + NextInstr[0] = RetVal; + NextInstr[1] = NextInstr[0] >> 16; +} + +void ARMv5::JumpTo_3C() +{ + NextInstr[0] = RetVal; + CodeRead32(BranchAddr+4); + + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::JumpTo_4; + else JumpTo_4(); +} + +void ARMv5::JumpTo_4() +{ + NextInstr[1] = RetVal; +} + +void ARMv4::JumpTo(u32 addr, bool restorecpsr, u8 R15) { if (restorecpsr) { @@ -447,6 +510,11 @@ void ARM::RestoreCPSR() UpdateMode(oldcpsr, CPSR); } +void ARMv5::QueueUpdateMode() +{ + UpdateMode(QueueMode[0], QueueMode[1], true); +} + void ARM::UpdateMode(u32 oldmode, u32 newmode, bool phony) { if ((oldmode & 0x1F) == (newmode & 0x1F)) return; @@ -563,6 +631,7 @@ template void ARM::TriggerIRQ(); void ARMv5::PrefetchAbort() { + abt = true; AddCycles_C(); Log(LogLevel::Warn, "ARM9: prefetch abort (%08X)\n", R[15]); @@ -578,7 +647,8 @@ void ARMv5::PrefetchAbort() void ARMv5::DataAbort() { - Log(LogLevel::Warn, "ARM9: data abort (%08X)\n", R[15]); + abt = true; + Log(LogLevel::Warn, "ARM9: data abort (%08X) %08llX\n", R[15], CurInstr); u32 oldcpsr = CPSR; CPSR &= ~0xBF; @@ -595,6 +665,63 @@ void ARM::CheckGdbIncoming() GdbCheckA(); } +void ARMv5::StartExec() +{ + if (CPSR & 0x20) // THUMB + { + // prefetch + R[15] += 2; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + // code fetch is done during the execute stage cycle handling + if (R[15] & 0x2) NullFetch = true; + else NullFetch = false; + PC = R[15]; + + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions + { + PrefetchAbort(); + } + else [[likely]] // actually execute + { + u32 icode = (CurInstr >> 6) & 0x3FF; + ARMInterpreter::THUMBInstrTable[icode](this); + } + } + else + { + // prefetch + R[15] += 4; + CurInstr = NextInstr[0]; + NextInstr[0] = NextInstr[1]; + // code fetch is done during the execute stage cycle handling + NullFetch = false; + PC = R[15]; + + if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); + else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions + { + PrefetchAbort(); + } + else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute + { + u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); + ARMInterpreter::ARMInstrTable[icode](this); + } + else if ((CurInstr & 0xFE000000) == 0xFA000000) + { + ARMInterpreter::A_BLX_IMM(this); + } + else if ((CurInstr & 0x0FF000F0) == 0x01200070) + { + ARMInterpreter::A_BKPT(this); // always passes regardless of condition code + } + else + AddCycles_C(); + } +} + template void ARMv5::Execute() { @@ -670,65 +797,81 @@ void ARMv5::Execute() else #endif { - if (CPSR & 0x20) // THUMB + if constexpr (mode == CPUExecuteMode::InterpreterGDB) + GdbCheckC(); // gdb might throw a hissy fit about this change but idc + + //printf("A:%i, F:%i, P:%i, E:%i, I:%08llX, P:%08X, 15:%08X\n", FuncQueueActive, FuncQueueFill, FuncQueueProg, FuncQueueEnd, CurInstr, PC, R[15]); + + (this->*FuncQueue[FuncQueueProg])(); + + if (FuncQueueActive) { - if constexpr (mode == CPUExecuteMode::InterpreterGDB) - GdbCheckC(); - - // prefetch - R[15] += 2; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - // code fetch is done during the execute stage cycle handling - if (R[15] & 0x2) NullFetch = true; - else NullFetch = false; - PC = R[15]; - - if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); - else if (CurInstr > 0xFFFFFFFF) [[unlikely]] // handle aborted instructions + if (FuncQueueFill == FuncQueueProg) { - PrefetchAbort(); + // we did not get a new addition to the queue; increment and reset ptrs + FuncQueueFill = ++FuncQueueProg; + + // check if we're done with the queue, if so, reset everything + if (FuncQueueProg >= FuncQueueEnd) + { + + FuncQueueFill = 0; + FuncQueueProg = 0; + FuncQueueEnd = 0; + FuncQueueActive = false; + FuncQueue[0] = &ARMv5::StartExec; + /* + Platform::FileHandle* file = Platform::OpenFile("REGLOG.bin", Platform::FileMode::Read); + Platform::FileSeek(file, iter*16*4, Platform::FileSeekOrigin::Start); + u32 Regs[16]; + Platform::FileRead(Regs, 4, 16, file); + if (memcmp(Regs, R, 16*4)) + { + printf("MISMATCH ON ITERATION %lli! %08llX", iter, CurInstr); + for (int i = 0; i < 16; i++) + { + printf(" %i: %08X vs %08X", i, R[i], Regs[i]); + } + printf("\n"); + abt=1; + } + Platform::CloseFile(file); + iter++;*/ + } } - else [[likely]] // actually execute + else { - u32 icode = (CurInstr >> 6) & 0x3FF; - ARMInterpreter::THUMBInstrTable[icode](this); + // we got a new addition to the list; redo the current entry + FuncQueueFill = FuncQueueProg; } } + else if (FuncQueueFill > 0) // check if we started the queue up + { + FuncQueueEnd = FuncQueueFill; + FuncQueueFill = 0; + FuncQueueActive = true; + } else { - if constexpr (mode == CPUExecuteMode::InterpreterGDB) - GdbCheckC(); - - // prefetch - R[15] += 4; - CurInstr = NextInstr[0]; - NextInstr[0] = NextInstr[1]; - // code fetch is done during the execute stage cycle handling - NullFetch = false; - PC = R[15]; - - if (IRQ && !(CPSR & 0x80)) TriggerIRQ(); - else if (CurInstr & ((u64)1<<63)) [[unlikely]] // handle aborted instructions + /* + Platform::FileHandle* file = Platform::OpenFile("REGLOG.bin", Platform::FileMode::Read); + Platform::FileSeek(file, iter*16*4, Platform::FileSeekOrigin::Start); + u32 Regs[16]; + Platform::FileRead(Regs, 4, 16, file); + if (memcmp(Regs, R, 16*4)) { - PrefetchAbort(); + printf("MISMATCH ON ITERATION %lli! %08llX", iter, CurInstr); + for (int i = 0; i < 16; i++) + { + printf(" %i: %08X vs %08X", i, R[i], Regs[i]); + } + printf("\n"); + abt=1; } - else if (CheckCondition(CurInstr >> 28)) [[likely]] // actually execute - { - u32 icode = ((CurInstr >> 4) & 0xF) | ((CurInstr >> 16) & 0xFF0); - ARMInterpreter::ARMInstrTable[icode](this); - } - else if ((CurInstr & 0xFE000000) == 0xFA000000) - { - ARMInterpreter::A_BLX_IMM(this); - } - else if ((CurInstr & 0x0FF000F0) == 0x01200070) - { - ARMInterpreter::A_BKPT(this); // always passes regardless of condition code - } - else - AddCycles_C(); + Platform::CloseFile(file); + iter++;*/ } + if (MRTrack.Type != MainRAMType::Null) break; // check if we need to resolve main ram // TODO optimize this shit!!! if (Halted) @@ -907,26 +1050,26 @@ template void ARMv4::Execute(); void ARMv5::FillPipeline() { - SetupCodeMem(R[15]); + /*SetupCodeMem(R[15]); if (CPSR & 0x20) { if ((R[15] - 2) & 0x2) { - NextInstr[0] = CodeRead32(R[15] - 4, false) >> 16; - NextInstr[1] = CodeRead32(R[15], false); + NextInstr[0] = CodeRead32(R[15] - 4) >> 16; + NextInstr[1] = CodeRead32(R[15]); } else { - NextInstr[0] = CodeRead32(R[15] - 2, false); + NextInstr[0] = CodeRead32(R[15] - 2); NextInstr[1] = NextInstr[0] >> 16; } } else { - NextInstr[0] = CodeRead32(R[15] - 4, false); - NextInstr[1] = CodeRead32(R[15], false); - } + NextInstr[0] = CodeRead32(R[15] - 4); + NextInstr[1] = CodeRead32(R[15]); + }*/ } void ARMv4::FillPipeline() @@ -1160,23 +1303,37 @@ void ARMv5::CodeFetch() { // the value we need is cached by the bus // in practice we can treat this as a 1 cycle fetch, with no penalties - NextInstr[1] >>= 16; + RetVal = NextInstr[1] >> 16; NDS.ARM9Timestamp++; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; Store = false; DataRegion = Mem9_Null; } - else NextInstr[1] = CodeRead32(PC, false); + else + { + CodeRead32(PC); + } + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::AddExecute; + else AddExecute(); } -void ARMv5::AddCycles_CI(s32 numX) +void ARMv5::AddExecute() { - CodeFetch(); - NDS.ARM9Timestamp += numX; + NextInstr[1] = RetVal; + + NDS.ARM9Timestamp += ExecuteCycles; } void ARMv5::AddCycles_MW(s32 numM) { + DataCycles = numM; + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::AddCycles_MW_2; + else AddCycles_MW_2(); +} + +void ARMv5::AddCycles_MW_2() +{ + s32 numM = DataCycles; TimestampActual = numM + NDS.ARM9Timestamp; numM -= 3< void ARMv5::HandleInterlocksExecute(u16 ilmask, u8* times) { + /* if ((bitfield && (ilmask & (1<(u16 ilmask, u8* times); template void ARMv5::HandleInterlocksExecute(u16 ilmask, u8* times); void ARMv5::HandleInterlocksMemory(u8 reg) { + /* if ((reg != ILPrevReg) || (NDS.ARM9Timestamp >= ILPrevTime)) return; u64 diff = ILPrevTime - NDS.ARM9Timestamp; // should always be 1? NDS.ARM9Timestamp = ILPrevTime; ITCMTimestamp += diff; // checkme - ILPrevTime = 16; + ILPrevTime = 16;*/ } u16 ARMv4::CodeRead16(u32 addr) @@ -1265,8 +1424,10 @@ u32 ARMv4::CodeRead32(u32 addr) return BusRead32(addr); } -bool ARMv4::DataRead8(u32 addr, u32* val) +bool ARMv4::DataRead8(u32 addr, u8 reg) { + u32* val = &R[reg]; + if ((addr >> 24) == 0x02) { if (NDS.ARM7Timestamp < MainRAMTimestamp) NDS.ARM7Timestamp = MainRAMTimestamp; @@ -1284,8 +1445,9 @@ bool ARMv4::DataRead8(u32 addr, u32* val) return true; } -bool ARMv4::DataRead16(u32 addr, u32* val) +bool ARMv4::DataRead16(u32 addr, u8 reg) { + u32* val = &R[reg]; addr &= ~1; if ((addr >> 24) == 0x02) @@ -1305,8 +1467,9 @@ bool ARMv4::DataRead16(u32 addr, u32* val) return true; } -bool ARMv4::DataRead32(u32 addr, u32* val) +bool ARMv4::DataRead32(u32 addr, u8 reg) { + u32* val = &R[reg]; addr &= ~3; if ((addr >> 24) == 0x02) @@ -1326,8 +1489,9 @@ bool ARMv4::DataRead32(u32 addr, u32* val) return true; } -bool ARMv4::DataRead32S(u32 addr, u32* val) +bool ARMv4::DataRead32S(u32 addr, u8 reg) { + u32* val = &R[reg]; addr &= ~3; if ((addr >> 24) == 0x02) @@ -1347,7 +1511,7 @@ bool ARMv4::DataRead32S(u32 addr, u32* val) return true; } -bool ARMv4::DataWrite8(u32 addr, u8 val) +bool ARMv4::DataWrite8(u32 addr, u8 val, u8 reg) { if ((addr >> 24) == 0x02) { @@ -1366,7 +1530,7 @@ bool ARMv4::DataWrite8(u32 addr, u8 val) return true; } -bool ARMv4::DataWrite16(u32 addr, u16 val) +bool ARMv4::DataWrite16(u32 addr, u16 val, u8 reg) { addr &= ~1; @@ -1387,7 +1551,7 @@ bool ARMv4::DataWrite16(u32 addr, u16 val) return true; } -bool ARMv4::DataWrite32(u32 addr, u32 val) +bool ARMv4::DataWrite32(u32 addr, u32 val, u8 reg) { addr &= ~3; @@ -1408,7 +1572,7 @@ bool ARMv4::DataWrite32(u32 addr, u32 val) return true; } -bool ARMv4::DataWrite32S(u32 addr, u32 val) +bool ARMv4::DataWrite32S(u32 addr, u32 val, u8 reg) { addr &= ~3; diff --git a/src/ARM.h b/src/ARM.h index 2a002df9..9fb195f4 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -53,6 +53,19 @@ enum class CPUExecuteMode : u32 #endif }; +enum class MainRAMType : u8 +{ + Null = 0, + ICacheStream, +}; + +struct MainRAMTrackers +{ + MainRAMType Type; + u8 Var; + u8 Progress; +}; + struct GDBArgs; class ARMJIT; class GPU; @@ -75,7 +88,7 @@ public: virtual void FillPipeline() = 0; - virtual void JumpTo(u32 addr, bool restorecpsr = false) = 0; + virtual void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) = 0; void RestoreCPSR(); void Halt(u32 halt) @@ -135,14 +148,14 @@ public: void SetupCodeMem(u32 addr); - virtual bool DataRead8(u32 addr, u32* val) = 0; - virtual bool DataRead16(u32 addr, u32* val) = 0; - virtual bool DataRead32(u32 addr, u32* val) = 0; - virtual bool DataRead32S(u32 addr, u32* val) = 0; - virtual bool DataWrite8(u32 addr, u8 val) = 0; - virtual bool DataWrite16(u32 addr, u16 val) = 0; - virtual bool DataWrite32(u32 addr, u32 val) = 0; - virtual bool DataWrite32S(u32 addr, u32 val) = 0; + virtual bool DataRead8(u32 addr, u8 reg) = 0; + virtual bool DataRead16(u32 addr, u8 reg) = 0; + virtual bool DataRead32(u32 addr, u8 reg) = 0; + virtual bool DataRead32S(u32 addr, u8 reg) = 0; + virtual bool DataWrite8(u32 addr, u8 val, u8 reg) = 0; + virtual bool DataWrite16(u32 addr, u16 val, u8 reg) = 0; + virtual bool DataWrite32(u32 addr, u32 val, u8 reg) = 0; + virtual bool DataWrite32S(u32 addr, u32 val, u8 reg) = 0; virtual void AddCycles_C() = 0; virtual void AddCycles_CI(s32 numI) = 0; @@ -186,6 +199,29 @@ public: MemRegion CodeMem; u64 MainRAMTimestamp; + MainRAMTrackers MRTrack; + + u32 BranchAddr; + u8 BranchUpdate; + bool BranchRestore; + + u32 QueueMode[2]; + + u64 RetVal; + + u16 LDRRegs; + u16 LDRFailedRegs; + u16 STRRegs; + u32 FetchAddr[17]; + u32 STRVal[16]; + + u64 iter; + + u8 FuncQueueFill; + u8 FuncQueueEnd; + u8 FuncQueueProg; + u8 ExecuteCycles; + bool FuncQueueActive; #ifdef JIT_ENABLED u32 FastBlockLookupStart, FastBlockLookupSize; @@ -245,7 +281,7 @@ public: void FillPipeline() override; - void JumpTo(u32 addr, bool restorecpsr = false) override; + void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) override; void PrefetchAbort(); void DataAbort(); @@ -254,36 +290,42 @@ public: void Execute(); // all code accesses are forced nonseq 32bit - u64 CodeRead32(const u32 addr, const bool branch); + void CodeRead32(const u32 addr); - bool DataRead8(u32 addr, u32* val) override; - bool DataRead16(u32 addr, u32* val) override; - bool DataRead32(u32 addr, u32* val) override; - bool DataRead32S(u32 addr, u32* val) override; - bool DataWrite8(u32 addr, u8 val) override; - bool DataWrite16(u32 addr, u16 val) override; - bool DataWrite32(u32 addr, u32 val) override; - bool DataWrite32S(u32 addr, u32 val) override; + bool DataRead8(u32 addr, u8 reg) override; + bool DataRead16(u32 addr, u8 reg) override; + bool DataRead32(u32 addr, u8 reg) override; + bool DataRead32S(u32 addr, u8 reg) override; + bool DataWrite8(u32 addr, u8 val, u8 reg) override; + bool DataWrite16(u32 addr, u16 val, u8 reg) override; + bool DataWrite32(u32 addr, u32 val, u8 reg) override; + bool DataWrite32S(u32 addr, u32 val, u8 reg) override; void CodeFetch(); - void AddCycles_C() override { CodeFetch(); } + void AddCycles_C() override + { + ExecuteCycles = 0; + CodeFetch(); + } - void AddCycles_CI(s32 numX) override; + void AddCycles_CI(s32 numX) override + { + ExecuteCycles = numX; + CodeFetch(); + } void AddCycles_MW(s32 numM); void AddCycles_CDI() override { AddCycles_MW(DataCycles); - DataCycles = 0; } void AddCycles_CD() override { - Store = true; + Store = true; // todo: queue this AddCycles_MW(DataCycles); - DataCycles = 0; } template @@ -366,7 +408,7 @@ public: * cache. The address is internally aligned to an word boundary * @return Value of the word at addr */ - u32 ICacheLookup(const u32 addr); + bool ICacheLookup(const u32 addr); /** * @brief Check if an address is within a instruction cachable @@ -604,6 +646,26 @@ public: * @return Value of the cp15 register */ u32 CP15Read(const u32 id) const; + + void StartExec(); + void AddExecute(); + void AddCycles_MW_2(); + void JumpTo_2(); + void JumpTo_3A(); + void JumpTo_3B(); + void JumpTo_3C(); + void JumpTo_4(); + void DAbortHandle(); + void DAbortHandleS(); + void DRead8_2(); + void DRead16_2(); + void DRead32_2(); + void DRead32S_2(); + void DWrite8_2(); + void DWrite16_2(); + void DWrite32_2(); + void DWrite32S_2(); + void QueueUpdateMode(); u32 CP15Control; //! CP15 Register 1: Control Register @@ -652,7 +714,7 @@ public: * 1 - CP15_MAP_WRITEABLE * 2 - CP15_MAP_EXECUTABLE * 4 - CP15_MAP_DCACHEABLE - * 5 - CP15_MAP_DCACHEWRITEBACK + * 5 - CP15_MAP_BUFFERABLE * 6 - CP15_MAP_ICACHEABLE */ u8 PU_UserMap[CP15_MAP_ENTRYCOUNT]; //! Memory mapping flags for User Mode @@ -665,6 +727,7 @@ public: u64 ITCMTimestamp; u64 TimestampActual; + void (ARMv5::*FuncQueue[31])(void); u32 PC; bool NullFetch; bool Store; @@ -674,10 +737,12 @@ public: u64 ILCurrTime; u64 ILPrevTime; - u8 ICacheFillPtr; - u8 DCacheFillPtr; - u64 ICacheFillTimes[7]; - u64 DCacheFillTimes[7]; + u8 ICacheStreamPtr; + u8 DCacheStreamPtr; + u64 ICacheStreamTimes[7]; + u64 DCacheStreamTimes[7]; + + bool abt; u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing @@ -716,7 +781,7 @@ public: void FillPipeline() override; - void JumpTo(u32 addr, bool restorecpsr = false) override; + void JumpTo(u32 addr, bool restorecpsr = false, u8 R15 = 0) override; template void Execute(); @@ -726,14 +791,14 @@ public: u16 CodeRead16(u32 addr); u32 CodeRead32(u32 addr); - bool DataRead8(u32 addr, u32* val) override; - bool DataRead16(u32 addr, u32* val) override; - bool DataRead32(u32 addr, u32* val) override; - bool DataRead32S(u32 addr, u32* val) override; - bool DataWrite8(u32 addr, u8 val) override; - bool DataWrite16(u32 addr, u16 val) override; - bool DataWrite32(u32 addr, u32 val) override; - bool DataWrite32S(u32 addr, u32 val) override; + bool DataRead8(u32 addr, u8 reg) override; + bool DataRead16(u32 addr, u8 reg) override; + bool DataRead32(u32 addr, u8 reg) override; + bool DataRead32S(u32 addr, u8 reg) override; + bool DataWrite8(u32 addr, u8 val, u8 reg) override; + bool DataWrite16(u32 addr, u16 val, u8 reg) override; + bool DataWrite32(u32 addr, u32 val, u8 reg) override; + bool DataWrite32S(u32 addr, u32 val, u8 reg) override; void AddCycles_C() override; void AddCycles_CI(s32 num) override; void AddCycles_CDI() override; diff --git a/src/ARMInterpreter.cpp b/src/ARMInterpreter.cpp index 0400c40a..64249fac 100644 --- a/src/ARMInterpreter.cpp +++ b/src/ARMInterpreter.cpp @@ -50,6 +50,7 @@ void A_UNK(ARM* cpu) cpu->R_UND[2] = oldcpsr; cpu->R[14] = cpu->R[15] - 4; + cpu->JumpTo(cpu->ExceptionBase + 0x04); } @@ -68,12 +69,13 @@ void T_UNK(ARM* cpu) cpu->R_UND[2] = oldcpsr; cpu->R[14] = cpu->R[15] - 2; + cpu->JumpTo(cpu->ExceptionBase + 0x04); } void A_BKPT(ARM* cpu) { - if (cpu->Num == 1) A_UNK(cpu); // checkme + if (cpu->Num == 1) return A_UNK(cpu); // checkme Log(LogLevel::Warn, "BKPT: "); // combine with the prefetch abort warning message ((ARMv5*)cpu)->PrefetchAbort(); @@ -83,6 +85,9 @@ void A_BKPT(ARM* cpu) void A_MSR_IMM(ARM* cpu) { + if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr + else cpu->AddCycles_C(); + u32* psr; if (cpu->CurInstr & (1<<22)) { @@ -100,8 +105,6 @@ void A_MSR_IMM(ARM* cpu) case 0x1A: case 0x1B: psr = &cpu->R_UND[2]; break; default: - if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7 - else cpu->AddCycles_CI(2); // arm 9 return; } } @@ -138,23 +141,15 @@ void A_MSR_IMM(ARM* cpu) cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least } } - - if (cpu->Num != 1) - { - if (cpu->CurInstr & (1<<22)) - { - cpu->AddCycles_CI(2); // spsr - } - else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc - else cpu->AddCycles_C(); - } - else cpu->AddCycles_C(); } void A_MSR_REG(ARM* cpu) { if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); + if ((cpu->Num != 1) && (cpu->CurInstr & ((0x7<<16)|(1<<22)))) cpu->AddCycles_CI(2); // arm9 cpsr_sxc & spsr + else cpu->AddCycles_C(); + u32* psr; if (cpu->CurInstr & (1<<22)) { @@ -172,8 +167,6 @@ void A_MSR_REG(ARM* cpu) case 0x1A: case 0x1B: psr = &cpu->R_UND[2]; break; default: - if (cpu->Num != 1) cpu->AddCycles_C(); // arm 7 - else cpu->AddCycles_CI(2); // arm 9 return; } } @@ -210,17 +203,6 @@ void A_MSR_REG(ARM* cpu) cpu->CPSR &= ~0x20; // keep it from crashing the emulator at least } } - - if (cpu->Num != 1) - { - if (cpu->CurInstr & (1<<22)) - { - cpu->AddCycles_CI(2); // spsr - } - else if (cpu->CurInstr & (0x7<<16)) cpu->AddCycles_CI(2); // cpsr_sxc - else cpu->AddCycles_C(); - } - else cpu->AddCycles_C(); } void A_MRS(ARM* cpu) @@ -247,20 +229,19 @@ void A_MRS(ARM* cpu) else psr = cpu->CPSR; + if (cpu->Num != 1) // arm9 + { + cpu->AddCycles_C(); // 1 X + ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M + } + else cpu->AddCycles_C(); // arm7 + if (((cpu->CurInstr>>12) & 0xF) == 15) { if (cpu->Num == 1) // doesn't seem to jump on the arm9? checkme cpu->JumpTo(psr & ~0x1); // checkme: this shouldn't be able to switch to thumb? } else cpu->R[(cpu->CurInstr>>12) & 0xF] = psr; - - if (cpu->Num != 1) // arm9 - { - cpu->AddCycles_C(); // 1 X - cpu->DataRegion = Mem9_Null; - ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M - } - else cpu->AddCycles_C(); // arm7 } @@ -281,7 +262,7 @@ void A_MCR(ARM* cpu) if (cpu->Num==0 && cp==15) { - ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); + ((ARMv5*)cpu)->CP15Write((cn<<8)|(cm<<4)|cpinfo|(op<<12), val); // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT } else if (cpu->Num==1 && cp==14) { @@ -292,7 +273,8 @@ void A_MCR(ARM* cpu) Log(LogLevel::Warn, "bad MCR opcode p%d, %d, reg, c%d, c%d, %d on ARM%d\n", cp, op, cn, cm, cpinfo, cpu->Num?7:9); return A_UNK(cpu); // TODO: check what kind of exception it really is } - + + // TODO: SINCE THIS DOES A CODE FETCH WE NEED TO DELAY ANY MPU UPDATES UNTIL *AFTER* THE CODE FETCH if (cpu->Num==0) cpu->AddCycles_CI(5); // checkme else /* ARM7 */ cpu->AddCycles_CI(1 + 1); // TODO: checkme } @@ -315,7 +297,7 @@ void A_MRC(ARM* cpu) else { // r15 updates the top 4 bits of the cpsr, done to "allow for conditional branching based on coprocessor status" - u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo|(op<<12)) & 0xF0000000; + u32 flags = ((ARMv5*)cpu)->CP15Read((cn<<8)|(cm<<4)|cpinfo|(op<<12)) & 0xF0000000; // TODO: IF THIS RAISES AN EXCEPTION WE DO A DOUBLE CODE FETCH; FIX THAT cpu->CPSR = (cpu->CPSR & ~0xF0000000) | flags; } } @@ -332,7 +314,6 @@ void A_MRC(ARM* cpu) if (cpu->Num != 1) { cpu->AddCycles_C(); // 1 Execute cycle - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(2); // 2 Memory cycles ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -352,6 +333,7 @@ void A_SVC(ARM* cpu) // A_SWI cpu->R_SVC[2] = oldcpsr; cpu->R[14] = cpu->R[15] - 4; + cpu->JumpTo(cpu->ExceptionBase + 0x08); } @@ -365,6 +347,7 @@ void T_SVC(ARM* cpu) // T_SWI cpu->R_SVC[2] = oldcpsr; cpu->R[14] = cpu->R[15] - 2; + cpu->JumpTo(cpu->ExceptionBase + 0x08); } diff --git a/src/ARMInterpreter_ALU.cpp b/src/ARMInterpreter_ALU.cpp index 410a78e1..5edf5a39 100644 --- a/src/ARMInterpreter_ALU.cpp +++ b/src/ARMInterpreter_ALU.cpp @@ -926,7 +926,6 @@ void A_MUL(ARM* cpu) { cpu->AddCycles_C(); // 1 X - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -974,7 +973,6 @@ void A_MLA(ARM* cpu) { cpu->AddCycles_C(); // 1 X - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1020,7 +1018,6 @@ void A_UMULL(ARM* cpu) { cpu->AddCycles_CI(2); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1073,7 +1070,6 @@ void A_UMLAL(ARM* cpu) { cpu->AddCycles_CI(2); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1119,7 +1115,6 @@ void A_SMULL(ARM* cpu) { cpu->AddCycles_CI(2); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1171,7 +1166,6 @@ void A_SMLAL(ARM* cpu) { cpu->AddCycles_CI(2); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1219,7 +1213,6 @@ void A_SMLAxy(ARM* cpu) (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1251,7 +1244,6 @@ void A_SMLAWy(ARM* cpu) (1 << ((cpu->CurInstr >> 12) & 0xF)), iltime); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1279,7 +1271,6 @@ void A_SMULxy(ARM* cpu) (1 << ((cpu->CurInstr >> 8) & 0xF))); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1305,7 +1296,6 @@ void A_SMULWy(ARM* cpu) (1 << ((cpu->CurInstr >> 8) & 0xF))); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1342,7 +1332,7 @@ void A_SMLALxy(ARM* cpu) (1 << ((cpu->CurInstr >> 12) & 0xF))/* | (1 << ((cpu->CurInstr >> 16) & 0xF))*/, iltime); cpu->AddCycles_C(); // 1 X - cpu->DataRegion = Mem9_Null; + ((ARMv5*)cpu)->AddCycles_MW(2); // 2 M ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 16) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1370,11 +1360,11 @@ void A_CLZ(ARM* cpu) val |= 0x1; } - if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); - else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; - ((ARMv5*)cpu)->HandleInterlocksExecute(cpu->CurInstr & 0xF); cpu->AddCycles_C(); + + if (((cpu->CurInstr >> 12) & 0xF) == 15) cpu->JumpTo(res & ~1); + else cpu->R[(cpu->CurInstr >> 12) & 0xF] = res; } void A_QADD(ARM* cpu) @@ -1398,7 +1388,6 @@ void A_QADD(ARM* cpu) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1424,7 +1413,6 @@ void A_QSUB(ARM* cpu) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1458,7 +1446,6 @@ void A_QDADD(ARM* cpu) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1492,7 +1479,6 @@ void A_QDSUB(ARM* cpu) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << (cpu->CurInstr & 0xF)) | (1 << ((cpu->CurInstr >> 16) & 0xF))); cpu->AddCycles_C(); - cpu->DataRegion = Mem9_Null; ((ARMv5*)cpu)->AddCycles_MW(1); // dummy memory stage for interlock handling ((ARMv5*)cpu)->ILCurrReg = (cpu->CurInstr >> 12) & 0xF; // only one rd interlocks ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; @@ -1902,6 +1888,8 @@ void T_CMP_HIREG(ARM* cpu) CarrySub(a, b), OverflowSub(a, b)); + cpu->AddCycles_C(); + if ((cpu->Num == 1) && (rd == 15)) { u32 oldpsr = cpu->CPSR; @@ -1913,8 +1901,6 @@ void T_CMP_HIREG(ARM* cpu) } } else if (cpu->Num == 0) ((ARMv5*)cpu)->HandleInterlocksExecute((1 << rd) | (1 << rs)); - - cpu->AddCycles_C(); } void T_MOV_HIREG(ARM* cpu) diff --git a/src/ARMInterpreter_LoadStore.cpp b/src/ARMInterpreter_LoadStore.cpp index 0a4f7224..56380e6c 100644 --- a/src/ARMInterpreter_LoadStore.cpp +++ b/src/ARMInterpreter_LoadStore.cpp @@ -82,6 +82,7 @@ enum class Writeback template void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 ilmask) { + cpu->LDRFailedRegs = 0; static_assert((size == 8) || (size == 16) || (size == 32), "dummy this function only takes 8/16/32 for size!!!"); ExecuteStage(cpu, (ilmask | (1<PU_Map = ((ARMv5*)cpu)->PU_UserMap; } - u32 val; + u32 oldrd = cpu->R[rd]; bool dabort; - if constexpr (size == 8) dabort = !cpu->DataRead8 (addr, &val); - if constexpr (size == 16) dabort = !cpu->DataRead16(addr, &val); - if constexpr (size == 32) dabort = !cpu->DataRead32(addr, &val); + if constexpr (size == 8) dabort = !cpu->DataRead8 (addr, rd); + if constexpr (size == 16) dabort = !cpu->DataRead16(addr, rd); + if constexpr (size == 32) dabort = !cpu->DataRead32(addr, rd); if constexpr (writeback == Writeback::Trans) { @@ -114,21 +115,21 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 ((ARMv5*)cpu)->DataAbort(); return; } + if ((cpu->MRTrack.Type != MainRAMType::Null) && signextend && cpu->Num == 0) printf("ARGH ME BONES"); - if constexpr (size == 8 && signextend) val = (s32)(s8)val; + if constexpr (size == 8 && signextend) cpu->R[rd] = (s32)(s8)cpu->R[rd]; if constexpr (size == 16) { if (cpu->Num == 1) { - val = ROR(val, ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 - if constexpr (signextend) val = (s32)((addr&0x1) ? (s8)val : (s16)val); // sign extend like a ldrsb if we ror'd the value. + cpu->R[rd] = ROR(cpu->R[rd], ((addr&0x1)<<3)); // unaligned 16 bit loads are ROR'd on arm7 + if constexpr (signextend) cpu->R[rd] = (s32)((addr&0x1) ? (s8)cpu->R[rd] : (s16)cpu->R[rd]); // sign extend like a ldrsb if we ror'd the value. } - else if constexpr (signextend) val = (s32)(s16)val; + else if constexpr (signextend) cpu->R[rd] = (s32)(s16)cpu->R[rd]; } - if constexpr (size == 32) val = ROR(val, ((addr&0x3)<<3)); - + if constexpr (size == 32) cpu->R[rd] = ROR(cpu->R[rd], ((addr&0x3)<<3)); if constexpr (writeback >= Writeback::Post) addr += offset; if constexpr (writeback != Writeback::None) @@ -139,22 +140,23 @@ void LoadSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 } else if (cpu->Num == 1) // arm 7 { - // note that at no point does it actually write the value it loaded to a register... - cpu->JumpTo((addr+4) & ~1); + cpu->R[rd] = oldrd; // note that at no point does it actually write the value it loaded into a register... + cpu->LDRFailedRegs = 1<JumpTo((addr+4) & ~1); // +4 cause reasons return; } } if (rd == 15) { - if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) val &= ~0x1; - if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual + ((size<32) || (addr&0x3)); // force an interlock + if (cpu->Num==1) cpu->R[15] &= ~0x1; - cpu->JumpTo(val); + //if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual + ((size<32) || (addr&0x3)); // force an interlock + + cpu->JumpTo(cpu->R[15], false, 1); } else { - cpu->R[rd] = val; if (cpu->Num == 0) { ((ARMv5*)cpu)->ILCurrReg = rd; @@ -188,9 +190,9 @@ void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 ((ARMv5*)cpu)->HandleInterlocksMemory(rd); bool dabort; - if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval); - if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval); - if constexpr (size == 32) dabort = !cpu->DataWrite32(addr, storeval); + if constexpr (size == 8) dabort = !cpu->DataWrite8 (addr, storeval, rd); + if constexpr (size == 16) dabort = !cpu->DataWrite16(addr, storeval, rd); + if constexpr (size == 32) dabort = !cpu->DataWrite32(addr, storeval, rd); if constexpr (writeback == Writeback::Trans) { @@ -208,11 +210,11 @@ void StoreSingle(ARM* cpu, const u8 rd, const u8 rn, const s32 offset, const u16 if constexpr (writeback >= Writeback::Post) addr += offset; if constexpr (writeback != Writeback::None) { - if (rn != 15) [[likely]] // r15 writeback fails on arm9 + if (rn != 15) [[likely]] { cpu->R[rn] = addr; } - else if (cpu->Num == 1) // arm 7 + else if (cpu->Num == 1) // r15 writeback fails on arm9 { cpu->JumpTo(addr & ~1); } @@ -349,19 +351,20 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) offset += cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ + cpu->LDRFailedRegs = 0; \ ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ - bool dabort = !cpu->DataRead32(offset, &cpu->R[r]); \ - u32 val; dabort |= !cpu->DataRead32S(offset+4, &val); \ - if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \ + bool dabort = !cpu->DataRead32(offset, r); \ + u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(offset+4, r+1); \ + /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ cpu->AddCycles_CDI(); \ if (dabort) { \ + cpu->R[r+1] = oldval; \ ((ARMv5*)cpu)->DataAbort(); \ return; } \ if (r+1 == 15) { \ - if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; \ - cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); } /* restores cpsr presumably due to shared dna with ldm */ \ + /*if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual;*/ \ + cpu->JumpTo(cpu->R[15], cpu->CurInstr & (1<<22), 1); } /* restores cpsr presumably due to shared dna with ldm */ \ else { \ - cpu->R[r+1] = val; \ if (cpu->Num == 0) { \ ((ARMv5*)cpu)->ILCurrReg = r+1; \ ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; } } \ @@ -372,19 +375,20 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) u32 addr = cpu->R[(cpu->CurInstr>>16) & 0xF]; \ u32 r = (cpu->CurInstr>>12) & 0xF; \ if (r&1) { A_UNK(cpu); return; } \ + cpu->LDRFailedRegs = 0; \ ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ - bool dabort = !cpu->DataRead32(addr, &cpu->R[r]); \ - u32 val; dabort |= !cpu->DataRead32S(addr+4, &val); \ - if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \ + bool dabort = !cpu->DataRead32(addr, r); \ + u32 oldval = cpu->R[r+1]; dabort |= !cpu->DataRead32S(addr+4, r+1); \ + /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ cpu->AddCycles_CDI(); \ if (dabort) { \ + cpu->R[r+1] = oldval; \ ((ARMv5*)cpu)->DataAbort(); \ return; } \ if (r+1 == 15) { \ - if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; \ - cpu->JumpTo(((((ARMv5*)cpu)->CP15Control & (1<<15)) ? (val & ~0x1) : val), cpu->CurInstr & (1<<22)); } /* restores cpsr presumably due to shared dna with ldm */ \ + /*if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual;*/ \ + cpu->JumpTo(cpu->R[15], cpu->CurInstr & (1<<22), 1); } /* restores cpsr presumably due to shared dna with ldm */ \ else { \ - cpu->R[r+1] = val; \ if (cpu->Num == 0) { \ ((ARMv5*)cpu)->ILCurrReg = r+1; \ ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual; } } \ @@ -397,10 +401,10 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } \ ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ ((ARMv5*)cpu)->HandleInterlocksMemory(r); \ - bool dabort = !cpu->DataWrite32(offset, cpu->R[r]); /* yes, this data abort behavior is on purpose */ \ + bool dabort = !cpu->DataWrite32(offset, cpu->R[r], r); \ u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \ - dabort |= !cpu->DataWrite32S (offset+4, storeval); /* no, i dont understand it either */ \ - if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \ + dabort |= !cpu->DataWrite32S (offset+4, storeval, r+1); \ + /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ cpu->AddCycles_CD(); \ if (dabort) [[unlikely]] { \ ((ARMv5*)cpu)->DataAbort(); \ @@ -414,10 +418,10 @@ A_IMPLEMENT_WB_LDRSTR(LDRB) if (r&1) { A_UNK(cpu); return; } \ ExecuteStage(cpu, ilmask | (1 << ((cpu->CurInstr>>16) & 0xF))); \ ((ARMv5*)cpu)->HandleInterlocksMemory(r); \ - bool dabort = !cpu->DataWrite32(addr, cpu->R[r]); \ + bool dabort = !cpu->DataWrite32(addr, cpu->R[r], r); \ u32 storeval = cpu->R[r+1]; if (r+1 == 15) storeval+=4; \ - dabort |= !cpu->DataWrite32S (addr+4, storeval); \ - if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; \ + dabort |= !cpu->DataWrite32S (addr+4, storeval, r+1); \ + /*if (cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2;*/ \ cpu->AddCycles_CD(); \ if (dabort) [[unlikely]] { \ ((ARMv5*)cpu)->DataAbort(); \ @@ -484,29 +488,32 @@ template inline void SWP(ARM* cpu) { ExecuteStage(cpu, ((cpu->CurInstr >> 16) & 0xF)); + cpu->LDRFailedRegs = 0; u32 base = cpu->R[(cpu->CurInstr >> 16) & 0xF]; - u32 rm = cpu->R[cpu->CurInstr & 0xF]; - if ((cpu->CurInstr & 0xF) == 15) rm += 4; + u32 rd = (cpu->CurInstr >> 12) & 0xF; + u32 rm = cpu->CurInstr & 0xF; + u32 storeval = cpu->R[rm]; + if (rm == 15) storeval += 4; + - u32 val; - if ((byte ? cpu->DataRead8 (base, &val) - : cpu->DataRead32(base, &val))) [[likely]] + u32 oldrd = cpu->R[rd]; + + if ((byte ? cpu->DataRead8 (base, rd) + : cpu->DataRead32(base, rd))) [[likely]] { - cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme + //cpu->NDS.ARM9Timestamp += cpu->DataCycles; // checkme - if ((byte ? cpu->DataWrite8 (base, rm) - : cpu->DataWrite32(base, rm))) [[likely]] + if ((byte ? cpu->DataWrite8 (base, storeval, rm) + : cpu->DataWrite32(base, storeval, rm))) [[likely]] { // rd only gets updated if both read and write succeed - u32 rd = (cpu->CurInstr >> 12) & 0xF; - if constexpr (!byte) val = ROR(val, 8*(base&0x3)); + if constexpr (!byte) cpu->R[rd] = ROR(cpu->R[rd], 8*(base&0x3)); cpu->AddCycles_CDI(); if (rd != 15) { - cpu->R[rd] = val; if (cpu->Num == 0) { ((ARMv5*)cpu)->ILCurrReg = rd; @@ -514,12 +521,18 @@ inline void SWP(ARM* cpu) ((ARMv5*)cpu)->ILCurrTime = ((ARMv5*)cpu)->TimestampActual + extra; } } - else if (cpu->Num==1) cpu->JumpTo(val & ~1); // for some reason these jumps don't seem to work on the arm 9? + else if (cpu->Num==1) // for some reason these jumps don't seem to work on the arm 9? + { + cpu->R[rd] = cpu->R[rd] & ~1; + cpu->JumpTo(cpu->R[rd], false, 1); + } return; } } // data abort handling + cpu->R[rd] = oldrd; + cpu->LDRFailedRegs = 1<AddCycles_CDI(); ((ARMv5*)cpu)->DataAbort(); } @@ -559,15 +572,15 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags) if (flags & load) { - u32 pc; - cpu->DataRead32(base, &pc); + cpu->DataRead32(base, 15); cpu->AddCycles_CDI(); - cpu->JumpTo(pc, flags & restoreorthumb); + + cpu->JumpTo(cpu->R[15] & ~1, flags & restoreorthumb, 1); // TODO: fix this not maintaining current mode properly } else { - cpu->DataWrite32(base, cpu->R[15] + ((flags & restoreorthumb) ? 2 : 4)); + cpu->DataWrite32(base, cpu->R[15] + ((flags & restoreorthumb) ? 2 : 4), 15); cpu->AddCycles_CD(); } @@ -586,6 +599,7 @@ void EmptyRListLDMSTM(ARM* cpu, const u8 baseid, const u8 flags) void A_LDM(ARM* cpu) { + cpu->LDRFailedRegs = 0; u32 baseid = (cpu->CurInstr >> 16) & 0xF; u32 base = cpu->R[baseid]; u32 wbbase; @@ -626,48 +640,49 @@ void A_LDM(ARM* cpu) // switch to user mode regs if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) + { cpu->UpdateMode(cpu->CPSR, (cpu->CPSR&~0x1F)|0x10, true); + //if (cpu->MRTrack.Type != MainRAMType::Null) printf("AHA, DERES THE PROBLEM\n"); + } for (int i = 0; i < 15; i++) { if (cpu->CurInstr & (1<DataRead32 (base, &val) - : cpu->DataRead32S(base, &val)); - - // remaining loads still occur but are not written to a reg after a data abort is raised - if (!dabort) [[likely]] cpu->R[i] = val; + u32 oldval = cpu->R[i]; + dabort |= !(first ? cpu->DataRead32 (base, i) + : cpu->DataRead32S(base, i)); + if (dabort) [[unlikely]] { cpu->R[i] = oldval; cpu->LDRFailedRegs |= (1<CurInstr & (1<<15)) { if (preinc) base += 4; - dabort |= !(first ? cpu->DataRead32 (base, &pc) - : cpu->DataRead32S(base, &pc)); + u32 oldval = cpu->R[15]; + dabort |= !(first ? cpu->DataRead32 (base, 15) + : cpu->DataRead32S(base, 15)); + if (dabort) [[unlikely]] { cpu->R[15] = oldval; cpu->LDRFailedRegs |= (1<<15); } + else if (cpu->Num == 1) + cpu->R[15] &= ~0x1; if (!preinc) base += 4; - - if (cpu->Num == 1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) - pc &= ~0x1; } if (__builtin_popcount(cpu->CurInstr & 0xFFFF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CDI(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0) ;//cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CDI(); } @@ -675,7 +690,17 @@ void A_LDM(ARM* cpu) if (dabort) [[unlikely]] { if ((cpu->CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) - cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + { + if (cpu->Num == 0) + { + cpu->QueueMode[0] = (cpu->CPSR&~0x1F)|0x10; + cpu->QueueMode[1] = cpu->CPSR; + + if (cpu->MRTrack.Type != MainRAMType::Null) ((ARMv5*)cpu)->FuncQueue[cpu->FuncQueueFill++] = &ARMv5::QueueUpdateMode; + else ((ARMv5*)cpu)->QueueUpdateMode(); + } + else cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + } ((ARMv5*)cpu)->DataAbort(); return; @@ -694,7 +719,7 @@ void A_LDM(ARM* cpu) { u32 rlist = cpu->CurInstr & 0xFFFF; if ((!(rlist & ~(1 << baseid))) || (rlist & ~((2 << baseid) - 1))) - cpu->R[baseid] = wbbase; + { cpu->R[baseid] = wbbase; cpu->LDRFailedRegs = 1<CurInstr & (1<<22)) && !(cpu->CurInstr & (1<<15))) - cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + { + if (cpu->Num == 0) + { + cpu->QueueMode[0] = (cpu->CPSR&~0x1F)|0x10; + cpu->QueueMode[1] = cpu->CPSR; + + if (cpu->MRTrack.Type != MainRAMType::Null) ((ARMv5*)cpu)->FuncQueue[cpu->FuncQueueFill++] = &ARMv5::QueueUpdateMode; + else ((ARMv5*)cpu)->QueueUpdateMode(); + } + else cpu->UpdateMode((cpu->CPSR&~0x1F)|0x10, cpu->CPSR, true); + } // jump if pc got written if (cpu->CurInstr & (1<<15)) { - if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock - cpu->JumpTo(pc, cpu->CurInstr & (1<<22)); + //if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock + cpu->JumpTo(cpu->R[15], cpu->CurInstr & (1<<22), 1); } else if (cpu->Num == 0) { @@ -785,8 +820,8 @@ void A_STM(ARM* cpu) if (i == 15) val+=4; - dabort |= !(first ? cpu->DataWrite32 (base, val) - : cpu->DataWrite32S(base, val)); + dabort |= !(first ? cpu->DataWrite32 (base, val, i) + : cpu->DataWrite32S(base, val, i)); first = false; @@ -799,14 +834,14 @@ void A_STM(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFFFF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CD(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CD(); } @@ -833,8 +868,9 @@ void A_STM(ARM* cpu) void T_LDR_PCREL(ARM* cpu) { ExecuteStage(cpu, 15); + cpu->LDRFailedRegs = 0; u32 addr = (cpu->R[15] & ~0x2) + ((cpu->CurInstr & 0xFF) << 2); - bool dabort = !cpu->DataRead32(addr, &cpu->R[(cpu->CurInstr >> 8) & 0x7]); + bool dabort = !cpu->DataRead32(addr, (cpu->CurInstr >> 8) & 0x7); cpu->AddCycles_CDI(); if (dabort) [[unlikely]] ((ARMv5*)cpu)->DataAbort(); @@ -968,9 +1004,8 @@ void T_PUSH(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]) - : cpu->DataWrite32S(base, cpu->R[i])); - + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i], i) + : cpu->DataWrite32S(base, cpu->R[i], i)); first = false; base += 4; } @@ -978,20 +1013,20 @@ void T_PUSH(ARM* cpu) if (cpu->CurInstr & (1<<8)) { - dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[14]) - : cpu->DataWrite32S(base, cpu->R[14])); + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[14], 14) + : cpu->DataWrite32S(base, cpu->R[14], 14)); } if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CD(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CD(); } @@ -1007,6 +1042,7 @@ void T_PUSH(ARM* cpu) void T_POP(ARM* cpu) { ExecuteStage(cpu, 13); + cpu->LDRFailedRegs = 0; u32 base = cpu->R[13]; bool first = true; bool dabort = false; @@ -1021,11 +1057,10 @@ void T_POP(ARM* cpu) { if (cpu->CurInstr & (1<DataRead32 (base, &val) - : cpu->DataRead32S(base, &val)); - - if (!dabort) [[likely]] cpu->R[i] = val; + u32 oldval = cpu->R[i]; + dabort |= !(first ? cpu->DataRead32 (base, i) + : cpu->DataRead32S(base, i)); + if (dabort) [[unlikely]] { cpu->R[i] = oldval; cpu->LDRFailedRegs |= (1<CurInstr & (1<<8)) { - u32 pc; - dabort |= !(first ? cpu->DataRead32 (base, &pc) - : cpu->DataRead32S(base, &pc)); + u32 oldval = cpu->R[15]; + dabort |= !(first ? cpu->DataRead32 (base, 15) + : cpu->DataRead32S(base, 15)); if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CDI(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CDI(); } if (!dabort) [[likely]] { - if (cpu->Num==1 || (((ARMv5*)cpu)->CP15Control & (1<<15))) pc |= 0x1; - if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock + if (cpu->Num==1) cpu->R[15] |= 0x1; + //if (cpu->Num==0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // force an interlock - cpu->JumpTo(pc); + cpu->JumpTo(cpu->R[15], false, 2); base += 4; } else [[unlikely]] { + cpu->R[15] = oldval; + cpu->LDRFailedRegs |= (1<<15); ((ARMv5*)cpu)->DataAbort(); return; } @@ -1069,14 +1106,14 @@ void T_POP(ARM* cpu) { if (__builtin_popcount(cpu->CurInstr & 0x1FF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CDI(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CDI(); } @@ -1122,9 +1159,8 @@ void T_STMIA(ARM* cpu) { if (cpu->CurInstr & (1<DataWrite32 (base, cpu->R[i]) - : cpu->DataWrite32S(base, cpu->R[i])); - + dabort |= !(first ? cpu->DataWrite32 (base, cpu->R[i], i) + : cpu->DataWrite32S(base, cpu->R[i], i)); first = false; base += 4; } @@ -1132,14 +1168,14 @@ void T_STMIA(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CD(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CD(); } @@ -1170,11 +1206,11 @@ void T_LDMIA(ARM* cpu) { if (cpu->CurInstr & (1<DataRead32 (base, &val) - : cpu->DataRead32S(base, &val)); + u32 oldval = cpu->R[i]; + dabort |= !(first ? cpu->DataRead32 (base, i) + : cpu->DataRead32S(base, i)); + if (dabort) [[unlikely]] { cpu->R[i] = oldval; cpu->LDRFailedRegs |= (1<R[i] = val; first = false; base += 4; } @@ -1182,14 +1218,14 @@ void T_LDMIA(ARM* cpu) if (__builtin_popcount(cpu->CurInstr & 0xFF) == 1) [[unlikely]] // single reg { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 1; cpu->AddCycles_CDI(); - if (cpu->Num == 0) cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages + if (cpu->Num == 0);// cpu->NDS.ARM9Timestamp = ((ARMv5*)cpu)->TimestampActual; // on arm9 single reg ldm/stm cannot overlap memory and fetch stages else; // CHECKME: ARM7 timing behavior? } else { - if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; + //if (cpu->Num == 0 && cpu->DataRegion == Mem9_ITCM) cpu->NDS.ARM9Timestamp += 2; cpu->AddCycles_CDI(); } diff --git a/src/ARMJIT.cpp b/src/ARMJIT.cpp index 8bf509e9..498f84c3 100644 --- a/src/ARMJIT.cpp +++ b/src/ARMJIT.cpp @@ -646,17 +646,17 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept } else { - nextInstr[1] = cpuv5->CodeRead32(r15, false); + //nextInstr[1] = cpuv5->CodeRead32(r15, false); instrs[i].CodeCycles = cpu->CodeCycles; } } else { ARMv4* cpuv4 = (ARMv4*)cpu; - if (thumb) - nextInstr[1] = cpuv4->CodeRead16(r15); - else - nextInstr[1] = cpuv4->CodeRead32(r15); + if (thumb); + //nextInstr[1] = cpuv4->CodeRead16(r15); + else; + // nextInstr[1] = cpuv4->CodeRead32(r15); instrs[i].CodeCycles = cpu->CodeCycles; } instrs[i].Info = ARMInstrInfo::Decode(thumb, cpu->Num, instrs[i].Instr, LiteralOptimizations); @@ -724,7 +724,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept addressRanges[numAddressRanges++] = translatedAddrRounded; addressMasks[j] |= 1 << ((translatedAddr & 0x1FF) / 16); JIT_DEBUGPRINT("literal loading %08x %08x %08x %08x\n", literalAddr, translatedAddr, addressMasks[j], addressRanges[j]); - cpu->DataRead32(literalAddr, &literalValues[numLiterals]); + //cpu->DataRead32(literalAddr, &literalValues[numLiterals]); literalLoadAddrs[numLiterals++] = translatedAddr; } } diff --git a/src/ARMJIT_A64/ARMJIT_Branch.cpp b/src/ARMJIT_A64/ARMJIT_Branch.cpp index f9c2e0c5..fc08c661 100644 --- a/src/ARMJIT_A64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_A64/ARMJIT_Branch.cpp @@ -83,14 +83,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) // doesn't matter if we put garbage in the MSbs there if (addr & 0x2) { - cpu9->CodeRead32(addr-2, true) >> 16; + //cpu9->CodeRead32(addr-2, true) >> 16; cycles += cpu9->CodeCycles; - cpu9->CodeRead32(addr+2, false); + //cpu9->CodeRead32(addr+2, false); cycles += CurCPU->CodeCycles; } else { - cpu9->CodeRead32(addr, true); + //cpu9->CodeRead32(addr, true); cycles += cpu9->CodeCycles; } } @@ -99,9 +99,9 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) addr &= ~0x3; newPC = addr+4; - cpu9->CodeRead32(addr, true); + //cpu9->CodeRead32(addr, true); cycles += cpu9->CodeCycles; - cpu9->CodeRead32(addr+4, false); + //cpu9->CodeRead32(addr+4, false); cycles += cpu9->CodeCycles; } diff --git a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp index 6d2c4276..37d6c332 100644 --- a/src/ARMJIT_A64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_A64/ARMJIT_LoadStore.cpp @@ -79,18 +79,18 @@ bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) CurCPU->R[15] = R15; if (size == 32) { - CurCPU->DataRead32(addr & ~0x3, &val); + //CurCPU->DataRead32(addr & ~0x3, &val); val = melonDS::ROR(val, (addr & 0x3) << 3); } else if (size == 16) { - CurCPU->DataRead16(addr & ~0x1, &val); + //CurCPU->DataRead16(addr & ~0x1, &val); if (signExtend) val = ((s32)val << 16) >> 16; } else { - CurCPU->DataRead8(addr, &val); + // CurCPU->DataRead8(addr, &val); if (signExtend) val = ((s32)val << 24) >> 24; } diff --git a/src/ARMJIT_x64/ARMJIT_Branch.cpp b/src/ARMJIT_x64/ARMJIT_Branch.cpp index c32e2b73..0b5317d0 100644 --- a/src/ARMJIT_x64/ARMJIT_Branch.cpp +++ b/src/ARMJIT_x64/ARMJIT_Branch.cpp @@ -72,14 +72,14 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) // doesn't matter if we put garbage in the MSbs there if (addr & 0x2) { - cpu9->CodeRead32(addr-2, true); + //cpu9->CodeRead32(addr-2, true); cycles += cpu9->CodeCycles; - cpu9->CodeRead32(addr+2, false); + //cpu9->CodeRead32(addr+2, false); cycles += CurCPU->CodeCycles; } else { - cpu9->CodeRead32(addr, true); + //cpu9->CodeRead32(addr, true); cycles += cpu9->CodeCycles; } } @@ -88,9 +88,9 @@ void Compiler::Comp_JumpTo(u32 addr, bool forceNonConstantCycles) addr &= ~0x3; newPC = addr+4; - cpu9->CodeRead32(addr, true); + //cpu9->CodeRead32(addr, true); cycles += cpu9->CodeCycles; - cpu9->CodeRead32(addr+4, false); + //cpu9->CodeRead32(addr+4, false); cycles += cpu9->CodeCycles; } diff --git a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp index 219c7271..fd1fe5af 100644 --- a/src/ARMJIT_x64/ARMJIT_LoadStore.cpp +++ b/src/ARMJIT_x64/ARMJIT_LoadStore.cpp @@ -85,18 +85,18 @@ bool Compiler::Comp_MemLoadLiteral(int size, bool signExtend, int rd, u32 addr) CurCPU->R[15] = R15; if (size == 32) { - CurCPU->DataRead32(addr & ~0x3, &val); + //CurCPU->DataRead32(addr & ~0x3, &val); val = melonDS::ROR(val, (addr & 0x3) << 3); } else if (size == 16) { - CurCPU->DataRead16(addr & ~0x1, &val); + //CurCPU->DataRead16(addr & ~0x1, &val); if (signExtend) val = ((s32)val << 16) >> 16; } else { - CurCPU->DataRead8(addr, &val); + //CurCPU->DataRead8(addr, &val); if (signExtend) val = ((s32)val << 24) >> 24; } diff --git a/src/CP15.cpp b/src/CP15.cpp index 92b41e25..cf5617c3 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -367,7 +367,7 @@ u32 ARMv5::RandomLineIndex() return (RNGSeed >> 17) & 0x3; } -u32 ARMv5::ICacheLookup(const u32 addr) +bool ARMv5::ICacheLookup(const u32 addr) { const u32 tag = (addr & ~(ICACHE_LINELENGTH - 1)); const u32 id = ((addr >> ICACHE_LINELENGTH_LOG2) & (ICACHE_LINESPERSET-1)) << ICACHE_SETS_LOG2; @@ -414,35 +414,36 @@ u32 ARMv5::ICacheLookup(const u32 addr) { u32 *cacheLine = (u32 *)&ICache[(id+set) << ICACHE_LINELENGTH_LOG2]; - if (ICacheFillPtr >= 7) + if (ICacheStreamPtr >= 7) { if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp; // does this apply to streamed fetches? NDS.ARM9Timestamp++; } else { - u64 nextfill = ICacheFillTimes[ICacheFillPtr++]; + u64 nextfill = ICacheStreamTimes[ICacheStreamPtr++]; if (NDS.ARM9Timestamp < nextfill) { NDS.ARM9Timestamp = nextfill; } else { - u64 fillend = ICacheFillTimes[6] + 2; + u64 fillend = ICacheStreamTimes[6] + 2; if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; else // checkme { if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp; NDS.ARM9Timestamp++; } - ICacheFillPtr = 7; + ICacheStreamPtr = 7; } } - if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; Store = false; - return cacheLine[(addr & (ICACHE_LINELENGTH -1)) >> 2]; + + RetVal = cacheLine[(addr & (ICACHE_LINELENGTH -1)) / 4]; + return true; } } @@ -451,35 +452,8 @@ u32 ARMv5::ICacheLookup(const u32 addr) // We do not fill the cacheline if it is disabled in the // BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register") if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]] - { - u8 cycles = MemTimings[addr >> 14][1]; + return false; - WriteBufferDrain(); - - NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<> 24) == 0x02) - { - if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp + ((1<>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<> 2] = NDS.ARM9Read32(tag+i); - } - ICacheTags[line] = tag | (line & (ICACHE_SETS-1)) | CACHE_FLAG_VALID; // timing logic NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<> 24) == 0x02) + if (NDS.ARM9Regions[addr>>14] == Mem9_MainRAM) { - if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp + ((1<>14] == WBLastRegion)) // check write buffer - || (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store - NDS.ARM9Timestamp += 1<> 14][1] + (MemTimings[tag >> 14][2] * ((DCACHE_LINELENGTH / 4) - 1)); - if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; // this should never trigger in practice - } - else // ICache Streaming logic - { - u8 ns = MemTimings[addr>>14][1]; - u8 seq = MemTimings[addr>>14][2]; + for (int i = 0; i < ICACHE_LINELENGTH; i+=sizeof(u32)) + ptr[i/4] = NDS.ARM9Read32(tag+i); - u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually + if (((NDS.ARM9Timestamp <= WBReleaseTS) && (NDS.ARM9Regions[addr>>14] == WBLastRegion)) // check write buffer + || (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store + NDS.ARM9Timestamp += 1<> 14][1] + (MemTimings[tag >> 14][2] * ((DCACHE_LINELENGTH / 4) - 1)); + if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; // this should never trigger in practice } + else // ICache Streaming logic + { + u8 ns = MemTimings[addr>>14][1]; + u8 seq = MemTimings[addr>>14][2]; + + u8 linepos = (addr & 0x1F) / 4; // technically this is one too low, but we want that actually - if ((addr >> 24) == 0x02) MainRAMTimestamp = ((linepos < 7) ? ICacheFillTimes[6] : NDS.ARM9Timestamp); + u64 cycles = ns + (seq * linepos); + NDS.ARM9Timestamp = cycles += NDS.ARM9Timestamp; + + ICacheStreamPtr = linepos; + for (int i = linepos; i < 7; i++) + { + cycles += seq; + ICacheStreamTimes[i] = cycles; + } + } + RetVal = ptr[(addr & (ICACHE_LINELENGTH-1)) / 4]; } - + Store = false; DataRegion = Mem9_Null; - return ptr[(addr & (ICACHE_LINELENGTH-1)) >> 2]; + return true; } void ARMv5::ICacheInvalidateByAddr(const u32 addr) @@ -659,23 +634,23 @@ u32 ARMv5::DCacheLookup(const u32 addr) { u32 *cacheLine = (u32 *)&DCache[(id+set) << DCACHE_LINELENGTH_LOG2]; - if (DCacheFillPtr >= 7) + if (DCacheStreamPtr >= 7) { DataCycles = 1; } else { - u64 nextfill = DCacheFillTimes[DCacheFillPtr++]; + u64 nextfill = DCacheStreamTimes[DCacheStreamPtr++]; //if (NDS.ARM9Timestamp < nextfill) // can this ever really fail? { DataCycles = nextfill - NDS.ARM9Timestamp; } /*else { - u64 fillend = DCacheFillTimes[6] + 2; + u64 fillend = DCacheStreamTimes[6] + 2; if (NDS.ARM9Timestamp < fillend) DataCycles = fillend - NDS.ARM9Timestamp; else DataCycles = 1; - DCacheFillPtr = 7; + DCacheStreamPtr = 7; }*/ } DataRegion = Mem9_DCache; @@ -683,6 +658,14 @@ u32 ARMv5::DCacheLookup(const u32 addr) return cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2]; } } + + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheStreamPtr < 7) + { + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } // cache miss miss: @@ -690,14 +673,6 @@ u32 ARMv5::DCacheLookup(const u32 addr) // BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register") if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_LINEFILL) [[unlikely]] { - // bus reads can only overlap with icache streaming by 6 cycles - // checkme: does cache trigger this? - if (ICacheFillPtr < 7) - { - u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? - if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; - } - WriteBufferDrain(); NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<> 24) == 0x02) MainRAMTimestamp = ((linepos < 7) ? ICacheFillTimes[6] : NDS.ARM9Timestamp); + if ((addr >> 24) == 0x02) MainRAMTimestamp = ((linepos < 7) ? ICacheStreamTimes[6] : NDS.ARM9Timestamp); } return ptr[(addr & (DCACHE_LINELENGTH-1)) >> 2]; } @@ -871,7 +846,7 @@ bool ARMv5::DCacheWrite32(const u32 addr, const u32 val) DataCycles = 1; DataRegion = Mem9_DCache; #if !DISABLE_CACHEWRITEBACK - if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_DCACHEWRITEBACK) + if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_BUFFERABLE) { if (addr & (DCACHE_LINELENGTH / 2)) { @@ -939,7 +914,7 @@ bool ARMv5::DCacheWrite16(const u32 addr, const u16 val) DataCycles = 1; DataRegion = Mem9_DCache; #if !DISABLE_CACHEWRITEBACK - if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_DCACHEWRITEBACK) + if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_BUFFERABLE) { if (addr & (DCACHE_LINELENGTH / 2)) { @@ -1008,7 +983,7 @@ bool ARMv5::DCacheWrite8(const u32 addr, const u8 val) DataCycles = 1; DataRegion = Mem9_DCache; #if !DISABLE_CACHEWRITEBACK - if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_DCACHEWRITEBACK) + if (PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_BUFFERABLE) { if (addr & (DCACHE_LINELENGTH / 2)) { @@ -1736,7 +1711,8 @@ void ARMv5::CP15Write(u32 id, u32 val) // we force a fill by looking up the value from cache // if it wasn't cached yet, it will be loaded into cache // low bits are set to 0x1C to trick cache streaming - ICacheLookup((val & ~0x03) | 0x1C); + printf("PREFETCH ICACHE\n"); + //ICacheLookup((val & ~0x03) | 0x1C); TODO: REIMPLEMENT WITH DEFERENCE return; case 0x7E0: @@ -2071,17 +2047,18 @@ u32 ARMv5::CP15Read(const u32 id) const // TCM are handled here. // TODO: later on, handle PU -u64 ARMv5::CodeRead32(u32 addr, bool branch) +void ARMv5::CodeRead32(u32 addr) { // prefetch abort // the actual exception is not raised until the aborted instruction is executed - if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] + if (!(PU_Map[addr>>12] & CP15_MAP_EXECUTABLE)) [[unlikely]] { NDS.ARM9Timestamp += 1; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; Store = false; - return ((u64)1<<63); + RetVal = ((u64)1<<63); + return; } if (addr < ITCMSize) @@ -2091,7 +2068,8 @@ u64 ARMv5::CodeRead32(u32 addr, bool branch) if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; Store = false; - return *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; + RetVal = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; + return; } #if !DISABLE_ICACHE @@ -2101,15 +2079,15 @@ u64 ARMv5::CodeRead32(u32 addr, bool branch) { if (IsAddressICachable(addr)) { - return ICacheLookup(addr); + if (ICacheLookup(addr)) return; } #endif } // bus reads can only overlap with dcache streaming by 6 cycles - if (DCacheFillPtr < 7) + if (DCacheStreamPtr < 7) { - u64 time = DCacheFillTimes[6] - 6; // checkme: minus 6? + u64 time = DCacheStreamTimes[6] - 6; // checkme: minus 6? if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; } @@ -2148,41 +2126,84 @@ u64 ARMv5::CodeRead32(u32 addr, bool branch) if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; - return BusRead32(addr); + RetVal = BusRead32(addr); + return; } -bool ARMv5::DataRead8(u32 addr, u32* val) +void ARMv5::DAbortHandle() { - if (DCacheFillPtr < 7) + if (DCacheStreamPtr < 7) { - u64 fillend = DCacheFillTimes[6] + 1; + u64 fillend = DCacheStreamTimes[6] + 1; if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? - DCacheFillPtr = 7; + DCacheStreamPtr = 7; } + + DataCycles = 1; +} +void ARMv5::DAbortHandleS() +{ + NDS.ARM9Timestamp += DataCycles; + + if (DCacheStreamPtr < 7) + { + u64 fillend = DCacheStreamTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? + DCacheStreamPtr = 7; + } + + DataCycles = 1; +} + +bool ARMv5::DataRead8(u32 addr, u8 reg) +{ // Data Aborts // Exception is handled in the actual instruction implementation - if (!(PU_Map[addr>>12] & 0x01)) [[unlikely]] + if (!(PU_Map[addr>>12] & CP15_MAP_READABLE)) [[unlikely]] { - DataCycles = 1; + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandle; + else DAbortHandle(); return false; } + FetchAddr[reg] = addr; + LDRRegs = 1<> (8 * (addr & 3))) & 0xff; - return true; + return; } } #endif // bus reads can only overlap with icache streaming by 6 cycles // checkme: does dcache trigger this? - if (ICacheFillPtr < 7) + if (ICacheStreamPtr < 7) { - u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; } @@ -2232,24 +2253,39 @@ bool ARMv5::DataRead8(u32 addr, u32* val) if (WBTimestamp < ((NDS.ARM9Timestamp + DataCycles - (3<>12] & CP15_MAP_READABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandle; + else DAbortHandle(); + return false; + } + + FetchAddr[reg] = addr; + LDRRegs = 1<>12] & 0x01)) [[unlikely]] + if (DCacheStreamPtr < 7) { - DataCycles = 1; - return false; + u64 fillend = DCacheStreamTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? + DCacheStreamPtr = 7; } addr &= ~1; @@ -2260,14 +2296,14 @@ bool ARMv5::DataRead16(u32 addr, u32* val) ITCMTimestamp = NDS.ARM9Timestamp + DataCycles; DataRegion = Mem9_ITCM; *val = *(u16*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return true; + return; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; DataRegion = Mem9_DTCM; *val = *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)]; - return true; + return; } #if !DISABLE_DCACHE @@ -2278,16 +2314,16 @@ bool ARMv5::DataRead16(u32 addr, u32* val) if (IsAddressDCachable(addr)) { *val = (DCacheLookup(addr) >> (8* (addr & 2))) & 0xffff; - return true; + return; } } #endif // bus reads can only overlap with icache streaming by 6 cycles // checkme: does cache trigger this? - if (ICacheFillPtr < 7) + if (ICacheStreamPtr < 7) { - u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; } @@ -2317,24 +2353,39 @@ bool ARMv5::DataRead16(u32 addr, u32* val) if (WBTimestamp < ((NDS.ARM9Timestamp + DataCycles - (3<>12] & CP15_MAP_READABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandle; + else DAbortHandle(); + return false; + } + + FetchAddr[reg] = addr; + LDRRegs = 1<>12] & 0x01)) [[unlikely]] + if (DCacheStreamPtr < 7) { - DataCycles = 1; - return false; + u64 fillend = DCacheStreamTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? + DCacheStreamPtr = 7; } addr &= ~3; @@ -2345,14 +2396,16 @@ bool ARMv5::DataRead32(u32 addr, u32* val) ITCMTimestamp = NDS.ARM9Timestamp + DataCycles; DataRegion = Mem9_ITCM; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return true; + LDRRegs &= ~1<>12] & CP15_MAP_READABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill] = &ARMv5::DAbortHandleS; + else DAbortHandleS(); + return false; + } + + FetchAddr[reg] = addr; + LDRRegs |= 1<>12] & 0x01)) [[unlikely]] - { - DataCycles = 1; - return false; - } + NDS.ARM9Timestamp += DataCycles; addr &= ~3; @@ -2425,14 +2495,16 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) // we update the timestamp during the actual function, as a sequential itcm access can only occur during instructions with strange itcm wait cycles DataRegion = Mem9_ITCM; *val = *(u32*)&ITCM[addr & (ITCMPhysicalSize - 1)]; - return true; + LDRRegs &= ~1<>12] & CP15_MAP_WRITEABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandle; + else DAbortHandle(); + return false; + } + + FetchAddr[reg] = addr; + STRRegs = 1<>12] & 0x02)) [[unlikely]] + if (DCacheStreamPtr < 7) { - DataCycles = 1; - return false; + u64 fillend = DCacheStreamTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? + DCacheStreamPtr = 7; } if (addr < ITCMSize) @@ -2532,14 +2621,14 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) #ifdef JIT_ENABLED NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif - return true; + return; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; DataRegion = Mem9_DTCM; *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return true; + return; } #if !DISABLE_DCACHE @@ -2550,16 +2639,16 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) if (IsAddressDCachable(addr)) { if (DCacheWrite8(addr, val)) - return true; + return; } } #endif // bus reads can only overlap with icache streaming by 6 cycles // checkme: does cache trigger this? - if (ICacheFillPtr < 7) + if (ICacheStreamPtr < 7) { - u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; } @@ -2594,24 +2683,39 @@ bool ARMv5::DataWrite8(u32 addr, u8 val) DataCycles = 1; WBDelay = NDS.ARM9Timestamp + 2; } +} + +bool ARMv5::DataWrite16(u32 addr, u16 val, u8 reg) +{ + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & CP15_MAP_WRITEABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandle; + else DAbortHandle(); + return false; + } + + FetchAddr[reg] = addr; + STRRegs = 1<>12] & 0x02)) [[unlikely]] + if (DCacheStreamPtr < 7) { - DataCycles = 1; - return false; + u64 fillend = DCacheStreamTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? + DCacheStreamPtr = 7; } addr &= ~1; @@ -2625,14 +2729,14 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) #ifdef JIT_ENABLED NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif - return true; + return; } if ((addr & DTCMMask) == DTCMBase) { DataCycles = 1; DataRegion = Mem9_DTCM; *(u16*)&DTCM[addr & (DTCMPhysicalSize - 1)] = val; - return true; + return; } #if !DISABLE_DCACHE @@ -2643,16 +2747,16 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) if (IsAddressDCachable(addr)) { if (DCacheWrite16(addr, val)) - return true; + return; } } #endif // bus reads can only overlap with icache streaming by 6 cycles // checkme: does cache trigger this? - if (ICacheFillPtr < 7) + if (ICacheStreamPtr < 7) { - u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; } @@ -2687,24 +2791,39 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) DataCycles = 1; WBDelay = NDS.ARM9Timestamp + 2; } +} + +bool ARMv5::DataWrite32(u32 addr, u32 val, u8 reg) +{ + // Data Aborts + // Exception is handled in the actual instruction implementation + if (!(PU_Map[addr>>12] & CP15_MAP_WRITEABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandle; + else DAbortHandle(); + return false; + } + + FetchAddr[reg] = addr; + STRRegs = 1<>12] & 0x02)) [[unlikely]] + if (DCacheStreamPtr < 7) { - DataCycles = 1; - return false; + u64 fillend = DCacheStreamTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; // checkme: should this be data cycles? + DCacheStreamPtr = 7; } addr &= ~3; @@ -2718,14 +2837,16 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) #ifdef JIT_ENABLED NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif - return true; + STRRegs &= ~1<>12] & CP15_MAP_WRITEABLE)) [[unlikely]] + { + if (MRTrack.Type != MainRAMType::Null) FuncQueue[FuncQueueFill++] = &ARMv5::DAbortHandleS; + else DAbortHandleS(); + return false; + } + + FetchAddr[reg] = addr; + STRRegs |= 1<>12] & 0x02)) [[unlikely]] - { - DataCycles = 1; - return false; - } + NDS.ARM9Timestamp += DataCycles; addr &= ~3; @@ -2806,14 +2946,16 @@ bool ARMv5::DataWrite32S(u32 addr, u32 val) #ifdef JIT_ENABLED NDS.JIT.CheckAndInvalidate<0, ARMJIT_Memory::memregion_ITCM>(addr); #endif - return true; + STRRegs &= ~1< 0) && A9WENTLAST) + { + MainRAMTimestamp += 2; + A9ContentionTS += 2; + } + else + { + MainRAMTimestamp = A9ContentionTS + 9; + A9ContentionTS += (ARM9ClockShift == 1) ? 9 : 8; + MainRAMLastAccess = A9LAST; + } + + if (*prog == ARM9.ICacheStreamPtr) ARM9Timestamp = (A9ContentionTS << ARM9ClockShift) - 1; + else if (*prog > ARM9.ICacheStreamPtr) ARM9.ICacheStreamTimes[*prog-1] = (A9ContentionTS << ARM9ClockShift) - 1; + + (*prog)++; + if (*prog >= 8) + { + ARM9.RetVal = icache[(ARM9.FetchAddr[16] & 0x1F) / 4]; + memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack)); + A9ContentionTS = 0; + } + break; + } + } +} + +void NDS::MainRAMHandle() +{ + if (!A9ContentionTS) + { + A9ContentionTS = (ARM9Timestamp + ((1<> ARM9ClockShift; + if ((ARM9.MRTrack.Type != MainRAMType::Null) && (A9ContentionTS < MainRAMTimestamp)) A9ContentionTS = MainRAMTimestamp; + } + + bool A7Priority = ExMemCnt[0] & 0x8000; + if (A7Priority) + { + while (true) + { + if (A9ContentionTS < ARM7Timestamp) + { + if (ARM9.MRTrack.Type == MainRAMType::Null) { A9ContentionTS = 0; return; } + MainRAMHandleARM9(); + } + else + { + if (true) return; + } + } + } + else + { + while (true) + { + if (A9ContentionTS <= ARM7Timestamp) + { + if (ARM9.MRTrack.Type == MainRAMType::Null) { A9ContentionTS = 0; return; } + MainRAMHandleARM9(); + } + else + { + if (true) return; + } + } + } +} + +#undef A9WENTLAST +#undef A7WENTLAST +#undef A9LAST +#undef A7LAST + template u32 NDS::RunFrame() { @@ -970,16 +1068,21 @@ u32 NDS::RunFrame() ts = ARM9Timestamp - ts; for (int i = 0; i < 7; i++) { - ARM9.ICacheFillTimes[i] += ts; - ARM9.DCacheFillTimes[i] += ts; + ARM9.ICacheStreamTimes[i] += ts; + ARM9.DCacheStreamTimes[i] += ts; } ARM9.WBTimestamp += ts; } - else + else if (ARM9.MRTrack.Type == MainRAMType::Null) { + if (ARM9.abt) ARM9Timestamp = ARM9Target; ARM9.Execute(); } + + //printf("MAIN LOOP: %lli %lli\n", ARM9Timestamp>>ARM9ClockShift, ARM7Timestamp); + + MainRAMHandle(); RunTimers(0); GPU.GPU3D.Run(); @@ -987,9 +1090,11 @@ u32 NDS::RunFrame() target = ARM9Timestamp >> ARM9ClockShift; CurCPU = 1; - while (ARM7Timestamp < target) + while ((ARM7Timestamp < target) || (ARM9.MRTrack.Type != MainRAMType::Null)) { - ARM7Target = target; // might be changed by a reschedule + ARM7Target = (ARM9.MRTrack.Type != MainRAMType::Null) ? (ARM7Timestamp+1) : target; // might be changed by a reschedule + + //printf("A7 LOOP: %lli %lli\n", ARM9Timestamp>>ARM9ClockShift, ARM7Timestamp); if (CPUStop & CPUStop_DMA7) { @@ -1008,6 +1113,8 @@ u32 NDS::RunFrame() ARM7.Execute(); } + MainRAMHandle(); + RunTimers(1); } diff --git a/src/NDS.h b/src/NDS.h index c1f0ff88..8afbdf2c 100644 --- a/src/NDS.h +++ b/src/NDS.h @@ -253,6 +253,8 @@ public: // TODO: Encapsulate the rest of these members // no need to worry about those overflowing, they can keep going for atleast 4350 years u64 ARM9Timestamp, ARM9Target; u64 ARM7Timestamp, ARM7Target; + u64 MainRAMTimestamp; + u64 A9ContentionTS; u32 ARM9ClockShift; u32 IME[2]; @@ -270,6 +272,8 @@ public: // TODO: Encapsulate the rest of these members alignas(u32) u8 ROMSeed0[2*8]; alignas(u32) u8 ROMSeed1[2*8]; + bool MainRAMLastAccess; // 0 == ARM9 | 1 == ARM7 + protected: // These BIOS arrays should be declared *before* the component objects (JIT, SPI, etc.) // so that they're initialized before the component objects' constructors run. @@ -394,6 +398,9 @@ public: // TODO: Encapsulate the rest of these members void LoadGBAAddon(int type); std::unique_ptr EjectGBACart() { return GBACartSlot.EjectCart(); } + void MainRAMHandleARM9(); + void MainRAMHandle(); + u32 RunFrame(); bool IsRunning() const noexcept { return Running; }