From 22f1b4d90cb1e17c52b1bd10ab94d241f6642add Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 20 Dec 2024 20:10:23 -0500 Subject: [PATCH] theoretically improve dma responsiveness? in practice seems to make no difference... --- src/ARM.cpp | 16 +++++ src/ARM.h | 27 ++++---- src/CP15.cpp | 169 ++++++++++++++++++++++++++++++++---------------- src/NDS.cpp | 11 ++-- src/NDSCart.cpp | 24 +++---- src/NDSCart.h | 4 +- 6 files changed, 161 insertions(+), 90 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index d4e49723..e6e7ecc4 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -1447,6 +1447,22 @@ void ARMv5::ForceInterlock_2() NDS.ARM9Timestamp = TimestampMemory + ILForceDelay; } +void ARMv5::QueueFunction(void (ARMv5::*QueueEntry)(void)) +{ + if ((NDS.ARM9Timestamp >= NDS.ARM9Target) || (MRTrack.Type != MainRAMType::Null)) + FuncQueue[FuncQueueFill++] = QueueEntry; + else + (this->*QueueEntry)(); +} + +void ARMv4::QueueFunction(void (ARMv4::*QueueEntry)(void)) +{ + if ((NDS.ARM7Timestamp >= NDS.ARM7Target) || (MRTrack.Type != MainRAMType::Null)) + FuncQueue[FuncQueueFill++] = QueueEntry; + else + (this->*QueueEntry)(); +} + void ARMv4::CodeRead16(u32 addr) { if ((addr >> 24) == 0x02) diff --git a/src/ARM.h b/src/ARM.h index 21c06813..a7092e4c 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -723,14 +723,8 @@ public: * @return Value of the cp15 register */ u32 CP15Read(const u32 id) const; - - inline void QueueFunction(void (ARMv5::*QueueEntry)(void)) - { - if (MRTrack.Type != MainRAMType::Null) - FuncQueue[FuncQueueFill++] = QueueEntry; - else - (this->*QueueEntry)(); - } + + void QueueFunction(void (ARMv5::*QueueEntry)(void)); // Queue Functions void StartExecARM(); @@ -744,27 +738,36 @@ public: void JumpTo_3C(); void JumpTo_4(); void CodeRead32_2(); + void CodeRead32_3(); void ICacheLookup_2(); void DAbortHandle(); void DCacheFin8(); void DRead8_2(); void DRead8_3(); + void DRead8_4(); void DCacheFin16(); void DRead16_2(); void DRead16_3(); + void DRead16_4(); void DCacheFin32(); void DRead32_2(); void DRead32_3(); + void DRead32_4(); void DRead32S_2(); void DRead32S_3(); + void DRead32S_4(); void DWrite8_2(); void DWrite8_3(); + void DWrite8_4(); void DWrite16_2(); void DWrite16_3(); + void DWrite16_4(); void DWrite32_2(); void DWrite32_3(); + void DWrite32_4(); void DWrite32S_2(); void DWrite32S_3(); + void DWrite32S_4(); void WBCheck_2(); void ICachePrefetch_2(); void DCacheLookup_2(); @@ -933,13 +936,7 @@ public: void AddCycles_CDI() override; void AddCycles_CD() override; - inline void QueueFunction(void (ARMv4::*QueueEntry)(void)) - { - if (MRTrack.Type != MainRAMType::Null) - FuncQueue[FuncQueueFill++] = QueueEntry; - else - (this->*QueueEntry)(); - } + void QueueFunction(void (ARMv4::*QueueEntry)(void)); void StartExecARM(); void StartExecTHUMB(); diff --git a/src/CP15.cpp b/src/CP15.cpp index 8903dab8..e0ca7e83 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -2213,7 +2213,13 @@ void ARMv5::CodeRead32(u32 addr) } #endif } - + + FetchAddr[16] = addr; + QueueFunction(&ARMv5::CodeRead32_2); +} + +void ARMv5::CodeRead32_2() +{ if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; // bus reads can only overlap with dcache streaming by 6 cycles if (DCacheStreamPtr < 7) @@ -2222,16 +2228,15 @@ void ARMv5::CodeRead32(u32 addr) if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; } - if (PU_Map[addr>>12] & 0x30) + if (PU_Map[FetchAddr[16]>>12] & 0x30) WriteBufferDrain(); else WriteBufferCheck<3>(); - FetchAddr[16] = addr; - QueueFunction(&ARMv5::CodeRead32_2); + QueueFunction(&ARMv5::CodeRead32_3); } -void ARMv5::CodeRead32_2() +void ARMv5::CodeRead32_3() { u32 addr = FetchAddr[16]; @@ -2346,6 +2351,15 @@ void ARMv5::DRead8_2() } #endif + QueueFunction(&ARMv5::DRead8_3); +} + +void ARMv5::DRead8_3() +{ + u8 reg = __builtin_ctz(LDRRegs); + u32 addr = FetchAddr[reg]; + u32 dummy; u32* val = (LDRFailedRegs & (1<(); - QueueFunction(&ARMv5::DRead8_3); + QueueFunction(&ARMv5::DRead8_4); } -void ARMv5::DRead8_3() +void ARMv5::DRead8_4() { u8 reg = __builtin_ctz(LDRRegs); u32 addr = FetchAddr[reg]; @@ -2461,6 +2475,15 @@ void ARMv5::DRead16_2() } #endif + QueueFunction(&ARMv5::DRead16_3); +} + +void ARMv5::DRead16_3() +{ + u8 reg = __builtin_ctz(LDRRegs); + u32 addr = FetchAddr[reg]; + u32 dummy; u32* val = (LDRFailedRegs & (1<(); - QueueFunction(&ARMv5::DRead16_3); + QueueFunction(&ARMv5::DRead16_4); } -void ARMv5::DRead16_3() +void ARMv5::DRead16_4() { u8 reg = __builtin_ctz(LDRRegs); u32 addr = FetchAddr[reg]; @@ -2576,7 +2599,16 @@ void ARMv5::DRead32_2() } } #endif - + + QueueFunction(&ARMv5::DRead32_3); +} + +void ARMv5::DRead32_3() +{ + u8 reg = __builtin_ctz(LDRRegs); + u32 addr = FetchAddr[reg]; + u32 dummy; u32* val = (LDRFailedRegs & (1<(); - QueueFunction(&ARMv5::DRead32_3); + QueueFunction(&ARMv5::DRead32_4); } -void ARMv5::DRead32_3() +void ARMv5::DRead32_4() { u8 reg = __builtin_ctz(LDRRegs); u32 addr = FetchAddr[reg]; @@ -2678,7 +2710,16 @@ void ARMv5::DRead32S_2() } } #endif - + + QueueFunction(&ARMv5::DRead32S_3); +} + +void ARMv5::DRead32S_3() +{ + u8 reg = __builtin_ctz(LDRRegs); + u32 addr = FetchAddr[reg]; + u32 dummy; u32* val = (LDRFailedRegs & (1<(); - QueueFunction(&ARMv5::DRead32S_3); + QueueFunction(&ARMv5::DRead32S_4); } -void ARMv5::DRead32S_3() +void ARMv5::DRead32S_4() { u8 reg = __builtin_ctz(LDRRegs); u32 addr = FetchAddr[reg]; @@ -2815,16 +2856,6 @@ void ARMv5::DWrite8_2() if (!(PU_Map[addr>>12] & (0x30))) { - if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; - // bus reads can only overlap with icache streaming by 6 cycles - // checkme: do buffered writes trigger this? - if (ICacheStreamPtr < 7) - { - u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? - if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; - } - - WriteBufferCheck<2>(); QueueFunction(&ARMv5::DWrite8_3); } else @@ -2837,6 +2868,21 @@ void ARMv5::DWrite8_2() } void ARMv5::DWrite8_3() +{ + if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: do buffered writes trigger this? + if (ICacheStreamPtr < 7) + { + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + + WriteBufferCheck<2>(); + QueueFunction(&ARMv5::DWrite8_4); +} + +void ARMv5::DWrite8_4() { u8 reg = __builtin_ctz(STRRegs); u32 addr = FetchAddr[reg]; @@ -2929,16 +2975,6 @@ void ARMv5::DWrite16_2() if (!(PU_Map[addr>>12] & 0x30)) { - if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; - // bus reads can only overlap with icache streaming by 6 cycles - // checkme: do buffered writes trigger this? - if (ICacheStreamPtr < 7) - { - u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? - if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; - } - - WriteBufferCheck<2>(); QueueFunction(&ARMv5::DWrite16_3); } else @@ -2951,6 +2987,21 @@ void ARMv5::DWrite16_2() } void ARMv5::DWrite16_3() +{ + if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: do buffered writes trigger this? + if (ICacheStreamPtr < 7) + { + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + + WriteBufferCheck<2>(); + QueueFunction(&ARMv5::DWrite16_4); +} + +void ARMv5::DWrite16_4() { u8 reg = __builtin_ctz(STRRegs); u32 addr = FetchAddr[reg]; @@ -3048,16 +3099,6 @@ void ARMv5::DWrite32_2() if (!(PU_Map[addr>>12] & 0x30)) { - if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; - // bus reads can only overlap with icache streaming by 6 cycles - // checkme: do buffered writes trigger this? - if (ICacheStreamPtr < 7) - { - u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? - if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; - } - - WriteBufferCheck<2>(); QueueFunction(&ARMv5::DWrite32_3); } else @@ -3071,6 +3112,21 @@ void ARMv5::DWrite32_2() } void ARMv5::DWrite32_3() +{ + if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: do buffered writes trigger this? + if (ICacheStreamPtr < 7) + { + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + + WriteBufferCheck<2>(); + QueueFunction(&ARMv5::DWrite32_4); +} + +void ARMv5::DWrite32_4() { u8 reg = __builtin_ctz(STRRegs); u32 addr = FetchAddr[reg]; @@ -3162,15 +3218,6 @@ void ARMv5::DWrite32S_2() if (!(PU_Map[addr>>12] & 0x30)) // non-bufferable { - if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; - // bus reads can only overlap with icache streaming by 6 cycles - // checkme: do buffered writes trigger this? - if (ICacheStreamPtr < 7) - { - u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? - if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; - } - WriteBufferCheck<2>(); QueueFunction(&ARMv5::DWrite32S_3); } else @@ -3181,6 +3228,20 @@ void ARMv5::DWrite32S_2() } void ARMv5::DWrite32S_3() +{ + if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp; + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: do buffered writes trigger this? + if (ICacheStreamPtr < 7) + { + u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + WriteBufferCheck<2>(); + QueueFunction(&ARMv5::DWrite32S_4); +} + +void ARMv5::DWrite32S_4() { u8 reg = __builtin_ctz(STRRegs); u32 addr = FetchAddr[reg]; diff --git a/src/NDS.cpp b/src/NDS.cpp index 432d0710..91aee9d4 100644 --- a/src/NDS.cpp +++ b/src/NDS.cpp @@ -792,7 +792,7 @@ void NDS::SetARM9BIOS(const std::array& bios) noexcept u64 NDS::NextTarget() { - u64 minEvent = UINT64_MAX; + u64 minEvent = std::max(SysTimestamp+1, NDSCartSlot.ROMTransferTime[0]); u32 mask = SchedListMask; for (int i = 0; i < Event_MAX; i++) @@ -1740,7 +1740,6 @@ u32 NDS::RunFrame() u64 target = NextTarget(); ARM9Target = target << ARM9ClockShift; - //ARM7Target = target; while (std::max(std::max(ARM9Timestamp, DMA9Timestamp), A9ContentionTS << ARM9ClockShift) < ARM9Target) { @@ -1776,12 +1775,10 @@ u32 NDS::RunFrame() } //printf("MAIN LOOP: 9 %lli %08X %08llX %i 7 %lli %08X %08llX %i %i %08X\n", ARM9Timestamp>>ARM9ClockShift, ARM9.PC, ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type, IME[1], IE[1]); - - NDSCartSlot.ROMPrepareData(); + RunTimers(0); GPU.GPU3D.Run(); - //if (MainRAMHandle()) break; MainRAMHandle(); target = std::max(std::max(ARM9Timestamp, DMA9Timestamp) >> ARM9ClockShift, A9ContentionTS); @@ -1816,12 +1813,12 @@ u32 NDS::RunFrame() } RunTimers(1); - NDSCartSlot.ROMPrepareData(); if (!MainRAMHandle()) break; } } - + + NDSCartSlot.ROMPrepareData(); RunSystem(target); if (CPUStop & CPUStop_Sleep) diff --git a/src/NDSCart.cpp b/src/NDSCart.cpp index b49c9e15..ef24cdb1 100644 --- a/src/NDSCart.cpp +++ b/src/NDSCart.cpp @@ -1818,8 +1818,8 @@ void NDSCartSlot::ROMEndTransfer(u32 param) noexcept void NDSCartSlot::ROMPrepareData() noexcept { u64 curts; - if (NDS.CurCPU) curts = NDS.ARM7Timestamp; - else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; + if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp; + else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; if (curts < ROMTransferTime[0]) return; @@ -1937,8 +1937,8 @@ void NDSCartSlot::WriteROMCnt(u32 val) noexcept else { u64 curts; - if (NDS.CurCPU) curts = NDS.ARM7Timestamp; - else curts = ((std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift); + if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp; + else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; ROMTransferTime[0] = (xfercycle*(cmddelay+4)) + curts; @@ -1962,10 +1962,10 @@ void NDSCartSlot::AdvanceROMTransfer() noexcept if (!((TransferPos+4) & 0x1FF)) delay += ((ROMCnt >> 16) & 0x3F); } - + u64 curts; - if (NDS.CurCPU) curts = NDS.ARM7Timestamp; - else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; + if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp; + else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; ROMTransferTime[0] = ROMTransferTime[1]; @@ -1984,8 +1984,8 @@ u32 NDSCartSlot::ReadROMData() noexcept if (ROMCnt & (1<<30)) return 0; u64 curts; - if (NDS.CurCPU) curts = NDS.ARM7Timestamp; - else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; + if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp; + else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; ROMPrepareData(); @@ -2000,10 +2000,10 @@ u32 NDSCartSlot::ReadROMData() noexcept void NDSCartSlot::WriteROMData(u32 val) noexcept { if (!(ROMCnt & (1<<30))) return; - + u64 curts; - if (NDS.CurCPU) curts = NDS.ARM7Timestamp; - else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; + if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp; + else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<> NDS.ARM9ClockShift; ROMPrepareData(); diff --git a/src/NDSCart.h b/src/NDSCart.h index 560b5d62..3db076e4 100644 --- a/src/NDSCart.h +++ b/src/NDSCart.h @@ -420,6 +420,8 @@ public: [[nodiscard]] u16 GetSPICnt() const noexcept { return SPICnt; } void SetSPICnt(u16 val) noexcept { SPICnt = val; } + u64 ROMTransferTime[2]; + private: friend class CartCommon; melonDS::NDS& NDS; @@ -445,8 +447,6 @@ private: u64 Key2_X = 0; u64 Key2_Y = 0; - u64 ROMTransferTime[2]; - void Key1_Encrypt(u32* data) const noexcept; void Key1_Decrypt(u32* data) const noexcept; void Key1_ApplyKeycode(u32* keycode, u32 mod) noexcept;