Theoretically improve DMA responsiveness?

In practice it seems to make no difference...
This commit is contained in:
Jaklyy 2024-12-20 20:10:23 -05:00
parent 98d0a6b371
commit 22f1b4d90c
6 changed files with 161 additions and 90 deletions

View File

@ -1447,6 +1447,22 @@ void ARMv5::ForceInterlock_2()
NDS.ARM9Timestamp = TimestampMemory + ILForceDelay;
}
// Dispatches one step of the ARM9 function queue.
// The step runs immediately only while ARM9Timestamp is still below ARM9Target
// and MRTrack.Type is MainRAMType::Null; in every other case it is appended to
// FuncQueue to be run later.
void ARMv5::QueueFunction(void (ARMv5::*QueueEntry)(void))
{
    if (!(NDS.ARM9Timestamp >= NDS.ARM9Target) && !(MRTrack.Type != MainRAMType::Null))
    {
        (this->*QueueEntry)();
        return;
    }

    FuncQueue[FuncQueueFill] = QueueEntry;
    FuncQueueFill++;
}
// Dispatches one step of the ARM7 function queue.
// The step runs immediately only while ARM7Timestamp is still below ARM7Target
// and MRTrack.Type is MainRAMType::Null; in every other case it is appended to
// FuncQueue to be run later.
void ARMv4::QueueFunction(void (ARMv4::*QueueEntry)(void))
{
    if (!(NDS.ARM7Timestamp >= NDS.ARM7Target) && !(MRTrack.Type != MainRAMType::Null))
    {
        (this->*QueueEntry)();
        return;
    }

    FuncQueue[FuncQueueFill] = QueueEntry;
    FuncQueueFill++;
}
void ARMv4::CodeRead16(u32 addr)
{
if ((addr >> 24) == 0x02)

View File

@ -724,13 +724,7 @@ public:
*/
u32 CP15Read(const u32 id) const;
// Calls QueueEntry at once when MRTrack.Type is MainRAMType::Null;
// otherwise stores it in the next free FuncQueue slot.
inline void QueueFunction(void (ARMv5::*QueueEntry)(void))
{
    if (MRTrack.Type == MainRAMType::Null)
    {
        (this->*QueueEntry)();
        return;
    }

    FuncQueue[FuncQueueFill] = QueueEntry;
    FuncQueueFill++;
}
void QueueFunction(void (ARMv5::*QueueEntry)(void));
// Queue Functions
void StartExecARM();
@ -744,27 +738,36 @@ public:
void JumpTo_3C();
void JumpTo_4();
void CodeRead32_2();
void CodeRead32_3();
void ICacheLookup_2();
void DAbortHandle();
void DCacheFin8();
void DRead8_2();
void DRead8_3();
void DRead8_4();
void DCacheFin16();
void DRead16_2();
void DRead16_3();
void DRead16_4();
void DCacheFin32();
void DRead32_2();
void DRead32_3();
void DRead32_4();
void DRead32S_2();
void DRead32S_3();
void DRead32S_4();
void DWrite8_2();
void DWrite8_3();
void DWrite8_4();
void DWrite16_2();
void DWrite16_3();
void DWrite16_4();
void DWrite32_2();
void DWrite32_3();
void DWrite32_4();
void DWrite32S_2();
void DWrite32S_3();
void DWrite32S_4();
void WBCheck_2();
void ICachePrefetch_2();
void DCacheLookup_2();
@ -933,13 +936,7 @@ public:
void AddCycles_CDI() override;
void AddCycles_CD() override;
// Calls QueueEntry at once when MRTrack.Type is MainRAMType::Null;
// otherwise stores it in the next free FuncQueue slot.
inline void QueueFunction(void (ARMv4::*QueueEntry)(void))
{
    if (MRTrack.Type == MainRAMType::Null)
    {
        (this->*QueueEntry)();
        return;
    }

    FuncQueue[FuncQueueFill] = QueueEntry;
    FuncQueueFill++;
}
void QueueFunction(void (ARMv4::*QueueEntry)(void));
void StartExecARM();
void StartExecTHUMB();

View File

@ -2214,6 +2214,12 @@ void ARMv5::CodeRead32(u32 addr)
#endif
}
FetchAddr[16] = addr;
QueueFunction(&ARMv5::CodeRead32_2);
}
void ARMv5::CodeRead32_2()
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with dcache streaming by 6 cycles
if (DCacheStreamPtr < 7)
@ -2222,16 +2228,15 @@ void ARMv5::CodeRead32(u32 addr)
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30)
if (PU_Map[FetchAddr[16]>>12] & 0x30)
WriteBufferDrain();
else
WriteBufferCheck<3>();
FetchAddr[16] = addr;
QueueFunction(&ARMv5::CodeRead32_2);
QueueFunction(&ARMv5::CodeRead32_3);
}
void ARMv5::CodeRead32_2()
void ARMv5::CodeRead32_3()
{
u32 addr = FetchAddr[16];
@ -2346,6 +2351,15 @@ void ARMv5::DRead8_2()
}
#endif
QueueFunction(&ARMv5::DRead8_3);
}
void ARMv5::DRead8_3()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does dcache trigger this?
@ -2360,10 +2374,10 @@ void ARMv5::DRead8_2()
else
WriteBufferCheck<1>();
QueueFunction(&ARMv5::DRead8_3);
QueueFunction(&ARMv5::DRead8_4);
}
void ARMv5::DRead8_3()
void ARMv5::DRead8_4()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
@ -2461,6 +2475,15 @@ void ARMv5::DRead16_2()
}
#endif
QueueFunction(&ARMv5::DRead16_3);
}
void ARMv5::DRead16_3()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
@ -2475,10 +2498,10 @@ void ARMv5::DRead16_2()
else
WriteBufferCheck<1>();
QueueFunction(&ARMv5::DRead16_3);
QueueFunction(&ARMv5::DRead16_4);
}
void ARMv5::DRead16_3()
void ARMv5::DRead16_4()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
@ -2577,6 +2600,15 @@ void ARMv5::DRead32_2()
}
#endif
QueueFunction(&ARMv5::DRead32_3);
}
void ARMv5::DRead32_3()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
@ -2591,10 +2623,10 @@ void ARMv5::DRead32_2()
else
WriteBufferCheck<1>();
QueueFunction(&ARMv5::DRead32_3);
QueueFunction(&ARMv5::DRead32_4);
}
void ARMv5::DRead32_3()
void ARMv5::DRead32_4()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
@ -2679,6 +2711,15 @@ void ARMv5::DRead32S_2()
}
#endif
QueueFunction(&ARMv5::DRead32S_3);
}
void ARMv5::DRead32S_3()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
@ -2693,10 +2734,10 @@ void ARMv5::DRead32S_2()
else
WriteBufferCheck<1>();
QueueFunction(&ARMv5::DRead32S_3);
QueueFunction(&ARMv5::DRead32S_4);
}
void ARMv5::DRead32S_3()
void ARMv5::DRead32S_4()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
@ -2815,16 +2856,6 @@ void ARMv5::DWrite8_2()
if (!(PU_Map[addr>>12] & (0x30)))
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite8_3);
}
else
@ -2837,6 +2868,21 @@ void ARMv5::DWrite8_2()
}
void ARMv5::DWrite8_3()
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite8_4);
}
void ARMv5::DWrite8_4()
{
u8 reg = __builtin_ctz(STRRegs);
u32 addr = FetchAddr[reg];
@ -2929,16 +2975,6 @@ void ARMv5::DWrite16_2()
if (!(PU_Map[addr>>12] & 0x30))
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite16_3);
}
else
@ -2951,6 +2987,21 @@ void ARMv5::DWrite16_2()
}
void ARMv5::DWrite16_3()
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite16_4);
}
void ARMv5::DWrite16_4()
{
u8 reg = __builtin_ctz(STRRegs);
u32 addr = FetchAddr[reg];
@ -3048,16 +3099,6 @@ void ARMv5::DWrite32_2()
if (!(PU_Map[addr>>12] & 0x30))
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite32_3);
}
else
@ -3071,6 +3112,21 @@ void ARMv5::DWrite32_2()
}
void ARMv5::DWrite32_3()
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite32_4);
}
void ARMv5::DWrite32_4()
{
u8 reg = __builtin_ctz(STRRegs);
u32 addr = FetchAddr[reg];
@ -3162,15 +3218,6 @@ void ARMv5::DWrite32S_2()
if (!(PU_Map[addr>>12] & 0x30)) // non-bufferable
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite32S_3);
}
else
@ -3181,6 +3228,20 @@ void ARMv5::DWrite32S_2()
}
void ARMv5::DWrite32S_3()
{
if (NDS.ARM9Timestamp < NDS.DMA9Timestamp) NDS.ARM9Timestamp = NDS.DMA9Timestamp;
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: do buffered writes trigger this?
if (ICacheStreamPtr < 7)
{
u64 time = ICacheStreamTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferCheck<2>();
QueueFunction(&ARMv5::DWrite32S_4);
}
void ARMv5::DWrite32S_4()
{
u8 reg = __builtin_ctz(STRRegs);
u32 addr = FetchAddr[reg];

View File

@ -792,7 +792,7 @@ void NDS::SetARM9BIOS(const std::array<u8, ARM9BIOSSize>& bios) noexcept
u64 NDS::NextTarget()
{
u64 minEvent = UINT64_MAX;
u64 minEvent = std::max(SysTimestamp+1, NDSCartSlot.ROMTransferTime[0]);
u32 mask = SchedListMask;
for (int i = 0; i < Event_MAX; i++)
@ -1740,7 +1740,6 @@ u32 NDS::RunFrame()
u64 target = NextTarget();
ARM9Target = target << ARM9ClockShift;
//ARM7Target = target;
while (std::max(std::max(ARM9Timestamp, DMA9Timestamp), A9ContentionTS << ARM9ClockShift) < ARM9Target)
{
@ -1777,11 +1776,9 @@ u32 NDS::RunFrame()
//printf("MAIN LOOP: 9 %lli %08X %08llX %i 7 %lli %08X %08llX %i %i %08X\n", ARM9Timestamp>>ARM9ClockShift, ARM9.PC, ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type, IME[1], IE[1]);
NDSCartSlot.ROMPrepareData();
RunTimers(0);
GPU.GPU3D.Run();
//if (MainRAMHandle()) break;
MainRAMHandle();
target = std::max(std::max(ARM9Timestamp, DMA9Timestamp) >> ARM9ClockShift, A9ContentionTS);
@ -1816,12 +1813,12 @@ u32 NDS::RunFrame()
}
RunTimers(1);
NDSCartSlot.ROMPrepareData();
if (!MainRAMHandle()) break;
}
}
NDSCartSlot.ROMPrepareData();
RunSystem(target);
if (CPUStop & CPUStop_Sleep)

View File

@ -1818,7 +1818,7 @@ void NDSCartSlot::ROMEndTransfer(u32 param) noexcept
void NDSCartSlot::ROMPrepareData() noexcept
{
u64 curts;
if (NDS.CurCPU) curts = NDS.ARM7Timestamp;
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
if (curts < ROMTransferTime[0]) return;
@ -1937,8 +1937,8 @@ void NDSCartSlot::WriteROMCnt(u32 val) noexcept
else
{
u64 curts;
if (NDS.CurCPU) curts = NDS.ARM7Timestamp;
else curts = ((std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift);
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
ROMTransferTime[0] = (xfercycle*(cmddelay+4)) + curts;
@ -1964,7 +1964,7 @@ void NDSCartSlot::AdvanceROMTransfer() noexcept
}
u64 curts;
if (NDS.CurCPU) curts = NDS.ARM7Timestamp;
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
ROMTransferTime[0] = ROMTransferTime[1];
@ -1984,7 +1984,7 @@ u32 NDSCartSlot::ReadROMData() noexcept
if (ROMCnt & (1<<30)) return 0;
u64 curts;
if (NDS.CurCPU) curts = NDS.ARM7Timestamp;
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
ROMPrepareData();
@ -2002,7 +2002,7 @@ void NDSCartSlot::WriteROMData(u32 val) noexcept
if (!(ROMCnt & (1<<30))) return;
u64 curts;
if (NDS.CurCPU) curts = NDS.ARM7Timestamp;
if (NDS.ExMemCnt[0] & (1<<11)) curts = NDS.ARM7Timestamp;
else curts = (std::max(NDS.ARM9Timestamp, NDS.DMA9Timestamp) + ((1<<NDS.ARM9ClockShift)-1)) >> NDS.ARM9ClockShift;
ROMPrepareData();

View File

@ -420,6 +420,8 @@ public:
// Returns the stored SPICnt value.
[[nodiscard]] u16 GetSPICnt() const noexcept
{
    return SPICnt;
}
// Overwrites the stored SPICnt value with val.
void SetSPICnt(u16 val) noexcept
{
    SPICnt = val;
}
u64 ROMTransferTime[2];
private:
friend class CartCommon;
melonDS::NDS& NDS;
@ -445,8 +447,6 @@ private:
u64 Key2_X = 0;
u64 Key2_Y = 0;
u64 ROMTransferTime[2];
void Key1_Encrypt(u32* data) const noexcept;
void Key1_Decrypt(u32* data) const noexcept;
void Key1_ApplyKeycode(u32* keycode, u32 mod) noexcept;