implement write buffer

This commit is contained in:
Jaklyy 2024-12-08 00:19:43 -05:00
parent d14c5ea246
commit b40c6bc41d
4 changed files with 594 additions and 294 deletions

View File

@ -198,15 +198,12 @@ void ARM::Reset()
BreakReq = false;
#endif
MainRAMTimestamp = 0;
memset(&MRTrack, 0, sizeof(MRTrack));
FuncQueueFill = 0;
FuncQueueEnd = 0;
FuncQueueProg = 0;
FuncQueueActive = false;
ExecuteCycles = 0;
// zorp
JumpTo(ExceptionBase);
@ -748,6 +745,12 @@ void ARMv5::StartExec()
else
AddCycles_C();
}
QueueFunction(&ARMv5::WBCheck_2);
}
void ARMv5::WBCheck_2()
{
WriteBufferCheck<false>();
}
template <CPUExecuteMode mode>
@ -756,7 +759,7 @@ void ARMv5::Execute()
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckB();
if (Halted)
if (!FuncQueueActive && Halted)
{
if (Halted == 2)
{
@ -777,7 +780,6 @@ void ARMv5::Execute()
else
{
NDS.ARM9Timestamp = NDS.ARM9Target;
WriteBufferCheck<false>();
return;
}
}
@ -828,7 +830,7 @@ void ARMv5::Execute()
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckC(); // gdb might throw a hissy fit about this change but idc
//printf("A:%i, F:%i, P:%i, E:%i, I:%08llX, P:%08X, 15:%08X\n", FuncQueueActive, FuncQueueFill, FuncQueueProg, FuncQueueEnd, CurInstr, PC, R[15]);
//printf("A9: A:%i, F:%i, P:%i, E:%i, I:%08llX, P:%08X, 15:%08X\n", FuncQueueActive, FuncQueueFill, FuncQueueProg, FuncQueueEnd, CurInstr, PC, R[15]);
(this->*FuncQueue[FuncQueueProg])();
@ -882,7 +884,6 @@ void ARMv5::Execute()
//NDS.ARM9Timestamp += Cycles;
//Cycles = 0;
}
WriteBufferCheck<false>();
if (Halted == 2)
Halted = 0;
@ -938,7 +939,7 @@ void ARMv4::Execute()
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckB();
if (Halted)
if (!FuncQueueActive && Halted)
{
if (Halted == 2)
{
@ -1008,8 +1009,8 @@ void ARMv4::Execute()
if constexpr (mode == CPUExecuteMode::InterpreterGDB)
GdbCheckC();
//printf("A:%i, F:%i, P:%i, E:%i, I:%08llX, 15:%08X\n", FuncQueueActive, FuncQueueFill, FuncQueueProg, FuncQueueEnd, CurInstr, R[15]);
//printf("A7: A:%i, F:%i, P:%i, E:%i, I:%08llX, 15:%08X\n", FuncQueueActive, FuncQueueFill, FuncQueueProg, FuncQueueEnd, CurInstr, R[15]);
(this->*FuncQueue[FuncQueueProg])();
if (FuncQueueActive)

View File

@ -21,6 +21,7 @@
#include <algorithm>
#include <optional>
#include <cstring>
#include "types.h"
#include "MemRegion.h"
@ -54,12 +55,26 @@ enum class CPUExecuteMode : u32
#endif
};
// Template argument for ARMv5::WriteBufferHandle<mode>(), selecting how the
// write buffer is asked to run. The notes below only record where each mode is
// requested in NDS::MainRAMHandleARM9; the exact semantics live in
// WriteBufferHandle itself.
enum class WBMode
{
    Check       = 0, // first call made by the WBWrite, WBCheck, WBWaitRead and WBWaitWrite requests
    Force       = 1, // used by the WBDrain request
    SingleBurst = 2, // used by the WBWaitRead / WBWaitWrite requests after the Check pass
    WaitEntry   = 3, // used by WBWrite when WBWritePointer == WBFillPointer
};
// Kind of request currently tracked in a CPU's MRTrack.Type.
// Null (0) means nothing is pending, which is also the state a zeroing memset
// of MRTrack produces.
//
// The numeric order matters: code tests `Type > MainRAMType::WriteBufferCmds`
// to recognise write buffer requests, so every write buffer command must have
// a value greater than WriteBufferCmds (i.e. be listed after it).
enum class MainRAMType : u8
{
    Null            = 0,
    Fetch           = 1,
    ICacheStream    = 2,
    DCacheStream    = 3,

    // Marker value only, not a request. Write buffer commands are not
    // strictly used for main RAM.
    WriteBufferCmds = 4,

    WBDrain         = 5,
    WBWrite         = 6,
    WBCheck         = 7,
    WBWaitRead      = 8,
    WBWaitWrite     = 9,
};
// each one represents a bit in the field
@ -214,7 +229,6 @@ public:
MemRegion CodeMem;
u64 MainRAMTimestamp;
MainRAMTrackers MRTrack;
u32 BranchAddr;
@ -493,7 +507,7 @@ public:
*/
void ICacheInvalidateAll();
template <int force> inline bool WriteBufferHandle();
template <WBMode mode> bool WriteBufferHandle();
template <int next> void WriteBufferCheck();
void WriteBufferWrite(u32 val, u8 flag, u32 addr = 0);
void WriteBufferDrain();
@ -724,18 +738,35 @@ public:
void JumpTo_3B();
void JumpTo_3C();
void JumpTo_4();
void CodeRead32_2();
void ICacheLookup_2();
void DAbortHandle();
void DCacheFin8();
void DRead8_2();
void DRead8_3();
void DCacheFin16();
void DRead16_2();
void DRead16_3();
void DCacheFin32();
void DRead32_2();
void DRead32_3();
void DRead32S_2();
void DRead32S_3();
void DWrite8_2();
void DWrite8_3();
void DWrite16_2();
void DWrite16_3();
void DWrite32_2();
void DWrite32_3();
void DWrite32S_2();
void DWrite32S_3();
void WBCheck_2();
void DCacheLookup_2();
void DCacheLookup_3();
void DCClearAddr_2();
void DCClearSetWay_2();
void DCClearInvalidateAddr_2();
void DCClearInvalidateSetWay_2();
void SetupInterlock_2();
void HandleInterlocksExecute_2();
void HandleInterlocksMemory_2();
@ -806,11 +837,14 @@ public:
u64 ITCMTimestamp;
u64 TimestampMemory;
void (ARMv5::*FuncQueue[31])(void);
void (ARMv5::*FuncQueue[32])(void);
void (ARMv5::*DelayedQueue)(void);
u32 PC;
bool NullFetch;
bool Store;
s8 ITCMDelay;
u32 QueuedDCacheLine;
u32 CP15Queue;
u8 ILCurrReg;
u8 ILPrevReg;
@ -833,7 +867,9 @@ public:
u8 WBWriting; // whether the buffer is actively trying to perform a write
u32 WBCurAddr; // address the write buffer is currently writing to
u64 WBCurVal; // current value being written; 0-31: val | 61-63: flag; 0 = byte ns; 1 = halfword ns; 2 = word ns; 3 = word s; 4 = address (invalid in this variable)
u32 WBAddrQueued[40];
u32 storeaddr[16]; // temp until i figure out why using the fifo address entries directly didn't work
u64 WBValQueued[40];
u64 WriteBufferFifo[16]; // 0-31: val | 61-63: flag; 0 = byte ns; 1 = halfword ns; 2 = word ns; 3 = word s; 4 = address
u64 WBTimestamp; // current timestamp
//u64 WBMainRAMDelay; // timestamp used to emulate the delay before the next main ram write can begin
@ -870,8 +906,7 @@ public:
template <CPUExecuteMode mode>
void Execute();
Platform::FileHandle* filey;
void (ARMv4::*FuncQueue[31])(void);
void (ARMv4::*FuncQueue[32])(void);
bool Nonseq;
void CodeRead16(u32 addr);

File diff suppressed because it is too large Load Diff

View File

@ -1054,6 +1054,91 @@ void NDS::MainRAMHandleARM9()
}
break;
}
// Write buffer drain request: run the write buffer in Force mode.
case MainRAMType::WBDrain:
{
// A false result exits the handler without clearing MRTrack here, so the
// request is still set when the handler is next entered.
if (!ARM9.WriteBufferHandle<WBMode::Force>()) return;
// NOTE(review): WBWritePointer == 16 looks like the "nothing left to write"
// sentinel (the FIFO itself has 16 slots, indices 0-15) — confirm against
// WriteBufferHandle. The request only completes once that is reached and no
// write is still in flight.
if ((ARM9.WBWritePointer == 16) && !ARM9.WBWriting)
{
// Done: zeroing MRTrack puts Type back to MainRAMType::Null (0), and
// dropping ConTSLock lets MainRAMHandle recompute A9ContentionTS.
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
}
break;
}
// Write buffer write request: moves one queued entry (index MRTrack.Progress
// of WBValQueued / WBAddrQueued) into the write buffer FIFO per pass, until
// MRTrack.Progress reaches MRTrack.Var.
case MainRAMType::WBWrite:
{
// Let the write buffer run first; a false result exits the handler with the
// request still set in MRTrack.
if (!ARM9.WriteBufferHandle<WBMode::Check>()) return;
if (ARM9.WBWritePointer == ARM9.WBFillPointer)
{
// NOTE(review): write and fill pointers being equal presumably means the
// FIFO is full, so a slot has to be freed before inserting — confirm.
if (!ARM9.WriteBufferHandle<WBMode::WaitEntry>()) return;
}
else if (ARM9.WBWritePointer == 16)
{
// NOTE(review): 16 appears to be the "buffer empty" sentinel; the write
// pointer is restarted at slot 0 before the new entry goes in — confirm.
ARM9.WBWritePointer = 0;
if (!ARM9.WBWriting)
{
// ts = ARM9Timestamp + 1, rounded up to a multiple of (1 << ARM9ClockShift).
// The write buffer clock is only ever moved forward to it, never back.
u64 ts = (ARM9Timestamp + 1 + ((1<<ARM9ClockShift)-1)) & ~((1<<ARM9ClockShift)-1);
if (ARM9.WBTimestamp < ts) ARM9.WBTimestamp = ts;
}
}
// Copy the queued value and its address into the slot being filled, then
// advance the fill pointer with wrap-around over the 16 slots.
ARM9.WriteBufferFifo[ARM9.WBFillPointer] = ARM9.WBValQueued[ARM9.MRTrack.Progress];
ARM9.storeaddr[ARM9.WBFillPointer] = ARM9.WBAddrQueued[ARM9.MRTrack.Progress];
ARM9.WBFillPointer = (ARM9.WBFillPointer + 1) & 0xF;
// Bits 61-63 of an entry hold its flag, and flag 4 marks an address entry.
// Only non-address (data) entries take the cycle below.
if ((ARM9.WBValQueued[ARM9.MRTrack.Progress] >> 61) != 4)
{
// Chained assignment: DataCycles is set to 1 and that 1 is added to
// ARM9Timestamp.
ARM9Timestamp += ARM9.DataCycles = 1;
ARM9.WBDelay = ARM9Timestamp + 1;
}
ARM9.MRTrack.Progress++;
// NOTE(review): MRTrack.Var is presumably the number of queued entries for
// this request — confirm at the site that sets it.
if (ARM9.MRTrack.Progress >= ARM9.MRTrack.Var)
{
// All entries inserted: zero MRTrack (Type becomes MainRAMType::Null) and
// release ConTSLock.
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
}
break;
}
// Write buffer check request: run the write buffer in Check mode and, once
// that reports true, finish the request.
case MainRAMType::WBCheck:
{
// A false result exits the handler with the request still set in MRTrack.
if (!ARM9.WriteBufferHandle<WBMode::Check>()) return;
// Zeroing MRTrack puts Type back to MainRAMType::Null (0); ConTSLock is
// released along with it.
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
break;
}
// Write buffer wait-for-read request. Unlike WBWaitWrite, the SingleBurst pass
// and the timestamp bump are only done when ARM9Timestamp has reached
// WBInitialTS.
case MainRAMType::WBWaitRead:
{
// A false result from either call exits the handler with the request still
// set in MRTrack.
if (!ARM9.WriteBufferHandle<WBMode::Check>()) return;
if (ARM9Timestamp >= ARM9.WBInitialTS)
{
if (!ARM9.WriteBufferHandle<WBMode::SingleBurst>()) return;
// The ARM9 clock is only moved forward to WBReleaseTS, never back.
if (ARM9Timestamp < ARM9.WBReleaseTS) ARM9Timestamp = ARM9.WBReleaseTS;
}
// Request finished: zero MRTrack (Type becomes MainRAMType::Null) and release
// ConTSLock.
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
break;
}
// Write buffer wait-for-write request: a Check pass followed unconditionally
// by a SingleBurst pass, after which the ARM9 clock is brought up to
// WBReleaseTS.
case MainRAMType::WBWaitWrite:
{
// A false result from either call exits the handler with the request still
// set in MRTrack.
if (!ARM9.WriteBufferHandle<WBMode::Check>()) return;
if (!ARM9.WriteBufferHandle<WBMode::SingleBurst>()) return;
// The ARM9 clock is only moved forward to WBReleaseTS, never back.
if (ARM9Timestamp < ARM9.WBReleaseTS) ARM9Timestamp = ARM9.WBReleaseTS;
// Request finished: zero MRTrack (Type becomes MainRAMType::Null) and release
// ConTSLock.
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
break;
}
}
}
@ -1121,12 +1206,12 @@ void NDS::MainRAMHandle()
{
if (!ConTSLock)
{
A9ContentionTS = (ARM9Timestamp + ((1<<ARM9ClockShift)-1)) >> ARM9ClockShift;
if (ARM9.MRTrack.Type != MainRAMType::Null)
{
ConTSLock = true;
if (A9ContentionTS < MainRAMTimestamp) A9ContentionTS = MainRAMTimestamp;
}
if (ARM9.MRTrack.Type != MainRAMType::Null) ConTSLock = true;
if (ARM9.MRTrack.Type > MainRAMType::WriteBufferCmds)
A9ContentionTS = (ARM9.WBTimestamp + ((1<<ARM9ClockShift)-1)) >> ARM9ClockShift;
else
A9ContentionTS = (ARM9Timestamp + ((1<<ARM9ClockShift)-1)) >> ARM9ClockShift;
}
if (A7PRIORITY)
@ -1261,14 +1346,14 @@ u32 NDS::RunFrame()
}
else if (ARM9.MRTrack.Type == MainRAMType::Null)
{
if (ARM9.abt) ARM9Timestamp = ARM9Target;
//if (ARM9.abt) ARM9Timestamp = ARM9Target;
ARM9.Execute<cpuMode>();
}
//printf("MAIN LOOP: 9 %lli %08X %08llX 7 %lli %08X %08llX %i %08X\n", ARM9Timestamp>>ARM9ClockShift, ARM9.PC, ARM9.CurInstr, ARM7Timestamp, ARM7.R[15], ARM7.CurInstr, IME[1], IE[1]);
//printf("MAIN LOOP: 9 %lli %08X %08llX %i 7 %lli %08X %08llX %i %i %08X\n", ARM9Timestamp>>ARM9ClockShift, ARM9.PC, ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type, IME[1], IE[1]);
MainRAMHandle();
RunTimers(0);
GPU.GPU3D.Run();
@ -1326,6 +1411,7 @@ u32 NDS::RunFrame()
SPU.TransferOutput();
break;
}
//printf("MAIN LOOP: 9 %lli %08X %08llX %i 7 %lli %08X %08llX %i %i %08X\n", ARM9Timestamp>>ARM9ClockShift, ARM9.PC, ARM9.CurInstr, (u8)ARM9.MRTrack.Type, ARM7Timestamp, ARM7.R[15], ARM7.CurInstr, (u8)ARM7.MRTrack.Type, IME[1], IE[1]);
// In the context of TASes, frame count is traditionally the primary measure of emulated time,
// so it needs to be tracked even if NDS is powered off.