do dcache; tweak some contention handling logic

This commit is contained in:
Jaklyy 2024-12-06 21:55:21 -05:00
parent db7eb564f0
commit 698d78bc8d
4 changed files with 148 additions and 94 deletions

View File

@ -57,8 +57,9 @@ enum class CPUExecuteMode : u32
enum class MainRAMType : u8 enum class MainRAMType : u8
{ {
Null = 0, Null = 0,
Fetch,
ICacheStream, ICacheStream,
Fetch DCacheStream,
}; };
// each one represents a bit in the field // each one represents a bit in the field
@ -501,7 +502,7 @@ public:
* cache. The address is internally aligned to a word boundary * cache. The address is internally aligned to a word boundary
* @return Value of the word at addr * @return Value of the word at addr
*/ */
u32 DCacheLookup(const u32 addr); bool DCacheLookup(const u32 addr);
/** /**
* @brief Updates a word in the data cache if present * @brief Updates a word in the data cache if present
@ -684,8 +685,11 @@ public:
void JumpTo_4(); void JumpTo_4();
void DAbortHandle(); void DAbortHandle();
void DAbortHandleS(); void DAbortHandleS();
void DCacheFin8();
void DRead8_2(); void DRead8_2();
void DCacheFin16();
void DRead16_2(); void DRead16_2();
void DCacheFin32();
void DRead32_2(); void DRead32_2();
void DRead32S_2(); void DRead32S_2();
void DWrite8_2(); void DWrite8_2();

View File

@ -586,7 +586,7 @@ bool ARMv5::IsAddressICachable(const u32 addr) const
return PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_ICACHEABLE; return PU_Map[addr >> CP15_MAP_ENTRYSIZE_LOG2] & CP15_MAP_ICACHEABLE;
} }
u32 ARMv5::DCacheLookup(const u32 addr) bool ARMv5::DCacheLookup(const u32 addr)
{ {
//Log(LogLevel::Debug,"DCache load @ %08x\n", addr); //Log(LogLevel::Debug,"DCache load @ %08x\n", addr);
const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1)); const u32 tag = (addr & ~(DCACHE_LINELENGTH - 1));
@ -655,7 +655,8 @@ u32 ARMv5::DCacheLookup(const u32 addr)
} }
DataRegion = Mem9_DCache; DataRegion = Mem9_DCache;
//Log(LogLevel::Debug, "DCache hit at %08lx returned %08x from set %i, line %i\n", addr, cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2], set, id>>2); //Log(LogLevel::Debug, "DCache hit at %08lx returned %08x from set %i, line %i\n", addr, cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2], set, id>>2);
return cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2]; RetVal = cacheLine[(addr & (DCACHE_LINELENGTH -1)) >> 2];
return true;
} }
} }
@ -672,24 +673,7 @@ u32 ARMv5::DCacheLookup(const u32 addr)
// We do not fill the cacheline if it is disabled in the // We do not fill the cacheline if it is disabled in the
// BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register") // BIST test State register (See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_LINEFILL) [[unlikely]] if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_LINEFILL) [[unlikely]]
{ return false;
WriteBufferDrain();
NDS.ARM9Timestamp = (NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
DataCycles = MemTimings[addr >> 14][1]; // CHECKME: can this do sequential accesses?
if ((addr >> 24) == 0x02)
{
if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = (MainRAMTimestamp + ((1<<NDS.ARM9ClockShift)-1)) & ~((1<<NDS.ARM9ClockShift)-1);
MainRAMTimestamp = NDS.ARM9Timestamp + DataCycles;
if (NDS.ARM9ClockShift == 2) DataCycles -= 4;
DataRegion = Mem9_MainRAM;
}
else DataRegion = NDS.ARM9Regions[addr>>14];
return BusRead32(addr & ~3);
}
u32 line; u32 line;
@ -728,73 +712,67 @@ u32 ARMv5::DCacheLookup(const u32 addr)
DCacheClearByASetAndWay(line & (DCACHE_SETS-1), line >> DCACHE_SETS_LOG2); DCacheClearByASetAndWay(line & (DCACHE_SETS-1), line >> DCACHE_SETS_LOG2);
#endif #endif
for (int i = 0; i < DCACHE_LINELENGTH; i+=sizeof(u32))
{
ptr[i >> 2] = BusRead32(tag+i);
}
DCacheTags[line] = tag | (line & (DCACHE_SETS-1)) | CACHE_FLAG_VALID; DCacheTags[line] = tag | (line & (DCACHE_SETS-1)) | CACHE_FLAG_VALID;
// timing logic // timing logic
// Disabled DCACHE Streaming: if (NDS.ARM9Regions[addr>>14] == Mem9_MainRAM)
// Wait until the entire cache line is filled before continuing with execution
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
{ {
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1); MRTrack.Type = MainRAMType::DCacheStream;
MRTrack.Var = line;
NDS.ARM9Timestamp += MemTimings[tag >> 14][1] + (MemTimings[tag >> 14][2] * ((DCACHE_LINELENGTH / 4) - 2)); FetchAddr[16] = addr;
DataCycles = MemTimings[tag>>14][2]; if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
DCacheStreamPtr = 7;
if ((addr >> 24) == 0x02) else DCacheStreamPtr = (addr & 0x1F) / 4;
}
else
{
for (int i = 0; i < DCACHE_LINELENGTH; i+=sizeof(u32))
{ {
if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp; ptr[i >> 2] = BusRead32(tag+i);
MainRAMTimestamp = NDS.ARM9Timestamp + DataCycles;
DataRegion = Mem9_MainRAM;
} }
else // Disabled DCACHE Streaming:
// Wait until the entire cache line is filled before continuing with execution
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_DCACHE_STREAMING) [[unlikely]]
{ {
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
NDS.ARM9Timestamp += MemTimings[tag >> 14][1] + (MemTimings[tag >> 14][2] * ((DCACHE_LINELENGTH / 4) - 2));
DataCycles = MemTimings[tag>>14][2];
DataRegion = NDS.ARM9Regions[addr>>14]; DataRegion = NDS.ARM9Regions[addr>>14];
if (((NDS.ARM9Timestamp <= WBReleaseTS) && (NDS.ARM9Regions[addr>>14] == WBLastRegion)) // check write buffer if (((NDS.ARM9Timestamp <= WBReleaseTS) && (NDS.ARM9Regions[addr>>14] == WBLastRegion)) // check write buffer
|| (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store || (Store && (NDS.ARM9Regions[addr>>14] == DataRegion))) //check the actual store
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift; NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
} }
} else // DCache Streaming logic
else // DCache Streaming logic
{
DataRegion = NDS.ARM9Regions[addr>>14];
if ((addr >> 24) == 0x02)
{
if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp;
}
else
{ {
DataRegion = NDS.ARM9Regions[addr>>14];
if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer if ((NDS.ARM9Timestamp <= WBReleaseTS) && (DataRegion == WBLastRegion)) // check write buffer
NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift; NDS.ARM9Timestamp += 1<<NDS.ARM9ClockShift;
}
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1); NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
u8 ns = MemTimings[addr>>14][1]; u8 ns = MemTimings[addr>>14][1];
u8 seq = MemTimings[addr>>14][2]; u8 seq = MemTimings[addr>>14][2];
u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually
u64 cycles = ns + (seq * linepos); u64 cycles = ns + (seq * linepos);
DataCycles = cycles; DataCycles = cycles;
cycles += NDS.ARM9Timestamp; cycles += NDS.ARM9Timestamp;
DCacheStreamPtr = linepos; DCacheStreamPtr = linepos;
for (int i = linepos; i < 7; i++) for (int i = linepos; i < 7; i++)
{ {
cycles += seq; cycles += seq;
DCacheStreamTimes[i] = cycles; DCacheStreamTimes[i] = cycles;
}
} }
RetVal = ptr[(addr & (DCACHE_LINELENGTH-1)) >> 2];
if ((addr >> 24) == 0x02) MainRAMTimestamp = ((linepos < 7) ? ICacheStreamTimes[6] : NDS.ARM9Timestamp);
} }
return ptr[(addr & (DCACHE_LINELENGTH-1)) >> 2]; return true;
} }
bool ARMv5::DCacheWrite32(const u32 addr, const u32 val) bool ARMv5::DCacheWrite32(const u32 addr, const u32 val)
@ -2152,6 +2130,15 @@ void ARMv5::DAbortHandleS()
DataCycles = 1; DataCycles = 1;
} }
void ARMv5::DCacheFin8()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
*val = (RetVal >> (8 * (addr & 3))) & 0xff;
}
bool ARMv5::DataRead8(u32 addr, u8 reg) bool ARMv5::DataRead8(u32 addr, u8 reg)
{ {
// Data Aborts // Data Aborts
@ -2173,8 +2160,7 @@ void ARMv5::DRead8_2()
{ {
u8 reg = __builtin_ctz(LDRRegs); u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg]; u32 addr = FetchAddr[reg];
u32 dummy; u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (DCacheStreamPtr < 7) if (DCacheStreamPtr < 7)
{ {
@ -2206,8 +2192,11 @@ void ARMv5::DRead8_2()
{ {
if (IsAddressDCachable(addr)) if (IsAddressDCachable(addr))
{ {
*val = (DCacheLookup(addr) >> (8 * (addr & 3))) & 0xff; if (DCacheLookup(addr))
return; {
QueueFunction(&ARMv5::DCacheFin8);
return;
}
} }
} }
#endif #endif
@ -2247,6 +2236,15 @@ void ARMv5::DRead8_2()
} }
} }
void ARMv5::DCacheFin16()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg];
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
*val = (RetVal >> (8 * (addr & 2))) & 0xffff;
}
bool ARMv5::DataRead16(u32 addr, u8 reg) bool ARMv5::DataRead16(u32 addr, u8 reg)
{ {
// Data Aborts // Data Aborts
@ -2268,8 +2266,7 @@ void ARMv5::DRead16_2()
{ {
u8 reg = __builtin_ctz(LDRRegs); u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg]; u32 addr = FetchAddr[reg];
u32 dummy; u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (DCacheStreamPtr < 7) if (DCacheStreamPtr < 7)
{ {
@ -2303,8 +2300,11 @@ void ARMv5::DRead16_2()
{ {
if (IsAddressDCachable(addr)) if (IsAddressDCachable(addr))
{ {
*val = (DCacheLookup(addr) >> (8* (addr & 2))) & 0xffff; if (DCacheLookup(addr))
return; {
QueueFunction(&ARMv5::DCacheFin16);
return;
}
} }
} }
#endif #endif
@ -2344,6 +2344,14 @@ void ARMv5::DRead16_2()
} }
} }
void ARMv5::DCacheFin32()
{
u8 reg = __builtin_ctz(LDRRegs);
u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
*val = RetVal;
LDRRegs &= ~1<<reg;
}
bool ARMv5::DataRead32(u32 addr, u8 reg) bool ARMv5::DataRead32(u32 addr, u8 reg)
{ {
// Data Aborts // Data Aborts
@ -2365,8 +2373,7 @@ void ARMv5::DRead32_2()
{ {
u8 reg = __builtin_ctz(LDRRegs); u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg]; u32 addr = FetchAddr[reg];
u32 dummy; u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
if (DCacheStreamPtr < 7) if (DCacheStreamPtr < 7)
{ {
@ -2402,9 +2409,11 @@ void ARMv5::DRead32_2()
{ {
if (IsAddressDCachable(addr)) if (IsAddressDCachable(addr))
{ {
*val = DCacheLookup(addr); if (DCacheLookup(addr))
LDRRegs &= ~1<<reg; {
return; QueueFunction(&ARMv5::DCacheFin32);
return;
}
} }
} }
#endif #endif
@ -2466,8 +2475,7 @@ void ARMv5::DRead32S_2()
{ {
u8 reg = __builtin_ctz(LDRRegs); u8 reg = __builtin_ctz(LDRRegs);
u32 addr = FetchAddr[reg]; u32 addr = FetchAddr[reg];
u32 dummy; u32 dummy; u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
u32* val = (LDRFailedRegs & (1<<reg)) ? &dummy : &R[reg];
NDS.ARM9Timestamp += DataCycles; NDS.ARM9Timestamp += DataCycles;
@ -2498,9 +2506,11 @@ void ARMv5::DRead32S_2()
{ {
if (IsAddressDCachable(addr)) if (IsAddressDCachable(addr))
{ {
*val = DCacheLookup(addr); if (DCacheLookup(addr))
LDRRegs &= ~1<<reg; {
return; QueueFunction(&ARMv5::DCacheFin32);
return;
}
} }
} }
#endif #endif

View File

@ -473,7 +473,7 @@ void NDS::Reset()
ARM9Timestamp = 0; ARM9Target = 0; ARM9Timestamp = 0; ARM9Target = 0;
ARM7Timestamp = 0; ARM7Target = 0; ARM7Timestamp = 0; ARM7Target = 0;
MainRAMTimestamp = 0; MainRAMTimestamp = 0;
A9ContentionTS = 0; A9ContentionTS = 0; ConTSLock = false;
SysTimestamp = 0; SysTimestamp = 0;
InitTimings(); InitTimings();
@ -945,7 +945,7 @@ void NDS::MainRAMHandleARM9()
} }
MainRAMLastAccess = A9LAST; MainRAMLastAccess = A9LAST;
} }
ARM9Timestamp = A9ContentionTS << ARM9ClockShift; ARM9Timestamp = (A9ContentionTS << ARM9ClockShift) - 1;
if (var & MRCodeFetch) if (var & MRCodeFetch)
{ {
@ -980,6 +980,7 @@ void NDS::MainRAMHandleARM9()
if (ARM9.WBTimestamp < ts) ARM9.WBTimestamp = ts; if (ARM9.WBTimestamp < ts) ARM9.WBTimestamp = ts;
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack)); memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
break; break;
} }
@ -1013,7 +1014,42 @@ void NDS::MainRAMHandleARM9()
{ {
ARM9.RetVal = icache[(ARM9.FetchAddr[16] & 0x1F) / 4]; ARM9.RetVal = icache[(ARM9.FetchAddr[16] & 0x1F) / 4];
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack)); memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
A9ContentionTS = 0; ConTSLock = false;
}
break;
}
// Data cache line fill from main RAM. Each pass through this case fetches at
// most one 32-bit word of the line; MRTrack.Progress is the index (0..7) of
// the next word to fetch.
case MainRAMType::DCacheStream:
{
u8* prog = &ARM9.MRTrack.Progress;
// Base of the 32-byte line containing FetchAddr[16], plus the offset of the current word.
u32 addr = (ARM9.FetchAddr[16] & ~0x1F) | (*prog * 4);
// MRTrack.Var is the cache line index; << 5 turns it into a byte offset (32 bytes per line).
u32* dcache = (u32*)&ARM9.DCache[ARM9.MRTrack.Var << 5];
// Any word after the first, while A9WENTLAST holds, adds 2 to both timestamps.
// NOTE(review): presumably the sequential/burst access cost - confirm.
if ((*prog > 0) && A9WENTLAST)
{
MainRAMTimestamp += 2;
A9ContentionTS += 2;
}
else
{
// Main RAM is still busy: move the ARM9 contention time up to when it frees.
// If A7PRIORITY holds after that, return without fetching; Progress is not
// advanced, so the same word is still pending.
if (A9ContentionTS < MainRAMTimestamp) { A9ContentionTS = MainRAMTimestamp; if (A7PRIORITY) return; }
// Main RAM is marked busy for 9; the ARM9 side is charged 9 when
// ARM9ClockShift is 1 and 8 otherwise.
MainRAMTimestamp = A9ContentionTS + 9;
A9ContentionTS += (ARM9ClockShift == 1) ? 9 : 8;
MainRAMLastAccess = A9LAST;
}
dcache[*prog] = ARM9Read32(addr);
// When the word at index DCacheStreamPtr arrives, ARM9Timestamp is set from the
// contention time (scaled by ARM9ClockShift, minus one). Words after it record
// their arrival time in DCacheStreamTimes instead.
if (*prog == ARM9.DCacheStreamPtr) ARM9Timestamp = (A9ContentionTS << ARM9ClockShift) - 1;
else if (*prog > ARM9.DCacheStreamPtr) ARM9.DCacheStreamTimes[*prog-1] = (A9ContentionTS << ARM9ClockShift) - 1;
(*prog)++;
// All 8 words fetched: place the requested word in RetVal, zero the tracker
// (which also resets Progress), and clear ConTSLock.
if (*prog >= 8)
{
ARM9.RetVal = dcache[(ARM9.FetchAddr[16] & 0x1F) / 4];
memset(&ARM9.MRTrack, 0, sizeof(ARM9.MRTrack));
ConTSLock = false;
}
break;
}
@ -1082,10 +1118,14 @@ void NDS::MainRAMHandleARM7()
void NDS::MainRAMHandle() void NDS::MainRAMHandle()
{ {
if (!A9ContentionTS) if (!ConTSLock)
{ {
A9ContentionTS = (ARM9Timestamp + ((1<<ARM9ClockShift)-1)) >> ARM9ClockShift; A9ContentionTS = (ARM9Timestamp + ((1<<ARM9ClockShift)-1)) >> ARM9ClockShift;
if ((ARM9.MRTrack.Type != MainRAMType::Null) && (A9ContentionTS < MainRAMTimestamp)) A9ContentionTS = MainRAMTimestamp; if (ARM9.MRTrack.Type != MainRAMType::Null)
{
ConTSLock = true;
if (A9ContentionTS < MainRAMTimestamp) A9ContentionTS = MainRAMTimestamp;
}
} }
if (A7PRIORITY) if (A7PRIORITY)
@ -1094,7 +1134,7 @@ void NDS::MainRAMHandle()
{ {
if (A9ContentionTS < ARM7Timestamp) if (A9ContentionTS < ARM7Timestamp)
{ {
if (ARM9.MRTrack.Type == MainRAMType::Null) { A9ContentionTS = 0; return; } if (ARM9.MRTrack.Type == MainRAMType::Null) return;
MainRAMHandleARM9(); MainRAMHandleARM9();
} }
else else
@ -1110,7 +1150,7 @@ void NDS::MainRAMHandle()
{ {
if (A9ContentionTS <= ARM7Timestamp) if (A9ContentionTS <= ARM7Timestamp)
{ {
if (ARM9.MRTrack.Type == MainRAMType::Null) { A9ContentionTS = 0; return; } if (ARM9.MRTrack.Type == MainRAMType::Null) return;
MainRAMHandleARM9(); MainRAMHandleARM9();
} }
else else
@ -1220,7 +1260,7 @@ u32 NDS::RunFrame()
} }
else if (ARM9.MRTrack.Type == MainRAMType::Null) else if (ARM9.MRTrack.Type == MainRAMType::Null)
{ {
//if (ARM9.abt) ARM9Timestamp = ARM9Target; if (ARM9.abt) ARM9Timestamp = ARM9Target;
ARM9.Execute<cpuMode>(); ARM9.Execute<cpuMode>();
} }

View File

@ -258,7 +258,7 @@ public: // TODO: Encapsulate the rest of these members
u64 ARM9Timestamp, ARM9Target; u64 ARM9Timestamp, ARM9Target;
u64 ARM7Timestamp, ARM7Target; u64 ARM7Timestamp, ARM7Target;
u64 MainRAMTimestamp; u64 MainRAMTimestamp;
u64 A9ContentionTS; u64 A9ContentionTS; bool ConTSLock;
u32 ARM9ClockShift; u32 ARM9ClockShift;
u32 IME[2]; u32 IME[2];