From d31f652fc816cfafc2156e773033ef1d150f5426 Mon Sep 17 00:00:00 2001 From: Jaklyy <102590697+Jaklyy@users.noreply.github.com> Date: Fri, 25 Oct 2024 18:15:54 -0400 Subject: [PATCH] implement icache streaming --- src/ARM.cpp | 20 +++---- src/ARM.h | 5 ++ src/CP15.cpp | 150 +++++++++++++++++++++++++++++++++++++++------------ 3 files changed, 130 insertions(+), 45 deletions(-) diff --git a/src/ARM.cpp b/src/ARM.cpp index b3d3e9aa..b2211173 100644 --- a/src/ARM.cpp +++ b/src/ARM.cpp @@ -207,6 +207,8 @@ void ARMv5::Reset() ILCurrReg = 16; ILPrevReg = 16; + ICacheFillPtr = 7; + WBWritePointer = 16; WBFillPointer = 0; WBDelay = 0; @@ -320,30 +322,29 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) //if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]); //if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]); - u32 oldregion = R[15] >> 24; - u32 newregion = addr >> 24; + if (ICacheFillPtr != 7) + { + u64 fillend = ICacheFillTimes[6] + 1; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; + ICacheFillPtr = 7; + } if (addr & 0x1) { addr &= ~0x1; R[15] = addr+2; - if (newregion != oldregion) SetupCodeMem(addr); - // two-opcodes-at-once fetch // doesn't matter if we put garbage in the MSbs there if (addr & 0x2) { NextInstr[0] = CodeRead32(addr-2, true) >> 16; - Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+2, false); - Cycles += CodeCycles; } else { NextInstr[0] = CodeRead32(addr, true); NextInstr[1] = NextInstr[0] >> 16; - Cycles += CodeCycles; } CPSR |= 0x20; @@ -352,13 +353,8 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr) { addr &= ~0x3; R[15] = addr+4; - - if (newregion != oldregion) SetupCodeMem(addr); - NextInstr[0] = CodeRead32(addr, true); - Cycles += CodeCycles; NextInstr[1] = CodeRead32(addr+4, false); - Cycles += CodeCycles; CPSR &= ~0x20; } diff --git a/src/ARM.h b/src/ARM.h index ad97261b..657e2069 100644 --- a/src/ARM.h +++ b/src/ARM.h @@ -673,6 +673,11 @@ public: u64 ILCurrTime; u64 ILPrevTime; + u8 ICacheFillPtr; + u8 DCacheFillPtr; + u64 ICacheFillTimes[7]; + u64 DCacheFillTimes[7]; + u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing bool WBWriting; // whether the buffer is actively trying to perform a write diff --git a/src/CP15.cpp b/src/CP15.cpp index 19bc1339..1278fb6c 100644 --- a/src/CP15.cpp +++ b/src/CP15.cpp @@ -418,16 +418,29 @@ u32 ARMv5::ICacheLookup(const u32 addr) // retreive the data from memory, even if the data was cached // See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register") WriteBufferDrain(); - CodeCycles = NDS.ARM9MemTimings[tag >> 14][2]; - if (CodeMem.Mem) - { - return *(u32*)&CodeMem.Mem[(addr & CodeMem.Mask) & ~3]; - } else + CodeCycles = NDS.ARM9MemTimings[tag >> 14][2]; { return NDS.ARM9Read32(addr & ~3); } } - NDS.ARM9Timestamp += 1; + + if (ICacheFillPtr == 7) NDS.ARM9Timestamp++; + else + { + u64 nextfill = ICacheFillTimes[ICacheFillPtr++]; + if (NDS.ARM9Timestamp < nextfill) + { + NDS.ARM9Timestamp = nextfill; + } + else + { + u64 fillend = ICacheFillTimes[6] + 2; + if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend; + else NDS.ARM9Timestamp++; + ICacheFillPtr = 7; + } + } + if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; Store = false; @@ -442,11 +455,7 @@ u32 ARMv5::ICacheLookup(const u32 addr) if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]] { WriteBufferDrain(); - CodeCycles = NDS.ARM9MemTimings[tag >> 14][2]; - if (CodeMem.Mem) - { - return *(u32*)&CodeMem.Mem[(addr & CodeMem.Mask) & ~3]; - } else + CodeCycles = NDS.ARM9MemTimings[tag >> 14][2]; { return NDS.ARM9Read32(addr & ~3); } @@ -481,14 +490,17 @@ u32 ARMv5::ICacheLookup(const u32 addr) line += id; u32* ptr = (u32 *)&ICache[line << ICACHE_LINELENGTH_LOG2]; - + + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + WriteBufferDrain(); - if (CodeMem.Mem) - { - memcpy(ptr, &CodeMem.Mem[tag & CodeMem.Mask], ICACHE_LINELENGTH); - } - else { for (int i = 0; i < ICACHE_LINELENGTH; i+=sizeof(u32)) ptr[i >> 2] = NDS.ARM9Read32(tag+i); @@ -509,10 +521,23 @@ u32 ARMv5::ICacheLookup(const u32 addr) else if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<> 14][2] + (NDS.ARM9MemTimings[tag >> 14][3] * ((DCACHE_LINELENGTH / 4) - 1)) - 1) << NDS.ARM9ClockShift) + 1; - NDS.ARM9Timestamp += CodeCycles; + u8 ns = MemTimings[addr>>14][1]; + u8 seq = MemTimings[addr>>14][2] + 1; + + u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually + + u32 cycles = ns + (seq * linepos); + NDS.ARM9Timestamp = cycles += NDS.ARM9Timestamp; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; + ICacheFillPtr = linepos; + for (int i = linepos; i < 7; i++) + { + cycles += seq; + ICacheFillTimes[i] = cycles; + } + if ((addr >> 24) == 0x02) MainRAMTimestamp = ICacheFillTimes[6]; + DataRegion = Mem9_Null; return ptr[(addr & (ICACHE_LINELENGTH-1)) >> 2]; } @@ -1876,10 +1901,8 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) // prefetch abort // the actual exception is not raised until the aborted instruction is executed if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] - { - CodeCycles = 1; - - NDS.ARM9Timestamp += CodeCycles; + { + NDS.ARM9Timestamp += 1; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; Store = false; @@ -1888,10 +1911,8 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) if (addr < ITCMSize) { - CodeCycles = 1; - if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp; - NDS.ARM9Timestamp += CodeCycles; + NDS.ARM9Timestamp += 1; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; DataRegion = Mem9_Null; Store = false; @@ -1910,7 +1931,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) #endif } - CodeCycles = MemTimings[addr >> 14][1]; + u8 cycles = MemTimings[addr >> 14][1]; if (PU_Map[addr>>12] & 0x30) WriteBufferDrain(); @@ -1920,7 +1941,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch) if ((addr >> 24) == 0x02) { if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp + ((1<>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<>12] & 0x30) WriteBufferDrain(); @@ -2044,6 +2072,14 @@ bool ARMv5::DataRead16(u32 addr, u32* val) } #endif + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + if (PU_Map[addr>>12] & 0x30) WriteBufferDrain(); @@ -2106,6 +2142,14 @@ bool ARMv5::DataRead32(u32 addr, u32* val) } #endif + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + if (PU_Map[addr>>12] & 0x30) WriteBufferDrain(); @@ -2170,14 +2214,22 @@ bool ARMv5::DataRead32S(u32 addr, u32* val) } #endif + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } + if (PU_Map[addr>>12] & 0x30) WriteBufferDrain(); NDS.ARM9Timestamp += DataCycles; // bursts cannot cross a 1kb boundary - if (addr & 0x3FF) DataCycles = MemTimings[addr >> 12][3]; //s - else DataCycles = MemTimings[addr >> 12][2]; // ns + if (addr & 0x3FF) DataCycles = MemTimings[addr >> 12][2]; //s + else DataCycles = MemTimings[addr >> 12][1]; // ns DataCycles += (((NDS.ARM9Timestamp + DataCycles) + ((1<>12] & (0x30))) { @@ -2314,6 +2374,14 @@ bool ARMv5::DataWrite16(u32 addr, u16 val) } } #endif + + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } if (!(PU_Map[addr>>12] & 0x30)) { @@ -2394,6 +2462,14 @@ bool ARMv5::DataWrite32(u32 addr, u32 val) } } #endif + + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } if (!(PU_Map[addr>>12] & 0x30)) { @@ -2473,6 +2549,14 @@ bool ARMv5::DataWrite32S(u32 addr, u32 val) } } #endif + + // bus reads can only overlap with icache streaming by 6 cycles + // checkme: does cache trigger this? + if (ICacheFillPtr != 7) + { + u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6? + if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time; + } if (!(PU_Map[addr>>12] & 0x30)) {