implement icache streaming

This commit is contained in:
Jaklyy 2024-10-25 18:15:54 -04:00
parent 54dd4e5913
commit d31f652fc8
3 changed files with 130 additions and 45 deletions

View File

@ -207,6 +207,8 @@ void ARMv5::Reset()
ILCurrReg = 16; ILCurrReg = 16;
ILPrevReg = 16; ILPrevReg = 16;
ICacheFillPtr = 7;
WBWritePointer = 16; WBWritePointer = 16;
WBFillPointer = 0; WBFillPointer = 0;
WBDelay = 0; WBDelay = 0;
@ -320,30 +322,29 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
//if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]); //if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]);
//if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]); //if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]);
u32 oldregion = R[15] >> 24; if (ICacheFillPtr != 7)
u32 newregion = addr >> 24; {
u64 fillend = ICacheFillTimes[6] + 1;
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend;
ICacheFillPtr = 7;
}
if (addr & 0x1) if (addr & 0x1)
{ {
addr &= ~0x1; addr &= ~0x1;
R[15] = addr+2; R[15] = addr+2;
if (newregion != oldregion) SetupCodeMem(addr);
// two-opcodes-at-once fetch // two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there // doesn't matter if we put garbage in the MSbs there
if (addr & 0x2) if (addr & 0x2)
{ {
NextInstr[0] = CodeRead32(addr-2, true) >> 16; NextInstr[0] = CodeRead32(addr-2, true) >> 16;
Cycles += CodeCycles;
NextInstr[1] = CodeRead32(addr+2, false); NextInstr[1] = CodeRead32(addr+2, false);
Cycles += CodeCycles;
} }
else else
{ {
NextInstr[0] = CodeRead32(addr, true); NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = NextInstr[0] >> 16; NextInstr[1] = NextInstr[0] >> 16;
Cycles += CodeCycles;
} }
CPSR |= 0x20; CPSR |= 0x20;
@ -352,13 +353,8 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
{ {
addr &= ~0x3; addr &= ~0x3;
R[15] = addr+4; R[15] = addr+4;
if (newregion != oldregion) SetupCodeMem(addr);
NextInstr[0] = CodeRead32(addr, true); NextInstr[0] = CodeRead32(addr, true);
Cycles += CodeCycles;
NextInstr[1] = CodeRead32(addr+4, false); NextInstr[1] = CodeRead32(addr+4, false);
Cycles += CodeCycles;
CPSR &= ~0x20; CPSR &= ~0x20;
} }

View File

@ -673,6 +673,11 @@ public:
u64 ILCurrTime; u64 ILCurrTime;
u64 ILPrevTime; u64 ILPrevTime;
u8 ICacheFillPtr;
u8 DCacheFillPtr;
u64 ICacheFillTimes[7];
u64 DCacheFillTimes[7];
u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing
u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing
bool WBWriting; // whether the buffer is actively trying to perform a write bool WBWriting; // whether the buffer is actively trying to perform a write

View File

@ -418,16 +418,29 @@ u32 ARMv5::ICacheLookup(const u32 addr)
// retrieve the data from memory, even if the data was cached // retrieve the data from memory, even if the data was cached
// See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register") // See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
WriteBufferDrain(); WriteBufferDrain();
CodeCycles = NDS.ARM9MemTimings[tag >> 14][2]; CodeCycles = NDS.ARM9MemTimings[tag >> 14][2];
if (CodeMem.Mem)
{
return *(u32*)&CodeMem.Mem[(addr & CodeMem.Mask) & ~3];
} else
{ {
return NDS.ARM9Read32(addr & ~3); return NDS.ARM9Read32(addr & ~3);
} }
} }
NDS.ARM9Timestamp += 1;
if (ICacheFillPtr == 7) NDS.ARM9Timestamp++;
else
{
u64 nextfill = ICacheFillTimes[ICacheFillPtr++];
if (NDS.ARM9Timestamp < nextfill)
{
NDS.ARM9Timestamp = nextfill;
}
else
{
u64 fillend = ICacheFillTimes[6] + 2;
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend;
else NDS.ARM9Timestamp++;
ICacheFillPtr = 7;
}
}
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null; DataRegion = Mem9_Null;
Store = false; Store = false;
@ -442,11 +455,7 @@ u32 ARMv5::ICacheLookup(const u32 addr)
if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]] if (CP15BISTTestStateRegister & CP15_BIST_TR_DISABLE_ICACHE_LINEFILL) [[unlikely]]
{ {
WriteBufferDrain(); WriteBufferDrain();
CodeCycles = NDS.ARM9MemTimings[tag >> 14][2]; CodeCycles = NDS.ARM9MemTimings[tag >> 14][2];
if (CodeMem.Mem)
{
return *(u32*)&CodeMem.Mem[(addr & CodeMem.Mask) & ~3];
} else
{ {
return NDS.ARM9Read32(addr & ~3); return NDS.ARM9Read32(addr & ~3);
} }
@ -481,14 +490,17 @@ u32 ARMv5::ICacheLookup(const u32 addr)
line += id; line += id;
u32* ptr = (u32 *)&ICache[line << ICACHE_LINELENGTH_LOG2]; u32* ptr = (u32 *)&ICache[line << ICACHE_LINELENGTH_LOG2];
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferDrain(); WriteBufferDrain();
if (CodeMem.Mem)
{
memcpy(ptr, &CodeMem.Mem[tag & CodeMem.Mask], ICACHE_LINELENGTH);
}
else
{ {
for (int i = 0; i < ICACHE_LINELENGTH; i+=sizeof(u32)) for (int i = 0; i < ICACHE_LINELENGTH; i+=sizeof(u32))
ptr[i >> 2] = NDS.ARM9Read32(tag+i); ptr[i >> 2] = NDS.ARM9Read32(tag+i);
@ -509,10 +521,23 @@ u32 ARMv5::ICacheLookup(const u32 addr)
else if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<<NDS.ARM9ClockShift); else if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<<NDS.ARM9ClockShift);
Store = false; Store = false;
CodeCycles = ((NDS.ARM9MemTimings[tag >> 14][2] + (NDS.ARM9MemTimings[tag >> 14][3] * ((DCACHE_LINELENGTH / 4) - 1)) - 1) << NDS.ARM9ClockShift) + 1; u8 ns = MemTimings[addr>>14][1];
NDS.ARM9Timestamp += CodeCycles; u8 seq = MemTimings[addr>>14][2] + 1;
u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually
u32 cycles = ns + (seq * linepos);
NDS.ARM9Timestamp = cycles += NDS.ARM9Timestamp;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
ICacheFillPtr = linepos;
for (int i = linepos; i < 7; i++)
{
cycles += seq;
ICacheFillTimes[i] = cycles;
}
if ((addr >> 24) == 0x02) MainRAMTimestamp = ICacheFillTimes[6];
DataRegion = Mem9_Null; DataRegion = Mem9_Null;
return ptr[(addr & (ICACHE_LINELENGTH-1)) >> 2]; return ptr[(addr & (ICACHE_LINELENGTH-1)) >> 2];
} }
@ -1876,10 +1901,8 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
// prefetch abort // prefetch abort
// the actual exception is not raised until the aborted instruction is executed // the actual exception is not raised until the aborted instruction is executed
if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]] if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]]
{ {
CodeCycles = 1; NDS.ARM9Timestamp += 1;
NDS.ARM9Timestamp += CodeCycles;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null; DataRegion = Mem9_Null;
Store = false; Store = false;
@ -1888,10 +1911,8 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if (addr < ITCMSize) if (addr < ITCMSize)
{ {
CodeCycles = 1;
if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp; if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp;
NDS.ARM9Timestamp += CodeCycles; NDS.ARM9Timestamp += 1;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null; DataRegion = Mem9_Null;
Store = false; Store = false;
@ -1910,7 +1931,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
#endif #endif
} }
CodeCycles = MemTimings[addr >> 14][1]; u8 cycles = MemTimings[addr >> 14][1];
if (PU_Map[addr>>12] & 0x30) if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain(); WriteBufferDrain();
@ -1920,7 +1941,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if ((addr >> 24) == 0x02) if ((addr >> 24) == 0x02)
{ {
if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1); if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
NDS.ARM9Timestamp += CodeCycles; NDS.ARM9Timestamp += cycles;
if (NDS.ARM9ClockShift == 2) if (NDS.ARM9ClockShift == 2)
{ {
MainRAMTimestamp = NDS.ARM9Timestamp; MainRAMTimestamp = NDS.ARM9Timestamp;
@ -1930,14 +1951,13 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
else else
{ {
if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<<NDS.ARM9ClockShift); if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<<NDS.ARM9ClockShift);
NDS.ARM9Timestamp += CodeCycles; NDS.ARM9Timestamp += cycles;
} }
Store = false; Store = false;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual; if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null; DataRegion = Mem9_Null;
if (CodeMem.Mem) return *(u32*)&CodeMem.Mem[addr & CodeMem.Mask];
return BusRead32(addr); return BusRead32(addr);
} }
@ -1967,7 +1987,7 @@ bool ARMv5::DataRead8(u32 addr, u32* val)
*val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)]; *val = *(u8*)&DTCM[addr & (DTCMPhysicalSize - 1)];
return true; return true;
} }
#if !DISABLE_DCACHE #if !DISABLE_DCACHE
#ifdef JIT_ENABLED #ifdef JIT_ENABLED
//if (!NDS.IsJITEnabled()) //if (!NDS.IsJITEnabled())
@ -1982,6 +2002,14 @@ bool ARMv5::DataRead8(u32 addr, u32* val)
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30) if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain(); WriteBufferDrain();
@ -2044,6 +2072,14 @@ bool ARMv5::DataRead16(u32 addr, u32* val)
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30) if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain(); WriteBufferDrain();
@ -2106,6 +2142,14 @@ bool ARMv5::DataRead32(u32 addr, u32* val)
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30) if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain(); WriteBufferDrain();
@ -2170,14 +2214,22 @@ bool ARMv5::DataRead32S(u32 addr, u32* val)
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30) if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain(); WriteBufferDrain();
NDS.ARM9Timestamp += DataCycles; NDS.ARM9Timestamp += DataCycles;
// bursts cannot cross a 1kb boundary // bursts cannot cross a 1kb boundary
if (addr & 0x3FF) DataCycles = MemTimings[addr >> 12][3]; //s if (addr & 0x3FF) DataCycles = MemTimings[addr >> 12][2]; //s
else DataCycles = MemTimings[addr >> 12][2]; // ns else DataCycles = MemTimings[addr >> 12][1]; // ns
DataCycles += (((NDS.ARM9Timestamp + DataCycles) + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1))) - (NDS.ARM9Timestamp + DataCycles); DataCycles += (((NDS.ARM9Timestamp + DataCycles) + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1))) - (NDS.ARM9Timestamp + DataCycles);
@ -2235,6 +2287,14 @@ bool ARMv5::DataWrite8(u32 addr, u8 val)
} }
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & (0x30))) if (!(PU_Map[addr>>12] & (0x30)))
{ {
@ -2314,6 +2374,14 @@ bool ARMv5::DataWrite16(u32 addr, u16 val)
} }
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & 0x30)) if (!(PU_Map[addr>>12] & 0x30))
{ {
@ -2394,6 +2462,14 @@ bool ARMv5::DataWrite32(u32 addr, u32 val)
} }
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & 0x30)) if (!(PU_Map[addr>>12] & 0x30))
{ {
@ -2473,6 +2549,14 @@ bool ARMv5::DataWrite32S(u32 addr, u32 val)
} }
} }
#endif #endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & 0x30)) if (!(PU_Map[addr>>12] & 0x30))
{ {