implement icache streaming

This commit is contained in:
Jaklyy 2024-10-25 18:15:54 -04:00
parent 54dd4e5913
commit d31f652fc8
3 changed files with 130 additions and 45 deletions

View File

@ -207,6 +207,8 @@ void ARMv5::Reset()
ILCurrReg = 16;
ILPrevReg = 16;
ICacheFillPtr = 7;
WBWritePointer = 16;
WBFillPointer = 0;
WBDelay = 0;
@ -320,30 +322,29 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
//if (addr == 0x0201764C) printf("capture test %d: R1=%08X\n", R[6], R[1]);
//if (addr == 0x020175D8) printf("capture test %d: res=%08X\n", R[6], R[0]);
u32 oldregion = R[15] >> 24;
u32 newregion = addr >> 24;
if (ICacheFillPtr != 7)
{
u64 fillend = ICacheFillTimes[6] + 1;
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend;
ICacheFillPtr = 7;
}
if (addr & 0x1)
{
addr &= ~0x1;
R[15] = addr+2;
if (newregion != oldregion) SetupCodeMem(addr);
// two-opcodes-at-once fetch
// doesn't matter if we put garbage in the MSbs there
if (addr & 0x2)
{
NextInstr[0] = CodeRead32(addr-2, true) >> 16;
Cycles += CodeCycles;
NextInstr[1] = CodeRead32(addr+2, false);
Cycles += CodeCycles;
}
else
{
NextInstr[0] = CodeRead32(addr, true);
NextInstr[1] = NextInstr[0] >> 16;
Cycles += CodeCycles;
}
CPSR |= 0x20;
@ -352,13 +353,8 @@ void ARMv5::JumpTo(u32 addr, bool restorecpsr)
{
addr &= ~0x3;
R[15] = addr+4;
if (newregion != oldregion) SetupCodeMem(addr);
NextInstr[0] = CodeRead32(addr, true);
Cycles += CodeCycles;
NextInstr[1] = CodeRead32(addr+4, false);
Cycles += CodeCycles;
CPSR &= ~0x20;
}

View File

@ -673,6 +673,11 @@ public:
// Timing bookkeeping fields; the rest of the member list lies outside this excerpt.
u64 ILCurrTime;
u64 ILPrevTime;
// Progress marker for an instruction-cache line fill that is still streaming:
// the index of the next ICacheFillTimes entry a lookup will consume.
// The value 7 means "no fill in progress"; it is what the reset code assigns and
// what the pointer is set back to once a pending fill has been waited out.
u8 ICacheFillPtr;
// NOTE(review): presumably the data-cache counterpart of ICacheFillPtr; it is not
// referenced anywhere in the visible code -- confirm against the DCache path.
u8 DCacheFillPtr;
// Timestamps computed when an instruction-cache line fill starts, one per remaining
// word of the line. A lookup that consumes an entry is held until that timestamp;
// entry [6] is the last one and is used as the end-of-fill time by code that has
// to wait for (or limit its overlap with) the stream.
u64 ICacheFillTimes[7];
// NOTE(review): presumably the data-cache counterpart of ICacheFillTimes; not
// referenced in the visible code -- confirm before relying on it.
u64 DCacheFillTimes[7];
u8 WBWritePointer; // which entry to attempt to write next; should always be ANDed with 0xF after incrementing
u8 WBFillPointer; // where the next entry should be added; should always be ANDed with 0xF after incrementing
bool WBWriting; // whether the buffer is actively trying to perform a write

View File

@ -419,15 +419,28 @@ u32 ARMv5::ICacheLookup(const u32 addr)
// See arm946e-s Rev 1 technical manual, 2.3.15 "Register 15, test State Register")
WriteBufferDrain();
CodeCycles = NDS.ARM9MemTimings[tag >> 14][2];
if (CodeMem.Mem)
{
return *(u32*)&CodeMem.Mem[(addr & CodeMem.Mask) & ~3];
} else
{
return NDS.ARM9Read32(addr & ~3);
}
}
NDS.ARM9Timestamp += 1;
if (ICacheFillPtr == 7) NDS.ARM9Timestamp++;
else
{
u64 nextfill = ICacheFillTimes[ICacheFillPtr++];
if (NDS.ARM9Timestamp < nextfill)
{
NDS.ARM9Timestamp = nextfill;
}
else
{
u64 fillend = ICacheFillTimes[6] + 2;
if (NDS.ARM9Timestamp < fillend) NDS.ARM9Timestamp = fillend;
else NDS.ARM9Timestamp++;
ICacheFillPtr = 7;
}
}
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null;
Store = false;
@ -443,10 +456,6 @@ u32 ARMv5::ICacheLookup(const u32 addr)
{
WriteBufferDrain();
CodeCycles = NDS.ARM9MemTimings[tag >> 14][2];
if (CodeMem.Mem)
{
return *(u32*)&CodeMem.Mem[(addr & CodeMem.Mask) & ~3];
} else
{
return NDS.ARM9Read32(addr & ~3);
}
@ -482,13 +491,16 @@ u32 ARMv5::ICacheLookup(const u32 addr)
u32* ptr = (u32 *)&ICache[line << ICACHE_LINELENGTH_LOG2];
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
WriteBufferDrain();
if (CodeMem.Mem)
{
memcpy(ptr, &CodeMem.Mem[tag & CodeMem.Mask], ICACHE_LINELENGTH);
}
else
{
for (int i = 0; i < ICACHE_LINELENGTH; i+=sizeof(u32))
ptr[i >> 2] = NDS.ARM9Read32(tag+i);
@ -509,10 +521,23 @@ u32 ARMv5::ICacheLookup(const u32 addr)
else if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<<NDS.ARM9ClockShift);
Store = false;
CodeCycles = ((NDS.ARM9MemTimings[tag >> 14][2] + (NDS.ARM9MemTimings[tag >> 14][3] * ((DCACHE_LINELENGTH / 4) - 1)) - 1) << NDS.ARM9ClockShift) + 1;
NDS.ARM9Timestamp += CodeCycles;
u8 ns = MemTimings[addr>>14][1];
u8 seq = MemTimings[addr>>14][2] + 1;
u8 linepos = (addr & 0x1F) >> 2; // technically this is one too low, but we want that actually
u32 cycles = ns + (seq * linepos);
NDS.ARM9Timestamp = cycles += NDS.ARM9Timestamp;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
ICacheFillPtr = linepos;
for (int i = linepos; i < 7; i++)
{
cycles += seq;
ICacheFillTimes[i] = cycles;
}
if ((addr >> 24) == 0x02) MainRAMTimestamp = ICacheFillTimes[6];
DataRegion = Mem9_Null;
return ptr[(addr & (ICACHE_LINELENGTH-1)) >> 2];
}
@ -1877,9 +1902,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
// the actual exception is not raised until the aborted instruction is executed
if (!(PU_Map[addr>>12] & 0x04)) [[unlikely]]
{
CodeCycles = 1;
NDS.ARM9Timestamp += CodeCycles;
NDS.ARM9Timestamp += 1;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null;
Store = false;
@ -1888,10 +1911,8 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if (addr < ITCMSize)
{
CodeCycles = 1;
if (NDS.ARM9Timestamp < ITCMTimestamp) NDS.ARM9Timestamp = ITCMTimestamp;
NDS.ARM9Timestamp += CodeCycles;
NDS.ARM9Timestamp += 1;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null;
Store = false;
@ -1910,7 +1931,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
#endif
}
CodeCycles = MemTimings[addr >> 14][1];
u8 cycles = MemTimings[addr >> 14][1];
if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain();
@ -1920,7 +1941,7 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
if ((addr >> 24) == 0x02)
{
if (NDS.ARM9Timestamp < MainRAMTimestamp) NDS.ARM9Timestamp = MainRAMTimestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
NDS.ARM9Timestamp += CodeCycles;
NDS.ARM9Timestamp += cycles;
if (NDS.ARM9ClockShift == 2)
{
MainRAMTimestamp = NDS.ARM9Timestamp;
@ -1930,14 +1951,13 @@ u32 ARMv5::CodeRead32(u32 addr, bool branch)
else
{
if (NDS.ARM9Regions[addr>>14] == DataRegion && Store) NDS.ARM9Timestamp += (1<<NDS.ARM9ClockShift);
NDS.ARM9Timestamp += CodeCycles;
NDS.ARM9Timestamp += cycles;
}
Store = false;
if (NDS.ARM9Timestamp < TimestampActual) NDS.ARM9Timestamp = TimestampActual;
DataRegion = Mem9_Null;
if (CodeMem.Mem) return *(u32*)&CodeMem.Mem[addr & CodeMem.Mask];
return BusRead32(addr);
}
@ -1982,6 +2002,14 @@ bool ARMv5::DataRead8(u32 addr, u32* val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain();
@ -2044,6 +2072,14 @@ bool ARMv5::DataRead16(u32 addr, u32* val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain();
@ -2106,6 +2142,14 @@ bool ARMv5::DataRead32(u32 addr, u32* val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain();
@ -2170,14 +2214,22 @@ bool ARMv5::DataRead32S(u32 addr, u32* val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (PU_Map[addr>>12] & 0x30)
WriteBufferDrain();
NDS.ARM9Timestamp += DataCycles;
// bursts cannot cross a 1kb boundary
if (addr & 0x3FF) DataCycles = MemTimings[addr >> 12][3]; //s
else DataCycles = MemTimings[addr >> 12][2]; // ns
if (addr & 0x3FF) DataCycles = MemTimings[addr >> 12][2]; //s
else DataCycles = MemTimings[addr >> 12][1]; // ns
DataCycles += (((NDS.ARM9Timestamp + DataCycles) + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1))) - (NDS.ARM9Timestamp + DataCycles);
@ -2236,6 +2288,14 @@ bool ARMv5::DataWrite8(u32 addr, u8 val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & (0x30)))
{
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
@ -2315,6 +2375,14 @@ bool ARMv5::DataWrite16(u32 addr, u16 val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & 0x30))
{
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
@ -2395,6 +2463,14 @@ bool ARMv5::DataWrite32(u32 addr, u32 val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & 0x30))
{
NDS.ARM9Timestamp = NDS.ARM9Timestamp + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1);
@ -2474,6 +2550,14 @@ bool ARMv5::DataWrite32S(u32 addr, u32 val)
}
#endif
// bus reads can only overlap with icache streaming by 6 cycles
// checkme: does cache trigger this?
if (ICacheFillPtr != 7)
{
u64 time = ICacheFillTimes[6] - 6; // checkme: minus 6?
if (NDS.ARM9Timestamp < time) NDS.ARM9Timestamp = time;
}
if (!(PU_Map[addr>>12] & 0x30))
{
DataCycles += (((NDS.ARM9Timestamp + DataCycles) + ((1<<NDS.ARM9ClockShift)-1) & ~((1<<NDS.ARM9ClockShift)-1))) - (NDS.ARM9Timestamp + DataCycles);