From aa523449350546619b30ae9fc9bccab8ff93a580 Mon Sep 17 00:00:00 2001 From: Ty Lamontagne Date: Thu, 19 Dec 2024 16:09:35 -0500 Subject: [PATCH] R5900: Improve the EE cache performance --- pcsx2/COP0.cpp | 112 ++++++++++++++++++++++++------------------------ pcsx2/R5900.cpp | 4 ++ pcsx2/R5900.h | 63 +++++++++++++++++++++++---- pcsx2/vtlb.cpp | 15 ++++--- 4 files changed, 123 insertions(+), 71 deletions(-) diff --git a/pcsx2/COP0.cpp b/pcsx2/COP0.cpp index 60cb09371a..f346f10117 100644 --- a/pcsx2/COP0.cpp +++ b/pcsx2/COP0.cpp @@ -230,49 +230,49 @@ void MapTLB(const tlbs& t, int i) u32 saddr, eaddr; COP0_LOG("MAP TLB %d: 0x%08X-> [0x%08X 0x%08X] S=%d G=%d ASID=%d Mask=0x%03X EntryLo0 PFN=%x EntryLo0 Cache=%x EntryLo1 PFN=%x EntryLo1 Cache=%x VPN2=%x", - i, t.VPN2, t.PFN0, t.PFN1, t.S >> 31, t.G, t.ASID, - t.Mask, t.EntryLo0 >> 6, (t.EntryLo0 & 0x38) >> 3, t.EntryLo1 >> 6, (t.EntryLo1 & 0x38) >> 3, t.VPN2); + i, t.VPN2(), t.PFN0(), t.PFN1(), t.isSPR() >> 31, t.isGlobal(), t.EntryHi.ASID, + t.Mask(), t.EntryLo0.PFN, t.EntryLo0.C, t.EntryLo1.PFN, t.EntryLo1.C, t.VPN2()); // According to the manual // 'It [SPR] must be mapped into a contiguous 16 KB of virtual address space that is // aligned on a 16KB boundary.Results are not guaranteed if this restriction is not followed.' // Assume that the game isn't doing anything less-than-ideal with the scratchpad mapping and map it directly to eeMem->Scratch. 
- if (t.S) + if (t.isSPR()) { - if (t.VPN2 != 0x70000000) - Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2); + if (t.VPN2() != 0x70000000) + Console.Warning("COP0: Mapping Scratchpad to non-default address 0x%08X", t.VPN2()); - vtlb_VMapBuffer(t.VPN2, eeMem->Scratch, Ps2MemSize::Scratch); + vtlb_VMapBuffer(t.VPN2(), eeMem->Scratch, Ps2MemSize::Scratch); } else { - if (t.EntryLo0 & 0x2) + if (t.EntryLo0.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = t.VPN2 >> 12; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = t.VPN2() >> 12; + eaddr = saddr + t.Mask() + 1; for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match - memSetPageAddr(addr << 12, t.PFN0 + ((addr - saddr) << 12)); + memSetPageAddr(addr << 12, t.PFN0() + ((addr - saddr) << 12)); Cpu->Clear(addr << 12, 0x400); } } } - if (t.EntryLo1 & 0x2) + if (t.EntryLo1.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = (t.VPN2 >> 12) + t.Mask + 1; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = (t.VPN2() >> 12) + t.Mask() + 1; + eaddr = saddr + t.Mask() + 1; for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match - memSetPageAddr(addr << 12, t.PFN1 + ((addr - saddr) << 12)); + memSetPageAddr(addr << 12, t.PFN1() + ((addr - saddr) << 12)); Cpu->Clear(addr << 12, 0x400); } } @@ -286,21 +286,21 @@ void UnmapTLB(const tlbs& t, int i) u32 mask, addr; u32 saddr, eaddr; - if (t.S) + if (t.isSPR()) { - vtlb_VMapUnmap(t.VPN2, 0x4000); + vtlb_VMapUnmap(t.VPN2(), 0x4000); return; } - if (t.EntryLo0 & 0x2) + if (t.EntryLo0.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = t.VPN2 >> 12; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = t.VPN2() >> 12; + eaddr = saddr + t.Mask() + 1; // Console.WriteLn("Clear TLB: 
%08x ~ %08x",saddr,eaddr-1); for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match memClearPageAddr(addr << 12); Cpu->Clear(addr << 12, 0x400); @@ -308,38 +308,34 @@ void UnmapTLB(const tlbs& t, int i) } } - if (t.EntryLo1 & 0x2) + if (t.EntryLo1.V) { - mask = ((~t.Mask) << 1) & 0xfffff; - saddr = (t.VPN2 >> 12) + t.Mask + 1; - eaddr = saddr + t.Mask + 1; + mask = ((~t.Mask()) << 1) & 0xfffff; + saddr = (t.VPN2() >> 12) + t.Mask() + 1; + eaddr = saddr + t.Mask() + 1; // Console.WriteLn("Clear TLB: %08x ~ %08x",saddr,eaddr-1); for (addr = saddr; addr < eaddr; addr++) { - if ((addr & mask) == ((t.VPN2 >> 12) & mask)) + if ((addr & mask) == ((t.VPN2() >> 12) & mask)) { //match memClearPageAddr(addr << 12); Cpu->Clear(addr << 12, 0x400); } } } + + cachedTlbs.erase(std::remove(cachedTlbs.begin(), cachedTlbs.end(), &t), cachedTlbs.end()); } void WriteTLB(int i) { - tlb[i].PageMask = cpuRegs.CP0.n.PageMask; - tlb[i].EntryHi = cpuRegs.CP0.n.EntryHi; - tlb[i].EntryLo0 = cpuRegs.CP0.n.EntryLo0; - tlb[i].EntryLo1 = cpuRegs.CP0.n.EntryLo1; + tlb[i].PageMask.UL = cpuRegs.CP0.n.PageMask; + tlb[i].EntryHi.UL = cpuRegs.CP0.n.EntryHi; + tlb[i].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0; + tlb[i].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1; - tlb[i].Mask = (cpuRegs.CP0.n.PageMask >> 13) & 0xfff; - tlb[i].nMask = (~tlb[i].Mask) & 0xfff; - tlb[i].VPN2 = ((cpuRegs.CP0.n.EntryHi >> 13) & (~tlb[i].Mask)) << 13; - tlb[i].ASID = cpuRegs.CP0.n.EntryHi & 0xfff; - tlb[i].G = cpuRegs.CP0.n.EntryLo0 & cpuRegs.CP0.n.EntryLo1 & 0x1; - tlb[i].PFN0 = (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12; - tlb[i].PFN1 = (((cpuRegs.CP0.n.EntryLo1 >> 6) & 0xFFFFF) & (~tlb[i].Mask)) << 12; - tlb[i].S = cpuRegs.CP0.n.EntryLo0 & 0x80000000; + if (!tlb[i].isSPR() && ((tlb[i].EntryLo0.V && tlb[i].EntryLo0.isCached()) || (tlb[i].EntryLo1.V && tlb[i].EntryLo1.isCached()))) + cachedTlbs.push_back(&tlb[i]); 
MapTLB(tlb[i], i); } @@ -357,10 +353,16 @@ namespace COP0 { int i = cpuRegs.CP0.n.Index & 0x3f; - cpuRegs.CP0.n.PageMask = tlb[i].PageMask; - cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi & ~(tlb[i].PageMask | 0x1f00); - cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi >> 12) & 1); - cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi >> 12) & 1); + cpuRegs.CP0.n.PageMask = tlb[i].PageMask.UL; + cpuRegs.CP0.n.EntryHi = tlb[i].EntryHi.UL & ~(tlb[i].PageMask.UL | 0x1f00); + /* + * TEST THIS?? + cpuRegs.CP0.n.EntryLo0 = (tlb[i].EntryLo0 & ~1) | ((tlb[i].EntryHi.UL >> 12) & 1); + cpuRegs.CP0.n.EntryLo1 = (tlb[i].EntryLo1 & ~1) | ((tlb[i].EntryHi.UL >> 12) & 1); + */ + cpuRegs.CP0.n.EntryLo0 = tlb[i].EntryLo0.UL; + cpuRegs.CP0.n.EntryLo1 = tlb[i].EntryLo1.UL; + } void TLBWI() @@ -374,10 +376,10 @@ namespace COP0 { cpuRegs.CP0.n.EntryLo0, cpuRegs.CP0.n.EntryLo1); UnmapTLB(tlb[j], j); - tlb[j].PageMask = cpuRegs.CP0.n.PageMask; - tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi; - tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0; - tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1; + tlb[j].PageMask.UL = cpuRegs.CP0.n.PageMask; + tlb[j].EntryHi.UL = cpuRegs.CP0.n.EntryHi; + tlb[j].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0; + tlb[j].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1; WriteTLB(j); } @@ -394,10 +396,10 @@ namespace COP0 { //if (j > 48) return; UnmapTLB(tlb[j], j); - tlb[j].PageMask = cpuRegs.CP0.n.PageMask; - tlb[j].EntryHi = cpuRegs.CP0.n.EntryHi; - tlb[j].EntryLo0 = cpuRegs.CP0.n.EntryLo0; - tlb[j].EntryLo1 = cpuRegs.CP0.n.EntryLo1; + tlb[j].PageMask.UL = cpuRegs.CP0.n.PageMask; + tlb[j].EntryHi.UL = cpuRegs.CP0.n.EntryHi; + tlb[j].EntryLo0.UL = cpuRegs.CP0.n.EntryLo0; + tlb[j].EntryLo1.UL = cpuRegs.CP0.n.EntryLo1; WriteTLB(j); } @@ -422,7 +424,7 @@ namespace COP0 { cpuRegs.CP0.n.Index = 0xFFFFFFFF; for (i = 0; i < 48; i++) { - if (tlb[i].VPN2 == ((~tlb[i].Mask) & (EntryHi32.s.VPN2)) && ((tlb[i].G & 1) || ((tlb[i].ASID & 0xff) == EntryHi32.s.ASID))) + if (tlb[i].VPN2() == 
((~tlb[i].Mask()) & (EntryHi32.s.VPN2)) && ((tlb[i].isGlobal()) || ((tlb[i].EntryHi.ASID & 0xff) == EntryHi32.s.ASID))) { cpuRegs.CP0.n.Index = i; break; diff --git a/pcsx2/R5900.cpp b/pcsx2/R5900.cpp index da3e80f055..61dc588e70 100644 --- a/pcsx2/R5900.cpp +++ b/pcsx2/R5900.cpp @@ -36,6 +36,8 @@ u32 EEoCycle; alignas(16) cpuRegistersPack _cpuRegistersPack; alignas(16) tlbs tlb[48]; +std::vector<tlbs*> cachedTlbs; + R5900cpu *Cpu = NULL; static constexpr uint eeWaitCycles = 3072; @@ -59,6 +61,8 @@ void cpuReset() std::memset(&cpuRegs, 0, sizeof(cpuRegs)); std::memset(&fpuRegs, 0, sizeof(fpuRegs)); std::memset(&tlb, 0, sizeof(tlb)); + cachedTlbs.clear(); + cachedTlbs.reserve(48); cpuRegs.pc = 0xbfc00000; //set pc reg to stack cpuRegs.CP0.n.Config = 0x440; diff --git a/pcsx2/R5900.h b/pcsx2/R5900.h index 164f3171ae..bf7f156701 100644 --- a/pcsx2/R5900.h +++ b/pcsx2/R5900.h @@ -160,17 +160,61 @@ struct fpuRegisters { u32 ACCflag; // an internal accumulator overflow flag }; +union PageMask_t +{ + struct + { + u32 : 13; + u32 Mask : 12; + u32 : 7; + }; + u32 UL; + + constexpr u32 nMask() const { return ~Mask & 0xfff; }; +}; + +union EntryHi_t +{ + struct + { + u32 ASID:8; + u32 : 5; + u32 VPN2:19; + }; + u32 UL; +}; + +union EntryLo_t +{ + struct + { + u32 G:1; + u32 V:1; + u32 D:1; + u32 C:3; + u32 PFN:20; + u32 : 5; + u32 S : 1; // Only used in EntryLo0 + }; + u32 UL; + + constexpr bool isCached() const { return C == 0x3; } +}; + struct tlbs { - u32 PageMask,EntryHi; - u32 EntryLo0,EntryLo1; - u32 Mask, nMask; - u32 G; - u32 ASID; - u32 VPN2; - u32 PFN0; - u32 PFN1; - u32 S; + PageMask_t PageMask; + EntryHi_t EntryHi; + EntryLo_t EntryLo0; + EntryLo_t EntryLo1; + + // (((cpuRegs.CP0.n.EntryLo0 >> 6) & 0xFFFFF) & (~tlb[i].Mask())) << 12; + constexpr u32 PFN0() const { return (EntryLo0.PFN & ~Mask()) << 12; } + constexpr u32 PFN1() const { return (EntryLo1.PFN & ~Mask()) << 12; } + constexpr u32 VPN2() const {return ((EntryHi.VPN2) & (~Mask())) << 13; } + constexpr u32 Mask() 
const { return PageMask.Mask; } + constexpr bool isGlobal() const { return EntryLo0.G && EntryLo1.G; } + constexpr bool isSPR() const { return EntryLo0.S; } }; #ifndef _PC_ @@ -210,6 +254,7 @@ struct cpuRegistersPack alignas(16) extern cpuRegistersPack _cpuRegistersPack; alignas(16) extern tlbs tlb[48]; +extern std::vector<tlbs*> cachedTlbs; static cpuRegisters& cpuRegs = _cpuRegistersPack.cpuRegs; static fpuRegisters& fpuRegs = _cpuRegistersPack.fpuRegs; diff --git a/pcsx2/vtlb.cpp b/pcsx2/vtlb.cpp index 6934639d4c..e4d8959ff7 100644 --- a/pcsx2/vtlb.cpp +++ b/pcsx2/vtlb.cpp @@ -128,21 +128,22 @@ __inline int CheckCache(u32 addr) return false; // } - for (int i = 1; i < 48; i++) + for (int i = 0; i < cachedTlbs.size(); i++) { - if (((tlb[i].EntryLo1 & 0x38) >> 3) == 0x3) + const auto& entry = cachedTlbs[i]; + if (entry->EntryLo1.isCached()) { - mask = ConvertPageMask(tlb[i].PageMask); - if ((addr >= tlb[i].PFN1) && (addr <= tlb[i].PFN1 + mask)) + mask = ConvertPageMask(entry->PageMask.UL); + if ((addr >= entry->PFN1()) && (addr <= entry->PFN1() + mask)) { //DevCon.Warning("Yay! Cache check cache addr=%x, mask=%x, addr+mask=%x, VPN2=%x PFN0=%x", addr, mask, (addr & mask), tlb[i].VPN2, tlb[i].PFN0); return true; } } - if (((tlb[i].EntryLo0 & 0x38) >> 3) == 0x3) + if (entry->EntryLo0.isCached()) { - mask = ConvertPageMask(tlb[i].PageMask); - if ((addr >= tlb[i].PFN0) && (addr <= tlb[i].PFN0 + mask)) + mask = ConvertPageMask(entry->PageMask.UL); + if ((addr >= entry->PFN0()) && (addr <= entry->PFN0() + mask)) { //DevCon.Warning("Yay! Cache check cache addr=%x, mask=%x, addr+mask=%x, VPN2=%x PFN0=%x", addr, mask, (addr & mask), tlb[i].VPN2, tlb[i].PFN0); return true;