Merge pull request #1654 from skidau/Fix-TLB-Cache

TLB cache fixes
This commit is contained in:
skidau 2014-12-06 10:41:21 +11:00
commit 757077e6fb
6 changed files with 118 additions and 111 deletions

View File

@ -28,7 +28,7 @@ std::string PPCDebugInterface::Disassemble(unsigned int address)
if (!Memory::IsRAMAddress(address, true, true)) if (!Memory::IsRAMAddress(address, true, true))
{ {
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU || !((address & JIT_ICACHE_VMEM_BIT) && if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU || !((address & JIT_ICACHE_VMEM_BIT) &&
Memory::TranslateAddress(address, Memory::FLAG_OPCODE))) Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION)))
{ {
return "(No RAM here)"; return "(No RAM here)";
} }

View File

@ -704,49 +704,57 @@ void SDRUpdated()
} }
// TLB cache
#define TLB_SIZE 128
#define TLB_WAYS 2
#define NUM_TLBS 2
#define HW_PAGE_INDEX_SHIFT 12
#define HW_PAGE_INDEX_MASK 0x3f
#define HW_PAGE_TAG_SHIFT 18
#define TLB_FLAG_MOST_RECENT 0x01
#define TLB_FLAG_INVALID 0x02
struct tlb_entry
{
u32 tag;
u32 paddr;
u8 flags;
};
// TODO: tlb needs to be in ppcState for save-state purposes.
static tlb_entry tlb[NUM_TLBS][TLB_SIZE/TLB_WAYS][TLB_WAYS];
static u32 LookupTLBPageAddress(const XCheckTLBFlag _Flag, const u32 vpa, u32 *paddr) static u32 LookupTLBPageAddress(const XCheckTLBFlag _Flag, const u32 vpa, u32 *paddr)
{ {
tlb_entry *tlbe = tlb[_Flag == FLAG_OPCODE][(vpa>>HW_PAGE_INDEX_SHIFT)&HW_PAGE_INDEX_MASK]; PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[_Flag == FLAG_OPCODE][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
if (tlbe[0].tag == (vpa & ~0xfff) && !(tlbe[0].flags & TLB_FLAG_INVALID)) if (tlbe[0].tag == (vpa & ~0xfff) && !(tlbe[0].flags & TLB_FLAG_INVALID))
{ {
// Check if C bit requires updating
if (_Flag == FLAG_WRITE)
{
UPTE2 PTE2;
PTE2.Hex = tlbe[0].pte;
if (PTE2.C == 0)
{
PTE2.C = 1;
tlbe[0].pte = PTE2.Hex;
return 0;
}
}
if (_Flag != FLAG_NO_EXCEPTION) if (_Flag != FLAG_NO_EXCEPTION)
{ {
tlbe[0].flags |= TLB_FLAG_MOST_RECENT; tlbe[0].flags |= TLB_FLAG_MOST_RECENT;
tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT; tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT;
} }
*paddr = tlbe[0].paddr | (vpa & 0xfff); *paddr = tlbe[0].paddr | (vpa & 0xfff);
return 1; return 1;
} }
if (tlbe[1].tag == (vpa & ~0xfff) && !(tlbe[1].flags & TLB_FLAG_INVALID)) if (tlbe[1].tag == (vpa & ~0xfff) && !(tlbe[1].flags & TLB_FLAG_INVALID))
{ {
// Check if C bit requires updating
if (_Flag == FLAG_WRITE)
{
UPTE2 PTE2;
PTE2.Hex = tlbe[1].pte;
if (PTE2.C == 0)
{
PTE2.C = 1;
tlbe[1].pte = PTE2.Hex;
return 0;
}
}
if (_Flag != FLAG_NO_EXCEPTION) if (_Flag != FLAG_NO_EXCEPTION)
{ {
tlbe[1].flags |= TLB_FLAG_MOST_RECENT; tlbe[1].flags |= TLB_FLAG_MOST_RECENT;
tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT; tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT;
} }
*paddr = tlbe[1].paddr | (vpa & 0xfff); *paddr = tlbe[1].paddr | (vpa & 0xfff);
return 1; return 1;
} }
return 0; return 0;
@ -757,12 +765,13 @@ static void UpdateTLBEntry(const XCheckTLBFlag _Flag, UPTE2 PTE2, const u32 vpa)
if (_Flag == FLAG_NO_EXCEPTION) if (_Flag == FLAG_NO_EXCEPTION)
return; return;
tlb_entry *tlbe = tlb[_Flag == FLAG_OPCODE][(vpa>>HW_PAGE_INDEX_SHIFT)&HW_PAGE_INDEX_MASK]; PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[_Flag == FLAG_OPCODE][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
if ((tlbe[0].flags & TLB_FLAG_MOST_RECENT) == 0) if ((tlbe[0].flags & TLB_FLAG_MOST_RECENT) == 0 || (tlbe[0].flags & TLB_FLAG_INVALID))
{ {
tlbe[0].flags = TLB_FLAG_MOST_RECENT; tlbe[0].flags = TLB_FLAG_MOST_RECENT;
tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT; tlbe[1].flags &= ~TLB_FLAG_MOST_RECENT;
tlbe[0].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT; tlbe[0].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
tlbe[0].pte = PTE2.Hex;
tlbe[0].tag = vpa & ~0xfff; tlbe[0].tag = vpa & ~0xfff;
} }
else else
@ -770,31 +779,20 @@ static void UpdateTLBEntry(const XCheckTLBFlag _Flag, UPTE2 PTE2, const u32 vpa)
tlbe[1].flags = TLB_FLAG_MOST_RECENT; tlbe[1].flags = TLB_FLAG_MOST_RECENT;
tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT; tlbe[0].flags &= ~TLB_FLAG_MOST_RECENT;
tlbe[1].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT; tlbe[1].paddr = PTE2.RPN << HW_PAGE_INDEX_SHIFT;
tlbe[1].pte = PTE2.Hex;
tlbe[1].tag = vpa & ~0xfff; tlbe[1].tag = vpa & ~0xfff;
} }
} }
void InvalidateTLBEntry(u32 vpa) void InvalidateTLBEntry(u32 vpa)
{ {
tlb_entry *tlbe = tlb[0][(vpa>>HW_PAGE_INDEX_SHIFT)&HW_PAGE_INDEX_MASK]; PowerPC::tlb_entry *tlbe = PowerPC::ppcState.tlb[0][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
if (tlbe[0].tag == (vpa & ~0xfff))
{
tlbe[0].flags |= TLB_FLAG_INVALID; tlbe[0].flags |= TLB_FLAG_INVALID;
}
if (tlbe[1].tag == (vpa & ~0xfff))
{
tlbe[1].flags |= TLB_FLAG_INVALID; tlbe[1].flags |= TLB_FLAG_INVALID;
} PowerPC::tlb_entry *tlbe_i = PowerPC::ppcState.tlb[1][(vpa >> HW_PAGE_INDEX_SHIFT) & HW_PAGE_INDEX_MASK];
tlb_entry *tlbe_i = tlb[1][(vpa>>HW_PAGE_INDEX_SHIFT)&HW_PAGE_INDEX_MASK];
if (tlbe_i[0].tag == (vpa & ~0xfff))
{
tlbe_i[0].flags |= TLB_FLAG_INVALID; tlbe_i[0].flags |= TLB_FLAG_INVALID;
}
if (tlbe_i[1].tag == (vpa & ~0xfff))
{
tlbe_i[1].flags |= TLB_FLAG_INVALID; tlbe_i[1].flags |= TLB_FLAG_INVALID;
} }
}
// Page Address Translation // Page Address Translation
static u32 TranslatePageAddress(const u32 _Address, const XCheckTLBFlag _Flag) static u32 TranslatePageAddress(const u32 _Address, const XCheckTLBFlag _Flag)
@ -813,72 +811,59 @@ static u32 TranslatePageAddress(const u32 _Address, const XCheckTLBFlag _Flag)
// Direct access to the fastmem Arena // Direct access to the fastmem Arena
// FIXME: is this the best idea for clean code? // FIXME: is this the best idea for clean code?
u8* pRAM = Memory::base; u8* base = Memory::base;
// hash function no 1 "xor" .360 // hash function no 1 "xor" .360
u32 hash1 = (VSID ^ page_index); u32 hash = (VSID ^ page_index);
u32 pteg_addr = ((hash1 & PowerPC::ppcState.pagetable_hashmask) << 6) | PowerPC::ppcState.pagetable_base;
// hash1 for (int hash_func = 0; hash_func < 2; hash_func++)
for (int i = 0; i < 8; i++)
{ {
UPTE1 PTE1; if (hash_func == 1)
PTE1.Hex = bswap(*(u32*)&pRAM[pteg_addr]);
if (PTE1.V && !PTE1.H)
{ {
if (VSID == PTE1.VSID && (api == PTE1.API))
{
UPTE2 PTE2;
PTE2.Hex = bswap((*(u32*)&pRAM[(pteg_addr + 4)]));
UpdateTLBEntry(_Flag, PTE2, _Address);
// set the access bits
switch (_Flag)
{
case FLAG_READ: PTE2.R = 1; break;
case FLAG_WRITE: PTE2.C = 1; break;
case FLAG_NO_EXCEPTION: break;
case FLAG_OPCODE: break;
}
*(u32*)&pRAM[(pteg_addr + 4)] = bswap(PTE2.Hex);
return ((PTE2.RPN << 12) | offset);
}
}
pteg_addr+=8;
}
// hash function no 2 "not" .360 // hash function no 2 "not" .360
hash1 = ~hash1; hash = ~hash;
pteg_addr = ((hash1 & PowerPC::ppcState.pagetable_hashmask) << 6) | PowerPC::ppcState.pagetable_base; }
u32 pteg_addr = ((hash & PowerPC::ppcState.pagetable_hashmask) << 6) | PowerPC::ppcState.pagetable_base;
if ((pteg_addr >> 28) == 1)
base = Memory::m_pEXRAM;
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
{ {
u32 pte = bswap(*(u32*)&pRAM[pteg_addr]); u32 pte = bswap(*(u32*)&base[pteg_addr]);
if ((pte & PTE1_V) && (pte & PTE1_H)) bool pteh = (pte & PTE1_H) == 0;
if (hash_func == 1)
pteh = !pteh;
if ((pte & PTE1_V) && pteh)
{ {
if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte))) if (VSID == PTE1_VSID(pte) && (api == PTE1_API(pte)))
{ {
UPTE2 PTE2; UPTE2 PTE2;
PTE2.Hex = bswap((*(u32*)&pRAM[(pteg_addr + 4)])); PTE2.Hex = bswap((*(u32*)&base[(pteg_addr + 4)]));
// set the access bits
switch (_Flag)
{
case FLAG_NO_EXCEPTION: break;
case FLAG_READ: PTE2.R = 1; break;
case FLAG_WRITE: PTE2.R = 1; PTE2.C = 1; break;
case FLAG_OPCODE: PTE2.R = 1; break;
}
if (_Flag != FLAG_NO_EXCEPTION)
*(u32*)&base[(pteg_addr + 4)] = bswap(PTE2.Hex);
UpdateTLBEntry(_Flag, PTE2, _Address); UpdateTLBEntry(_Flag, PTE2, _Address);
switch (_Flag) return (PTE2.RPN << 12) | offset;
{
case FLAG_READ: PTE2.R = 1; break;
case FLAG_WRITE: PTE2.C = 1; break;
case FLAG_NO_EXCEPTION: break;
case FLAG_OPCODE: break;
}
*(u32*)&pRAM[(pteg_addr + 4)] = bswap(PTE2.Hex);
return ((PTE2.RPN << 12) | offset);
} }
} }
pteg_addr += 8; pteg_addr += 8;
} }
}
return 0; return 0;
} }

View File

@ -648,7 +648,7 @@ u32 PPCAnalyzer::Analyze(u32 address, CodeBlock *block, CodeBuffer *buffer, u32
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && (address & JIT_ICACHE_VMEM_BIT)) if (SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && (address & JIT_ICACHE_VMEM_BIT))
{ {
if (!Memory::TranslateAddress(address, Memory::FLAG_OPCODE)) if (!Memory::TranslateAddress(address, Memory::FLAG_NO_EXCEPTION))
{ {
// Memory exception occurred during instruction fetch // Memory exception occurred during instruction fetch
block->m_memory_exception = true; block->m_memory_exception = true;

View File

@ -118,15 +118,23 @@ void Init(int cpu_core)
FPURoundMode::SetPrecisionMode(FPURoundMode::PREC_53); FPURoundMode::SetPrecisionMode(FPURoundMode::PREC_53);
memset(ppcState.sr, 0, sizeof(ppcState.sr)); memset(ppcState.sr, 0, sizeof(ppcState.sr));
ppcState.dtlb_last = 0;
memset(ppcState.dtlb_va, 0, sizeof(ppcState.dtlb_va));
memset(ppcState.dtlb_pa, 0, sizeof(ppcState.dtlb_pa));
ppcState.itlb_last = 0;
memset(ppcState.itlb_va, 0, sizeof(ppcState.itlb_va));
memset(ppcState.itlb_pa, 0, sizeof(ppcState.itlb_pa));
ppcState.pagetable_base = 0; ppcState.pagetable_base = 0;
ppcState.pagetable_hashmask = 0; ppcState.pagetable_hashmask = 0;
for (int tlb = 0; tlb < 2; tlb++)
{
for (int set = 0; set < 64; set++)
{
for (int way = 0; way < 2; way++)
{
ppcState.tlb[tlb][set][way].flags = TLB_FLAG_INVALID;
ppcState.tlb[tlb][set][way].paddr = 0;
ppcState.tlb[tlb][set][way].pte = 0;
ppcState.tlb[tlb][set][way].tag = 0;
}
}
}
ResetRegisters(); ResetRegisters();
PPCTables::InitTables(cpu_core); PPCTables::InitTables(cpu_core);

View File

@ -27,6 +27,26 @@ enum CoreMode
MODE_JIT, MODE_JIT,
}; };
// TLB cache
#define TLB_SIZE 128
#define TLB_WAYS 2
#define NUM_TLBS 2
#define HW_PAGE_INDEX_SHIFT 12
#define HW_PAGE_INDEX_MASK 0x3f
#define HW_PAGE_TAG_SHIFT 18
#define TLB_FLAG_MOST_RECENT 0x01
#define TLB_FLAG_INVALID 0x02
struct tlb_entry
{
u32 tag;
u32 paddr;
u32 pte;
u8 flags;
};
// This contains the entire state of the emulated PowerPC "Gekko" CPU. // This contains the entire state of the emulated PowerPC "Gekko" CPU.
struct GC_ALIGNED64(PowerPCState) struct GC_ALIGNED64(PowerPCState)
{ {
@ -87,13 +107,7 @@ struct GC_ALIGNED64(PowerPCState)
// also for power management, but we don't care about that. // also for power management, but we don't care about that.
u32 spr[1024]; u32 spr[1024];
u32 dtlb_last; tlb_entry tlb[NUM_TLBS][TLB_SIZE / TLB_WAYS][TLB_WAYS];
u32 dtlb_va[128];
u32 dtlb_pa[128];
u32 itlb_last;
u32 itlb_va[128];
u32 itlb_pa[128];
u32 pagetable_base; u32 pagetable_base;
u32 pagetable_hashmask; u32 pagetable_hashmask;

View File

@ -64,7 +64,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread; static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system // Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 36; static const u32 STATE_VERSION = 37;
enum enum
{ {