diff --git a/core/build.h b/core/build.h
index c3dc80b9a..5add9b580 100755
--- a/core/build.h
+++ b/core/build.h
@@ -115,7 +115,9 @@
  */

-#define NO_MMU
+//#define NO_MMU
+#define FAST_MMU
+#define USE_WINCE_HACK

 #define DC_PLATFORM_MASK        7
 #define DC_PLATFORM_DREAMCAST   0   /* Works, for the most part */
diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp
index 2c02197c5..4950a3e45 100644
--- a/core/hw/mem/_vmem.cpp
+++ b/core/hw/mem/_vmem.cpp
@@ -183,6 +183,11 @@ INLINE Trv DYNACALL _vmem_readt(u32 addr)
         }
     }
 }
+template u8 DYNACALL _vmem_readt<u8, u8>(u32 addr);
+template u16 DYNACALL _vmem_readt<u16, u16>(u32 addr);
+template u32 DYNACALL _vmem_readt<u32, u32>(u32 addr);
+template u64 DYNACALL _vmem_readt<u64, u64>(u32 addr);
+
 template<typename T>
 INLINE void DYNACALL _vmem_writet(u32 addr,T data)
 {
@@ -225,6 +230,10 @@ INLINE void DYNACALL _vmem_writet(u32 addr,T data)
         }
     }
 }
+template void DYNACALL _vmem_writet<u8>(u32 addr, u8 data);
+template void DYNACALL _vmem_writet<u16>(u32 addr, u16 data);
+template void DYNACALL _vmem_writet<u32>(u32 addr, u32 data);
+template void DYNACALL _vmem_writet<u64>(u32 addr, u64 data);

 //ReadMem/WriteMem functions
 //ReadMem
@@ -552,7 +561,7 @@ error:
 }
 #endif

-    int fd;
+    int vmem_fd;

 void* _nvmem_unused_buffer(u32 start,u32 end)
 {
     void* ptr=mmap(&virt_ram_base[start], end-start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -572,7 +581,7 @@ error:
     verify((addrsz%size)==0);
     verify(map_times>=1);
     u32 prot=PROT_READ|(w?PROT_WRITE:0);
-    rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset);
+    rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
     if (MAP_FAILED==rv || rv!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0))
     {
         printf("MAP1 failed %d\n",errno);
@@ -582,7 +591,7 @@ error:
     for (u32 i=1;i<map_times;i++)
     //… -> slow and stuttery
     {
-        fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
-        unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem");
+        vmem_fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
+        unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem");
     }
 #endif
@@ -730,7 +739,7 @@ bool _vmem_reserve()
     //I really should check teh docs before codin ;p
     //[0x00800000,0x00A00000);
     map_buffer(0x00800000,0x01000000,MAP_ARAM_START_OFFSET,ARAM_SIZE,false);
-    map_buffer(0x20000000,0x20000000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true);
+    map_buffer(0x02800000,0x02800000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true);

     aica_ram.size=ARAM_SIZE;
     aica_ram.data=(u8*)ptr;
@@ -804,7 +813,7 @@ void _vmem_release()
         virt_ram_base = NULL;
     }
 #if HOST_OS != OS_WINDOWS
-    close(fd);
+    close(vmem_fd);
 #endif
 }
 }
diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h
index 8509fe3ac..528cf2a90 100644
--- a/core/hw/mem/_vmem.h
+++ b/core/hw/mem/_vmem.h
@@ -49,11 +49,13 @@ u8 DYNACALL _vmem_ReadMem8(u32 Address);
 u16 DYNACALL _vmem_ReadMem16(u32 Address);
 u32 DYNACALL _vmem_ReadMem32(u32 Address);
 u64 DYNACALL _vmem_ReadMem64(u32 Address);
+template<typename T, typename Trv> Trv DYNACALL _vmem_readt(u32 addr);
 //WriteMem(s)
 void DYNACALL _vmem_WriteMem8(u32 Address,u8 data);
 void DYNACALL _vmem_WriteMem16(u32 Address,u16 data);
 void DYNACALL _vmem_WriteMem32(u32 Address,u32 data);
 void DYNACALL _vmem_WriteMem64(u32 Address,u64 data);
+template<typename T> void DYNACALL _vmem_writet(u32 addr, T data);

 //should be called at start up to ensure it will succeed :)
 bool _vmem_reserve();
@@ -70,4 +72,4 @@ static inline bool _nvmem_enabled() {
     return virt_ram_base != 0;
 }

-void _vmem_bm_reset();
\ No newline at end of file
+void _vmem_bm_reset();
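The explicit instantiations added to _vmem.cpp matter because _vmem_readt/_vmem_writet stay defined in the .cpp file while the new header declarations are now called from other translation units (the MMU code). A minimal sketch of the same pattern, with illustrative names rather than the tree's:

// buffer.h -- declaration only; the definition lives in buffer.cpp
template <typename T> T buf_read(unsigned addr);

// buffer.cpp
template <typename T> T buf_read(unsigned addr) { return static_cast<T>(addr & 0xff); }
// Explicit instantiations: without these, code in other .cpp files calling
// buf_read<unsigned char>() etc. would fail to link (undefined symbol).
template unsigned char buf_read<unsigned char>(unsigned addr);
template unsigned int buf_read<unsigned int>(unsigned addr);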
diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp
index 679f0a77d..fe109d63a 100644
--- a/core/hw/sh4/dyna/blockmanager.cpp
+++ b/core/hw/sh4/dyna/blockmanager.cpp
@@ -97,7 +97,7 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr)
 }

 // addr must be a virtual address
-DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
+DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
 {
 #ifndef NO_MMU
     if (!mmu_enabled())
@@ -110,32 +110,47 @@ DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
         {
             switch (addr)
             {
+#ifdef USE_WINCE_HACK
             case 0xfffffde7: // GetTickCount
                 // This should make this syscall faster
                 r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK;
                 next_pc = pr;
-                addr = next_pc;
                 break;
+
+            case 0xfffffd05: // QueryPerformanceCounter(u64 *)
+                {
+                    u32 paddr;
+                    if (mmu_data_translation<MMU_TT_DWRITE, u64>(r[4], paddr) == MMU_ERROR_NONE)
+                    {
+                        _vmem_WriteMem64(paddr, sh4_sched_now64() >> 4);
+                        r[0] = 1;
+                        next_pc = pr;
+                    }
+                    else
+                    {
+                        Do_Exception(addr, 0xE0, 0x100);
+                    }
+                }
+                break;
+#endif
+
             default:
                 Do_Exception(addr, 0xE0, 0x100);
-                addr = next_pc;
                 break;
             }
+            addr = next_pc;
         }

-        try {
-            u32 paddr;
-            bool shared;
-            mmu_instruction_translation(addr, paddr, shared);
-
-            return (DynarecCodeEntryPtr)bm_GetCode(paddr);
-        } catch (SH4ThrownException& ex) {
-            Do_Exception(addr, ex.expEvn, ex.callVect);
-            u32 paddr;
-            bool shared;
+        u32 paddr;
+        bool shared;
+        u32 rv = mmu_instruction_translation(addr, paddr, shared);
+        if (rv != MMU_ERROR_NONE)
+        {
+            DoMMUException(addr, rv, MMU_TT_IREAD);
             mmu_instruction_translation(next_pc, paddr, shared);
-            return (DynarecCodeEntryPtr)bm_GetCode(paddr);
         }
+
+        return (DynarecCodeEntryPtr)bm_GetCode(paddr);
     }
 #endif
 }
@@ -220,6 +235,7 @@ void bm_RemoveBlock(RuntimeBlockInfo* block)
             all_blocks.erase(it);
             break;
         }
+    // FIXME need to remove refs
     delete block;
 }
diff --git a/core/hw/sh4/dyna/blockmanager.h b/core/hw/sh4/dyna/blockmanager.h
index b750449e4..6dd89e481 100644
--- a/core/hw/sh4/dyna/blockmanager.h
+++ b/core/hw/sh4/dyna/blockmanager.h
@@ -17,7 +17,7 @@ struct RuntimeBlockInfo_Core

 struct RuntimeBlockInfo: RuntimeBlockInfo_Core
 {
-    void Setup(u32 pc,fpscr_t fpu_cfg);
+    bool Setup(u32 pc,fpscr_t fpu_cfg);
     const char* hash(bool full=true, bool reloc=false);

     u32 vaddr;
@@ -89,7 +89,7 @@ void bm_WriteBlockMap(const string& file);
 DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr);

 extern "C" {
-__attribute__((used)) DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr);
+__attribute__((used)) DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr);
 }

 RuntimeBlockInfo* bm_GetBlock(void* dynarec_code);
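The GetTickCount fast path above leans on the scheduler: sh4_sched_now64() returns elapsed SH4 cycles, and dividing by SH4_MAIN_CLOCK converts to milliseconds. A hedged sketch of the conversion, assuming the usual 200 MHz SH4 main clock (the exact constant lives in the tree):

#include <cstdint>

static const uint64_t SH4_MAIN_CLOCK_HZ = 200 * 1000 * 1000;  // assumption: 200 MHz

// Milliseconds elapsed since boot, derived from the emulated cycle count.
// This is the value the hack stores in r[0] before returning to the caller (pr).
// Note: multiply-before-divide keeps precision but would overflow u64 only
// after ~2^64/1000 cycles, i.e. never in practice.
uint64_t ticks_ms(uint64_t sh4_cycles)
{
    return sh4_cycles * 1000 / SH4_MAIN_CLOCK_HZ;
}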
diff --git a/core/hw/sh4/dyna/decoder.cpp b/core/hw/sh4/dyna/decoder.cpp
index cda15f6c4..8e9050441 100644
--- a/core/hw/sh4/dyna/decoder.cpp
+++ b/core/hw/sh4/dyna/decoder.cpp
@@ -1007,13 +1007,15 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg)
         state.info.has_fpu=false;
 }

-void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
+bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
 {
     blk=rbi;
     state_Setup(blk->vaddr, blk->fpu_cfg);
     ngen_GetFeatures(&state.ngen);

     blk->guest_opcodes=0;
+    // If full MMU, don't allow the block to extend past the end of the current 4K page
+    u32 max_pc = mmu_enabled() ? ((state.cpu.rpc >> 12) + 1) << 12 : 0xFFFFFFFF;

     for(;;)
     {
@@ -1025,10 +1027,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
             //there is no break here by design
         case NDO_NextOp:
             {
-                if (
-                    ( (blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT) || (blk->guest_cycles >= max_cycles) )
-                    && !state.cpu.is_delayslot
-                    )
+                if ((blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT || blk->guest_cycles >= max_cycles || state.cpu.rpc >= max_pc)
+                        && !state.cpu.is_delayslot)
                 {
                     dec_End(state.cpu.rpc,BET_StaticJump,false);
                 }
@@ -1053,7 +1053,16 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
                 }
                 */

-                u32 op=IReadMem16(state.cpu.rpc);
+                u32 op;
+                if (!mmu_enabled())
+                    op = IReadMem16(state.cpu.rpc);
+                else
+                {
+                    u32 exception_occurred;
+                    op = mmu_IReadMem16NoEx(state.cpu.rpc, &exception_occurred);
+                    if (exception_occurred)
+                        return false;
+                }
                 if (op==0 && state.cpu.is_delayslot)
                 {
                     printf("Delayslot 0 hack!\n");
@@ -1104,8 +1113,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)

         case NDO_Jump:
             die("Too old");
-            state.NextOp=state.JumpOp;
-            state.cpu.rpc=state.JumpAddr;
+            //state.NextOp=state.JumpOp;
+            //state.cpu.rpc=state.JumpAddr;
             break;

         case NDO_End:
@@ -1187,6 +1196,8 @@ _end:
     //make sure we don't use wayy-too-few cycles
     blk->guest_cycles=max(1U,blk->guest_cycles);
     blk=0;
+
+    return true;
 }
 #endif
diff --git a/core/hw/sh4/dyna/decoder.h b/core/hw/sh4/dyna/decoder.h
index efe0e470e..4fb843f47 100644
--- a/core/hw/sh4/dyna/decoder.h
+++ b/core/hw/sh4/dyna/decoder.h
@@ -45,7 +45,7 @@ struct ngen_features
 };

 struct RuntimeBlockInfo;
-void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
+bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);

 struct state_t
 {
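The decoder change above stops blocks at 4K page boundaries when the MMU is on: a compiled block is cached under a single physical address, so letting it spill into the next virtual page could mix two unrelated physical pages. A small sketch of the clamp under that assumption:

#include <cstdint>

// First address past the 4K page containing pc. dec_DecodeBlock compares
// state.cpu.rpc against this and ends the block once it is reached.
static inline uint32_t next_page(uint32_t pc)
{
    return ((pc >> 12) + 1) << 12;    // e.g. 0x8C0017F0 -> 0x8C002000
}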
diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp
index d410b8546..9c803d492 100644
--- a/core/hw/sh4/dyna/driver.cpp
+++ b/core/hw/sh4/dyna/driver.cpp
@@ -194,7 +194,7 @@ const char* RuntimeBlockInfo::hash(bool full, bool relocable)
     return block_hash;
 }

-void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
+bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
 {
     staging_runs=addr=lookups=runs=host_code_size=0;
     guest_cycles=guest_opcodes=host_opcodes=0;
@@ -210,7 +210,12 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
     if (mmu_enabled())
     {
         bool shared;
-        mmu_instruction_translation(vaddr, addr, shared);
+        u32 rv = mmu_instruction_translation(vaddr, addr, shared);
+        if (rv != MMU_ERROR_NONE)
+        {
+            DoMMUException(vaddr, rv, MMU_TT_IREAD);
+            return false;
+        }
         if (addr != vaddr && !shared)
             asid = CCN_PTEH.ASID;
     }
@@ -220,8 +225,12 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)

     oplist.clear();

-    dec_DecodeBlock(this,SH4_TIMESLICE/2);
+    if (!dec_DecodeBlock(this,SH4_TIMESLICE/2))
+        return false;
+
     AnalyseBlock(this);
+
+    return true;
 }

 DynarecCodeEntryPtr rdv_CompilePC()
@@ -232,43 +241,36 @@ DynarecCodeEntryPtr rdv_CompilePC()
         recSh4_ClearCache();

     RuntimeBlockInfo* rbi = ngen_AllocateBlock();
-#ifndef NO_MMU
-    try {
-#endif
-        rbi->Setup(pc,fpscr);
-
-        bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
-        rbi->staging_runs=do_opts?100:-100;
-        ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
-        verify(rbi->code!=0);
-
-        bm_AddBlock(rbi);
-#ifndef NO_MMU
-    } catch (SH4ThrownException& ex) {
+    if (!rbi->Setup(pc,fpscr))
+    {
         delete rbi;
-        throw ex;
+        return NULL;
     }
-#endif
+
+    bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
+    rbi->staging_runs=do_opts?100:-100;
+    ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
+    verify(rbi->code!=0);
+
+    bm_AddBlock(rbi);

     return rbi->code;
 }

+DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock_pc()
+{
+    return rdv_FailedToFindBlock(next_pc);
+}
+
 DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc)
 {
     //printf("rdv_FailedToFindBlock ~ %08X\n",pc);
-#ifndef NO_MMU
-    try {
-#endif
-        next_pc=pc;
-
-        return rdv_CompilePC();
-#ifndef NO_MMU
-    } catch (SH4ThrownException& ex) {
-        Do_Exception(pc, ex.expEvn, ex.callVect);
-        return bm_GetCode2(next_pc);
-    }
-#endif
+    next_pc=pc;
+    DynarecCodeEntryPtr code = rdv_CompilePC();
+    if (code == NULL)
+        code = bm_GetCodeByVAddr(next_pc);
+    return code;
 }

 static void ngen_FailedToFindBlock_internal() {
@@ -304,8 +306,17 @@ u32 DYNACALL rdv_DoInterrupts(void* block_cpde)
 // addr must be the physical address of the start of the block
 DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)
 {
-    RuntimeBlockInfo *block = bm_GetBlock(addr);
-    bm_RemoveBlock(block);
+    if (mmu_enabled())
+    {
+        RuntimeBlockInfo *block = bm_GetBlock(addr);
+        //printf("rdv_BlockCheckFail addr %08x vaddr %08x pc %08x\n", addr, block->vaddr, next_pc);
+        bm_RemoveBlock(block);
+    }
+    else
+    {
+        next_pc = addr;
+        recSh4_ClearCache();
+    }
     return rdv_CompilePC();
 }

@@ -320,7 +331,7 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)

 DynarecCodeEntryPtr rdv_FindOrCompile()
 {
-    DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
+    DynarecCodeEntryPtr rv=bm_GetCodeByVAddr(next_pc);
     if (rv==ngen_FailedToFindBlock)
         rv=rdv_CompilePC();

@@ -359,7 +370,7 @@ void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc)

     DynarecCodeEntryPtr rv=rdv_FindOrCompile();

-    bool do_link=bm_GetBlock(code)==rbi;
+    bool do_link = !mmu_enabled() && bm_GetBlock(code) == rbi;

     if (do_link)
     {
diff --git a/core/hw/sh4/dyna/ngen.h b/core/hw/sh4/dyna/ngen.h
index f2b07aef2..d9c4209b3 100644
--- a/core/hw/sh4/dyna/ngen.h
+++ b/core/hw/sh4/dyna/ngen.h
@@ -65,6 +65,7 @@ void emit_SetBaseAddr();

 //Called from ngen_FailedToFindBlock
 DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc);
+DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock_pc();
 //Called when a block check failed, and the block needs to be invalidated
 DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc);
 //Called to compile code @pc
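The driver changes establish the pattern used throughout this diff: translation failures no longer throw SH4ThrownException; they return an MMU_ERROR_* code and the caller raises the guest exception explicitly, then re-dispatches from next_pc. A condensed sketch of that control flow (translate_or_raise is an illustrative helper, not in the tree):

u32 translate_or_raise(u32 va, u32& pa)
{
    bool shared;
    u32 rv = mmu_instruction_translation(va, pa, shared);
    if (rv != MMU_ERROR_NONE)
        DoMMUException(va, rv, MMU_TT_IREAD);   // queues the exception, updates next_pc
    return rv;                                  // caller bails out and re-dispatches
}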
diff --git a/core/hw/sh4/modules/fastmmu.cpp b/core/hw/sh4/modules/fastmmu.cpp
new file mode 100644
index 000000000..2211e3e04
--- /dev/null
+++ b/core/hw/sh4/modules/fastmmu.cpp
@@ -0,0 +1,406 @@
+#include "mmu.h"
+#include "hw/sh4/sh4_if.h"
+#include "hw/sh4/sh4_interrupts.h"
+#include "hw/sh4/sh4_core.h"
+#include "types.h"
+
+#ifdef FAST_MMU
+
+#include "hw/mem/_vmem.h"
+
+#include "mmu_impl.h"
+#include "ccn.h"
+#include "hw/sh4/sh4_mem.h"
+#include "oslib/oslib.h"
+
+extern TLB_Entry UTLB[64];
+// Used when FullMMU is off
+extern u32 sq_remap[64];
+
+//#define TRACE_WINCE_SYSCALLS
+
+#include "wince.h"
+
+#define printf_mmu(...)
+//#define printf_mmu printf
+#define printf_win32(...)
+
+extern const u32 mmu_mask[4];
+extern const u32 fast_reg_lut[8];
+
+const TLB_Entry *lru_entry = NULL;
+static u32 lru_mask;
+static u32 lru_address;
+
+struct TLB_LinkedEntry {
+    TLB_Entry entry;
+    TLB_LinkedEntry *next_entry;
+};
+#define NBUCKETS 65536
+TLB_LinkedEntry full_table[65536];
+u32 full_table_size;
+TLB_LinkedEntry *entry_buckets[NBUCKETS];
+
+static u16 bucket_index(u32 address, int size)
+{
+    return ((address >> 16) ^ ((address & 0xFC00) | size)) & (NBUCKETS - 1);
+}
+
+static void cache_entry(const TLB_Entry &entry)
+{
+    verify(full_table_size < ARRAY_SIZE(full_table));
+    u16 bucket = bucket_index(entry.Address.VPN << 10, entry.Data.SZ1 * 2 + entry.Data.SZ0);
+
+    full_table[full_table_size].entry = entry;
+    full_table[full_table_size].next_entry = entry_buckets[bucket];
+    entry_buckets[bucket] = &full_table[full_table_size];
+    full_table_size++;
+}
+
+static void flush_cache()
+{
+    full_table_size = 0;
+    memset(entry_buckets, 0, sizeof(entry_buckets));
+}
+
+template<u32 size>
+bool find_entry_by_page_size(u32 address, const TLB_Entry **ret_entry)
+{
+    u32 shift = size == 1 ? 2 :
+            size == 2 ? 6 :
+            size == 3 ? 10 : 0;
+    u32 vpn = (address >> (10 + shift)) << shift;
+    u16 bucket = bucket_index(vpn << 10, size);
+    TLB_LinkedEntry *pEntry = entry_buckets[bucket];
+    u32 length = 0;
+    while (pEntry != NULL)
+    {
+        if (pEntry->entry.Address.VPN == vpn && (size >> 1) == pEntry->entry.Data.SZ1 && (size & 1) == pEntry->entry.Data.SZ0)
+        {
+            if (pEntry->entry.Data.SH == 1 || pEntry->entry.Address.ASID == CCN_PTEH.ASID)
+            {
+                *ret_entry = &pEntry->entry;
+                return true;
+            }
+        }
+        pEntry = pEntry->next_entry;
+    }
+
+    return false;
+}
+
+static bool find_entry(u32 address, const TLB_Entry **ret_entry)
+{
+    // 4k
+    if (find_entry_by_page_size<1>(address, ret_entry))
+        return true;
+    // 64k
+    if (find_entry_by_page_size<2>(address, ret_entry))
+        return true;
+    // 1m
+    if (find_entry_by_page_size<3>(address, ret_entry))
+        return true;
+    // 1k
+    if (find_entry_by_page_size<0>(address, ret_entry))
+        return true;
+    return false;
+}
+
+#if 0
+static void dump_table()
+{
+    static int iter = 1;
+    char filename[128];
+    sprintf(filename, "mmutable%03d", iter++);
+    FILE *f = fopen(filename, "wb");
+    if (f == NULL)
+        return;
+    fwrite(full_table, sizeof(full_table[0]), full_table_size, f);
+    fclose(f);
+}
+
+int main(int argc, char *argv[])
+{
+    FILE *f = fopen(argv[1], "rb");
+    if (f == NULL)
+    {
+        perror(argv[1]);
+        return 1;
+    }
+    full_table_size = fread(full_table, sizeof(full_table[0]), ARRAY_SIZE(full_table), f);
+    fclose(f);
+    printf("Loaded %d entries\n", full_table_size);
+    std::vector<u32> addrs;
+    std::vector<u32> asids;
+    for (int i = 0; i < full_table_size; i++)
+    {
+        u32 sz = full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0;
+        u32 mask = sz == 3 ? 1*1024*1024 : sz == 2 ? 64*1024 : sz == 1 ? 4*1024 : 1024;
+        mask--;
+        addrs.push_back(((full_table[i].entry.Address.VPN << 10) & mmu_mask[sz]) | (random() * mask / RAND_MAX));
+        asids.push_back(full_table[i].entry.Address.ASID);
+//        printf("%08x -> %08x sz %d ASID %d SH %d\n", full_table[i].entry.Address.VPN << 10, full_table[i].entry.Data.PPN << 10,
+//                full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0,
+//                full_table[i].entry.Address.ASID, full_table[i].entry.Data.SH);
+        u16 bucket = bucket_index(full_table[i].entry.Address.VPN << 10, full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0);
+        full_table[i].next_entry = entry_buckets[bucket];
+        entry_buckets[bucket] = &full_table[i];
+    }
+    for (int i = 0; i < full_table_size / 10; i++)
+    {
+        addrs.push_back(random());
+        asids.push_back(666);
+    }
+    double start = os_GetSeconds();
+    int success = 0;
+    const int loops = 100000;
+    for (int i = 0; i < loops; i++)
+    {
+        for (int j = 0; j < addrs.size(); j++)
+        {
+            u32 addr = addrs[j];
+            CCN_PTEH.ASID = asids[j];
+            const TLB_Entry *p;
+            if (find_entry(addr, &p))
+                success++;
+        }
+    }
+    double end = os_GetSeconds();
+    printf("Lookup time: %f ms. Success rate %f max_len %d\n", (end - start) * 1000.0 / addrs.size(), (double)success / addrs.size() / loops, 0/*max_length*/);
+}
+#endif
+
+bool UTLB_Sync(u32 entry)
+{
+    TLB_Entry& tlb_entry = UTLB[entry];
+    u32 sz = tlb_entry.Data.SZ1 * 2 + tlb_entry.Data.SZ0;
+
+    lru_entry = &tlb_entry;
+    lru_mask = mmu_mask[sz];
+    lru_address = (tlb_entry.Address.VPN << 10) & lru_mask;
+
+    tlb_entry.Address.VPN = lru_address >> 10;
+    cache_entry(tlb_entry);
+
+    if (!mmu_enabled() && (tlb_entry.Address.VPN & (0xFC000000 >> 10)) == (0xE0000000 >> 10))
+    {
+        // Used when FullMMU is off
+        u32 vpn_sq = ((tlb_entry.Address.VPN & 0x7FFFF) >> 10) & 0x3F;    //upper bits are always known [0xE0/E1/E2/E3]
+        sq_remap[vpn_sq] = tlb_entry.Data.PPN << 10;
+    }
+    return true;
+}
+
+void ITLB_Sync(u32 entry)
+{
+}
+
+//Do a full lookup on the UTLB entry's
+template<bool internal>
+u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
+{
+    if (lru_entry != NULL)
+    {
+        if (/*lru_entry->Data.V == 1 && */
+                lru_address == (va & lru_mask)
+                && (lru_entry->Address.ASID == CCN_PTEH.ASID
+                    || lru_entry->Data.SH == 1
+                    /*|| (sr.MD == 1 && CCN_MMUCR.SV == 1)*/))    // SV=1 not handled
+        {
+            //VPN->PPN | low bits
+            // TODO mask off PPN when updating TLB to avoid doing it at look up time
+            rv = ((lru_entry->Data.PPN << 10) & lru_mask) | (va & (~lru_mask));
+            *tlb_entry_ret = lru_entry;
+
+            return MMU_ERROR_NONE;
+        }
+    }
+
+    if (find_entry(va, tlb_entry_ret))
+    {
+        u32 mask = mmu_mask[(*tlb_entry_ret)->Data.SZ1 * 2 + (*tlb_entry_ret)->Data.SZ0];
+        rv = (((*tlb_entry_ret)->Data.PPN << 10) & mask) | (va & (~mask));
+        lru_entry = *tlb_entry_ret;
+        lru_mask = mask;
+        lru_address = ((*tlb_entry_ret)->Address.VPN << 10);
+        return MMU_ERROR_NONE;
+    }
+
+#ifdef USE_WINCE_HACK
+    // WinCE hack
+    TLB_Entry entry;
+    if (wince_resolve_address(va, entry))
+    {
+        CCN_PTEL.reg_data = entry.Data.reg_data;
+        CCN_PTEA.reg_data = entry.Assistance.reg_data;
+        CCN_PTEH.reg_data = entry.Address.reg_data;
+        UTLB[CCN_MMUCR.URC] = entry;
+
+        *tlb_entry_ret = &UTLB[CCN_MMUCR.URC];
+        lru_entry = *tlb_entry_ret;
+
+        u32 sz = lru_entry->Data.SZ1 * 2 + lru_entry->Data.SZ0;
+        lru_mask = mmu_mask[sz];
+        lru_address = va & lru_mask;
+
+        rv = ((lru_entry->Data.PPN << 10) & lru_mask) | (va & (~lru_mask));
+
+        cache_entry(*lru_entry);
+
+        return MMU_ERROR_NONE;
+    }
+#endif
+
+    return MMU_ERROR_TLB_MISS;
+}
+template u32 mmu_full_lookup<false>(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv);
+
+template<u32 translation_type, typename T>
+u32 mmu_data_translation(u32 va, u32& rv)
+{
+    if (va & (sizeof(T) - 1))
+    {
+        return MMU_ERROR_BADADDR;
+    }
+
+    if (translation_type == MMU_TT_DWRITE)
+    {
+        if ((va & 0xFC000000) == 0xE0000000)
+        {
+            u32 lookup = mmu_full_SQ<translation_type>(va, rv);
+            if (lookup != MMU_ERROR_NONE)
+                return lookup;
+
+            rv = va;    //SQ writes are not translated, only write backs are.
+            return MMU_ERROR_NONE;
+        }
+    }
+
+//    if ((sr.MD == 0) && (va & 0x80000000) != 0)
+//    {
+//        //if on kernel, and not SQ addr -> error
+//        return MMU_ERROR_BADADDR;
+//    }
+
+    if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
+    {
+        rv = va;
+        return MMU_ERROR_NONE;
+    }
+
+    // Not called if CCN_MMUCR.AT == 0
+    //if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
+    if (fast_reg_lut[va >> 29] != 0)
+    {
+        rv = va;
+        return MMU_ERROR_NONE;
+    }
+
+    const TLB_Entry *entry;
+    u32 lookup = mmu_full_lookup(va, &entry, rv);
+
+//    if (lookup != MMU_ERROR_NONE)
+//        return lookup;
+
+#ifdef TRACE_WINCE_SYSCALLS
+    if (unresolved_unicode_string != 0 && lookup == MMU_ERROR_NONE)
+    {
+        if (va == unresolved_unicode_string)
+        {
+            unresolved_unicode_string = 0;
+            printf("RESOLVED %s\n", get_unicode_string(va).c_str());
+        }
+    }
+#endif
+
+//    u32 md = entry->Data.PR >> 1;
+//
+//    //0X  & User mode-> protection violation
+//    //Priv mode protection
+//    if ((md == 0) && sr.MD == 0)
+//    {
+//        die("MMU_ERROR_PROTECTED");
+//        return MMU_ERROR_PROTECTED;
+//    }
+//
+//    //X0 -> read olny
+//    //X1 -> read/write , can be FW
+//
+//    //Write Protection (Lock or FW)
+//    if (translation_type == MMU_TT_DWRITE)
+//    {
+//        if ((entry->Data.PR & 1) == 0)
+//        {
+//            die("MMU_ERROR_PROTECTED");
+//            return MMU_ERROR_PROTECTED;
+//        }
+//        else if (entry->Data.D == 0)
+//        {
+//            die("MMU_ERROR_FIRSTWRITE");
+//            return MMU_ERROR_FIRSTWRITE;
+//        }
+//    }
+
+    return lookup;
+}
+template u32 mmu_data_translation<MMU_TT_DREAD, u8>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u64>(u32 va, u32& rv);
+
+template u32 mmu_data_translation<MMU_TT_DWRITE, u8>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DWRITE, u16>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DWRITE, u32>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DWRITE, u64>(u32 va, u32& rv);
+
+u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared)
+{
+    if (va & 1)
+    {
+        return MMU_ERROR_BADADDR;
+    }
+//    if ((sr.MD == 0) && (va & 0x80000000) != 0)
+//    {
+//        //if SQ disabled , or if if SQ on but out of SQ mem then BAD ADDR ;)
+//        if (va >= 0xE0000000)
+//            return MMU_ERROR_BADADDR;
+//    }
+
+    if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
+    {
+        rv = va;
+        return MMU_ERROR_NONE;
+    }
+
+    // Hack fast implementation
+    const TLB_Entry *tlb_entry;
+    u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
+    if (lookup != MMU_ERROR_NONE)
+        return lookup;
+    u32 md = tlb_entry->Data.PR >> 1;
+    //0X  & User mode-> protection violation
+    //Priv mode protection
+//    if ((md == 0) && sr.MD == 0)
+//    {
+//        return MMU_ERROR_PROTECTED;
+//    }
+    shared = tlb_entry->Data.SH == 1;
+    return MMU_ERROR_NONE;
+}
+
+void mmu_flush_table()
+{
+//    printf("MMU tables flushed\n");
+
+//    ITLB[0].Data.V = 0;
+//    ITLB[1].Data.V = 0;
+//    ITLB[2].Data.V = 0;
+//    ITLB[3].Data.V = 0;
+//
+//    for (u32 i = 0; i < 64; i++)
+//        UTLB[i].Data.V = 0;
+
+    lru_entry = NULL;
+    flush_cache();
+}
+#endif  // FAST_MMU
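fastmmu's software TLB above keeps every entry ever synced in a 64K-bucket hash table keyed on VPN and page size, with a one-entry LRU cache in front; lookup probes the four possible page sizes (4K first, as the common case). A self-contained sketch of the same scheme with simplified entry types (Entry and find are illustrative, not the tree's):

#include <cstdint>
#include <cstring>

struct Entry { uint32_t vpn; int size; Entry* next; };

static const int NBUCKETS = 65536;
static Entry* buckets[NBUCKETS];

// Same bit mixing as the diff: high VA bits XORed with mid bits and a size tag.
static uint16_t bucket_index(uint32_t address, int size)
{
    return ((address >> 16) ^ ((address & 0xFC00) | size)) & (NBUCKETS - 1);
}

static bool find(uint32_t va, int size, Entry** out)
{
    // 1K/4K/64K/1M pages keep 0/2/6/10 extra VPN bits, mirroring mmu_mask[].
    static const int shift[4] = { 0, 2, 6, 10 };
    uint32_t vpn = (va >> (10 + shift[size])) << shift[size];
    for (Entry* e = buckets[bucket_index(vpn << 10, size)]; e != NULL; e = e->next)
        if (e->vpn == vpn && e->size == size) { *out = e; return true; }
    return false;
}

Entries are never evicted (flush_cache just resets the table), which is why UTLB_Sync can cache unconditionally: the table is a superset of the hardware UTLB, rebuilt from scratch on each MMU flush.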
diff --git a/core/hw/sh4/modules/mmu.cpp b/core/hw/sh4/modules/mmu.cpp
index 07b20f923..4799cb0fb 100644
--- a/core/hw/sh4/modules/mmu.cpp
+++ b/core/hw/sh4/modules/mmu.cpp
@@ -101,7 +101,7 @@ WriteMem16Func WriteMem16;
 WriteMem32Func WriteMem32;
 WriteMem64Func WriteMem64;

-const u32 mmu_mask[4] =
+extern const u32 mmu_mask[4] =
 {
     ((0xFFFFFFFF) >> 10) << 10, //1 kb page
     ((0xFFFFFFFF) >> 12) << 12, //4 kb page
@@ -109,7 +109,7 @@ const u32 mmu_mask[4] =
     ((0xFFFFFFFF) >> 20) << 20  //1 MB page
 };

-const u32 fast_reg_lut[8] =
+extern const u32 fast_reg_lut[8] =
 {
     0, 0, 0, 0  //P0-U0
     , 1         //P1
@@ -134,6 +134,7 @@ const u32 ITLB_LRU_AND[4] =
 };
 u32 ITLB_LRU_USE[64];

+#ifndef FAST_MMU
 //sync mem mapping to mmu , suspend compiled blocks if needed.entry is a UTLB entry # , -1 is for full sync
 bool UTLB_Sync(u32 entry)
 {
@@ -160,6 +161,7 @@ void ITLB_Sync(u32 entry)
 {
     printf_mmu("ITLB MEM remap %d : 0x%X to 0x%X : %d\n", entry, ITLB[entry].Address.VPN << 10, ITLB[entry].Data.PPN << 10, ITLB[entry].Data.V);
 }
+#endif

 void RaiseException(u32 expEvnt, u32 callVect) {
 #if !defined(NO_MMU)
@@ -170,16 +172,12 @@ void RaiseException(u32 expEvnt, u32 callVect) {
 #endif
 }

-u32 mmu_error_TT;
 void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
 {
     printf_mmu("mmu_raise_exception -> pc = 0x%X : ", next_pc);
     CCN_TEA = address;
     CCN_PTEH.VPN = address >> 10;

-    //save translation type error :)
-    mmu_error_TT = am;
-
     switch (mmu_error)
     {
         //No error
@@ -262,6 +260,94 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
     die("Unknown mmu_error");
 }

+
+void DoMMUException(u32 address, u32 mmu_error, u32 access_type)
+{
+    printf_mmu("DoMMUException -> pc = 0x%X : ", next_pc);
+    CCN_TEA = address;
+    CCN_PTEH.VPN = address >> 10;
+
+    switch (mmu_error)
+    {
+        //No error
+    case MMU_ERROR_NONE:
+        printf("Error : mmu_raise_exception(MMU_ERROR_NONE)\n");
+        break;
+
+        //TLB miss
+    case MMU_ERROR_TLB_MISS:
+        printf_mmu("MMU_ERROR_UTLB_MISS 0x%X, handled\n", address);
+        if (access_type == MMU_TT_DWRITE)           //WTLBMISS - Write Data TLB Miss Exception
+            Do_Exception(next_pc, 0x60, 0x400);
+        else if (access_type == MMU_TT_DREAD)       //RTLBMISS - Read Data TLB Miss Exception
+            Do_Exception(next_pc, 0x40, 0x400);
+        else                                        //ITLBMISS - Instruction TLB Miss Exception
+            Do_Exception(next_pc, 0x40, 0x400);
+
+        return;
+        break;
+
+        //TLB Multihit
+    case MMU_ERROR_TLB_MHIT:
+        printf("MMU_ERROR_TLB_MHIT @ 0x%X\n", address);
+        break;
+
+        //Mem is read/write protected (depends on translation type)
+    case MMU_ERROR_PROTECTED:
+        printf_mmu("MMU_ERROR_PROTECTED 0x%X, handled\n", address);
+        if (access_type == MMU_TT_DWRITE)           //WRITEPROT - Write Data TLB Protection Violation Exception
+            Do_Exception(next_pc, 0xC0, 0x100);
+        else if (access_type == MMU_TT_DREAD)       //READPROT - Data TLB Protection Violation Exception
+            Do_Exception(next_pc, 0xA0, 0x100);
+        else
+        {
+            verify(false);
+        }
+        return;
+        break;
+
+        //Mem is write protected , firstwrite
+    case MMU_ERROR_FIRSTWRITE:
+        printf_mmu("MMU_ERROR_FIRSTWRITE\n");
+        verify(access_type == MMU_TT_DWRITE);
+        //FIRSTWRITE - Initial Page Write Exception
+        Do_Exception(next_pc, 0x80, 0x100);
+
+        return;
+        break;
+
+        //data read/write missasligned
+    case MMU_ERROR_BADADDR:
+        if (access_type == MMU_TT_DWRITE)           //WADDERR - Write Data Address Error
+            Do_Exception(next_pc, 0x100, 0x100);
+        else if (access_type == MMU_TT_DREAD)       //RADDERR - Read Data Address Error
+            Do_Exception(next_pc, 0xE0, 0x100);
+        else                                        //IADDERR - Instruction Address Error
+        {
+#ifdef TRACE_WINCE_SYSCALLS
+            if (!print_wince_syscall(address))
+#endif
+                printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address);
+            Do_Exception(next_pc, 0xE0, 0x100);
+            return;
+        }
+        printf_mmu("MMU_ERROR_BADADDR(d) 0x%X, handled\n", address);
+        return;
+        break;
+
+        //Can't Execute
+    case MMU_ERROR_EXECPROT:
+        printf("MMU_ERROR_EXECPROT 0x%X\n", address);
+
+        //EXECPROT - Instruction TLB Protection Violation Exception
+        Do_Exception(next_pc, 0xA0, 0x100);
+        return;
+        break;
+    }
+
+    die("Unknown mmu_error");
+}
+
 bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
 {
     if (Data.V == 0)
@@ -283,6 +369,7 @@ bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
     return false;
 }

+#ifndef FAST_MMU
 //Do a full lookup on the UTLB entry's
 template<bool internal>
 u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
@@ -328,6 +415,7 @@ u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)

     return MMU_ERROR_NONE;
 }
+#endif

 //Simple QACR translation for mmu (when AT is off)
 u32 mmu_QACR_SQ(u32 va)
@@ -342,6 +430,7 @@ u32 mmu_QACR_SQ(u32 va)
     va &= ~0x1f;
     return QACR + va;
 }
+
 template<u32 translation_type>
 u32 mmu_full_SQ(u32 va, u32& rv)
 {
@@ -387,11 +476,14 @@ u32 mmu_full_SQ(u32 va, u32& rv)
     }
     return MMU_ERROR_NONE;
 }
+template u32 mmu_full_SQ<MMU_TT_DWRITE>(u32 va, u32& rv);
+
+#ifndef FAST_MMU
 template<u32 translation_type, typename T>
-void mmu_data_translation(u32 va, u32& rv)
+u32 mmu_data_translation(u32 va, u32& rv)
 {
     if (va & (sizeof(T) - 1))
-        mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
+        return MMU_ERROR_BADADDR;

     if (translation_type == MMU_TT_DWRITE)
     {
@@ -399,23 +491,23 @@ u32 mmu_data_translation(u32 va, u32& rv)
         {
             u32 lookup = mmu_full_SQ<translation_type>(va, rv);
             if (lookup != MMU_ERROR_NONE)
-                mmu_raise_exception(lookup, va, translation_type);
+                return lookup;

             rv = va;    //SQ writes are not translated, only write backs are.
-            return;
+            return MMU_ERROR_NONE;
         }
     }

     if ((sr.MD == 0) && (va & 0x80000000) != 0)
     {
         //if on kernel, and not SQ addr -> error
-        mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
+        return MMU_ERROR_BADADDR;
     }

     if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
     {
         rv = va;
-        return;
+        return MMU_ERROR_NONE;
     }

     // Not called if CCN_MMUCR.AT == 0
@@ -423,14 +515,14 @@ u32 mmu_data_translation(u32 va, u32& rv)
     if (fast_reg_lut[va >> 29] != 0)
     {
         rv = va;
-        return;
+        return MMU_ERROR_NONE;
     }

     const TLB_Entry *entry;
     u32 lookup = mmu_full_lookup(va, &entry, rv);

     if (lookup != MMU_ERROR_NONE)
-        mmu_raise_exception(lookup, va, translation_type);
+        return lookup;

 #ifdef TRACE_WINCE_SYSCALLS
     if (unresolved_unicode_string != 0)
@@ -449,7 +541,7 @@ u32 mmu_data_translation(u32 va, u32& rv)
     //Priv mode protection
     if ((md == 0) && sr.MD == 0)
     {
-        mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
+        return MMU_ERROR_PROTECTED;
     }

     //X0 -> read olny
@@ -459,32 +551,34 @@ u32 mmu_data_translation(u32 va, u32& rv)
     if (translation_type == MMU_TT_DWRITE)
     {
         if ((entry->Data.PR & 1) == 0)
-            mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
+            return MMU_ERROR_PROTECTED;
         else if (entry->Data.D == 0)
-            mmu_raise_exception(MMU_ERROR_FIRSTWRITE, va, translation_type);
+            return MMU_ERROR_FIRSTWRITE;
     }
+    return MMU_ERROR_NONE;
 }
-template void mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
-template void mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DWRITE, u64>(u32 va, u32& rv);

-void mmu_instruction_translation(u32 va, u32& rv, bool& shared)
+u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared)
 {
     if (va & 1)
     {
-        mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
+        return MMU_ERROR_BADADDR;
     }
     if ((sr.MD == 0) && (va & 0x80000000) != 0)
     {
         //if SQ disabled , or if if SQ on but out of SQ mem then BAD ADDR ;)
         if (va >= 0xE0000000)
-            mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
+            return MMU_ERROR_BADADDR;
     }

     if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
     {
         rv = va;
-        return;
+        return MMU_ERROR_NONE;
     }

     bool mmach = false;
@@ -521,7 +615,7 @@ retry_ITLB_Match:

         u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
         if (lookup != MMU_ERROR_NONE)
-            mmu_raise_exception(lookup, va, MMU_TT_IREAD);
+            return lookup;

         u32 replace_index = ITLB_LRU_USE[CCN_MMUCR.LRUI];
         verify(replace_index != 0xFFFFFFFF);
@@ -535,11 +629,11 @@ retry_ITLB_Match:
     {
         if (nom)
         {
-            mmu_raise_exception(MMU_ERROR_TLB_MHIT, va, MMU_TT_IREAD);
+            return MMU_ERROR_TLB_MHIT;
         }
         else
         {
-            mmu_raise_exception(MMU_ERROR_TLB_MISS, va, MMU_TT_IREAD);
+            return MMU_ERROR_TLB_MISS;
         }
     }

@@ -552,25 +646,27 @@ retry_ITLB_Match:
     //Priv mode protection
     if ((md == 0) && sr.MD == 0)
     {
-        mmu_raise_exception(MMU_ERROR_PROTECTED, va, MMU_TT_IREAD);
+        return MMU_ERROR_PROTECTED;
     }
+    return MMU_ERROR_NONE;
 }
+#endif

 void mmu_set_state()
 {
     if (CCN_MMUCR.AT == 1 && settings.dreamcast.FullMMU)
     {
         printf("Enabling Full MMU support\n");
-        ReadMem8 = &mmu_ReadMem8;
-        ReadMem16 = &mmu_ReadMem16;
         IReadMem16 = &mmu_IReadMem16;
-        ReadMem32 = &mmu_ReadMem32;
-        ReadMem64 = &mmu_ReadMem64;
+        ReadMem8 = &mmu_ReadMem<u8>;
+        ReadMem16 = &mmu_ReadMem<u16>;
+        ReadMem32 = &mmu_ReadMem<u32>;
+        ReadMem64 = &mmu_ReadMem<u64>;

-        WriteMem8 = &mmu_WriteMem8;
-        WriteMem16 = &mmu_WriteMem16;
-        WriteMem32 = &mmu_WriteMem32;
-        WriteMem64 = &mmu_WriteMem64;
+        WriteMem8 = &mmu_WriteMem<u8>;
+        WriteMem16 = &mmu_WriteMem<u16>;
+        WriteMem32 = &mmu_WriteMem<u32>;
+        WriteMem64 = &mmu_WriteMem<u64>;
         mmu_flush_table();
     }
     else
@@ -619,6 +715,7 @@ void MMU_term()
 {
 }

+#ifndef FAST_MMU
 void mmu_flush_table()
 {
     //printf("MMU tables flushed\n");
@@ -631,66 +728,95 @@ void mmu_flush_table()
     for (u32 i = 0; i < 64; i++)
         UTLB[i].Data.V = 0;
 }
+#endif

-u8 DYNACALL mmu_ReadMem8(u32 adr)
+template<typename T>
+T DYNACALL mmu_ReadMem(u32 adr)
 {
     u32 addr;
-    mmu_data_translation<MMU_TT_DREAD, u8>(adr, addr);
-    return _vmem_ReadMem8(addr);
+    u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
+    if (rv != MMU_ERROR_NONE)
+        mmu_raise_exception(rv, adr, MMU_TT_DREAD);
+    return _vmem_readt<T, T>(addr);
 }

-u16 DYNACALL mmu_ReadMem16(u32 adr)
-{
-    u32 addr;
-    mmu_data_translation<MMU_TT_DREAD, u16>(adr, addr);
-    return _vmem_ReadMem16(addr);
-}
 u16 DYNACALL mmu_IReadMem16(u32 vaddr)
 {
     u32 addr;
     bool shared;
-    mmu_instruction_translation(vaddr, addr, shared);
+    u32 rv = mmu_instruction_translation(vaddr, addr, shared);
+    if (rv != MMU_ERROR_NONE)
+        mmu_raise_exception(rv, vaddr, MMU_TT_IREAD);
     return _vmem_ReadMem16(addr);
 }

-u32 DYNACALL mmu_ReadMem32(u32 adr)
+template<typename T>
+void DYNACALL mmu_WriteMem(u32 adr, T data)
 {
     u32 addr;
-    mmu_data_translation<MMU_TT_DREAD, u32>(adr, addr);
-    return _vmem_ReadMem32(addr);
-}
-u64 DYNACALL mmu_ReadMem64(u32 adr)
-{
-    u32 addr;
-    mmu_data_translation<MMU_TT_DREAD, u64>(adr, addr);
-    return _vmem_ReadMem64(addr);
+    u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
+    if (rv != MMU_ERROR_NONE)
+        mmu_raise_exception(rv, adr, MMU_TT_DWRITE);
+    _vmem_writet<T>(addr, data);
 }

-void DYNACALL mmu_WriteMem8(u32 adr, u8 data)
+template<typename T>
+T DYNACALL mmu_ReadMemNoEx(u32 adr, u32 *exception_occurred)
 {
     u32 addr;
-    mmu_data_translation<MMU_TT_DWRITE, u8>(adr, addr);
-    _vmem_WriteMem8(addr, data);
+    u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
+    if (rv != MMU_ERROR_NONE)
+    {
+        DoMMUException(adr, rv, MMU_TT_DREAD);
+        *exception_occurred = 1;
+        return 0;
+    }
+    else
+    {
+        *exception_occurred = 0;
+        return _vmem_readt<T, T>(addr);
+    }
+}
+template u8 mmu_ReadMemNoEx<u8>(u32 adr, u32 *exception_occurred);
+template u16 mmu_ReadMemNoEx<u16>(u32 adr, u32 *exception_occurred);
+template u32 mmu_ReadMemNoEx<u32>(u32 adr, u32 *exception_occurred);
+template u64 mmu_ReadMemNoEx<u64>(u32 adr, u32 *exception_occurred);
+
+u16 DYNACALL mmu_IReadMem16NoEx(u32 vaddr, u32 *exception_occurred)
+{
+    u32 addr;
+    bool shared;
+    u32 rv = mmu_instruction_translation(vaddr, addr, shared);
+    if (rv != MMU_ERROR_NONE)
+    {
+        DoMMUException(vaddr, rv, MMU_TT_IREAD);
+        *exception_occurred = 1;
+        return 0;
+    }
+    else
+    {
+        *exception_occurred = 0;
+        return _vmem_ReadMem16(addr);
+    }
 }

-void DYNACALL mmu_WriteMem16(u32 adr, u16 data)
+template<typename T>
+u32 DYNACALL mmu_WriteMemNoEx(u32 adr, T data)
 {
     u32 addr;
-    mmu_data_translation<MMU_TT_DWRITE, u16>(adr, addr);
-    _vmem_WriteMem16(addr, data);
-}
-void DYNACALL mmu_WriteMem32(u32 adr, u32 data)
-{
-    u32 addr;
-    mmu_data_translation<MMU_TT_DWRITE, u32>(adr, addr);
-    _vmem_WriteMem32(addr, data);
-}
-void DYNACALL mmu_WriteMem64(u32 adr, u64 data)
-{
-    u32 addr;
-    mmu_data_translation<MMU_TT_DWRITE, u64>(adr, addr);
-    _vmem_WriteMem64(addr, data);
+    u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
+    if (rv != MMU_ERROR_NONE)
+    {
+        DoMMUException(adr, rv, MMU_TT_DWRITE);
+        return 1;
+    }
+    _vmem_writet<T>(addr, data);
+    return 0;
 }
+template u32 mmu_WriteMemNoEx<u8>(u32 adr, u8 data);
+template u32 mmu_WriteMemNoEx<u16>(u32 adr, u16 data);
+template u32 mmu_WriteMemNoEx<u32>(u32 adr, u32 data);
+template u32 mmu_WriteMemNoEx<u64>(u32 adr, u64 data);

 bool mmu_TranslateSQW(u32 adr, u32* out)
 {
diff --git a/core/hw/sh4/modules/mmu.h b/core/hw/sh4/modules/mmu.h
index 4219e0f69..98f81a1ca 100644
--- a/core/hw/sh4/modules/mmu.h
+++ b/core/hw/sh4/modules/mmu.h
@@ -10,6 +10,22 @@
 //Data write
 #define MMU_TT_DREAD 2

+//Return Values
+//Translation was successful
+#define MMU_ERROR_NONE       0
+//TLB miss
+#define MMU_ERROR_TLB_MISS   1
+//TLB Multihit
+#define MMU_ERROR_TLB_MHIT   2
+//Mem is read/write protected (depends on translation type)
+#define MMU_ERROR_PROTECTED  3
+//Mem is write protected , firstwrite
+#define MMU_ERROR_FIRSTWRITE 4
+//data-Opcode read/write missasligned
+#define MMU_ERROR_BADADDR    5
+//Can't Execute
+#define MMU_ERROR_EXECPROT   6
+
 struct TLB_Entry
 {
     CCN_PTEH_type Address;
@@ -40,9 +56,10 @@ static INLINE bool mmu_enabled()

 template<bool internal = false>
 u32 mmu_full_lookup(u32 va, const TLB_Entry **entry, u32& rv);
-void mmu_instruction_translation(u32 va, u32& rv, bool& shared);
+u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared);
 template<u32 translation_type, typename T>
-extern void mmu_data_translation(u32 va, u32& rv);
+extern u32 mmu_data_translation(u32 va, u32& rv);
+void DoMMUException(u32 addr, u32 error_code, u32 access_type);

 #if defined(NO_MMU)
     bool inline mmu_TranslateSQW(u32 addr, u32* mapped) {
@@ -51,16 +68,14 @@ extern u32 mmu_data_translation(u32 va, u32& rv);
     }
     void inline mmu_flush_table() {}
 #else
-    u8 DYNACALL mmu_ReadMem8(u32 addr);
-    u16 DYNACALL mmu_ReadMem16(u32 addr);
+    template<typename T> T DYNACALL mmu_ReadMem(u32 adr);
     u16 DYNACALL mmu_IReadMem16(u32 addr);
-    u32 DYNACALL mmu_ReadMem32(u32 addr);
-    u64 DYNACALL mmu_ReadMem64(u32 addr);
-    void DYNACALL mmu_WriteMem8(u32 addr, u8 data);
-    void DYNACALL mmu_WriteMem16(u32 addr, u16 data);
-    void DYNACALL mmu_WriteMem32(u32 addr, u32 data);
-    void DYNACALL mmu_WriteMem64(u32 addr, u64 data);
+    template<typename T> void DYNACALL mmu_WriteMem(u32 adr, T data);
     bool mmu_TranslateSQW(u32 addr, u32* mapped);
+
+    u16 DYNACALL mmu_IReadMem16NoEx(u32 adr, u32 *exception_occurred);
+    template<typename T> T DYNACALL mmu_ReadMemNoEx(u32 adr, u32 *exception_occurred);
+    template<typename T> u32 DYNACALL mmu_WriteMemNoEx(u32 adr, T data);
 #endif
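The *_NoEx entry points declared above are what generated code and the decoder call: they translate, and on failure raise the guest exception via DoMMUException and report it through a flag or return value instead of a C++ throw. Typical call shape, mirroring the decoder change earlier in this diff:

u32 exception_occurred;
u16 op = mmu_IReadMem16NoEx(pc, &exception_occurred);
if (exception_occurred)
    return false;          // guest exception already queued; abort decoding

// Writes report the same condition via the return value:
if (mmu_WriteMemNoEx<u32>(addr, data) != 0)
    return false;          // non-zero => MMU exception was raised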
diff --git a/core/hw/sh4/modules/mmu_impl.h b/core/hw/sh4/modules/mmu_impl.h
index c0eaba84d..a154c9abe 100644
--- a/core/hw/sh4/modules/mmu_impl.h
+++ b/core/hw/sh4/modules/mmu_impl.h
@@ -3,26 +3,8 @@
 #include "ccn.h"
 #include "mmu.h"

-//Do a full lookup on the UTLB entry's
-//Return Values
-//Translation was sucessfull , rv contains return
-#define MMU_ERROR_NONE       0
-//TLB miss
-#define MMU_ERROR_TLB_MISS   1
-//TLB Multihit
-#define MMU_ERROR_TLB_MHIT   2
-//Mem is read/write protected (depends on translation type)
-#define MMU_ERROR_PROTECTED  3
-//Mem is write protected , firstwrite
-#define MMU_ERROR_FIRSTWRITE 4
-//data-Opcode read/write missasligned
-#define MMU_ERROR_BADADDR    5
-//Can't Execute
-#define MMU_ERROR_EXECPROT   6
-
-extern u32 mmu_error_TT;
-
 void MMU_Init();
 void MMU_Reset(bool Manual);
 void MMU_Term();
+
+template<u32 translation_type> u32 mmu_full_SQ(u32 va, u32& rv);
diff --git a/core/hw/sh4/sh4_mem.h b/core/hw/sh4/sh4_mem.h
index e9a03af47..153d3a527 100644
--- a/core/hw/sh4/sh4_mem.h
+++ b/core/hw/sh4/sh4_mem.h
@@ -22,15 +22,15 @@ extern VArray2 mem_b;
 //#define WriteMem64(addr,reg) { _vmem_WriteMem32(addr,((u32*)reg)[0]);_vmem_WriteMem32((addr)+4, ((u32*)reg)[1]); }

 #else

-typedef u8 (*ReadMem8Func)(u32 addr);
-typedef u16 (*ReadMem16Func)(u32 addr);
-typedef u32 (*ReadMem32Func)(u32 addr);
-typedef u64 (*ReadMem64Func)(u32 addr);
+typedef u8 DYNACALL (*ReadMem8Func)(u32 addr);
+typedef u16 DYNACALL (*ReadMem16Func)(u32 addr);
+typedef u32 DYNACALL (*ReadMem32Func)(u32 addr);
+typedef u64 DYNACALL (*ReadMem64Func)(u32 addr);

-typedef void (*WriteMem8Func)(u32 addr, u8 data);
-typedef void (*WriteMem16Func)(u32 addr, u16 data);
-typedef void (*WriteMem32Func)(u32 addr, u32 data);
-typedef void (*WriteMem64Func)(u32 addr, u64 data);
+typedef void DYNACALL (*WriteMem8Func)(u32 addr, u8 data);
+typedef void DYNACALL (*WriteMem16Func)(u32 addr, u16 data);
+typedef void DYNACALL (*WriteMem32Func)(u32 addr, u32 data);
+typedef void DYNACALL (*WriteMem64Func)(u32 addr, u64 data);

 extern ReadMem8Func ReadMem8;
 extern ReadMem16Func ReadMem16;
diff --git a/core/nullDC.cpp b/core/nullDC.cpp
index e43a6117b..7ca8bc480 100755
--- a/core/nullDC.cpp
+++ b/core/nullDC.cpp
@@ -34,6 +34,7 @@ static bool rtt_to_buffer_game;
 static bool safemode_game;
 static bool tr_poly_depth_mask_game;
 static bool extra_depth_game;
+static bool full_mmu_game;

 cThread emu_thread(&dc_run, NULL);

@@ -137,12 +138,15 @@ void LoadSpecialSettings()
     safemode_game = false;
     tr_poly_depth_mask_game = false;
     extra_depth_game = false;
+    full_mmu_game = false;

     if (reios_windows_ce)
     {
-        printf("Enabling Extra depth scaling for Windows CE games\n");
+        printf("Enabling Full MMU and Extra depth scaling for Windows CE game\n");
         settings.rend.ExtraDepthScale = 0.1;
         extra_depth_game = true;
+        settings.dreamcast.FullMMU = true;
+        full_mmu_game = true;
     }

     // Tony Hawk's Pro Skater 2
@@ -660,7 +664,8 @@ void SaveSettings()
     cfgSaveInt("config", "Dreamcast.Cable", settings.dreamcast.cable);
     cfgSaveInt("config", "Dreamcast.Region", settings.dreamcast.region);
     cfgSaveInt("config", "Dreamcast.Broadcast", settings.dreamcast.broadcast);
-    cfgSaveBool("config", "Dreamcast.FullMMU", settings.dreamcast.FullMMU);
+    if (!full_mmu_game || !settings.dreamcast.FullMMU)
+        cfgSaveBool("config", "Dreamcast.FullMMU", settings.dreamcast.FullMMU);
     cfgSaveBool("config", "Dynarec.idleskip", settings.dynarec.idleskip);
     cfgSaveBool("config", "Dynarec.unstable-opt", settings.dynarec.unstable_opt);
     if (!safemode_game || !settings.dynarec.safemode)
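Adding DYNACALL to the ReadMem*/WriteMem* function-pointer typedefs matters because mmu_ReadMem<T>/mmu_WriteMem<T> are declared DYNACALL; on ABIs where DYNACALL expands to a non-default calling convention, assigning them through a pointer type without it is a type (and ABI) mismatch. A hedged sketch of the pattern; fastcall is chosen purely for illustration, the real macro is platform-dependent:

#define DYNACALL __attribute__((fastcall))    // illustration only (x86)

u8 DYNACALL mmu_read8(u32 addr);              // callee compiled with DYNACALL...

typedef u8 DYNACALL (*ReadMem8Func)(u32 addr);   // ...so the pointer type must carry it too,
ReadMem8Func ReadMem8 = &mmu_read8;              // or the indirect call passes arguments wrongly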
diff --git a/core/rec-ARM64/rec_arm64.cpp b/core/rec-ARM64/rec_arm64.cpp
index ca5d96b4c..d0a829c5d 100644
--- a/core/rec-ARM64/rec_arm64.cpp
+++ b/core/rec-ARM64/rec_arm64.cpp
@@ -24,6 +24,7 @@
 #include <unistd.h>
 #include <sys/mman.h>
 #include <map>
+#include <setjmp.h>

 #include "deps/vixl/aarch64/macro-assembler-aarch64.h"
 using namespace vixl::aarch64;
@@ -104,6 +105,8 @@ void Arm64CacheFlush(void* start, void* end)

 double host_cpu_time;
 u64 guest_cpu_cycles;
+static jmp_buf jmp_env;
+static u32 cycle_counter;

 #ifdef PROFILING
 #include <time.h>
@@ -153,8 +156,8 @@ __asm__
         ".hidden ngen_FailedToFindBlock_    \n\t"
         ".globl ngen_FailedToFindBlock_     \n\t"
         "ngen_FailedToFindBlock_:           \n\t"
-        "mov w0, w29                        \n\t"
-        "bl rdv_FailedToFindBlock           \n\t"
+//      "mov w0, w29                        \n\t"   // FIXME w29 might not be up to date anymore (exception in bm_GetCodeByVAddr)
+        "bl rdv_FailedToFindBlock_pc        \n\t"
         "br x0                              \n"

         ".hidden ngen_blockcheckfail        \n\t"
@@ -180,10 +183,16 @@ void ngen_mainloop(void* v_cntx)
         "stp s10, s11, [sp, #112]   \n\t"
         "stp s12, s13, [sp, #128]   \n\t"
         "stp x29, x30, [sp, #144]   \n\t"
-        // Use x28 as sh4 context pointer
-        "mov x28, %[cntx]           \n\t"
-        // Use x27 as cycle_counter
+
+        "stp %[cntx], %[cycle_counter], [sp, #-16]! \n\t"   // Push context, cycle_counter address
         "mov w27, %[_SH4_TIMESLICE] \n\t"
+        "str w27, [%[cycle_counter]]    \n\t"
+
+        "mov x0, %[jmp_env]         \n\t"   // SETJMP
+        "bl setjmp                  \n\t"
+
+        // Use x28 as sh4 context pointer
+        "ldr x28, [sp]              \n\t"   // Set context
         // w29 is next_pc
         "ldr w29, [x28, %[pc]]      \n\t"
         "b no_update                \n"
@@ -191,8 +200,11 @@ void ngen_mainloop(void* v_cntx)
         ".hidden intc_sched     \n\t"
         ".globl intc_sched      \n\t"
         "intc_sched:            \n\t"
-        "add w27, w27, %[_SH4_TIMESLICE]    \n\t"
-        "mov x29, lr            \n\r"   // Trashing pc here but it will be reset at the end of the block or in DoInterrupts
+        "ldr x27, [sp, #8]      \n\t"   // &cycle_counter
+        "ldr w0, [x27]          \n\t"   // cycle_counter
+        "add w0, w0, %[_SH4_TIMESLICE]  \n\t"
+        "str w0, [x27]          \n\t"
+        "mov x29, lr            \n\t"   // Trashing pc here but it will be reset at the end of the block or in DoInterrupts
         "bl UpdateSystem        \n\t"
        "mov lr, x29             \n\t"
         "cbnz w0, .do_interrupts \n\t"
@@ -208,7 +220,9 @@ void ngen_mainloop(void* v_cntx)
         "no_update:             \n\t"   // next_pc _MUST_ be on w29
         "ldr w0, [x28, %[CpuRunning]] \n\t"
         "cbz w0, .end_mainloop  \n\t"
+        "ldr w29, [x28, %[pc]]  \n\t"   // shouldn't be necessary
+#ifdef NO_MMU
         "movz x2, %[RCB_SIZE], lsl #16  \n\t"
         "sub x2, x28, x2        \n\t"
         "add x2, x2, %[SH4CTX_SIZE] \n\t"
@@ -221,8 +235,14 @@ void ngen_mainloop(void* v_cntx)
 #endif
         "ldr x0, [x2, x1, lsl #3]   \n\t"
         "br x0                  \n"
+#else
+        "mov w0, w29            \n\t"
+        "bl bm_GetCodeByVAddr   \n\t"
+        "br x0                  \n"
+#endif

         ".end_mainloop:         \n\t"
+        "add sp, sp, #16        \n\t"   // Pop context
         "ldp x29, x30, [sp, #144]   \n\t"
         "ldp s12, s13, [sp, #128]   \n\t"
         "ldp s10, s11, [sp, #112]   \n\t"
@@ -239,7 +259,9 @@ void ngen_mainloop(void* v_cntx)
           [_SH4_TIMESLICE] "i"(SH4_TIMESLICE),
           [CpuRunning] "i"(offsetof(Sh4Context, CpuRunning)),
           [RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16),
-          [SH4CTX_SIZE] "i" (sizeof(Sh4Context))
+          [SH4CTX_SIZE] "i" (sizeof(Sh4Context)),
+          [jmp_env] "r"(reinterpret_cast<uintptr_t>(jmp_env)),
+          [cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter))
         : "memory"
     );
 }
@@ -265,6 +287,75 @@ RuntimeBlockInfo* ngen_AllocateBlock()
     return new DynaRBI();
 }

+template<typename T>
+static T ReadMemNoEx(u32 addr, u32 pc)
+{
+    u32 ex;
+    T rv = mmu_ReadMemNoEx<T>(addr, &ex);
+    if (ex)
+    {
+        if (pc & 1)
+            spc = pc - 1;
+        else
+            spc = pc;
+        longjmp(jmp_env, 1);
+    }
+    return rv;
+}
+
+template<typename T>
+static void WriteMemNoEx(u32 addr, T data, u32 pc)
+{
+    u32 ex = mmu_WriteMemNoEx<T>(addr, data);
+    if (ex)
+    {
+        if (pc & 1)
+            spc = pc - 1;
+        else
+            spc = pc;
+        longjmp(jmp_env, 1);
+    }
+}
+
+static u32 interpreter_fallback(u16 op, u32 pc)
+{
+    try {
+        OpDesc[op]->oph(op);
+        return 0;
+    } catch (SH4ThrownException& ex) {
+        die("IFB exception");
+        if (pc & 1)
+        {
+            // Delay slot
+            AdjustDelaySlotException(ex);
+            pc--;
+        }
+        Do_Exception(pc, ex.expEvn, ex.callVect);
+        return 1;
+    }
+}
+
+static u32 exception_raised;
+
+static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
+{
+    try {
+        do_sqw_mmu(addr);
+        exception_raised = 0;
+    } catch (SH4ThrownException& ex) {
+        die("do_sqw_mmu exception");
+        if (pc & 1)
+        {
+            // Delay slot
+            AdjustDelaySlotException(ex);
+            pc--;
+        }
+        Do_Exception(pc, ex.expEvn, ex.callVect);
+        exception_raised = 1;
+        printf("SQW MMU EXCEPTION\n");
+    }
+}
+
 class Arm64Assembler : public MacroAssembler
 {
     typedef void (MacroAssembler::*Arm64Op_RRO)(const Register&, const Register&, const Operand&);
@@ -327,22 +418,47 @@ public:

         if (op.rs3.is_imm())
         {
-            Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm);
+            if (regalloc.IsAllocg(op.rs1))
+                Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm);
+            else
+            {
+                Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
+                Add(*ret_reg, *ret_reg, op.rs3._imm);
+            }
         }
         else if (op.rs3.is_r32i())
         {
-            Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3));
+            if (regalloc.IsAllocg(op.rs1) && regalloc.IsAllocg(op.rs3))
+                Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3));
+            else
+            {
+                Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
+                Ldr(w8, sh4_context_mem_operand(op.rs3.reg_ptr()));
+                Add(*ret_reg, *ret_reg, w8);
+            }
         }
         else if (!op.rs3.is_null())
         {
             die("invalid rs3");
         }
+        else if (op.rs1.is_reg())
+        {
+            if (regalloc.IsAllocg(op.rs1))
+            {
+                if (raddr == NULL)
+                    ret_reg = &regalloc.MapRegister(op.rs1);
+                else
+                    Mov(*ret_reg, regalloc.MapRegister(op.rs1));
+            }
+            else
+            {
+                Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
+            }
+        }
         else
         {
-            if (raddr == NULL)
-                ret_reg = &regalloc.MapRegister(op.rs1);
-            else
-                Mov(*ret_reg, regalloc.MapRegister(op.rs1));
+            verify(op.rs1.is_imm());
+            Mov(*ret_reg, op.rs1._imm);
         }

         return *ret_reg;
@@ -362,7 +478,10 @@ public:
         regalloc.DoAlloc(block);

         // scheduler
-        Subs(w27, w27, block->guest_cycles);
+        Mov(x27, reinterpret_cast<uintptr_t>(&cycle_counter));
+        Ldr(w0, MemOperand(x27));
+        Subs(w0, w0, block->guest_cycles);
+        Str(w0, MemOperand(x27));
         Label cycles_remaining;
         B(&cycles_remaining, pl);
         GenCallRuntime(intc_sched);
@@ -389,7 +508,21 @@ public:
                 }

                 Mov(*call_regs[0], op.rs3._imm);
-                GenCallRuntime(OpDesc[op.rs3._imm]->oph);
+                if (!mmu_enabled())
+                {
+                    GenCallRuntime(OpDesc[op.rs3._imm]->oph);
+                }
+                else
+                {
+                    Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));  // pc
+
+                    GenCallRuntime(interpreter_fallback);
+
+                    Cmp(w0, 0);
+                    Ldr(w29, sh4_context_mem_operand(&next_pc));
+                    GenBranch(no_update, ne);
+                }
+
                 break;

             case shop_jcond:
@@ -532,20 +665,33 @@ public:

             case shop_shld:
             case shop_shad:
-                // TODO optimize
-                Cmp(regalloc.MapRegister(op.rs2), 0);
-                Csel(w1, regalloc.MapRegister(op.rs2), wzr, ge);    // if shift >= 0 then w1 = shift else w1 = 0
-                Mov(w0, wzr);   // wzr not supported by csneg
-                Csneg(w2, w0, regalloc.MapRegister(op.rs2), ge);    // if shift < 0 then w2 = -shift else w2 = 0
-                Cmp(w2, 32);
-                Csel(w2, 31, w2, eq);   // if shift == -32 then w2 = 31
-                Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1); // Left shift by w1
-                if (op.op == shop_shld) // Right shift by w2
-                    // Logical shift
-                    Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2);
-                else
-                    // Arithmetic shift
-                    Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2);
+                {
+                    Label positive_shift, negative_shift, end;
+                    Tbz(regalloc.MapRegister(op.rs2), 31, &positive_shift);
+                    Cmn(regalloc.MapRegister(op.rs2), 32);
+                    B(&negative_shift, ne);
+                    if (op.op == shop_shld)
+                        // Logical shift
+                        Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), 31);
+                    else
+                        // Arithmetic shift
+                        Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), 31);
+                    B(&end);
+
+                    Bind(&positive_shift);
+                    Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
+                    B(&end);
+
+                    Bind(&negative_shift);
+                    Neg(w1, regalloc.MapRegister(op.rs2));
+                    if (op.op == shop_shld)
+                        // Logical shift
+                        Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w1);
+                    else
+                        // Arithmetic shift
+                        Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1);
+                    Bind(&end);
+                }
                 break;

             case shop_test:
@@ -617,31 +763,45 @@ public:
                 break;

             case shop_pref:
-                Mov(w0, regalloc.MapRegister(op.rs1));
-                if (op.flags != 0x1337)
                 {
-                    Lsr(w1, regalloc.MapRegister(op.rs1), 26);
+                    if (regalloc.IsAllocg(op.rs1))
+                        Lsr(w1, regalloc.MapRegister(op.rs1), 26);
+                    else
+                    {
+                        Ldr(w0, sh4_context_mem_operand(op.rs1.reg_ptr()));
+                        Lsr(w1, w0, 26);
+                    }
                     Cmp(w1, 0x38);
-                }
+                    Label not_sqw;
+                    B(&not_sqw, ne);
+                    if (regalloc.IsAllocg(op.rs1))
+                        Mov(w0, regalloc.MapRegister(op.rs1));

-                if (CCN_MMUCR.AT)
-                {
-                    Ldr(x9, reinterpret_cast<uintptr_t>(&do_sqw_mmu));
-                }
-                else
-                {
-                    Sub(x9, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, do_sqw_nommu));
-                    Ldr(x9, MemOperand(x9));
-                    Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
-                }
-                if (op.flags == 0x1337)
-                    Blr(x9);
-                else
-                {
-                    Label no_branch;
-                    B(&no_branch, ne);
-                    Blr(x9);
-                    Bind(&no_branch);
+                    if (mmu_enabled())
+                    {
+                        Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));  // pc
+
+                        GenCallRuntime(do_sqw_mmu_no_ex);
+
+                        Cmp(w0, 0);
+                        Ldr(w29, sh4_context_mem_operand(&next_pc));
+                        GenBranch(no_update, ne);
+                    }
+                    else
+                    {
+                        if (CCN_MMUCR.AT)
+                        {
+                            Ldr(x9, reinterpret_cast<uintptr_t>(&do_sqw_mmu));
+                        }
+                        else
+                        {
+                            Sub(x9, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, do_sqw_nommu));
+                            Ldr(x9, MemOperand(x9));
+                            Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
+                        }
+                        Blr(x9);
+                    }
+                    Bind(&not_sqw);
                 }
                 break;
@@ -863,24 +1023,39 @@ public:
         Instruction *start_instruction = GetCursorAddress<Instruction *>();
         u32 size = op.flags & 0x7f;

+        if (mmu_enabled())
+            Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));  // pc
+
         switch (size)
         {
         case 1:
-            GenCallRuntime(ReadMem8);
+            if (!mmu_enabled())
+                GenCallRuntime(ReadMem8);
+            else
+                GenCallRuntime(ReadMemNoEx<u8>);
             Sxtb(w0, w0);
             break;

         case 2:
-            GenCallRuntime(ReadMem16);
+            if (!mmu_enabled())
+                GenCallRuntime(ReadMem16);
+            else
+                GenCallRuntime(ReadMemNoEx<u16>);
             Sxth(w0, w0);
             break;

         case 4:
-            GenCallRuntime(ReadMem32);
+            if (!mmu_enabled())
+                GenCallRuntime(ReadMem32);
+            else
+                GenCallRuntime(ReadMemNoEx<u32>);
             break;

         case 8:
-            GenCallRuntime(ReadMem64);
+            if (!mmu_enabled())
+                GenCallRuntime(ReadMem64);
+            else
+                GenCallRuntime(ReadMemNoEx<u64>);
             break;

         default:
@@ -906,24 +1081,39 @@ public:

     void GenWriteMemorySlow(const shil_opcode& op)
     {
+        if (mmu_enabled())
+            Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));  // pc
+
         Instruction *start_instruction = GetCursorAddress<Instruction *>();
         u32 size = op.flags & 0x7f;
         switch (size)
         {
         case 1:
-            GenCallRuntime(WriteMem8);
+            if (!mmu_enabled())
+                GenCallRuntime(WriteMem8);
+            else
+                GenCallRuntime(WriteMemNoEx<u8>);
             break;

         case 2:
-            GenCallRuntime(WriteMem16);
+            if (!mmu_enabled())
+                GenCallRuntime(WriteMem16);
+            else
+                GenCallRuntime(WriteMemNoEx<u16>);
             break;

         case 4:
-            GenCallRuntime(WriteMem32);
+            if (!mmu_enabled())
+                GenCallRuntime(WriteMem32);
+            else
+                GenCallRuntime(WriteMemNoEx<u32>);
             break;

         case 8:
-            GenCallRuntime(WriteMem64);
+            if (!mmu_enabled())
+                GenCallRuntime(WriteMem64);
+            else
+                GenCallRuntime(WriteMemNoEx<u64>);
             break;

         default:
@@ -950,7 +1140,16 @@ public:
         case BET_StaticCall:
             // next_pc = block->BranchBlock;
             if (block->pBranchBlock == NULL)
-                GenCallRuntime(ngen_LinkBlock_Generic_stub);
+            {
+                if (!mmu_enabled())
+                    GenCallRuntime(ngen_LinkBlock_Generic_stub);
+                else
+                {
+                    Mov(w29, block->BranchBlock);
+                    Str(w29, sh4_context_mem_operand(&next_pc));
+                    GenBranch(no_update);
+                }
+            }
             else
                 GenBranch(block->pBranchBlock->code);
             break;
@@ -975,14 +1174,32 @@ public:
             if (block->pBranchBlock != NULL)
                 GenBranch(block->pBranchBlock->code);
             else
-                GenCallRuntime(ngen_LinkBlock_cond_Branch_stub);
+            {
+                if (!mmu_enabled())
+                    GenCallRuntime(ngen_LinkBlock_cond_Branch_stub);
+                else
+                {
+                    Mov(w29, block->BranchBlock);
+                    Str(w29, sh4_context_mem_operand(&next_pc));
+                    GenBranch(no_update);
+                }
+            }

             Bind(&branch_not_taken);

             if (block->pNextBlock != NULL)
                 GenBranch(block->pNextBlock->code);
             else
-                GenCallRuntime(ngen_LinkBlock_cond_Next_stub);
+            {
+                if (!mmu_enabled())
+                    GenCallRuntime(ngen_LinkBlock_cond_Next_stub);
+                else
+                {
+                    Mov(w29, block->NextBlock);
+                    Str(w29, sh4_context_mem_operand(&next_pc));
+                    GenBranch(no_update);
+                }
+            }
         }
             break;

@@ -991,18 +1208,26 @@ public:

         case BET_DynamicRet:
             // next_pc = *jdyn;
-            Str(w29, sh4_context_mem_operand(&next_pc));
-            // TODO Call no_update instead (and check CpuRunning less frequently?)
-            Mov(x2, sizeof(Sh4RCB));
-            Sub(x2, x28, x2);
-            Add(x2, x2, sizeof(Sh4Context));    // x2 now points to FPCB
+            if (!mmu_enabled())
+            {
+                Str(w29, sh4_context_mem_operand(&next_pc));
+                // TODO Call no_update instead (and check CpuRunning less frequently?)
+                Mov(x2, sizeof(Sh4RCB));
+                Sub(x2, x28, x2);
+                Add(x2, x2, sizeof(Sh4Context));    // x2 now points to FPCB
 #if RAM_SIZE_MAX == 33554432
-            Ubfx(w1, w29, 1, 24);
+                Ubfx(w1, w29, 1, 24);
 #else
-            Ubfx(w1, w29, 1, 23);
+                Ubfx(w1, w29, 1, 23);
 #endif
-            Ldr(x15, MemOperand(x2, x1, LSL, 3));   // Get block entry point
-            Br(x15);
+                Ldr(x15, MemOperand(x2, x1, LSL, 3));   // Get block entry point
+                Br(x15);
+            }
+            else
+            {
+                Str(w29, sh4_context_mem_operand(&next_pc));
+                GenBranch(no_update);
+            }

             break;
@@ -1093,8 +1318,6 @@ private:

     void GenReadMemory(const shil_opcode& op, size_t opid)
     {
-        u32 size = op.flags & 0x7f;
-
         if (GenReadMemoryImmediate(op))
             return;

@@ -1112,34 +1335,73 @@ private:
             return false;

         u32 size = op.flags & 0x7f;
+        u32 addr = op.rs1._imm;
+        if (mmu_enabled())
+        {
+            if ((addr >> 12) != (block->vaddr >> 12))
+                // When full mmu is on, only consider addresses in the same 4k page
+                return false;
+
+            u32 paddr;
+            u32 rv;
+            if (size == 2)
+                rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
+            else if (size == 4)
+                rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
+            else
+                die("Invalid immediate size");
+            if (rv != MMU_ERROR_NONE)
+                return false;
+
+            addr = paddr;
+        }
         bool isram = false;
-        void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
+        void* ptr = _vmem_read_const(addr, isram, size);

         if (isram)
         {
             Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
-            switch (size)
+            if (regalloc.IsAllocAny(op.rd))
             {
-            case 2:
-                Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
-                break;
+                switch (size)
+                {
+                case 2:
+                    Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
+                    break;

-            case 4:
-                if (op.rd.is_r32f())
-                    Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
-                else
-                    Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
-                break;
+                case 4:
+                    if (op.rd.is_r32f())
+                        Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
+                    else
+                        Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
+                    break;

-            default:
-                die("Invalid size");
-                break;
+                default:
+                    die("Invalid size");
+                    break;
+                }
+            }
+            else
+            {
+                switch (size)
+                {
+                case 2:
+                    Ldrsh(w1, MemOperand(x1, xzr, SXTW));
+                    break;
+
+                case 4:
+                    Ldr(w1, MemOperand(x1));
+                    break;
+
+                default:
+                    die("Invalid size");
+                    break;
+                }
+                Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
             }
         }
         else
         {
             // Not RAM
-            Mov(w0, op.rs1._imm);
+            Mov(w0, addr);

             switch(size)
             {
@@ -1165,7 +1427,10 @@ private:
             if (regalloc.IsAllocg(op.rd))
                 Mov(regalloc.MapRegister(op.rd), w0);
             else
+            {
+                verify(regalloc.IsAllocf(op.rd));
                 Fmov(regalloc.MapVRegister(op.rd), w0);
+            }
         }

         return true;
@@ -1174,7 +1439,7 @@ private:
     bool GenReadMemoryFast(const shil_opcode& op, size_t opid)
     {
         // Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
-        if (!_nvmem_enabled())
+        if (!_nvmem_enabled() || mmu_enabled())
             return false;

         Instruction *start_instruction = GetCursorAddress<Instruction *>();
@@ -1254,7 +1519,7 @@ private:
     bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
     {
         // Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
-        if (!_nvmem_enabled())
+        if (!_nvmem_enabled() || mmu_enabled())
             return false;

         Instruction *start_instruction = GetCursorAddress<Instruction *>();
@@ -1307,9 +1572,16 @@ private:
         u8* ptr = GetMemPtr(block->addr, sz);
         if (ptr == NULL)
-            // FIXME Can a block cross a RAM / non-RAM boundary??
             return;

+        if (mmu_enabled())
+        {
+            Ldr(w10, sh4_context_mem_operand(&next_pc));
+            Ldr(w11, block->vaddr);
+            Cmp(w10, w11);
+            B(ne, &blockcheck_fail);
+        }
+
         Ldr(x9, reinterpret_cast<uintptr_t>(ptr));

         while (sz > 0)
@@ -1347,6 +1619,23 @@ private:
         TailCallRuntime(ngen_blockcheckfail);

         Bind(&blockcheck_success);
+/*
+        if (mmu_enabled() && block->has_fpu_op)
+        {
+            Label fpu_enabled;
+            Ldr(w10, sh4_context_mem_operand(&sr));
+            Tbz(w10, 15, &fpu_enabled);         // test SR.FD bit
+
+            Mov(*call_regs[0], block->vaddr);   // pc
+            Mov(*call_regs[1], 0x800);          // event
+            Mov(*call_regs[2], 0x100);          // vector
+            CallRuntime(Do_Exception);
+            Ldr(w29, sh4_context_mem_operand(&next_pc));
+            GenBranch(no_update);
+
+            Bind(&fpu_enabled);
+        }
+*/
     }

     void shil_param_to_host_reg(const shil_param& param, const Register& reg)
@@ -1360,9 +1649,19 @@ private:
             if (param.is_r64f())
                 Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
             else if (param.is_r32f())
-                Fmov(reg, regalloc.MapVRegister(param));
+            {
+                if (regalloc.IsAllocf(param))
+                    Fmov(reg, regalloc.MapVRegister(param));
+                else
+                    Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+            }
             else
-                Mov(reg, regalloc.MapRegister(param));
+            {
+                if (regalloc.IsAllocg(param))
+                    Mov(reg, regalloc.MapRegister(param));
+                else
+                    Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+            }
         }
         else
         {
@@ -1383,13 +1682,17 @@ private:
             else
                 Fmov(regalloc.MapRegister(param), (const VRegister&)reg);
         }
-        else
+        else if (regalloc.IsAllocf(param))
         {
             if (reg.IsVRegister())
                 Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
             else
                 Fmov(regalloc.MapVRegister(param), (const Register&)reg);
         }
+        else
+        {
+            Str(reg, sh4_context_mem_operand(param.reg_ptr()));
+        }
     }

     struct CC_PS
@@ -1476,6 +1779,8 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)

 u32 DynaRBI::Relink()
 {
+    if (mmu_enabled())
+        return 0;
     //printf("DynaRBI::Relink %08x\n", this->addr);
     Arm64Assembler *compiler = new Arm64Assembler((u8 *)this->code + this->relink_offset);
diff --git a/core/rec-cpp/rec_cpp.cpp b/core/rec-cpp/rec_cpp.cpp
index c2232fbba..6dbf5ac0f 100644
--- a/core/rec-cpp/rec_cpp.cpp
+++ b/core/rec-cpp/rec_cpp.cpp
@@ -50,7 +50,7 @@ void ngen_mainloop(void* v_cntx)
 #endif
     cycle_counter = SH4_TIMESLICE;
     do {
-        DynarecCodeEntryPtr rcb = bm_GetCode(ctx->cntx.pc);
+        DynarecCodeEntryPtr rcb = bm_GetCodeByVAddr(ctx->cntx.pc);
         rcb();
     } while (cycle_counter > 0);
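Both backends now unwind out of a faulting memory access with setjmp/longjmp instead of C++ exceptions: ngen_mainloop calls setjmp once before dispatch, and the NoEx wrappers record the faulting guest PC in spc and longjmp back, restarting dispatch at the exception handler's block. A minimal standalone sketch of the mechanism (mem_access stands in for ReadMemNoEx/WriteMemNoEx):

#include <csetjmp>
#include <cstdio>

static jmp_buf jmp_env;
static unsigned spc;                // guest PC to resume at

static void mem_access(unsigned pc)
{
    bool mmu_fault = true;          // pretend translation failed
    if (mmu_fault) {
        spc = pc;                   // record where the guest exception was raised
        longjmp(jmp_env, 1);        // unwind straight back to the dispatch loop
    }
}

int main()
{
    setjmp(jmp_env);                // re-entered after every longjmp
    // dispatch loop: look up / compile the block at spc, run it...
    static int faults = 0;          // static: survives the longjmp re-entry
    if (faults++ == 0)
        mem_access(0x8c0000b0);     // faults once, then falls through
    printf("resumed at %08x\n", spc);
}

The longjmp path skips C++ destructors, which is safe here because the JIT-emitted frames it unwinds through hold no C++ objects; the interpreter fallback and store-queue writeback, which do run real C++, still use try/catch (hence -fexceptions in the Android build below).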
diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp
index c1fabc1d8..02e2e64cb 100644
--- a/core/rec-x64/rec_x64.cpp
+++ b/core/rec-x64/rec_x64.cpp
@@ -1,6 +1,8 @@
 #include "build.h"

 #if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
+#include <setjmp.h>
+
 #define EXPLODE_SPANS
 //#define PROFILING

@@ -77,6 +79,8 @@ static __attribute((used)) void end_slice()
 #error RAM_SIZE_MAX unknown
 #endif

+jmp_buf jmp_env;
+
 #ifdef _WIN32
 // Fully naked function in win32 for proper SEH prologue
 __asm__ (
@@ -120,6 +124,14 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
 #endif
 		"movl $" _S(SH4_TIMESLICE) "," _U "cycle_counter(%rip) \n"

+#ifdef _WIN32
+		"movq $" _U "jmp_env, %rcx \n\t"	// SETJMP
+#else
+		"movq $" _U "jmp_env, %rdi \n\t"
+#endif
+		"call " _U "setjmp \n\t"
+//		"testl %rax, %rax \n\t"
+
 	"1: \n\t"	// run_loop
 		"movq " _U "p_sh4rcb(%rip), %rax \n\t"
 		"movl " _S(CPU_RUNNING) "(%rax), %edx \n\t"
@@ -136,7 +148,7 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
 #else
 		"movl " _S(PC)"(%rax), %edi \n\t"
 #endif
-		"call " _U "bm_GetCode2 \n\t"
+		"call " _U "bm_GetCodeByVAddr \n\t"
 		"call *%rax \n\t"

 		"movl " _U "cycle_counter(%rip), %ecx \n\t"
 		"testl %ecx, %ecx \n\t"
@@ -212,51 +224,31 @@ static u32 exception_raised;
 template<typename T>
 static T ReadMemNoEx(u32 addr, u32 pc)
 {
-	try {
-		exception_raised = 0;
-		if (sizeof(T) == 1)
-			return ReadMem8(addr);
-		else if (sizeof(T) == 2)
-			return ReadMem16(addr);
-		else if (sizeof(T) == 4)
-			return ReadMem32(addr);
-		else if (sizeof(T) == 8)
-			return ReadMem64(addr);
-	} catch (SH4ThrownException& ex) {
+	T rv = mmu_ReadMemNoEx<T>(addr, &exception_raised);
+	if (exception_raised)
+	{
 		if (pc & 1)
-		{	// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
-		return 0;
+			spc = pc - 1;
+		else
+			spc = pc;
+		longjmp(jmp_env, 1);
 	}
+	return rv;
 }

 template<typename T>
 static void WriteMemNoEx(u32 addr, T data, u32 pc)
 {
-	try {
-		if (sizeof(T) == 1)
-			WriteMem8(addr, data);
-		else if (sizeof(T) == 2)
-			WriteMem16(addr, data);
-		else if (sizeof(T) == 4)
-			WriteMem32(addr, data);
-		else if (sizeof(T) == 8)
-			WriteMem64(addr, data);
-		exception_raised = 0;
-	} catch (SH4ThrownException& ex) {
+	exception_raised = mmu_WriteMemNoEx<T>(addr, data);
+	if (exception_raised)
+	{
 		if (pc & 1)
-		{	// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
+			spc = pc - 1;
+		else
+			spc = pc;
+		longjmp(jmp_env, 1);
 	}
 }

@@ -352,7 +344,7 @@ public:
 		sub(rsp, 0x8);		// align stack
 #endif
 		Xbyak::Label exit_block;
-
+/*
 		if (mmu_enabled() && block->has_fpu_op)
 		{
 			Xbyak::Label fpu_enabled;
@@ -367,7 +359,7 @@ public:
 			jmp(exit_block, T_NEAR);
 			L(fpu_enabled);
 		}
-
+*/
 		for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
 		{
 			shil_opcode& op = block->oplist[current_opid];
@@ -449,25 +441,32 @@ public:
 			{
 				u32 size = op.flags & 0x7f;
 				bool immediate_address = op.rs1.is_imm();
-				if (immediate_address && mmu_enabled() && (op.rs1._imm >> 12) != (block->vaddr >> 12))
+				u32 addr = op.rs1._imm;
+				if (immediate_address && mmu_enabled())
 				{
-					// When full mmu is on, only consider addresses in the same 4k page
-					immediate_address = false;
+					if ((op.rs1._imm >> 12) != (block->vaddr >> 12))
+					{
+						// When full mmu is on, only consider addresses in the same 4k page
+						immediate_address = false;
+					}
+					else
+					{
+						u32 paddr;
+						u32 rv;
+						if (size == 2)
+							rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
+						else if (size == 4)
+							rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
+						else
+							die("Invalid immediate size");
+						if (rv != MMU_ERROR_NONE)
+							immediate_address = false;
+						else
+							addr = paddr;
+					}
 				}
 				if (immediate_address)
 				{
-					u32 addr = op.rs1._imm;
-					if (mmu_enabled())
-					{
-						u32 paddr;
-						if (size == 2)
-							mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
-						else if (size == 4)
-							mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
-						else
-							die("Invalid immediate size");
-						addr = paddr;
-					}
 					bool isram = false;
 					void* ptr = _vmem_read_const(addr, isram, size);
@@ -581,11 +580,11 @@ public:
 					die("1..8 bytes");
 				}

-				if (mmu_enabled())
-				{
-					test(dword[(void *)&exception_raised], 1);
-					jnz(exit_block, T_NEAR);
-				}
+//				if (mmu_enabled())
+//				{
+//					test(dword[(void *)&exception_raised], 1);
+//					jnz(exit_block, T_NEAR);
+//				}

 				if (size != 8)
 					host_reg_to_shil_param(op.rd, ecx);
@@ -674,11 +673,11 @@ public:
 					die("1..8 bytes");
 				}

-				if (mmu_enabled())
-				{
-					test(dword[(void *)&exception_raised], 1);
-					jnz(exit_block, T_NEAR);
-				}
+//				if (mmu_enabled())
+//				{
+//					test(dword[(void *)&exception_raised], 1);
+//					jnz(exit_block, T_NEAR);
+//				}
 			}
 			break;

@@ -1353,6 +1352,10 @@ private:
 //			cmp(byte[rax], block->asid);
 //			jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
 //		}
+		// FIXME Neither of these tests should be necessary.
+		// However, the decoder makes various assumptions about the current PC value which are
+		// simply not true in a virtualized memory model. So this can only work if virtual and
+		// physical addresses are the same at compile time and at run time.
 		if (mmu_enabled())
 		{
 			mov(rax, (uintptr_t)&next_pc);
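Reviewer note: the rec_x64.cpp changes above drop per-access C++ try/catch in favor of one setjmp at ngen_mainloop entry. mmu_ReadMemNoEx/mmu_WriteMemNoEx report a fault through exception_raised, the handler records the faulting PC in spc (the generated code sets bit 0 of the PC it passes for delay-slot accesses, so clearing that bit recovers the true PC), and longjmp lands back at the dispatch loop. A self-contained sketch of that control flow, using simplified stand-in names (read_mem_noex, a fake TLB test) rather than the emulator's:

#include <csetjmp>
#include <cstdint>
#include <cstdio>

static std::jmp_buf jmp_env;  // armed once at mainloop entry
static uint32_t spc;          // PC to re-dispatch from after a fault

// Guarded read: on an MMU fault, unwind straight back to the dispatch loop.
static uint32_t read_mem_noex(uint32_t addr, uint32_t access_pc)
{
    bool fault = addr >= 0xF0000000u;                  // stand-in for the real TLB lookup
    if (fault)
    {
        // bit 0 of access_pc flags a delay-slot access; clear it to get the real PC
        spc = (access_pc & 1) ? access_pc - 1 : access_pc;
        std::longjmp(jmp_env, 1);
    }
    return 0xCAFE;                                     // pretend data
}

int main()
{
    static bool faulted = false;
    spc = 0x8C010000;
    setjmp(jmp_env);                                   // initial entry and every fault land here
    std::printf("dispatching block at %08X\n", spc);
    if (!faulted)
    {
        faulted = true;
        read_mem_noex(0xFFFFFFFFu, spc | 1);           // delay-slot access that faults
    }
}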
diff --git a/core/rec-x86/rec_lin86_asm.S b/core/rec-x86/rec_lin86_asm.S
index a8d8d42a6..9dee7fc9c 100644
--- a/core/rec-x86/rec_lin86_asm.S
+++ b/core/rec-x86/rec_lin86_asm.S
@@ -9,7 +9,7 @@
 .globl cycle_counter
 .globl loop_no_update
 .globl intc_sched
-.globl bm_GetCode
+.globl bm_GetCodeByVAddr
 .globl cycle_counter
 .globl UpdateSystem
 .globl rdv_DoInterrupts
@@ -109,7 +109,7 @@ ngen_mainloop:
 # next_pc _MUST_ be on ecx
 no_update:
 	mov esi,ecx
-	call _Z10bm_GetCodej #bm_GetCode
+	call bm_GetCodeByVAddr
 	jmp eax

 intc_sched_offs:
diff --git a/core/rec-x86/rec_x86_asm.cpp b/core/rec-x86/rec_x86_asm.cpp
index 74ba494f5..0d7161980 100644
--- a/core/rec-x86/rec_x86_asm.cpp
+++ b/core/rec-x86/rec_x86_asm.cpp
@@ -84,7 +84,7 @@ naked void ngen_mainloop(void* cntx)
 		//next_pc _MUST_ be on ecx
 no_update:
 		mov esi,ecx;
-		call bm_GetCode
+		call bm_GetCodeByVAddr
 		jmp eax;

 intc_sched_offs:
diff --git a/core/serialize.cpp b/core/serialize.cpp
index 04cb136a1..913d6bbe5 100644
--- a/core/serialize.cpp
+++ b/core/serialize.cpp
@@ -492,8 +492,7 @@ extern TLB_Entry ITLB[4];
 #if defined(NO_MMU)
 extern u32 sq_remap[64];
 #else
-extern u32 ITLB_LRU_USE[64];
-extern u32 mmu_error_TT;
+static u32 ITLB_LRU_USE[64];
 #endif

@@ -1085,7 +1084,6 @@ bool dc_serialize(void **data, unsigned int *total_size)
 	REICAST_SA(sq_remap,64);
 #else
 	REICAST_SA(ITLB_LRU_USE,64);
-	REICAST_S(mmu_error_TT);
 #endif

@@ -1487,7 +1485,6 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
 	REICAST_USA(sq_remap,64);
 #else
 	REICAST_USA(ITLB_LRU_USE,64);
-	REICAST_US(mmu_error_TT);
 #endif

@@ -1877,7 +1874,6 @@ bool dc_unserialize(void **data, unsigned int *total_size)
 	REICAST_USA(sq_remap,64);
 #else
 	REICAST_USA(ITLB_LRU_USE,64);
-	REICAST_US(mmu_error_TT);
 #endif

diff --git a/shell/android-studio/reicast/src/main/jni/Android.mk b/shell/android-studio/reicast/src/main/jni/Android.mk
index fb12326e1..09719e421 100644
--- a/shell/android-studio/reicast/src/main/jni/Android.mk
+++ b/shell/android-studio/reicast/src/main/jni/Android.mk
@@ -63,8 +63,8 @@ LOCAL_SRC_FILES := $(RZDCY_FILES)
 LOCAL_SRC_FILES += $(wildcard $(LOCAL_PATH)/jni/src/Android.cpp)
 LOCAL_SRC_FILES += $(wildcard $(LOCAL_PATH)/jni/src/utils.cpp)
 LOCAL_CFLAGS := $(RZDCY_CFLAGS) -fPIC -fvisibility=hidden -ffunction-sections -fdata-sections
-LOCAL_CXXFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections
-LOCAL_CPPFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections
+LOCAL_CXXFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections -fexceptions
+LOCAL_CPPFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections -fexceptions

 # 7-Zip/LZMA settings (CHDv5)
 ifdef CHD5_LZMA
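Reviewer note on the immediate-address hunk in rec_x64.cpp above: with the full MMU on, a constant guest address is baked into the generated code only when it sits in the same 4 KiB page as the block's own vaddr and already translates without error at compile time; anything else falls back to the generic run-time access path. A compact sketch of that predicate, with a hypothetical translate() standing in for mmu_data_translation():

#include <cstdint>
#include <cstdio>
#include <optional>

constexpr uint32_t PAGE_SHIFT = 12;   // 4 KiB MMU pages

// Stand-in for the data-TLB lookup: identity-map one region, fail the rest.
static std::optional<uint32_t> translate(uint32_t vaddr)
{
    if (vaddr >= 0x8C000000u && vaddr < 0x8D000000u)
        return vaddr - 0x80000000u;
    return std::nullopt;
}

// Return the physical address to bake into the generated code, or nullopt
// to force the generic (run-time translated) memory access path.
static std::optional<uint32_t> fold_immediate(uint32_t block_vaddr, uint32_t imm)
{
    if ((imm >> PAGE_SHIFT) != (block_vaddr >> PAGE_SHIFT))
        return std::nullopt;   // other page: its mapping may change under the block
    return translate(imm);     // same page: fold only if it translates right now
}

int main()
{
    std::printf("%d\n", fold_immediate(0x8C010000, 0x8C010040).has_value()); // 1: same page
    std::printf("%d\n", fold_immediate(0x8C010000, 0x8C020040).has_value()); // 0: other page
}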