wince: use setjmp/longjmp instead of try/catch for dynarecs

WinCE fast mmu implementation
WIP arm64 dynarec
Flyinghead 2019-04-15 18:02:34 +02:00
parent ba00da2420
commit dece3fc13e
22 changed files with 1246 additions and 356 deletions
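Aside on the commit's theme: dynarec-generated code has no C++ unwind tables, so a SH4ThrownException raised inside a memory-access helper cannot safely propagate back through JIT stack frames. The helpers therefore report MMU faults by value, and the main loop uses setjmp/longjmp to get back to the dispatcher. A minimal sketch of that pattern, with hypothetical names (the real code is in ngen_mainloop, ReadMemNoEx and DoMMUException below):

    #include <setjmp.h>

    static jmp_buf jmp_env;

    // Dispatcher: setjmp() is re-entered after every guest MMU fault.
    void mainloop_sketch()
    {
        setjmp(jmp_env);
        while (sh4_running)          // hypothetical flag
            run_block(next_pc);      // jump into JIT-generated code
    }

    // Helper called from JIT code: no C++ throw, just a flag + longjmp.
    template<typename T>
    T read_mem_sketch(u32 addr, u32 pc)
    {
        u32 fault;
        T v = mmu_ReadMemNoEx<T>(addr, &fault); // delivers the guest exception itself
        if (fault)
        {
            spc = pc;                // guest resumes at the faulting instruction
            longjmp(jmp_env, 1);     // unwind straight past the JIT frames
        }
        return v;
    }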


@@ -115,7 +115,9 @@
  */
-#define NO_MMU
+//#define NO_MMU
+#define FAST_MMU
+#define USE_WINCE_HACK
 
 #define DC_PLATFORM_MASK 7
 #define DC_PLATFORM_DREAMCAST 0 /* Works, for the most part */


@@ -183,6 +183,11 @@ INLINE Trv DYNACALL _vmem_readt(u32 addr)
 		}
 	}
 }
+template u8 DYNACALL _vmem_readt<u8, u8>(u32 addr);
+template u16 DYNACALL _vmem_readt<u16, u16>(u32 addr);
+template u32 DYNACALL _vmem_readt<u32, u32>(u32 addr);
+template u64 DYNACALL _vmem_readt<u64, u64>(u32 addr);
 
 template<typename T>
 INLINE void DYNACALL _vmem_writet(u32 addr,T data)
 {
@@ -225,6 +230,10 @@ INLINE void DYNACALL _vmem_writet(u32 addr,T data)
 		}
 	}
 }
+template void DYNACALL _vmem_writet<u8>(u32 addr, u8 data);
+template void DYNACALL _vmem_writet<u16>(u32 addr, u16 data);
+template void DYNACALL _vmem_writet<u32>(u32 addr, u32 data);
+template void DYNACALL _vmem_writet<u64>(u32 addr, u64 data);
 
 //ReadMem/WriteMem functions
 //ReadMem
@@ -552,7 +561,7 @@ error:
 }
 #endif
 
-int fd;
+int vmem_fd;
 
 void* _nvmem_unused_buffer(u32 start,u32 end)
 {
 	void* ptr=mmap(&virt_ram_base[start], end-start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
@@ -572,7 +581,7 @@ error:
 	verify((addrsz%size)==0);
 	verify(map_times>=1);
 	u32 prot=PROT_READ|(w?PROT_WRITE:0);
-	rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset);
+	rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
 	if (MAP_FAILED==rv || rv!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0))
 	{
 		printf("MAP1 failed %d\n",errno);
@@ -582,7 +591,7 @@ error:
 	for (u32 i=1;i<map_times;i++)
 	{
 		dst+=size;
-		ptr=mmap(&virt_ram_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset);
+		ptr=mmap(&virt_ram_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
 		if (MAP_FAILED==ptr || ptr!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0))
 		{
 			printf("MAP2 failed %d\n",errno);
@@ -598,26 +607,26 @@ error:
 #if HOST_OS == OS_DARWIN
 	string path = get_writable_data_path("/dcnzorz_mem");
-	fd = open(path.c_str(),O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
+	vmem_fd = open(path.c_str(),O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
 	unlink(path.c_str());
-	verify(ftruncate(fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
+	verify(ftruncate(vmem_fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
 #elif !defined(_ANDROID)
-	fd = shm_open("/dcnzorz_mem", O_CREAT | O_EXCL | O_RDWR,S_IREAD | S_IWRITE);
+	vmem_fd = shm_open("/dcnzorz_mem", O_CREAT | O_EXCL | O_RDWR,S_IREAD | S_IWRITE);
 	shm_unlink("/dcnzorz_mem");
-	if (fd==-1)
+	if (vmem_fd==-1)
 	{
-		fd = open("dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
+		vmem_fd = open("dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
 		unlink("dcnzorz_mem");
 	}
-	verify(ftruncate(fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
+	verify(ftruncate(vmem_fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
 #else
-	fd = ashmem_create_region(0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX);
+	vmem_fd = ashmem_create_region(0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX);
 	if (false)//this causes writebacks to flash -> slow and stuttery
 	{
-		fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
+		vmem_fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
 		unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem");
 	}
 #endif
@@ -730,7 +739,7 @@ bool _vmem_reserve()
 	//I really should check teh docs before codin ;p
 	//[0x00800000,0x00A00000);
 	map_buffer(0x00800000,0x01000000,MAP_ARAM_START_OFFSET,ARAM_SIZE,false);
-	map_buffer(0x20000000,0x20000000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true);
+	map_buffer(0x02800000,0x02800000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true);
 
 	aica_ram.size=ARAM_SIZE;
 	aica_ram.data=(u8*)ptr;
@@ -804,7 +813,7 @@ void _vmem_release()
 		virt_ram_base = NULL;
 	}
 #if HOST_OS != OS_WINDOWS
-	close(fd);
+	close(vmem_fd);
 #endif
 }
 }


@@ -49,11 +49,13 @@ u8 DYNACALL _vmem_ReadMem8(u32 Address);
 u16 DYNACALL _vmem_ReadMem16(u32 Address);
 u32 DYNACALL _vmem_ReadMem32(u32 Address);
 u64 DYNACALL _vmem_ReadMem64(u32 Address);
+template<typename T, typename Trv> Trv DYNACALL _vmem_readt(u32 addr);
 
 //WriteMem(s)
 void DYNACALL _vmem_WriteMem8(u32 Address,u8 data);
 void DYNACALL _vmem_WriteMem16(u32 Address,u16 data);
 void DYNACALL _vmem_WriteMem32(u32 Address,u32 data);
 void DYNACALL _vmem_WriteMem64(u32 Address,u64 data);
+template<typename T> void DYNACALL _vmem_writet(u32 addr, T data);
 
 //should be called at start up to ensure it will succeed :)
 bool _vmem_reserve();
@@ -70,4 +72,4 @@ static inline bool _nvmem_enabled() {
 	return virt_ram_base != 0;
 }
 void _vmem_bm_reset();


@@ -97,7 +97,7 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr)
 }
 
 // addr must be a virtual address
-DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
+DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
 {
 #ifndef NO_MMU
 	if (!mmu_enabled())
@@ -110,32 +110,47 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
 	{
 		switch (addr)
 		{
+#ifdef USE_WINCE_HACK
 		case 0xfffffde7: // GetTickCount
 			// This should make this syscall faster
 			r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK;
 			next_pc = pr;
+			addr = next_pc;
 			break;
+
+		case 0xfffffd05: // QueryPerformanceCounter(u64 *)
+			{
+				u32 paddr;
+				if (mmu_data_translation<MMU_TT_DWRITE, u64>(r[4], paddr) == MMU_ERROR_NONE)
+				{
+					_vmem_WriteMem64(paddr, sh4_sched_now64() >> 4);
+					r[0] = 1;
+					next_pc = pr;
+				}
+				else
+				{
+					Do_Exception(addr, 0xE0, 0x100);
+				}
+			}
+			break;
+#endif
 
 		default:
 			Do_Exception(addr, 0xE0, 0x100);
+			addr = next_pc;
 			break;
 		}
-		addr = next_pc;
 	}
 
-	try {
-		u32 paddr;
-		bool shared;
-		mmu_instruction_translation(addr, paddr, shared);
-
-		return (DynarecCodeEntryPtr)bm_GetCode(paddr);
-	} catch (SH4ThrownException& ex) {
-		Do_Exception(addr, ex.expEvn, ex.callVect);
-		u32 paddr;
-		bool shared;
+	u32 paddr;
+	bool shared;
+	u32 rv = mmu_instruction_translation(addr, paddr, shared);
+	if (rv != MMU_ERROR_NONE)
+	{
+		DoMMUException(addr, rv, MMU_TT_IREAD);
 		mmu_instruction_translation(next_pc, paddr, shared);
-		return (DynarecCodeEntryPtr)bm_GetCode(paddr);
 	}
+
+	return (DynarecCodeEntryPtr)bm_GetCode(paddr);
 }
 #endif
 }
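Aside on the GetTickCount shortcut above: sh4_sched_now64() counts elapsed SH4 cycles, so scaling by 1000/SH4_MAIN_CLOCK converts cycles to milliseconds. A quick sanity check, assuming the usual 200 MHz main clock (an assumption; see the SH4 scheduler header for the real constant):

    // one emulated second = 200,000,000 cycles (assumed 200 MHz clock)
    u64 cycles = 200000000;
    u32 ms = cycles * 1000 / 200000000;   // == 1000, i.e. GetTickCount advances 1000 ms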
@@ -220,6 +235,7 @@ void bm_RemoveBlock(RuntimeBlockInfo* block)
 			all_blocks.erase(it);
 			break;
 		}
+	// FIXME need to remove refs
 	delete block;
 }


@@ -17,7 +17,7 @@ struct RuntimeBlockInfo_Core
 struct RuntimeBlockInfo: RuntimeBlockInfo_Core
 {
-	void Setup(u32 pc,fpscr_t fpu_cfg);
+	bool Setup(u32 pc,fpscr_t fpu_cfg);
 	const char* hash(bool full=true, bool reloc=false);
 
 	u32 vaddr;
@@ -89,7 +89,7 @@ void bm_WriteBlockMap(const string& file);
 DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr);
 extern "C" {
-__attribute__((used)) DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr);
+__attribute__((used)) DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr);
 }
 RuntimeBlockInfo* bm_GetBlock(void* dynarec_code);


@@ -1007,13 +1007,15 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg)
 		state.info.has_fpu=false;
 }
 
-void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
+bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
 {
 	blk=rbi;
 	state_Setup(blk->vaddr, blk->fpu_cfg);
 	ngen_GetFeatures(&state.ngen);
 
 	blk->guest_opcodes=0;
+	// If full MMU, don't allow the block to extend past the end of the current 4K page
+	u32 max_pc = mmu_enabled() ? ((state.cpu.rpc >> 12) + 1) << 12 : 0xFFFFFFFF;
 
 	for(;;)
 	{
@@ -1025,10 +1027,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
 			//there is no break here by design
 			case NDO_NextOp:
 			{
-				if (
-					( (blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT) || (blk->guest_cycles >= max_cycles) )
-					&& !state.cpu.is_delayslot
-				)
+				if ((blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT || blk->guest_cycles >= max_cycles || state.cpu.rpc >= max_pc)
+						&& !state.cpu.is_delayslot)
 				{
 					dec_End(state.cpu.rpc,BET_StaticJump,false);
 				}
@@ -1053,7 +1053,16 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
 			}
 			*/
 
-				u32 op=IReadMem16(state.cpu.rpc);
+				u32 op;
+				if (!mmu_enabled())
+					op = IReadMem16(state.cpu.rpc);
+				else
+				{
+					u32 exception_occurred;
+					op = mmu_IReadMem16NoEx(state.cpu.rpc, &exception_occurred);
+					if (exception_occurred)
+						return false;
+				}
 				if (op==0 && state.cpu.is_delayslot)
 				{
 					printf("Delayslot 0 hack!\n");
@@ -1104,8 +1113,8 @@ bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
 			case NDO_Jump:
 				die("Too old");
-				state.NextOp=state.JumpOp;
-				state.cpu.rpc=state.JumpAddr;
+				//state.NextOp=state.JumpOp;
+				//state.cpu.rpc=state.JumpAddr;
 				break;
 
 			case NDO_End:
@@ -1187,6 +1196,8 @@ _end:
 	//make sure we don't use wayy-too-few cycles
 	blk->guest_cycles=max(1U,blk->guest_cycles);
 	blk=0;
+
+	return true;
 }
 #endif
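The max_pc clamp introduced above keeps each compiled block inside a single 4 KB guest page, so a block never spans two instruction translations. A worked example of the arithmetic (illustrative):

    // With mmu_enabled() and state.cpu.rpc == 0x8C001234:
    u32 max_pc = ((0x8C001234 >> 12) + 1) << 12;   // == 0x8C002000
    // decoding stops at the page boundary instead of running into the next page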


@@ -45,7 +45,7 @@ struct ngen_features
 };
 
 struct RuntimeBlockInfo;
-void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
+bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
 
 struct state_t
 {


@@ -194,7 +194,7 @@ const char* RuntimeBlockInfo::hash(bool full, bool relocable)
 	return block_hash;
 }
 
-void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
+bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
 {
 	staging_runs=addr=lookups=runs=host_code_size=0;
 	guest_cycles=guest_opcodes=host_opcodes=0;
@@ -210,7 +210,12 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
 	if (mmu_enabled())
 	{
 		bool shared;
-		mmu_instruction_translation(vaddr, addr, shared);
+		u32 rv = mmu_instruction_translation(vaddr, addr, shared);
+		if (rv != MMU_ERROR_NONE)
+		{
+			DoMMUException(vaddr, rv, MMU_TT_IREAD);
+			return false;
+		}
 		if (addr != vaddr && !shared)
 			asid = CCN_PTEH.ASID;
 	}
@@ -220,8 +225,12 @@ bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
 
 	oplist.clear();
 
-	dec_DecodeBlock(this,SH4_TIMESLICE/2);
+	if (!dec_DecodeBlock(this,SH4_TIMESLICE/2))
+		return false;
+
 	AnalyseBlock(this);
+
+	return true;
 }
 
 DynarecCodeEntryPtr rdv_CompilePC()
@@ -232,43 +241,36 @@ DynarecCodeEntryPtr rdv_CompilePC()
 		recSh4_ClearCache();
 
 	RuntimeBlockInfo* rbi = ngen_AllocateBlock();
-#ifndef NO_MMU
-	try {
-#endif
-		rbi->Setup(pc,fpscr);
 
-		bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
-		rbi->staging_runs=do_opts?100:-100;
-		ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
-		verify(rbi->code!=0);
-
-		bm_AddBlock(rbi);
-#ifndef NO_MMU
-	} catch (SH4ThrownException& ex) {
+	if (!rbi->Setup(pc,fpscr))
+	{
 		delete rbi;
-		throw ex;
+		return NULL;
 	}
-#endif
+
+	bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
+	rbi->staging_runs=do_opts?100:-100;
+	ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
+	verify(rbi->code!=0);
+	bm_AddBlock(rbi);
 
 	return rbi->code;
 }
 
+DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock_pc()
+{
+	return rdv_FailedToFindBlock(next_pc);
+}
+
 DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc)
 {
 	//printf("rdv_FailedToFindBlock ~ %08X\n",pc);
-#ifndef NO_MMU
-	try {
-#endif
-		next_pc=pc;
-
-		return rdv_CompilePC();
-#ifndef NO_MMU
-	} catch (SH4ThrownException& ex) {
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		return bm_GetCode2(next_pc);
-	}
-#endif
+	next_pc=pc;
+	DynarecCodeEntryPtr code = rdv_CompilePC();
+	if (code == NULL)
+		code = bm_GetCodeByVAddr(next_pc);
+	return code;
 }
 
 static void ngen_FailedToFindBlock_internal() {
@@ -304,8 +306,17 @@ u32 DYNACALL rdv_DoInterrupts(void* block_cpde)
 // addr must be the physical address of the start of the block
 DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)
 {
-	RuntimeBlockInfo *block = bm_GetBlock(addr);
-	bm_RemoveBlock(block);
+	if (mmu_enabled())
+	{
+		RuntimeBlockInfo *block = bm_GetBlock(addr);
+		//printf("rdv_BlockCheckFail addr %08x vaddr %08x pc %08x\n", addr, block->vaddr, next_pc);
+		bm_RemoveBlock(block);
+	}
+	else
+	{
+		next_pc = addr;
+		recSh4_ClearCache();
+	}
 	return rdv_CompilePC();
 }
@@ -320,7 +331,7 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)
 DynarecCodeEntryPtr rdv_FindOrCompile()
 {
-	DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
+	DynarecCodeEntryPtr rv=bm_GetCodeByVAddr(next_pc);
 	if (rv==ngen_FailedToFindBlock)
 		rv=rdv_CompilePC();
@@ -359,7 +370,7 @@ void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc)
 	DynarecCodeEntryPtr rv=rdv_FindOrCompile();
 
-	bool do_link=bm_GetBlock(code)==rbi;
+	bool do_link = !mmu_enabled() && bm_GetBlock(code) == rbi;
 	if (do_link)
 	{


@@ -65,6 +65,7 @@ void emit_SetBaseAddr();
 //Called from ngen_FailedToFindBlock
 DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc);
+DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock_pc();
 //Called when a block check failed, and the block needs to be invalidated
 DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc);
 //Called to compile code @pc


@@ -0,0 +1,406 @@
#include "mmu.h"
#include "hw/sh4/sh4_if.h"
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_core.h"
#include "types.h"
#ifdef FAST_MMU
#include "hw/mem/_vmem.h"
#include "mmu_impl.h"
#include "ccn.h"
#include "hw/sh4/sh4_mem.h"
#include "oslib/oslib.h"
extern TLB_Entry UTLB[64];
// Used when FullMMU is off
extern u32 sq_remap[64];
//#define TRACE_WINCE_SYSCALLS
#include "wince.h"
#define printf_mmu(...)
//#define printf_mmu printf
#define printf_win32(...)
extern const u32 mmu_mask[4];
extern const u32 fast_reg_lut[8];
const TLB_Entry *lru_entry = NULL;
static u32 lru_mask;
static u32 lru_address;
struct TLB_LinkedEntry {
TLB_Entry entry;
TLB_LinkedEntry *next_entry;
};
#define NBUCKETS 65536
TLB_LinkedEntry full_table[65536];
u32 full_table_size;
TLB_LinkedEntry *entry_buckets[NBUCKETS];
static u16 bucket_index(u32 address, int size)
{
return ((address >> 16) ^ ((address & 0xFC00) | size)) & (NBUCKETS - 1);
}
static void cache_entry(const TLB_Entry &entry)
{
verify(full_table_size < ARRAY_SIZE(full_table));
u16 bucket = bucket_index(entry.Address.VPN << 10, entry.Data.SZ1 * 2 + entry.Data.SZ0);
full_table[full_table_size].entry = entry;
full_table[full_table_size].next_entry = entry_buckets[bucket];
entry_buckets[bucket] = &full_table[full_table_size];
full_table_size++;
}
static void flush_cache()
{
full_table_size = 0;
memset(entry_buckets, 0, sizeof(entry_buckets));
}
template<u32 size>
bool find_entry_by_page_size(u32 address, const TLB_Entry **ret_entry)
{
u32 shift = size == 1 ? 2 :
size == 2 ? 6 :
size == 3 ? 10 : 0;
u32 vpn = (address >> (10 + shift)) << shift;
u16 bucket = bucket_index(vpn << 10, size);
TLB_LinkedEntry *pEntry = entry_buckets[bucket];
u32 length = 0;
while (pEntry != NULL)
{
if (pEntry->entry.Address.VPN == vpn && (size >> 1) == pEntry->entry.Data.SZ1 && (size & 1) == pEntry->entry.Data.SZ0)
{
if (pEntry->entry.Data.SH == 1 || pEntry->entry.Address.ASID == CCN_PTEH.ASID)
{
*ret_entry = &pEntry->entry;
return true;
}
}
pEntry = pEntry->next_entry;
}
return false;
}
static bool find_entry(u32 address, const TLB_Entry **ret_entry)
{
// 4k
if (find_entry_by_page_size<1>(address, ret_entry))
return true;
// 64k
if (find_entry_by_page_size<2>(address, ret_entry))
return true;
// 1m
if (find_entry_by_page_size<3>(address, ret_entry))
return true;
// 1k
if (find_entry_by_page_size<0>(address, ret_entry))
return true;
return false;
}
#if 0
static void dump_table()
{
static int iter = 1;
char filename[128];
sprintf(filename, "mmutable%03d", iter++);
FILE *f = fopen(filename, "wb");
if (f == NULL)
return;
fwrite(full_table, sizeof(full_table[0]), full_table_size, f);
fclose(f);
}
int main(int argc, char *argv[])
{
FILE *f = fopen(argv[1], "rb");
if (f == NULL)
{
perror(argv[1]);
return 1;
}
full_table_size = fread(full_table, sizeof(full_table[0]), ARRAY_SIZE(full_table), f);
fclose(f);
printf("Loaded %d entries\n", full_table_size);
std::vector<u32> addrs;
std::vector<u32> asids;
for (int i = 0; i < full_table_size; i++)
{
u32 sz = full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0;
u32 mask = sz == 3 ? 1*1024*1024 : sz == 2 ? 64*1024 : sz == 1 ? 4*1024 : 1024;
mask--;
addrs.push_back(((full_table[i].entry.Address.VPN << 10) & mmu_mask[sz]) | (random() * mask / RAND_MAX));
asids.push_back(full_table[i].entry.Address.ASID);
// printf("%08x -> %08x sz %d ASID %d SH %d\n", full_table[i].entry.Address.VPN << 10, full_table[i].entry.Data.PPN << 10,
// full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0,
// full_table[i].entry.Address.ASID, full_table[i].entry.Data.SH);
u16 bucket = bucket_index(full_table[i].entry.Address.VPN << 10, full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0);
full_table[i].next_entry = entry_buckets[bucket];
entry_buckets[bucket] = &full_table[i];
}
for (int i = 0; i < full_table_size / 10; i++)
{
addrs.push_back(random());
asids.push_back(666);
}
double start = os_GetSeconds();
int success = 0;
const int loops = 100000;
for (int i = 0; i < loops; i++)
{
for (int j = 0; j < addrs.size(); j++)
{
u32 addr = addrs[j];
CCN_PTEH.ASID = asids[j];
const TLB_Entry *p;
if (find_entry(addr, &p))
success++;
}
}
double end = os_GetSeconds();
printf("Lookup time: %f ms. Success rate %f max_len %d\n", (end - start) * 1000.0 / addrs.size(), (double)success / addrs.size() / loops, 0/*max_length*/);
}
#endif
bool UTLB_Sync(u32 entry)
{
TLB_Entry& tlb_entry = UTLB[entry];
u32 sz = tlb_entry.Data.SZ1 * 2 + tlb_entry.Data.SZ0;
lru_entry = &tlb_entry;
lru_mask = mmu_mask[sz];
lru_address = (tlb_entry.Address.VPN << 10) & lru_mask;
tlb_entry.Address.VPN = lru_address >> 10;
cache_entry(tlb_entry);
if (!mmu_enabled() && (tlb_entry.Address.VPN & (0xFC000000 >> 10)) == (0xE0000000 >> 10))
{
// Used when FullMMU is off
u32 vpn_sq = ((tlb_entry.Address.VPN & 0x7FFFF) >> 10) & 0x3F;//upper bits are always known [0xE0/E1/E2/E3]
sq_remap[vpn_sq] = tlb_entry.Data.PPN << 10;
}
return true;
}
void ITLB_Sync(u32 entry)
{
}
//Do a full lookup on the UTLB entry's
template<bool internal>
u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
{
if (lru_entry != NULL)
{
if (/*lru_entry->Data.V == 1 && */
lru_address == (va & lru_mask)
&& (lru_entry->Address.ASID == CCN_PTEH.ASID
|| lru_entry->Data.SH == 1
/*|| (sr.MD == 1 && CCN_MMUCR.SV == 1)*/)) // SV=1 not handled
{
//VPN->PPN | low bits
// TODO mask off PPN when updating TLB to avoid doing it at look up time
rv = ((lru_entry->Data.PPN << 10) & lru_mask) | (va & (~lru_mask));
*tlb_entry_ret = lru_entry;
return MMU_ERROR_NONE;
}
}
if (find_entry(va, tlb_entry_ret))
{
u32 mask = mmu_mask[(*tlb_entry_ret)->Data.SZ1 * 2 + (*tlb_entry_ret)->Data.SZ0];
rv = (((*tlb_entry_ret)->Data.PPN << 10) & mask) | (va & (~mask));
lru_entry = *tlb_entry_ret;
lru_mask = mask;
lru_address = ((*tlb_entry_ret)->Address.VPN << 10);
return MMU_ERROR_NONE;
}
#ifdef USE_WINCE_HACK
// WinCE hack
TLB_Entry entry;
if (wince_resolve_address(va, entry))
{
CCN_PTEL.reg_data = entry.Data.reg_data;
CCN_PTEA.reg_data = entry.Assistance.reg_data;
CCN_PTEH.reg_data = entry.Address.reg_data;
UTLB[CCN_MMUCR.URC] = entry;
*tlb_entry_ret = &UTLB[CCN_MMUCR.URC];
lru_entry = *tlb_entry_ret;
u32 sz = lru_entry->Data.SZ1 * 2 + lru_entry->Data.SZ0;
lru_mask = mmu_mask[sz];
lru_address = va & lru_mask;
rv = ((lru_entry->Data.PPN << 10) & lru_mask) | (va & (~lru_mask));
cache_entry(*lru_entry);
return MMU_ERROR_NONE;
}
#endif
return MMU_ERROR_TLB_MISS;
}
template u32 mmu_full_lookup<false>(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv);
template<u32 translation_type, typename T>
u32 mmu_data_translation(u32 va, u32& rv)
{
if (va & (sizeof(T) - 1))
{
return MMU_ERROR_BADADDR;
}
if (translation_type == MMU_TT_DWRITE)
{
if ((va & 0xFC000000) == 0xE0000000)
{
u32 lookup = mmu_full_SQ<translation_type>(va, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
rv = va; //SQ writes are not translated, only write backs are.
return MMU_ERROR_NONE;
}
}
// if ((sr.MD == 0) && (va & 0x80000000) != 0)
// {
// //if on kernel, and not SQ addr -> error
// return MMU_ERROR_BADADDR;
// }
if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
{
rv = va;
return MMU_ERROR_NONE;
}
// Not called if CCN_MMUCR.AT == 0
//if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
if (fast_reg_lut[va >> 29] != 0)
{
rv = va;
return MMU_ERROR_NONE;
}
const TLB_Entry *entry;
u32 lookup = mmu_full_lookup(va, &entry, rv);
// if (lookup != MMU_ERROR_NONE)
// return lookup;
#ifdef TRACE_WINCE_SYSCALLS
if (unresolved_unicode_string != 0 && lookup == MMU_ERROR_NONE)
{
if (va == unresolved_unicode_string)
{
unresolved_unicode_string = 0;
printf("RESOLVED %s\n", get_unicode_string(va).c_str());
}
}
#endif
// u32 md = entry->Data.PR >> 1;
//
// //0X & User mode-> protection violation
// //Priv mode protection
// if ((md == 0) && sr.MD == 0)
// {
// die("MMU_ERROR_PROTECTED");
// return MMU_ERROR_PROTECTED;
// }
//
// //X0 -> read olny
// //X1 -> read/write , can be FW
//
// //Write Protection (Lock or FW)
// if (translation_type == MMU_TT_DWRITE)
// {
// if ((entry->Data.PR & 1) == 0)
// {
// die("MMU_ERROR_PROTECTED");
// return MMU_ERROR_PROTECTED;
// }
// else if (entry->Data.D == 0)
// {
// die("MMU_ERROR_FIRSTWRITE");
// return MMU_ERROR_FIRSTWRITE;
// }
// }
return lookup;
}
template u32 mmu_data_translation<MMU_TT_DREAD, u8>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u64>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u8>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u16>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u32>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u64>(u32 va, u32& rv);
u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared)
{
if (va & 1)
{
return MMU_ERROR_BADADDR;
}
// if ((sr.MD == 0) && (va & 0x80000000) != 0)
// {
// //if SQ disabled , or if if SQ on but out of SQ mem then BAD ADDR ;)
// if (va >= 0xE0000000)
// return MMU_ERROR_BADADDR;
// }
if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
{
rv = va;
return MMU_ERROR_NONE;
}
// Hack fast implementation
const TLB_Entry *tlb_entry;
u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
u32 md = tlb_entry->Data.PR >> 1;
//0X & User mode-> protection violation
//Priv mode protection
// if ((md == 0) && sr.MD == 0)
// {
// return MMU_ERROR_PROTECTED;
// }
shared = tlb_entry->Data.SH == 1;
return MMU_ERROR_NONE;
}
void mmu_flush_table()
{
// printf("MMU tables flushed\n");
// ITLB[0].Data.V = 0;
// ITLB[1].Data.V = 0;
// ITLB[2].Data.V = 0;
// ITLB[3].Data.V = 0;
//
// for (u32 i = 0; i < 64; i++)
// UTLB[i].Data.V = 0;
lru_entry = NULL;
flush_cache();
}
#endif // FAST_MMU
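To make the hashed TLB cache above concrete, here is bucket_index applied to a sample 4 KB mapping (values chosen for illustration):

    // bucket_index(address, size) with a 4 KB page (size == 1):
    u32 address = 0x8C012000;
    u16 bucket = ((address >> 16) ^ ((address & 0xFC00) | 1)) & (NBUCKETS - 1);
    // (0x8C01 ^ 0x2001) & 0xFFFF == 0xAC00
    // find_entry() then probes the 4K, 64K, 1M and 1K buckets, in that order.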


@@ -101,7 +101,7 @@ WriteMem16Func WriteMem16;
 WriteMem32Func WriteMem32;
 WriteMem64Func WriteMem64;
 
-const u32 mmu_mask[4] =
+extern const u32 mmu_mask[4] =
 {
 	((0xFFFFFFFF) >> 10) << 10,	//1 kb page
 	((0xFFFFFFFF) >> 12) << 12,	//4 kb page
@@ -109,7 +109,7 @@ const u32 mmu_mask[4] =
 	((0xFFFFFFFF) >> 20) << 20	//1 MB page
 };
 
-const u32 fast_reg_lut[8] =
+extern const u32 fast_reg_lut[8] =
 {
 	0, 0, 0, 0	//P0-U0
 	, 1		//P1
@@ -134,6 +134,7 @@ const u32 ITLB_LRU_AND[4] =
 };
 u32 ITLB_LRU_USE[64];
 
+#ifndef FAST_MMU
 //sync mem mapping to mmu , suspend compiled blocks if needed.entry is a UTLB entry # , -1 is for full sync
 bool UTLB_Sync(u32 entry)
 {
@@ -160,6 +161,7 @@ void ITLB_Sync(u32 entry)
 {
 	printf_mmu("ITLB MEM remap %d : 0x%X to 0x%X : %d\n", entry, ITLB[entry].Address.VPN << 10, ITLB[entry].Data.PPN << 10, ITLB[entry].Data.V);
 }
+#endif
 
 void RaiseException(u32 expEvnt, u32 callVect) {
 #if !defined(NO_MMU)
@@ -170,16 +172,12 @@ void RaiseException(u32 expEvnt, u32 callVect) {
 #endif
 }
 
-u32 mmu_error_TT;
 void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
 {
 	printf_mmu("mmu_raise_exception -> pc = 0x%X : ", next_pc);
 	CCN_TEA = address;
 	CCN_PTEH.VPN = address >> 10;
 
-	//save translation type error :)
-	mmu_error_TT = am;
-
 	switch (mmu_error)
 	{
 		//No error
@@ -262,6 +260,94 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
 	die("Unknown mmu_error");
 }
 
+void DoMMUException(u32 address, u32 mmu_error, u32 access_type)
+{
+	printf_mmu("DoMMUException -> pc = 0x%X : ", next_pc);
+	CCN_TEA = address;
+	CCN_PTEH.VPN = address >> 10;
+
+	switch (mmu_error)
+	{
+		//No error
+	case MMU_ERROR_NONE:
+		printf("Error : mmu_raise_exception(MMU_ERROR_NONE)\n");
+		break;
+
+		//TLB miss
+	case MMU_ERROR_TLB_MISS:
+		printf_mmu("MMU_ERROR_UTLB_MISS 0x%X, handled\n", address);
+		if (access_type == MMU_TT_DWRITE)		//WTLBMISS - Write Data TLB Miss Exception
+			Do_Exception(next_pc, 0x60, 0x400);
+		else if (access_type == MMU_TT_DREAD)	//RTLBMISS - Read Data TLB Miss Exception
+			Do_Exception(next_pc, 0x40, 0x400);
+		else									//ITLBMISS - Instruction TLB Miss Exception
+			Do_Exception(next_pc, 0x40, 0x400);
+		return;
+		break;
+
+		//TLB Multihit
+	case MMU_ERROR_TLB_MHIT:
+		printf("MMU_ERROR_TLB_MHIT @ 0x%X\n", address);
+		break;
+
+		//Mem is read/write protected (depends on translation type)
+	case MMU_ERROR_PROTECTED:
+		printf_mmu("MMU_ERROR_PROTECTED 0x%X, handled\n", address);
+		if (access_type == MMU_TT_DWRITE)		//WRITEPROT - Write Data TLB Protection Violation Exception
+			Do_Exception(next_pc, 0xC0, 0x100);
+		else if (access_type == MMU_TT_DREAD)	//READPROT - Data TLB Protection Violation Exception
+			Do_Exception(next_pc, 0xA0, 0x100);
+		else
+		{
+			verify(false);
+		}
+		return;
+		break;
+
+		//Mem is write protected , firstwrite
+	case MMU_ERROR_FIRSTWRITE:
+		printf_mmu("MMU_ERROR_FIRSTWRITE\n");
+		verify(access_type == MMU_TT_DWRITE);
+		//FIRSTWRITE - Initial Page Write Exception
+		Do_Exception(next_pc, 0x80, 0x100);
+		return;
+		break;
+
+		//data read/write missasligned
+	case MMU_ERROR_BADADDR:
+		if (access_type == MMU_TT_DWRITE)		//WADDERR - Write Data Address Error
+			Do_Exception(next_pc, 0x100, 0x100);
+		else if (access_type == MMU_TT_DREAD)	//RADDERR - Read Data Address Error
+			Do_Exception(next_pc, 0xE0, 0x100);
+		else									//IADDERR - Instruction Address Error
+		{
+#ifdef TRACE_WINCE_SYSCALLS
+			if (!print_wince_syscall(address))
+#endif
+				printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address);
+			Do_Exception(next_pc, 0xE0, 0x100);
+			return;
+		}
+		printf_mmu("MMU_ERROR_BADADDR(d) 0x%X, handled\n", address);
+		return;
+		break;
+
+		//Can't Execute
+	case MMU_ERROR_EXECPROT:
+		printf("MMU_ERROR_EXECPROT 0x%X\n", address);
+		//EXECPROT - Instruction TLB Protection Violation Exception
+		Do_Exception(next_pc, 0xA0, 0x100);
+		return;
+		break;
+	}
+
+	die("Unknown mmu_error");
+}
+
 bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
 {
 	if (Data.V == 0)
@@ -283,6 +369,7 @@ bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
 	return false;
 }
 
+#ifndef FAST_MMU
 //Do a full lookup on the UTLB entry's
 template<bool internal>
 u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
@@ -328,6 +415,7 @@ u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
 	return MMU_ERROR_NONE;
 }
+#endif
 
 //Simple QACR translation for mmu (when AT is off)
 u32 mmu_QACR_SQ(u32 va)
@@ -342,6 +430,7 @@ u32 mmu_QACR_SQ(u32 va)
 	va &= ~0x1f;
 	return QACR + va;
 }
+
 template<u32 translation_type>
 u32 mmu_full_SQ(u32 va, u32& rv)
 {
@@ -387,11 +476,14 @@ u32 mmu_full_SQ(u32 va, u32& rv)
 	}
 	return MMU_ERROR_NONE;
 }
+template u32 mmu_full_SQ<MMU_TT_DWRITE>(u32 va, u32& rv);
 
+#ifndef FAST_MMU
 template<u32 translation_type, typename T>
-void mmu_data_translation(u32 va, u32& rv)
+u32 mmu_data_translation(u32 va, u32& rv)
 {
 	if (va & (sizeof(T) - 1))
-		mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
+		return MMU_ERROR_BADADDR;
 
 	if (translation_type == MMU_TT_DWRITE)
 	{
@@ -399,23 +491,23 @@ void mmu_data_translation(u32 va, u32& rv)
 		{
 			u32 lookup = mmu_full_SQ<translation_type>(va, rv);
 			if (lookup != MMU_ERROR_NONE)
-				mmu_raise_exception(lookup, va, translation_type);
+				return lookup;
 
 			rv = va;	//SQ writes are not translated, only write backs are.
-			return;
+			return MMU_ERROR_NONE;
 		}
 	}
 
 	if ((sr.MD == 0) && (va & 0x80000000) != 0)
 	{
 		//if on kernel, and not SQ addr -> error
-		mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
+		return MMU_ERROR_BADADDR;
 	}
 
 	if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
 	{
 		rv = va;
-		return;
+		return MMU_ERROR_NONE;
 	}
 
 	// Not called if CCN_MMUCR.AT == 0
@@ -423,14 +515,14 @@ u32 mmu_data_translation(u32 va, u32& rv)
 	if (fast_reg_lut[va >> 29] != 0)
 	{
 		rv = va;
-		return;
+		return MMU_ERROR_NONE;
 	}
 
 	const TLB_Entry *entry;
 	u32 lookup = mmu_full_lookup(va, &entry, rv);
 
 	if (lookup != MMU_ERROR_NONE)
-		mmu_raise_exception(lookup, va, translation_type);
+		return lookup;
 
 #ifdef TRACE_WINCE_SYSCALLS
 	if (unresolved_unicode_string != 0)
@@ -449,7 +541,7 @@ u32 mmu_data_translation(u32 va, u32& rv)
 	//Priv mode protection
 	if ((md == 0) && sr.MD == 0)
 	{
-		mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
+		return MMU_ERROR_PROTECTED;
 	}
 
 	//X0 -> read olny
@@ -459,32 +551,34 @@ u32 mmu_data_translation(u32 va, u32& rv)
 	if (translation_type == MMU_TT_DWRITE)
 	{
 		if ((entry->Data.PR & 1) == 0)
-			mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
+			return MMU_ERROR_PROTECTED;
 		else if (entry->Data.D == 0)
-			mmu_raise_exception(MMU_ERROR_FIRSTWRITE, va, translation_type);
+			return MMU_ERROR_FIRSTWRITE;
 	}
+
+	return MMU_ERROR_NONE;
 }
-template void mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
-template void mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
+template u32 mmu_data_translation<MMU_TT_DWRITE, u64>(u32 va, u32& rv);
 
-void mmu_instruction_translation(u32 va, u32& rv, bool& shared)
+u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared)
 {
 	if (va & 1)
 	{
-		mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
+		return MMU_ERROR_BADADDR;
 	}
 	if ((sr.MD == 0) && (va & 0x80000000) != 0)
 	{
 		//if SQ disabled , or if if SQ on but out of SQ mem then BAD ADDR ;)
 		if (va >= 0xE0000000)
-			mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
+			return MMU_ERROR_BADADDR;
 	}
 
 	if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
 	{
 		rv = va;
-		return;
+		return MMU_ERROR_NONE;
 	}
 
 	bool mmach = false;
@@ -521,7 +615,7 @@ retry_ITLB_Match:
 		u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
 
 		if (lookup != MMU_ERROR_NONE)
-			mmu_raise_exception(lookup, va, MMU_TT_IREAD);
+			return lookup;
 
 		u32 replace_index = ITLB_LRU_USE[CCN_MMUCR.LRUI];
 		verify(replace_index != 0xFFFFFFFF);
@@ -535,11 +629,11 @@ retry_ITLB_Match:
 	{
 		if (nom)
 		{
-			mmu_raise_exception(MMU_ERROR_TLB_MHIT, va, MMU_TT_IREAD);
+			return MMU_ERROR_TLB_MHIT;
 		}
 		else
 		{
-			mmu_raise_exception(MMU_ERROR_TLB_MISS, va, MMU_TT_IREAD);
+			return MMU_ERROR_TLB_MISS;
 		}
 	}
 
@@ -552,25 +646,27 @@ retry_ITLB_Match:
 	//Priv mode protection
 	if ((md == 0) && sr.MD == 0)
 	{
-		mmu_raise_exception(MMU_ERROR_PROTECTED, va, MMU_TT_IREAD);
+		return MMU_ERROR_PROTECTED;
 	}
+
+	return MMU_ERROR_NONE;
 }
+#endif
 
 void mmu_set_state()
 {
 	if (CCN_MMUCR.AT == 1 && settings.dreamcast.FullMMU)
 	{
 		printf("Enabling Full MMU support\n");
-		ReadMem8 = &mmu_ReadMem8;
-		ReadMem16 = &mmu_ReadMem16;
 		IReadMem16 = &mmu_IReadMem16;
-		ReadMem32 = &mmu_ReadMem32;
-		ReadMem64 = &mmu_ReadMem64;
+		ReadMem8 = &mmu_ReadMem<u8>;
+		ReadMem16 = &mmu_ReadMem<u16>;
+		ReadMem32 = &mmu_ReadMem<u32>;
+		ReadMem64 = &mmu_ReadMem<u64>;
 
-		WriteMem8 = &mmu_WriteMem8;
-		WriteMem16 = &mmu_WriteMem16;
-		WriteMem32 = &mmu_WriteMem32;
-		WriteMem64 = &mmu_WriteMem64;
+		WriteMem8 = &mmu_WriteMem<u8>;
+		WriteMem16 = &mmu_WriteMem<u16>;
+		WriteMem32 = &mmu_WriteMem<u32>;
+		WriteMem64 = &mmu_WriteMem<u64>;
 		mmu_flush_table();
 	}
 	else
@@ -619,6 +715,7 @@ void MMU_term()
 {
 }
 
+#ifndef FAST_MMU
 void mmu_flush_table()
 {
 	//printf("MMU tables flushed\n");
@@ -631,66 +728,95 @@ void mmu_flush_table()
 	for (u32 i = 0; i < 64; i++)
 		UTLB[i].Data.V = 0;
 }
+#endif
 
-u8 DYNACALL mmu_ReadMem8(u32 adr)
+template<typename T>
+T DYNACALL mmu_ReadMem(u32 adr)
 {
 	u32 addr;
-	mmu_data_translation<MMU_TT_DREAD, u8>(adr, addr);
-	return _vmem_ReadMem8(addr);
+	u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
+	if (rv != MMU_ERROR_NONE)
+		mmu_raise_exception(rv, adr, MMU_TT_DREAD);
+	return _vmem_readt<T, T>(addr);
 }
-u16 DYNACALL mmu_ReadMem16(u32 adr)
-{
-	u32 addr;
-	mmu_data_translation<MMU_TT_DREAD, u16>(adr, addr);
-	return _vmem_ReadMem16(addr);
-}
 
 u16 DYNACALL mmu_IReadMem16(u32 vaddr)
 {
 	u32 addr;
 	bool shared;
-	mmu_instruction_translation(vaddr, addr, shared);
+	u32 rv = mmu_instruction_translation(vaddr, addr, shared);
+	if (rv != MMU_ERROR_NONE)
+		mmu_raise_exception(rv, vaddr, MMU_TT_IREAD);
 	return _vmem_ReadMem16(addr);
 }
 
-u32 DYNACALL mmu_ReadMem32(u32 adr)
+template<typename T>
+void DYNACALL mmu_WriteMem(u32 adr, T data)
 {
 	u32 addr;
-	mmu_data_translation<MMU_TT_DREAD, u32>(adr, addr);
-	return _vmem_ReadMem32(addr);
-}
-u64 DYNACALL mmu_ReadMem64(u32 adr)
-{
-	u32 addr;
-	mmu_data_translation<MMU_TT_DREAD, u64>(adr, addr);
-	return _vmem_ReadMem64(addr);
+	u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
+	if (rv != MMU_ERROR_NONE)
+		mmu_raise_exception(rv, adr, MMU_TT_DWRITE);
+	_vmem_writet<T>(addr, data);
 }
 
-void DYNACALL mmu_WriteMem8(u32 adr, u8 data)
+template<typename T>
+T DYNACALL mmu_ReadMemNoEx(u32 adr, u32 *exception_occurred)
 {
 	u32 addr;
-	mmu_data_translation<MMU_TT_DWRITE, u8>(adr, addr);
-	_vmem_WriteMem8(addr, data);
+	u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
+	if (rv != MMU_ERROR_NONE)
+	{
+		DoMMUException(adr, rv, MMU_TT_DREAD);
+		*exception_occurred = 1;
+		return 0;
+	}
+	else
+	{
+		*exception_occurred = 0;
+		return _vmem_readt<T, T>(addr);
+	}
 }
+template u8 mmu_ReadMemNoEx<u8>(u32 adr, u32 *exception_occurred);
+template u16 mmu_ReadMemNoEx<u16>(u32 adr, u32 *exception_occurred);
+template u32 mmu_ReadMemNoEx<u32>(u32 adr, u32 *exception_occurred);
+template u64 mmu_ReadMemNoEx<u64>(u32 adr, u32 *exception_occurred);
 
-void DYNACALL mmu_WriteMem16(u32 adr, u16 data)
+u16 DYNACALL mmu_IReadMem16NoEx(u32 vaddr, u32 *exception_occurred)
 {
 	u32 addr;
-	mmu_data_translation<MMU_TT_DWRITE, u16>(adr, addr);
-	_vmem_WriteMem16(addr, data);
+	bool shared;
+	u32 rv = mmu_instruction_translation(vaddr, addr, shared);
+	if (rv != MMU_ERROR_NONE)
+	{
+		DoMMUException(vaddr, rv, MMU_TT_IREAD);
+		*exception_occurred = 1;
+		return 0;
+	}
+	else
+	{
+		*exception_occurred = 0;
+		return _vmem_ReadMem16(addr);
+	}
 }
-void DYNACALL mmu_WriteMem32(u32 adr, u32 data)
+
+template<typename T>
+u32 DYNACALL mmu_WriteMemNoEx(u32 adr, T data)
 {
 	u32 addr;
-	mmu_data_translation<MMU_TT_DWRITE, u32>(adr, addr);
-	_vmem_WriteMem32(addr, data);
-}
-void DYNACALL mmu_WriteMem64(u32 adr, u64 data)
-{
-	u32 addr;
-	mmu_data_translation<MMU_TT_DWRITE, u64>(adr, addr);
-	_vmem_WriteMem64(addr, data);
+	u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
+	if (rv != MMU_ERROR_NONE)
+	{
+		DoMMUException(adr, rv, MMU_TT_DWRITE);
+		return 1;
+	}
+	_vmem_writet<T>(addr, data);
+	return 0;
 }
+template u32 mmu_WriteMemNoEx<u8>(u32 adr, u8 data);
+template u32 mmu_WriteMemNoEx<u16>(u32 adr, u16 data);
+template u32 mmu_WriteMemNoEx<u32>(u32 adr, u32 data);
+template u32 mmu_WriteMemNoEx<u64>(u32 adr, u64 data);
 
 bool mmu_TranslateSQW(u32 adr, u32* out)
 {


@@ -10,6 +10,22 @@
 //Data write
 #define MMU_TT_DREAD 2
 
+//Return Values
+//Translation was successful
+#define MMU_ERROR_NONE 0
+//TLB miss
+#define MMU_ERROR_TLB_MISS 1
+//TLB Multihit
+#define MMU_ERROR_TLB_MHIT 2
+//Mem is read/write protected (depends on translation type)
+#define MMU_ERROR_PROTECTED 3
+//Mem is write protected , firstwrite
+#define MMU_ERROR_FIRSTWRITE 4
+//data-Opcode read/write missasligned
+#define MMU_ERROR_BADADDR 5
+//Can't Execute
+#define MMU_ERROR_EXECPROT 6
+
 struct TLB_Entry
 {
 	CCN_PTEH_type Address;
@@ -40,9 +56,10 @@ static INLINE bool mmu_enabled()
 template<bool internal = false>
 u32 mmu_full_lookup(u32 va, const TLB_Entry **entry, u32& rv);
-void mmu_instruction_translation(u32 va, u32& rv, bool& shared);
+u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared);
 template<u32 translation_type, typename T>
-extern void mmu_data_translation(u32 va, u32& rv);
+extern u32 mmu_data_translation(u32 va, u32& rv);
+void DoMMUException(u32 addr, u32 error_code, u32 access_type);
 
 #if defined(NO_MMU)
 	bool inline mmu_TranslateSQW(u32 addr, u32* mapped) {
@@ -51,16 +68,14 @@ extern u32 mmu_data_translation(u32 va, u32& rv);
 	}
 	void inline mmu_flush_table() {}
 #else
-	u8 DYNACALL mmu_ReadMem8(u32 addr);
-	u16 DYNACALL mmu_ReadMem16(u32 addr);
+	template<typename T> T DYNACALL mmu_ReadMem(u32 adr);
 	u16 DYNACALL mmu_IReadMem16(u32 addr);
-	u32 DYNACALL mmu_ReadMem32(u32 addr);
-	u64 DYNACALL mmu_ReadMem64(u32 addr);
 
-	void DYNACALL mmu_WriteMem8(u32 addr, u8 data);
-	void DYNACALL mmu_WriteMem16(u32 addr, u16 data);
-	void DYNACALL mmu_WriteMem32(u32 addr, u32 data);
-	void DYNACALL mmu_WriteMem64(u32 addr, u64 data);
+	template<typename T> void DYNACALL mmu_WriteMem(u32 adr, T data);
 
 	bool mmu_TranslateSQW(u32 addr, u32* mapped);
+
+	u16 DYNACALL mmu_IReadMem16NoEx(u32 adr, u32 *exception_occurred);
+	template<typename T> T DYNACALL mmu_ReadMemNoEx(u32 adr, u32 *exception_occurred);
+	template<typename T> u32 DYNACALL mmu_WriteMemNoEx(u32 adr, T data);
 #endif
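The NoEx variants declared above deliver the guest exception themselves (via DoMMUException) and report it through a flag or return value instead of a C++ throw, so callers in dynarec context can bail out without stack unwinding. A minimal caller sketch, mirroring the pattern used in dec_DecodeBlock earlier in this commit:

    u32 fault;
    u16 op = mmu_IReadMem16NoEx(pc, &fault);
    if (fault)
        return false;   // exception already delivered; caller just unwinds manually
    // ... decode op ...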


@@ -3,26 +3,8 @@
 #include "ccn.h"
 #include "mmu.h"
 
-//Do a full lookup on the UTLB entry's
-//Return Values
-//Translation was sucessfull , rv contains return
-#define MMU_ERROR_NONE 0
-//TLB miss
-#define MMU_ERROR_TLB_MISS 1
-//TLB Multihit
-#define MMU_ERROR_TLB_MHIT 2
-//Mem is read/write protected (depends on translation type)
-#define MMU_ERROR_PROTECTED 3
-//Mem is write protected , firstwrite
-#define MMU_ERROR_FIRSTWRITE 4
-//data-Opcode read/write missasligned
-#define MMU_ERROR_BADADDR 5
-//Can't Execute
-#define MMU_ERROR_EXECPROT 6
-
-extern u32 mmu_error_TT;
-
 void MMU_Init();
 void MMU_Reset(bool Manual);
 void MMU_Term();
+
+template<u32 translation_type> u32 mmu_full_SQ(u32 va, u32& rv);


@@ -22,15 +22,15 @@ extern VArray2 mem_b;
 //#define WriteMem64(addr,reg) { _vmem_WriteMem32(addr,((u32*)reg)[0]);_vmem_WriteMem32((addr)+4, ((u32*)reg)[1]); }
 #else
 
-typedef u8 (*ReadMem8Func)(u32 addr);
-typedef u16 (*ReadMem16Func)(u32 addr);
-typedef u32 (*ReadMem32Func)(u32 addr);
-typedef u64 (*ReadMem64Func)(u32 addr);
+typedef u8 DYNACALL (*ReadMem8Func)(u32 addr);
+typedef u16 DYNACALL (*ReadMem16Func)(u32 addr);
+typedef u32 DYNACALL (*ReadMem32Func)(u32 addr);
+typedef u64 DYNACALL (*ReadMem64Func)(u32 addr);
 
-typedef void (*WriteMem8Func)(u32 addr, u8 data);
-typedef void (*WriteMem16Func)(u32 addr, u16 data);
-typedef void (*WriteMem32Func)(u32 addr, u32 data);
-typedef void (*WriteMem64Func)(u32 addr, u64 data);
+typedef void DYNACALL (*WriteMem8Func)(u32 addr, u8 data);
+typedef void DYNACALL (*WriteMem16Func)(u32 addr, u16 data);
+typedef void DYNACALL (*WriteMem32Func)(u32 addr, u32 data);
+typedef void DYNACALL (*WriteMem64Func)(u32 addr, u64 data);
 
 extern ReadMem8Func ReadMem8;
 extern ReadMem16Func ReadMem16;


@@ -34,6 +34,7 @@ static bool rtt_to_buffer_game;
 static bool safemode_game;
 static bool tr_poly_depth_mask_game;
 static bool extra_depth_game;
+static bool full_mmu_game;
 
 cThread emu_thread(&dc_run, NULL);
@@ -137,12 +138,15 @@ void LoadSpecialSettings()
 	safemode_game = false;
 	tr_poly_depth_mask_game = false;
 	extra_depth_game = false;
+	full_mmu_game = false;
 
 	if (reios_windows_ce)
 	{
-		printf("Enabling Extra depth scaling for Windows CE games\n");
+		printf("Enabling Full MMU and Extra depth scaling for Windows CE game\n");
 		settings.rend.ExtraDepthScale = 0.1;
 		extra_depth_game = true;
+		settings.dreamcast.FullMMU = true;
+		full_mmu_game = true;
 	}
 
 	// Tony Hawk's Pro Skater 2
@@ -660,7 +664,8 @@ void SaveSettings()
 	cfgSaveInt("config", "Dreamcast.Cable", settings.dreamcast.cable);
 	cfgSaveInt("config", "Dreamcast.Region", settings.dreamcast.region);
 	cfgSaveInt("config", "Dreamcast.Broadcast", settings.dreamcast.broadcast);
-	cfgSaveBool("config", "Dreamcast.FullMMU", settings.dreamcast.FullMMU);
+	if (!full_mmu_game || !settings.dreamcast.FullMMU)
+		cfgSaveBool("config", "Dreamcast.FullMMU", settings.dreamcast.FullMMU);
 	cfgSaveBool("config", "Dynarec.idleskip", settings.dynarec.idleskip);
 	cfgSaveBool("config", "Dynarec.unstable-opt", settings.dynarec.unstable_opt);
 	if (!safemode_game || !settings.dynarec.safemode)


@ -24,6 +24,7 @@
#include <unistd.h> #include <unistd.h>
#include <sys/mman.h> #include <sys/mman.h>
#include <map> #include <map>
#include <setjmp.h>
#include "deps/vixl/aarch64/macro-assembler-aarch64.h" #include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64; using namespace vixl::aarch64;
@ -104,6 +105,8 @@ void Arm64CacheFlush(void* start, void* end)
double host_cpu_time; double host_cpu_time;
u64 guest_cpu_cycles; u64 guest_cpu_cycles;
static jmp_buf jmp_env;
static u32 cycle_counter;
#ifdef PROFILING #ifdef PROFILING
#include <time.h> #include <time.h>
@ -153,8 +156,8 @@ __asm__
".hidden ngen_FailedToFindBlock_ \n\t" ".hidden ngen_FailedToFindBlock_ \n\t"
".globl ngen_FailedToFindBlock_ \n\t" ".globl ngen_FailedToFindBlock_ \n\t"
"ngen_FailedToFindBlock_: \n\t" "ngen_FailedToFindBlock_: \n\t"
"mov w0, w29 \n\t" // "mov w0, w29 \n\t" // FIXME w29 might not be up to date anymore (exception in bm_GetCodeByVAddr)
"bl rdv_FailedToFindBlock \n\t" "bl rdv_FailedToFindBlock_pc \n\t"
"br x0 \n" "br x0 \n"
".hidden ngen_blockcheckfail \n\t" ".hidden ngen_blockcheckfail \n\t"
@ -180,10 +183,16 @@ void ngen_mainloop(void* v_cntx)
"stp s10, s11, [sp, #112] \n\t" "stp s10, s11, [sp, #112] \n\t"
"stp s12, s13, [sp, #128] \n\t" "stp s12, s13, [sp, #128] \n\t"
"stp x29, x30, [sp, #144] \n\t" "stp x29, x30, [sp, #144] \n\t"
// Use x28 as sh4 context pointer
"mov x28, %[cntx] \n\t" "stp %[cntx], %[cycle_counter], [sp, #-16]! \n\t" // Push context, cycle_counter address
// Use x27 as cycle_counter
"mov w27, %[_SH4_TIMESLICE] \n\t" "mov w27, %[_SH4_TIMESLICE] \n\t"
"str w27, [%[cycle_counter]] \n\t"
"mov x0, %[jmp_env] \n\t" // SETJMP
"bl setjmp \n\t"
// Use x28 as sh4 context pointer
"ldr x28, [sp] \n\t" // Set context
// w29 is next_pc // w29 is next_pc
"ldr w29, [x28, %[pc]] \n\t" "ldr w29, [x28, %[pc]] \n\t"
"b no_update \n" "b no_update \n"
@ -191,8 +200,11 @@ void ngen_mainloop(void* v_cntx)
".hidden intc_sched \n\t" ".hidden intc_sched \n\t"
".globl intc_sched \n\t" ".globl intc_sched \n\t"
"intc_sched: \n\t" "intc_sched: \n\t"
"add w27, w27, %[_SH4_TIMESLICE] \n\t" "ldr x27, [sp, #8] \n\t" // &cycle_counter
"mov x29, lr \n\r" // Trashing pc here but it will be reset at the end of the block or in DoInterrupts "ldr w0, [x27] \n\t" // cycle_counter
"add w0, w0, %[_SH4_TIMESLICE] \n\t"
"str w0, [x27] \n\t"
"mov x29, lr \n\t" // Trashing pc here but it will be reset at the end of the block or in DoInterrupts
"bl UpdateSystem \n\t" "bl UpdateSystem \n\t"
"mov lr, x29 \n\t" "mov lr, x29 \n\t"
"cbnz w0, .do_interrupts \n\t" "cbnz w0, .do_interrupts \n\t"
@ -208,7 +220,9 @@ void ngen_mainloop(void* v_cntx)
"no_update: \n\t" // next_pc _MUST_ be on w29 "no_update: \n\t" // next_pc _MUST_ be on w29
"ldr w0, [x28, %[CpuRunning]] \n\t" "ldr w0, [x28, %[CpuRunning]] \n\t"
"cbz w0, .end_mainloop \n\t" "cbz w0, .end_mainloop \n\t"
"ldr w29, [x28, %[pc]] \n\t" // shouldn't be necessary
#ifdef NO_MMU
"movz x2, %[RCB_SIZE], lsl #16 \n\t" "movz x2, %[RCB_SIZE], lsl #16 \n\t"
"sub x2, x28, x2 \n\t" "sub x2, x28, x2 \n\t"
"add x2, x2, %[SH4CTX_SIZE] \n\t" "add x2, x2, %[SH4CTX_SIZE] \n\t"
@ -221,8 +235,14 @@ void ngen_mainloop(void* v_cntx)
#endif #endif
"ldr x0, [x2, x1, lsl #3] \n\t" "ldr x0, [x2, x1, lsl #3] \n\t"
"br x0 \n" "br x0 \n"
#else
"mov w0, w29 \n\t"
"bl bm_GetCodeByVAddr \n\t"
"br x0 \n"
#endif
".end_mainloop: \n\t" ".end_mainloop: \n\t"
"add sp, sp, #16 \n\t" // Pop context
"ldp x29, x30, [sp, #144] \n\t" "ldp x29, x30, [sp, #144] \n\t"
"ldp s12, s13, [sp, #128] \n\t" "ldp s12, s13, [sp, #128] \n\t"
"ldp s10, s11, [sp, #112] \n\t" "ldp s10, s11, [sp, #112] \n\t"
@ -239,7 +259,9 @@ void ngen_mainloop(void* v_cntx)
[_SH4_TIMESLICE] "i"(SH4_TIMESLICE), [_SH4_TIMESLICE] "i"(SH4_TIMESLICE),
[CpuRunning] "i"(offsetof(Sh4Context, CpuRunning)), [CpuRunning] "i"(offsetof(Sh4Context, CpuRunning)),
[RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16), [RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16),
[SH4CTX_SIZE] "i" (sizeof(Sh4Context)) [SH4CTX_SIZE] "i" (sizeof(Sh4Context)),
[jmp_env] "r"(reinterpret_cast<uintptr_t>(jmp_env)),
[cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter))
: "memory" : "memory"
); );
} }
@ -265,6 +287,75 @@ RuntimeBlockInfo* ngen_AllocateBlock()
return new DynaRBI(); return new DynaRBI();
} }
template<typename T>
static T ReadMemNoEx(u32 addr, u32 pc)
{
u32 ex;
T rv = mmu_ReadMemNoEx<T>(addr, &ex);
if (ex)
{
if (pc & 1)
spc = pc - 1;
else
spc = pc;
longjmp(jmp_env, 1);
}
return rv;
}
template<typename T>
static void WriteMemNoEx(u32 addr, T data, u32 pc)
{
u32 ex = mmu_WriteMemNoEx<T>(addr, data);
if (ex)
{
if (pc & 1)
spc = pc - 1;
else
spc = pc;
longjmp(jmp_env, 1);
}
}
static u32 interpreter_fallback(u16 op, u32 pc)
{
try {
OpDesc[op]->oph(op);
return 0;
} catch (SH4ThrownException& ex) {
die("IFB exception");
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
return 1;
}
}
static u32 exception_raised;
static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
{
try {
do_sqw_mmu(addr);
exception_raised = 0;
} catch (SH4ThrownException& ex) {
die("do_sqw_mmu exception");
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
exception_raised = 1;
printf("SQW MMU EXCEPTION\n");
}
}
class Arm64Assembler : public MacroAssembler class Arm64Assembler : public MacroAssembler
{ {
typedef void (MacroAssembler::*Arm64Op_RRO)(const Register&, const Register&, const Operand&); typedef void (MacroAssembler::*Arm64Op_RRO)(const Register&, const Register&, const Operand&);
@ -327,22 +418,47 @@ public:
if (op.rs3.is_imm()) if (op.rs3.is_imm())
{ {
Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm); if (regalloc.IsAllocg(op.rs1))
Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm);
else
{
Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
Add(*ret_reg, *ret_reg, op.rs3._imm);
}
} }
else if (op.rs3.is_r32i()) else if (op.rs3.is_r32i())
{ {
Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3)); if (regalloc.IsAllocg(op.rs1) && regalloc.IsAllocg(op.rs3))
Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3));
else
{
Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
Ldr(w8, sh4_context_mem_operand(op.rs3.reg_ptr()));
Add(*ret_reg, *ret_reg, w8);
}
} }
else if (!op.rs3.is_null()) else if (!op.rs3.is_null())
{ {
die("invalid rs3"); die("invalid rs3");
} }
else if (op.rs1.is_reg())
{
if (regalloc.IsAllocg(op.rs1))
{
if (raddr == NULL)
ret_reg = &regalloc.MapRegister(op.rs1);
else
Mov(*ret_reg, regalloc.MapRegister(op.rs1));
}
else
{
Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
}
}
else else
{ {
if (raddr == NULL) verify(op.rs1.is_imm());
ret_reg = &regalloc.MapRegister(op.rs1); Mov(*ret_reg, op.rs1._imm);
else
Mov(*ret_reg, regalloc.MapRegister(op.rs1));
} }
return *ret_reg; return *ret_reg;
@ -362,7 +478,10 @@ public:
regalloc.DoAlloc(block); regalloc.DoAlloc(block);
// scheduler // scheduler
Subs(w27, w27, block->guest_cycles); Mov(x27, reinterpret_cast<uintptr_t>(&cycle_counter));
Ldr(w0, MemOperand(x27));
Subs(w0, w0, block->guest_cycles);
Str(w0, MemOperand(x27));
Label cycles_remaining; Label cycles_remaining;
B(&cycles_remaining, pl); B(&cycles_remaining, pl);
GenCallRuntime(intc_sched); GenCallRuntime(intc_sched);
@ -389,7 +508,21 @@ public:
} }
Mov(*call_regs[0], op.rs3._imm); Mov(*call_regs[0], op.rs3._imm);
GenCallRuntime(OpDesc[op.rs3._imm]->oph); if (!mmu_enabled())
{
GenCallRuntime(OpDesc[op.rs3._imm]->oph);
}
else
{
Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
GenCallRuntime(interpreter_fallback);
Cmp(w0, 0);
Ldr(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update, ne);
}
break; break;
case shop_jcond: case shop_jcond:
@@ -532,20 +665,33 @@ public:
 		case shop_shld:
 		case shop_shad:
-			// TODO optimize
-			Cmp(regalloc.MapRegister(op.rs2), 0);
-			Csel(w1, regalloc.MapRegister(op.rs2), wzr, ge);	// if shift >= 0 then w1 = shift else w1 = 0
-			Mov(w0, wzr);	// wzr not supported by csneg
-			Csneg(w2, w0, regalloc.MapRegister(op.rs2), ge);	// if shift < 0 then w2 = -shift else w2 = 0
-			Cmp(w2, 32);
-			Csel(w2, 31, w2, eq);	// if shift == -32 then w2 = 31
-			Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1);	// Left shift by w1
-			if (op.op == shop_shld)	// Right shift by w2
-				// Logical shift
-				Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2);
-			else
-				// Arithmetic shift
-				Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2);
+			{
+				Label positive_shift, negative_shift, end;
+				Tbz(regalloc.MapRegister(op.rs2), 31, &positive_shift);
+				Cmn(regalloc.MapRegister(op.rs2), 32);
+				B(&negative_shift, ne);
+				if (op.op == shop_shld)
+					// Logical shift
+					Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), 31);
+				else
+					// Arithmetic shift
+					Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), 31);
+				B(&end);
+
+				Bind(&positive_shift);
+				Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
+				B(&end);
+
+				Bind(&negative_shift);
+				Neg(w1, regalloc.MapRegister(op.rs2));
+				if (op.op == shop_shld)
+					// Logical shift
+					Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w1);
+				else
+					// Arithmetic shift
+					Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1);
+				Bind(&end);
+			}
 			break;

 		case shop_test:
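
The rewritten lowering branches three ways because SH4 SHAD/SHLD take a signed shift amount: positive shifts left, negative shifts right by the negated amount, and -32 (sign bit set, low 5 bits zero) is a special fill case. Equivalent C, a sketch of the architectural semantics rather than code from the tree:

	u32 shad(u32 rn, s32 shift)	// shop_shad: arithmetic
	{
		if (shift >= 0)
			return rn << (shift & 0x1f);
		else if ((shift & 0x1f) == 0)
			return (u32)((s32)rn >> 31);		// shift == -32: fill with sign bit
		else
			return (u32)((s32)rn >> ((-shift) & 0x1f));
	}

	u32 shld(u32 rn, s32 shift)	// shop_shld: logical
	{
		if (shift >= 0)
			return rn << (shift & 0x1f);
		else if ((shift & 0x1f) == 0)
			return 0;				// shift == -32: fill with zero
		else
			return rn >> ((-shift) & 0x1f);
	}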
@@ -617,31 +763,45 @@ public:
 			break;

 		case shop_pref:
-			Mov(w0, regalloc.MapRegister(op.rs1));
-			if (op.flags != 0x1337)
-			{
-				Lsr(w1, regalloc.MapRegister(op.rs1), 26);
-				Cmp(w1, 0x38);
-			}
-
-			if (CCN_MMUCR.AT)
-			{
-				Ldr(x9, reinterpret_cast<uintptr_t>(&do_sqw_mmu));
-			}
-			else
-			{
-				Sub(x9, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, do_sqw_nommu));
-				Ldr(x9, MemOperand(x9));
-				Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
-			}
-			if (op.flags == 0x1337)
-				Blr(x9);
-			else
-			{
-				Label no_branch;
-				B(&no_branch, ne);
-				Blr(x9);
-				Bind(&no_branch);
-			}
+			{
+				if (regalloc.IsAllocg(op.rs1))
+					Lsr(w1, regalloc.MapRegister(op.rs1), 26);
+				else
+				{
+					Ldr(w0, sh4_context_mem_operand(op.rs1.reg_ptr()));
+					Lsr(w1, w0, 26);
+				}
+				Cmp(w1, 0x38);
+				Label not_sqw;
+				B(&not_sqw, ne);
+				if (regalloc.IsAllocg(op.rs1))
+					Mov(w0, regalloc.MapRegister(op.rs1));
+
+				if (mmu_enabled())
+				{
+					Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
+					GenCallRuntime(do_sqw_mmu_no_ex);
+					Cmp(w0, 0);
+					Ldr(w29, sh4_context_mem_operand(&next_pc));
+					GenBranch(no_update, ne);
+				}
+				else
+				{
+					if (CCN_MMUCR.AT)
+					{
+						Ldr(x9, reinterpret_cast<uintptr_t>(&do_sqw_mmu));
+					}
+					else
+					{
+						Sub(x9, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, do_sqw_nommu));
+						Ldr(x9, MemOperand(x9));
+						Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
+					}
+					Blr(x9);
+				}
+				Bind(&not_sqw);
+			}
 			break;
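
The new lowering inlines the store-queue test instead of leaving it to the callee: on SH4, only PREF to 0xE0000000-0xE3FFFFFF triggers a store-queue burst write, which is exactly what the `addr >> 26 == 0x38` comparison checks. Sketch:

	bool is_store_queue(u32 addr)
	{
		// 0xE0000000 >> 26 == 0x38; the SQ area is 0xE0000000-0xE3FFFFFF
		return (addr >> 26) == 0x38;
	}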
@@ -863,24 +1023,39 @@ public:
 		Instruction *start_instruction = GetCursorAddress<Instruction *>();
 		u32 size = op.flags & 0x7f;

+		if (mmu_enabled())
+			Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
+
 		switch (size)
 		{
 		case 1:
-			GenCallRuntime(ReadMem8);
+			if (!mmu_enabled())
+				GenCallRuntime(ReadMem8);
+			else
+				GenCallRuntime(ReadMemNoEx<u8>);
 			Sxtb(w0, w0);
 			break;
 		case 2:
-			GenCallRuntime(ReadMem16);
+			if (!mmu_enabled())
+				GenCallRuntime(ReadMem16);
+			else
+				GenCallRuntime(ReadMemNoEx<u16>);
 			Sxth(w0, w0);
 			break;
 		case 4:
-			GenCallRuntime(ReadMem32);
+			if (!mmu_enabled())
+				GenCallRuntime(ReadMem32);
+			else
+				GenCallRuntime(ReadMemNoEx<u32>);
 			break;
 		case 8:
-			GenCallRuntime(ReadMem64);
+			if (!mmu_enabled())
+				GenCallRuntime(ReadMem64);
+			else
+				GenCallRuntime(ReadMemNoEx<u64>);
 			break;
 		default:
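
The Sxtb/Sxth after each call are not incidental: SH4 MOV.B/MOV.W loads sign-extend into the full 32-bit register, so both the throwing and the NoEx read paths need the extension. In C terms (sketch):

	s32 load_byte_s(u32 addr) { return (s8)ReadMem8(addr); }	// Sxtb(w0, w0)
	s32 load_word_s(u32 addr) { return (s16)ReadMem16(addr); }	// Sxth(w0, w0)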
@@ -906,24 +1081,39 @@ public:
 	void GenWriteMemorySlow(const shil_opcode& op)
 	{
+		if (mmu_enabled())
+			Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0));	// pc
+
 		Instruction *start_instruction = GetCursorAddress<Instruction *>();
 		u32 size = op.flags & 0x7f;
 		switch (size)
 		{
 		case 1:
-			GenCallRuntime(WriteMem8);
+			if (!mmu_enabled())
+				GenCallRuntime(WriteMem8);
+			else
+				GenCallRuntime(WriteMemNoEx<u8>);
 			break;
 		case 2:
-			GenCallRuntime(WriteMem16);
+			if (!mmu_enabled())
+				GenCallRuntime(WriteMem16);
+			else
+				GenCallRuntime(WriteMemNoEx<u16>);
 			break;
 		case 4:
-			GenCallRuntime(WriteMem32);
+			if (!mmu_enabled())
+				GenCallRuntime(WriteMem32);
+			else
+				GenCallRuntime(WriteMemNoEx<u32>);
 			break;
 		case 8:
-			GenCallRuntime(WriteMem64);
+			if (!mmu_enabled())
+				GenCallRuntime(WriteMem64);
+			else
+				GenCallRuntime(WriteMemNoEx<u64>);
 			break;
 		default:
@@ -950,7 +1140,16 @@ public:
 		case BET_StaticCall:
 			// next_pc = block->BranchBlock;
 			if (block->pBranchBlock == NULL)
-				GenCallRuntime(ngen_LinkBlock_Generic_stub);
+			{
+				if (!mmu_enabled())
+					GenCallRuntime(ngen_LinkBlock_Generic_stub);
+				else
+				{
+					Mov(w29, block->BranchBlock);
+					Str(w29, sh4_context_mem_operand(&next_pc));
+					GenBranch(no_update);
+				}
+			}
 			else
 				GenBranch(block->pBranchBlock->code);
 			break;
@@ -975,14 +1174,32 @@ public:
 			if (block->pBranchBlock != NULL)
 				GenBranch(block->pBranchBlock->code);
 			else
-				GenCallRuntime(ngen_LinkBlock_cond_Branch_stub);
+			{
+				if (!mmu_enabled())
+					GenCallRuntime(ngen_LinkBlock_cond_Branch_stub);
+				else
+				{
+					Mov(w29, block->BranchBlock);
+					Str(w29, sh4_context_mem_operand(&next_pc));
+					GenBranch(no_update);
+				}
+			}

 			Bind(&branch_not_taken);

 			if (block->pNextBlock != NULL)
 				GenBranch(block->pNextBlock->code);
 			else
-				GenCallRuntime(ngen_LinkBlock_cond_Next_stub);
+			{
+				if (!mmu_enabled())
+					GenCallRuntime(ngen_LinkBlock_cond_Next_stub);
+				else
+				{
+					Mov(w29, block->NextBlock);
+					Str(w29, sh4_context_mem_operand(&next_pc));
+					GenBranch(no_update);
+				}
+			}
 		}
 		break;
@@ -991,18 +1208,26 @@ public:
 		case BET_DynamicRet:
 			// next_pc = *jdyn;
-			Str(w29, sh4_context_mem_operand(&next_pc));
-			// TODO Call no_update instead (and check CpuRunning less frequently?)
-			Mov(x2, sizeof(Sh4RCB));
-			Sub(x2, x28, x2);
-			Add(x2, x2, sizeof(Sh4Context));	// x2 now points to FPCB
+			if (!mmu_enabled())
+			{
+				Str(w29, sh4_context_mem_operand(&next_pc));
+				// TODO Call no_update instead (and check CpuRunning less frequently?)
+				Mov(x2, sizeof(Sh4RCB));
+				Sub(x2, x28, x2);
+				Add(x2, x2, sizeof(Sh4Context));	// x2 now points to FPCB
 #if RAM_SIZE_MAX == 33554432
 				Ubfx(w1, w29, 1, 24);
 #else
 				Ubfx(w1, w29, 1, 23);
 #endif
-			Ldr(x15, MemOperand(x2, x1, LSL, 3));	// Get block entry point
-			Br(x15);
+				Ldr(x15, MemOperand(x2, x1, LSL, 3));	// Get block entry point
+				Br(x15);
+			}
+			else
+			{
+				Str(w29, sh4_context_mem_operand(&next_pc));
+				GenBranch(no_update);
+			}
 			break;
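
The non-MMU BET_DynamicRet path is an inlined next-block dispatch through the FPCB table that sits just below the Sh4Context inside the Sh4RCB. A sketch of what the Ubfx/Ldr/Br sequence computes — the fpcb field name is assumed from the offsetof arithmetic, and the index width follows the RAM_SIZE_MAX conditional above:

	DynarecCodeEntryPtr lookup_next_block(u32 pc)
	{
		// SH4 instructions are 2 bytes: drop bit 0, keep 24 (32MB RAM) or 23 bits
	#if RAM_SIZE_MAX == 33554432
		u32 idx = (pc >> 1) & ((1u << 24) - 1);
	#else
		u32 idx = (pc >> 1) & ((1u << 23) - 1);
	#endif
		return p_sh4rcb->fpcb[idx];	// one 8-byte entry point per possible PC
	}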
@@ -1093,8 +1318,6 @@ private:
 	void GenReadMemory(const shil_opcode& op, size_t opid)
 	{
-		u32 size = op.flags & 0x7f;
-
 		if (GenReadMemoryImmediate(op))
 			return;
@@ -1112,34 +1335,73 @@ private:
 			return false;

 		u32 size = op.flags & 0x7f;
+		u32 addr = op.rs1._imm;
+		if (mmu_enabled())
+		{
+			if ((addr >> 12) != (block->vaddr >> 12))
+				// When full mmu is on, only consider addresses in the same 4k page
+				return false;
+
+			u32 paddr;
+			u32 rv;
+			if (size == 2)
+				rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
+			else if (size == 4)
+				rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
+			else
+				die("Invalid immediate size");
+			if (rv != MMU_ERROR_NONE)
+				return false;
+			addr = paddr;
+		}
 		bool isram = false;
-		void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
+		void* ptr = _vmem_read_const(addr, isram, size);

 		if (isram)
 		{
 			Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
-			switch (size)
+			if (regalloc.IsAllocAny(op.rd))
 			{
-			case 2:
-				Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
-				break;
+				switch (size)
+				{
+				case 2:
+					Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
+					break;

 				case 4:
 					if (op.rd.is_r32f())
 						Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
 					else
 						Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
 					break;

 				default:
 					die("Invalid size");
 					break;
+				}
+			}
+			else
+			{
+				switch (size)
+				{
+				case 2:
+					Ldrsh(w1, MemOperand(x1, xzr, SXTW));
+					break;
+				case 4:
+					Ldr(w1, MemOperand(x1));
+					break;
+				default:
+					die("Invalid size");
+					break;
+				}
+				Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
 			}
 		}
 		else
 		{
 			// Not RAM
-			Mov(w0, op.rs1._imm);
+			Mov(w0, addr);

 			switch(size)
 			{
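
The same-4k-page restriction deserves a note: a data translation performed at compile time is only trustworthy at run time if it cannot change independently of the block itself, and limiting constant-address loads to the block's own MMU page is the conservative way to get that (the block is invalidated when its page mapping changes). The guard in isolation (sketch):

	bool can_use_compile_time_translation(u32 addr, u32 block_vaddr)
	{
		return (addr >> 12) == (block_vaddr >> 12);	// same 4KB page as the block
	}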
@@ -1165,7 +1427,10 @@ private:
 			if (regalloc.IsAllocg(op.rd))
 				Mov(regalloc.MapRegister(op.rd), w0);
 			else
+			{
+				verify(regalloc.IsAllocf(op.rd));
 				Fmov(regalloc.MapVRegister(op.rd), w0);
+			}
 		}

 		return true;
@@ -1174,7 +1439,7 @@ private:
 	bool GenReadMemoryFast(const shil_opcode& op, size_t opid)
 	{
 		// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
-		if (!_nvmem_enabled())
+		if (!_nvmem_enabled() || mmu_enabled())
 			return false;

 		Instruction *start_instruction = GetCursorAddress<Instruction *>();
@@ -1254,7 +1519,7 @@ private:
 	bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
 	{
 		// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
-		if (!_nvmem_enabled())
+		if (!_nvmem_enabled() || mmu_enabled())
 			return false;

 		Instruction *start_instruction = GetCursorAddress<Instruction *>();
@@ -1307,9 +1572,16 @@ private:
 		u8* ptr = GetMemPtr(block->addr, sz);
 		if (ptr == NULL)
+			// FIXME Can a block cross a RAM / non-RAM boundary??
 			return;

+		if (mmu_enabled())
+		{
+			Ldr(w10, sh4_context_mem_operand(&next_pc));
+			Ldr(w11, block->vaddr);
+			Cmp(w10, w11);
+			B(ne, &blockcheck_fail);
+		}
 		Ldr(x9, reinterpret_cast<uintptr_t>(ptr));

 		while (sz > 0)
@@ -1347,6 +1619,23 @@ private:
 		TailCallRuntime(ngen_blockcheckfail);

 		Bind(&blockcheck_success);
+
+/*
+		if (mmu_enabled() && block->has_fpu_op)
+		{
+			Label fpu_enabled;
+			Ldr(w10, sh4_context_mem_operand(&sr));
+			Tbz(w10, 15, &fpu_enabled);			// test SR.FD bit
+
+			Mov(*call_regs[0], block->vaddr);	// pc
+			Mov(*call_regs[1], 0x800);			// event
+			Mov(*call_regs[2], 0x100);			// vector
+			CallRuntime(Do_Exception);
+			Ldr(w29, sh4_context_mem_operand(&next_pc));
+			GenBranch(no_update);
+			Bind(&fpu_enabled);
+		}
+*/
 	}
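
The commented-out block documents the intended FPU-disable check: when SR.FD (bit 15) is set, executing any FPU instruction must raise the general FPU disable exception, event 0x800, vector 0x100. In C terms (a sketch of the disabled code; the sr.FD accessor is assumed from the Tbz bit test above):

	bool raise_fpu_disabled(u32 block_vaddr)
	{
		if (sr.FD)	// SR bit 15: FPU disable
		{
			Do_Exception(block_vaddr, 0x800, 0x100);	// event, vector
			return true;	// caller branches back to the dispatcher (no_update)
		}
		return false;
	}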
	void shil_param_to_host_reg(const shil_param& param, const Register& reg)
@@ -1360,9 +1649,19 @@ private:
 			if (param.is_r64f())
 				Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
 			else if (param.is_r32f())
-				Fmov(reg, regalloc.MapVRegister(param));
+			{
+				if (regalloc.IsAllocf(param))
+					Fmov(reg, regalloc.MapVRegister(param));
+				else
+					Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+			}
 			else
-				Mov(reg, regalloc.MapRegister(param));
+			{
+				if (regalloc.IsAllocg(param))
+					Mov(reg, regalloc.MapRegister(param));
+				else
+					Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+			}
 		}
 		else
 		{
@@ -1383,13 +1682,17 @@ private:
 			else
 				Fmov(regalloc.MapRegister(param), (const VRegister&)reg);
 		}
-		else
+		else if (regalloc.IsAllocf(param))
 		{
 			if (reg.IsVRegister())
 				Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
 			else
 				Fmov(regalloc.MapVRegister(param), (const Register&)reg);
 		}
+		else
+		{
+			Str(reg, sh4_context_mem_operand(param.reg_ptr()));
+		}
 	}

 	struct CC_PS
@@ -1476,6 +1779,8 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)

 u32 DynaRBI::Relink()
 {
+	if (mmu_enabled())
+		return 0;
+
 	//printf("DynaRBI::Relink %08x\n", this->addr);
 	Arm64Assembler *compiler = new Arm64Assembler((u8 *)this->code + this->relink_offset);

@@ -50,7 +50,7 @@ void ngen_mainloop(void* v_cntx)
 #endif
 		cycle_counter = SH4_TIMESLICE;
 		do {
-			DynarecCodeEntryPtr rcb = bm_GetCode(ctx->cntx.pc);
+			DynarecCodeEntryPtr rcb = bm_GetCodeByVAddr(ctx->cntx.pc);
 			rcb();
 		} while (cycle_counter > 0);

@@ -1,6 +1,8 @@
 #include "build.h"
 #if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
+#include <setjmp.h>
+
 #define EXPLODE_SPANS
 //#define PROFILING
@@ -77,6 +79,8 @@ static __attribute((used)) void end_slice()
 #error RAM_SIZE_MAX unknown
 #endif

+jmp_buf jmp_env;
+
 #ifdef _WIN32
 // Fully naked function in win32 for proper SEH prologue
 __asm__ (
@@ -120,6 +124,14 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
 #endif
 			"movl $" _S(SH4_TIMESLICE) "," _U "cycle_counter(%rip) \n"

+#ifdef _WIN32
+			"movq $" _U "jmp_env, %rcx \n\t"	// SETJMP
+#else
+			"movq $" _U "jmp_env, %rdi \n\t"
+#endif
+			"call " _U "setjmp \n\t"
+//			"testl %rax, %rax \n\t"
+
 		"1: \n\t"	// run_loop
 			"movq " _U "p_sh4rcb(%rip), %rax \n\t"
 			"movl " _S(CPU_RUNNING) "(%rax), %edx \n\t"
@@ -136,7 +148,7 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
 #else
 			"movl " _S(PC)"(%rax), %edi \n\t"
 #endif
-			"call " _U "bm_GetCode2 \n\t"
+			"call " _U "bm_GetCodeByVAddr \n\t"
 			"call *%rax \n\t"
 			"movl " _U "cycle_counter(%rip), %ecx \n\t"
 			"testl %ecx, %ecx \n\t"
@@ -212,51 +224,31 @@ static u32 exception_raised;
 template<typename T>
 static T ReadMemNoEx(u32 addr, u32 pc)
 {
-	try {
-		exception_raised = 0;
-		if (sizeof(T) == 1)
-			return ReadMem8(addr);
-		else if (sizeof(T) == 2)
-			return ReadMem16(addr);
-		else if (sizeof(T) == 4)
-			return ReadMem32(addr);
-		else if (sizeof(T) == 8)
-			return ReadMem64(addr);
-	} catch (SH4ThrownException& ex) {
+	T rv = mmu_ReadMemNoEx<T>(addr, &exception_raised);
+	if (exception_raised)
+	{
 		if (pc & 1)
-		{
 			// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
-		return 0;
+			spc = pc - 1;
+		else
+			spc = pc;
+		longjmp(jmp_env, 1);
 	}
+	return rv;
 }

 template<typename T>
 static void WriteMemNoEx(u32 addr, T data, u32 pc)
 {
-	try {
-		if (sizeof(T) == 1)
-			WriteMem8(addr, data);
-		else if (sizeof(T) == 2)
-			WriteMem16(addr, data);
-		else if (sizeof(T) == 4)
-			WriteMem32(addr, data);
-		else if (sizeof(T) == 8)
-			WriteMem64(addr, data);
-		exception_raised = 0;
-	} catch (SH4ThrownException& ex) {
+	exception_raised = mmu_WriteMemNoEx<T>(addr, data);
+	if (exception_raised)
+	{
 		if (pc & 1)
-		{
 			// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
+			spc = pc - 1;
+		else
+			spc = pc;
+		longjmp(jmp_env, 1);
 	}
 }
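
This is the commit's headline change in miniature: C++ exceptions cannot reliably unwind through JIT-emitted frames that carry no unwind info, so the mainloop setjmp()s once on entry (see the assembly hunk above) and any faulting guest access longjmp()s straight back to it. A self-contained sketch of that control flow:

	#include <setjmp.h>
	#include <stdio.h>

	static jmp_buf jmp_env;
	static volatile int faults;	// volatile: modified between setjmp and longjmp

	static void guest_read(bool fault)
	{
		if (fault)
			longjmp(jmp_env, 1);	// unwind past any JIT frames in one step
	}

	int main()
	{
		if (setjmp(jmp_env))	// returns 1 when a longjmp lands here
			faults++;
		if (faults < 3)
			guest_read(true);	// simulate a faulting guest access
		printf("recovered from %d faults\n", faults);
		return 0;
	}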
@@ -352,7 +344,7 @@ public:
 		sub(rsp, 0x8);		// align stack
 #endif
 		Xbyak::Label exit_block;
+/*
 		if (mmu_enabled() && block->has_fpu_op)
 		{
 			Xbyak::Label fpu_enabled;
@@ -367,7 +359,7 @@ public:
 			jmp(exit_block, T_NEAR);
 			L(fpu_enabled);
 		}
+*/
 		for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
 		{
 			shil_opcode& op = block->oplist[current_opid];
@@ -449,25 +441,32 @@ public:
 		{
 			u32 size = op.flags & 0x7f;
 			bool immediate_address = op.rs1.is_imm();
-			if (immediate_address && mmu_enabled() && (op.rs1._imm >> 12) != (block->vaddr >> 12))
+			u32 addr = op.rs1._imm;
+			if (immediate_address && mmu_enabled())
 			{
-				// When full mmu is on, only consider addresses in the same 4k page
-				immediate_address = false;
+				if ((op.rs1._imm >> 12) != (block->vaddr >> 12))
+				{
+					// When full mmu is on, only consider addresses in the same 4k page
+					immediate_address = false;
+				}
+				else
+				{
+					u32 paddr;
+					u32 rv;
+					if (size == 2)
+						rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
+					else if (size == 4)
+						rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
+					else
+						die("Invalid immediate size");
+					if (rv != MMU_ERROR_NONE)
+						immediate_address = false;
+					else
+						addr = paddr;
+				}
 			}
 			if (immediate_address)
 			{
-				u32 addr = op.rs1._imm;
-				if (mmu_enabled())
-				{
-					u32 paddr;
-					if (size == 2)
-						mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
-					else if (size == 4)
-						mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
-					else
-						die("Invalid immediate size");
-					addr = paddr;
-				}
 				bool isram = false;
 				void* ptr = _vmem_read_const(addr, isram, size);
@@ -581,11 +580,11 @@ public:
 				die("1..8 bytes");
 			}

-			if (mmu_enabled())
-			{
-				test(dword[(void *)&exception_raised], 1);
-				jnz(exit_block, T_NEAR);
-			}
+//			if (mmu_enabled())
+//			{
+//				test(dword[(void *)&exception_raised], 1);
+//				jnz(exit_block, T_NEAR);
+//			}

 			if (size != 8)
 				host_reg_to_shil_param(op.rd, ecx);
@@ -674,11 +673,11 @@ public:
 				die("1..8 bytes");
 			}

-			if (mmu_enabled())
-			{
-				test(dword[(void *)&exception_raised], 1);
-				jnz(exit_block, T_NEAR);
-			}
+//			if (mmu_enabled())
+//			{
+//				test(dword[(void *)&exception_raised], 1);
+//				jnz(exit_block, T_NEAR);
+//			}
 		}
 		break;
@@ -1353,6 +1352,10 @@ private:
 //			cmp(byte[rax], block->asid);
 //			jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
 //		}
+		// FIXME Neither of these tests should be necessary
+		// However the decoder makes various assumptions about the current PC value, which are simply not
+		// true in a virtualized memory model. So this can only work if virtual and phy addresses are the
+		// same at compile and run times.
 		if (mmu_enabled())
 		{
 			mov(rax, (uintptr_t)&next_pc);
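
The test the FIXME refers to, in C terms: under the full MMU the same physical code can be reached through several virtual addresses, so before running a block the backend verifies that the virtual PC it was compiled for still matches (sketch):

	bool block_vaddr_matches(u32 block_vaddr)
	{
		return next_pc == block_vaddr;	// otherwise: jump to ngen_blockcheckfail
	}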

@@ -9,7 +9,7 @@
 .globl cycle_counter
 .globl loop_no_update
 .globl intc_sched
-.globl bm_GetCode
+.globl bm_GetCodeByVAddr
 .globl cycle_counter
 .globl UpdateSystem
 .globl rdv_DoInterrupts
@@ -109,7 +109,7 @@ ngen_mainloop:
 # next_pc _MUST_ be on ecx
 no_update:
 	mov esi,ecx
-	call _Z10bm_GetCodej	#bm_GetCode
+	call bm_GetCodeByVAddr
 	jmp eax

 intc_sched_offs:

@@ -84,7 +84,7 @@ naked void ngen_mainloop(void* cntx)
 		//next_pc _MUST_ be on ecx
 no_update:
 		mov esi,ecx;
-		call bm_GetCode
+		call bm_GetCodeByVAddr
 		jmp eax;

 intc_sched_offs:

@@ -492,8 +492,7 @@ extern TLB_Entry ITLB[4];
 #if defined(NO_MMU)
 extern u32 sq_remap[64];
 #else
-extern u32 ITLB_LRU_USE[64];
-extern u32 mmu_error_TT;
+static u32 ITLB_LRU_USE[64];
 #endif
@@ -1085,7 +1084,6 @@ bool dc_serialize(void **data, unsigned int *total_size)
 	REICAST_SA(sq_remap,64);
 #else
 	REICAST_SA(ITLB_LRU_USE,64);
-	REICAST_S(mmu_error_TT);
 #endif
@@ -1487,7 +1485,6 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
 	REICAST_USA(sq_remap,64);
 #else
 	REICAST_USA(ITLB_LRU_USE,64);
-	REICAST_US(mmu_error_TT);
 #endif
@@ -1877,7 +1874,6 @@ bool dc_unserialize(void **data, unsigned int *total_size)
 	REICAST_USA(sq_remap,64);
 #else
 	REICAST_USA(ITLB_LRU_USE,64);
-	REICAST_US(mmu_error_TT);
 #endif

@@ -63,8 +63,8 @@ LOCAL_SRC_FILES := $(RZDCY_FILES)
 LOCAL_SRC_FILES += $(wildcard $(LOCAL_PATH)/jni/src/Android.cpp)
 LOCAL_SRC_FILES += $(wildcard $(LOCAL_PATH)/jni/src/utils.cpp)
 LOCAL_CFLAGS := $(RZDCY_CFLAGS) -fPIC -fvisibility=hidden -ffunction-sections -fdata-sections
-LOCAL_CXXFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections
-LOCAL_CPPFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections
+LOCAL_CXXFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections -fexceptions
+LOCAL_CPPFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections -fexceptions

 # 7-Zip/LZMA settings (CHDv5)
 ifdef CHD5_LZMA