wince: use setjmp/longjmp instead of try/catch for dynarecs

WinCE fast mmu implementation
WIP arm64 dynarec
Flyinghead 2019-04-15 18:02:34 +02:00
parent ba00da2420
commit dece3fc13e
22 changed files with 1246 additions and 356 deletions
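The gist of the change, as a minimal sketch (mainloop, run_block, translate_and_read and running are illustrative placeholders, not the emulator's real API): C++ exceptions cannot safely unwind through JIT-generated stack frames, which carry no unwind information, so the main loop arms a jmp_buf once with setjmp() and every memory helper that faults records the guest resume PC and longjmps back instead of throwing.

#include <csetjmp>
#include <cstdint>

extern bool running;                                               // host loop flag (placeholder)
extern uint32_t translate_and_read(uint32_t addr, uint32_t *err);  // hypothetical MMU helper
extern void run_block(uint32_t pc);                                // enters JIT-generated code

static std::jmp_buf jmp_env;  // armed once per mainloop entry
static uint32_t guest_pc;     // guest PC to resume at (its exception vector)

// Called from generated code; must never throw, because the frames between
// here and mainloop() have no C++ unwind tables.
static uint32_t read_mem_noex(uint32_t addr, uint32_t pc)
{
    uint32_t err = 0;
    uint32_t data = translate_and_read(addr, &err);
    if (err != 0)
    {
        guest_pc = pc;             // the guest-side exception is already set up
        std::longjmp(jmp_env, 1);  // unwind in one hop, skipping the JIT frames
    }
    return data;
}

static void mainloop()
{
    setjmp(jmp_env);  // re-entered after every MMU fault
    while (running)
        run_block(guest_pc);
}

The commit threads this pattern through the x64 and arm64 mainloops and the mmu_*NoEx helpers in the files below.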

View File

@ -115,7 +115,9 @@
*/
#define NO_MMU
//#define NO_MMU
#define FAST_MMU
#define USE_WINCE_HACK
#define DC_PLATFORM_MASK 7
#define DC_PLATFORM_DREAMCAST 0 /* Works, for the most part */

View File

@ -183,6 +183,11 @@ INLINE Trv DYNACALL _vmem_readt(u32 addr)
}
}
}
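// Explicit instantiations: these templates are now referenced from other
// translation units (the fast MMU code calls them directly).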
template u8 DYNACALL _vmem_readt<u8, u8>(u32 addr);
template u16 DYNACALL _vmem_readt<u16, u16>(u32 addr);
template u32 DYNACALL _vmem_readt<u32, u32>(u32 addr);
template u64 DYNACALL _vmem_readt<u64, u64>(u32 addr);
template<typename T>
INLINE void DYNACALL _vmem_writet(u32 addr,T data)
{
@ -225,6 +230,10 @@ INLINE void DYNACALL _vmem_writet(u32 addr,T data)
}
}
}
template void DYNACALL _vmem_writet<u8>(u32 addr, u8 data);
template void DYNACALL _vmem_writet<u16>(u32 addr, u16 data);
template void DYNACALL _vmem_writet<u32>(u32 addr, u32 data);
template void DYNACALL _vmem_writet<u64>(u32 addr, u64 data);
//ReadMem/WriteMem functions
//ReadMem
@ -552,7 +561,7 @@ error:
}
#endif
int fd;
int vmem_fd;
void* _nvmem_unused_buffer(u32 start,u32 end)
{
void* ptr=mmap(&virt_ram_base[start], end-start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0);
@ -572,7 +581,7 @@ error:
verify((addrsz%size)==0);
verify(map_times>=1);
u32 prot=PROT_READ|(w?PROT_WRITE:0);
rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset);
rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
if (MAP_FAILED==rv || rv!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0))
{
printf("MAP1 failed %d\n",errno);
@ -582,7 +591,7 @@ error:
for (u32 i=1;i<map_times;i++)
{
dst+=size;
ptr=mmap(&virt_ram_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset);
ptr=mmap(&virt_ram_base[dst], size, prot , MAP_SHARED | MAP_NOSYNC | MAP_FIXED, vmem_fd, offset);
if (MAP_FAILED==ptr || ptr!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0))
{
printf("MAP2 failed %d\n",errno);
@ -598,26 +607,26 @@ error:
#if HOST_OS == OS_DARWIN
string path = get_writable_data_path("/dcnzorz_mem");
fd = open(path.c_str(),O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
vmem_fd = open(path.c_str(),O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
unlink(path.c_str());
verify(ftruncate(fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
verify(ftruncate(vmem_fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
#elif !defined(_ANDROID)
fd = shm_open("/dcnzorz_mem", O_CREAT | O_EXCL | O_RDWR,S_IREAD | S_IWRITE);
vmem_fd = shm_open("/dcnzorz_mem", O_CREAT | O_EXCL | O_RDWR,S_IREAD | S_IWRITE);
shm_unlink("/dcnzorz_mem");
if (fd==-1)
if (vmem_fd==-1)
{
fd = open("dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
vmem_fd = open("dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
unlink("dcnzorz_mem");
}
verify(ftruncate(fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
verify(ftruncate(vmem_fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX) == 0);
#else
fd = ashmem_create_region(0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX);
vmem_fd = ashmem_create_region(0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX);
if (false)//this causes writebacks to flash -> slow and stuttery
{
fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem");
vmem_fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO);
unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem");
}
#endif
@ -730,7 +739,7 @@ bool _vmem_reserve()
//I really should check the docs before coding ;p
//[0x00800000,0x00A00000);
map_buffer(0x00800000,0x01000000,MAP_ARAM_START_OFFSET,ARAM_SIZE,false);
map_buffer(0x20000000,0x20000000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true);
map_buffer(0x02800000,0x02800000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true);
aica_ram.size=ARAM_SIZE;
aica_ram.data=(u8*)ptr;
@ -804,7 +813,7 @@ void _vmem_release()
virt_ram_base = NULL;
}
#if HOST_OS != OS_WINDOWS
close(fd);
close(vmem_fd);
#endif
}
}

View File

@ -49,11 +49,13 @@ u8 DYNACALL _vmem_ReadMem8(u32 Address);
u16 DYNACALL _vmem_ReadMem16(u32 Address);
u32 DYNACALL _vmem_ReadMem32(u32 Address);
u64 DYNACALL _vmem_ReadMem64(u32 Address);
template<typename T, typename Trv> Trv DYNACALL _vmem_readt(u32 addr);
//WriteMem(s)
void DYNACALL _vmem_WriteMem8(u32 Address,u8 data);
void DYNACALL _vmem_WriteMem16(u32 Address,u16 data);
void DYNACALL _vmem_WriteMem32(u32 Address,u32 data);
void DYNACALL _vmem_WriteMem64(u32 Address,u64 data);
template<typename T> void DYNACALL _vmem_writet(u32 addr, T data);
//should be called at start up to ensure it will succeed :)
bool _vmem_reserve();
@ -70,4 +72,4 @@ static inline bool _nvmem_enabled() {
return virt_ram_base != 0;
}
void _vmem_bm_reset();
void _vmem_bm_reset();

View File

@ -97,7 +97,7 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr)
}
// addr must be a virtual address
DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
{
#ifndef NO_MMU
if (!mmu_enabled())
@ -110,32 +110,47 @@ DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr)
{
switch (addr)
{
#ifdef USE_WINCE_HACK
case 0xfffffde7: // GetTickCount
// This should make this syscall faster
r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK;
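// sh4_sched_now64() counts SH4 cycles; dividing by the main clock converts to the millisecond count GetTickCount returns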
next_pc = pr;
addr = next_pc;
break;
case 0xfffffd05: // QueryPerformanceCounter(u64 *)
{
u32 paddr;
if (mmu_data_translation<MMU_TT_DWRITE, u64>(r[4], paddr) == MMU_ERROR_NONE)
{
_vmem_WriteMem64(paddr, sh4_sched_now64() >> 4);
r[0] = 1;
next_pc = pr;
}
else
{
Do_Exception(addr, 0xE0, 0x100);
}
}
break;
#endif
default:
Do_Exception(addr, 0xE0, 0x100);
addr = next_pc;
break;
}
addr = next_pc;
}
try {
u32 paddr;
bool shared;
mmu_instruction_translation(addr, paddr, shared);
return (DynarecCodeEntryPtr)bm_GetCode(paddr);
} catch (SH4ThrownException& ex) {
Do_Exception(addr, ex.expEvn, ex.callVect);
u32 paddr;
bool shared;
u32 paddr;
bool shared;
u32 rv = mmu_instruction_translation(addr, paddr, shared);
if (rv != MMU_ERROR_NONE)
{
DoMMUException(addr, rv, MMU_TT_IREAD);
mmu_instruction_translation(next_pc, paddr, shared);
return (DynarecCodeEntryPtr)bm_GetCode(paddr);
}
return (DynarecCodeEntryPtr)bm_GetCode(paddr);
}
#endif
}
@ -220,6 +235,7 @@ void bm_RemoveBlock(RuntimeBlockInfo* block)
all_blocks.erase(it);
break;
}
// FIXME need to remove refs
delete block;
}

View File

@ -17,7 +17,7 @@ struct RuntimeBlockInfo_Core
struct RuntimeBlockInfo: RuntimeBlockInfo_Core
{
void Setup(u32 pc,fpscr_t fpu_cfg);
bool Setup(u32 pc,fpscr_t fpu_cfg);
const char* hash(bool full=true, bool reloc=false);
u32 vaddr;
@ -89,7 +89,7 @@ void bm_WriteBlockMap(const string& file);
DynarecCodeEntryPtr DYNACALL bm_GetCode(u32 addr);
extern "C" {
__attribute__((used)) DynarecCodeEntryPtr DYNACALL bm_GetCode2(u32 addr);
__attribute__((used)) DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr);
}
RuntimeBlockInfo* bm_GetBlock(void* dynarec_code);

View File

@ -1007,13 +1007,15 @@ void state_Setup(u32 rpc,fpscr_t fpu_cfg)
state.info.has_fpu=false;
}
void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
{
blk=rbi;
state_Setup(blk->vaddr, blk->fpu_cfg);
ngen_GetFeatures(&state.ngen);
blk->guest_opcodes=0;
// If full MMU, don't allow the block to extend past the end of the current 4K page
u32 max_pc = mmu_enabled() ? ((state.cpu.rpc >> 12) + 1) << 12 : 0xFFFFFFFF;
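// e.g. rpc == 0x8C001234 gives max_pc == 0x8C002000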
for(;;)
{
@ -1025,10 +1027,8 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
//there is no break here by design
case NDO_NextOp:
{
if (
( (blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT) || (blk->guest_cycles >= max_cycles) )
&& !state.cpu.is_delayslot
)
if ((blk->oplist.size() >= BLOCK_MAX_SH_OPS_SOFT || blk->guest_cycles >= max_cycles || state.cpu.rpc >= max_pc)
&& !state.cpu.is_delayslot)
{
dec_End(state.cpu.rpc,BET_StaticJump,false);
}
@ -1053,7 +1053,16 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
}
*/
u32 op=IReadMem16(state.cpu.rpc);
u32 op;
if (!mmu_enabled())
op = IReadMem16(state.cpu.rpc);
else
{
u32 exception_occurred;
op = mmu_IReadMem16NoEx(state.cpu.rpc, &exception_occurred);
if (exception_occurred)
return false;
}
if (op==0 && state.cpu.is_delayslot)
{
printf("Delayslot 0 hack!\n");
@ -1104,8 +1113,8 @@ void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles)
case NDO_Jump:
die("Too old");
state.NextOp=state.JumpOp;
state.cpu.rpc=state.JumpAddr;
//state.NextOp=state.JumpOp;
//state.cpu.rpc=state.JumpAddr;
break;
case NDO_End:
@ -1187,6 +1196,8 @@ _end:
//make sure we don't use wayy-too-few cycles
blk->guest_cycles=max(1U,blk->guest_cycles);
blk=0;
return true;
}
#endif

View File

@ -45,7 +45,7 @@ struct ngen_features
};
struct RuntimeBlockInfo;
void dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
bool dec_DecodeBlock(RuntimeBlockInfo* rbi,u32 max_cycles);
struct state_t
{

View File

@ -194,7 +194,7 @@ const char* RuntimeBlockInfo::hash(bool full, bool relocable)
return block_hash;
}
void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
bool RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
{
staging_runs=addr=lookups=runs=host_code_size=0;
guest_cycles=guest_opcodes=host_opcodes=0;
@ -210,7 +210,12 @@ void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
if (mmu_enabled())
{
bool shared;
mmu_instruction_translation(vaddr, addr, shared);
u32 rv = mmu_instruction_translation(vaddr, addr, shared);
if (rv != MMU_ERROR_NONE)
{
DoMMUException(vaddr, rv, MMU_TT_IREAD);
return false;
}
if (addr != vaddr && !shared)
asid = CCN_PTEH.ASID;
}
@ -220,8 +225,12 @@ void RuntimeBlockInfo::Setup(u32 rpc,fpscr_t rfpu_cfg)
oplist.clear();
dec_DecodeBlock(this,SH4_TIMESLICE/2);
if (!dec_DecodeBlock(this,SH4_TIMESLICE/2))
return false;
AnalyseBlock(this);
return true;
}
DynarecCodeEntryPtr rdv_CompilePC()
@ -232,43 +241,36 @@ DynarecCodeEntryPtr rdv_CompilePC()
recSh4_ClearCache();
RuntimeBlockInfo* rbi = ngen_AllocateBlock();
#ifndef NO_MMU
try {
#endif
rbi->Setup(pc,fpscr);
bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
rbi->staging_runs=do_opts?100:-100;
ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
verify(rbi->code!=0);
bm_AddBlock(rbi);
#ifndef NO_MMU
} catch (SH4ThrownException& ex) {
if (!rbi->Setup(pc,fpscr))
{
delete rbi;
throw ex;
return NULL;
}
#endif
bool do_opts=((rbi->addr&0x3FFFFFFF)>0x0C010100);
rbi->staging_runs=do_opts?100:-100;
ngen_Compile(rbi,DoCheck(rbi->addr),(pc&0xFFFFFF)==0x08300 || (pc&0xFFFFFF)==0x10000,false,do_opts);
verify(rbi->code!=0);
bm_AddBlock(rbi);
return rbi->code;
}
DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock_pc()
{
return rdv_FailedToFindBlock(next_pc);
}
DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc)
{
//printf("rdv_FailedToFindBlock ~ %08X\n",pc);
#ifndef NO_MMU
try {
#endif
next_pc=pc;
return rdv_CompilePC();
#ifndef NO_MMU
} catch (SH4ThrownException& ex) {
Do_Exception(pc, ex.expEvn, ex.callVect);
return bm_GetCode2(next_pc);
}
#endif
next_pc=pc;
DynarecCodeEntryPtr code = rdv_CompilePC();
if (code == NULL)
code = bm_GetCodeByVAddr(next_pc);
return code;
}
static void ngen_FailedToFindBlock_internal() {
@ -304,8 +306,17 @@ u32 DYNACALL rdv_DoInterrupts(void* block_cpde)
// addr must be the physical address of the start of the block
DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)
{
RuntimeBlockInfo *block = bm_GetBlock(addr);
bm_RemoveBlock(block);
if (mmu_enabled())
{
RuntimeBlockInfo *block = bm_GetBlock(addr);
//printf("rdv_BlockCheckFail addr %08x vaddr %08x pc %08x\n", addr, block->vaddr, next_pc);
bm_RemoveBlock(block);
}
else
{
next_pc = addr;
recSh4_ClearCache();
}
return rdv_CompilePC();
}
@ -320,7 +331,7 @@ DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 addr)
DynarecCodeEntryPtr rdv_FindOrCompile()
{
DynarecCodeEntryPtr rv=bm_GetCode(next_pc);
DynarecCodeEntryPtr rv=bm_GetCodeByVAddr(next_pc);
if (rv==ngen_FailedToFindBlock)
rv=rdv_CompilePC();
@ -359,7 +370,7 @@ void* DYNACALL rdv_LinkBlock(u8* code,u32 dpc)
DynarecCodeEntryPtr rv=rdv_FindOrCompile();
bool do_link=bm_GetBlock(code)==rbi;
bool do_link = !mmu_enabled() && bm_GetBlock(code) == rbi;
if (do_link)
{

View File

@ -65,6 +65,7 @@ void emit_SetBaseAddr();
//Called from ngen_FailedToFindBlock
DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock(u32 pc);
DynarecCodeEntryPtr DYNACALL rdv_FailedToFindBlock_pc();
//Called when a block check failed, and the block needs to be invalidated
DynarecCodeEntryPtr DYNACALL rdv_BlockCheckFail(u32 pc);
//Called to compile code @pc

View File

@ -0,0 +1,406 @@
#include "mmu.h"
#include "hw/sh4/sh4_if.h"
#include "hw/sh4/sh4_interrupts.h"
#include "hw/sh4/sh4_core.h"
#include "types.h"
#ifdef FAST_MMU
#include "hw/mem/_vmem.h"
#include "mmu_impl.h"
#include "ccn.h"
#include "hw/sh4/sh4_mem.h"
#include "oslib/oslib.h"
extern TLB_Entry UTLB[64];
// Used when FullMMU is off
extern u32 sq_remap[64];
//#define TRACE_WINCE_SYSCALLS
#include "wince.h"
#define printf_mmu(...)
//#define printf_mmu printf
#define printf_win32(...)
extern const u32 mmu_mask[4];
extern const u32 fast_reg_lut[8];
const TLB_Entry *lru_entry = NULL;
static u32 lru_mask;
static u32 lru_address;
struct TLB_LinkedEntry {
TLB_Entry entry;
TLB_LinkedEntry *next_entry;
};
#define NBUCKETS 65536
TLB_LinkedEntry full_table[65536];
u32 full_table_size;
TLB_LinkedEntry *entry_buckets[NBUCKETS];
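// Hash the VPN and page size into one of 64K buckets, so a lookup probes one page size at a time instead of scanning every cached entry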
static u16 bucket_index(u32 address, int size)
{
return ((address >> 16) ^ ((address & 0xFC00) | size)) & (NBUCKETS - 1);
}
static void cache_entry(const TLB_Entry &entry)
{
verify(full_table_size < ARRAY_SIZE(full_table));
u16 bucket = bucket_index(entry.Address.VPN << 10, entry.Data.SZ1 * 2 + entry.Data.SZ0);
full_table[full_table_size].entry = entry;
full_table[full_table_size].next_entry = entry_buckets[bucket];
entry_buckets[bucket] = &full_table[full_table_size];
full_table_size++;
}
static void flush_cache()
{
full_table_size = 0;
memset(entry_buckets, 0, sizeof(entry_buckets));
}
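// Probe the bucket chain for an entry of the given page size (0: 1KB, 1: 4KB, 2: 64KB, 3: 1MB); the ASID must match unless the page is shared (SH=1)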
template<u32 size>
bool find_entry_by_page_size(u32 address, const TLB_Entry **ret_entry)
{
u32 shift = size == 1 ? 2 :
size == 2 ? 6 :
size == 3 ? 10 : 0;
u32 vpn = (address >> (10 + shift)) << shift;
u16 bucket = bucket_index(vpn << 10, size);
TLB_LinkedEntry *pEntry = entry_buckets[bucket];
u32 length = 0;
while (pEntry != NULL)
{
if (pEntry->entry.Address.VPN == vpn && (size >> 1) == pEntry->entry.Data.SZ1 && (size & 1) == pEntry->entry.Data.SZ0)
{
if (pEntry->entry.Data.SH == 1 || pEntry->entry.Address.ASID == CCN_PTEH.ASID)
{
*ret_entry = &pEntry->entry;
return true;
}
}
pEntry = pEntry->next_entry;
}
return false;
}
static bool find_entry(u32 address, const TLB_Entry **ret_entry)
{
// 4k
if (find_entry_by_page_size<1>(address, ret_entry))
return true;
// 64k
if (find_entry_by_page_size<2>(address, ret_entry))
return true;
// 1m
if (find_entry_by_page_size<3>(address, ret_entry))
return true;
// 1k
if (find_entry_by_page_size<0>(address, ret_entry))
return true;
return false;
}
#if 0
static void dump_table()
{
static int iter = 1;
char filename[128];
sprintf(filename, "mmutable%03d", iter++);
FILE *f = fopen(filename, "wb");
if (f == NULL)
return;
fwrite(full_table, sizeof(full_table[0]), full_table_size, f);
fclose(f);
}
int main(int argc, char *argv[])
{
FILE *f = fopen(argv[1], "rb");
if (f == NULL)
{
perror(argv[1]);
return 1;
}
full_table_size = fread(full_table, sizeof(full_table[0]), ARRAY_SIZE(full_table), f);
fclose(f);
printf("Loaded %d entries\n", full_table_size);
std::vector<u32> addrs;
std::vector<u32> asids;
for (int i = 0; i < full_table_size; i++)
{
u32 sz = full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0;
u32 mask = sz == 3 ? 1*1024*1024 : sz == 2 ? 64*1024 : sz == 1 ? 4*1024 : 1024;
mask--;
addrs.push_back(((full_table[i].entry.Address.VPN << 10) & mmu_mask[sz]) | (random() * mask / RAND_MAX));
asids.push_back(full_table[i].entry.Address.ASID);
// printf("%08x -> %08x sz %d ASID %d SH %d\n", full_table[i].entry.Address.VPN << 10, full_table[i].entry.Data.PPN << 10,
// full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0,
// full_table[i].entry.Address.ASID, full_table[i].entry.Data.SH);
u16 bucket = bucket_index(full_table[i].entry.Address.VPN << 10, full_table[i].entry.Data.SZ1 * 2 + full_table[i].entry.Data.SZ0);
full_table[i].next_entry = entry_buckets[bucket];
entry_buckets[bucket] = &full_table[i];
}
for (int i = 0; i < full_table_size / 10; i++)
{
addrs.push_back(random());
asids.push_back(666);
}
double start = os_GetSeconds();
int success = 0;
const int loops = 100000;
for (int i = 0; i < loops; i++)
{
for (int j = 0; j < addrs.size(); j++)
{
u32 addr = addrs[j];
CCN_PTEH.ASID = asids[j];
const TLB_Entry *p;
if (find_entry(addr, &p))
success++;
}
}
double end = os_GetSeconds();
printf("Lookup time: %f ms. Success rate %f max_len %d\n", (end - start) * 1000.0 / addrs.size(), (double)success / addrs.size() / loops, 0/*max_length*/);
}
#endif
bool UTLB_Sync(u32 entry)
{
TLB_Entry& tlb_entry = UTLB[entry];
u32 sz = tlb_entry.Data.SZ1 * 2 + tlb_entry.Data.SZ0;
lru_entry = &tlb_entry;
lru_mask = mmu_mask[sz];
lru_address = (tlb_entry.Address.VPN << 10) & lru_mask;
tlb_entry.Address.VPN = lru_address >> 10;
cache_entry(tlb_entry);
if (!mmu_enabled() && (tlb_entry.Address.VPN & (0xFC000000 >> 10)) == (0xE0000000 >> 10))
{
// Used when FullMMU is off
u32 vpn_sq = ((tlb_entry.Address.VPN & 0x7FFFF) >> 10) & 0x3F;//upper bits are always known [0xE0/E1/E2/E3]
sq_remap[vpn_sq] = tlb_entry.Data.PPN << 10;
}
return true;
}
void ITLB_Sync(u32 entry)
{
}
//Do a full lookup on the UTLB entries
template<bool internal>
u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
{
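// Fast path: try the most recently used entry first; the bucket lookup and (with USE_WINCE_HACK) the guest page-table walk only run on a miss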
if (lru_entry != NULL)
{
if (/*lru_entry->Data.V == 1 && */
lru_address == (va & lru_mask)
&& (lru_entry->Address.ASID == CCN_PTEH.ASID
|| lru_entry->Data.SH == 1
/*|| (sr.MD == 1 && CCN_MMUCR.SV == 1)*/)) // SV=1 not handled
{
//VPN->PPN | low bits
// TODO mask off PPN when updating TLB to avoid doing it at look up time
rv = ((lru_entry->Data.PPN << 10) & lru_mask) | (va & (~lru_mask));
*tlb_entry_ret = lru_entry;
return MMU_ERROR_NONE;
}
}
if (find_entry(va, tlb_entry_ret))
{
u32 mask = mmu_mask[(*tlb_entry_ret)->Data.SZ1 * 2 + (*tlb_entry_ret)->Data.SZ0];
rv = (((*tlb_entry_ret)->Data.PPN << 10) & mask) | (va & (~mask));
lru_entry = *tlb_entry_ret;
lru_mask = mask;
lru_address = ((*tlb_entry_ret)->Address.VPN << 10);
return MMU_ERROR_NONE;
}
#ifdef USE_WINCE_HACK
// WinCE hack
TLB_Entry entry;
if (wince_resolve_address(va, entry))
{
CCN_PTEL.reg_data = entry.Data.reg_data;
CCN_PTEA.reg_data = entry.Assistance.reg_data;
CCN_PTEH.reg_data = entry.Address.reg_data;
UTLB[CCN_MMUCR.URC] = entry;
*tlb_entry_ret = &UTLB[CCN_MMUCR.URC];
lru_entry = *tlb_entry_ret;
u32 sz = lru_entry->Data.SZ1 * 2 + lru_entry->Data.SZ0;
lru_mask = mmu_mask[sz];
lru_address = va & lru_mask;
rv = ((lru_entry->Data.PPN << 10) & lru_mask) | (va & (~lru_mask));
cache_entry(*lru_entry);
return MMU_ERROR_NONE;
}
#endif
return MMU_ERROR_TLB_MISS;
}
template u32 mmu_full_lookup<false>(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv);
template<u32 translation_type, typename T>
u32 mmu_data_translation(u32 va, u32& rv)
{
if (va & (sizeof(T) - 1))
{
return MMU_ERROR_BADADDR;
}
if (translation_type == MMU_TT_DWRITE)
{
if ((va & 0xFC000000) == 0xE0000000)
{
u32 lookup = mmu_full_SQ<translation_type>(va, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
rv = va; //SQ writes are not translated, only write backs are.
return MMU_ERROR_NONE;
}
}
// if ((sr.MD == 0) && (va & 0x80000000) != 0)
// {
// //if on kernel, and not SQ addr -> error
// return MMU_ERROR_BADADDR;
// }
if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
{
rv = va;
return MMU_ERROR_NONE;
}
// Not called if CCN_MMUCR.AT == 0
//if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
if (fast_reg_lut[va >> 29] != 0)
{
rv = va;
return MMU_ERROR_NONE;
}
const TLB_Entry *entry;
u32 lookup = mmu_full_lookup(va, &entry, rv);
// if (lookup != MMU_ERROR_NONE)
// return lookup;
#ifdef TRACE_WINCE_SYSCALLS
if (unresolved_unicode_string != 0 && lookup == MMU_ERROR_NONE)
{
if (va == unresolved_unicode_string)
{
unresolved_unicode_string = 0;
printf("RESOLVED %s\n", get_unicode_string(va).c_str());
}
}
#endif
// u32 md = entry->Data.PR >> 1;
//
// //0X & User mode-> protection violation
// //Priv mode protection
// if ((md == 0) && sr.MD == 0)
// {
// die("MMU_ERROR_PROTECTED");
// return MMU_ERROR_PROTECTED;
// }
//
// //X0 -> read only
// //X1 -> read/write , can be FW
//
// //Write Protection (Lock or FW)
// if (translation_type == MMU_TT_DWRITE)
// {
// if ((entry->Data.PR & 1) == 0)
// {
// die("MMU_ERROR_PROTECTED");
// return MMU_ERROR_PROTECTED;
// }
// else if (entry->Data.D == 0)
// {
// die("MMU_ERROR_FIRSTWRITE");
// return MMU_ERROR_FIRSTWRITE;
// }
// }
return lookup;
}
template u32 mmu_data_translation<MMU_TT_DREAD, u8>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u64>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u8>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u16>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u32>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u64>(u32 va, u32& rv);
u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared)
{
if (va & 1)
{
return MMU_ERROR_BADADDR;
}
// if ((sr.MD == 0) && (va & 0x80000000) != 0)
// {
// //if SQ disabled, or if SQ on but out of SQ mem then BAD ADDR ;)
// if (va >= 0xE0000000)
// return MMU_ERROR_BADADDR;
// }
if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
{
rv = va;
return MMU_ERROR_NONE;
}
// Fast hack: skip the ITLB and go straight to the UTLB lookup
const TLB_Entry *tlb_entry;
u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
if (lookup != MMU_ERROR_NONE)
return lookup;
u32 md = tlb_entry->Data.PR >> 1;
//0X & User mode-> protection violation
//Priv mode protection
// if ((md == 0) && sr.MD == 0)
// {
// return MMU_ERROR_PROTECTED;
// }
shared = tlb_entry->Data.SH == 1;
return MMU_ERROR_NONE;
}
void mmu_flush_table()
{
// printf("MMU tables flushed\n");
// ITLB[0].Data.V = 0;
// ITLB[1].Data.V = 0;
// ITLB[2].Data.V = 0;
// ITLB[3].Data.V = 0;
//
// for (u32 i = 0; i < 64; i++)
// UTLB[i].Data.V = 0;
lru_entry = NULL;
flush_cache();
}
#endif // FAST_MMU

View File

@ -101,7 +101,7 @@ WriteMem16Func WriteMem16;
WriteMem32Func WriteMem32;
WriteMem64Func WriteMem64;
const u32 mmu_mask[4] =
extern const u32 mmu_mask[4] =
{
((0xFFFFFFFF) >> 10) << 10, //1 kb page
((0xFFFFFFFF) >> 12) << 12, //4 kb page
@ -109,7 +109,7 @@ const u32 mmu_mask[4] =
((0xFFFFFFFF) >> 20) << 20 //1 MB page
};
const u32 fast_reg_lut[8] =
extern const u32 fast_reg_lut[8] =
{
0, 0, 0, 0 //P0-U0
, 1 //P1
@ -134,6 +134,7 @@ const u32 ITLB_LRU_AND[4] =
};
u32 ITLB_LRU_USE[64];
#ifndef FAST_MMU
//sync mem mapping to mmu, suspend compiled blocks if needed. entry is a UTLB entry #, -1 is for full sync
bool UTLB_Sync(u32 entry)
{
@ -160,6 +161,7 @@ void ITLB_Sync(u32 entry)
{
printf_mmu("ITLB MEM remap %d : 0x%X to 0x%X : %d\n", entry, ITLB[entry].Address.VPN << 10, ITLB[entry].Data.PPN << 10, ITLB[entry].Data.V);
}
#endif
void RaiseException(u32 expEvnt, u32 callVect) {
#if !defined(NO_MMU)
@ -170,16 +172,12 @@ void RaiseException(u32 expEvnt, u32 callVect) {
#endif
}
u32 mmu_error_TT;
void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
{
printf_mmu("mmu_raise_exception -> pc = 0x%X : ", next_pc);
CCN_TEA = address;
CCN_PTEH.VPN = address >> 10;
//save translation type error :)
mmu_error_TT = am;
switch (mmu_error)
{
//No error
@ -262,6 +260,94 @@ void mmu_raise_exception(u32 mmu_error, u32 address, u32 am)
die("Unknown mmu_error");
}
void DoMMUException(u32 address, u32 mmu_error, u32 access_type)
{
printf_mmu("DoMMUException -> pc = 0x%X : ", next_pc);
CCN_TEA = address;
CCN_PTEH.VPN = address >> 10;
switch (mmu_error)
{
//No error
case MMU_ERROR_NONE:
printf("Error : mmu_raise_exception(MMU_ERROR_NONE)\n");
break;
//TLB miss
case MMU_ERROR_TLB_MISS:
printf_mmu("MMU_ERROR_UTLB_MISS 0x%X, handled\n", address);
if (access_type == MMU_TT_DWRITE) //WTLBMISS - Write Data TLB Miss Exception
Do_Exception(next_pc, 0x60, 0x400);
else if (access_type == MMU_TT_DREAD) //RTLBMISS - Read Data TLB Miss Exception
Do_Exception(next_pc, 0x40, 0x400);
else //ITLBMISS - Instruction TLB Miss Exception
Do_Exception(next_pc, 0x40, 0x400);
return;
break;
//TLB Multihit
case MMU_ERROR_TLB_MHIT:
printf("MMU_ERROR_TLB_MHIT @ 0x%X\n", address);
break;
//Mem is read/write protected (depends on translation type)
case MMU_ERROR_PROTECTED:
printf_mmu("MMU_ERROR_PROTECTED 0x%X, handled\n", address);
if (access_type == MMU_TT_DWRITE) //WRITEPROT - Write Data TLB Protection Violation Exception
Do_Exception(next_pc, 0xC0, 0x100);
else if (access_type == MMU_TT_DREAD) //READPROT - Data TLB Protection Violation Exception
Do_Exception(next_pc, 0xA0, 0x100);
else
{
verify(false);
}
return;
break;
//Mem is write protected , firstwrite
case MMU_ERROR_FIRSTWRITE:
printf_mmu("MMU_ERROR_FIRSTWRITE\n");
verify(access_type == MMU_TT_DWRITE);
//FIRSTWRITE - Initial Page Write Exception
Do_Exception(next_pc, 0x80, 0x100);
return;
break;
//data read/write misaligned
case MMU_ERROR_BADADDR:
if (access_type == MMU_TT_DWRITE) //WADDERR - Write Data Address Error
Do_Exception(next_pc, 0x100, 0x100);
else if (access_type == MMU_TT_DREAD) //RADDERR - Read Data Address Error
Do_Exception(next_pc, 0xE0, 0x100);
else //IADDERR - Instruction Address Error
{
#ifdef TRACE_WINCE_SYSCALLS
if (!print_wince_syscall(address))
#endif
printf_mmu("MMU_ERROR_BADADDR(i) 0x%X\n", address);
Do_Exception(next_pc, 0xE0, 0x100);
return;
}
printf_mmu("MMU_ERROR_BADADDR(d) 0x%X, handled\n", address);
return;
break;
//Can't Execute
case MMU_ERROR_EXECPROT:
printf("MMU_ERROR_EXECPROT 0x%X\n", address);
//EXECPROT - Instruction TLB Protection Violation Exception
Do_Exception(next_pc, 0xA0, 0x100);
return;
break;
}
die("Unknown mmu_error");
}
bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
{
if (Data.V == 0)
@ -283,6 +369,7 @@ bool mmu_match(u32 va, CCN_PTEH_type Address, CCN_PTEL_type Data)
return false;
}
#ifndef FAST_MMU
//Do a full lookup on the UTLB entries
template<bool internal>
u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
@ -328,6 +415,7 @@ u32 mmu_full_lookup(u32 va, const TLB_Entry** tlb_entry_ret, u32& rv)
return MMU_ERROR_NONE;
}
#endif
//Simple QACR translation for mmu (when AT is off)
u32 mmu_QACR_SQ(u32 va)
@ -342,6 +430,7 @@ u32 mmu_QACR_SQ(u32 va)
va &= ~0x1f;
return QACR + va;
}
template<u32 translation_type>
u32 mmu_full_SQ(u32 va, u32& rv)
{
@ -387,11 +476,14 @@ u32 mmu_full_SQ(u32 va, u32& rv)
}
return MMU_ERROR_NONE;
}
template u32 mmu_full_SQ<MMU_TT_DWRITE>(u32 va, u32& rv);
#ifndef FAST_MMU
template<u32 translation_type, typename T>
void mmu_data_translation(u32 va, u32& rv)
u32 mmu_data_translation(u32 va, u32& rv)
{
if (va & (sizeof(T) - 1))
mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
return MMU_ERROR_BADADDR;
if (translation_type == MMU_TT_DWRITE)
{
@ -399,23 +491,23 @@ void mmu_data_translation(u32 va, u32& rv)
{
u32 lookup = mmu_full_SQ<translation_type>(va, rv);
if (lookup != MMU_ERROR_NONE)
mmu_raise_exception(lookup, va, translation_type);
return lookup;
rv = va; //SQ writes are not translated, only write backs are.
return;
return MMU_ERROR_NONE;
}
}
if ((sr.MD == 0) && (va & 0x80000000) != 0)
{
//if on kernel, and not SQ addr -> error
mmu_raise_exception(MMU_ERROR_BADADDR, va, translation_type);
return MMU_ERROR_BADADDR;
}
if (sr.MD == 1 && ((va & 0xFC000000) == 0x7C000000))
{
rv = va;
return;
return MMU_ERROR_NONE;
}
// Not called if CCN_MMUCR.AT == 0
@ -423,14 +515,14 @@ void mmu_data_translation(u32 va, u32& rv)
if (fast_reg_lut[va >> 29] != 0)
{
rv = va;
return;
return MMU_ERROR_NONE;
}
const TLB_Entry *entry;
u32 lookup = mmu_full_lookup(va, &entry, rv);
if (lookup != MMU_ERROR_NONE)
mmu_raise_exception(lookup, va, translation_type);
return lookup;
#ifdef TRACE_WINCE_SYSCALLS
if (unresolved_unicode_string != 0)
@ -449,7 +541,7 @@ void mmu_data_translation(u32 va, u32& rv)
//Priv mode protection
if ((md == 0) && sr.MD == 0)
{
mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
return MMU_ERROR_PROTECTED;
}
//X0 -> read only
@ -459,32 +551,34 @@ void mmu_data_translation(u32 va, u32& rv)
if (translation_type == MMU_TT_DWRITE)
{
if ((entry->Data.PR & 1) == 0)
mmu_raise_exception(MMU_ERROR_PROTECTED, va, translation_type);
return MMU_ERROR_PROTECTED;
else if (entry->Data.D == 0)
mmu_raise_exception(MMU_ERROR_FIRSTWRITE, va, translation_type);
return MMU_ERROR_FIRSTWRITE;
}
return MMU_ERROR_NONE;
}
template void mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
template void mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u16>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DREAD, u32>(u32 va, u32& rv);
template u32 mmu_data_translation<MMU_TT_DWRITE, u64>(u32 va, u32& rv);
void mmu_instruction_translation(u32 va, u32& rv, bool& shared)
u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared)
{
if (va & 1)
{
mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
return MMU_ERROR_BADADDR;
}
if ((sr.MD == 0) && (va & 0x80000000) != 0)
{
//if SQ disabled, or if SQ on but out of SQ mem then BAD ADDR ;)
if (va >= 0xE0000000)
mmu_raise_exception(MMU_ERROR_BADADDR, va, MMU_TT_IREAD);
return MMU_ERROR_BADADDR;
}
if ((CCN_MMUCR.AT == 0) || (fast_reg_lut[va >> 29] != 0))
{
rv = va;
return;
return MMU_ERROR_NONE;
}
bool mmach = false;
@ -521,7 +615,7 @@ retry_ITLB_Match:
u32 lookup = mmu_full_lookup(va, &tlb_entry, rv);
if (lookup != MMU_ERROR_NONE)
mmu_raise_exception(lookup, va, MMU_TT_IREAD);
return lookup;
u32 replace_index = ITLB_LRU_USE[CCN_MMUCR.LRUI];
verify(replace_index != 0xFFFFFFFF);
@ -535,11 +629,11 @@ retry_ITLB_Match:
{
if (nom)
{
mmu_raise_exception(MMU_ERROR_TLB_MHIT, va, MMU_TT_IREAD);
return MMU_ERROR_TLB_MHIT;
}
else
{
mmu_raise_exception(MMU_ERROR_TLB_MISS, va, MMU_TT_IREAD);
return MMU_ERROR_TLB_MISS;
}
}
@ -552,25 +646,27 @@ retry_ITLB_Match:
//Priv mode protection
if ((md == 0) && sr.MD == 0)
{
mmu_raise_exception(MMU_ERROR_PROTECTED, va, MMU_TT_IREAD);
return MMU_ERROR_PROTECTED;
}
return MMU_ERROR_NONE;
}
#endif
void mmu_set_state()
{
if (CCN_MMUCR.AT == 1 && settings.dreamcast.FullMMU)
{
printf("Enabling Full MMU support\n");
ReadMem8 = &mmu_ReadMem8;
ReadMem16 = &mmu_ReadMem16;
IReadMem16 = &mmu_IReadMem16;
ReadMem32 = &mmu_ReadMem32;
ReadMem64 = &mmu_ReadMem64;
ReadMem8 = &mmu_ReadMem<u8>;
ReadMem16 = &mmu_ReadMem<u16>;
ReadMem32 = &mmu_ReadMem<u32>;
ReadMem64 = &mmu_ReadMem<u64>;
WriteMem8 = &mmu_WriteMem8;
WriteMem16 = &mmu_WriteMem16;
WriteMem32 = &mmu_WriteMem32;
WriteMem64 = &mmu_WriteMem64;
WriteMem8 = &mmu_WriteMem<u8>;
WriteMem16 = &mmu_WriteMem<u16>;
WriteMem32 = &mmu_WriteMem<u32>;
WriteMem64 = &mmu_WriteMem<u64>;
mmu_flush_table();
}
else
@ -619,6 +715,7 @@ void MMU_term()
{
}
#ifndef FAST_MMU
void mmu_flush_table()
{
//printf("MMU tables flushed\n");
@ -631,66 +728,95 @@ void mmu_flush_table()
for (u32 i = 0; i < 64; i++)
UTLB[i].Data.V = 0;
}
#endif
u8 DYNACALL mmu_ReadMem8(u32 adr)
template<typename T>
T DYNACALL mmu_ReadMem(u32 adr)
{
u32 addr;
mmu_data_translation<MMU_TT_DREAD, u8>(adr, addr);
return _vmem_ReadMem8(addr);
u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
if (rv != MMU_ERROR_NONE)
mmu_raise_exception(rv, adr, MMU_TT_DREAD);
return _vmem_readt<T, T>(addr);
}
u16 DYNACALL mmu_ReadMem16(u32 adr)
{
u32 addr;
mmu_data_translation<MMU_TT_DREAD, u16>(adr, addr);
return _vmem_ReadMem16(addr);
}
u16 DYNACALL mmu_IReadMem16(u32 vaddr)
{
u32 addr;
bool shared;
mmu_instruction_translation(vaddr, addr, shared);
u32 rv = mmu_instruction_translation(vaddr, addr, shared);
if (rv != MMU_ERROR_NONE)
mmu_raise_exception(rv, vaddr, MMU_TT_IREAD);
return _vmem_ReadMem16(addr);
}
u32 DYNACALL mmu_ReadMem32(u32 adr)
template<typename T>
void DYNACALL mmu_WriteMem(u32 adr, T data)
{
u32 addr;
mmu_data_translation<MMU_TT_DREAD, u32>(adr, addr);
return _vmem_ReadMem32(addr);
}
u64 DYNACALL mmu_ReadMem64(u32 adr)
{
u32 addr;
mmu_data_translation<MMU_TT_DREAD, u64>(adr, addr);
return _vmem_ReadMem64(addr);
u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
if (rv != MMU_ERROR_NONE)
mmu_raise_exception(rv, adr, MMU_TT_DWRITE);
_vmem_writet<T>(addr, data);
}
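// NoEx variants: the guest exception is delivered via DoMMUException and
// reported through *exception_occurred (or the return value) instead of being
// raised as a C++ exception, so dynarec callers can longjmp safely.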
void DYNACALL mmu_WriteMem8(u32 adr, u8 data)
template<typename T>
T DYNACALL mmu_ReadMemNoEx(u32 adr, u32 *exception_occurred)
{
u32 addr;
mmu_data_translation<MMU_TT_DWRITE, u8>(adr, addr);
_vmem_WriteMem8(addr, data);
u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
if (rv != MMU_ERROR_NONE)
{
DoMMUException(adr, rv, MMU_TT_DREAD);
*exception_occurred = 1;
return 0;
}
else
{
*exception_occurred = 0;
return _vmem_readt<T, T>(addr);
}
}
template u8 mmu_ReadMemNoEx<u8>(u32 adr, u32 *exception_occurred);
template u16 mmu_ReadMemNoEx<u16>(u32 adr, u32 *exception_occurred);
template u32 mmu_ReadMemNoEx<u32>(u32 adr, u32 *exception_occurred);
template u64 mmu_ReadMemNoEx<u64>(u32 adr, u32 *exception_occurred);
u16 DYNACALL mmu_IReadMem16NoEx(u32 vaddr, u32 *exception_occurred)
{
u32 addr;
bool shared;
u32 rv = mmu_instruction_translation(vaddr, addr, shared);
if (rv != MMU_ERROR_NONE)
{
DoMMUException(vaddr, rv, MMU_TT_IREAD);
*exception_occurred = 1;
return 0;
}
else
{
*exception_occurred = 0;
return _vmem_ReadMem16(addr);
}
}
void DYNACALL mmu_WriteMem16(u32 adr, u16 data)
template<typename T>
u32 DYNACALL mmu_WriteMemNoEx(u32 adr, T data)
{
u32 addr;
mmu_data_translation<MMU_TT_DWRITE, u16>(adr, addr);
_vmem_WriteMem16(addr, data);
}
void DYNACALL mmu_WriteMem32(u32 adr, u32 data)
{
u32 addr;
mmu_data_translation<MMU_TT_DWRITE, u32>(adr, addr);
_vmem_WriteMem32(addr, data);
}
void DYNACALL mmu_WriteMem64(u32 adr, u64 data)
{
u32 addr;
mmu_data_translation<MMU_TT_DWRITE, u64>(adr, addr);
_vmem_WriteMem64(addr, data);
u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
if (rv != MMU_ERROR_NONE)
{
DoMMUException(adr, rv, MMU_TT_DWRITE);
return 1;
}
_vmem_writet<T>(addr, data);
return 0;
}
template u32 mmu_WriteMemNoEx<u8>(u32 adr, u8 data);
template u32 mmu_WriteMemNoEx<u16>(u32 adr, u16 data);
template u32 mmu_WriteMemNoEx<u32>(u32 adr, u32 data);
template u32 mmu_WriteMemNoEx<u64>(u32 adr, u64 data);
bool mmu_TranslateSQW(u32 adr, u32* out)
{

View File

@ -10,6 +10,22 @@
//Data write
#define MMU_TT_DREAD 2
//Return Values
//Translation was successful
#define MMU_ERROR_NONE 0
//TLB miss
#define MMU_ERROR_TLB_MISS 1
//TLB Multihit
#define MMU_ERROR_TLB_MHIT 2
//Mem is read/write protected (depends on translation type)
#define MMU_ERROR_PROTECTED 3
//Mem is write protected , firstwrite
#define MMU_ERROR_FIRSTWRITE 4
//data-Opcode read/write misaligned
#define MMU_ERROR_BADADDR 5
//Can't Execute
#define MMU_ERROR_EXECPROT 6
struct TLB_Entry
{
CCN_PTEH_type Address;
@ -40,9 +56,10 @@ static INLINE bool mmu_enabled()
template<bool internal = false>
u32 mmu_full_lookup(u32 va, const TLB_Entry **entry, u32& rv);
void mmu_instruction_translation(u32 va, u32& rv, bool& shared);
u32 mmu_instruction_translation(u32 va, u32& rv, bool& shared);
template<u32 translation_type, typename T>
extern void mmu_data_translation(u32 va, u32& rv);
extern u32 mmu_data_translation(u32 va, u32& rv);
void DoMMUException(u32 addr, u32 error_code, u32 access_type);
#if defined(NO_MMU)
bool inline mmu_TranslateSQW(u32 addr, u32* mapped) {
@ -51,16 +68,14 @@ extern void mmu_data_translation(u32 va, u32& rv);
}
void inline mmu_flush_table() {}
#else
u8 DYNACALL mmu_ReadMem8(u32 addr);
u16 DYNACALL mmu_ReadMem16(u32 addr);
template<typename T> T DYNACALL mmu_ReadMem(u32 adr);
u16 DYNACALL mmu_IReadMem16(u32 addr);
u32 DYNACALL mmu_ReadMem32(u32 addr);
u64 DYNACALL mmu_ReadMem64(u32 addr);
void DYNACALL mmu_WriteMem8(u32 addr, u8 data);
void DYNACALL mmu_WriteMem16(u32 addr, u16 data);
void DYNACALL mmu_WriteMem32(u32 addr, u32 data);
void DYNACALL mmu_WriteMem64(u32 addr, u64 data);
template<typename T> void DYNACALL mmu_WriteMem(u32 adr, T data);
bool mmu_TranslateSQW(u32 addr, u32* mapped);
u16 DYNACALL mmu_IReadMem16NoEx(u32 adr, u32 *exception_occurred);
template<typename T> T DYNACALL mmu_ReadMemNoEx(u32 adr, u32 *exception_occurred);
template<typename T> u32 DYNACALL mmu_WriteMemNoEx(u32 adr, T data);
#endif

View File

@ -3,26 +3,8 @@
#include "ccn.h"
#include "mmu.h"
//Do a full lookup on the UTLB entries
//Return Values
//Translation was successful, rv contains return
#define MMU_ERROR_NONE 0
//TLB miss
#define MMU_ERROR_TLB_MISS 1
//TLB Multihit
#define MMU_ERROR_TLB_MHIT 2
//Mem is read/write protected (depends on translation type)
#define MMU_ERROR_PROTECTED 3
//Mem is write protected , firstwrite
#define MMU_ERROR_FIRSTWRITE 4
//data-Opcode read/write misaligned
#define MMU_ERROR_BADADDR 5
//Can't Execute
#define MMU_ERROR_EXECPROT 6
extern u32 mmu_error_TT;
void MMU_Init();
void MMU_Reset(bool Manual);
void MMU_Term();
template<u32 translation_type> u32 mmu_full_SQ(u32 va, u32& rv);

View File

@ -22,15 +22,15 @@ extern VArray2 mem_b;
//#define WriteMem64(addr,reg) { _vmem_WriteMem32(addr,((u32*)reg)[0]);_vmem_WriteMem32((addr)+4, ((u32*)reg)[1]); }
#else
typedef u8 (*ReadMem8Func)(u32 addr);
typedef u16 (*ReadMem16Func)(u32 addr);
typedef u32 (*ReadMem32Func)(u32 addr);
typedef u64 (*ReadMem64Func)(u32 addr);
typedef u8 DYNACALL (*ReadMem8Func)(u32 addr);
typedef u16 DYNACALL (*ReadMem16Func)(u32 addr);
typedef u32 DYNACALL (*ReadMem32Func)(u32 addr);
typedef u64 DYNACALL (*ReadMem64Func)(u32 addr);
typedef void (*WriteMem8Func)(u32 addr, u8 data);
typedef void (*WriteMem16Func)(u32 addr, u16 data);
typedef void (*WriteMem32Func)(u32 addr, u32 data);
typedef void (*WriteMem64Func)(u32 addr, u64 data);
typedef void DYNACALL (*WriteMem8Func)(u32 addr, u8 data);
typedef void DYNACALL (*WriteMem16Func)(u32 addr, u16 data);
typedef void DYNACALL (*WriteMem32Func)(u32 addr, u32 data);
typedef void DYNACALL (*WriteMem64Func)(u32 addr, u64 data);
extern ReadMem8Func ReadMem8;
extern ReadMem16Func ReadMem16;

View File

@ -34,6 +34,7 @@ static bool rtt_to_buffer_game;
static bool safemode_game;
static bool tr_poly_depth_mask_game;
static bool extra_depth_game;
static bool full_mmu_game;
cThread emu_thread(&dc_run, NULL);
@ -137,12 +138,15 @@ void LoadSpecialSettings()
safemode_game = false;
tr_poly_depth_mask_game = false;
extra_depth_game = false;
full_mmu_game = false;
if (reios_windows_ce)
{
printf("Enabling Extra depth scaling for Windows CE games\n");
printf("Enabling Full MMU and Extra depth scaling for Windows CE game\n");
settings.rend.ExtraDepthScale = 0.1;
extra_depth_game = true;
settings.dreamcast.FullMMU = true;
full_mmu_game = true;
}
// Tony Hawk's Pro Skater 2
@ -660,7 +664,8 @@ void SaveSettings()
cfgSaveInt("config", "Dreamcast.Cable", settings.dreamcast.cable);
cfgSaveInt("config", "Dreamcast.Region", settings.dreamcast.region);
cfgSaveInt("config", "Dreamcast.Broadcast", settings.dreamcast.broadcast);
cfgSaveBool("config", "Dreamcast.FullMMU", settings.dreamcast.FullMMU);
if (!full_mmu_game || !settings.dreamcast.FullMMU)
cfgSaveBool("config", "Dreamcast.FullMMU", settings.dreamcast.FullMMU);
cfgSaveBool("config", "Dynarec.idleskip", settings.dynarec.idleskip);
cfgSaveBool("config", "Dynarec.unstable-opt", settings.dynarec.unstable_opt);
if (!safemode_game || !settings.dynarec.safemode)

View File

@ -24,6 +24,7 @@
#include <unistd.h>
#include <sys/mman.h>
#include <map>
#include <setjmp.h>
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
@ -104,6 +105,8 @@ void Arm64CacheFlush(void* start, void* end)
double host_cpu_time;
u64 guest_cpu_cycles;
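// jmp_env is armed by setjmp in ngen_mainloop; MMU faults longjmp back to it
// instead of throwing C++ exceptions through JIT-generated frames. The cycle
// counter is kept in memory rather than in a fixed host register so that its
// value survives a longjmp.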
static jmp_buf jmp_env;
static u32 cycle_counter;
#ifdef PROFILING
#include <time.h>
@ -153,8 +156,8 @@ __asm__
".hidden ngen_FailedToFindBlock_ \n\t"
".globl ngen_FailedToFindBlock_ \n\t"
"ngen_FailedToFindBlock_: \n\t"
"mov w0, w29 \n\t"
"bl rdv_FailedToFindBlock \n\t"
// "mov w0, w29 \n\t" // FIXME w29 might not be up to date anymore (exception in bm_GetCodeByVAddr)
"bl rdv_FailedToFindBlock_pc \n\t"
"br x0 \n"
".hidden ngen_blockcheckfail \n\t"
@ -180,10 +183,16 @@ void ngen_mainloop(void* v_cntx)
"stp s10, s11, [sp, #112] \n\t"
"stp s12, s13, [sp, #128] \n\t"
"stp x29, x30, [sp, #144] \n\t"
// Use x28 as sh4 context pointer
"mov x28, %[cntx] \n\t"
// Use x27 as cycle_counter
"stp %[cntx], %[cycle_counter], [sp, #-16]! \n\t" // Push context, cycle_counter address
"mov w27, %[_SH4_TIMESLICE] \n\t"
"str w27, [%[cycle_counter]] \n\t"
"mov x0, %[jmp_env] \n\t" // SETJMP
"bl setjmp \n\t"
// Use x28 as sh4 context pointer
"ldr x28, [sp] \n\t" // Set context
// w29 is next_pc
"ldr w29, [x28, %[pc]] \n\t"
"b no_update \n"
@ -191,8 +200,11 @@ void ngen_mainloop(void* v_cntx)
".hidden intc_sched \n\t"
".globl intc_sched \n\t"
"intc_sched: \n\t"
"add w27, w27, %[_SH4_TIMESLICE] \n\t"
"mov x29, lr \n\r" // Trashing pc here but it will be reset at the end of the block or in DoInterrupts
"ldr x27, [sp, #8] \n\t" // &cycle_counter
"ldr w0, [x27] \n\t" // cycle_counter
"add w0, w0, %[_SH4_TIMESLICE] \n\t"
"str w0, [x27] \n\t"
"mov x29, lr \n\t" // Trashing pc here but it will be reset at the end of the block or in DoInterrupts
"bl UpdateSystem \n\t"
"mov lr, x29 \n\t"
"cbnz w0, .do_interrupts \n\t"
@ -208,7 +220,9 @@ void ngen_mainloop(void* v_cntx)
"no_update: \n\t" // next_pc _MUST_ be on w29
"ldr w0, [x28, %[CpuRunning]] \n\t"
"cbz w0, .end_mainloop \n\t"
"ldr w29, [x28, %[pc]] \n\t" // shouldn't be necessary
#ifdef NO_MMU
"movz x2, %[RCB_SIZE], lsl #16 \n\t"
"sub x2, x28, x2 \n\t"
"add x2, x2, %[SH4CTX_SIZE] \n\t"
@ -221,8 +235,14 @@ void ngen_mainloop(void* v_cntx)
#endif
"ldr x0, [x2, x1, lsl #3] \n\t"
"br x0 \n"
#else
"mov w0, w29 \n\t"
"bl bm_GetCodeByVAddr \n\t"
"br x0 \n"
#endif
".end_mainloop: \n\t"
"add sp, sp, #16 \n\t" // Pop context
"ldp x29, x30, [sp, #144] \n\t"
"ldp s12, s13, [sp, #128] \n\t"
"ldp s10, s11, [sp, #112] \n\t"
@ -239,7 +259,9 @@ void ngen_mainloop(void* v_cntx)
[_SH4_TIMESLICE] "i"(SH4_TIMESLICE),
[CpuRunning] "i"(offsetof(Sh4Context, CpuRunning)),
[RCB_SIZE] "i" (sizeof(Sh4RCB) >> 16),
[SH4CTX_SIZE] "i" (sizeof(Sh4Context))
[SH4CTX_SIZE] "i" (sizeof(Sh4Context)),
[jmp_env] "r"(reinterpret_cast<uintptr_t>(jmp_env)),
[cycle_counter] "r"(reinterpret_cast<uintptr_t>(&cycle_counter))
: "memory"
);
}
@ -265,6 +287,75 @@ RuntimeBlockInfo* ngen_AllocateBlock()
return new DynaRBI();
}
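// MMU slow-path accessors called from generated code. On a fault the guest
// exception has already been delivered by the NoEx helper; set spc (bit 0 of
// pc flags a delay slot) and longjmp straight back to ngen_mainloop.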
template<typename T>
static T ReadMemNoEx(u32 addr, u32 pc)
{
u32 ex;
T rv = mmu_ReadMemNoEx<T>(addr, &ex);
if (ex)
{
if (pc & 1)
spc = pc - 1;
else
spc = pc;
longjmp(jmp_env, 1);
}
return rv;
}
template<typename T>
static void WriteMemNoEx(u32 addr, T data, u32 pc)
{
u32 ex = mmu_WriteMemNoEx<T>(addr, data);
if (ex)
{
if (pc & 1)
spc = pc - 1;
else
spc = pc;
longjmp(jmp_env, 1);
}
}
static u32 interpreter_fallback(u16 op, u32 pc)
{
try {
OpDesc[op]->oph(op);
return 0;
} catch (SH4ThrownException& ex) {
die("IFB exception");
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
return 1;
}
}
static u32 exception_raised;
static void do_sqw_mmu_no_ex(u32 addr, u32 pc)
{
try {
do_sqw_mmu(addr);
exception_raised = 0;
} catch (SH4ThrownException& ex) {
die("do_sqw_mmu exception");
if (pc & 1)
{
// Delay slot
AdjustDelaySlotException(ex);
pc--;
}
Do_Exception(pc, ex.expEvn, ex.callVect);
exception_raised = 1;
printf("SQW MMU EXCEPTION\n");
}
}
class Arm64Assembler : public MacroAssembler
{
typedef void (MacroAssembler::*Arm64Op_RRO)(const Register&, const Register&, const Operand&);
@ -327,22 +418,47 @@ public:
if (op.rs3.is_imm())
{
Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm);
if (regalloc.IsAllocg(op.rs1))
Add(*ret_reg, regalloc.MapRegister(op.rs1), op.rs3._imm);
else
{
Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
Add(*ret_reg, *ret_reg, op.rs3._imm);
}
}
else if (op.rs3.is_r32i())
{
Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3));
if (regalloc.IsAllocg(op.rs1) && regalloc.IsAllocg(op.rs3))
Add(*ret_reg, regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs3));
else
{
Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
Ldr(w8, sh4_context_mem_operand(op.rs3.reg_ptr()));
Add(*ret_reg, *ret_reg, w8);
}
}
else if (!op.rs3.is_null())
{
die("invalid rs3");
}
else if (op.rs1.is_reg())
{
if (regalloc.IsAllocg(op.rs1))
{
if (raddr == NULL)
ret_reg = &regalloc.MapRegister(op.rs1);
else
Mov(*ret_reg, regalloc.MapRegister(op.rs1));
}
else
{
Ldr(*ret_reg, sh4_context_mem_operand(op.rs1.reg_ptr()));
}
}
else
{
if (raddr == NULL)
ret_reg = &regalloc.MapRegister(op.rs1);
else
Mov(*ret_reg, regalloc.MapRegister(op.rs1));
verify(op.rs1.is_imm());
Mov(*ret_reg, op.rs1._imm);
}
return *ret_reg;
@ -362,7 +478,10 @@ public:
regalloc.DoAlloc(block);
// scheduler
Subs(w27, w27, block->guest_cycles);
Mov(x27, reinterpret_cast<uintptr_t>(&cycle_counter));
Ldr(w0, MemOperand(x27));
Subs(w0, w0, block->guest_cycles);
Str(w0, MemOperand(x27));
Label cycles_remaining;
B(&cycles_remaining, pl);
GenCallRuntime(intc_sched);
@ -389,7 +508,21 @@ public:
}
Mov(*call_regs[0], op.rs3._imm);
GenCallRuntime(OpDesc[op.rs3._imm]->oph);
if (!mmu_enabled())
{
GenCallRuntime(OpDesc[op.rs3._imm]->oph);
}
else
{
Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
GenCallRuntime(interpreter_fallback);
Cmp(w0, 0);
Ldr(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update, ne);
}
break;
case shop_jcond:
@ -532,20 +665,33 @@ public:
case shop_shld:
case shop_shad:
// TODO optimize
Cmp(regalloc.MapRegister(op.rs2), 0);
Csel(w1, regalloc.MapRegister(op.rs2), wzr, ge); // if shift >= 0 then w1 = shift else w1 = 0
Mov(w0, wzr); // wzr not supported by csneg
Csneg(w2, w0, regalloc.MapRegister(op.rs2), ge); // if shift < 0 then w2 = -shift else w2 = 0
Cmp(w2, 32);
Csel(w2, 31, w2, eq); // if shift == -32 then w2 = 31
Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1); // Left shift by w1
if (op.op == shop_shld) // Right shift by w2
// Logical shift
Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2);
else
// Arithmetic shift
Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w2);
{
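// SH4 shld/shad: a non-negative rs2 shifts left by rs2; a negative rs2
// shifts right by -rs2 (logical for shld, arithmetic for shad). rs2 == -32
// needs special casing because AArch64 register shifts are taken mod 32.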
Label positive_shift, negative_shift, end;
Tbz(regalloc.MapRegister(op.rs2), 31, &positive_shift);
Cmn(regalloc.MapRegister(op.rs2), 32);
B(&negative_shift, ne);
if (op.op == shop_shld)
// Logical shift
Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), 31);
else
// Arithmetic shift
Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), 31);
B(&end);
Bind(&positive_shift);
Lsl(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), regalloc.MapRegister(op.rs2));
B(&end);
Bind(&negative_shift);
Neg(w1, regalloc.MapRegister(op.rs2));
if (op.op == shop_shld)
// Logical shift
Lsr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rd), w1);
else
// Arithmetic shift
Asr(regalloc.MapRegister(op.rd), regalloc.MapRegister(op.rs1), w1);
Bind(&end);
}
break;
case shop_test:
@ -617,31 +763,45 @@ public:
break;
case shop_pref:
Mov(w0, regalloc.MapRegister(op.rs1));
if (op.flags != 0x1337)
{
Lsr(w1, regalloc.MapRegister(op.rs1), 26);
if (regalloc.IsAllocg(op.rs1))
Lsr(w1, regalloc.MapRegister(op.rs1), 26);
else
{
Ldr(w0, sh4_context_mem_operand(op.rs1.reg_ptr()));
Lsr(w1, w0, 26);
}
Cmp(w1, 0x38);
}
Label not_sqw;
B(&not_sqw, ne);
if (regalloc.IsAllocg(op.rs1))
Mov(w0, regalloc.MapRegister(op.rs1));
if (CCN_MMUCR.AT)
{
Ldr(x9, reinterpret_cast<uintptr_t>(&do_sqw_mmu));
}
else
{
Sub(x9, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, do_sqw_nommu));
Ldr(x9, MemOperand(x9));
Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
}
if (op.flags == 0x1337)
Blr(x9);
else
{
Label no_branch;
B(&no_branch, ne);
Blr(x9);
Bind(&no_branch);
if (mmu_enabled())
{
Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
GenCallRuntime(do_sqw_mmu_no_ex);
Cmp(w0, 0);
Ldr(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update, ne);
}
else
{
if (CCN_MMUCR.AT)
{
Ldr(x9, reinterpret_cast<uintptr_t>(&do_sqw_mmu));
}
else
{
Sub(x9, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, do_sqw_nommu));
Ldr(x9, MemOperand(x9));
Sub(x1, x28, offsetof(Sh4RCB, cntx) - offsetof(Sh4RCB, sq_buffer));
}
Blr(x9);
}
Bind(&not_sqw);
}
break;
@ -863,24 +1023,39 @@ public:
Instruction *start_instruction = GetCursorAddress<Instruction *>();
u32 size = op.flags & 0x7f;
if (mmu_enabled())
Mov(*call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
switch (size)
{
case 1:
GenCallRuntime(ReadMem8);
if (!mmu_enabled())
GenCallRuntime(ReadMem8);
else
GenCallRuntime(ReadMemNoEx<u8>);
Sxtb(w0, w0);
break;
case 2:
GenCallRuntime(ReadMem16);
if (!mmu_enabled())
GenCallRuntime(ReadMem16);
else
GenCallRuntime(ReadMemNoEx<u16>);
Sxth(w0, w0);
break;
case 4:
GenCallRuntime(ReadMem32);
if (!mmu_enabled())
GenCallRuntime(ReadMem32);
else
GenCallRuntime(ReadMemNoEx<u32>);
break;
case 8:
GenCallRuntime(ReadMem64);
if (!mmu_enabled())
GenCallRuntime(ReadMem64);
else
GenCallRuntime(ReadMemNoEx<u64>);
break;
default:
@ -906,24 +1081,39 @@ public:
void GenWriteMemorySlow(const shil_opcode& op)
{
if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
Instruction *start_instruction = GetCursorAddress<Instruction *>();
u32 size = op.flags & 0x7f;
switch (size)
{
case 1:
GenCallRuntime(WriteMem8);
if (!mmu_enabled())
GenCallRuntime(WriteMem8);
else
GenCallRuntime(WriteMemNoEx<u8>);
break;
case 2:
GenCallRuntime(WriteMem16);
if (!mmu_enabled())
GenCallRuntime(WriteMem16);
else
GenCallRuntime(WriteMemNoEx<u16>);
break;
case 4:
GenCallRuntime(WriteMem32);
if (!mmu_enabled())
GenCallRuntime(WriteMem32);
else
GenCallRuntime(WriteMemNoEx<u32>);
break;
case 8:
GenCallRuntime(WriteMem64);
if (!mmu_enabled())
GenCallRuntime(WriteMem64);
else
GenCallRuntime(WriteMemNoEx<u64>);
break;
default:
@ -950,7 +1140,16 @@ public:
case BET_StaticCall:
// next_pc = block->BranchBlock;
if (block->pBranchBlock == NULL)
GenCallRuntime(ngen_LinkBlock_Generic_stub);
{
if (!mmu_enabled())
GenCallRuntime(ngen_LinkBlock_Generic_stub);
else
{
Mov(w29, block->BranchBlock);
Str(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update);
}
}
else
GenBranch(block->pBranchBlock->code);
break;
@ -975,14 +1174,32 @@ public:
if (block->pBranchBlock != NULL)
GenBranch(block->pBranchBlock->code);
else
GenCallRuntime(ngen_LinkBlock_cond_Branch_stub);
{
if (!mmu_enabled())
GenCallRuntime(ngen_LinkBlock_cond_Branch_stub);
else
{
Mov(w29, block->BranchBlock);
Str(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update);
}
}
Bind(&branch_not_taken);
if (block->pNextBlock != NULL)
GenBranch(block->pNextBlock->code);
else
GenCallRuntime(ngen_LinkBlock_cond_Next_stub);
{
if (!mmu_enabled())
GenCallRuntime(ngen_LinkBlock_cond_Next_stub);
else
{
Mov(w29, block->NextBlock);
Str(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update);
}
}
}
break;
@ -991,18 +1208,26 @@ public:
case BET_DynamicRet:
// next_pc = *jdyn;
Str(w29, sh4_context_mem_operand(&next_pc));
// TODO Call no_update instead (and check CpuRunning less frequently?)
Mov(x2, sizeof(Sh4RCB));
Sub(x2, x28, x2);
Add(x2, x2, sizeof(Sh4Context)); // x2 now points to FPCB
if (!mmu_enabled())
{
Str(w29, sh4_context_mem_operand(&next_pc));
// TODO Call no_update instead (and check CpuRunning less frequently?)
Mov(x2, sizeof(Sh4RCB));
Sub(x2, x28, x2);
Add(x2, x2, sizeof(Sh4Context)); // x2 now points to FPCB
#if RAM_SIZE_MAX == 33554432
Ubfx(w1, w29, 1, 24);
Ubfx(w1, w29, 1, 24);
#else
Ubfx(w1, w29, 1, 23);
Ubfx(w1, w29, 1, 23);
#endif
Ldr(x15, MemOperand(x2, x1, LSL, 3)); // Get block entry point
Br(x15);
Ldr(x15, MemOperand(x2, x1, LSL, 3)); // Get block entry point
Br(x15);
}
else
{
Str(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update);
}
break;
@ -1093,8 +1318,6 @@ private:
void GenReadMemory(const shil_opcode& op, size_t opid)
{
u32 size = op.flags & 0x7f;
if (GenReadMemoryImmediate(op))
return;
@ -1112,34 +1335,73 @@ private:
return false;
u32 size = op.flags & 0x7f;
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12))
// When full mmu is on, only consider addresses in the same 4k page
return false;
u32 paddr;
u32 rv;
if (size == 2)
rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
else if (size == 4)
rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
else
die("Invalid immediate size");
if (rv != MMU_ERROR_NONE)
return false;
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_read_const(op.rs1._imm, isram, size);
void* ptr = _vmem_read_const(addr, isram, size);
if (isram)
{
Ldr(x1, reinterpret_cast<uintptr_t>(ptr));
switch (size)
if (regalloc.IsAllocAny(op.rd))
{
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
break;
switch (size)
{
case 2:
Ldrsh(regalloc.MapRegister(op.rd), MemOperand(x1, xzr, SXTW));
break;
case 4:
if (op.rd.is_r32f())
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
else
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
case 4:
if (op.rd.is_r32f())
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
else
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
default:
die("Invalid size");
break;
default:
die("Invalid size");
break;
}
}
else
{
switch (size)
{
case 2:
Ldrsh(w1, MemOperand(x1, xzr, SXTW));
break;
case 4:
Ldr(w1, MemOperand(x1));
break;
default:
die("Invalid size");
break;
}
Str(w1, sh4_context_mem_operand(op.rd.reg_ptr()));
}
}
else
{
// Not RAM
Mov(w0, op.rs1._imm);
Mov(w0, addr);
switch(size)
{
@ -1165,7 +1427,10 @@ private:
if (regalloc.IsAllocg(op.rd))
Mov(regalloc.MapRegister(op.rd), w0);
else
{
verify(regalloc.IsAllocf(op.rd));
Fmov(regalloc.MapVRegister(op.rd), w0);
}
}
return true;
@ -1174,7 +1439,7 @@ private:
bool GenReadMemoryFast(const shil_opcode& op, size_t opid)
{
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
if (!_nvmem_enabled())
if (!_nvmem_enabled() || mmu_enabled())
return false;
Instruction *start_instruction = GetCursorAddress<Instruction *>();
@ -1254,7 +1519,7 @@ private:
bool GenWriteMemoryFast(const shil_opcode& op, size_t opid)
{
// Direct memory access. Need to handle SIGSEGV and rewrite block as needed. See ngen_Rewrite()
if (!_nvmem_enabled())
if (!_nvmem_enabled() || mmu_enabled())
return false;
Instruction *start_instruction = GetCursorAddress<Instruction *>();
@ -1307,9 +1572,16 @@ private:
u8* ptr = GetMemPtr(block->addr, sz);
if (ptr == NULL)
// FIXME Can a block cross a RAM / non-RAM boundary??
return;
if (mmu_enabled())
{
Ldr(w10, sh4_context_mem_operand(&next_pc));
Ldr(w11, block->vaddr);
Cmp(w10, w11);
B(ne, &blockcheck_fail);
}
Ldr(x9, reinterpret_cast<uintptr_t>(ptr));
while (sz > 0)
@ -1347,6 +1619,23 @@ private:
TailCallRuntime(ngen_blockcheckfail);
Bind(&blockcheck_success);
/*
if (mmu_enabled() && block->has_fpu_op)
{
Label fpu_enabled;
Ldr(w10, sh4_context_mem_operand(&sr));
Tbz(w10, 15, &fpu_enabled); // test SR.FD bit
Mov(*call_regs[0], block->vaddr); // pc
Mov(*call_regs[1], 0x800); // event
Mov(*call_regs[2], 0x100); // vector
CallRuntime(Do_Exception);
Ldr(w29, sh4_context_mem_operand(&next_pc));
GenBranch(no_update);
Bind(&fpu_enabled);
}
*/
}
void shil_param_to_host_reg(const shil_param& param, const Register& reg)
@ -1360,9 +1649,19 @@ private:
if (param.is_r64f())
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
else if (param.is_r32f())
Fmov(reg, regalloc.MapVRegister(param));
{
if (regalloc.IsAllocf(param))
Fmov(reg, regalloc.MapVRegister(param));
else
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
}
else
Mov(reg, regalloc.MapRegister(param));
{
if (regalloc.IsAllocg(param))
Mov(reg, regalloc.MapRegister(param));
else
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
}
}
else
{
@ -1383,13 +1682,17 @@ private:
else
Fmov(regalloc.MapRegister(param), (const VRegister&)reg);
}
else
else if (regalloc.IsAllocf(param))
{
if (reg.IsVRegister())
Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
else
Fmov(regalloc.MapVRegister(param), (const Register&)reg);
}
else
{
Str(reg, sh4_context_mem_operand(param.reg_ptr()));
}
}
struct CC_PS
@ -1476,6 +1779,8 @@ bool ngen_Rewrite(unat& host_pc, unat, unat)
u32 DynaRBI::Relink()
{
if (mmu_enabled())
return 0;
//printf("DynaRBI::Relink %08x\n", this->addr);
Arm64Assembler *compiler = new Arm64Assembler((u8 *)this->code + this->relink_offset);

View File

@ -50,7 +50,7 @@ void ngen_mainloop(void* v_cntx)
#endif
cycle_counter = SH4_TIMESLICE;
do {
DynarecCodeEntryPtr rcb = bm_GetCode(ctx->cntx.pc);
DynarecCodeEntryPtr rcb = bm_GetCodeByVAddr(ctx->cntx.pc);
rcb();
} while (cycle_counter > 0);
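
bm_GetCodeByVAddr replaces bm_GetCode here because ctx->cntx.pc is a virtual address once the MMU is enabled. A plausible shape for such a lookup, assuming helper names like mmu_instruction_translation and ngen_FailedToFindBlock (a sketch of the idea, not the actual implementation):

#include <cstdint>

// Stand-in declarations for the emulator's helpers (assumptions, see above):
using DynarecCodeEntryPtr = void (*)();
bool mmu_enabled();
uint32_t mmu_instruction_translation(uint32_t va, uint32_t& pa);  // 0 == no error
DynarecCodeEntryPtr bm_GetCode(uint32_t paddr);
extern DynarecCodeEntryPtr ngen_FailedToFindBlock;

DynarecCodeEntryPtr bm_GetCodeByVAddr(uint32_t pc)
{
    if (!mmu_enabled())
        return bm_GetCode(pc);           // pc is already physical
    uint32_t paddr;
    if (mmu_instruction_translation(pc, paddr) != 0)
        return ngen_FailedToFindBlock;   // slow path will raise the ITLB miss
    return bm_GetCode(paddr);            // cache stays keyed by physical address
}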

View File

@ -1,6 +1,8 @@
#include "build.h"
#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
#include <setjmp.h>
#define EXPLODE_SPANS
//#define PROFILING
@ -77,6 +79,8 @@ static __attribute((used)) void end_slice()
#error RAM_SIZE_MAX unknown
#endif
jmp_buf jmp_env;
#ifdef _WIN32
// Fully naked function in win32 for proper SEH prologue
__asm__ (
@ -120,6 +124,14 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
#endif
"movl $" _S(SH4_TIMESLICE) "," _U "cycle_counter(%rip) \n"
#ifdef _WIN32
"movq $" _U "jmp_env, %rcx \n\t" // SETJMP
#else
"movq $" _U "jmp_env, %rdi \n\t"
#endif
"call " _U "setjmp \n\t"
// "testl %rax, %rax \n\t"
"1: \n\t" // run_loop
"movq " _U "p_sh4rcb(%rip), %rax \n\t"
"movl " _S(CPU_RUNNING) "(%rax), %edx \n\t"
@ -136,7 +148,7 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
#else
"movl " _S(PC)"(%rax), %edi \n\t"
#endif
"call " _U "bm_GetCode2 \n\t"
"call " _U "bm_GetCodeByVAddr \n\t"
"call *%rax \n\t"
"movl " _U "cycle_counter(%rip), %ecx \n\t"
"testl %ecx, %ecx \n\t"
@ -212,51 +224,31 @@ static u32 exception_raised;
template<typename T>
static T ReadMemNoEx(u32 addr, u32 pc)
{
-	try {
-		exception_raised = 0;
-		if (sizeof(T) == 1)
-			return ReadMem8(addr);
-		else if (sizeof(T) == 2)
-			return ReadMem16(addr);
-		else if (sizeof(T) == 4)
-			return ReadMem32(addr);
-		else if (sizeof(T) == 8)
-			return ReadMem64(addr);
-	} catch (SH4ThrownException& ex) {
-		if (pc & 1)
-		{
-			// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
-		return 0;
-	}
+	T rv = mmu_ReadMemNoEx<T>(addr, &exception_raised);
+	if (exception_raised)
+	{
+		if (pc & 1)
+			// Delay slot
+			spc = pc - 1;
+		else
+			spc = pc;
+		longjmp(jmp_env, 1);
+	}
+	return rv;
}
template<typename T>
static void WriteMemNoEx(u32 addr, T data, u32 pc)
{
-	try {
-		if (sizeof(T) == 1)
-			WriteMem8(addr, data);
-		else if (sizeof(T) == 2)
-			WriteMem16(addr, data);
-		else if (sizeof(T) == 4)
-			WriteMem32(addr, data);
-		else if (sizeof(T) == 8)
-			WriteMem64(addr, data);
-		exception_raised = 0;
-	} catch (SH4ThrownException& ex) {
-		if (pc & 1)
-		{
-			// Delay slot
-			AdjustDelaySlotException(ex);
-			pc--;
-		}
-		Do_Exception(pc, ex.expEvn, ex.callVect);
-		exception_raised = 1;
-	}
+	exception_raised = mmu_WriteMemNoEx<T>(addr, data);
+	if (exception_raised)
+	{
+		if (pc & 1)
+			// Delay slot
+			spc = pc - 1;
+		else
+			spc = pc;
+		longjmp(jmp_env, 1);
+	}
}
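
Together with the setjmp call in the mainloop assembly above, this is the whole mechanism: the dispatch loop arms jmp_env once, and a faulting access stores the resume pc in spc and longjmps back instead of throwing a C++ exception across generated code. A self-contained toy with the same control flow (all names here are illustrative):

#include <csetjmp>
#include <cstdio>

static std::jmp_buf env;     // plays the role of jmp_env
static unsigned resume_pc;   // plays the role of spc
static bool faulted;         // static, so its value survives the longjmp

// Fault path: record where to resume, then unwind to the dispatch loop.
static void raise_exception(unsigned pc)
{
    resume_pc = pc;
    std::longjmp(env, 1);
}

int main()
{
    if (setjmp(env))                         // re-entered here after a longjmp
        std::printf("resume at pc=%08x\n", resume_pc);
    if (!faulted)
    {
        faulted = true;                      // simulate a single MMU fault
        raise_exception(0x8c0000a0u);
    }
    std::printf("dispatch continues\n");
    return 0;
}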
@ -352,7 +344,7 @@ public:
sub(rsp, 0x8); // align stack
#endif
Xbyak::Label exit_block;
/*
if (mmu_enabled() && block->has_fpu_op)
{
Xbyak::Label fpu_enabled;
@ -367,7 +359,7 @@ public:
jmp(exit_block, T_NEAR);
L(fpu_enabled);
}
*/
for (current_opid = 0; current_opid < block->oplist.size(); current_opid++)
{
shil_opcode& op = block->oplist[current_opid];
@ -449,25 +441,32 @@ public:
{
u32 size = op.flags & 0x7f;
bool immediate_address = op.rs1.is_imm();
-		if (immediate_address && mmu_enabled() && (op.rs1._imm >> 12) != (block->vaddr >> 12))
+		u32 addr = op.rs1._imm;
+		if (immediate_address && mmu_enabled())
		{
-			// When full mmu is on, only consider addresses in the same 4k page
-			immediate_address = false;
+			if ((op.rs1._imm >> 12) != (block->vaddr >> 12))
+			{
+				// When full mmu is on, only consider addresses in the same 4k page
+				immediate_address = false;
+			}
+			else
+			{
+				u32 paddr;
+				u32 rv;
+				if (size == 2)
+					rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
+				else if (size == 4)
+					rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
+				else
+					die("Invalid immediate size");
+				if (rv != MMU_ERROR_NONE)
+					immediate_address = false;
+				else
+					addr = paddr;
+			}
		}
		if (immediate_address)
		{
-			u32 addr = op.rs1._imm;
-			if (mmu_enabled())
-			{
-				u32 paddr;
-				if (size == 2)
-					mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
-				else if (size == 4)
-					mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
-				else
-					die("Invalid immediate size");
-				addr = paddr;
-			}
			bool isram = false;
			void* ptr = _vmem_read_const(addr, isram, size);
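
The page comparison above is the virtual page number test: constant folding is only attempted for addresses in the block's own 4KB page, where the compile-time translation can reasonably be assumed to still hold at run time. As a sketch:

#include <cstdint>

// Same-4KB-page test: the virtual page numbers (addr >> 12) must match.
static bool same_page(uint32_t a, uint32_t b)
{
    return (a >> 12) == (b >> 12);
}
// e.g. same_page(0x8C0100A4, 0x8C010FF0) -> true  (both in page 0x8C010)
//      same_page(0x8C0100A4, 0x8C011000) -> false (next page)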
@ -581,11 +580,11 @@ public:
die("1..8 bytes");
}
if (mmu_enabled())
{
test(dword[(void *)&exception_raised], 1);
jnz(exit_block, T_NEAR);
}
// if (mmu_enabled())
// {
// test(dword[(void *)&exception_raised], 1);
// jnz(exit_block, T_NEAR);
// }
if (size != 8)
host_reg_to_shil_param(op.rd, ecx);
@ -674,11 +673,11 @@ public:
die("1..8 bytes");
}
if (mmu_enabled())
{
test(dword[(void *)&exception_raised], 1);
jnz(exit_block, T_NEAR);
}
// if (mmu_enabled())
// {
// test(dword[(void *)&exception_raised], 1);
// jnz(exit_block, T_NEAR);
// }
}
break;
@ -1353,6 +1352,10 @@ private:
// cmp(byte[rax], block->asid);
// jne(reinterpret_cast<const void*>(&ngen_blockcheckfail));
// }
// FIXME Neither of these tests should be necessary
// However the decoder makes various assumptions about the current PC value, which are simply not
// true in a virtualized memory model. So this can only work if virtual and phy addresses are the
// same at compile and run times.
if (mmu_enabled())
{
mov(rax, (uintptr_t)&next_pc);

View File

@ -9,7 +9,7 @@
.globl cycle_counter
.globl loop_no_update
.globl intc_sched
.globl bm_GetCode
.globl bm_GetCodeByVAddr
.globl cycle_counter
.globl UpdateSystem
.globl rdv_DoInterrupts
@ -109,7 +109,7 @@ ngen_mainloop:
# next_pc _MUST_ be on ecx
no_update:
mov esi,ecx
call _Z10bm_GetCodej #bm_GetCode
call bm_GetCodeByVAddr
jmp eax
intc_sched_offs:

View File

@ -84,7 +84,7 @@ naked void ngen_mainloop(void* cntx)
//next_pc _MUST_ be on ecx
no_update:
mov esi,ecx;
call bm_GetCode
call bm_GetCodeByVAddr
jmp eax;
intc_sched_offs:

View File

@ -492,8 +492,7 @@ extern TLB_Entry ITLB[4];
#if defined(NO_MMU)
extern u32 sq_remap[64];
#else
extern u32 ITLB_LRU_USE[64];
extern u32 mmu_error_TT;
static u32 ITLB_LRU_USE[64];
#endif
@ -1085,7 +1084,6 @@ bool dc_serialize(void **data, unsigned int *total_size)
REICAST_SA(sq_remap,64);
#else
REICAST_SA(ITLB_LRU_USE,64);
REICAST_S(mmu_error_TT);
#endif
@ -1487,7 +1485,6 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
REICAST_USA(sq_remap,64);
#else
REICAST_USA(ITLB_LRU_USE,64);
REICAST_US(mmu_error_TT);
#endif
@ -1877,7 +1874,6 @@ bool dc_unserialize(void **data, unsigned int *total_size)
REICAST_USA(sq_remap,64);
#else
REICAST_USA(ITLB_LRU_USE,64);
REICAST_US(mmu_error_TT);
#endif

View File

@ -63,8 +63,8 @@ LOCAL_SRC_FILES := $(RZDCY_FILES)
LOCAL_SRC_FILES += $(wildcard $(LOCAL_PATH)/jni/src/Android.cpp)
LOCAL_SRC_FILES += $(wildcard $(LOCAL_PATH)/jni/src/utils.cpp)
LOCAL_CFLAGS := $(RZDCY_CFLAGS) -fPIC -fvisibility=hidden -ffunction-sections -fdata-sections
LOCAL_CXXFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections
LOCAL_CPPFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections
LOCAL_CXXFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections -fexceptions
LOCAL_CPPFLAGS := $(RZDCY_CXXFLAGS) -fPIC -fvisibility=hidden -fvisibility-inlines-hidden -ffunction-sections -fdata-sections -fexceptions
# 7-Zip/LZMA settings (CHDv5)
ifdef CHD5_LZMA