vmem: Automatic fallback to slowpath if alloc fails

This consolidates some of the work done for TARGET_NO_NVMEM and
feat/no-direct-memmap. If nvmem is disabled at compile time or the
allocation fails at runtime, _nvmem_enabled() will return false and the
emulator falls back to the slowpath. Various other fixes and cleanups
all around.
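
In short, the reservation path now degrades at runtime instead of relying solely on the TARGET_NO_NVMEM #ifdef. Below is a condensed sketch of the new control flow, assembled from the _vmem.cpp hunks in this commit; it is an illustration, not a standalone compilable unit.

    bool _vmem_reserve()
    {
        // Honour the explicit opt-out, then attempt the fast nvmem mapping.
        if (settings.dynarec.disable_nvmem)
            return _vmem_reserve_nonvmem();

        virt_ram_base = (u8*)_nvmem_alloc_mem();
        if (virt_ram_base == 0)
            return _vmem_reserve_nonvmem(); // allocation failed -> plain malloc_pages() buffers

        // ... map RAM/VRAM/ARAM into the reserved region as before ...
        return true;
    }

    // Callers then branch on this predicate instead of a compile-time check:
    static inline bool _nvmem_enabled() { return virt_ram_base != 0; }
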
Stefanos Kornilios Mitsis Poiitidis 2015-08-12 00:58:50 +02:00
parent eeeb2d6a62
commit 67ecd6d9f9
14 changed files with 252 additions and 135 deletions

View File

@@ -1,6 +1,7 @@
#include "_vmem.h"
#include "hw/aica/aica_if.h"
#include "hw/sh4/dyna/blockmanager.h"
#define HANDLER_MAX 0x1F
#define HANDLER_COUNT (HANDLER_MAX+1)
@@ -393,13 +394,51 @@ void _vmem_term()
#include "hw/pvr/pvr_mem.h"
#include "hw/sh4/sh4_mem.h"
u8* virt_ram_base;
void* malloc_pages(size_t size) {
u8* rv = (u8*)malloc(size + PAGE_SIZE);
return rv + PAGE_SIZE - ((unat)rv % PAGE_SIZE);
}
bool _vmem_reserve_nonvmem()
{
virt_ram_base = 0;
p_sh4rcb=(Sh4RCB*)malloc_pages(sizeof(Sh4RCB));
mem_b.size=RAM_SIZE;
mem_b.data=(u8*)malloc_pages(RAM_SIZE);
vram.size=VRAM_SIZE;
vram.data=(u8*)malloc_pages(VRAM_SIZE);
aica_ram.size=ARAM_SIZE;
aica_ram.data=(u8*)malloc_pages(ARAM_SIZE);
return true;
}
void _vmem_bm_reset_nvmem();
void _vmem_bm_reset() {
if (virt_ram_base) {
#if !defined(TARGET_NO_NVMEM)
_vmem_bm_reset_nvmem();
#endif
} else {
bm_vmem_pagefill((void**)p_sh4rcb->fpcb, FPCB_SIZE);
}
}
#if !defined(TARGET_NO_NVMEM)
#define MAP_RAM_START_OFFSET 0
#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
u8* virt_ram_base;
#if HOST_OS==OS_WINDOWS
#include <Windows.h>
HANDLE mem_handle;
@@ -576,10 +615,8 @@ error:
#define map_buffer(dsts,dste,offset,sz,w) {ptr=_nvmem_map_buffer(dsts,dste-dsts,offset,sz,w);if (!ptr) return false;}
#define unused_buffer(start,end) {ptr=_nvmem_unused_buffer(start,end);if (!ptr) return false;}
void _vmem_bm_pagefail(void** ptr,u32 PAGE_SZ);
u32 pagecnt;
void _vmem_bm_reset()
void _vmem_bm_reset_nvmem()
{
#if defined(TARGET_NO_NVMEM)
return;
@@ -610,6 +647,9 @@ void _vmem_bm_reset()
bool BM_LockedWrite(u8* address)
{
if (!_nvmem_enabled())
return false;
#if FEAT_SHREC != DYNAREC_NONE
u32 addr=address-(u8*)p_sh4rcb->fpcb;
@@ -625,7 +665,7 @@ bool BM_LockedWrite(u8* address)
mprotect (address, PAGE_SIZE, PROT_READ | PROT_WRITE);
#endif
_vmem_bm_pagefail((void**)address,PAGE_SIZE);
bm_vmem_pagefill((void**)address,PAGE_SIZE);
return true;
}
@@ -641,10 +681,13 @@ bool _vmem_reserve()
verify((sizeof(Sh4RCB)%PAGE_SIZE)==0);
if (settings.dynarec.disable_nvmem)
return _vmem_reserve_nonvmem();
virt_ram_base=(u8*)_nvmem_alloc_mem();
if (virt_ram_base==0)
return false;
return _vmem_reserve_nonvmem();
p_sh4rcb=(Sh4RCB*)virt_ram_base;
@@ -730,27 +773,9 @@ bool _vmem_reserve()
}
#else
void* malloc_pages(size_t size) {
u8* rv = (u8*)malloc(size + PAGE_SIZE);
return rv + PAGE_SIZE - ((unat)rv % PAGE_SIZE);
}
bool _vmem_reserve()
{
p_sh4rcb=(Sh4RCB*)malloc_pages(sizeof(Sh4RCB));
mem_b.size=RAM_SIZE;
mem_b.data=(u8*)malloc_pages(RAM_SIZE);
vram.size=VRAM_SIZE;
vram.data=(u8*)malloc_pages(VRAM_SIZE);
aica_ram.size=ARAM_SIZE;
aica_ram.data=(u8*)malloc_pages(ARAM_SIZE);
return true;
return _vmem_reserve_nonvmem();
}
#endif

View File

@@ -62,4 +62,12 @@ void _vmem_release();
//dynarec helpers
void _vmem_get_ptrs(u32 sz,bool write,void*** vmap,void*** func);
void* _vmem_get_ptr2(u32 addr,u32& mask);
void* _vmem_read_const(u32 addr,bool& ismem,u32 sz);
extern u8* virt_ram_base;
static inline bool _nvmem_enabled() {
return virt_ram_base != 0;
}
void _vmem_bm_reset();

View File

@@ -362,9 +362,7 @@ void bm_Rebuild()
rebuild_counter=30;
}
void _vmem_bm_reset();
void _vmem_bm_pagefail(void** ptr,u32 PAGE_SZ)
void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ)
{
for (size_t i=0; i<PAGE_SZ/sizeof(ptr[0]); i++)
{
@@ -380,17 +378,7 @@ void bm_Reset()
blocks_page[i].clear();
}
#if !defined(TARGET_NO_NVMEM)
_vmem_bm_reset();
#endif
#if (HOST_OS == OS_DARWIN) || defined(TARGET_NO_NVMEM)
//lazy allocation isn't working on iOS
for (u32 i=0;i<FPCB_SIZE;i++)
{
sh4rcb.fpcb[i]=(void*)ngen_FailedToFindBlock;
}
#endif
for (size_t i=0; i<all_blocks.size(); i++)
{

View File

@@ -105,3 +105,4 @@ void bm_Sort();
void bm_Init();
void bm_Term();
void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ);

View File

@@ -415,9 +415,9 @@ void recSh4_Init()
verify(rcb_noffs(&p_sh4rcb->cntx.sh4_sched_next) == -152);
verify(rcb_noffs(&p_sh4rcb->cntx.interrupt_pend) == -148);
#if !defined(TARGET_NO_NVMEM)
verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
#endif
if (_nvmem_enabled()) {
verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
}
#if defined(_WIN64)
for (int i = 10; i < 1300; i++) {

View File

@@ -1299,18 +1299,22 @@ INLINE void DYNACALL do_sqw(u32 Dest)
void DYNACALL do_sqw_mmu(u32 dst) { do_sqw<true>(dst); }
#if HOST_CPU!=CPU_ARM
//yes, this micro optimization makes a difference
extern "C" void DYNACALL do_sqw_nommu_area_3(u32 dst,u8* sqb)
{
#if defined (TARGET_NO_NVMEM)
u8* pmem = mem_b.data;
#else
u8* pmem=sqb+512+0x0C000000;
#endif
memcpy((u64*)&pmem[dst&(RAM_MASK-0x1F)],(u64*)&sqb[dst & 0x20],32);
}
#endif
extern "C" void DYNACALL do_sqw_nommu_area_3_nonvmem(u32 dst,u8* sqb)
{
u8* pmem = mem_b.data;
memcpy((u64*)&pmem[dst&(RAM_MASK-0x1F)],(u64*)&sqb[dst & 0x20],32);
}
void DYNACALL do_sqw_nommu_full(u32 dst, u8* sqb) { do_sqw<false>(dst); }
sh4op(i0000_nnnn_1000_0011)

View File

@@ -7,6 +7,7 @@
#include "ccn.h"
#include "../sh4_core.h"
#include "hw/pvr/pvr_mem.h"
#include "hw/mem/_vmem.h"
//Types
@@ -26,8 +27,16 @@ void CCN_QACR_write(u32 addr, u32 value)
switch(area)
{
case 3: do_sqw_nommu=&do_sqw_nommu_area_3; break;
case 4: do_sqw_nommu=(sqw_fp*)&TAWriteSQ; break;
case 3:
if (_nvmem_enabled())
do_sqw_nommu=&do_sqw_nommu_area_3;
else
do_sqw_nommu=&do_sqw_nommu_area_3_nonvmem;
break;
case 4:
do_sqw_nommu=(sqw_fp*)&TAWriteSQ;
break;
default: do_sqw_nommu=&do_sqw_nommu_full;
}
}

View File

@@ -300,6 +300,7 @@ struct Sh4Context
void DYNACALL do_sqw_mmu(u32 dst);
extern "C" void DYNACALL do_sqw_nommu_area_3(u32 dst, u8* sqb);
extern "C" void DYNACALL do_sqw_nommu_area_3_nonvmem(u32 dst, u8* sqb);
void DYNACALL do_sqw_nommu_full(u32 dst, u8* sqb);
typedef void DYNACALL sqw_fp(u32 dst,u8* sqb);

View File

@@ -245,6 +245,7 @@ void LoadSettings()
settings.dynarec.Enable = cfgLoadInt("config","Dynarec.Enabled", 1)!=0;
settings.dynarec.idleskip = cfgLoadInt("config","Dynarec.idleskip",1)!=0;
settings.dynarec.unstable_opt = cfgLoadInt("config","Dynarec.unstable-opt",0);
//disable_nvmem can't be loaded, because nvmem init is before cfg load
settings.dreamcast.cable = cfgLoadInt("config","Dreamcast.Cable",3);
settings.dreamcast.RTC = cfgLoadInt("config","Dreamcast.RTC",GetRTC_now());
settings.dreamcast.region = cfgLoadInt("config","Dreamcast.Region",3);

View File

@@ -844,6 +844,41 @@ union arm_mem_op
u32 full;
};
void vmem_slowpath(eReg raddr, eReg rt, eFSReg ft, eFDReg fd, mem_op_type optp, bool read)
{
if (raddr != r0)
MOV(r0, (eReg)raddr);
if (!read)
{
if (optp <= SZ_32I) MOV(r1, rt);
else if (optp == SZ_32F) VMOV(r1, ft);
else if (optp == SZ_64F) VMOV(r2, r3, fd);
}
if (fd != d0 && optp == SZ_64F)
{
die("BLAH");
}
u32 funct = 0;
if (optp <= SZ_32I)
funct = _mem_hndl[read][optp][raddr];
else
funct = _mem_func[read][optp];
verify(funct != 0);
CALL(funct);
if (read)
{
if (optp <= SZ_32I) MOV(rt, r0);
else if (optp == SZ_32F) VMOV(ft, r0);
else if (optp == SZ_64F) VMOV(fd, r0, r1);
}
}
u32* ngen_readm_fail_v2(u32* ptrv,u32* regs,u32 fault_addr)
{
arm_mem_op* ptr=(arm_mem_op*)ptrv;
@@ -1157,35 +1192,59 @@ void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool staging,
{
eReg raddr=GenMemAddr(op);
BIC(r1,raddr,0xE0000000);
//UBFX(r1,raddr,0,29);
//SUB(r1,raddr,raddr);
if (_nvmem_enabled()) {
BIC(r1,raddr,0xE0000000);
switch(optp)
{
case SZ_8:
LDRSB(reg.mapg(op->rd),r1,r8,true);
break;
switch(optp)
{
case SZ_8:
LDRSB(reg.mapg(op->rd),r1,r8,true);
break;
case SZ_16:
LDRSH(reg.mapg(op->rd),r1,r8,true);
break;
case SZ_16:
LDRSH(reg.mapg(op->rd),r1,r8,true);
break;
case SZ_32I:
LDR(reg.mapg(op->rd),r1,r8,Offset,true);
break;
case SZ_32I:
LDR(reg.mapg(op->rd),r1,r8,Offset,true);
break;
case SZ_32F:
ADD(r1,r1,r8); //3 opcodes, there's no [REG+REG] VLDR
VLDR(reg.mapf(op->rd),r1,0);
break;
case SZ_32F:
ADD(r1,r1,r8); //3 opcodes, there's no [REG+REG] VLDR
VLDR(reg.mapf(op->rd),r1,0);
break;
case SZ_64F:
ADD(r1,r1,r8); //3 opcodes, there's no [REG+REG] VLDR
VLDR(d0,r1,0); //TODO: use reg alloc
case SZ_64F:
ADD(r1,r1,r8); //3 opcodes, there's no [REG+REG] VLDR
VLDR(d0,r1,0); //TODO: use reg alloc
VSTR(d0,r8,op->rd.reg_nofs()/4);
break;
VSTR(d0,r8,op->rd.reg_nofs()/4);
break;
}
} else {
switch(optp)
{
case SZ_8:
vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true);
break;
case SZ_16:
vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true);
break;
case SZ_32I:
vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true);
break;
case SZ_32F:
vmem_slowpath(raddr, r0, reg.mapf(op->rd), d0, optp, true);
break;
case SZ_64F:
vmem_slowpath(raddr, r0, f0, d0, optp, true);
VSTR(d0,r8,op->rd.reg_nofs()/4);
break;
}
}
}
}
@@ -1202,62 +1261,87 @@ void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool staging,
if (optp == SZ_64F)
VLDR(d0,r8,op->rs2.reg_nofs()/4);
BIC(r1,raddr,0xE0000000);
//UBFX(r1,raddr,0,29);
//SUB(r1,raddr,raddr);
s32 sq_offs=rcb_noffs(sq_both);
switch(optp)
{
case SZ_8:
STRB(reg.mapg(op->rs2),r1,r8,Offset,true);
break;
if (_nvmem_enabled()) {
BIC(r1,raddr,0xE0000000);
//UBFX(r1,raddr,0,29);
//SUB(r1,raddr,raddr);
s32 sq_offs=rcb_noffs(sq_both);
switch(optp)
{
case SZ_8:
STRB(reg.mapg(op->rs2),r1,r8,Offset,true);
break;
case SZ_16:
STRH(reg.mapg(op->rs2),r1,r8,true);
break;
case SZ_16:
STRH(reg.mapg(op->rs2),r1,r8,true);
break;
case SZ_32I:
if (op->flags2!=0x1337)
STR(reg.mapg(op->rs2),r1,r8,Offset,true);
else
{
emit_Skip(-4);
AND(r1,raddr,0x3F);
ADD(r1,r1,r8);
STR(reg.mapg(op->rs2),r1,sq_offs);
}
break;
case SZ_32I:
if (op->flags2!=0x1337)
STR(reg.mapg(op->rs2),r1,r8,Offset,true);
else
{
emit_Skip(-4);
AND(r1,raddr,0x3F);
ADD(r1,r1,r8);
STR(reg.mapg(op->rs2),r1,sq_offs);
}
break;
case SZ_32F:
if (op->flags2!=0x1337)
{
ADD(r1,r1,r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ
VSTR(reg.mapf(op->rs2),r1,0);
}
else
{
emit_Skip(-4);
AND(r1,raddr,0x3F);
ADD(r1,r1,r8);
VSTR(reg.mapf(op->rs2),r1,sq_offs/4);
}
break;
case SZ_32F:
if (op->flags2!=0x1337)
{
ADD(r1,r1,r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ
VSTR(reg.mapf(op->rs2),r1,0);
}
else
{
emit_Skip(-4);
AND(r1,raddr,0x3F);
ADD(r1,r1,r8);
VSTR(reg.mapf(op->rs2),r1,sq_offs/4);
}
break;
case SZ_64F:
if (op->flags2!=0x1337)
{
ADD(r1,r1,r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ
VSTR(d0,r1,0); //TODO: use reg alloc
case SZ_64F:
if (op->flags2!=0x1337)
{
ADD(r1,r1,r8); //3 opcodes: there's no [REG+REG] VLDR, also required for SQ
VSTR(d0,r1,0); //TODO: use reg alloc
}
else
{
emit_Skip(-4);
AND(r1,raddr,0x3F);
ADD(r1,r1,r8);
VSTR(d0,r1,sq_offs/4);
}
break;
}
else
} else {
switch(optp)
{
emit_Skip(-4);
AND(r1,raddr,0x3F);
ADD(r1,r1,r8);
VSTR(d0,r1,sq_offs/4);
case SZ_8:
vmem_slowpath(raddr, reg.mapg(op->rs2), f0, d0, optp, false);
break;
case SZ_16:
vmem_slowpath(raddr, reg.mapg(op->rs2), f0, d0, optp, false);
break;
case SZ_32I:
vmem_slowpath(raddr, reg.mapg(op->rs2), f0, d0, optp, false);
break;
case SZ_32F:
vmem_slowpath(raddr, r0, reg.mapf(op->rs2), d0, optp, false);
break;
case SZ_64F:
vmem_slowpath(raddr, r0, f0, d0, optp, false);
break;
}
break;
}
}
break;

View File

@@ -620,10 +620,6 @@ u32 DynaRBI::Relink()
W F32v2 B,S{,M}
*/
#if !defined(TARGET_NO_NVMEM)
extern u8* virt_ram_base;
#endif
#include "hw/sh4/sh4_mmr.h"
enum mem_op_type
@@ -651,8 +647,7 @@ void gen_hande(u32 w, u32 sz, u32 mode)
u32 si=x86e->x86_indx;
#ifndef TARGET_NO_NVMEM
if (mode==0)
if (mode==0 && _nvmem_enabled())
{
//Buffer
x86e->Emit(op_mov32,EAX,ECX);
@@ -684,9 +679,7 @@ void gen_hande(u32 w, u32 sz, u32 mode)
}
}
}
else
#endif
if (mode==1)
else if (mode==1)
{
//SQ
verify(w==1);

View File

@@ -178,10 +178,6 @@ void DYNACALL VERIFYME(u32 addr)
verify((addr>>26)==0x38);
}
#if !defined(TARGET_NO_NVMEM)
extern u8* virt_ram_base;
#endif
/*
ReadM

View File

@@ -1,5 +1,6 @@
#include "TexCache.h"
#include "hw/pvr/pvr_regs.h"
#include "hw/mem/_vmem.h"
u8* vq_codebook;
u32 palette_index;
@@ -193,7 +194,8 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user
vram.LockRegion(block->start,block->len);
if (VRAM_SIZE == 0x800000) {
//TODO: Fix this for 32M wrap as well
if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
vram.LockRegion(block->start + VRAM_SIZE, block->len);
}
@@ -236,7 +238,11 @@ bool VramLockedWrite(u8* address)
list->clear();
vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)),PAGE_SIZE);
vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE);
//TODO: Fix this for 32M wrap as well
if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE);
}
vramlist_lock.Unlock();
}

View File

@@ -616,6 +616,7 @@ struct settings_t
bool Enable;
bool idleskip;
bool unstable_opt;
bool disable_nvmem;
} dynarec;
struct