diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp
index bf2f8d347..6b5836183 100644
--- a/core/hw/mem/_vmem.cpp
+++ b/core/hw/mem/_vmem.cpp
@@ -1,6 +1,7 @@
 #include "_vmem.h"
 #include "hw/aica/aica_if.h"
+#include "hw/sh4/dyna/blockmanager.h"
 
 #define HANDLER_MAX 0x1F
 #define HANDLER_COUNT (HANDLER_MAX+1)
@@ -393,13 +394,51 @@ void _vmem_term()
 #include "hw/pvr/pvr_mem.h"
 #include "hw/sh4/sh4_mem.h"
 
+u8* virt_ram_base;
+
+void* malloc_pages(size_t size) {
+
+	u8* rv = (u8*)malloc(size + PAGE_SIZE);
+
+	return rv + PAGE_SIZE - ((unat)rv % PAGE_SIZE);
+}
+
+bool _vmem_reserve_nonvmem()
+{
+	virt_ram_base = 0;
+
+	p_sh4rcb=(Sh4RCB*)malloc_pages(sizeof(Sh4RCB));
+
+	mem_b.size=RAM_SIZE;
+	mem_b.data=(u8*)malloc_pages(RAM_SIZE);
+
+	vram.size=VRAM_SIZE;
+	vram.data=(u8*)malloc_pages(VRAM_SIZE);
+
+	aica_ram.size=ARAM_SIZE;
+	aica_ram.data=(u8*)malloc_pages(ARAM_SIZE);
+
+	return true;
+}
+
+void _vmem_bm_reset_nvmem();
+
+void _vmem_bm_reset() {
+	if (virt_ram_base) {
+		#if !defined(TARGET_NO_NVMEM)
+		_vmem_bm_reset_nvmem();
+		#endif
+	} else {
+		bm_vmem_pagefill((void**)p_sh4rcb->fpcb, FPCB_SIZE);
+	}
+}
+
 #if !defined(TARGET_NO_NVMEM)
 
 #define MAP_RAM_START_OFFSET  0
 #define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE)
 #define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE)
 
-u8* virt_ram_base;
 #if HOST_OS==OS_WINDOWS
 #include <windows.h>
 HANDLE mem_handle;
@@ -576,10 +615,8 @@ error:
 #define map_buffer(dsts,dste,offset,sz,w) {ptr=_nvmem_map_buffer(dsts,dste-dsts,offset,sz,w);if (!ptr) return false;}
 #define unused_buffer(start,end) {ptr=_nvmem_unused_buffer(start,end);if (!ptr) return false;}
 
-void _vmem_bm_pagefail(void** ptr,u32 PAGE_SZ);
-
 u32 pagecnt;
-void _vmem_bm_reset()
+void _vmem_bm_reset_nvmem()
 {
 #if defined(TARGET_NO_NVMEM)
 	return;
@@ -610,6 +647,9 @@ void _vmem_bm_reset()
 
 bool BM_LockedWrite(u8* address)
 {
+	if (!_nvmem_enabled())
+		return false;
+
 #if FEAT_SHREC != DYNAREC_NONE
 	u32 addr=address-(u8*)p_sh4rcb->fpcb;
 
@@ -625,7 +665,7 @@ bool BM_LockedWrite(u8* address)
 		mprotect (address, PAGE_SIZE, PROT_READ | PROT_WRITE);
 	#endif
 
-	_vmem_bm_pagefail((void**)address,PAGE_SIZE);
+	bm_vmem_pagefill((void**)address,PAGE_SIZE);
 
 	return true;
 }
@@ -641,10 +681,13 @@ bool _vmem_reserve()
 
 	verify((sizeof(Sh4RCB)%PAGE_SIZE)==0);
 
+	if (settings.dynarec.disable_nvmem)
+		return _vmem_reserve_nonvmem();
+
 	virt_ram_base=(u8*)_nvmem_alloc_mem();
 
 	if (virt_ram_base==0)
-		return false;
+		return _vmem_reserve_nonvmem();
 
 	p_sh4rcb=(Sh4RCB*)virt_ram_base;
 
@@ -730,27 +773,9 @@ bool _vmem_reserve()
 }
 #else
 
-void* malloc_pages(size_t size) {
-
-	u8* rv = (u8*)malloc(size + PAGE_SIZE);
-
-	return rv + PAGE_SIZE - ((unat)rv % PAGE_SIZE);
-}
-
 bool _vmem_reserve()
 {
-	p_sh4rcb=(Sh4RCB*)malloc_pages(sizeof(Sh4RCB));
-
-	mem_b.size=RAM_SIZE;
-	mem_b.data=(u8*)malloc_pages(RAM_SIZE);
-
-	vram.size=VRAM_SIZE;
-	vram.data=(u8*)malloc_pages(VRAM_SIZE);
-
-	aica_ram.size=ARAM_SIZE;
-	aica_ram.data=(u8*)malloc_pages(ARAM_SIZE);
-
-	return true;
+	return _vmem_reserve_nonvmem();
 }
 
 #endif
diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h
index 6df230222..8509fe3ac 100644
--- a/core/hw/mem/_vmem.h
+++ b/core/hw/mem/_vmem.h
@@ -62,4 +62,12 @@ void _vmem_release();
 //dynarec helpers
 void _vmem_get_ptrs(u32 sz,bool write,void*** vmap,void*** func);
 void* _vmem_get_ptr2(u32 addr,u32& mask);
-void* _vmem_read_const(u32 addr,bool& ismem,u32 sz);
\ No newline at end of file
+void* _vmem_read_const(u32 addr,bool& ismem,u32 sz);
+
+extern u8* virt_ram_base;
+
+static inline bool _nvmem_enabled() {
+	return virt_ram_base != 0;
+}
+
+void _vmem_bm_reset();
\ No newline at end of file
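The non-nvmem path above gets page-aligned buffers by over-allocating one page and rounding the pointer up. A minimal self-contained sketch of that arithmetic, assuming a 4 KiB page (kPage and malloc_pages_sketch are illustrative names, not part of the patch):

    #include <cassert>
    #include <cstdint>
    #include <cstdlib>

    static const size_t kPage = 4096;

    // Over-allocate by one page, then round the pointer up to the next page
    // boundary. The raw pointer is dropped, so the block is never free()d --
    // acceptable for buffers that live for the whole emulator session.
    void* malloc_pages_sketch(size_t size) {
        uint8_t* raw = (uint8_t*)malloc(size + kPage);
        assert(raw != nullptr);
        uint8_t* aligned = raw + kPage - ((uintptr_t)raw % kPage);
        assert((uintptr_t)aligned % kPage == 0);
        assert(aligned + size <= raw + size + kPage); // stays inside the block
        return aligned;
    }

Like the patch, the sketch skips a full page even when malloc happens to return an aligned block; rounding with (PAGE_SIZE - rv % PAGE_SIZE) % PAGE_SIZE would avoid the wasted page at the cost of a second modulo.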
diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp
index fab71484d..9a5224118 100644
--- a/core/hw/sh4/dyna/blockmanager.cpp
+++ b/core/hw/sh4/dyna/blockmanager.cpp
@@ -362,9 +362,7 @@ void bm_Rebuild()
 	rebuild_counter=30;
 }
 
-void _vmem_bm_reset();
-
-void _vmem_bm_pagefail(void** ptr,u32 PAGE_SZ)
+void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ)
 {
 	for (size_t i=0; i<PAGE_SZ/sizeof(ptr[0]); i++)
 	{
diff --git a/core/rec-x64/rec_x64.cpp b/core/rec-x64/rec_x64.cpp
--- a/core/rec-x64/rec_x64.cpp
+++ b/core/rec-x64/rec_x64.cpp
@@ ... @@
 	verify(rcb_noffs(&p_sh4rcb->cntx.sh4_sched_next) == -152);
 	verify(rcb_noffs(&p_sh4rcb->cntx.interrupt_pend) == -148);
 
-#if !defined(TARGET_NO_NVMEM)
-	verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
-#endif
+	if (_nvmem_enabled()) {
+		verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000));
+	}
 
 #if defined(_WIN64)
 	for (int i = 10; i < 1300; i++) {
diff --git a/core/hw/sh4/interpr/sh4_opcodes.cpp b/core/hw/sh4/interpr/sh4_opcodes.cpp
index 06976ddda..355b2138e 100644
--- a/core/hw/sh4/interpr/sh4_opcodes.cpp
+++ b/core/hw/sh4/interpr/sh4_opcodes.cpp
@@ -1299,18 +1299,22 @@ INLINE void DYNACALL do_sqw(u32 Dest)
 
 void DYNACALL do_sqw_mmu(u32 dst) { do_sqw(dst); }
 
 #if HOST_CPU!=CPU_ARM
+//yes, this micro-optimization makes a difference
 extern "C" void DYNACALL do_sqw_nommu_area_3(u32 dst,u8* sqb)
 {
-	#if defined (TARGET_NO_NVMEM)
-	u8* pmem = mem_b.data;
-	#else
 	u8* pmem=sqb+512+0x0C000000;
-	#endif
 	memcpy((u64*)&pmem[dst&(RAM_MASK-0x1F)],(u64*)&sqb[dst & 0x20],32);
 }
 #endif
 
+extern "C" void DYNACALL do_sqw_nommu_area_3_nonvmem(u32 dst,u8* sqb)
+{
+	u8* pmem = mem_b.data;
+
+	memcpy((u64*)&pmem[dst&(RAM_MASK-0x1F)],(u64*)&sqb[dst & 0x20],32);
+}
+
 void DYNACALL do_sqw_nommu_full(u32 dst, u8* sqb) { do_sqw(dst); }
 
 sh4op(i0000_nnnn_1000_0011)
diff --git a/core/hw/sh4/modules/ccn.cpp b/core/hw/sh4/modules/ccn.cpp
index f132162ec..b62afb6cc 100644
--- a/core/hw/sh4/modules/ccn.cpp
+++ b/core/hw/sh4/modules/ccn.cpp
@@ -7,6 +7,7 @@
 #include "ccn.h"
 #include "../sh4_core.h"
 #include "hw/pvr/pvr_mem.h"
+#include "hw/mem/_vmem.h"
 
 //Types
 
@@ -26,8 +27,16 @@ void CCN_QACR_write(u32 addr, u32 value)
 
 	switch(area)
 	{
-		case 3: do_sqw_nommu=&do_sqw_nommu_area_3; break;
-		case 4: do_sqw_nommu=(sqw_fp*)&TAWriteSQ; break;
+		case 3:
+			if (_nvmem_enabled())
+				do_sqw_nommu=&do_sqw_nommu_area_3;
+			else
+				do_sqw_nommu=&do_sqw_nommu_area_3_nonvmem;
+			break;
+
+		case 4:
+			do_sqw_nommu=(sqw_fp*)&TAWriteSQ;
+			break;
 		default: do_sqw_nommu=&do_sqw_nommu_full;
 	}
 }
diff --git a/core/hw/sh4/sh4_if.h b/core/hw/sh4/sh4_if.h
index 957a9f361..aa7aaf47c 100644
--- a/core/hw/sh4/sh4_if.h
+++ b/core/hw/sh4/sh4_if.h
@@ -300,6 +300,7 @@ struct Sh4Context
 
 void DYNACALL do_sqw_mmu(u32 dst);
 extern "C" void DYNACALL do_sqw_nommu_area_3(u32 dst, u8* sqb);
+extern "C" void DYNACALL do_sqw_nommu_area_3_nonvmem(u32 dst, u8* sqb);
 void DYNACALL do_sqw_nommu_full(u32 dst, u8* sqb);
 typedef void DYNACALL sqw_fp(u32 dst,u8* sqb);
 
diff --git a/core/nullDC.cpp b/core/nullDC.cpp
index 85a5ed0f4..7368ab093 100755
--- a/core/nullDC.cpp
+++ b/core/nullDC.cpp
@@ -245,6 +245,7 @@ void LoadSettings()
 	settings.dynarec.Enable		= cfgLoadInt("config","Dynarec.Enabled", 1)!=0;
 	settings.dynarec.idleskip	= cfgLoadInt("config","Dynarec.idleskip",1)!=0;
 	settings.dynarec.unstable_opt	= cfgLoadInt("config","Dynarec.unstable-opt",0);
+	//disable_nvmem can't be loaded here because nvmem init runs before the config is loaded
 	settings.dreamcast.cable	= cfgLoadInt("config","Dreamcast.Cable",3);
 	settings.dreamcast.RTC		= cfgLoadInt("config","Dreamcast.RTC",GetRTC_now());
 	settings.dreamcast.region	= cfgLoadInt("config","Dreamcast.Region",3);
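The sh4_opcodes.cpp and ccn.cpp changes above keep the store-queue flush branch-free: CCN_QACR_write picks a handler once per QACR update, so the burst itself stays a single indirect call whichever memory layout is active. A condensed sketch of that dispatch, with illustrative sizes and names (g_ram stands in for mem_b.data; the real code selects between the nvmem mirror at sqb+512+0x0C000000 and plain RAM):

    #include <cstdint>
    #include <cstring>

    typedef void sq_writer(uint32_t dst, uint8_t* sq);

    static uint8_t g_ram[8 * 1024 * 1024];              // stand-in for mem_b.data
    static const uint32_t kRamMask = sizeof(g_ram) - 1;

    // One SQ burst: copy the 32-byte line selected by bit 5 of the address.
    // Mirrors do_sqw_nommu_area_3_nonvmem, which indexes system RAM directly.
    static void sq_write_area3_nonvmem(uint32_t dst, uint8_t* sq) {
        memcpy(&g_ram[dst & (kRamMask - 0x1F)], &sq[dst & 0x20], 32);
    }

    static sq_writer* do_sqw = &sq_write_area3_nonvmem; // re-picked on QACR writes

    void flush_sq(uint32_t dst, uint8_t* sq) { do_sqw(dst, sq); } // hot path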
diff --git a/core/rec-ARM/rec_arm.cpp b/core/rec-ARM/rec_arm.cpp
index 392058e86..c03c0d1a1 100644
--- a/core/rec-ARM/rec_arm.cpp
+++ b/core/rec-ARM/rec_arm.cpp
@@ -844,6 +844,41 @@ union arm_mem_op
 	u32 full;
 };
 
+void vmem_slowpath(eReg raddr, eReg rt, eFSReg ft, eFDReg fd, mem_op_type optp, bool read)
+{
+	if (raddr != r0)
+		MOV(r0, (eReg)raddr);
+
+	if (!read)
+	{
+		if (optp <= SZ_32I) MOV(r1, rt);
+		else if (optp == SZ_32F) VMOV(r1, ft);
+		else if (optp == SZ_64F) VMOV(r2, r3, fd);
+	}
+
+	if (fd != d0 && optp == SZ_64F)
+	{
+		die("BLAH");
+	}
+
+	u32 funct = 0;
+
+	if (optp <= SZ_32I)
+		funct = _mem_hndl[read][optp][raddr];
+	else
+		funct = _mem_func[read][optp];
+
+	verify(funct != 0);
+	CALL(funct);
+
+	if (read)
+	{
+		if (optp <= SZ_32I) MOV(rt, r0);
+		else if (optp == SZ_32F) VMOV(ft, r0);
+		else if (optp == SZ_64F) VMOV(fd, r0, r1);
+	}
+}
+
 u32* ngen_readm_fail_v2(u32* ptrv,u32* regs,u32 fault_addr)
 {
 	arm_mem_op* ptr=(arm_mem_op*)ptrv;
@@ -1157,35 +1192,59 @@ void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool staging,
 		{
 			eReg raddr=GenMemAddr(op);
 
-			BIC(r1,raddr,0xE0000000);
-			//UBFX(r1,raddr,0,29);
-			//SUB(r1,raddr,raddr);
+			if (_nvmem_enabled()) {
+				BIC(r1,raddr,0xE0000000);
 
-			switch(optp)
-			{
-			case SZ_8:
-				LDRSB(reg.mapg(op->rd),r1,r8,true);
-				break;
+				switch(optp)
+				{
+				case SZ_8:
+					LDRSB(reg.mapg(op->rd),r1,r8,true);
+					break;
 
-			case SZ_16:
-				LDRSH(reg.mapg(op->rd),r1,r8,true);
-				break;
+				case SZ_16:
+					LDRSH(reg.mapg(op->rd),r1,r8,true);
+					break;
 
-			case SZ_32I:
-				LDR(reg.mapg(op->rd),r1,r8,Offset,true);
-				break;
+				case SZ_32I:
+					LDR(reg.mapg(op->rd),r1,r8,Offset,true);
+					break;
 
-			case SZ_32F:
-				ADD(r1,r1,r8);	//3 opcodes, there's no [REG+REG] VLDR
-				VLDR(reg.mapf(op->rd),r1,0);
-				break;
+				case SZ_32F:
+					ADD(r1,r1,r8);	//3 opcodes, there's no [REG+REG] VLDR
+					VLDR(reg.mapf(op->rd),r1,0);
+					break;
 
-			case SZ_64F:
-				ADD(r1,r1,r8);	//3 opcodes, there's no [REG+REG] VLDR
-				VLDR(d0,r1,0);	//TODO: use reg alloc
+				case SZ_64F:
+					ADD(r1,r1,r8);	//3 opcodes, there's no [REG+REG] VLDR
+					VLDR(d0,r1,0);	//TODO: use reg alloc
 
-				VSTR(d0,r8,op->rd.reg_nofs()/4);
-				break;
+					VSTR(d0,r8,op->rd.reg_nofs()/4);
+					break;
+				}
+			} else {
+				switch(optp)
+				{
+				case SZ_8:
+					vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true);
+					break;
+
+				case SZ_16:
+					vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true);
+					break;
+
+				case SZ_32I:
+					vmem_slowpath(raddr, reg.mapg(op->rd), f0, d0, optp, true);
+					break;
+
+				case SZ_32F:
+					vmem_slowpath(raddr, r0, reg.mapf(op->rd), d0, optp, true);
+					break;
+
+				case SZ_64F:
+					vmem_slowpath(raddr, r0, f0, d0, optp, true);
+					VSTR(d0,r8,op->rd.reg_nofs()/4);
+					break;
+				}
 			}
 		}
 	}
@@ -1202,62 +1261,87 @@ void ngen_compile_opcode(RuntimeBlockInfo* block, shil_opcode* op, bool staging,
 			if (optp == SZ_64F)
 				VLDR(d0,r8,op->rs2.reg_nofs()/4);
 
-			BIC(r1,raddr,0xE0000000);
-			//UBFX(r1,raddr,0,29);
-			//SUB(r1,raddr,raddr);
-
-			s32 sq_offs=rcb_noffs(sq_both);
-			switch(optp)
-			{
-			case SZ_8:
-				STRB(reg.mapg(op->rs2),r1,r8,Offset,true);
-				break;
+			if (_nvmem_enabled()) {
+				BIC(r1,raddr,0xE0000000);
+				//UBFX(r1,raddr,0,29);
+				//SUB(r1,raddr,raddr);
+
+				s32 sq_offs=rcb_noffs(sq_both);
+				switch(optp)
+				{
+				case SZ_8:
+					STRB(reg.mapg(op->rs2),r1,r8,Offset,true);
+					break;
 
-			case SZ_16:
-				STRH(reg.mapg(op->rs2),r1,r8,true);
-				break;
+				case SZ_16:
+					STRH(reg.mapg(op->rs2),r1,r8,true);
+					break;
 
-			case SZ_32I:
-				if (op->flags2!=0x1337)
-					STR(reg.mapg(op->rs2),r1,r8,Offset,true);
-				else
-				{
-					emit_Skip(-4);
-					AND(r1,raddr,0x3F);
-					ADD(r1,r1,r8);
-					STR(reg.mapg(op->rs2),r1,sq_offs);
-				}
-				break;
+				case SZ_32I:
+					if (op->flags2!=0x1337)
+						STR(reg.mapg(op->rs2),r1,r8,Offset,true);
+					else
+					{
+						emit_Skip(-4);
+						AND(r1,raddr,0x3F);
+						ADD(r1,r1,r8);
+						STR(reg.mapg(op->rs2),r1,sq_offs);
+					}
+					break;
 
-			case SZ_32F:
-				if (op->flags2!=0x1337)
-				{
-					ADD(r1,r1,r8);	//3 opcodes: there's no [REG+REG] VLDR, also required for SQ
-					VSTR(reg.mapf(op->rs2),r1,0);
-				}
-				else
-				{
-					emit_Skip(-4);
-					AND(r1,raddr,0x3F);
-					ADD(r1,r1,r8);
-					VSTR(reg.mapf(op->rs2),r1,sq_offs/4);
-				}
-				break;
+				case SZ_32F:
+					if (op->flags2!=0x1337)
+					{
+						ADD(r1,r1,r8);	//3 opcodes: there's no [REG+REG] VLDR, also required for SQ
+						VSTR(reg.mapf(op->rs2),r1,0);
+					}
+					else
+					{
+						emit_Skip(-4);
+						AND(r1,raddr,0x3F);
+						ADD(r1,r1,r8);
+						VSTR(reg.mapf(op->rs2),r1,sq_offs/4);
+					}
+					break;
 
-			case SZ_64F:
-				if (op->flags2!=0x1337)
-				{
-					ADD(r1,r1,r8);	//3 opcodes: there's no [REG+REG] VLDR, also required for SQ
-					VSTR(d0,r1,0);	//TODO: use reg alloc
+				case SZ_64F:
+					if (op->flags2!=0x1337)
+					{
+						ADD(r1,r1,r8);	//3 opcodes: there's no [REG+REG] VLDR, also required for SQ
+						VSTR(d0,r1,0);	//TODO: use reg alloc
+					}
+					else
+					{
+						emit_Skip(-4);
+						AND(r1,raddr,0x3F);
+						ADD(r1,r1,r8);
+						VSTR(d0,r1,sq_offs/4);
+					}
+					break;
 				}
-				else
+			} else {
+				switch(optp)
 				{
-					emit_Skip(-4);
-					AND(r1,raddr,0x3F);
-					ADD(r1,r1,r8);
-					VSTR(d0,r1,sq_offs/4);
+				case SZ_8:
+					vmem_slowpath(raddr, reg.mapg(op->rs2), f0, d0, optp, false);
+					break;
+
+				case SZ_16:
+					vmem_slowpath(raddr, reg.mapg(op->rs2), f0, d0, optp, false);
+					break;
+
+				case SZ_32I:
+					vmem_slowpath(raddr, reg.mapg(op->rs2), f0, d0, optp, false);
+					break;
+
+				case SZ_32F:
+					vmem_slowpath(raddr, r0, reg.mapf(op->rs2), d0, optp, false);
+					break;
+
+				case SZ_64F:
+					vmem_slowpath(raddr, r0, f0, d0, optp, false);
+					break;
 				}
-				break;
 			}
 		}
 		break;
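vmem_slowpath above marshals the address into r0 (store data into r1, or r2:r3 for 64-bit), calls a pre-generated helper, and moves load results back out of r0 (r0:r1 for 64-bit). The helper is looked up by direction, operand size, and address register; a sketch of the table shape this implies, with stand-in types and dimensions (the real _mem_hndl/_mem_func layouts are defined elsewhere in the backend):

    #include <cstdint>

    enum MemSize { SZ_8, SZ_16, SZ_32I, SZ_32F, SZ_64F, kNumSizes };
    typedef uintptr_t Handler;            // address of an emitted thunk

    // Integer sizes get one thunk per address register so the thunk can fold
    // the register move; FP sizes share a single helper per direction.
    static Handler mem_hndl[2][3][16];    // [read][SZ_8..SZ_32I][address reg]
    static Handler mem_func[2][kNumSizes];

    Handler pick_handler(bool read, MemSize sz, int addr_reg) {
        if (sz <= SZ_32I)
            return mem_hndl[read ? 1 : 0][sz][addr_reg];
        return mem_func[read ? 1 : 0][sz];
    }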
diff --git a/core/rec-x86/rec_x86_driver.cpp b/core/rec-x86/rec_x86_driver.cpp
index 4d9f6d61b..2156bb6e3 100644
--- a/core/rec-x86/rec_x86_driver.cpp
+++ b/core/rec-x86/rec_x86_driver.cpp
@@ -620,10 +620,6 @@ u32 DynaRBI::Relink()
 	W	F32v2	B,S{,M}
 */
 
-#if !defined(TARGET_NO_NVMEM)
-extern u8* virt_ram_base;
-#endif
-
 #include "hw/sh4/sh4_mmr.h"
 
 enum mem_op_type
@@ -651,8 +647,7 @@ void gen_hande(u32 w, u32 sz, u32 mode)
 
 	u32 si=x86e->x86_indx;
 
-#ifndef TARGET_NO_NVMEM
-	if (mode==0)
+	if (mode==0 && _nvmem_enabled())
 	{
 		//Buffer
 		x86e->Emit(op_mov32,EAX,ECX);
@@ -684,9 +679,7 @@ void gen_hande(u32 w, u32 sz, u32 mode)
 
 		}
 	}
-	else
-#endif
-	if (mode==1)
+	else if (mode==1)
 	{
 		//SQ
 		verify(w==1);
diff --git a/core/rec-x86/rec_x86_il.cpp b/core/rec-x86/rec_x86_il.cpp
index 7ee459c22..13c7b8f8c 100644
--- a/core/rec-x86/rec_x86_il.cpp
+++ b/core/rec-x86/rec_x86_il.cpp
@@ -178,10 +178,6 @@ void DYNACALL VERIFYME(u32 addr)
 	verify((addr>>26)==0x38);
 }
 
-#if !defined(TARGET_NO_NVMEM)
-extern u8* virt_ram_base;
-#endif
-
 /*
 
 	ReadM
diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp
index 4003b33dd..69039fd82 100644
--- a/core/rend/TexCache.cpp
+++ b/core/rend/TexCache.cpp
@@ -1,5 +1,6 @@
 #include "TexCache.h"
 #include "hw/pvr/pvr_regs.h"
+#include "hw/mem/_vmem.h"
 
 u8* vq_codebook;
 u32 palette_index;
@@ -193,7 +194,8 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user
 
 	vram.LockRegion(block->start,block->len);
 
-	if (VRAM_SIZE == 0x800000) {
+	//TODO: Fix this for 32M wrap as well
+	if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
 		vram.LockRegion(block->start + VRAM_SIZE, block->len);
 	}
 
@@ -236,7 +238,11 @@ bool VramLockedWrite(u8* address)
 			list->clear();
 
 			vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)),PAGE_SIZE);
-			vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE);
+
+			//TODO: Fix this for 32M wrap as well
+			if (_nvmem_enabled() && VRAM_SIZE == 0x800000) {
+				vram.UnLockRegion((u32)offset&(~(PAGE_SIZE-1)) + VRAM_SIZE,PAGE_SIZE);
+			}
 
 			vramlist_lock.Unlock();
 		}
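The TexCache.cpp hunks above gate the second LockRegion/UnLockRegion on _nvmem_enabled() because only the nvmem layout appears to map the 8 MiB VRAM a second time at +VRAM_SIZE; in the malloc fallback there is a single buffer, and touching the mirror offset would protect unrelated memory. A sketch of the rule, with lock_region as a hypothetical callback standing in for vram.LockRegion:

    #include <cstdint>

    static const uint32_t kVramSize = 0x800000; // 8 MiB board

    void lock_texture_pages(uint32_t start, uint32_t len, bool nvmem,
                            void (*lock_region)(uint32_t start, uint32_t len)) {
        lock_region(start, len);
        // nvmem only: the same pages are visible again at +VRAM_SIZE, and a
        // write through either alias must fault, so protect both. The 32 MiB
        // window wrap mentioned in the TODO is still unhandled.
        if (nvmem && kVramSize == 0x800000)
            lock_region(start + kVramSize, len);
    }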
diff --git a/core/types.h b/core/types.h
index 109b92f83..4845815b6 100644
--- a/core/types.h
+++ b/core/types.h
@@ -616,6 +616,7 @@ struct settings_t
 		bool Enable;
 		bool idleskip;
 		bool unstable_opt;
+		bool disable_nvmem;
 	} dynarec;
 
 	struct
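Taken together, the patch turns the compile-time TARGET_NO_NVMEM split into a runtime capability: _vmem_reserve() now degrades to malloc-backed buffers instead of failing, and the former #ifdefs become branches on _nvmem_enabled(). A condensed, hypothetical view of the resulting fallback chain (the stub helpers stand in for the real allocators):

    static bool g_disable_nvmem = false;  // mirrors settings.dynarec.disable_nvmem

    static bool reserve_nonvmem()   { return true; }  // malloc_pages()-backed buffers
    static bool nvmem_alloc_arena() { return false; } // pretend the big mapping failed
    static bool map_arena_regions() { return true; }  // RAM/VRAM/ARAM views + mirrors

    bool vmem_reserve_sketch() {
        if (g_disable_nvmem)              // explicit opt-out (not yet loadable from cfg)
            return reserve_nonvmem();
        if (!nvmem_alloc_arena())         // address-space reservation can fail at runtime
            return reserve_nonvmem();     // graceful fallback instead of a fatal error
        return map_arena_regions();
    }

Since virt_ram_base doubles as the capability flag, _vmem_reserve_nonvmem() clears it as its first statement so that every later _nvmem_enabled() check reports the right mode.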