From be1ecbaa8ba9124807dda1ed21b1a9cfa38c4a1e Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Fri, 10 May 2019 18:57:28 +0200 Subject: [PATCH 1/7] Fix TARGET_NO_NVMEM and deprecate TARGET_NO_EXCEPTIONS Linked them both toghether since you can't really define one and not the other (plus Linux honors one windows the other in some cases). More refactoring on this area to follow. --- core/hw/mem/_vmem.cpp | 2 +- core/hw/sh4/dyna/blockmanager.cpp | 4 ++-- core/linux/common.cpp | 17 ++++++++--------- core/linux/context.cpp | 4 ++-- shell/cmake/config.cmake | 4 ++-- shell/emscripten/Makefile | 2 +- shell/nacl/Makefile | 4 ++-- 7 files changed, 18 insertions(+), 19 deletions(-) diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp index 2c02197c5..e621fa948 100644 --- a/core/hw/mem/_vmem.cpp +++ b/core/hw/mem/_vmem.cpp @@ -440,7 +440,7 @@ void _vmem_bm_reset() { if (!virt_ram_base) #endif { - bm_vmem_pagefill((void**)p_sh4rcb->fpcb, FPCB_SIZE); + bm_vmem_pagefill((void**)p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); } } diff --git a/core/hw/sh4/dyna/blockmanager.cpp b/core/hw/sh4/dyna/blockmanager.cpp index e64cd0d60..0994c3469 100644 --- a/core/hw/sh4/dyna/blockmanager.cpp +++ b/core/hw/sh4/dyna/blockmanager.cpp @@ -339,9 +339,9 @@ void bm_Rebuild() rebuild_counter=30; } -void bm_vmem_pagefill(void** ptr,u32 PAGE_SZ) +void bm_vmem_pagefill(void** ptr, u32 size_bytes) { - for (size_t i=0; i void VArray2::LockRegion(u32 offset,u32 size) { - #if !defined(TARGET_NO_EXCEPTIONS) + #if !defined(TARGET_NO_NVMEM) u32 inpage=offset & PAGE_MASK; u32 rv=mprotect (data+offset-inpage, size+inpage, PROT_READ ); if (rv!=0) @@ -185,7 +184,7 @@ void print_mem_addr() void VArray2::UnLockRegion(u32 offset,u32 size) { - #if !defined(TARGET_NO_EXCEPTIONS) + #if !defined(TARGET_NO_NVMEM) u32 inpage=offset & PAGE_MASK; u32 rv=mprotect (data+offset-inpage, size+inpage, PROT_READ | PROT_WRITE); if (rv!=0) diff --git a/core/linux/context.cpp b/core/linux/context.cpp index 7c317c401..9884ad426 100644 --- a/core/linux/context.cpp +++ b/core/linux/context.cpp @@ -8,7 +8,7 @@ #define __USE_GNU 1 #endif - #if !defined(TARGET_NO_EXCEPTIONS) + #if !defined(TARGET_NO_NVMEM) #include #endif #endif @@ -29,7 +29,7 @@ void bicopy(Ta& rei, Tb& seg, bool to_segfault) { void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_segfault) { -#if !defined(TARGET_NO_EXCEPTIONS) +#if !defined(TARGET_NO_NVMEM) #if HOST_CPU == CPU_ARM #if defined(__FreeBSD__) bicopy(reictx->pc, MCTX(.__gregs[_REG_PC]), to_segfault); diff --git a/shell/cmake/config.cmake b/shell/cmake/config.cmake index d1dfd59e4..056b7e74a 100644 --- a/shell/cmake/config.cmake +++ b/shell/cmake/config.cmake @@ -343,7 +343,7 @@ if (TARGET_NSW) # -DCMAKE_TOOLCHAIN_FILE=./cmake/devkitA64.cmake -DTARGET_NSW=ON message("HOST_OS ${HOST_OS}") add_definitions(-D__SWITCH__ -DGLES -DMESA_EGL_NO_X11_HEADERS) - add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_EXCEPTIONS -DTARGET_NO_NIXPROF) + add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_NVMEM -DTARGET_NO_NIXPROF) add_definitions(-DTARGET_NO_COREIO_HTTP -DTARGET_NO_WEBUI -UTARGET_SOFTREND) add_definitions(-D_GLIBCXX_USE_C99_MATH_TR1 -D_LDBL_EQ_DBL) @@ -355,7 +355,7 @@ if (TARGET_PS4) # -DCMAKE_TOOLCHAIN_FILE=./cmake/{ps4sdk,clang_scei}.cmake -DTAR add_definitions(-DPS4 -DTARGET_PS4 -DTARGET_BSD -D__ORBIS__ -DGLES -DMESA_EGL_NO_X11_HEADERS) ## last needed for __unix__ on eglplatform.h - add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_EXCEPTIONS -DTARGET_NO_NIXPROF) + add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_NVMEM -DTARGET_NO_NIXPROF) add_definitions(-DTARGET_NO_COREIO_HTTP -DTARGET_NO_WEBUI -UTARGET_SOFTREND) diff --git a/shell/emscripten/Makefile b/shell/emscripten/Makefile index 6f1054198..1c74e19b6 100644 --- a/shell/emscripten/Makefile +++ b/shell/emscripten/Makefile @@ -26,7 +26,7 @@ LDFLAGS := -Wl,-Map,$(notdir $@).map,--gc-sections -Wl,-O3 -Wl,--sort-common CXXONLYFLAGS := -std=c++11 -CXXFLAGS := -O3 -D GLES -D RELEASE -c -D TARGET_EMSCRIPTEN -D TARGET_NO_REC -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_THREADS -D TARGET_BOUNDED_EXECUTION -D TARGET_NO_EXCEPTIONS -D TARGET_NO_COREIO_HTTP +CXXFLAGS := -O3 -D GLES -D RELEASE -c -D TARGET_EMSCRIPTEN -D TARGET_NO_REC -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_THREADS -D TARGET_BOUNDED_EXECUTION -D TARGET_NO_COREIO_HTTP CXXFLAGS += -fno-strict-aliasing CXXFLAGS += -ffast-math diff --git a/shell/nacl/Makefile b/shell/nacl/Makefile index 733a7d538..e0d03834e 100644 --- a/shell/nacl/Makefile +++ b/shell/nacl/Makefile @@ -23,7 +23,7 @@ CFLAGS = -Wno-error -Wno-ignored-attributes CFLAGS += -O3 -fno-strict-aliasing -ffast-math CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps CFLAGS += -D RELEASE -D TARGET_NO_JIT -D TARGET_NACL32 -DGLES -CFLAGS += -D TARGET_NO_EXCEPTIONS -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_COREIO_HTTP +CFLAGS += -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_COREIO_HTTP SOURCES = $(RZDCY_FILES) ../../core/nacl/nacl.cpp @@ -49,4 +49,4 @@ endif SHELL = sh -$(eval $(call NMF_RULE,$(TARGET),)) \ No newline at end of file +$(eval $(call NMF_RULE,$(TARGET),)) From b74db6ef53cbbc3870453a6487d0b5854136bf06 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 11 May 2019 22:09:52 +0200 Subject: [PATCH 2/7] Moving vmem to separate files with a proper interface. Works so far for Linux and Android, need to do some testing on Windows. --- core/hw/aica/aica_if.cpp | 2 +- core/hw/aica/aica_if.h | 2 +- core/hw/mem/_vmem.cpp | 469 +++++++------------------------------- core/hw/mem/_vmem.h | 29 ++- core/hw/pvr/pvr_mem.h | 4 +- core/hw/sh4/sh4_mem.cpp | 2 +- core/hw/sh4/sh4_mem.h | 2 +- core/linux/common.cpp | 72 +----- core/linux/context.cpp | 4 +- core/linux/posix_vmem.cpp | 177 ++++++++++++++ core/rend/TexCache.cpp | 5 +- core/serialize.cpp | 6 +- core/stdclass.h | 42 ++-- core/windows/win_vmem.cpp | 96 ++++++++ core/windows/winmain.cpp | 18 -- shell/cmake/config.cmake | 4 +- shell/emscripten/Makefile | 2 +- shell/nacl/Makefile | 2 +- 18 files changed, 432 insertions(+), 506 deletions(-) create mode 100644 core/linux/posix_vmem.cpp create mode 100644 core/windows/win_vmem.cpp diff --git a/core/hw/aica/aica_if.cpp b/core/hw/aica/aica_if.cpp index 9ba65c0a7..d562a0de1 100644 --- a/core/hw/aica/aica_if.cpp +++ b/core/hw/aica/aica_if.cpp @@ -13,7 +13,7 @@ #include -VArray2 aica_ram; +VLockedMemory aica_ram; u32 VREG;//video reg =P u32 ARMRST;//arm reset reg u32 rtc_EN=0; diff --git a/core/hw/aica/aica_if.h b/core/hw/aica/aica_if.h index 8c58b5326..13ee6fd97 100644 --- a/core/hw/aica/aica_if.h +++ b/core/hw/aica/aica_if.h @@ -2,7 +2,7 @@ #include "types.h" extern u32 VREG; -extern VArray2 aica_ram; +extern VLockedMemory aica_ram; extern u32 RealTimeClock; u32 ReadMem_aica_rtc(u32 addr,u32 sz); void WriteMem_aica_rtc(u32 addr,u32 data,u32 sz); diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp index e621fa948..9a297330c 100644 --- a/core/hw/mem/_vmem.cpp +++ b/core/hw/mem/_vmem.cpp @@ -21,7 +21,6 @@ _vmem_WriteMem32FP* _vmem_WF32[HANDLER_COUNT]; //upper 8b of the address void* _vmem_MemInfo_ptr[0x100]; - void _vmem_get_ptrs(u32 sz,bool write,void*** vmap,void*** func) { *vmap=_vmem_MemInfo_ptr; @@ -385,10 +384,7 @@ void _vmem_reset() verify(_vmem_register_handler(0,0,0,0,0,0)==0); } -void _vmem_term() -{ - -} +void _vmem_term() {} #include "hw/pvr/pvr_mem.h" #include "hw/sh4/sh4_mem.h" @@ -409,414 +405,119 @@ void* malloc_pages(size_t size) { #endif } -bool _vmem_reserve_nonvmem() -{ - virt_ram_base = 0; - - p_sh4rcb=(Sh4RCB*)malloc_pages(sizeof(Sh4RCB)); - - mem_b.size=RAM_SIZE; - mem_b.data=(u8*)malloc_pages(RAM_SIZE); - - vram.size=VRAM_SIZE; - vram.data=(u8*)malloc_pages(VRAM_SIZE); - - aica_ram.size=ARAM_SIZE; - aica_ram.data=(u8*)malloc_pages(ARAM_SIZE); - - return true; -} - -void _vmem_bm_reset_nvmem(); - +// Resets the FPCB table (by either clearing it to the default val +// or by flushing it and making it fault on access again. void _vmem_bm_reset() { - if (virt_ram_base) { - #if !defined(TARGET_NO_NVMEM) - _vmem_bm_reset_nvmem(); - #endif - } - -#ifndef TARGET_IPHONE - if (!virt_ram_base) -#endif - { + // If we allocated it via vmem: + if (virt_ram_base) + vmem_platform_reset_mem(p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); + else + // We allocated it via a regular malloc/new/whatever on the heap bm_vmem_pagefill((void**)p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); - } } -static void _vmem_release_nonvmem() -{ - free(p_sh4rcb); - free(vram.data); - free(aica_ram.data); - free(mem_b.data); -} +// This gets called whenever there is a pagefault, it is possible that it lands +// on the fpcb memory range, which is allocated on miss. Returning true tells the +// fault handler this was us, and that the page is resolved and can continue the execution. +bool BM_LockedWrite(u8* address) { + if (!virt_ram_base) + return false; // No vmem, therefore not us who caused this. -#if !defined(TARGET_NO_NVMEM) + uintptr_t ptrint = (uintptr_t)address; + uintptr_t start = (uintptr_t)p_sh4rcb->fpcb; + uintptr_t end = start + sizeof(p_sh4rcb->fpcb); -#define MAP_RAM_START_OFFSET 0 -#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE) -#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) - -#if HOST_OS==OS_WINDOWS -#include -HANDLE mem_handle; - -void* _nvmem_map_buffer(u32 dst,u32 addrsz,u32 offset,u32 size, bool w) -{ - void* ptr; - void* rv; - - u32 map_times=addrsz/size; - verify((addrsz%size)==0); - verify(map_times>=1); - - rv= MapViewOfFileEx(mem_handle,FILE_MAP_READ | (w?FILE_MAP_WRITE:0),0,offset,size,&virt_ram_base[dst]); - if (!rv) - return 0; - - for (u32 i=1;i - #include - #include - #include - #include - #include - -#ifndef MAP_NOSYNC -#define MAP_NOSYNC 0 //missing from linux :/ -- could be the cause of android slowness ? -#endif - -#ifdef _ANDROID -#include - -#ifndef ASHMEM_DEVICE -#define ASHMEM_DEVICE "/dev/ashmem" -#endif -int ashmem_create_region(const char *name, size_t size) -{ - int fd, ret; - - fd = open(ASHMEM_DEVICE, O_RDWR); - if (fd < 0) - return fd; - - if (name) { - char buf[ASHMEM_NAME_LEN]; - - strlcpy(buf, name, sizeof(buf)); - ret = ioctl(fd, ASHMEM_SET_NAME, buf); - if (ret < 0) - goto error; - } - - ret = ioctl(fd, ASHMEM_SET_SIZE, size); - if (ret < 0) - goto error; - - return fd; - -error: - close(fd); - return ret; -} -#endif - - int fd; - void* _nvmem_unused_buffer(u32 start,u32 end) - { - void* ptr=mmap(&virt_ram_base[start], end-start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0); - if (MAP_FAILED==ptr) - return 0; - return ptr; - } - - - void* _nvmem_map_buffer(u32 dst,u32 addrsz,u32 offset,u32 size, bool w) - { - void* ptr; - void* rv; - - printf("MAP %08X w/ %d\n",dst,offset); - u32 map_times=addrsz/size; - verify((addrsz%size)==0); - verify(map_times>=1); - u32 prot=PROT_READ|(w?PROT_WRITE:0); - rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset); - if (MAP_FAILED==rv || rv!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0)) - { - printf("MAP1 failed %d\n",errno); - return 0; - } - - for (u32 i=1;i slow and stuttery - { - fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO); - unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem"); - } -#endif - - - - u32 sz = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; - void* rv=mmap(0, sz, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); - verify(rv != NULL); - munmap(rv,sz); - return (u8*)rv + 0x10000 - unat(rv)%0x10000;//align to 64 KB (Needed for linaro mmap not to extend to next region) - } -#endif - -#define map_buffer(dsts,dste,offset,sz,w) {ptr=_nvmem_map_buffer(dsts,dste-dsts,offset,sz,w);if (!ptr) return false;} -#define unused_buffer(start,end) {ptr=_nvmem_unused_buffer(start,end);if (!ptr) return false;} - -u32 pagecnt; -void _vmem_bm_reset_nvmem() -{ - #if defined(TARGET_NO_NVMEM) - return; - #endif - - #ifdef TARGET_IPHONE - //On iOS & nacl we allways allocate all of the mapping table - mprotect(p_sh4rcb, sizeof(p_sh4rcb->fpcb), PROT_READ | PROT_WRITE); - return; - #endif - pagecnt=0; - -#if HOST_OS==OS_WINDOWS - VirtualFree(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MEM_DECOMMIT); -#else - mprotect(p_sh4rcb, sizeof(p_sh4rcb->fpcb), PROT_NONE); - madvise(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MADV_DONTNEED); - #ifdef MADV_REMOVE - madvise(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MADV_REMOVE); - #else - //OSX, IOS - madvise(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MADV_FREE); - #endif -#endif - - printf("Freeing fpcb\n"); -} - -bool BM_LockedWrite(u8* address) -{ - if (!_nvmem_enabled()) - return false; - -#if FEAT_SHREC != DYNAREC_NONE - u32 addr=address-(u8*)p_sh4rcb->fpcb; - - address=(u8*)p_sh4rcb->fpcb+ (addr&~PAGE_MASK); - - if (addrfpcb)) - { - //printf("Allocated %d PAGES [%08X]\n",++pagecnt,addr); - -#if HOST_OS==OS_WINDOWS - verify(VirtualAlloc(address,PAGE_SIZE,MEM_COMMIT,PAGE_READWRITE)); -#else - mprotect (address, PAGE_SIZE, PROT_READ | PROT_WRITE); -#endif - - bm_vmem_pagefill((void**)address,PAGE_SIZE); - + if (ptrint >= start && ptrint < end) { + // Alloc the page then and initialize it to default values + void *aligned_addr = (void*)(ptrint & (~PAGE_MASK)); + vmem_platform_ondemand_page(aligned_addr, PAGE_SIZE); + bm_vmem_pagefill((void**)aligned_addr, PAGE_SIZE); return true; } -#else -die("BM_LockedWrite and NO REC"); -#endif return false; } -bool _vmem_reserve() -{ - void* ptr=0; - +bool _vmem_reserve() { + // TODO: Static assert? verify((sizeof(Sh4RCB)%PAGE_SIZE)==0); - if (settings.dynarec.disable_nvmem) - return _vmem_reserve_nonvmem(); + VMemType vmemstatus = MemTypeError; - virt_ram_base=(u8*)_nvmem_alloc_mem(); + // Use vmem only if settings mandate so, and if we have proper exception handlers. + #ifndef TARGET_NO_EXCEPTIONS + if (!settings.dynarec.disable_nvmem) + vmemstatus = vmem_platform_init((void**)&virt_ram_base, (void**)&p_sh4rcb); + #endif - if (virt_ram_base==0) - return _vmem_reserve_nonvmem(); - - p_sh4rcb=(Sh4RCB*)virt_ram_base; + // Fallback to statically allocated buffers, this results in slow-ops being generated. + if (vmemstatus == MemTypeError) { + printf("Warning! nvmem is DISABLED (due to failure or not being built-in\n"); + virt_ram_base = 0; - // Map the sh4 context but protect access to Sh4RCB.fpcb[] -#if HOST_OS==OS_WINDOWS - //verify(p_sh4rcb==VirtualAlloc(p_sh4rcb,sizeof(Sh4RCB),MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE)); - verify(p_sh4rcb==VirtualAlloc(p_sh4rcb,sizeof(Sh4RCB),MEM_RESERVE,PAGE_NOACCESS)); + // Allocate it all and initialize it. + p_sh4rcb = (Sh4RCB*)malloc_pages(sizeof(Sh4RCB)); + bm_vmem_pagefill((void**)p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); - verify(VirtualAlloc((u8*)p_sh4rcb + sizeof(p_sh4rcb->fpcb),sizeof(Sh4RCB)-sizeof(p_sh4rcb->fpcb),MEM_COMMIT,PAGE_READWRITE)); -#else - verify(p_sh4rcb==mmap(p_sh4rcb,sizeof(Sh4RCB),PROT_NONE,MAP_PRIVATE | MAP_ANON, -1, 0)); - mprotect((u8*)p_sh4rcb + sizeof(p_sh4rcb->fpcb),sizeof(Sh4RCB)-sizeof(p_sh4rcb->fpcb),PROT_READ|PROT_WRITE); -#endif - virt_ram_base+=sizeof(Sh4RCB); + mem_b.size = RAM_SIZE; + mem_b.data = (u8*)malloc_pages(RAM_SIZE); - //Area 0 - //[0x00000000 ,0x00800000) -> unused - unused_buffer(0x00000000,0x00800000); + vram.size = VRAM_SIZE; + vram.data = (u8*)malloc_pages(VRAM_SIZE); - //I wonder, aica ram warps here ?.? - //I really should check teh docs before codin ;p - //[0x00800000,0x00A00000); - map_buffer(0x00800000,0x01000000,MAP_ARAM_START_OFFSET,ARAM_SIZE,false); - map_buffer(0x20000000,0x20000000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true); + aica_ram.size = ARAM_SIZE; + aica_ram.data = (u8*)malloc_pages(ARAM_SIZE); + } + else { + printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? "4GB" : "512MB"); + // Map the different parts of the memory file into the new memory range we got. + #define MAP_RAM_START_OFFSET 0 + #define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE) + #define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) + const vmem_mapping mem_mappings[] = { + {0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused + {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica, wraps too + {0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, + {0x01000000, 0x04000000, 0, 0, false}, // More unused + {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC) + {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) + {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror + {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror + {0x08000000, 0x0C000000, 0, 0, false}, // Area 2 + {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) + {0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused) + }; + vmem_platform_create_mappings(&mem_mappings[0], sizeof(mem_mappings) / sizeof(mem_mappings[0])); - aica_ram.size=ARAM_SIZE; - aica_ram.data=(u8*)ptr; - //[0x01000000 ,0x04000000) -> unused - unused_buffer(0x01000000,0x04000000); - + // Point buffers to actual data pointers + aica_ram.size = ARAM_SIZE; + aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writtable AICA addrspace - //Area 1 - //[0x04000000,0x05000000) -> vram (16mb, warped on dc) - map_buffer(0x04000000,0x05000000,MAP_VRAM_START_OFFSET,VRAM_SIZE,true); - - vram.size=VRAM_SIZE; - vram.data=(u8*)ptr; + vram.size = VRAM_SIZE; + vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writtable and lockable) - //[0x05000000,0x06000000) -> unused (32b path) - unused_buffer(0x05000000,0x06000000); - - //[0x06000000,0x07000000) -> vram mirror - map_buffer(0x06000000,0x07000000,MAP_VRAM_START_OFFSET,VRAM_SIZE,true); - - - //[0x07000000,0x08000000) -> unused (32b path) mirror - unused_buffer(0x07000000,0x08000000); - - //Area 2 - //[0x08000000,0x0C000000) -> unused - unused_buffer(0x08000000,0x0C000000); - - //Area 3 - //[0x0C000000,0x0D000000) -> main ram - //[0x0D000000,0x0E000000) -> main ram mirror - //[0x0E000000,0x0F000000) -> main ram mirror - //[0x0F000000,0x10000000) -> main ram mirror - map_buffer(0x0C000000,0x10000000,MAP_RAM_START_OFFSET,RAM_SIZE,true); - - mem_b.size=RAM_SIZE; - mem_b.data=(u8*)ptr; - - //Area 4 - //Area 5 - //Area 6 - //Area 7 - //all -> Unused - //[0x10000000,0x20000000) -> unused - unused_buffer(0x10000000,0x20000000); - - printf("vmem reserve: base: %08X, aram: %08x, vram: %08X, ram: %08X\n",virt_ram_base,aica_ram.data,vram.data,mem_b.data); + mem_b.size = RAM_SIZE; + mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror + } + // Clear out memory aica_ram.Zero(); vram.Zero(); mem_b.Zero(); - printf("Mem alloc successful!\n"); - - return virt_ram_base!=0; + return true; } -void _vmem_release() -{ - if (!_nvmem_enabled()) - _vmem_release_nonvmem(); - else - { - if (virt_ram_base != NULL) - { -#if HOST_OS == OS_WINDOWS - VirtualFree(virt_ram_base, 0, MEM_RELEASE); -#else - munmap(virt_ram_base, 0x20000000); -#endif - virt_ram_base = NULL; - } -#if HOST_OS != OS_WINDOWS - close(fd); -#endif +#define freedefptr(x) \ + if (x) { free(x); x = NULL; } + +void _vmem_release() { + if (virt_ram_base) + vmem_platform_destroy(); + else { + freedefptr(p_sh4rcb); + freedefptr(vram.data); + freedefptr(aica_ram.data); + freedefptr(mem_b.data); } } -#else - -bool _vmem_reserve() -{ - return _vmem_reserve_nonvmem(); -} -void _vmem_release() -{ - _vmem_release_nonvmem(); -} -#endif diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h index 8509fe3ac..f739b0454 100644 --- a/core/hw/mem/_vmem.h +++ b/core/hw/mem/_vmem.h @@ -1,6 +1,33 @@ #pragma once #include "types.h" +enum VMemType { + MemType4GB, + MemType512MB, + MemTypeError +}; + +struct vmem_mapping { + u32 start_address, end_address; + unsigned memoffset, memsize; + bool allow_writes; +}; + +// Platform specific vmemory API +// To initialize (maybe) the vmem subsystem +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr); +// To reset the on-demand allocated pages. +void vmem_platform_reset_mem(void *ptr, unsigned size_bytes); +// To handle a fault&allocate an ondemand page. +void vmem_platform_ondemand_page(void *address, unsigned size_bytes); +// To create the mappings in the address space. +void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps); +// Just tries to wipe as much as possible in the relevant area. +void vmem_platform_destroy(); + +// Note: if you want to disable vmem magic in any given platform, implement the +// above functions as empty functions and make vmem_platform_init return MemTypeError. + //Typedef's //ReadMem typedef u8 DYNACALL _vmem_ReadMem8FP(u32 Address); @@ -70,4 +97,4 @@ static inline bool _nvmem_enabled() { return virt_ram_base != 0; } -void _vmem_bm_reset(); \ No newline at end of file +void _vmem_bm_reset(); diff --git a/core/hw/pvr/pvr_mem.h b/core/hw/pvr/pvr_mem.h index 722d37bf6..f2584937d 100644 --- a/core/hw/pvr/pvr_mem.h +++ b/core/hw/pvr/pvr_mem.h @@ -9,7 +9,7 @@ f32 vrf(u32 addr); u32 vri(u32 addr); //vram 32-64b -extern VArray2 vram; +extern VLockedMemory vram; //read u8 DYNACALL pvr_read_area1_8(u32 addr); u16 DYNACALL pvr_read_area1_16(u32 addr); @@ -36,4 +36,4 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb); void YUV_init(); //registers -#define PVR_BASE 0x005F8000 \ No newline at end of file +#define PVR_BASE 0x005F8000 diff --git a/core/hw/sh4/sh4_mem.cpp b/core/hw/sh4/sh4_mem.cpp index b05c2c419..e3a76f08f 100644 --- a/core/hw/sh4/sh4_mem.cpp +++ b/core/hw/sh4/sh4_mem.cpp @@ -17,7 +17,7 @@ //main system mem -VArray2 mem_b; +VLockedMemory mem_b; void _vmem_init(); void _vmem_reset(); diff --git a/core/hw/sh4/sh4_mem.h b/core/hw/sh4/sh4_mem.h index e9a03af47..31982ab3f 100644 --- a/core/hw/sh4/sh4_mem.h +++ b/core/hw/sh4/sh4_mem.h @@ -2,7 +2,7 @@ #include "types.h" //main system mem -extern VArray2 mem_b; +extern VLockedMemory mem_b; #include "hw/mem/_vmem.h" #include "modules/mmu.h" diff --git a/core/linux/common.cpp b/core/linux/common.cpp index 3cbd62a68..50df5f662 100644 --- a/core/linux/common.cpp +++ b/core/linux/common.cpp @@ -34,7 +34,7 @@ #include "hw/sh4/dyna/ngen.h" -#if !defined(TARGET_NO_NVMEM) +#if !defined(TARGET_NO_EXCEPTIONS) bool ngen_Rewrite(unat& addr,unat retadr,unat acc); u32* ngen_readm_fail_v2(u32* ptr,u32* regs,u32 saddr); bool VramLockedWrite(u8* address); @@ -124,79 +124,13 @@ void install_fault_handler(void) sigaction(SIGILL, &act, &segv_oact); #endif } -#else // !defined(TARGET_NO_NVMEM) +#else // !defined(TARGET_NO_EXCEPTIONS) // No exceptions/nvmem dummy handlers. void install_fault_handler(void) {} -#endif // !defined(TARGET_NO_NVMEM) +#endif // !defined(TARGET_NO_EXCEPTIONS) #include -void VArray2::LockRegion(u32 offset,u32 size) -{ - #if !defined(TARGET_NO_NVMEM) - u32 inpage=offset & PAGE_MASK; - u32 rv=mprotect (data+offset-inpage, size+inpage, PROT_READ ); - if (rv!=0) - { - printf("mprotect(%8s,%08X,R) failed: %d | %d\n",data+offset-inpage,size+inpage,rv,errno); - die("mprotect failed ..\n"); - } - - #else - //printf("VA2: LockRegion\n"); - #endif -} - -void print_mem_addr() -{ - FILE *ifp, *ofp; - - char outputFilename[] = "/data/data/com.reicast.emulator/files/mem_alloc.txt"; - - ifp = fopen("/proc/self/maps", "r"); - - if (ifp == NULL) { - fprintf(stderr, "Can't open input file /proc/self/maps!\n"); - exit(1); - } - - ofp = fopen(outputFilename, "w"); - - if (ofp == NULL) { - fprintf(stderr, "Can't open output file %s!\n", - outputFilename); -#if HOST_OS == OS_LINUX - ofp = stderr; -#else - exit(1); -#endif - } - - char line [ 512 ]; - while (fgets(line, sizeof line, ifp) != NULL) { - fprintf(ofp, "%s", line); - } - - fclose(ifp); - if (ofp != stderr) - fclose(ofp); -} - -void VArray2::UnLockRegion(u32 offset,u32 size) -{ - #if !defined(TARGET_NO_NVMEM) - u32 inpage=offset & PAGE_MASK; - u32 rv=mprotect (data+offset-inpage, size+inpage, PROT_READ | PROT_WRITE); - if (rv!=0) - { - print_mem_addr(); - printf("mprotect(%8p,%08X,RW) failed: %d | %d\n",data+offset-inpage,size+inpage,rv,errno); - die("mprotect failed ..\n"); - } - #else - //printf("VA2: UnLockRegion\n"); - #endif -} double os_GetSeconds() { timeval a; diff --git a/core/linux/context.cpp b/core/linux/context.cpp index 9884ad426..7c317c401 100644 --- a/core/linux/context.cpp +++ b/core/linux/context.cpp @@ -8,7 +8,7 @@ #define __USE_GNU 1 #endif - #if !defined(TARGET_NO_NVMEM) + #if !defined(TARGET_NO_EXCEPTIONS) #include #endif #endif @@ -29,7 +29,7 @@ void bicopy(Ta& rei, Tb& seg, bool to_segfault) { void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_segfault) { -#if !defined(TARGET_NO_NVMEM) +#if !defined(TARGET_NO_EXCEPTIONS) #if HOST_CPU == CPU_ARM #if defined(__FreeBSD__) bicopy(reictx->pc, MCTX(.__gregs[_REG_PC]), to_segfault); diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp new file mode 100644 index 000000000..8fcf57614 --- /dev/null +++ b/core/linux/posix_vmem.cpp @@ -0,0 +1,177 @@ + +// Implementation of the vmem related function for POSIX-like platforms. +// There's some minimal amount of platform specific hacks to support +// Android and OSX since they are slightly different in some areas. + +// This implements the VLockedMemory interface, as defined in _vmem.h +// The implementation allows it to be empty (that is, to not lock memory). + +#include +#include +#include +#include +#include +#include + +#include "hw/mem/_vmem.h" +#include "stdclass.h" + +#ifndef MAP_NOSYNC +#define MAP_NOSYNC 0 //missing from linux :/ -- could be the cause of android slowness ? +#endif + +#ifdef _ANDROID + #include + #ifndef ASHMEM_DEVICE + #define ASHMEM_DEVICE "/dev/ashmem" + #undef PAGE_MASK + #define PAGE_MASK (PAGE_SIZE-1) +#else + #define PAGE_SIZE 4096 + #define PAGE_MASK (PAGE_SIZE-1) +#endif + +// Android specific ashmem-device stuff for creating shared memory regions +int ashmem_create_region(const char *name, size_t size) { + int fd = open(ASHMEM_DEVICE, O_RDWR); + if (fd < 0) + return -1; + + if (ioctl(fd, ASHMEM_SET_SIZE, size) < 0) { + close(fd); + return -1; + } + + return fd; +} +#endif // #ifdef _ANDROID + +void VLockedMemory::LockRegion(unsigned offset, unsigned size_bytes) { + size_t inpage = offset & PAGE_MASK; + if (mprotect(&data[offset - inpage], size_bytes + inpage, PROT_READ)) { + die("mprotect failed ..\n"); + } +} + +void VLockedMemory::UnLockRegion(unsigned offset, unsigned size_bytes) { + size_t inpage = offset & PAGE_MASK; + if (mprotect(&data[offset - inpage], size_bytes + inpage, PROT_READ|PROT_WRITE)) { + // Add some way to see why it failed? gdb> info proc mappings + die("mprotect failed ..\n"); + } +} + +// Allocates memory via a fd on shmem/ahmem or even a file on disk +static int allocate_shared_filemem() { + int fd = -1; + #if defined(_ANDROID) + // Use Android's specific shmem stuff. + fd = ashmem_create_region(0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX); + #else + #if HOST_OS != OS_DARWIN + fd = shm_open("/dcnzorz_mem", O_CREAT | O_EXCL | O_RDWR,S_IREAD | S_IWRITE); + shm_unlink("/dcnzorz_mem"); + #endif + + // if shmem does not work (or using OSX) fallback to a regular file on disk + if (fd < 0) { + string path = get_writable_data_path("/dcnzorz_mem"); + fd = open(path.c_str(), O_CREAT|O_RDWR|O_TRUNC, S_IRWXU|S_IRWXG|S_IRWXO); + unlink(path.c_str()); + } + // If we can't open the file, fallback to slow mem. + if (fd < 0) + return -1; + + // Finally make the file as big as we need! + if (ftruncate(fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX)) { + // Can't get as much memory as needed, fallback. + close(fd); + return -1; + } + #endif + + return fd; +} + +// Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc. +// The function supports allocating 512MB or 4GB addr spaces. + +static int shmem_fd = -1; + +// vmem_base_addr points to an address space of 512MB (or 4GB) that can be used for fast memory ops. +// In negative offsets of the pointer (up to FPCB size, usually 65/129MB) the context and jump table +// can be found. If the platform init returns error, the user is responsible for initializing the +// memory using a fallback (that is, regular mallocs and falling back to slow memory JIT). +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) { + // Firt let's try to allocate the shm-backed memory + shmem_fd = allocate_shared_filemem(); + if (shmem_fd < 0) + return MemTypeError; + + // Now try to allocate a contiguous piece of memory. + unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; + void *first_ptr = mmap(0, memsize, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (!first_ptr) { + close(shmem_fd); + return MemTypeError; + } + + // Align pointer to 64KB too, some Linaro bug (no idea but let's just be safe I guess). + uintptr_t ptrint = (uintptr_t)first_ptr; + ptrint = (ptrint + 0x10000 - 1) & (~0xffff); + *sh4rcb_addr = (void*)ptrint; + *vmem_base_addr = (void*)(ptrint + sizeof(Sh4RCB)); + void *sh4rcb_base_ptr = (void*)(ptrint + FPCB_SIZE); + + // Now map the memory for the SH4 context, do not include FPCB on purpose (paged on demand). + mprotect(sh4rcb_base_ptr, sizeof(Sh4RCB) - FPCB_SIZE, PROT_READ | PROT_WRITE); + + return MemType512MB; +} + +// Just tries to wipe as much as possible in the relevant area. +void vmem_platform_destroy() { + munmap(virt_ram_base, 0x20000000); +} + +// Resets a chunk of memory by deleting its data and setting its protection back. +void vmem_platform_reset_mem(void *ptr, unsigned size_bytes) { + // Mark them as non accessible. + mprotect(ptr, size_bytes, PROT_NONE); + // Tell the kernel to flush'em all (FIXME: perhaps unmap+mmap 'd be better?) + madvise(ptr, size_bytes, MADV_DONTNEED); + #ifdef MADV_REMOVE + madvise(ptr, size_bytes, MADV_REMOVE); + #endif + madvise(ptr, size_bytes, MADV_FREE); +} + +// Allocates a bunch of memory (page aligned and page-sized) +void vmem_platform_ondemand_page(void *address, unsigned size_bytes) { + verify(!mprotect(address, size_bytes, PROT_READ | PROT_WRITE)); +} + +// Creates mappings to the underlying file including mirroring sections +void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps) { + for (unsigned i = 0; i < nummaps; i++) { + // Ignore unmapped stuff, it is already reserved as PROT_NONE + if (!vmem_maps[i].memsize) + continue; + + // Calculate the number of mirrors + unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address; + unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize; + int protection = vmem_maps[i].allow_writes ? (PROT_READ | PROT_WRITE) : PROT_READ; + verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1); + + for (unsigned j = 0; j < num_mirrors; j++) { + unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize; + verify(!munmap(&virt_ram_base[offset], vmem_maps[i].memsize)); + verify(MAP_FAILED != mmap(&virt_ram_base[offset], vmem_maps[i].memsize, protection, + MAP_SHARED | MAP_NOSYNC | MAP_FIXED, shmem_fd, vmem_maps[i].memoffset)); + // ??? (mprotect(rv,size,prot)!=0) + } + } +} + diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index b0480c273..df904ed22 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -124,8 +124,7 @@ void palette_update() using namespace std; vector VramLocks[VRAM_SIZE/PAGE_SIZE]; -//vram 32-64b -VArray2 vram; +VLockedMemory vram; // vram 32-64b //List functions // @@ -207,7 +206,7 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user { vramlist_lock.Lock(); - vram.LockRegion(block->start,block->len); + vram.LockRegion(block->start, block->len); //TODO: Fix this for 32M wrap as well if (_nvmem_enabled() && VRAM_SIZE == 0x800000) { diff --git a/core/serialize.cpp b/core/serialize.cpp index 1d7fec35b..47b6eb0ca 100644 --- a/core/serialize.cpp +++ b/core/serialize.cpp @@ -101,7 +101,7 @@ extern AicaTimer timers[3]; //./core/hw/aica/aica_if.o -extern VArray2 aica_ram; +extern VLockedMemory aica_ram; extern u32 VREG;//video reg =P extern u32 ARMRST;//arm reset reg extern u32 rtc_EN; @@ -381,7 +381,7 @@ extern DECL_ALIGN(4) u32 SFaceOffsColor; //extern vector VramLocks[/*VRAM_SIZE*/(16*1024*1024)/PAGE_SIZE]; //maybe - probably not - just a locking mechanism //extern cMutex vramlist_lock; -extern VArray2 vram; +extern VLockedMemory vram; @@ -403,7 +403,7 @@ extern Array SCIF; //SCIF : 10 registers //./core/hw/sh4/sh4_mem.o -extern VArray2 mem_b; +extern VLockedMemory mem_b; //one-time init //extern _vmem_handler area1_32b; //one-time init diff --git a/core/stdclass.h b/core/stdclass.h index f5e87e6cb..04e9142db 100644 --- a/core/stdclass.h +++ b/core/stdclass.h @@ -279,29 +279,38 @@ string get_game_save_prefix(); string get_game_basename(); string get_game_dir(); -class VArray2 -{ + +// Locked memory class, used for texture invalidation purposes. +class VLockedMemory { public: - u8* data; - u32 size; - //void Init(void* data,u32 sz); - //void Term(); - void LockRegion(u32 offset,u32 size); - void UnLockRegion(u32 offset,u32 size); + unsigned size; - void Zero() - { - UnLockRegion(0,size); - memset(data,0,size); + void SetRegion(void* ptr, unsigned size) { + this->data = (u8*)ptr; + this->size = size; + } + void *getPtr() const { return data; } + unsigned getSize() const { return size; } + + #ifdef TARGET_NO_EXCEPTIONS + void LockRegion(unsigned offset, unsigned size_bytes) {} + void UnLockRegion(unsigned offset, unsigned size_bytes) {} + #else + void LockRegion(unsigned offset, unsigned size_bytes); + void UnLockRegion(unsigned offset, unsigned size_bytes); + #endif + + void Zero() { + UnLockRegion(0, size); + memset(data, 0, size); } - INLINE u8& operator [](const u32 i) - { + INLINE u8& operator [](unsigned i) { #ifdef MEM_BOUND_CHECK - if (i>=size) + if (i >= size) { - printf("Error: VArray2 , index out of range (%d>%d)\n",i,size-1); + printf("Error: VLockedMemory , index out of range (%d > %d)\n", i, size-1); MEM_DO_BREAK; } #endif @@ -309,6 +318,7 @@ public: } }; + int msgboxf(const wchar* text,unsigned int type,...); diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp new file mode 100644 index 000000000..b1434b7e2 --- /dev/null +++ b/core/windows/win_vmem.cpp @@ -0,0 +1,96 @@ + +#define _WIN32_WINNT 0x0500 +#include +#include + +// Implementation of the vmem related function for Windows platforms. +// For now this probably does some assumptions on the CPU/platform. + +// This implements the VLockedMemory interface, as defined in _vmem.h +// The implementation allows it to be empty (that is, to not lock memory). + +void VLockedMemory::LockRegion(unsigned offset, unsigned size) { + verify(offset + size < this->size && size != 0); + DWORD old; + VirtualProtect(&data[offset], size, PAGE_READONLY, &old); +} + +void VLockedMemory::UnLockRegion(unsigned offset, unsigned size) { + verify(offset + size <= this->size && size != 0); + DWORD old; + VirtualProtect(&data[offset], size, PAGE_READWRITE, &old); +} + +static HANDLE mem_handle = INVALID_HANDLE_VALUE; +static char * base_alloc = NULL; + +// Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc. +// The function supports allocating 512MB or 4GB addr spaces. + +// Plase read the POSIX implementation for more information. On Windows this is +// rather straightforward. +VMemType vmem_platform_init(void *vmem_base_addr, void *sh4rcb_addr) { + // Firt let's try to allocate the in-memory file + mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX, 0); + + // Now allocate the actual address space (it will be 64KB aligned on windows). + unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX; + base_alloc = (char*)VirtualAlloc(0, memsize, MEM_RESERVE, PAGE_NOACCESS); + + // Calculate pointers now + sh4rcb_addr = &base_alloc[0]; + vmem_base_addr = &base_alloc[sizeof(Sh4RCB)]; + + return MemType512MB; +} + +// Just tries to wipe as much as possible in the relevant area. +void vmem_platform_destroy() { + VirtualFree(base_alloc, 0, MEM_RELEASE); + CloseHandle(mem_handle); +} + +// Resets a chunk of memory by deleting its data and setting its protection back. +void vmem_platform_reset_mem(void *ptr, unsigned size_bytes) { + VirtualFree(ptr, size_bytes, MEM_DECOMMIT); +} + +// Allocates a bunch of memory (page aligned and page-sized) +void vmem_platform_ondemand_page(void *address, unsigned size_bytes) { + verify(VirtualAlloc(address, size_bytes, MEM_COMMIT, PAGE_READWRITE)); +} + +/// Creates mappings to the underlying file including mirroring sections +void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps) { + // Since this is tricky to get right in Windows (in posix one can just unmap sections and remap later) + // we unmap the whole thing only to remap it later. + + // Unmap the whole section + VirtualFree(base_alloc, 0, MEM_RELEASE); + + for (unsigned i = 0; i < nummaps; i++) { + unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address; + DWORD protection = vmem_maps[i].allow_writes ? (FILE_MAP_READ | FILE_MAP_WRITE) : FILE_MAP_READ; + + if (!vmem_maps[i].memsize) { + // Unmapped stuff goes with a protected area or memory. Prevent anything from allocating here + void *ptr = VirtualAlloc(&virt_ram_base[vmem_maps[i].start_address], address_range_size, MEM_RESERVE, PAGE_NOACCESS); + verify(ptr == &virt_ram_base[vmem_maps[i].start_address]); + } + else { + // Calculate the number of mirrors + unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize; + verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1); + + // Remap the views one by one + for (unsigned j = 0; j < num_mirrors; j++) { + unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize; + + void *ptr = MapViewOfFileEx(mem_handle, protection, 0, vmem_maps[i].memoffset, + vmem_maps[i].memsize, &virt_ram_base[vmem_maps[i].start_address]); + verify(ptr == &virt_ram_base[vmem_maps[i].start_address]); + } + } + } +} + diff --git a/core/windows/winmain.cpp b/core/windows/winmain.cpp index 029ba1246..44633ab0d 100644 --- a/core/windows/winmain.cpp +++ b/core/windows/winmain.cpp @@ -148,12 +148,10 @@ LONG ExeptionHandler(EXCEPTION_POINTERS *ExceptionInfo) { return EXCEPTION_CONTINUE_EXECUTION; } -#ifndef TARGET_NO_NVMEM else if (BM_LockedWrite(address)) { return EXCEPTION_CONTINUE_EXECUTION; } -#endif #if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X86 else if ( ngen_Rewrite((unat&)ep->ContextRecord->Eip,*(unat*)ep->ContextRecord->Esp,ep->ContextRecord->Eax) ) { @@ -800,21 +798,5 @@ void os_DoEvents() } } - -void VArray2::LockRegion(u32 offset,u32 size) -{ - //verify(offset+sizesize); - verify(size!=0); - DWORD old; - VirtualProtect(((u8*)data)+offset , size, PAGE_READONLY,&old); -} -void VArray2::UnLockRegion(u32 offset,u32 size) -{ - //verify(offset+size<=this->size); - verify(size!=0); - DWORD old; - VirtualProtect(((u8*)data)+offset , size, PAGE_READWRITE,&old); -} - int get_mic_data(u8* buffer) { return 0; } int push_vmu_screen(u8* buffer) { return 0; } diff --git a/shell/cmake/config.cmake b/shell/cmake/config.cmake index 056b7e74a..d1dfd59e4 100644 --- a/shell/cmake/config.cmake +++ b/shell/cmake/config.cmake @@ -343,7 +343,7 @@ if (TARGET_NSW) # -DCMAKE_TOOLCHAIN_FILE=./cmake/devkitA64.cmake -DTARGET_NSW=ON message("HOST_OS ${HOST_OS}") add_definitions(-D__SWITCH__ -DGLES -DMESA_EGL_NO_X11_HEADERS) - add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_NVMEM -DTARGET_NO_NIXPROF) + add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_EXCEPTIONS -DTARGET_NO_NIXPROF) add_definitions(-DTARGET_NO_COREIO_HTTP -DTARGET_NO_WEBUI -UTARGET_SOFTREND) add_definitions(-D_GLIBCXX_USE_C99_MATH_TR1 -D_LDBL_EQ_DBL) @@ -355,7 +355,7 @@ if (TARGET_PS4) # -DCMAKE_TOOLCHAIN_FILE=./cmake/{ps4sdk,clang_scei}.cmake -DTAR add_definitions(-DPS4 -DTARGET_PS4 -DTARGET_BSD -D__ORBIS__ -DGLES -DMESA_EGL_NO_X11_HEADERS) ## last needed for __unix__ on eglplatform.h - add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_NVMEM -DTARGET_NO_NIXPROF) + add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_EXCEPTIONS -DTARGET_NO_NIXPROF) add_definitions(-DTARGET_NO_COREIO_HTTP -DTARGET_NO_WEBUI -UTARGET_SOFTREND) diff --git a/shell/emscripten/Makefile b/shell/emscripten/Makefile index 1c74e19b6..1b8ef4c22 100644 --- a/shell/emscripten/Makefile +++ b/shell/emscripten/Makefile @@ -26,7 +26,7 @@ LDFLAGS := -Wl,-Map,$(notdir $@).map,--gc-sections -Wl,-O3 -Wl,--sort-common CXXONLYFLAGS := -std=c++11 -CXXFLAGS := -O3 -D GLES -D RELEASE -c -D TARGET_EMSCRIPTEN -D TARGET_NO_REC -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_THREADS -D TARGET_BOUNDED_EXECUTION -D TARGET_NO_COREIO_HTTP +CXXFLAGS := -O3 -D GLES -D RELEASE -c -D TARGET_EMSCRIPTEN -D TARGET_NO_REC -D TARGET_NO_EXCEPTIONS -D TARGET_NO_WEBUI -D TARGET_NO_THREADS -D TARGET_BOUNDED_EXECUTION -D TARGET_NO_COREIO_HTTP CXXFLAGS += -fno-strict-aliasing CXXFLAGS += -ffast-math diff --git a/shell/nacl/Makefile b/shell/nacl/Makefile index e0d03834e..fec9cf250 100644 --- a/shell/nacl/Makefile +++ b/shell/nacl/Makefile @@ -23,7 +23,7 @@ CFLAGS = -Wno-error -Wno-ignored-attributes CFLAGS += -O3 -fno-strict-aliasing -ffast-math CFLAGS += -I$(RZDCY_SRC_DIR) -I$(RZDCY_SRC_DIR)/deps CFLAGS += -D RELEASE -D TARGET_NO_JIT -D TARGET_NACL32 -DGLES -CFLAGS += -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_COREIO_HTTP +CFLAGS += -D TARGET_NO_EXCEPTIONS -D TARGET_NO_WEBUI -D TARGET_NO_COREIO_HTTP SOURCES = $(RZDCY_FILES) ../../core/nacl/nacl.cpp From 4458dac49ab4a27637f253aa3b8bfee4d301a07c Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 11 May 2019 22:35:17 +0200 Subject: [PATCH 3/7] Fixing some missing imports and a bug in pointer tracking. --- core/windows/win_vmem.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp index b1434b7e2..44d3deabc 100644 --- a/core/windows/win_vmem.cpp +++ b/core/windows/win_vmem.cpp @@ -3,6 +3,8 @@ #include #include +#include "hw/mem/_vmem.h" + // Implementation of the vmem related function for Windows platforms. // For now this probably does some assumptions on the CPU/platform. @@ -29,7 +31,7 @@ static char * base_alloc = NULL; // Plase read the POSIX implementation for more information. On Windows this is // rather straightforward. -VMemType vmem_platform_init(void *vmem_base_addr, void *sh4rcb_addr) { +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) { // Firt let's try to allocate the in-memory file mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX, 0); @@ -38,8 +40,8 @@ VMemType vmem_platform_init(void *vmem_base_addr, void *sh4rcb_addr) { base_alloc = (char*)VirtualAlloc(0, memsize, MEM_RESERVE, PAGE_NOACCESS); // Calculate pointers now - sh4rcb_addr = &base_alloc[0]; - vmem_base_addr = &base_alloc[sizeof(Sh4RCB)]; + *sh4rcb_addr = &base_alloc[0]; + *vmem_base_addr = &base_alloc[sizeof(Sh4RCB)]; return MemType512MB; } From 4e5053be20c765286837dff99ee1e411314bfde1 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 11 May 2019 22:38:57 +0200 Subject: [PATCH 4/7] Adding win_vmem.cpp to VC project files. --- shell/reicast.vcxproj | 3 ++- shell/reicast.vcxproj.filters | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/shell/reicast.vcxproj b/shell/reicast.vcxproj index 3d10f789a..93904ce9a 100644 --- a/shell/reicast.vcxproj +++ b/shell/reicast.vcxproj @@ -322,6 +322,7 @@ + @@ -998,4 +999,4 @@ for /f "delims=" %%i in ('date /T') do echo #define BUILD_DATE "%%i" >>$(P - \ No newline at end of file + diff --git a/shell/reicast.vcxproj.filters b/shell/reicast.vcxproj.filters index 1f347b584..59289b0b3 100644 --- a/shell/reicast.vcxproj.filters +++ b/shell/reicast.vcxproj.filters @@ -129,6 +129,9 @@ windows + + windows + profiler @@ -1397,4 +1400,4 @@ - \ No newline at end of file + From 1a4323c1ce3579e797ef00a2dd2a3454f0da6597 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sat, 11 May 2019 23:04:22 +0200 Subject: [PATCH 5/7] Fix runtime issue in Windows target, was not mapping pages correctly. --- core/windows/win_vmem.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp index 44d3deabc..32e250a50 100644 --- a/core/windows/win_vmem.cpp +++ b/core/windows/win_vmem.cpp @@ -89,8 +89,8 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize; void *ptr = MapViewOfFileEx(mem_handle, protection, 0, vmem_maps[i].memoffset, - vmem_maps[i].memsize, &virt_ram_base[vmem_maps[i].start_address]); - verify(ptr == &virt_ram_base[vmem_maps[i].start_address]); + vmem_maps[i].memsize, &virt_ram_base[offset]); + verify(ptr == &virt_ram_base[offset]); } } } From 6cba98b70ae490621f65a73860226a869de3f842 Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sun, 12 May 2019 00:02:24 +0200 Subject: [PATCH 6/7] Allocate missing SH4CB and make lock more forgiving. --- core/windows/win_vmem.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp index 32e250a50..2e456198c 100644 --- a/core/windows/win_vmem.cpp +++ b/core/windows/win_vmem.cpp @@ -12,13 +12,13 @@ // The implementation allows it to be empty (that is, to not lock memory). void VLockedMemory::LockRegion(unsigned offset, unsigned size) { - verify(offset + size < this->size && size != 0); + //verify(offset + size < this->size && size != 0); DWORD old; VirtualProtect(&data[offset], size, PAGE_READONLY, &old); } void VLockedMemory::UnLockRegion(unsigned offset, unsigned size) { - verify(offset + size <= this->size && size != 0); + //verify(offset + size <= this->size && size != 0); DWORD old; VirtualProtect(&data[offset], size, PAGE_READWRITE, &old); } @@ -70,6 +70,12 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma // Unmap the whole section VirtualFree(base_alloc, 0, MEM_RELEASE); + // Map the SH4CB block too + void *base_ptr = VirtualAlloc(base_alloc, sizeof(Sh4RCB), MEM_RESERVE, PAGE_NOACCESS); + verify(base_ptr == base_alloc); + void *cntx_ptr = VirtualAlloc((u8*)p_sh4rcb + sizeof(p_sh4rcb->fpcb), sizeof(Sh4RCB) - sizeof(p_sh4rcb->fpcb), MEM_COMMIT, PAGE_READWRITE); + verify(cntx_ptr == (u8*)p_sh4rcb + sizeof(p_sh4rcb->fpcb)); + for (unsigned i = 0; i < nummaps; i++) { unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address; DWORD protection = vmem_maps[i].allow_writes ? (FILE_MAP_READ | FILE_MAP_WRITE) : FILE_MAP_READ; From 55e7c170302d3a0f6d98673ce787e4d7b999b3cd Mon Sep 17 00:00:00 2001 From: David Guillen Fandos Date: Sun, 12 May 2019 13:40:18 +0200 Subject: [PATCH 7/7] Fix minor build issue with madvise flags. --- core/hw/mem/_vmem.cpp | 2 +- core/linux/posix_vmem.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp index 9a297330c..3fbfdf6b5 100644 --- a/core/hw/mem/_vmem.cpp +++ b/core/hw/mem/_vmem.cpp @@ -478,7 +478,7 @@ bool _vmem_reserve() { {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica, wraps too {0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, {0x01000000, 0x04000000, 0, 0, false}, // More unused - {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC) + {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC as 2x8MB) {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp index 8fcf57614..3601eef1b 100644 --- a/core/linux/posix_vmem.cpp +++ b/core/linux/posix_vmem.cpp @@ -141,10 +141,11 @@ void vmem_platform_reset_mem(void *ptr, unsigned size_bytes) { mprotect(ptr, size_bytes, PROT_NONE); // Tell the kernel to flush'em all (FIXME: perhaps unmap+mmap 'd be better?) madvise(ptr, size_bytes, MADV_DONTNEED); - #ifdef MADV_REMOVE + #if defined(MADV_REMOVE) madvise(ptr, size_bytes, MADV_REMOVE); - #endif - madvise(ptr, size_bytes, MADV_FREE); + #elif defined(MADV_FREE) + madvise(ptr, size_bytes, MADV_FREE); + #endif } // Allocates a bunch of memory (page aligned and page-sized)