diff --git a/core/hw/aica/aica_if.cpp b/core/hw/aica/aica_if.cpp index 9ba65c0a7..d562a0de1 100644 --- a/core/hw/aica/aica_if.cpp +++ b/core/hw/aica/aica_if.cpp @@ -13,7 +13,7 @@ #include -VArray2 aica_ram; +VLockedMemory aica_ram; u32 VREG;//video reg =P u32 ARMRST;//arm reset reg u32 rtc_EN=0; diff --git a/core/hw/aica/aica_if.h b/core/hw/aica/aica_if.h index 8c58b5326..13ee6fd97 100644 --- a/core/hw/aica/aica_if.h +++ b/core/hw/aica/aica_if.h @@ -2,7 +2,7 @@ #include "types.h" extern u32 VREG; -extern VArray2 aica_ram; +extern VLockedMemory aica_ram; extern u32 RealTimeClock; u32 ReadMem_aica_rtc(u32 addr,u32 sz); void WriteMem_aica_rtc(u32 addr,u32 data,u32 sz); diff --git a/core/hw/mem/_vmem.cpp b/core/hw/mem/_vmem.cpp index e621fa948..9a297330c 100644 --- a/core/hw/mem/_vmem.cpp +++ b/core/hw/mem/_vmem.cpp @@ -21,7 +21,6 @@ _vmem_WriteMem32FP* _vmem_WF32[HANDLER_COUNT]; //upper 8b of the address void* _vmem_MemInfo_ptr[0x100]; - void _vmem_get_ptrs(u32 sz,bool write,void*** vmap,void*** func) { *vmap=_vmem_MemInfo_ptr; @@ -385,10 +384,7 @@ void _vmem_reset() verify(_vmem_register_handler(0,0,0,0,0,0)==0); } -void _vmem_term() -{ - -} +void _vmem_term() {} #include "hw/pvr/pvr_mem.h" #include "hw/sh4/sh4_mem.h" @@ -409,414 +405,119 @@ void* malloc_pages(size_t size) { #endif } -bool _vmem_reserve_nonvmem() -{ - virt_ram_base = 0; - - p_sh4rcb=(Sh4RCB*)malloc_pages(sizeof(Sh4RCB)); - - mem_b.size=RAM_SIZE; - mem_b.data=(u8*)malloc_pages(RAM_SIZE); - - vram.size=VRAM_SIZE; - vram.data=(u8*)malloc_pages(VRAM_SIZE); - - aica_ram.size=ARAM_SIZE; - aica_ram.data=(u8*)malloc_pages(ARAM_SIZE); - - return true; -} - -void _vmem_bm_reset_nvmem(); - +// Resets the FPCB table (by either clearing it to the default val +// or by flushing it and making it fault on access again. 
void _vmem_bm_reset() { - if (virt_ram_base) { - #if !defined(TARGET_NO_NVMEM) - _vmem_bm_reset_nvmem(); - #endif - } - -#ifndef TARGET_IPHONE - if (!virt_ram_base) -#endif - { + // If we allocated it via vmem: + if (virt_ram_base) + vmem_platform_reset_mem(p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); + else + // We allocated it via a regular malloc/new/whatever on the heap bm_vmem_pagefill((void**)p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); - } } -static void _vmem_release_nonvmem() -{ - free(p_sh4rcb); - free(vram.data); - free(aica_ram.data); - free(mem_b.data); -} +// This gets called whenever there is a pagefault, it is possible that it lands +// on the fpcb memory range, which is allocated on miss. Returning true tells the +// fault handler this was us, and that the page is resolved and can continue the execution. +bool BM_LockedWrite(u8* address) { + if (!virt_ram_base) + return false; // No vmem, therefore not us who caused this. -#if !defined(TARGET_NO_NVMEM) + uintptr_t ptrint = (uintptr_t)address; + uintptr_t start = (uintptr_t)p_sh4rcb->fpcb; + uintptr_t end = start + sizeof(p_sh4rcb->fpcb); -#define MAP_RAM_START_OFFSET 0 -#define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE) -#define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) - -#if HOST_OS==OS_WINDOWS -#include -HANDLE mem_handle; - -void* _nvmem_map_buffer(u32 dst,u32 addrsz,u32 offset,u32 size, bool w) -{ - void* ptr; - void* rv; - - u32 map_times=addrsz/size; - verify((addrsz%size)==0); - verify(map_times>=1); - - rv= MapViewOfFileEx(mem_handle,FILE_MAP_READ | (w?FILE_MAP_WRITE:0),0,offset,size,&virt_ram_base[dst]); - if (!rv) - return 0; - - for (u32 i=1;i - #include - #include - #include - #include - #include - -#ifndef MAP_NOSYNC -#define MAP_NOSYNC 0 //missing from linux :/ -- could be the cause of android slowness ? 
-#endif - -#ifdef _ANDROID -#include - -#ifndef ASHMEM_DEVICE -#define ASHMEM_DEVICE "/dev/ashmem" -#endif -int ashmem_create_region(const char *name, size_t size) -{ - int fd, ret; - - fd = open(ASHMEM_DEVICE, O_RDWR); - if (fd < 0) - return fd; - - if (name) { - char buf[ASHMEM_NAME_LEN]; - - strlcpy(buf, name, sizeof(buf)); - ret = ioctl(fd, ASHMEM_SET_NAME, buf); - if (ret < 0) - goto error; - } - - ret = ioctl(fd, ASHMEM_SET_SIZE, size); - if (ret < 0) - goto error; - - return fd; - -error: - close(fd); - return ret; -} -#endif - - int fd; - void* _nvmem_unused_buffer(u32 start,u32 end) - { - void* ptr=mmap(&virt_ram_base[start], end-start, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0); - if (MAP_FAILED==ptr) - return 0; - return ptr; - } - - - void* _nvmem_map_buffer(u32 dst,u32 addrsz,u32 offset,u32 size, bool w) - { - void* ptr; - void* rv; - - printf("MAP %08X w/ %d\n",dst,offset); - u32 map_times=addrsz/size; - verify((addrsz%size)==0); - verify(map_times>=1); - u32 prot=PROT_READ|(w?PROT_WRITE:0); - rv= mmap(&virt_ram_base[dst], size, prot, MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, offset); - if (MAP_FAILED==rv || rv!=(void*)&virt_ram_base[dst] || (mprotect(rv,size,prot)!=0)) - { - printf("MAP1 failed %d\n",errno); - return 0; - } - - for (u32 i=1;i slow and stuttery - { - fd = open("/data/data/com.reicast.emulator/files/dcnzorz_mem",O_CREAT|O_RDWR|O_TRUNC,S_IRWXU|S_IRWXG|S_IRWXO); - unlink("/data/data/com.reicast.emulator/files/dcnzorz_mem"); - } -#endif - - - - u32 sz = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; - void* rv=mmap(0, sz, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); - verify(rv != NULL); - munmap(rv,sz); - return (u8*)rv + 0x10000 - unat(rv)%0x10000;//align to 64 KB (Needed for linaro mmap not to extend to next region) - } -#endif - -#define map_buffer(dsts,dste,offset,sz,w) {ptr=_nvmem_map_buffer(dsts,dste-dsts,offset,sz,w);if (!ptr) return false;} -#define unused_buffer(start,end) 
{ptr=_nvmem_unused_buffer(start,end);if (!ptr) return false;} - -u32 pagecnt; -void _vmem_bm_reset_nvmem() -{ - #if defined(TARGET_NO_NVMEM) - return; - #endif - - #ifdef TARGET_IPHONE - //On iOS & nacl we allways allocate all of the mapping table - mprotect(p_sh4rcb, sizeof(p_sh4rcb->fpcb), PROT_READ | PROT_WRITE); - return; - #endif - pagecnt=0; - -#if HOST_OS==OS_WINDOWS - VirtualFree(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MEM_DECOMMIT); -#else - mprotect(p_sh4rcb, sizeof(p_sh4rcb->fpcb), PROT_NONE); - madvise(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MADV_DONTNEED); - #ifdef MADV_REMOVE - madvise(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MADV_REMOVE); - #else - //OSX, IOS - madvise(p_sh4rcb,sizeof(p_sh4rcb->fpcb),MADV_FREE); - #endif -#endif - - printf("Freeing fpcb\n"); -} - -bool BM_LockedWrite(u8* address) -{ - if (!_nvmem_enabled()) - return false; - -#if FEAT_SHREC != DYNAREC_NONE - u32 addr=address-(u8*)p_sh4rcb->fpcb; - - address=(u8*)p_sh4rcb->fpcb+ (addr&~PAGE_MASK); - - if (addrfpcb)) - { - //printf("Allocated %d PAGES [%08X]\n",++pagecnt,addr); - -#if HOST_OS==OS_WINDOWS - verify(VirtualAlloc(address,PAGE_SIZE,MEM_COMMIT,PAGE_READWRITE)); -#else - mprotect (address, PAGE_SIZE, PROT_READ | PROT_WRITE); -#endif - - bm_vmem_pagefill((void**)address,PAGE_SIZE); - + if (ptrint >= start && ptrint < end) { + // Alloc the page then and initialize it to default values + void *aligned_addr = (void*)(ptrint & (~PAGE_MASK)); + vmem_platform_ondemand_page(aligned_addr, PAGE_SIZE); + bm_vmem_pagefill((void**)aligned_addr, PAGE_SIZE); return true; } -#else -die("BM_LockedWrite and NO REC"); -#endif return false; } -bool _vmem_reserve() -{ - void* ptr=0; - +bool _vmem_reserve() { + // TODO: Static assert? verify((sizeof(Sh4RCB)%PAGE_SIZE)==0); - if (settings.dynarec.disable_nvmem) - return _vmem_reserve_nonvmem(); + VMemType vmemstatus = MemTypeError; - virt_ram_base=(u8*)_nvmem_alloc_mem(); + // Use vmem only if settings mandate so, and if we have proper exception handlers. 
+ #ifndef TARGET_NO_EXCEPTIONS + if (!settings.dynarec.disable_nvmem) + vmemstatus = vmem_platform_init((void**)&virt_ram_base, (void**)&p_sh4rcb); + #endif - if (virt_ram_base==0) - return _vmem_reserve_nonvmem(); - - p_sh4rcb=(Sh4RCB*)virt_ram_base; + // Fallback to statically allocated buffers, this results in slow-ops being generated. + if (vmemstatus == MemTypeError) { + printf("Warning! nvmem is DISABLED (due to failure or not being built-in\n"); + virt_ram_base = 0; - // Map the sh4 context but protect access to Sh4RCB.fpcb[] -#if HOST_OS==OS_WINDOWS - //verify(p_sh4rcb==VirtualAlloc(p_sh4rcb,sizeof(Sh4RCB),MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE)); - verify(p_sh4rcb==VirtualAlloc(p_sh4rcb,sizeof(Sh4RCB),MEM_RESERVE,PAGE_NOACCESS)); + // Allocate it all and initialize it. + p_sh4rcb = (Sh4RCB*)malloc_pages(sizeof(Sh4RCB)); + bm_vmem_pagefill((void**)p_sh4rcb->fpcb, sizeof(p_sh4rcb->fpcb)); - verify(VirtualAlloc((u8*)p_sh4rcb + sizeof(p_sh4rcb->fpcb),sizeof(Sh4RCB)-sizeof(p_sh4rcb->fpcb),MEM_COMMIT,PAGE_READWRITE)); -#else - verify(p_sh4rcb==mmap(p_sh4rcb,sizeof(Sh4RCB),PROT_NONE,MAP_PRIVATE | MAP_ANON, -1, 0)); - mprotect((u8*)p_sh4rcb + sizeof(p_sh4rcb->fpcb),sizeof(Sh4RCB)-sizeof(p_sh4rcb->fpcb),PROT_READ|PROT_WRITE); -#endif - virt_ram_base+=sizeof(Sh4RCB); + mem_b.size = RAM_SIZE; + mem_b.data = (u8*)malloc_pages(RAM_SIZE); - //Area 0 - //[0x00000000 ,0x00800000) -> unused - unused_buffer(0x00000000,0x00800000); + vram.size = VRAM_SIZE; + vram.data = (u8*)malloc_pages(VRAM_SIZE); - //I wonder, aica ram warps here ?.? - //I really should check teh docs before codin ;p - //[0x00800000,0x00A00000); - map_buffer(0x00800000,0x01000000,MAP_ARAM_START_OFFSET,ARAM_SIZE,false); - map_buffer(0x20000000,0x20000000+ARAM_SIZE,MAP_ARAM_START_OFFSET,ARAM_SIZE,true); + aica_ram.size = ARAM_SIZE; + aica_ram.data = (u8*)malloc_pages(ARAM_SIZE); + } + else { + printf("Info: nvmem is enabled, with addr space of size %s\n", vmemstatus == MemType4GB ? 
"4GB" : "512MB"); + // Map the different parts of the memory file into the new memory range we got. + #define MAP_RAM_START_OFFSET 0 + #define MAP_VRAM_START_OFFSET (MAP_RAM_START_OFFSET+RAM_SIZE) + #define MAP_ARAM_START_OFFSET (MAP_VRAM_START_OFFSET+VRAM_SIZE) + const vmem_mapping mem_mappings[] = { + {0x00000000, 0x00800000, 0, 0, false}, // Area 0 -> unused + {0x00800000, 0x01000000, MAP_ARAM_START_OFFSET, ARAM_SIZE, false}, // Aica, wraps too + {0x20000000, 0x20000000+ARAM_SIZE, MAP_ARAM_START_OFFSET, ARAM_SIZE, true}, + {0x01000000, 0x04000000, 0, 0, false}, // More unused + {0x04000000, 0x05000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // Area 1 (vram, 16MB, wrapped on DC) + {0x05000000, 0x06000000, 0, 0, false}, // 32 bit path (unused) + {0x06000000, 0x07000000, MAP_VRAM_START_OFFSET, VRAM_SIZE, true}, // VRAM mirror + {0x07000000, 0x08000000, 0, 0, false}, // 32 bit path (unused) mirror + {0x08000000, 0x0C000000, 0, 0, false}, // Area 2 + {0x0C000000, 0x10000000, MAP_RAM_START_OFFSET, RAM_SIZE, true}, // Area 3 (main RAM + 3 mirrors) + {0x10000000, 0x20000000, 0, 0, false}, // Area 4-7 (unused) + }; + vmem_platform_create_mappings(&mem_mappings[0], sizeof(mem_mappings) / sizeof(mem_mappings[0])); - aica_ram.size=ARAM_SIZE; - aica_ram.data=(u8*)ptr; - //[0x01000000 ,0x04000000) -> unused - unused_buffer(0x01000000,0x04000000); - + // Point buffers to actual data pointers + aica_ram.size = ARAM_SIZE; + aica_ram.data = &virt_ram_base[0x20000000]; // Points to the writtable AICA addrspace - //Area 1 - //[0x04000000,0x05000000) -> vram (16mb, warped on dc) - map_buffer(0x04000000,0x05000000,MAP_VRAM_START_OFFSET,VRAM_SIZE,true); - - vram.size=VRAM_SIZE; - vram.data=(u8*)ptr; + vram.size = VRAM_SIZE; + vram.data = &virt_ram_base[0x04000000]; // Points to first vram mirror (writtable and lockable) - //[0x05000000,0x06000000) -> unused (32b path) - unused_buffer(0x05000000,0x06000000); - - //[0x06000000,0x07000000) -> vram mirror - 
map_buffer(0x06000000,0x07000000,MAP_VRAM_START_OFFSET,VRAM_SIZE,true); - - - //[0x07000000,0x08000000) -> unused (32b path) mirror - unused_buffer(0x07000000,0x08000000); - - //Area 2 - //[0x08000000,0x0C000000) -> unused - unused_buffer(0x08000000,0x0C000000); - - //Area 3 - //[0x0C000000,0x0D000000) -> main ram - //[0x0D000000,0x0E000000) -> main ram mirror - //[0x0E000000,0x0F000000) -> main ram mirror - //[0x0F000000,0x10000000) -> main ram mirror - map_buffer(0x0C000000,0x10000000,MAP_RAM_START_OFFSET,RAM_SIZE,true); - - mem_b.size=RAM_SIZE; - mem_b.data=(u8*)ptr; - - //Area 4 - //Area 5 - //Area 6 - //Area 7 - //all -> Unused - //[0x10000000,0x20000000) -> unused - unused_buffer(0x10000000,0x20000000); - - printf("vmem reserve: base: %08X, aram: %08x, vram: %08X, ram: %08X\n",virt_ram_base,aica_ram.data,vram.data,mem_b.data); + mem_b.size = RAM_SIZE; + mem_b.data = &virt_ram_base[0x0C000000]; // Main memory, first mirror + } + // Clear out memory aica_ram.Zero(); vram.Zero(); mem_b.Zero(); - printf("Mem alloc successful!\n"); - - return virt_ram_base!=0; + return true; } -void _vmem_release() -{ - if (!_nvmem_enabled()) - _vmem_release_nonvmem(); - else - { - if (virt_ram_base != NULL) - { -#if HOST_OS == OS_WINDOWS - VirtualFree(virt_ram_base, 0, MEM_RELEASE); -#else - munmap(virt_ram_base, 0x20000000); -#endif - virt_ram_base = NULL; - } -#if HOST_OS != OS_WINDOWS - close(fd); -#endif +#define freedefptr(x) \ + if (x) { free(x); x = NULL; } + +void _vmem_release() { + if (virt_ram_base) + vmem_platform_destroy(); + else { + freedefptr(p_sh4rcb); + freedefptr(vram.data); + freedefptr(aica_ram.data); + freedefptr(mem_b.data); } } -#else - -bool _vmem_reserve() -{ - return _vmem_reserve_nonvmem(); -} -void _vmem_release() -{ - _vmem_release_nonvmem(); -} -#endif diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h index 8509fe3ac..f739b0454 100644 --- a/core/hw/mem/_vmem.h +++ b/core/hw/mem/_vmem.h @@ -1,6 +1,33 @@ #pragma once #include "types.h" +enum 
VMemType { + MemType4GB, + MemType512MB, + MemTypeError +}; + +struct vmem_mapping { + u32 start_address, end_address; + unsigned memoffset, memsize; + bool allow_writes; +}; + +// Platform specific vmemory API +// To initialize (maybe) the vmem subsystem +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr); +// To reset the on-demand allocated pages. +void vmem_platform_reset_mem(void *ptr, unsigned size_bytes); +// To handle a fault&allocate an ondemand page. +void vmem_platform_ondemand_page(void *address, unsigned size_bytes); +// To create the mappings in the address space. +void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps); +// Just tries to wipe as much as possible in the relevant area. +void vmem_platform_destroy(); + +// Note: if you want to disable vmem magic in any given platform, implement the +// above functions as empty functions and make vmem_platform_init return MemTypeError. + //Typedef's //ReadMem typedef u8 DYNACALL _vmem_ReadMem8FP(u32 Address); @@ -70,4 +97,4 @@ static inline bool _nvmem_enabled() { return virt_ram_base != 0; } -void _vmem_bm_reset(); \ No newline at end of file +void _vmem_bm_reset(); diff --git a/core/hw/pvr/pvr_mem.h b/core/hw/pvr/pvr_mem.h index 722d37bf6..f2584937d 100644 --- a/core/hw/pvr/pvr_mem.h +++ b/core/hw/pvr/pvr_mem.h @@ -9,7 +9,7 @@ f32 vrf(u32 addr); u32 vri(u32 addr); //vram 32-64b -extern VArray2 vram; +extern VLockedMemory vram; //read u8 DYNACALL pvr_read_area1_8(u32 addr); u16 DYNACALL pvr_read_area1_16(u32 addr); @@ -36,4 +36,4 @@ extern "C" void DYNACALL TAWriteSQ(u32 address,u8* sqb); void YUV_init(); //registers -#define PVR_BASE 0x005F8000 \ No newline at end of file +#define PVR_BASE 0x005F8000 diff --git a/core/hw/sh4/sh4_mem.cpp b/core/hw/sh4/sh4_mem.cpp index b05c2c419..e3a76f08f 100644 --- a/core/hw/sh4/sh4_mem.cpp +++ b/core/hw/sh4/sh4_mem.cpp @@ -17,7 +17,7 @@ //main system mem -VArray2 mem_b; +VLockedMemory mem_b; void _vmem_init(); void 
_vmem_reset(); diff --git a/core/hw/sh4/sh4_mem.h b/core/hw/sh4/sh4_mem.h index e9a03af47..31982ab3f 100644 --- a/core/hw/sh4/sh4_mem.h +++ b/core/hw/sh4/sh4_mem.h @@ -2,7 +2,7 @@ #include "types.h" //main system mem -extern VArray2 mem_b; +extern VLockedMemory mem_b; #include "hw/mem/_vmem.h" #include "modules/mmu.h" diff --git a/core/linux/common.cpp b/core/linux/common.cpp index 3cbd62a68..50df5f662 100644 --- a/core/linux/common.cpp +++ b/core/linux/common.cpp @@ -34,7 +34,7 @@ #include "hw/sh4/dyna/ngen.h" -#if !defined(TARGET_NO_NVMEM) +#if !defined(TARGET_NO_EXCEPTIONS) bool ngen_Rewrite(unat& addr,unat retadr,unat acc); u32* ngen_readm_fail_v2(u32* ptr,u32* regs,u32 saddr); bool VramLockedWrite(u8* address); @@ -124,79 +124,13 @@ void install_fault_handler(void) sigaction(SIGILL, &act, &segv_oact); #endif } -#else // !defined(TARGET_NO_NVMEM) +#else // !defined(TARGET_NO_EXCEPTIONS) // No exceptions/nvmem dummy handlers. void install_fault_handler(void) {} -#endif // !defined(TARGET_NO_NVMEM) +#endif // !defined(TARGET_NO_EXCEPTIONS) #include -void VArray2::LockRegion(u32 offset,u32 size) -{ - #if !defined(TARGET_NO_NVMEM) - u32 inpage=offset & PAGE_MASK; - u32 rv=mprotect (data+offset-inpage, size+inpage, PROT_READ ); - if (rv!=0) - { - printf("mprotect(%8s,%08X,R) failed: %d | %d\n",data+offset-inpage,size+inpage,rv,errno); - die("mprotect failed ..\n"); - } - - #else - //printf("VA2: LockRegion\n"); - #endif -} - -void print_mem_addr() -{ - FILE *ifp, *ofp; - - char outputFilename[] = "/data/data/com.reicast.emulator/files/mem_alloc.txt"; - - ifp = fopen("/proc/self/maps", "r"); - - if (ifp == NULL) { - fprintf(stderr, "Can't open input file /proc/self/maps!\n"); - exit(1); - } - - ofp = fopen(outputFilename, "w"); - - if (ofp == NULL) { - fprintf(stderr, "Can't open output file %s!\n", - outputFilename); -#if HOST_OS == OS_LINUX - ofp = stderr; -#else - exit(1); -#endif - } - - char line [ 512 ]; - while (fgets(line, sizeof line, ifp) != NULL) { - 
fprintf(ofp, "%s", line); - } - - fclose(ifp); - if (ofp != stderr) - fclose(ofp); -} - -void VArray2::UnLockRegion(u32 offset,u32 size) -{ - #if !defined(TARGET_NO_NVMEM) - u32 inpage=offset & PAGE_MASK; - u32 rv=mprotect (data+offset-inpage, size+inpage, PROT_READ | PROT_WRITE); - if (rv!=0) - { - print_mem_addr(); - printf("mprotect(%8p,%08X,RW) failed: %d | %d\n",data+offset-inpage,size+inpage,rv,errno); - die("mprotect failed ..\n"); - } - #else - //printf("VA2: UnLockRegion\n"); - #endif -} double os_GetSeconds() { timeval a; diff --git a/core/linux/context.cpp b/core/linux/context.cpp index 9884ad426..7c317c401 100644 --- a/core/linux/context.cpp +++ b/core/linux/context.cpp @@ -8,7 +8,7 @@ #define __USE_GNU 1 #endif - #if !defined(TARGET_NO_NVMEM) + #if !defined(TARGET_NO_EXCEPTIONS) #include #endif #endif @@ -29,7 +29,7 @@ void bicopy(Ta& rei, Tb& seg, bool to_segfault) { void context_segfault(rei_host_context_t* reictx, void* segfault_ctx, bool to_segfault) { -#if !defined(TARGET_NO_NVMEM) +#if !defined(TARGET_NO_EXCEPTIONS) #if HOST_CPU == CPU_ARM #if defined(__FreeBSD__) bicopy(reictx->pc, MCTX(.__gregs[_REG_PC]), to_segfault); diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp new file mode 100644 index 000000000..8fcf57614 --- /dev/null +++ b/core/linux/posix_vmem.cpp @@ -0,0 +1,177 @@ + +// Implementation of the vmem related function for POSIX-like platforms. +// There's some minimal amount of platform specific hacks to support +// Android and OSX since they are slightly different in some areas. + +// This implements the VLockedMemory interface, as defined in _vmem.h +// The implementation allows it to be empty (that is, to not lock memory). + +#include +#include +#include +#include +#include +#include + +#include "hw/mem/_vmem.h" +#include "stdclass.h" + +#ifndef MAP_NOSYNC +#define MAP_NOSYNC 0 //missing from linux :/ -- could be the cause of android slowness ? 
+#endif + +#ifdef _ANDROID + #include + #ifndef ASHMEM_DEVICE + #define ASHMEM_DEVICE "/dev/ashmem" + #undef PAGE_MASK + #define PAGE_MASK (PAGE_SIZE-1) +#else + #define PAGE_SIZE 4096 + #define PAGE_MASK (PAGE_SIZE-1) +#endif + +// Android specific ashmem-device stuff for creating shared memory regions +int ashmem_create_region(const char *name, size_t size) { + int fd = open(ASHMEM_DEVICE, O_RDWR); + if (fd < 0) + return -1; + + if (ioctl(fd, ASHMEM_SET_SIZE, size) < 0) { + close(fd); + return -1; + } + + return fd; +} +#endif // #ifdef _ANDROID + +void VLockedMemory::LockRegion(unsigned offset, unsigned size_bytes) { + size_t inpage = offset & PAGE_MASK; + if (mprotect(&data[offset - inpage], size_bytes + inpage, PROT_READ)) { + die("mprotect failed ..\n"); + } +} + +void VLockedMemory::UnLockRegion(unsigned offset, unsigned size_bytes) { + size_t inpage = offset & PAGE_MASK; + if (mprotect(&data[offset - inpage], size_bytes + inpage, PROT_READ|PROT_WRITE)) { + // Add some way to see why it failed? gdb> info proc mappings + die("mprotect failed ..\n"); + } +} + +// Allocates memory via a fd on shmem/ahmem or even a file on disk +static int allocate_shared_filemem() { + int fd = -1; + #if defined(_ANDROID) + // Use Android's specific shmem stuff. + fd = ashmem_create_region(0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX); + #else + #if HOST_OS != OS_DARWIN + fd = shm_open("/dcnzorz_mem", O_CREAT | O_EXCL | O_RDWR,S_IREAD | S_IWRITE); + shm_unlink("/dcnzorz_mem"); + #endif + + // if shmem does not work (or using OSX) fallback to a regular file on disk + if (fd < 0) { + string path = get_writable_data_path("/dcnzorz_mem"); + fd = open(path.c_str(), O_CREAT|O_RDWR|O_TRUNC, S_IRWXU|S_IRWXG|S_IRWXO); + unlink(path.c_str()); + } + // If we can't open the file, fallback to slow mem. + if (fd < 0) + return -1; + + // Finally make the file as big as we need! 
+ if (ftruncate(fd, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX)) { + // Can't get as much memory as needed, fallback. + close(fd); + return -1; + } + #endif + + return fd; +} + +// Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc. +// The function supports allocating 512MB or 4GB addr spaces. + +static int shmem_fd = -1; + +// vmem_base_addr points to an address space of 512MB (or 4GB) that can be used for fast memory ops. +// In negative offsets of the pointer (up to FPCB size, usually 65/129MB) the context and jump table +// can be found. If the platform init returns error, the user is responsible for initializing the +// memory using a fallback (that is, regular mallocs and falling back to slow memory JIT). +VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) { + // Firt let's try to allocate the shm-backed memory + shmem_fd = allocate_shared_filemem(); + if (shmem_fd < 0) + return MemTypeError; + + // Now try to allocate a contiguous piece of memory. + unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX + 0x10000; + void *first_ptr = mmap(0, memsize, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (!first_ptr) { + close(shmem_fd); + return MemTypeError; + } + + // Align pointer to 64KB too, some Linaro bug (no idea but let's just be safe I guess). + uintptr_t ptrint = (uintptr_t)first_ptr; + ptrint = (ptrint + 0x10000 - 1) & (~0xffff); + *sh4rcb_addr = (void*)ptrint; + *vmem_base_addr = (void*)(ptrint + sizeof(Sh4RCB)); + void *sh4rcb_base_ptr = (void*)(ptrint + FPCB_SIZE); + + // Now map the memory for the SH4 context, do not include FPCB on purpose (paged on demand). + mprotect(sh4rcb_base_ptr, sizeof(Sh4RCB) - FPCB_SIZE, PROT_READ | PROT_WRITE); + + return MemType512MB; +} + +// Just tries to wipe as much as possible in the relevant area. +void vmem_platform_destroy() { + munmap(virt_ram_base, 0x20000000); +} + +// Resets a chunk of memory by deleting its data and setting its protection back. 
+void vmem_platform_reset_mem(void *ptr, unsigned size_bytes) { + // Mark them as non accessible. + mprotect(ptr, size_bytes, PROT_NONE); + // Tell the kernel to flush'em all (FIXME: perhaps unmap+mmap 'd be better?) + madvise(ptr, size_bytes, MADV_DONTNEED); + #ifdef MADV_REMOVE + madvise(ptr, size_bytes, MADV_REMOVE); + #endif + madvise(ptr, size_bytes, MADV_FREE); +} + +// Allocates a bunch of memory (page aligned and page-sized) +void vmem_platform_ondemand_page(void *address, unsigned size_bytes) { + verify(!mprotect(address, size_bytes, PROT_READ | PROT_WRITE)); +} + +// Creates mappings to the underlying file including mirroring sections +void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps) { + for (unsigned i = 0; i < nummaps; i++) { + // Ignore unmapped stuff, it is already reserved as PROT_NONE + if (!vmem_maps[i].memsize) + continue; + + // Calculate the number of mirrors + unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address; + unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize; + int protection = vmem_maps[i].allow_writes ? (PROT_READ | PROT_WRITE) : PROT_READ; + verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1); + + for (unsigned j = 0; j < num_mirrors; j++) { + unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize; + verify(!munmap(&virt_ram_base[offset], vmem_maps[i].memsize)); + verify(MAP_FAILED != mmap(&virt_ram_base[offset], vmem_maps[i].memsize, protection, + MAP_SHARED | MAP_NOSYNC | MAP_FIXED, shmem_fd, vmem_maps[i].memoffset)); + // ??? 
(mprotect(rv,size,prot)!=0) + } + } +} + diff --git a/core/rend/TexCache.cpp b/core/rend/TexCache.cpp index b0480c273..df904ed22 100644 --- a/core/rend/TexCache.cpp +++ b/core/rend/TexCache.cpp @@ -124,8 +124,7 @@ void palette_update() using namespace std; vector VramLocks[VRAM_SIZE/PAGE_SIZE]; -//vram 32-64b -VArray2 vram; +VLockedMemory vram; // vram 32-64b //List functions // @@ -207,7 +206,7 @@ vram_block* libCore_vramlock_Lock(u32 start_offset64,u32 end_offset64,void* user { vramlist_lock.Lock(); - vram.LockRegion(block->start,block->len); + vram.LockRegion(block->start, block->len); //TODO: Fix this for 32M wrap as well if (_nvmem_enabled() && VRAM_SIZE == 0x800000) { diff --git a/core/serialize.cpp b/core/serialize.cpp index 1d7fec35b..47b6eb0ca 100644 --- a/core/serialize.cpp +++ b/core/serialize.cpp @@ -101,7 +101,7 @@ extern AicaTimer timers[3]; //./core/hw/aica/aica_if.o -extern VArray2 aica_ram; +extern VLockedMemory aica_ram; extern u32 VREG;//video reg =P extern u32 ARMRST;//arm reset reg extern u32 rtc_EN; @@ -381,7 +381,7 @@ extern DECL_ALIGN(4) u32 SFaceOffsColor; //extern vector VramLocks[/*VRAM_SIZE*/(16*1024*1024)/PAGE_SIZE]; //maybe - probably not - just a locking mechanism //extern cMutex vramlist_lock; -extern VArray2 vram; +extern VLockedMemory vram; @@ -403,7 +403,7 @@ extern Array SCIF; //SCIF : 10 registers //./core/hw/sh4/sh4_mem.o -extern VArray2 mem_b; +extern VLockedMemory mem_b; //one-time init //extern _vmem_handler area1_32b; //one-time init diff --git a/core/stdclass.h b/core/stdclass.h index f5e87e6cb..04e9142db 100644 --- a/core/stdclass.h +++ b/core/stdclass.h @@ -279,29 +279,38 @@ string get_game_save_prefix(); string get_game_basename(); string get_game_dir(); -class VArray2 -{ + +// Locked memory class, used for texture invalidation purposes. 
+class VLockedMemory { public: - u8* data; - u32 size; - //void Init(void* data,u32 sz); - //void Term(); - void LockRegion(u32 offset,u32 size); - void UnLockRegion(u32 offset,u32 size); + unsigned size; - void Zero() - { - UnLockRegion(0,size); - memset(data,0,size); + void SetRegion(void* ptr, unsigned size) { + this->data = (u8*)ptr; + this->size = size; + } + void *getPtr() const { return data; } + unsigned getSize() const { return size; } + + #ifdef TARGET_NO_EXCEPTIONS + void LockRegion(unsigned offset, unsigned size_bytes) {} + void UnLockRegion(unsigned offset, unsigned size_bytes) {} + #else + void LockRegion(unsigned offset, unsigned size_bytes); + void UnLockRegion(unsigned offset, unsigned size_bytes); + #endif + + void Zero() { + UnLockRegion(0, size); + memset(data, 0, size); } - INLINE u8& operator [](const u32 i) - { + INLINE u8& operator [](unsigned i) { #ifdef MEM_BOUND_CHECK - if (i>=size) + if (i >= size) { - printf("Error: VArray2 , index out of range (%d>%d)\n",i,size-1); + printf("Error: VLockedMemory , index out of range (%d > %d)\n", i, size-1); MEM_DO_BREAK; } #endif @@ -309,6 +318,7 @@ public: } }; + int msgboxf(const wchar* text,unsigned int type,...); diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp new file mode 100644 index 000000000..b1434b7e2 --- /dev/null +++ b/core/windows/win_vmem.cpp @@ -0,0 +1,96 @@ + +#define _WIN32_WINNT 0x0500 +#include +#include + +// Implementation of the vmem related function for Windows platforms. +// For now this probably does some assumptions on the CPU/platform. + +// This implements the VLockedMemory interface, as defined in _vmem.h +// The implementation allows it to be empty (that is, to not lock memory). 
+
+// Makes [offset, offset + size) of the buffer read-only so that writes fault
+// and can be caught by the exception handler.
+//   offset: byte offset into `data` where the region starts.
+//   size:   length of the region in bytes, must be non-zero.
+// The region may end exactly at the end of the buffer (same bound as
+// UnLockRegion); the check is written so that offset + size cannot wrap.
+void VLockedMemory::LockRegion(unsigned offset, unsigned size) {
+	verify(size != 0 && offset <= this->size && size <= this->size - offset);
+	DWORD old;
+	VirtualProtect(&data[offset], size, PAGE_READONLY, &old);
+}
+
+// Restores read/write access to [offset, offset + size) of the buffer.
+//   offset: byte offset into `data` where the region starts.
+//   size:   length of the region in bytes, must be non-zero.
+void VLockedMemory::UnLockRegion(unsigned offset, unsigned size) {
+	verify(size != 0 && offset <= this->size && size <= this->size - offset);
+	DWORD old;
+	VirtualProtect(&data[offset], size, PAGE_READWRITE, &old);
+}
+
+// File mapping backing RAM/VRAM/ARAM; INVALID_HANDLE_VALUE while not created.
+static HANDLE mem_handle = INVALID_HANDLE_VALUE;
+// Base of the reserved address range (Sh4RCB first, then the vmem window).
+static char * base_alloc = NULL;
+
+// Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc.
+// The function supports allocating 512MB or 4GB addr spaces.
+
+// Please read the POSIX implementation for more information. On Windows this is
+// rather straightforward.
+//   vmem_base_addr: out, receives the start of the vmem window (right after Sh4RCB).
+//   sh4rcb_addr:    out, receives the start of the reserved range (the Sh4RCB).
+// Returns MemType512MB on success, or MemTypeError (outputs untouched) when the
+// file mapping or the address space reservation fails, so that the caller can
+// fall back to regular heap buffers.
+// NOTE(review): nothing in this function commits the non-FPCB part of Sh4RCB,
+// and vmem_platform_create_mappings releases base_alloc before remapping;
+// confirm the SH4 context pages are committed somewhere before first use.
+VMemType vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr) {
+	// First let's try to allocate the in-memory file.
+	// CreateFileMapping reports failure with NULL, not INVALID_HANDLE_VALUE.
+	mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, RAM_SIZE_MAX + VRAM_SIZE_MAX + ARAM_SIZE_MAX, 0);
+	if (mem_handle == NULL) {
+		mem_handle = INVALID_HANDLE_VALUE;
+		return MemTypeError;
+	}
+
+	// Now allocate the actual address space (it will be 64KB aligned on windows).
+	unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX;
+	base_alloc = (char*)VirtualAlloc(0, memsize, MEM_RESERVE, PAGE_NOACCESS);
+	if (base_alloc == NULL) {
+		// Could not reserve the range: undo the file mapping and report failure.
+		CloseHandle(mem_handle);
+		mem_handle = INVALID_HANDLE_VALUE;
+		return MemTypeError;
+	}
+
+	// Calculate pointers now and hand them back through the out parameters.
+	*sh4rcb_addr = &base_alloc[0];
+	*vmem_base_addr = &base_alloc[sizeof(Sh4RCB)];
+
+	return MemType512MB;
+}
+
+// Just tries to wipe as much as possible in the relevant area.
+// Safe to call when initialization failed or after a previous destroy: each
+// resource is released only if it is held and is then marked as released.
+void vmem_platform_destroy() {
+	if (base_alloc != NULL) {
+		VirtualFree(base_alloc, 0, MEM_RELEASE);
+		base_alloc = NULL;
+	}
+	if (mem_handle != INVALID_HANDLE_VALUE) {
+		CloseHandle(mem_handle);
+		mem_handle = INVALID_HANDLE_VALUE;
+	}
+}
+
+// Resets a chunk of memory by deleting its data and setting its protection back.
+void vmem_platform_reset_mem(void *ptr, unsigned size_bytes) { + VirtualFree(ptr, size_bytes, MEM_DECOMMIT); +} + +// Allocates a bunch of memory (page aligned and page-sized) +void vmem_platform_ondemand_page(void *address, unsigned size_bytes) { + verify(VirtualAlloc(address, size_bytes, MEM_COMMIT, PAGE_READWRITE)); +} + +/// Creates mappings to the underlying file including mirroring sections +void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps) { + // Since this is tricky to get right in Windows (in posix one can just unmap sections and remap later) + // we unmap the whole thing only to remap it later. + + // Unmap the whole section + VirtualFree(base_alloc, 0, MEM_RELEASE); + + for (unsigned i = 0; i < nummaps; i++) { + unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address; + DWORD protection = vmem_maps[i].allow_writes ? (FILE_MAP_READ | FILE_MAP_WRITE) : FILE_MAP_READ; + + if (!vmem_maps[i].memsize) { + // Unmapped stuff goes with a protected area or memory. 
Prevent anything from allocating here + void *ptr = VirtualAlloc(&virt_ram_base[vmem_maps[i].start_address], address_range_size, MEM_RESERVE, PAGE_NOACCESS); + verify(ptr == &virt_ram_base[vmem_maps[i].start_address]); + } + else { + // Calculate the number of mirrors + unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize; + verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1); + + // Remap the views one by one + for (unsigned j = 0; j < num_mirrors; j++) { + unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize; + + void *ptr = MapViewOfFileEx(mem_handle, protection, 0, vmem_maps[i].memoffset, + vmem_maps[i].memsize, &virt_ram_base[vmem_maps[i].start_address]); + verify(ptr == &virt_ram_base[vmem_maps[i].start_address]); + } + } + } +} + diff --git a/core/windows/winmain.cpp b/core/windows/winmain.cpp index 029ba1246..44633ab0d 100644 --- a/core/windows/winmain.cpp +++ b/core/windows/winmain.cpp @@ -148,12 +148,10 @@ LONG ExeptionHandler(EXCEPTION_POINTERS *ExceptionInfo) { return EXCEPTION_CONTINUE_EXECUTION; } -#ifndef TARGET_NO_NVMEM else if (BM_LockedWrite(address)) { return EXCEPTION_CONTINUE_EXECUTION; } -#endif #if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X86 else if ( ngen_Rewrite((unat&)ep->ContextRecord->Eip,*(unat*)ep->ContextRecord->Esp,ep->ContextRecord->Eax) ) { @@ -800,21 +798,5 @@ void os_DoEvents() } } - -void VArray2::LockRegion(u32 offset,u32 size) -{ - //verify(offset+sizesize); - verify(size!=0); - DWORD old; - VirtualProtect(((u8*)data)+offset , size, PAGE_READONLY,&old); -} -void VArray2::UnLockRegion(u32 offset,u32 size) -{ - //verify(offset+size<=this->size); - verify(size!=0); - DWORD old; - VirtualProtect(((u8*)data)+offset , size, PAGE_READWRITE,&old); -} - int get_mic_data(u8* buffer) { return 0; } int push_vmu_screen(u8* buffer) { return 0; } diff --git a/shell/cmake/config.cmake b/shell/cmake/config.cmake index 056b7e74a..d1dfd59e4 100644 --- a/shell/cmake/config.cmake +++ 
b/shell/cmake/config.cmake @@ -343,7 +343,7 @@ if (TARGET_NSW) # -DCMAKE_TOOLCHAIN_FILE=./cmake/devkitA64.cmake -DTARGET_NSW=ON message("HOST_OS ${HOST_OS}") add_definitions(-D__SWITCH__ -DGLES -DMESA_EGL_NO_X11_HEADERS) - add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_NVMEM -DTARGET_NO_NIXPROF) + add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_EXCEPTIONS -DTARGET_NO_NIXPROF) add_definitions(-DTARGET_NO_COREIO_HTTP -DTARGET_NO_WEBUI -UTARGET_SOFTREND) add_definitions(-D_GLIBCXX_USE_C99_MATH_TR1 -D_LDBL_EQ_DBL) @@ -355,7 +355,7 @@ if (TARGET_PS4) # -DCMAKE_TOOLCHAIN_FILE=./cmake/{ps4sdk,clang_scei}.cmake -DTAR add_definitions(-DPS4 -DTARGET_PS4 -DTARGET_BSD -D__ORBIS__ -DGLES -DMESA_EGL_NO_X11_HEADERS) ## last needed for __unix__ on eglplatform.h - add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_NVMEM -DTARGET_NO_NIXPROF) + add_definitions(-DTARGET_NO_THREADS -DTARGET_NO_EXCEPTIONS -DTARGET_NO_NIXPROF) add_definitions(-DTARGET_NO_COREIO_HTTP -DTARGET_NO_WEBUI -UTARGET_SOFTREND) diff --git a/shell/emscripten/Makefile b/shell/emscripten/Makefile index 1c74e19b6..1b8ef4c22 100644 --- a/shell/emscripten/Makefile +++ b/shell/emscripten/Makefile @@ -26,7 +26,7 @@ LDFLAGS := -Wl,-Map,$(notdir $@).map,--gc-sections -Wl,-O3 -Wl,--sort-common CXXONLYFLAGS := -std=c++11 -CXXFLAGS := -O3 -D GLES -D RELEASE -c -D TARGET_EMSCRIPTEN -D TARGET_NO_REC -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_THREADS -D TARGET_BOUNDED_EXECUTION -D TARGET_NO_COREIO_HTTP +CXXFLAGS := -O3 -D GLES -D RELEASE -c -D TARGET_EMSCRIPTEN -D TARGET_NO_REC -D TARGET_NO_EXCEPTIONS -D TARGET_NO_WEBUI -D TARGET_NO_THREADS -D TARGET_BOUNDED_EXECUTION -D TARGET_NO_COREIO_HTTP CXXFLAGS += -fno-strict-aliasing CXXFLAGS += -ffast-math diff --git a/shell/nacl/Makefile b/shell/nacl/Makefile index e0d03834e..fec9cf250 100644 --- a/shell/nacl/Makefile +++ b/shell/nacl/Makefile @@ -23,7 +23,7 @@ CFLAGS = -Wno-error -Wno-ignored-attributes CFLAGS += -O3 -fno-strict-aliasing -ffast-math CFLAGS += -I$(RZDCY_SRC_DIR) 
-I$(RZDCY_SRC_DIR)/deps CFLAGS += -D RELEASE -D TARGET_NO_JIT -D TARGET_NACL32 -DGLES -CFLAGS += -D TARGET_NO_NVMEM -D TARGET_NO_WEBUI -D TARGET_NO_COREIO_HTTP +CFLAGS += -D TARGET_NO_EXCEPTIONS -D TARGET_NO_WEBUI -D TARGET_NO_COREIO_HTTP SOURCES = $(RZDCY_FILES) ../../core/nacl/nacl.cpp