diff --git a/core/hw/mem/_vmem.h b/core/hw/mem/_vmem.h index f739b0454..3213ce3be 100644 --- a/core/hw/mem/_vmem.h +++ b/core/hw/mem/_vmem.h @@ -24,6 +24,9 @@ void vmem_platform_ondemand_page(void *address, unsigned size_bytes); void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned nummaps); // Just tries to wipe as much as possible in the relevant area. void vmem_platform_destroy(); +// Given a block of data in the .text section, prepares it for JIT action. +// both code_area and size are page aligned. +void vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx); // Note: if you want to disable vmem magic in any given platform, implement the // above functions as empty functions and make vmem_platform_init return MemTypeError. diff --git a/core/hw/sh4/dyna/driver.cpp b/core/hw/sh4/dyna/driver.cpp index 56e66724e..1a840e693 100644 --- a/core/hw/sh4/dyna/driver.cpp +++ b/core/hw/sh4/dyna/driver.cpp @@ -1,18 +1,12 @@ #include "types.h" -#if HOST_OS==OS_WINDOWS -#include <windows.h> -#elif HOST_OS==OS_LINUX -#include <unistd.h> -#include <sys/mman.h> -#endif - #include "../sh4_interpreter.h" #include "../sh4_opcode_list.h" #include "../sh4_core.h" #include "../sh4_if.h" #include "hw/sh4/sh4_interrupts.h" +#include "hw/mem/_vmem.h" #include "hw/sh4/sh4_mem.h" #include "hw/pvr/pvr_mem.h" #include "hw/aica/aica_if.h" @@ -26,9 +20,7 @@ #include "decoder.h" #if FEAT_SHREC != DYNAREC_NONE -//uh uh -#if !defined(_WIN64) u8 SH4_TCB[CODE_SIZE+4096] #if HOST_OS == OS_WINDOWS || FEAT_SHREC != DYNAREC_JIT ; @@ -39,7 +31,6 @@ u8 SH4_TCB[CODE_SIZE+4096] #else #error SH4_TCB ALLOC #endif -#endif u8* CodeCache; @@ -455,55 +446,17 @@ void recSh4_Init() if (_nvmem_enabled()) { verify(mem_b.data==((u8*)p_sh4rcb->sq_buffer+512+0x0C000000)); } - -#if defined(_WIN64) -#ifdef _MSC_VER - for (int i = 10; i < 1300; i++) { + // Prepare some pointer to the pre-allocated code cache: + void *candidate_ptr = (void*)(((unat)SH4_TCB + 4095) & ~4095); - //align to next page .. 
- u8* ptr = (u8*)recSh4_Init - i * 1024 * 1024; - - CodeCache = (u8*)VirtualAlloc(ptr, CODE_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE);//; (u8*)(((unat)SH4_TCB+4095)& ~4095); - - if (CodeCache) - break; - } -#else - CodeCache = (u8*)VirtualAlloc(NULL, CODE_SIZE, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); -#endif + // Call the platform-specific magic to make the pages RWX + CodeCache = NULL; + vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE, (void**)&CodeCache); + // Ensure the pointer returned is non-null verify(CodeCache != NULL); -#else - CodeCache = (u8*)(((unat)SH4_TCB+4095)& ~4095); -#endif -#if HOST_OS == OS_DARWIN - munmap(CodeCache, CODE_SIZE); - CodeCache = (u8*)mmap(CodeCache, CODE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); -#endif - -#if HOST_OS == OS_WINDOWS - DWORD old; - VirtualProtect(CodeCache,CODE_SIZE,PAGE_EXECUTE_READWRITE,&old); -#elif HOST_OS == OS_LINUX || HOST_OS == OS_DARWIN - - printf("\n\t CodeCache addr: %p | from: %p | addr here: %p\n", CodeCache, CodeCache, recSh4_Init); - - #if FEAT_SHREC == DYNAREC_JIT - if (mprotect(CodeCache, CODE_SIZE, PROT_READ|PROT_WRITE|PROT_EXEC)) - { - perror("\n\tError,Couldn't mprotect CodeCache!"); - die("Couldn't mprotect CodeCache"); - } - #endif - -#if TARGET_IPHONE - memset((u8*)mmap(CodeCache, CODE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0),0xFF,CODE_SIZE); -#else - memset(CodeCache,0xFF,CODE_SIZE); -#endif - -#endif + memset(CodeCache, 0xFF, CODE_SIZE); ngen_init(); } @@ -532,4 +485,5 @@ void Get_Sh4Recompiler(sh4_if* rv) rv->IsCpuRunning = recSh4_IsCpuRunning; rv->ResetCache = recSh4_ClearCache; } -#endif + +#endif // FEAT_SHREC != DYNAREC_NONE diff --git a/core/linux/posix_vmem.cpp b/core/linux/posix_vmem.cpp index 3601eef1b..c04bf1e39 100644 --- a/core/linux/posix_vmem.cpp +++ b/core/linux/posix_vmem.cpp @@ -176,3 +176,20 @@ void vmem_platform_create_mappings(const vmem_mapping 
*vmem_maps, unsigned numma } } +// Prepares the code region for JIT operations, thus marking it as RWX +void vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx) { + // Try to map it as RWX, this fails apparently on OSX (and perhaps other systems?) + if (mprotect(code_area, size, PROT_READ | PROT_WRITE | PROT_EXEC)) { + // Well it failed, use another approach, unmap the memory area and remap it back. + // Seems it works well on Darwin according to reicast code :P + munmap(code_area, size); + void *ret_ptr = mmap(code_area, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_ANON, 0, 0); + // Ensure it's the area we requested + verify(ret_ptr == code_area); + } + + // Pointer location should be same: + *code_area_rwx = code_area; +} + + diff --git a/core/windows/win_vmem.cpp b/core/windows/win_vmem.cpp index 2e456198c..2ccfe1c12 100644 --- a/core/windows/win_vmem.cpp +++ b/core/windows/win_vmem.cpp @@ -102,3 +102,44 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma } } +// Prepares the code region for JIT operations, thus marking it as RWX +void vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx) { + // Several issues on Windows: can't protect arbitrary pages due to (I guess) the way + // kernel tracks mappings, so only stuff that has been allocated with VirtualAlloc can be + // protected (the entire allocation IIUC). + + // Strategy: ignore code_area and allocate a new one. Protect it properly. + // More issues: the area should be "close" to the .text stuff so that code gen works. + // Remember that on x64 we have 4 byte jump/load offset immediates, no issues on x86 :D + + // Take this function addr as reference. + uintptr_t base_addr = reinterpret_cast<uintptr_t>(&vmem_platform_prepare_jit_block); + + // Probably safe to assume reicast code is <200MB (today seems to be <16MB on every platform I've seen). 
+ for (unsigned i = 0; i < 1800*1024*1024; i += 10*1024*1024) { // Some arbitrary step size. + uintptr_t try_addr_above = base_addr + i; + uintptr_t try_addr_below = base_addr - i; + + // We need to make sure there's no address wrap around the end of the addrspace (meaning: int overflow). + if (try_addr_above > base_addr) { + void *ptr = VirtualAlloc((void*)try_addr_above, size, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); + if (ptr) { + *code_area_rwx = ptr; + break; + } + } + if (try_addr_below < base_addr) { + void *ptr = VirtualAlloc((void*)try_addr_below, size, MEM_RESERVE | MEM_COMMIT, PAGE_EXECUTE_READWRITE); + if (ptr) { + *code_area_rwx = ptr; + break; + } + } + } + + printf("Found code area at %p, not too far away from %p\n", *code_area_rwx, (void*)base_addr); + + // We should have found some area in the addrspace, after all size is ~tens of megabytes. + // Pages are already RWX, all done +}