release dynarec mem allocs when terminating, including arm and dsp
Issue #453
This commit is contained in:
parent
c7d214e55a
commit
31debefbe1
|
@ -663,6 +663,7 @@ void Emulator::term()
|
|||
sh4_cpu.Term();
|
||||
custom_texture.Terminate(); // lr: avoid deadlock on exit (win32)
|
||||
reios_term();
|
||||
aicaarm::term();
|
||||
libAICA_Term();
|
||||
pvr::term();
|
||||
mem_Term();
|
||||
|
|
|
@ -100,6 +100,9 @@ void DecodeInst(const u32 *IPtr, Instruction *i)
|
|||
// The three recompiler entry points below all have empty bodies in this variant.
// NOTE(review): presumably the fallback selected by the preprocessor condition
// that the #endif following these stubs closes - confirm against the matching #if.
void recInit() {
|
||||
}
|
||||
|
||||
// Counterpart of recInit(); nothing to release in this variant.
void recTerm() {
|
||||
}
|
||||
|
||||
void recompile() {
|
||||
}
|
||||
#endif
|
||||
|
@ -124,6 +127,7 @@ void writeProg(u32 addr)
|
|||
// Shuts this unit down: flags the shared state as stopped, then calls
// recTerm() to let the recompiler backend clean up.
void term()
|
||||
{
|
||||
state.stopped = true;
|
||||
recTerm();
|
||||
}
|
||||
|
||||
void step()
|
||||
|
|
|
@ -62,6 +62,7 @@ void step();
|
|||
void writeProg(u32 addr);
|
||||
|
||||
void recInit();
|
||||
void recTerm();
|
||||
void runStep();
|
||||
void recompile();
|
||||
|
||||
|
|
|
@ -411,6 +411,10 @@ void recInit()
|
|||
verify(rc);
|
||||
}
|
||||
|
||||
// Termination hook for this backend. The body is empty: no cleanup is
// performed here.
void recTerm()
|
||||
{
|
||||
}
|
||||
|
||||
void runStep()
|
||||
{
|
||||
((void (*)())DynCode)();
|
||||
|
|
|
@ -461,6 +461,22 @@ void recInit()
|
|||
#endif
|
||||
}
|
||||
|
||||
|
||||
void recTerm()
|
||||
{
|
||||
#if defined(TARGET_IPHONE) || defined(TARGET_ARM_MAC)
|
||||
DynCode = nullptr;
|
||||
#endif
|
||||
#ifdef FEAT_NO_RWX_PAGES
|
||||
if (pCodeBuffer != nullptr)
|
||||
vmem_platform_release_jit_block(DynCode, pCodeBuffer, CodeSize);
|
||||
#else
|
||||
if (pCodeBuffer != nullptr && pCodeBuffer != DynCode)
|
||||
vmem_platform_release_jit_block(pCodeBuffer, CodeSize);
|
||||
#endif
|
||||
pCodeBuffer = nullptr;
|
||||
}
|
||||
|
||||
void runStep()
|
||||
{
|
||||
((void (*)())DynCode)();
|
||||
|
|
|
@ -428,6 +428,18 @@ void recInit()
|
|||
die("vmem_platform_prepare_jit_block failed in x64 dsp");
|
||||
}
|
||||
|
||||
void recTerm()
|
||||
{
|
||||
#ifdef FEAT_NO_RWX_PAGES
|
||||
if (pCodeBuffer != nullptr)
|
||||
vmem_platform_release_jit_block(CodeBuffer, pCodeBuffer, CodeBufferSize);
|
||||
#else
|
||||
if (pCodeBuffer != nullptr && pCodeBuffer != CodeBuffer)
|
||||
vmem_platform_release_jit_block(pCodeBuffer, CodeBufferSize);
|
||||
#endif
|
||||
pCodeBuffer = nullptr;
|
||||
}
|
||||
|
||||
void runStep()
|
||||
{
|
||||
((void (*)())&pCodeBuffer[0])();
|
||||
|
|
|
@ -383,6 +383,11 @@ void recInit()
|
|||
die("mprotect failed in x86 dsp");
|
||||
}
|
||||
|
||||
// Termination hook for this backend: only clears pCodeBuffer; no memory is
// unmapped or freed here.
void recTerm()
|
||||
{
|
||||
pCodeBuffer = nullptr;
|
||||
}
|
||||
|
||||
void runStep()
|
||||
{
|
||||
((void (*)())&CodeBuffer[0])();
|
||||
|
|
|
@ -104,6 +104,13 @@ void aicaarm::init()
|
|||
}
|
||||
}
|
||||
|
||||
// Terminates the AICA ARM core. When a dynarec is compiled in
// (FEAT_AREC != DYNAREC_NONE) this forwards to recompiler::term();
// otherwise it does nothing.
void aicaarm::term()
|
||||
{
|
||||
#if FEAT_AREC != DYNAREC_NONE
|
||||
recompiler::term();
|
||||
#endif
|
||||
}
|
||||
|
||||
static void CPUSwitchMode(int mode, bool saveState)
|
||||
{
|
||||
CPUUpdateCPSR();
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
namespace aicaarm {
|
||||
|
||||
void init();
|
||||
void term();
|
||||
void reset();
|
||||
void run(u32 samples);
|
||||
void enable(bool enabled);
|
||||
|
|
|
@ -674,8 +674,6 @@ void init()
|
|||
#endif
|
||||
verify(rc);
|
||||
|
||||
icPtr = ICache;
|
||||
|
||||
for (int i = 0; i < 256; i++)
|
||||
{
|
||||
int count = 0;
|
||||
|
@ -685,6 +683,19 @@ void init()
|
|||
|
||||
cpuBitsSet[i] = count;
|
||||
}
|
||||
flush();
|
||||
}
|
||||
|
||||
void term()
|
||||
{
|
||||
#ifdef FEAT_NO_RWX_PAGES
|
||||
if (ICache != nullptr)
|
||||
vmem_platform_release_jit_block(ARM7_TCB, ICache, ICacheSize);
|
||||
#else
|
||||
if (ICache != nullptr && ICache != ARM7_TCB)
|
||||
vmem_platform_release_jit_block(ICache, ICacheSize);
|
||||
#endif
|
||||
ICache = nullptr;
|
||||
}
|
||||
|
||||
template <bool Load, bool Byte>
|
||||
|
|
|
@ -421,6 +421,7 @@ protected:
|
|||
namespace recompiler {
|
||||
|
||||
void init();
|
||||
void term();
|
||||
void flush();
|
||||
void compile();
|
||||
void *getMemOp(bool load, bool byte);
|
||||
|
|
|
@ -20,14 +20,18 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
|
|||
void vmem_platform_destroy();
|
||||
// Given a block of data in the .text section, prepares it for JIT action.
|
||||
// both code_area and size are page aligned. Returns success.
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx);
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rwx);
|
||||
// Same as above but uses two address spaces one with RX and RW protections.
|
||||
// Note: this function doesn't have to be implemented, it's a fallback for the above one.
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rw, ptrdiff_t *rx_offset);
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rw, ptrdiff_t *rx_offset);
|
||||
// This might not need an implementation (ie x86/64 cpus).
|
||||
void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
|
||||
// Change a code buffer permissions from r-x to/from rw-
|
||||
void vmem_platform_jit_set_exec(void* code, size_t size, bool enable);
|
||||
// Release a jit block previously allocated by vmem_platform_prepare_jit_block
|
||||
void vmem_platform_release_jit_block(void *code_area, size_t size);
|
||||
// Release a jit block previously allocated by vmem_platform_prepare_jit_block (with dual RW and RX areas)
|
||||
void vmem_platform_release_jit_block(void *code_area1, void *code_area2, size_t size);
|
||||
|
||||
// Note: if you want to disable vmem magic in any given platform, implement the
|
||||
// above functions as empty functions and make vmem_platform_init return false.
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
#if defined(_WIN32) || FEAT_SHREC != DYNAREC_JIT || defined(TARGET_IPHONE) || defined(TARGET_ARM_MAC)
|
||||
static u8 *SH4_TCB;
|
||||
#else
|
||||
static u8 SH4_TCB[CODE_SIZE + TEMP_CODE_SIZE + 4096]
|
||||
alignas(4096) static u8 SH4_TCB[CODE_SIZE + TEMP_CODE_SIZE]
|
||||
#if defined(__unix__) || defined(__SWITCH__)
|
||||
__attribute__((section(".text")));
|
||||
#elif defined(__APPLE__)
|
||||
|
@ -394,24 +394,20 @@ static void recSh4_Init()
|
|||
Get_Sh4Interpreter(&sh4Interp);
|
||||
sh4Interp.Init();
|
||||
bm_Init();
|
||||
|
||||
|
||||
if (_nvmem_enabled())
|
||||
verify(mem_b.data == ((u8*)p_sh4rcb->sq_buffer + 512 + 0x0C000000));
|
||||
|
||||
// Prepare some pointer to the pre-allocated code cache:
|
||||
void *candidate_ptr = (void*)(((unat)SH4_TCB + 4095) & ~4095);
|
||||
|
||||
// Call the platform-specific magic to make the pages RWX
|
||||
CodeCache = NULL;
|
||||
CodeCache = nullptr;
|
||||
#ifdef FEAT_NO_RWX_PAGES
|
||||
bool rc = vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache, &cc_rx_offset);
|
||||
bool rc = vmem_platform_prepare_jit_block(SH4_TCB, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache, &cc_rx_offset);
|
||||
#else
|
||||
bool rc = vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache);
|
||||
bool rc = vmem_platform_prepare_jit_block(SH4_TCB, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache);
|
||||
#endif
|
||||
verify(rc);
|
||||
// Ensure the pointer returned is non-null
|
||||
verify(CodeCache != NULL);
|
||||
verify(CodeCache != nullptr);
|
||||
|
||||
TempCodeCache = CodeCache + CODE_SIZE;
|
||||
ngen_init();
|
||||
|
@ -421,6 +417,15 @@ static void recSh4_Init()
|
|||
static void recSh4_Term()
|
||||
{
|
||||
INFO_LOG(DYNAREC, "recSh4 Term");
|
||||
#ifdef FEAT_NO_RWX_PAGES
|
||||
if (CodeCache != nullptr)
|
||||
vmem_platform_release_jit_block(CodeCache, (u8 *)CodeCache + cc_rx_offset, CODE_SIZE + TEMP_CODE_SIZE);
|
||||
#else
|
||||
if (CodeCache != nullptr && CodeCache != SH4_TCB)
|
||||
vmem_platform_release_jit_block(CodeCache, CODE_SIZE + TEMP_CODE_SIZE);
|
||||
#endif
|
||||
CodeCache = nullptr;
|
||||
TempCodeCache = nullptr;
|
||||
bm_Term();
|
||||
sh4Interp.Term();
|
||||
}
|
||||
|
|
|
@ -150,7 +150,6 @@ static int allocate_shared_filemem(unsigned size) {
|
|||
// Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc.
|
||||
|
||||
int vmem_fd = -1;
|
||||
static int shmem_fd2 = -1;
|
||||
static void *reserved_base;
|
||||
static size_t reserved_size;
|
||||
|
||||
|
@ -242,7 +241,7 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
|
|||
}
|
||||
|
||||
// Prepares the code region for JIT operations, thus marking it as RWX
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx)
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rwx)
|
||||
{
|
||||
// Try to map it as RWX, this fails apparently on OSX (and perhaps other systems?)
|
||||
if (code_area != nullptr && mem_region_set_exec(code_area, size))
|
||||
|
@ -278,10 +277,16 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
|
|||
return true;
|
||||
}
|
||||
|
||||
// Releases a single-area JIT block by unmapping `size` bytes at `code_area`.
// The return value of munmap() is not checked.
void vmem_platform_release_jit_block(void *code_area, size_t size)
|
||||
{
|
||||
munmap(code_area, size);
|
||||
}
|
||||
|
||||
// Use two addr spaces: need to remap something twice, therefore use allocate_shared_filemem()
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rw, ptrdiff_t *rx_offset) {
|
||||
shmem_fd2 = allocate_shared_filemem(size);
|
||||
if (shmem_fd2 < 0)
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rw, ptrdiff_t *rx_offset)
|
||||
{
|
||||
int fd = allocate_shared_filemem(size);
|
||||
if (fd < 0)
|
||||
return false;
|
||||
|
||||
// Need to unmap the section we are about to use (it might be already unmapped but nevertheless...)
|
||||
|
@ -289,20 +294,31 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
|
|||
|
||||
// Map the RX bits on the code_area, for proximity, as usual.
|
||||
void *ptr_rx = mmap(code_area, size, PROT_READ | PROT_EXEC,
|
||||
MAP_SHARED | MAP_NOSYNC | MAP_FIXED, shmem_fd2, 0);
|
||||
MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, 0);
|
||||
if (ptr_rx != code_area)
|
||||
{
|
||||
close(fd);
|
||||
return false;
|
||||
}
|
||||
|
||||
// Now remap the same memory as RW in some location we don't really care at all.
|
||||
void *ptr_rw = mmap(NULL, size, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_NOSYNC, shmem_fd2, 0);
|
||||
MAP_SHARED | MAP_NOSYNC, fd, 0);
|
||||
|
||||
*code_area_rw = ptr_rw;
|
||||
*rx_offset = (char*)ptr_rx - (char*)ptr_rw;
|
||||
close(fd);
|
||||
INFO_LOG(DYNAREC, "Info: Using NO_RWX mode, rx ptr: %p, rw ptr: %p, offset: %ld", ptr_rx, ptr_rw, (long)*rx_offset);
|
||||
|
||||
return (ptr_rw != MAP_FAILED);
|
||||
}
|
||||
|
||||
// Releases a dual-area (RX + RW) JIT block. Only `code_area2` is unmapped;
// `code_area1` is intentionally left untouched. The return value of munmap()
// is not checked.
void vmem_platform_release_jit_block(void *code_area1, void *code_area2, size_t size)
|
||||
{
|
||||
// keep code_area1 (RX) mapped since it's statically allocated
|
||||
munmap(code_area2, size);
|
||||
}
|
||||
|
||||
#endif // !__SWITCH__
|
||||
|
||||
void vmem_platform_jit_set_exec(void* code, size_t size, bool enable) {
|
||||
|
|
|
@ -57,7 +57,7 @@ bool vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSiz
|
|||
mapped_regions.reserve(32);
|
||||
|
||||
// First let's try to allocate the in-memory file
|
||||
mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, ramSize, 0);
|
||||
mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, (DWORD)ramSize, 0);
|
||||
|
||||
// Now allocate the actual address space (it will be 64KB aligned on windows).
|
||||
unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX;
|
||||
|
@ -114,7 +114,7 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
|
|||
unmapped_regions.clear();
|
||||
|
||||
for (unsigned i = 0; i < nummaps; i++) {
|
||||
unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
|
||||
size_t address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
|
||||
DWORD protection = vmem_maps[i].allow_writes ? (FILE_MAP_READ | FILE_MAP_WRITE) : FILE_MAP_READ;
|
||||
|
||||
if (!vmem_maps[i].memsize) {
|
||||
|
@ -125,14 +125,14 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
|
|||
}
|
||||
else {
|
||||
// Calculate the number of mirrors
|
||||
unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize;
|
||||
unsigned num_mirrors = (unsigned)(address_range_size / vmem_maps[i].memsize);
|
||||
verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1);
|
||||
|
||||
// Remap the views one by one
|
||||
for (unsigned j = 0; j < num_mirrors; j++) {
|
||||
unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
|
||||
size_t offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
|
||||
|
||||
void *ptr = MapViewOfFileEx(mem_handle, protection, 0, vmem_maps[i].memoffset,
|
||||
void *ptr = MapViewOfFileEx(mem_handle, protection, 0, (DWORD)vmem_maps[i].memoffset,
|
||||
vmem_maps[i].memsize, &virt_ram_base[offset]);
|
||||
verify(ptr == &virt_ram_base[offset]);
|
||||
mapped_regions.push_back(ptr);
|
||||
|
@ -142,15 +142,14 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
|
|||
#endif
|
||||
}
|
||||
|
||||
typedef void* (*mapper_fn) (void *addr, unsigned size);
|
||||
|
||||
// This is a templated function since it's used twice
|
||||
static void* vmem_platform_prepare_jit_block_template(void *code_area, unsigned size, mapper_fn mapper) {
|
||||
template<typename Mapper>
|
||||
static void *vmem_platform_prepare_jit_block_template(size_t size, Mapper mapper)
|
||||
{
|
||||
// Several issues on Windows: can't protect arbitrary pages due to (I guess) the way
|
||||
// kernel tracks mappings, so only stuff that has been allocated with VirtualAlloc can be
|
||||
// protected (the entire allocation IIUC).
|
||||
|
||||
// Strategy: ignore code_area and allocate a new one. Protect it properly.
|
||||
// Strategy: Allocate a new region. Protect it properly.
|
||||
// More issues: the area should be "close" to the .text stuff so that code gen works.
|
||||
// Remember that on x64 we have 4 byte jump/load offset immediates, no issues on x86 :D
|
||||
|
||||
|
@ -177,7 +176,7 @@ static void* vmem_platform_prepare_jit_block_template(void *code_area, unsigned
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static void* mem_alloc(void *addr, unsigned size)
|
||||
static void* mem_alloc(void *addr, size_t size)
|
||||
{
|
||||
#ifdef TARGET_UWP
|
||||
// rwx is not allowed. Need to switch between r-x and rw-
|
||||
|
@ -188,9 +187,10 @@ static void* mem_alloc(void *addr, unsigned size)
|
|||
}
|
||||
|
||||
// Prepares the code region for JIT operations, thus marking it as RWX
|
||||
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx) {
|
||||
bool vmem_platform_prepare_jit_block(void *, size_t size, void **code_area_rwx)
|
||||
{
|
||||
// Get the RWX page close to the code_area
|
||||
void *ptr = vmem_platform_prepare_jit_block_template(code_area, size, &mem_alloc);
|
||||
void *ptr = vmem_platform_prepare_jit_block_template(size, mem_alloc);
|
||||
if (!ptr)
|
||||
return false;
|
||||
|
||||
|
@ -202,8 +202,12 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
|
|||
return true;
|
||||
}
|
||||
|
||||
// Releases a single-area JIT block with VirtualFree(MEM_RELEASE). The size
// parameter is unnamed and unused: a size of 0 is passed to VirtualFree.
void vmem_platform_release_jit_block(void *code_area, size_t)
|
||||
{
|
||||
VirtualFree(code_area, 0, MEM_RELEASE);
|
||||
}
|
||||
|
||||
static void* mem_file_map(void *addr, unsigned size)
|
||||
static void* mem_file_map(void *addr, size_t size)
|
||||
{
|
||||
// Maps the entire file at the specified addr.
|
||||
void *ptr = VirtualAlloc(addr, size, MEM_RESERVE, PAGE_NOACCESS);
|
||||
|
@ -221,12 +225,12 @@ static void* mem_file_map(void *addr, unsigned size)
|
|||
}
|
||||
|
||||
// Use two addr spaces: need to remap something twice, therefore use CreateFileMapping()
|
||||
bool vmem_platform_prepare_jit_block(void* code_area, unsigned size, void** code_area_rw, ptrdiff_t* rx_offset)
|
||||
bool vmem_platform_prepare_jit_block(void *, size_t size, void** code_area_rw, ptrdiff_t* rx_offset)
|
||||
{
|
||||
mem_handle2 = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_EXECUTE_READWRITE, 0, size, 0);
|
||||
mem_handle2 = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_EXECUTE_READWRITE, 0, (DWORD)size, 0);
|
||||
|
||||
// Get the RX page close to the code_area
|
||||
void* ptr_rx = vmem_platform_prepare_jit_block_template(code_area, size, &mem_file_map);
|
||||
void* ptr_rx = vmem_platform_prepare_jit_block_template(size, mem_file_map);
|
||||
if (!ptr_rx)
|
||||
return false;
|
||||
|
||||
|
@ -244,6 +248,17 @@ bool vmem_platform_prepare_jit_block(void* code_area, unsigned size, void** code
|
|||
return (ptr_rw != NULL);
|
||||
}
|
||||
|
||||
void vmem_platform_release_jit_block(void *code_area1, void *code_area2, size_t)
|
||||
{
|
||||
UnmapViewOfFile(code_area1);
|
||||
UnmapViewOfFile(code_area2);
|
||||
// FIXME the same handle is used for all allocations, and thus leaks.
|
||||
// And the last opened handle is closed multiple times.
|
||||
// But windows doesn't need separate RW and RX areas except perhaps UWP
|
||||
// instead of switching back and forth between RX and RW
|
||||
CloseHandle(mem_handle2);
|
||||
}
|
||||
|
||||
void vmem_platform_jit_set_exec(void* code, size_t size, bool enable)
|
||||
{
|
||||
#ifdef TARGET_UWP
|
||||
|
|
Loading…
Reference in New Issue