release dynarec mem allocs when terminating, including arm and dsp

Issue #453
This commit is contained in:
Flyinghead 2023-01-24 09:21:46 +01:00
parent c7d214e55a
commit 31debefbe1
15 changed files with 140 additions and 37 deletions

View File

@ -663,6 +663,7 @@ void Emulator::term()
sh4_cpu.Term();
custom_texture.Terminate(); // lr: avoid deadlock on exit (win32)
reios_term();
aicaarm::term();
libAICA_Term();
pvr::term();
mem_Term();

View File

@ -100,6 +100,9 @@ void DecodeInst(const u32 *IPtr, Instruction *i)
void recInit() {
}
void recTerm() {
}
void recompile() {
}
#endif
@ -124,6 +127,7 @@ void writeProg(u32 addr)
void term()
{
state.stopped = true;
recTerm();
}
void step()

View File

@ -62,6 +62,7 @@ void step();
void writeProg(u32 addr);
void recInit();
void recTerm();
void runStep();
void recompile();

View File

@ -411,6 +411,10 @@ void recInit()
verify(rc);
}
void recTerm()
{
}
void runStep()
{
((void (*)())DynCode)();

View File

@ -461,6 +461,22 @@ void recInit()
#endif
}
void recTerm()
{
#if defined(TARGET_IPHONE) || defined(TARGET_ARM_MAC)
DynCode = nullptr;
#endif
#ifdef FEAT_NO_RWX_PAGES
if (pCodeBuffer != nullptr)
vmem_platform_release_jit_block(DynCode, pCodeBuffer, CodeSize);
#else
if (pCodeBuffer != nullptr && pCodeBuffer != DynCode)
vmem_platform_release_jit_block(pCodeBuffer, CodeSize);
#endif
pCodeBuffer = nullptr;
}
void runStep()
{
((void (*)())DynCode)();

View File

@ -428,6 +428,18 @@ void recInit()
die("vmem_platform_prepare_jit_block failed in x64 dsp");
}
void recTerm()
{
#ifdef FEAT_NO_RWX_PAGES
if (pCodeBuffer != nullptr)
vmem_platform_release_jit_block(CodeBuffer, pCodeBuffer, CodeBufferSize);
#else
if (pCodeBuffer != nullptr && pCodeBuffer != CodeBuffer)
vmem_platform_release_jit_block(pCodeBuffer, CodeBufferSize);
#endif
pCodeBuffer = nullptr;
}
void runStep()
{
((void (*)())&pCodeBuffer[0])();

View File

@ -383,6 +383,11 @@ void recInit()
die("mprotect failed in x86 dsp");
}
void recTerm()
{
pCodeBuffer = nullptr;
}
void runStep()
{
((void (*)())&CodeBuffer[0])();

View File

@ -104,6 +104,13 @@ void aicaarm::init()
}
}
void aicaarm::term()
{
#if FEAT_AREC != DYNAREC_NONE
recompiler::term();
#endif
}
static void CPUSwitchMode(int mode, bool saveState)
{
CPUUpdateCPSR();

View File

@ -4,6 +4,7 @@
namespace aicaarm {
void init();
void term();
void reset();
void run(u32 samples);
void enable(bool enabled);

View File

@ -674,8 +674,6 @@ void init()
#endif
verify(rc);
icPtr = ICache;
for (int i = 0; i < 256; i++)
{
int count = 0;
@ -685,6 +683,19 @@ void init()
cpuBitsSet[i] = count;
}
flush();
}
void term()
{
#ifdef FEAT_NO_RWX_PAGES
if (ICache != nullptr)
vmem_platform_release_jit_block(ARM7_TCB, ICache, ICacheSize);
#else
if (ICache != nullptr && ICache != ARM7_TCB)
vmem_platform_release_jit_block(ICache, ICacheSize);
#endif
ICache = nullptr;
}
template <bool Load, bool Byte>

View File

@ -421,6 +421,7 @@ protected:
namespace recompiler {
void init();
void term();
void flush();
void compile();
void *getMemOp(bool load, bool byte);

View File

@ -20,14 +20,18 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
void vmem_platform_destroy();
// Given a block of data in the .text section, prepares it for JIT action.
// both code_area and size are page aligned. Returns success.
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx);
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rwx);
// Same as above but uses two address spaces one with RX and RW protections.
// Note: this function doesn't have to be implemented, it's a fallback for the above one.
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rw, ptrdiff_t *rx_offset);
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rw, ptrdiff_t *rx_offset);
// This might not need an implementation (ie x86/64 cpus).
void vmem_platform_flush_cache(void *icache_start, void *icache_end, void *dcache_start, void *dcache_end);
// Change a code buffer permissions from r-x to/from rw-
void vmem_platform_jit_set_exec(void* code, size_t size, bool enable);
// Release a jit block previously allocated by vmem_platform_prepare_jit_block
void vmem_platform_release_jit_block(void *code_area, size_t size);
// Release a jit block previously allocated by vmem_platform_prepare_jit_block (with dual RW and RX areas)
void vmem_platform_release_jit_block(void *code_area1, void *code_area2, size_t size);
// Note: if you want to disable vmem magic in any given platform, implement the
// above functions as empty functions and make vmem_platform_init return false.

View File

@ -23,7 +23,7 @@
#if defined(_WIN32) || FEAT_SHREC != DYNAREC_JIT || defined(TARGET_IPHONE) || defined(TARGET_ARM_MAC)
static u8 *SH4_TCB;
#else
static u8 SH4_TCB[CODE_SIZE + TEMP_CODE_SIZE + 4096]
alignas(4096) static u8 SH4_TCB[CODE_SIZE + TEMP_CODE_SIZE]
#if defined(__unix__) || defined(__SWITCH__)
__attribute__((section(".text")));
#elif defined(__APPLE__)
@ -394,24 +394,20 @@ static void recSh4_Init()
Get_Sh4Interpreter(&sh4Interp);
sh4Interp.Init();
bm_Init();
if (_nvmem_enabled())
verify(mem_b.data == ((u8*)p_sh4rcb->sq_buffer + 512 + 0x0C000000));
// Prepare some pointer to the pre-allocated code cache:
void *candidate_ptr = (void*)(((unat)SH4_TCB + 4095) & ~4095);
// Call the platform-specific magic to make the pages RWX
CodeCache = NULL;
CodeCache = nullptr;
#ifdef FEAT_NO_RWX_PAGES
bool rc = vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache, &cc_rx_offset);
bool rc = vmem_platform_prepare_jit_block(SH4_TCB, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache, &cc_rx_offset);
#else
bool rc = vmem_platform_prepare_jit_block(candidate_ptr, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache);
bool rc = vmem_platform_prepare_jit_block(SH4_TCB, CODE_SIZE + TEMP_CODE_SIZE, (void**)&CodeCache);
#endif
verify(rc);
// Ensure the pointer returned is non-null
verify(CodeCache != NULL);
verify(CodeCache != nullptr);
TempCodeCache = CodeCache + CODE_SIZE;
ngen_init();
@ -421,6 +417,15 @@ static void recSh4_Init()
static void recSh4_Term()
{
INFO_LOG(DYNAREC, "recSh4 Term");
#ifdef FEAT_NO_RWX_PAGES
if (CodeCache != nullptr)
vmem_platform_release_jit_block(CodeCache, (u8 *)CodeCache + cc_rx_offset, CODE_SIZE + TEMP_CODE_SIZE);
#else
if (CodeCache != nullptr && CodeCache != SH4_TCB)
vmem_platform_release_jit_block(CodeCache, CODE_SIZE + TEMP_CODE_SIZE);
#endif
CodeCache = nullptr;
TempCodeCache = nullptr;
bm_Term();
sh4Interp.Term();
}

View File

@ -150,7 +150,6 @@ static int allocate_shared_filemem(unsigned size) {
// Implement vmem initialization for RAM, ARAM, VRAM and SH4 context, fpcb etc.
int vmem_fd = -1;
static int shmem_fd2 = -1;
static void *reserved_base;
static size_t reserved_size;
@ -242,7 +241,7 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
}
// Prepares the code region for JIT operations, thus marking it as RWX
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx)
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rwx)
{
// Try to map is as RWX, this fails apparently on OSX (and perhaps other systems?)
if (code_area != nullptr && mem_region_set_exec(code_area, size))
@ -278,10 +277,16 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
return true;
}
void vmem_platform_release_jit_block(void *code_area, size_t size)
{
munmap(code_area, size);
}
// Use two addr spaces: need to remap something twice, therefore use allocate_shared_filemem()
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rw, ptrdiff_t *rx_offset) {
shmem_fd2 = allocate_shared_filemem(size);
if (shmem_fd2 < 0)
bool vmem_platform_prepare_jit_block(void *code_area, size_t size, void **code_area_rw, ptrdiff_t *rx_offset)
{
int fd = allocate_shared_filemem(size);
if (fd < 0)
return false;
// Need to unmap the section we are about to use (it might be already unmapped but nevertheless...)
@ -289,20 +294,31 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
// Map the RX bits on the code_area, for proximity, as usual.
void *ptr_rx = mmap(code_area, size, PROT_READ | PROT_EXEC,
MAP_SHARED | MAP_NOSYNC | MAP_FIXED, shmem_fd2, 0);
MAP_SHARED | MAP_NOSYNC | MAP_FIXED, fd, 0);
if (ptr_rx != code_area)
{
close(fd);
return false;
}
// Now remap the same memory as RW in some location we don't really care at all.
void *ptr_rw = mmap(NULL, size, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_NOSYNC, shmem_fd2, 0);
MAP_SHARED | MAP_NOSYNC, fd, 0);
*code_area_rw = ptr_rw;
*rx_offset = (char*)ptr_rx - (char*)ptr_rw;
close(fd);
INFO_LOG(DYNAREC, "Info: Using NO_RWX mode, rx ptr: %p, rw ptr: %p, offset: %ld", ptr_rx, ptr_rw, (long)*rx_offset);
return (ptr_rw != MAP_FAILED);
}
void vmem_platform_release_jit_block(void *code_area1, void *code_area2, size_t size)
{
// keep code_area1 (RX) mapped since it's statically allocated
munmap(code_area2, size);
}
#endif // !__SWITCH__
void vmem_platform_jit_set_exec(void* code, size_t size, bool enable) {

View File

@ -57,7 +57,7 @@ bool vmem_platform_init(void **vmem_base_addr, void **sh4rcb_addr, size_t ramSiz
mapped_regions.reserve(32);
// First let's try to allocate the in-memory file
mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, ramSize, 0);
mem_handle = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_READWRITE, 0, (DWORD)ramSize, 0);
// Now allocate the actual address space (it will be 64KB aligned on windows).
unsigned memsize = 512*1024*1024 + sizeof(Sh4RCB) + ARAM_SIZE_MAX;
@ -114,7 +114,7 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
unmapped_regions.clear();
for (unsigned i = 0; i < nummaps; i++) {
unsigned address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
size_t address_range_size = vmem_maps[i].end_address - vmem_maps[i].start_address;
DWORD protection = vmem_maps[i].allow_writes ? (FILE_MAP_READ | FILE_MAP_WRITE) : FILE_MAP_READ;
if (!vmem_maps[i].memsize) {
@ -125,14 +125,14 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
}
else {
// Calculate the number of mirrors
unsigned num_mirrors = (address_range_size) / vmem_maps[i].memsize;
unsigned num_mirrors = (unsigned)(address_range_size / vmem_maps[i].memsize);
verify((address_range_size % vmem_maps[i].memsize) == 0 && num_mirrors >= 1);
// Remap the views one by one
for (unsigned j = 0; j < num_mirrors; j++) {
unsigned offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
size_t offset = vmem_maps[i].start_address + j * vmem_maps[i].memsize;
void *ptr = MapViewOfFileEx(mem_handle, protection, 0, vmem_maps[i].memoffset,
void *ptr = MapViewOfFileEx(mem_handle, protection, 0, (DWORD)vmem_maps[i].memoffset,
vmem_maps[i].memsize, &virt_ram_base[offset]);
verify(ptr == &virt_ram_base[offset]);
mapped_regions.push_back(ptr);
@ -142,15 +142,14 @@ void vmem_platform_create_mappings(const vmem_mapping *vmem_maps, unsigned numma
#endif
}
typedef void* (*mapper_fn) (void *addr, unsigned size);
// This is a templated function since it's used twice
static void* vmem_platform_prepare_jit_block_template(void *code_area, unsigned size, mapper_fn mapper) {
template<typename Mapper>
static void *vmem_platform_prepare_jit_block_template(size_t size, Mapper mapper)
{
// Several issues on Windows: can't protect arbitrary pages due to (I guess) the way
// kernel tracks mappings, so only stuff that has been allocated with VirtualAlloc can be
// protected (the entire allocation IIUC).
// Strategy: ignore code_area and allocate a new one. Protect it properly.
// Strategy: Allocate a new region. Protect it properly.
// More issues: the area should be "close" to the .text stuff so that code gen works.
// Remember that on x64 we have 4 byte jump/load offset immediates, no issues on x86 :D
@ -177,7 +176,7 @@ static void* vmem_platform_prepare_jit_block_template(void *code_area, unsigned
return NULL;
}
static void* mem_alloc(void *addr, unsigned size)
static void* mem_alloc(void *addr, size_t size)
{
#ifdef TARGET_UWP
// rwx is not allowed. Need to switch between r-x and rw-
@ -188,9 +187,10 @@ static void* mem_alloc(void *addr, unsigned size)
}
// Prepares the code region for JIT operations, thus marking it as RWX
bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code_area_rwx) {
bool vmem_platform_prepare_jit_block(void *, size_t size, void **code_area_rwx)
{
// Get the RWX page close to the code_area
void *ptr = vmem_platform_prepare_jit_block_template(code_area, size, &mem_alloc);
void *ptr = vmem_platform_prepare_jit_block_template(size, mem_alloc);
if (!ptr)
return false;
@ -202,8 +202,12 @@ bool vmem_platform_prepare_jit_block(void *code_area, unsigned size, void **code
return true;
}
void vmem_platform_release_jit_block(void *code_area, size_t)
{
VirtualFree(code_area, 0, MEM_RELEASE);
}
static void* mem_file_map(void *addr, unsigned size)
static void* mem_file_map(void *addr, size_t size)
{
// Maps the entire file at the specified addr.
void *ptr = VirtualAlloc(addr, size, MEM_RESERVE, PAGE_NOACCESS);
@ -221,12 +225,12 @@ static void* mem_file_map(void *addr, unsigned size)
}
// Use two addr spaces: need to remap something twice, therefore use CreateFileMapping()
bool vmem_platform_prepare_jit_block(void* code_area, unsigned size, void** code_area_rw, ptrdiff_t* rx_offset)
bool vmem_platform_prepare_jit_block(void *, size_t size, void** code_area_rw, ptrdiff_t* rx_offset)
{
mem_handle2 = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_EXECUTE_READWRITE, 0, size, 0);
mem_handle2 = CreateFileMapping(INVALID_HANDLE_VALUE, 0, PAGE_EXECUTE_READWRITE, 0, (DWORD)size, 0);
// Get the RX page close to the code_area
void* ptr_rx = vmem_platform_prepare_jit_block_template(code_area, size, &mem_file_map);
void* ptr_rx = vmem_platform_prepare_jit_block_template(size, mem_file_map);
if (!ptr_rx)
return false;
@ -244,6 +248,17 @@ bool vmem_platform_prepare_jit_block(void* code_area, unsigned size, void** code
return (ptr_rw != NULL);
}
void vmem_platform_release_jit_block(void *code_area1, void *code_area2, size_t)
{
UnmapViewOfFile(code_area1);
UnmapViewOfFile(code_area2);
// FIXME the same handle is used for all allocations, and thus leaks.
// And the last opened handle is closed multiple times.
// But windows doesn't need separate RW and RX areas except perhaps UWP
// instead of switching back and forth between RX and RW
CloseHandle(mem_handle2);
}
void vmem_platform_jit_set_exec(void* code, size_t size, bool enable)
{
#ifdef TARGET_UWP