Multiinstance jit (#2201)

* works on Linux x64; still needs to be fixed for everything else

* use lots of PROT_NONE memory to reliably reserve virtual address space

* multi instance fastmem on Linux

* Windows

* blarg

* disable fastmem if the page size is not 4kb

* fix fast mem dialog option

* make aarch64 work as well

* fastmem 16kb pages support
Author: Kemal Afzal, 2024-11-18 20:43:05 +01:00 (committed via GitHub)
Parent: cb7af652f5
Commit: 99ce959913
19 changed files with 573 additions and 265 deletions
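
Editor's note: the "PROT_NONE memory" bullet refers to reserving the fastmem address windows up front so nothing else can land in them, then mapping real memory into slices on demand. A minimal sketch of that pattern (illustrative only, not melonDS code; 64-bit POSIX hosts assumed):

    // Reserve a large range with PROT_NONE (costs no physical memory), then
    // back a slice of it later with MAP_FIXED without ever giving the range up.
    #include <sys/mman.h>
    #include <cassert>
    #include <cstddef>

    int main()
    {
        const size_t reserveSize = 8ull * 1024 * 1024 * 1024; // e.g. two 4 GB windows
        void* base = mmap(nullptr, reserveSize, PROT_NONE,
                          MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        assert(base != MAP_FAILED);

        // later: make a 16 KB slice usable; MAP_FIXED replaces the PROT_NONE pages in place
        void* slice = mmap(base, 16 * 1024, PROT_READ | PROT_WRITE,
                           MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
        assert(slice == base);

        munmap(base, reserveSize); // release the whole reservation at the end
        return 0;
    }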


@@ -30,6 +30,7 @@
#include "ARMJIT_Internal.h"
#include "ARMJIT_Memory.h"
#include "ARMJIT_Compiler.h"
+#include "ARMJIT_Global.h"
#include "ARMInterpreter_ALU.h"
#include "ARMInterpreter_LoadStore.h"
@@ -467,6 +468,16 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
};
#undef F
+ARMJIT::ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
+    NDS(nds),
+    Memory(nds),
+    JITCompiler(nds),
+    MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
+    LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
+    BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
+    FastMemory((jit.has_value() ? jit->FastMemory : false) && ARMJIT_Memory::IsFastMemSupported())
+{}
void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
{
    auto it = RestoreCandidates.find(block->InstrHash);
@@ -483,6 +494,7 @@ void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
void ARMJIT::SetJITArgs(JITArgs args) noexcept
{
+    args.FastMemory = args.FastMemory && ARMJIT_Memory::IsFastMemSupported();
    args.MaxBlockSize = std::clamp(args.MaxBlockSize, 1u, 32u);
    if (MaxBlockSize != args.MaxBlockSize
@@ -499,36 +511,22 @@ void ARMJIT::SetJITArgs(JITArgs args) noexcept
void ARMJIT::SetMaxBlockSize(int size) noexcept
{
-    size = std::clamp(size, 1, 32);
-    if (size != MaxBlockSize)
-        ResetBlockCache();
-    MaxBlockSize = size;
+    SetJITArgs(JITArgs{static_cast<unsigned>(size), LiteralOptimizations, LiteralOptimizations, FastMemory});
}
void ARMJIT::SetLiteralOptimizations(bool enabled) noexcept
{
-    if (LiteralOptimizations != enabled)
-        ResetBlockCache();
-    LiteralOptimizations = enabled;
+    SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), enabled, BranchOptimizations, FastMemory});
}
void ARMJIT::SetBranchOptimizations(bool enabled) noexcept
{
-    if (BranchOptimizations != enabled)
-        ResetBlockCache();
-    BranchOptimizations = enabled;
+    SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), LiteralOptimizations, enabled, FastMemory});
}
void ARMJIT::SetFastMemory(bool enabled) noexcept
{
-    if (FastMemory != enabled)
-        ResetBlockCache();
-    FastMemory = enabled;
+    SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), LiteralOptimizations, BranchOptimizations, enabled});
}
void ARMJIT::CompileBlock(ARM* cpu) noexcept
@@ -918,7 +916,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
    AddressRange* region = CodeMemRegions[addressRanges[j] >> 27];
-    if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000) / 512]))
+    if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
        Memory.SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true);
    AddressRange* range = &region[(addressRanges[j] & 0x7FFFFFF) / 512];
@@ -971,7 +969,7 @@ void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept
    range->Blocks.Remove(i);
    if (range->Blocks.Length == 0
-        && !PageContainsCode(&region[(localAddr & 0x7FFF000) / 512]))
+        && !PageContainsCode(&region[(localAddr & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
    {
        Memory.SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false);
    }
@@ -1005,7 +1003,7 @@ void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept
    if (otherRange->Blocks.Length == 0)
    {
-        if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512]))
+        if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
            Memory.SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false);
        otherRange->Code = 0;


@@ -44,15 +44,7 @@ class JitBlock;
class ARMJIT
{
public:
-    ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
-        NDS(nds),
-        Memory(nds),
-        JITCompiler(nds),
-        MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
-        LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
-        BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
-        FastMemory(jit.has_value() ? jit->FastMemory : false)
-    {}
+    ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept;
    ~ARMJIT() noexcept;
    void InvalidateByAddr(u32) noexcept;
    void CheckAndInvalidateWVRAM(int) noexcept;
@@ -80,6 +72,7 @@ private:
    bool LiteralOptimizations = false;
    bool BranchOptimizations = false;
    bool FastMemory = false;
public:
    melonDS::NDS& NDS;
    TinyVector<u32> InvalidLiterals {};


@@ -22,17 +22,7 @@
#include "../ARMInterpreter.h"
#include "../ARMJIT.h"
#include "../NDS.h"
+#include "../ARMJIT_Global.h"
-#if defined(__SWITCH__)
-#include <switch.h>
-extern char __start__;
-#elif defined(_WIN32)
-#include <windows.h>
-#else
-#include <sys/mman.h>
-#include <unistd.h>
-#endif
#include <stdlib.h>
@@ -66,11 +56,6 @@ const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;
const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});
-const int JitMemSize = 16 * 1024 * 1024;
-#ifndef __SWITCH__
-u8 JitMem[JitMemSize];
-#endif
void Compiler::MovePC()
{
    ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
@@ -260,29 +245,12 @@ Compiler::Compiler(melonDS::NDS& nds) : Arm64Gen::ARM64XEmitter(), NDS(nds)
    SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
    JitMemMainSize = JitMemSize;
#else
-    #ifdef _WIN32
-    SYSTEM_INFO sysInfo;
-    GetSystemInfo(&sysInfo);
-    u64 pageSize = (u64)sysInfo.dwPageSize;
-    #else
-    u64 pageSize = sysconf(_SC_PAGE_SIZE);
-    #endif
-    u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
-    u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned;
-    #if defined(_WIN32)
-    DWORD dummy;
-    VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
-    #elif defined(__APPLE__)
-    pageAligned = (u8*)mmap(NULL, 1024*1024*16, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT,-1, 0);
-    nds.JIT.JitEnableWrite();
-    #else
-    mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
-    #endif
-    SetCodeBase(pageAligned, pageAligned);
-    JitMemMainSize = alignedSize;
+    ARMJIT_Global::Init();
+    CodeMemBase = ARMJIT_Global::AllocateCodeMem();
+    SetCodeBase(reinterpret_cast<u8*>(CodeMemBase), reinterpret_cast<u8*>(CodeMemBase));
+    JitMemMainSize = ARMJIT_Global::CodeMemorySliceSize;
#endif
    SetCodePtr(0);
@@ -493,6 +461,9 @@ Compiler::~Compiler()
    free(JitRWBase);
}
#endif
+    ARMJIT_Global::FreeCodeMem(CodeMemBase);
+    ARMJIT_Global::DeInit();
}
void Compiler::LoadCycles()


@@ -275,6 +275,7 @@ public:
    void* JitRWStart;
    void* JitRXStart;
#endif
+    void* CodeMemBase;
    void* ReadBanked, *WriteBanked;

src/ARMJIT_Global.cpp (new file, 118 lines)

@ -0,0 +1,118 @@
#include "ARMJIT_Global.h"
#include "ARMJIT_Memory.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <stdio.h>
#include <stdint.h>
#include <mutex>
namespace melonDS
{
namespace ARMJIT_Global
{
std::mutex globalMutex;
#ifndef __APPLE__
static constexpr size_t NumCodeMemSlices = 4;
static constexpr size_t CodeMemoryAlignedSize = NumCodeMemSlices * CodeMemorySliceSize;
// I haven't heard of pages larger than 16 KB
u8 CodeMemory[CodeMemoryAlignedSize + 16*1024];
u32 AvailableCodeMemSlices = (1 << NumCodeMemSlices) - 1;
u8* GetAlignedCodeMemoryStart()
{
return reinterpret_cast<u8*>((reinterpret_cast<intptr_t>(CodeMemory) + (16*1024-1)) & ~static_cast<intptr_t>(16*1024-1));
}
#endif
int RefCounter = 0;
void* AllocateCodeMem()
{
std::lock_guard guard(globalMutex);
#ifndef __APPLE__
if (AvailableCodeMemSlices)
{
int slice = __builtin_ctz(AvailableCodeMemSlices);
AvailableCodeMemSlices &= ~(1 << slice);
//printf("allocating slice %d\n", slice);
return &GetAlignedCodeMemoryStart()[slice * CodeMemorySliceSize];
}
#endif
// allocate
#ifdef _WIN32
return VirtualAlloc(nullptr, CodeMemorySliceSize, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
#else
//printf("mmaping...\n");
return mmap(nullptr, CodeMemorySliceSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
}
void FreeCodeMem(void* codeMem)
{
std::lock_guard guard(globalMutex);
for (int i = 0; i < NumCodeMemSlices; i++)
{
if (codeMem == &GetAlignedCodeMemoryStart()[CodeMemorySliceSize * i])
{
//printf("freeing slice\n");
AvailableCodeMemSlices |= 1 << i;
return;
}
}
#ifdef _WIN32
VirtualFree(codeMem, CodeMemorySliceSize, MEM_RELEASE|MEM_DECOMMIT);
#else
munmap(codeMem, CodeMemorySliceSize);
#endif
}
void Init()
{
std::lock_guard guard(globalMutex);
RefCounter++;
if (RefCounter == 1)
{
#ifdef _WIN32
DWORD dummy;
VirtualProtect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(__APPLE__)
// Apple always uses dynamic allocation
#else
mprotect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
ARMJIT_Memory::RegisterFaultHandler();
}
}
void DeInit()
{
std::lock_guard guard(globalMutex);
RefCounter--;
if (RefCounter == 0)
{
ARMJIT_Memory::UnregisterFaultHandler();
}
}
}
}
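
Editor's note: ARMJIT_Global hands out fixed 32 MB slices from a shared static pool (falling back to OS allocation once the four slices are taken), while Init/DeInit reference-count the process-wide fault handler. A minimal sketch of the intended per-compiler call pattern, using only the functions declared in ARMJIT_Global.h:

    #include "ARMJIT_Global.h"

    void CompilerLifetimeSketch()
    {
        melonDS::ARMJIT_Global::Init();                         // refcount++, make the pool RWX, install fault handler
        void* code = melonDS::ARMJIT_Global::AllocateCodeMem(); // one 32 MB slice for this JIT instance
        // ... emit and run code within [code, code + CodeMemorySliceSize) ...
        melonDS::ARMJIT_Global::FreeCodeMem(code);              // slice returns to the pool (or is unmapped)
        melonDS::ARMJIT_Global::DeInit();                       // refcount--, last user removes the fault handler
    }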

src/ARMJIT_Global.h (new file, 44 lines)

@ -0,0 +1,44 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_GLOBAL_H
#define ARMJIT_GLOBAL_H
#include "types.h"
#include <stdlib.h>
namespace melonDS
{
namespace ARMJIT_Global
{
static constexpr size_t CodeMemorySliceSize = 1024*1024*32;
void Init();
void DeInit();
void* AllocateCodeMem();
void FreeCodeMem(void* codeMem);
}
}
#endif


@@ -85,9 +85,9 @@ typedef void (*InterpreterFunc)(ARM* cpu);
extern InterpreterFunc InterpretARM[];
extern InterpreterFunc InterpretTHUMB[];
-inline bool PageContainsCode(const AddressRange* range)
+inline bool PageContainsCode(const AddressRange* range, u32 pageSize)
{
-    for (int i = 0; i < 8; i++)
+    for (int i = 0; i < pageSize / 512; i++)
    {
        if (range[i].Blocks.Length > 0)
            return true;
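
Editor's note: code presence is tracked in 512-byte AddressRange buckets, so one host page spans pageSize / 512 consecutive buckets, and the callers shown earlier mask the address down to a page boundary before indexing. A small, hypothetical illustration of that bucket math (not melonDS code):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint32_t pageSizes[] = {0x1000, 0x4000};      // 4 KB and 16 KB host pages
        const uint32_t localAddr = 0x00345678 & 0x7FFFFFF;  // address within a memory region
        for (uint32_t pageSize : pageSizes)
        {
            uint32_t pageStart   = localAddr & ~(pageSize - 1); // page-align the address
            uint32_t firstBucket = pageStart / 512;             // first 512-byte bucket of that page
            uint32_t bucketCount = pageSize / 512;              // 8 buckets for 4 KB, 32 for 16 KB
            printf("page 0x%x: buckets [%u, %u)\n", pageSize, firstBucket, firstBucket + bucketCount);
        }
        return 0;
    }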


@@ -39,6 +39,7 @@
#include "ARMJIT_Internal.h"
#include "ARMJIT_Compiler.h"
+#include "ARMJIT_Global.h"
#include "DSi.h"
#include "GPU.h"
@@ -100,6 +101,9 @@
namespace melonDS
{
+static constexpr u64 AddrSpaceSize = 0x100000000;
+static constexpr u64 VirtmemAreaSize = AddrSpaceSize * 2 + MemoryTotalSize;
using Platform::Log;
using Platform::LogLevel;
@@ -152,6 +156,15 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
#elif defined(_WIN32)
static LPVOID ExceptionHandlerHandle = nullptr;
static HMODULE KernelBaseDll = nullptr;
using VirtualAlloc2Type = PVOID WINAPI (*)(HANDLE Process, PVOID BaseAddress, SIZE_T Size, ULONG AllocationType, ULONG PageProtection, MEM_EXTENDED_PARAMETER* ExtendedParameters, ULONG ParameterCount);
using MapViewOfFile3Type = PVOID WINAPI (*)(HANDLE FileMapping, HANDLE Process, PVOID BaseAddress, ULONG64 Offset, SIZE_T ViewSize, ULONG AllocationType, ULONG PageProtection, MEM_EXTENDED_PARAMETER* ExtendedParameters, ULONG ParameterCount);
static VirtualAlloc2Type virtualAlloc2Ptr;
static MapViewOfFile3Type mapViewOfFile3Ptr;
LONG ARMJIT_Memory::ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
{
    if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
@@ -170,6 +183,7 @@ LONG ARMJIT_Memory::ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
        return EXCEPTION_CONTINUE_EXECUTION;
    }
+    Log(LogLevel::Debug, "it all returns to nothing\n");
    return EXCEPTION_CONTINUE_SEARCH;
}
@@ -261,18 +275,61 @@ enum
    memstate_MappedProtected,
};
+#define CHECK_ALIGNED(value) assert(((value) & (PageSize-1)) == 0)
bool ARMJIT_Memory::MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexcept
{
+    CHECK_ALIGNED(addr);
+    CHECK_ALIGNED(offset);
+    CHECK_ALIGNED(size);
    u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
#ifdef __SWITCH__
    Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(),
        (u64)(MemoryBaseCodeMem + offset), size));
    return R_SUCCEEDED(r);
#elif defined(_WIN32)
-    bool r = MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, offset, size, dst) == dst;
-    return r;
+    uintptr_t uintptrDst = reinterpret_cast<uintptr_t>(dst);
+    for (auto it = VirtmemPlaceholders.begin(); it != VirtmemPlaceholders.end(); it++)
{
if (uintptrDst >= it->Start && uintptrDst+size <= it->Start+it->Size)
{
//Log(LogLevel::Debug, "found mapping %llx %llx %llx %llx\n", uintptrDst, size, it->Start, it->Size);
// we split this place holder so that we have a fitting place holder for the mapping
if (uintptrDst != it->Start || size != it->Size)
{
if (!VirtualFree(dst, size, MEM_RELEASE|MEM_PRESERVE_PLACEHOLDER))
{
Log(LogLevel::Debug, "VirtualFree failed with %x\n", GetLastError());
return false;
}
}
VirtmemPlaceholder splitPlaceholder = *it;
VirtmemPlaceholders.erase(it);
if (uintptrDst > splitPlaceholder.Start)
{
//Log(LogLevel::Debug, "splitting on the left %llx\n", uintptrDst - splitPlaceholder.Start);
VirtmemPlaceholders.push_back({splitPlaceholder.Start, uintptrDst - splitPlaceholder.Start});
}
if (uintptrDst+size < splitPlaceholder.Start+splitPlaceholder.Size)
{
//Log(LogLevel::Debug, "splitting on the right %llx\n", (splitPlaceholder.Start+splitPlaceholder.Size)-(uintptrDst+size));
VirtmemPlaceholders.push_back({uintptrDst+size, (splitPlaceholder.Start+splitPlaceholder.Size)-(uintptrDst+size)});
}
if (!mapViewOfFile3Ptr(MemoryFile, nullptr, dst, offset, size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
{
Log(LogLevel::Debug, "MapViewOfFile3 failed with %x\n", GetLastError());
return false;
}
return true;
}
}
Log(LogLevel::Debug, "no mapping at all found??? %p %x %p\n", dst, size, MemoryBase);
return false;
#else
    return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED;
#endif
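
Editor's note: on Windows the fastmem windows are now reserved once as placeholder regions, and individual mirrors are mapped in by splitting a placeholder and replacing it with a file view, instead of free-floating MapViewOfFileEx calls. A condensed, hedged sketch of that sequence (not the PR's exact flow; VirtualAlloc2/MapViewOfFile3 need Windows 10+ and linking onecore, which the CMake change below adds):

    #include <windows.h>

    bool MapMirrorSketch(HANDLE fileMapping, SIZE_T reserveSize, SIZE_T viewOffset, SIZE_T viewSize)
    {
        // Reserve the whole range as a placeholder: nothing is committed, but the addresses are ours.
        char* base = static_cast<char*>(VirtualAlloc2(nullptr, nullptr, reserveSize,
            MEM_RESERVE | MEM_RESERVE_PLACEHOLDER, PAGE_NOACCESS, nullptr, 0));
        if (!base)
            return false;

        // Split the placeholder so the target window becomes its own placeholder of exactly viewSize bytes.
        if (!VirtualFree(base + viewOffset, viewSize, MEM_RELEASE | MEM_PRESERVE_PLACEHOLDER))
            return false;

        // Replace that placeholder with a view of the shared-memory file.
        if (!MapViewOfFile3(fileMapping, nullptr, base + viewOffset, 0, viewSize,
                            MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
            return false;

        // Unmapping keeps the placeholder so neighbouring placeholders can be coalesced and reused later.
        UnmapViewOfFileEx(base + viewOffset, MEM_PRESERVE_PLACEHOLDER);
        VirtualFree(base, reserveSize, MEM_RELEASE | MEM_COALESCE_PLACEHOLDERS); // back to one placeholder
        return VirtualFree(base, 0, MEM_RELEASE) != 0;                           // finally release it all
    }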
@@ -280,21 +337,68 @@ bool ARMJIT_Memory::MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexce
bool ARMJIT_Memory::UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) noexcept
{
+    CHECK_ALIGNED(addr);
+    CHECK_ALIGNED(offset);
+    CHECK_ALIGNED(size);
    u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
#ifdef __SWITCH__
    Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(),
        (u64)(MemoryBaseCodeMem + offset), size);
    return R_SUCCEEDED(r);
#elif defined(_WIN32)
-    return UnmapViewOfFile(dst);
+    if (!UnmapViewOfFileEx(dst, MEM_PRESERVE_PLACEHOLDER))
{
Log(LogLevel::Debug, "UnmapViewOfFileEx failed %x\n", GetLastError());
return false;
}
uintptr_t uintptrDst = reinterpret_cast<uintptr_t>(dst);
uintptr_t coalesceStart = uintptrDst;
size_t coalesceSize = size;
for (auto it = VirtmemPlaceholders.begin(); it != VirtmemPlaceholders.end();)
{
if (it->Start+it->Size == uintptrDst)
{
//Log(LogLevel::Debug, "Coalescing to the left\n");
coalesceStart = it->Start;
coalesceSize += it->Size;
it = VirtmemPlaceholders.erase(it);
}
else if (it->Start == uintptrDst+size)
{
//Log(LogLevel::Debug, "Coalescing to the right\n");
coalesceSize += it->Size;
it = VirtmemPlaceholders.erase(it);
}
else
{
it++;
}
}
if (coalesceStart != uintptrDst || coalesceSize != size)
{
if (!VirtualFree(reinterpret_cast<void*>(coalesceStart), coalesceSize, MEM_RELEASE|MEM_COALESCE_PLACEHOLDERS))
return false;
}
VirtmemPlaceholders.push_back({coalesceStart, coalesceSize});
//Log(LogLevel::Debug, "Adding coalesced region %llx %llx", coalesceStart, coalesceSize);
return true;
#else
-    return munmap(dst, size) == 0;
+    return mmap(dst, size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0) != MAP_FAILED;
#endif
}
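
Editor's note: the POSIX branch above no longer calls munmap. Instead the range is overwritten with a fresh PROT_NONE anonymous mapping, so the hole stays reserved for the emulator and no other allocation can land inside the fastmem window (this is the "lots of PROT_NONE memory" idea from the commit message). Conceptually:

    // "Unmap" by re-reserving: MAP_FIXED atomically replaces the old file-backed pages,
    // leaving the range inaccessible but still owned, unlike munmap(dst, size).
    void* r = mmap(dst, size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0);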
#ifndef __SWITCH__
void ARMJIT_Memory::SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) noexcept
{
+    CHECK_ALIGNED(addr);
+    CHECK_ALIGNED(size);
    u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
#if defined(_WIN32)
    DWORD winProtection, oldProtection;
@@ -305,6 +409,10 @@ void ARMJIT_Memory::SetCodeProtectionRange(u32 addr, u32 size, u32 num, int prot
    else
        winProtection = PAGE_READWRITE;
    bool success = VirtualProtect(dst, size, winProtection, &oldProtection);
+    if (!success)
+    {
+        Log(LogLevel::Debug, "VirtualProtect failed with %x\n", GetLastError());
+    }
    assert(success);
#else
    int posixProt;
@@ -335,14 +443,14 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
    else
    {
        u32 segmentOffset = offset;
-        u8 status = statuses[(Addr + offset) >> 12];
-        while (statuses[(Addr + offset) >> 12] == status
+        u8 status = statuses[(Addr + offset) >> PageShift];
+        while (statuses[(Addr + offset) >> PageShift] == status
            && offset < Size
            && (!skipDTCM || Addr + offset != dtcmStart))
        {
-            assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
-            statuses[(Addr + offset) >> 12] = memstate_Unmapped;
-            offset += 0x1000;
+            assert(statuses[(Addr + offset) >> PageShift] != memstate_Unmapped);
+            statuses[(Addr + offset) >> PageShift] = memstate_Unmapped;
+            offset += PageSize;
        }
#ifdef __SWITCH__
@@ -358,7 +466,6 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
    }
#ifndef __SWITCH__
-#ifndef _WIN32
    u32 dtcmEnd = dtcmStart + dtcmSize;
    if (Num == 0
        && dtcmEnd >= Addr
@@ -378,7 +485,6 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
        }
    }
    else
-#endif
    {
        bool succeded = nds.JIT.Memory.UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
        assert(succeded);
@@ -388,7 +494,7 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noexcept
{
-    offset &= ~0xFFF;
+    offset &= ~(PageSize - 1);
    //printf("set code protection %d %x %d\n", region, offset, protect);
    for (int i = 0; i < Mappings[region].Length; i++)
@@ -406,9 +512,9 @@ void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noex
        u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7);
-        //printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> 12]);
-        assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected));
-        states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW;
+        //printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> PageShift]);
+        assert(states[effectiveAddr >> PageShift] == (protect ? memstate_MappedRW : memstate_MappedProtected));
+        states[effectiveAddr >> PageShift] = protect ? memstate_MappedProtected : memstate_MappedRW;
#if defined(__SWITCH__)
        bool success;
@@ -418,7 +524,7 @@ void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noex
            success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
        assert(success);
#else
-        SetCodeProtectionRange(effectiveAddr, 0x1000, mapping.Num, protect ? 1 : 2);
+        SetCodeProtectionRange(effectiveAddr, PageSize, mapping.Num, protect ? 1 : 2);
#endif
    }
}
@@ -543,11 +649,19 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
    u32 dtcmSize = ~NDS.ARM9.DTCMMask + 1;
    u32 dtcmEnd = dtcmStart + dtcmSize;
#ifndef __SWITCH__
-#ifndef _WIN32
    if (num == 0
        && dtcmEnd >= mirrorStart
        && dtcmStart < mirrorStart + mirrorSize)
    {
if (dtcmSize < PageSize)
{
// we could technically mask out the DTCM by setting a hole to access permissions
// but realistically there isn't much of a point in mapping less than 16kb of DTCM
// so it isn't worth more complex support
Log(LogLevel::Info, "DTCM size smaller than 16kb skipping mapping entirely");
return false;
}
        bool success;
        if (dtcmStart > mirrorStart)
        {
@@ -562,7 +676,6 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
        }
    }
    else
-#endif
    {
        bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
        assert(succeded);
@@ -579,22 +692,19 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
    {
        if (skipDTCM && mirrorStart + offset == dtcmStart)
        {
-#ifdef _WIN32
-            SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0);
-#endif
            offset += dtcmSize;
        }
        else
        {
            u32 sectionOffset = offset;
-            bool hasCode = isExecutable && PageContainsCode(&range[offset / 512]);
+            bool hasCode = isExecutable && PageContainsCode(&range[offset / 512], PageSize);
            while (offset < mirrorSize
-                && (!isExecutable || PageContainsCode(&range[offset / 512]) == hasCode)
+                && (!isExecutable || PageContainsCode(&range[offset / 512], PageSize) == hasCode)
                && (!skipDTCM || mirrorStart + offset != NDS.ARM9.DTCMBase))
            {
-                assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped);
-                states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW;
-                offset += 0x1000;
+                assert(states[(mirrorStart + offset) >> PageShift] == memstate_Unmapped);
+                states[(mirrorStart + offset) >> PageShift] = hasCode ? memstate_MappedProtected : memstate_MappedRW;
+                offset += PageSize;
            }
            u32 sectionSize = offset - sectionOffset;
@@ -624,6 +734,86 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
    return true;
}
u32 ARMJIT_Memory::PageSize = 0;
u32 ARMJIT_Memory::PageShift = 0;
bool ARMJIT_Memory::IsFastMemSupported()
{
#ifdef __APPLE__
return false;
#else
static bool initialised = false;
static bool isSupported = false;
if (!initialised)
{
#ifdef _WIN32
ARMJIT_Global::Init();
isSupported = virtualAlloc2Ptr != nullptr;
ARMJIT_Global::DeInit();
PageSize = RegularPageSize;
#else
PageSize = __sysconf(_SC_PAGESIZE);
isSupported = PageSize == RegularPageSize || PageSize == LargePageSize;
#endif
PageShift = __builtin_ctz(PageSize);
initialised = true;
}
return isSupported;
#endif
}
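
Editor's note: IsFastMemSupported caches the host page size and only allows fastmem for 4 KB (RegularPageSize) or 16 KB (LargePageSize) pages; PageShift is derived from it so the mapping-status tables can be indexed with addr >> PageShift. A small, hypothetical illustration of the derived values (not melonDS code):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint32_t sizes[] = {0x1000, 0x4000};           // 4 KB and 16 KB
        for (uint32_t pageSize : sizes)
        {
            uint32_t pageShift = __builtin_ctz(pageSize);    // 12 or 14
            uint32_t addr      = 0x02000123;
            printf("size=0x%x shift=%u aligned=0x%x index=%u\n",
                   pageSize, pageShift, addr & ~(pageSize - 1), addr >> pageShift);
        }
        return 0;
    }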
void ARMJIT_Memory::RegisterFaultHandler()
{
#ifdef _WIN32
ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler);
KernelBaseDll = LoadLibrary("KernelBase.dll");
if (KernelBaseDll)
{
virtualAlloc2Ptr = reinterpret_cast<VirtualAlloc2Type>(GetProcAddress(KernelBaseDll, "VirtualAlloc2"));
mapViewOfFile3Ptr = reinterpret_cast<MapViewOfFile3Type>(GetProcAddress(KernelBaseDll, "MapViewOfFile3"));
}
if (!virtualAlloc2Ptr)
{
Log(LogLevel::Error, "Could not load new Windows virtual memory functions, fast memory is disabled.\n");
}
#else
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &SigsegvHandler;
sa.sa_flags = SA_SIGINFO;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, &OldSaSegv);
#ifdef __APPLE__
sigaction(SIGBUS, &sa, &OldSaBus);
#endif
#endif
}
void ARMJIT_Memory::UnregisterFaultHandler()
{
#ifdef _WIN32
if (ExceptionHandlerHandle)
{
RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
ExceptionHandlerHandle = nullptr;
}
if (KernelBaseDll)
{
FreeLibrary(KernelBaseDll);
KernelBaseDll = nullptr;
}
#else
sigaction(SIGSEGV, &OldSaSegv, nullptr);
#ifdef __APPLE__
sigaction(SIGBUS, &OldSaBus, nullptr);
#endif
#endif
}
bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
{
    if (nds.JIT.JITCompiler.IsJITFault(faultDesc.FaultPC))
@@ -632,7 +822,7 @@ bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
        u8* memStatus = nds.CurCPU == 0 ? nds.JIT.Memory.MappingStatus9 : nds.JIT.Memory.MappingStatus7;
-        if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
+        if (memStatus[faultDesc.EmulatedFaultAddr >> PageShift] == memstate_Unmapped)
            rewriteToSlowPath = !nds.JIT.Memory.MapAtAddress(faultDesc.EmulatedFaultAddr);
        if (rewriteToSlowPath)
@@ -643,10 +833,9 @@ bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
    return false;
}
-const u64 AddrSpaceSize = 0x100000000;
ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
{
+    ARMJIT_Global::Init();
#if defined(__SWITCH__)
    MemoryBase = (u8*)aligned_alloc(0x1000, MemoryTotalSize);
    virtmemLock();
@@ -671,33 +860,27 @@ ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
    u8* basePtr = MemoryBaseCodeMem;
#elif defined(_WIN32)
-    ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler);
-    MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL);
-    MemoryBase = (u8*)VirtualAlloc(NULL, AddrSpaceSize*4, MEM_RESERVE, PAGE_READWRITE);
-    VirtualFree(MemoryBase, 0, MEM_RELEASE);
-    // this is incredible hacky
-    // but someone else is trying to go into our address space!
-    // Windows will very likely give them virtual memory starting at the same address
-    // as it is giving us now.
-    // That's why we don't use this address, but instead 4gb inwards
-    // I know this is terrible
-    FastMem9Start = MemoryBase + AddrSpaceSize;
-    FastMem7Start = MemoryBase + AddrSpaceSize*2;
-    MemoryBase = MemoryBase + AddrSpaceSize*3;
-    MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase);
+    if (virtualAlloc2Ptr)
+    {
+        MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, 0, MemoryTotalSize, nullptr);
+        MemoryBase = reinterpret_cast<u8*>(virtualAlloc2Ptr(nullptr, nullptr, VirtmemAreaSize,
+            MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
+            PAGE_NOACCESS,
+            nullptr, 0));
+        // split off placeholder and map base mapping
+        VirtualFree(MemoryBase, MemoryTotalSize, MEM_RELEASE|MEM_PRESERVE_PLACEHOLDER);
+        mapViewOfFile3Ptr(MemoryFile, nullptr, MemoryBase, 0, MemoryTotalSize, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
+        VirtmemPlaceholders.push_back({reinterpret_cast<uintptr_t>(MemoryBase)+MemoryTotalSize, AddrSpaceSize*2});
+    }
+    else
+    {
+        // old Windows version
+        MemoryBase = new u8[MemoryTotalSize];
+    }
#else
-    // this used to be allocated with three different mmaps
-    // The idea was to give the OS more freedom where to position the buffers,
-    // but something was bad about this so instead we take this vmem eating monster
-    // which seems to work better.
-    MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
-    munmap(MemoryBase, AddrSpaceSize*4);
-    FastMem9Start = MemoryBase;
-    FastMem7Start = MemoryBase + AddrSpaceSize;
-    MemoryBase = MemoryBase + AddrSpaceSize*2;
+    MemoryBase = (u8*)mmap(nullptr, VirtmemAreaSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
#if defined(__ANDROID__)
    Libandroid = Platform::DynamicLibrary_Load("libandroid.so");
@@ -730,20 +913,10 @@ ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
        Log(LogLevel::Error, "Failed to allocate memory using ftruncate! (%s)", strerror(errno));
    }
-    struct sigaction sa;
-    sa.sa_handler = nullptr;
-    sa.sa_sigaction = &SigsegvHandler;
-    sa.sa_flags = SA_SIGINFO;
-    sigemptyset(&sa.sa_mask);
-    sigaction(SIGSEGV, &sa, &OldSaSegv);
-#ifdef __APPLE__
-    sigaction(SIGBUS, &sa, &OldSaBus);
-#endif
    mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
-    u8* basePtr = MemoryBase;
#endif
+    FastMem9Start = MemoryBase+MemoryTotalSize;
+    FastMem7Start = static_cast<u8*>(FastMem9Start)+AddrSpaceSize;
}
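
Editor's note: with this constructor the layout is a single VirtmemAreaSize reservation: the shared-memory backing block first, then the 4 GB ARM9 fastmem window, then the 4 GB ARM7 window. A self-contained illustration of the pointer arithmetic (constants other than AddrSpaceSize are placeholder values, not the real block sizes):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const uint64_t AddrSpaceSize   = 0x100000000ull;              // 4 GB per emulated CPU
        const uint64_t MemoryTotalSize = 0x1000000;                   // placeholder for the backing block size
        const uint64_t VirtmemAreaSize = AddrSpaceSize * 2 + MemoryTotalSize;
        const uint64_t base     = 0x200000000000ull;                  // pretend reservation base
        const uint64_t fastMem9 = base + MemoryTotalSize;             // ARM9 window starts after the backing block
        const uint64_t fastMem7 = fastMem9 + AddrSpaceSize;           // ARM7 window follows
        printf("reserve %#llx bytes: backing @%#llx, fastmem9 @%#llx, fastmem7 @%#llx\n",
               (unsigned long long)VirtmemAreaSize, (unsigned long long)base,
               (unsigned long long)fastMem9, (unsigned long long)fastMem7);
        return 0;
    }

Any access to an unmapped page inside either window faults into the handler registered by ARMJIT_Global and either maps the mirror on demand or rewrites the access to the slow path.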
ARMJIT_Memory::~ARMJIT_Memory() noexcept ARMJIT_Memory::~ARMJIT_Memory() noexcept
@@ -764,13 +937,21 @@ ARMJIT_Memory::~ARMJIT_Memory() noexcept
    free(MemoryBase);
    MemoryBase = nullptr;
#elif defined(_WIN32)
+    if (virtualAlloc2Ptr)
+    {
    if (MemoryBase)
    {
-        bool viewUnmapped = UnmapViewOfFile(MemoryBase);
+        bool viewUnmapped = UnmapViewOfFileEx(MemoryBase, MEM_PRESERVE_PLACEHOLDER);
        assert(viewUnmapped);
+        bool viewCoalesced = VirtualFree(MemoryBase, VirtmemAreaSize, MEM_RELEASE|MEM_COALESCE_PLACEHOLDERS);
+        assert(viewCoalesced);
+        bool freeEverything = VirtualFree(MemoryBase, 0, MEM_RELEASE);
+        assert(freeEverything);
        MemoryBase = nullptr;
        FastMem9Start = nullptr;
        FastMem7Start = nullptr;
+        printf("unmappinged everything\n");
    }
    if (MemoryFile)
@@ -778,20 +959,15 @@ ARMJIT_Memory::~ARMJIT_Memory() noexcept
        CloseHandle(MemoryFile);
        MemoryFile = INVALID_HANDLE_VALUE;
    }
+    }
-    if (ExceptionHandlerHandle)
+    else
    {
-        RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
-        ExceptionHandlerHandle = nullptr;
+        delete[] MemoryBase;
    }
#else
-    sigaction(SIGSEGV, &OldSaSegv, nullptr);
-#ifdef __APPLE__
-    sigaction(SIGBUS, &OldSaBus, nullptr);
-#endif
    if (MemoryBase)
    {
-        munmap(MemoryBase, MemoryTotalSize);
+        munmap(MemoryBase, VirtmemAreaSize);
        MemoryBase = nullptr;
        FastMem9Start = nullptr;
        FastMem7Start = nullptr;
@@ -812,6 +988,8 @@ ARMJIT_Memory::~ARMJIT_Memory() noexcept
#endif
#endif
+    ARMJIT_Global::DeInit();
}
void ARMJIT_Memory::Reset() noexcept void ARMJIT_Memory::Reset() noexcept
@@ -834,17 +1012,6 @@ void ARMJIT_Memory::Reset() noexcept
bool ARMJIT_Memory::IsFastmemCompatible(int region) const noexcept
{
-#ifdef _WIN32
-    /*
-        TODO: with some hacks, the smaller shared WRAM regions
-        could be mapped in some occaisons as well
-    */
-    if (region == memregion_DTCM
-        || region == memregion_SharedWRAM
-        || region == memregion_NewSharedWRAM_B
-        || region == memregion_NewSharedWRAM_C)
-        return false;
-#endif
    return OffsetsPerRegion[region] != UINT32_MAX;
}


@@ -23,6 +23,7 @@
#include "MemConstants.h"
#ifdef JIT_ENABLED
+#  include <mutex>
#  include "TinyVector.h"
#  include "ARM.h"
#  if defined(__SWITCH__)
@@ -48,23 +49,22 @@ class Compiler;
class ARMJIT;
#endif
+static constexpr u32 LargePageSize = 0x4000;
+static constexpr u32 RegularPageSize = 0x1000;
constexpr u32 RoundUp(u32 size) noexcept
{
-#ifdef _WIN32
-    return (size + 0xFFFF) & ~0xFFFF;
-#else
-    return size;
-#endif
+    return (size + LargePageSize - 1) & ~(LargePageSize - 1);
}
-const u32 MemBlockMainRAMOffset = 0;
-const u32 MemBlockSWRAMOffset = RoundUp(MainRAMMaxSize);
-const u32 MemBlockARM7WRAMOffset = MemBlockSWRAMOffset + RoundUp(SharedWRAMSize);
-const u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(ARM7WRAMSize);
-const u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize);
-const u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(NWRAMSize);
-const u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(NWRAMSize);
-const u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(NWRAMSize);
+static constexpr u32 MemBlockMainRAMOffset = 0;
+static constexpr u32 MemBlockSWRAMOffset = RoundUp(MainRAMMaxSize);
+static constexpr u32 MemBlockARM7WRAMOffset = MemBlockSWRAMOffset + RoundUp(SharedWRAMSize);
+static constexpr u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(ARM7WRAMSize);
+static constexpr u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize);
+static constexpr u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(NWRAMSize);
+static constexpr u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(NWRAMSize);
+static constexpr u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(NWRAMSize);
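
Editor's note: RoundUp now pads every backing block to LargePageSize (16 KB) on all platforms, instead of 64 KB on Windows only, so each MemBlock*Offset stays page-aligned even on hosts with 16 KB pages. A quick, illustrative check of the arithmetic (values are arbitrary, not the real block sizes):

    #include <cstdint>
    #include <cstdio>

    constexpr uint32_t LargePageSize = 0x4000;

    constexpr uint32_t RoundUp(uint32_t size)
    {
        return (size + LargePageSize - 1) & ~(LargePageSize - 1);
    }

    int main()
    {
        // multiples of 16 KB pass through unchanged; anything else rounds up to the next 16 KB boundary
        printf("%#x -> %#x\n", 0x8000u, RoundUp(0x8000));   // stays 0x8000
        printf("%#x -> %#x\n", 0xC001u, RoundUp(0xC001));   // becomes 0x10000
        return 0;
    }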
class ARMJIT_Memory
{
@@ -137,6 +137,14 @@ public:
    bool IsFastmemCompatible(int region) const noexcept;
    void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) const noexcept;
    bool MapAtAddress(u32 addr) noexcept;
+    static bool IsFastMemSupported();
+    static void RegisterFaultHandler();
+    static void UnregisterFaultHandler();
+    static u32 PageSize;
+    static u32 PageShift;
private:
    friend class Compiler;
    struct Mapping
@@ -162,14 +170,22 @@ private:
    void* FastMem9Start;
    void* FastMem7Start;
    u8* MemoryBase = nullptr;
#if defined(__SWITCH__)
    VirtmemReservation* FastMem9Reservation, *FastMem7Reservation;
    u8* MemoryBaseCodeMem;
#elif defined(_WIN32)
+    struct VirtmemPlaceholder
+    {
+        uintptr_t Start;
+        size_t Size;
+    };
+    std::vector<VirtmemPlaceholder> VirtmemPlaceholders;
    static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo);
    HANDLE MemoryFile = INVALID_HANDLE_VALUE;
-    LPVOID ExceptionHandlerHandle = nullptr;
#else
    static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext);
    int MemoryFile = -1;
#endif


@@ -176,9 +176,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
    else
        MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
    if (Num == 0)
-        CALL((void*)&ARMv5JumpToTrampoline);
+        ABI_CallFunction(ARMv5JumpToTrampoline);
    else
-        CALL((void*)&ARMv4JumpToTrampoline);
+        ABI_CallFunction(ARMv4JumpToTrampoline);
    PopRegs(restoreCPSR, true);


@@ -21,19 +21,13 @@
#include "../ARMJIT.h"
#include "../ARMInterpreter.h"
#include "../NDS.h"
+#include "../ARMJIT_Global.h"
#include <assert.h>
#include <stdarg.h>
#include "../dolphin/CommonFuncs.h"
-#ifdef _WIN32
-#include <windows.h>
-#else
-#include <sys/mman.h>
-#include <unistd.h>
-#endif
using namespace Gen;
using namespace Common;
@@ -222,46 +216,21 @@ void Compiler::A_Comp_MSR()
            MOV(32, R(ABI_PARAM3), R(RCPSR));
            MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
            MOV(64, R(ABI_PARAM1), R(RCPU));
-            CALL((void*)&UpdateModeTrampoline);
+            ABI_CallFunction(UpdateModeTrampoline);
            PopRegs(true, true);
        }
    }
}
-/*
-    We'll repurpose this .bss memory
-*/
-u8 CodeMemory[1024 * 1024 * 32];
Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
{
-    {
-    #ifdef _WIN32
-        SYSTEM_INFO sysInfo;
-        GetSystemInfo(&sysInfo);
-        u64 pageSize = (u64)sysInfo.dwPageSize;
-    #else
-        u64 pageSize = sysconf(_SC_PAGE_SIZE);
-    #endif
-        u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize);
-        u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned;
-    #ifdef _WIN32
-        DWORD dummy;
-        VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
-    #elif defined(__APPLE__)
-        pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
-    #else
-        mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
-    #endif
-        ResetStart = pageAligned;
-        CodeMemSize = alignedSize;
-    }
+    ARMJIT_Global::Init();
+    CodeMemBase = static_cast<u8*>(ARMJIT_Global::AllocateCodeMem());
+    CodeMemSize = ARMJIT_Global::CodeMemorySliceSize;
+    ResetStart = CodeMemBase;
    Reset();
@@ -475,6 +444,13 @@ Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
    FarSize = (ResetStart + CodeMemSize) - FarStart;
}
Compiler::~Compiler()
{
ARMJIT_Global::FreeCodeMem(CodeMemBase);
ARMJIT_Global::DeInit();
}
void Compiler::LoadCPSR()
{
    assert(!CPSRDirty);
@@ -684,7 +660,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
        if (ConstantCycles)
            ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
-        JMP((u8*)&ARM_Ret, true);
+        ABI_TailCall(ARM_Ret);
    }
}
@@ -846,7 +822,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
    if (ConstantCycles)
        ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
-    JMP((u8*)ARM_Ret, true);
+    ABI_TailCall(ARM_Ret);
#ifdef JIT_PROFILING_ENABLED
    CreateMethod("JIT_Block_%d_%d_%08X", (void*)res, Num, Thumb, instrs[0].Addr);


@@ -84,6 +84,7 @@ class Compiler : public Gen::XEmitter
{
public:
    explicit Compiler(melonDS::NDS& nds);
+    ~Compiler();
    void Reset();
@@ -256,6 +257,7 @@ public:
    std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {};
+    u8* CodeMemBase;
    u8* ResetStart {};
    u32 CodeMemSize {};


@@ -316,24 +316,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
        {
            switch (size | NDS.ConsoleType)
            {
-            case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
-            case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
-            case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
-            case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
-            case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
-            case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
+            case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
+            case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
+            case 8: ABI_CallFunction(&SlowWrite9<u8, 0>); break;
+            case 33: ABI_CallFunction(&SlowWrite9<u32, 1>); break;
+            case 17: ABI_CallFunction(&SlowWrite9<u16, 1>); break;
+            case 9: ABI_CallFunction(&SlowWrite9<u8, 1>); break;
            }
        }
        else
        {
            switch (size | NDS.ConsoleType)
            {
-            case 32: CALL((void*)&SlowRead9<u32, 0>); break;
-            case 16: CALL((void*)&SlowRead9<u16, 0>); break;
-            case 8: CALL((void*)&SlowRead9<u8, 0>); break;
-            case 33: CALL((void*)&SlowRead9<u32, 1>); break;
-            case 17: CALL((void*)&SlowRead9<u16, 1>); break;
-            case 9: CALL((void*)&SlowRead9<u8, 1>); break;
+            case 32: ABI_CallFunction(&SlowRead9<u32, 0>); break;
+            case 16: ABI_CallFunction(&SlowRead9<u16, 0>); break;
+            case 8: ABI_CallFunction(&SlowRead9<u8, 0>); break;
+            case 33: ABI_CallFunction(&SlowRead9<u32, 1>); break;
+            case 17: ABI_CallFunction(&SlowRead9<u16, 1>); break;
+            case 9: ABI_CallFunction(&SlowRead9<u8, 1>); break;
            }
        }
    }
@@ -347,24 +347,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
            switch (size | NDS.ConsoleType)
            {
-            case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
-            case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
-            case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
-            case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
-            case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
-            case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
+            case 32: ABI_CallFunction(&SlowWrite7<u32, 0>); break;
+            case 16: ABI_CallFunction(&SlowWrite7<u16, 0>); break;
+            case 8: ABI_CallFunction(&SlowWrite7<u8, 0>); break;
+            case 33: ABI_CallFunction(&SlowWrite7<u32, 1>); break;
+            case 17: ABI_CallFunction(&SlowWrite7<u16, 1>); break;
+            case 9: ABI_CallFunction(&SlowWrite7<u8, 1>); break;
            }
        }
        else
        {
            switch (size | NDS.ConsoleType)
            {
-            case 32: CALL((void*)&SlowRead7<u32, 0>); break;
-            case 16: CALL((void*)&SlowRead7<u16, 0>); break;
-            case 8: CALL((void*)&SlowRead7<u8, 0>); break;
-            case 33: CALL((void*)&SlowRead7<u32, 1>); break;
-            case 17: CALL((void*)&SlowRead7<u16, 1>); break;
-            case 9: CALL((void*)&SlowRead7<u8, 1>); break;
+            case 32: ABI_CallFunction(&SlowRead7<u32, 0>); break;
+            case 16: ABI_CallFunction(&SlowRead7<u16, 0>); break;
+            case 8: ABI_CallFunction(&SlowRead7<u8, 0>); break;
+            case 33: ABI_CallFunction(&SlowRead7<u32, 1>); break;
+            case 17: ABI_CallFunction(&SlowRead7<u16, 1>); break;
+            case 9: ABI_CallFunction(&SlowRead7<u8, 1>); break;
            }
        }
    }
@@ -526,10 +526,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
        switch (Num * 2 | NDS.ConsoleType)
        {
-        case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
-        case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
-        case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
-        case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
+        case 0: ABI_CallFunction(&SlowBlockTransfer9<false, 0>); break;
+        case 1: ABI_CallFunction(&SlowBlockTransfer9<false, 1>); break;
+        case 2: ABI_CallFunction(&SlowBlockTransfer7<false, 0>); break;
+        case 3: ABI_CallFunction(&SlowBlockTransfer7<false, 1>); break;
        }
        PopRegs(false, false);
@@ -630,10 +630,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
        switch (Num * 2 | NDS.ConsoleType)
        {
-        case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
-        case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
-        case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
-        case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
+        case 0: ABI_CallFunction(&SlowBlockTransfer9<true, 0>); break;
+        case 1: ABI_CallFunction(&SlowBlockTransfer9<true, 1>); break;
+        case 2: ABI_CallFunction(&SlowBlockTransfer7<true, 0>); break;
+        case 3: ABI_CallFunction(&SlowBlockTransfer7<true, 1>); break;
        }
        ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
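
Editor's note: the switch from raw CALL((void*)&...) to ABI_CallFunction matters because JIT code now lives in a dynamically allocated slice that may sit more than ±2 GB away from the statically linked helpers, so a rel32 call cannot always encode the displacement. The far-call fallback boils down to this pattern, using the emitter's own helpers (sketch, not the verbatim emitter code):

    // Pick a near rel32 call when the target is in range, otherwise go through a register.
    const u64 target   = reinterpret_cast<u64>(funcPtr);
    const u64 distance = target - (reinterpret_cast<u64>(codeCursor) + 5); // rel32 is relative to the next instruction
    if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL)
    {
        MOV(64, R(RAX), Imm64(target)); // absolute address in RAX
        CALLptr(R(RAX));                // indirect call reaches anywhere in the address space
    }
    else
    {
        CALL(funcPtr);                  // fits in a 32-bit displacement
    }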


@@ -97,9 +97,14 @@ if (ENABLE_JIT)
        ARMJIT.cpp
        ARMJIT_Memory.cpp
+        ARMJIT_Global.cpp
        dolphin/CommonFuncs.cpp)
+    if (WIN32)
+        target_link_libraries(core PRIVATE onecore)
+    endif()
    if (ARCHITECTURE STREQUAL x86_64)
        target_sources(core PRIVATE
            dolphin/x64ABI.cpp


@@ -74,7 +74,7 @@ const s32 kIterationCycleMargin = 8;
//
// timings for GBA slot and wifi are set up at runtime
-NDS* NDS::Current = nullptr;
+thread_local NDS* NDS::Current = nullptr;
NDS::NDS() noexcept :
    NDS(
@@ -128,6 +128,7 @@ NDS::NDS(NDSArgs&& args, int type, void* userdata) noexcept :
    MainRAM = JIT.Memory.GetMainRAM();
    SharedWRAM = JIT.Memory.GetSharedWRAM();
    ARM7WRAM = JIT.Memory.GetARM7WRAM();
}
NDS::~NDS() noexcept
@@ -894,6 +895,8 @@ void NDS::RunSystemSleep(u64 timestamp)
template <CPUExecuteMode cpuMode>
u32 NDS::RunFrame()
{
+    Current = this;
    FrameStartTimestamp = SysTimestamp;
    GPU.TotalScanlines = 0;
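
Editor's note: making NDS::Current thread_local and publishing it at the top of RunFrame removes the frontend's manual bookkeeping; each emulation thread sees its own current instance, which is what allows several NDS objects to run JIT-compiled code concurrently. An illustrative sketch of the resulting usage (not frontend code; two fully constructed instances assumed):

    #include "NDS.h"
    #include <functional>
    #include <thread>

    void RunInstance(melonDS::NDS& nds)
    {
        for (int frame = 0; frame < 60; frame++)
            nds.RunFrame(); // sets NDS::Current = &nds for this thread before emulating
    }

    void TwoInstancesSketch(melonDS::NDS& a, melonDS::NDS& b)
    {
        std::thread t1(RunInstance, std::ref(a));
        std::thread t2(RunInstance, std::ref(b));
        t1.join();
        t2.join();
    }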


@@ -541,8 +541,8 @@ public:
    NDS& operator=(const NDS&) = delete;
    NDS(NDS&&) = delete;
    NDS& operator=(NDS&&) = delete;
-    // The frontend should set and unset this manually after creating and destroying the NDS object.
-    [[deprecated("Temporary workaround until JIT code generation is revised to accommodate multiple NDS objects.")]] static NDS* Current;
+    static thread_local NDS* Current;
protected:
    explicit NDS(NDSArgs&& args, int type, void* userdata) noexcept;
    virtual void DoSavestateExtra(Savestate* file) {}


@@ -1019,6 +1019,28 @@ public:
            CALL(ptr);
        }
    }
template <typename FunctionPointer>
void ABI_TailCall(FunctionPointer func)
{
static_assert(std::is_pointer<FunctionPointer>() &&
std::is_function<std::remove_pointer_t<FunctionPointer>>(),
"Supplied type must be a function pointer.");
const u8* ptr = reinterpret_cast<const u8*>(func);
const u64 address = reinterpret_cast<u64>(ptr);
const u64 distance = address - (reinterpret_cast<u64>(code) + 5);
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL)
{
// Far call
MOV(64, R(RAX), Imm64(address));
JMPptr(R(RAX));
}
else
{
JMP(ptr, true);
}
}
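
Editor's note: ABI_TailCall mirrors the existing far-call handling but ends the block with a jump instead of a call, so no return address is pushed and ARM_Ret returns straight to the dispatcher. Usage in the block epilogue (see the ARMJIT_x64 compiler changes above):

    // instead of JMP((u8*)&ARM_Ret, true), which only encodes a rel32 displacement,
    ABI_TailCall(ARM_Ret); // emits a rel32 JMP when in range, else MOV RAX, imm64; JMP RAX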
template <typename FunctionPointer>
void ABI_CallFunctionC16(FunctionPointer func, u16 param1)


@@ -165,7 +165,6 @@ EmuInstance::~EmuInstance()
    audioDeInit();
    inputDeInit();
-    NDS::Current = nullptr;
    if (nds)
    {
        saveRTCData();
@@ -1339,7 +1338,6 @@ bool EmuInstance::updateConsole() noexcept
    renderLock.lock();
    if ((!nds) || (consoleType != nds->ConsoleType))
    {
-        NDS::Current = nullptr;
        if (nds)
        {
            saveRTCData();
@@ -1351,7 +1349,6 @@
        else
            nds = new NDS(std::move(ndsargs), this);
-        NDS::Current = nds;
        nds->Reset();
        loadRTCData();
        //emuThread->updateVideoRenderer(); // not actually needed?


@@ -82,9 +82,6 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new
    ui->chkJITBranchOptimisations->setChecked(cfg.GetBool("JIT.BranchOptimisations"));
    ui->chkJITLiteralOptimisations->setChecked(cfg.GetBool("JIT.LiteralOptimisations"));
    ui->chkJITFastMemory->setChecked(cfg.GetBool("JIT.FastMemory"));
-#ifdef __APPLE__
-    ui->chkJITFastMemory->setDisabled(true);
-#endif
    ui->spnJITMaximumBlockSize->setValue(cfg.GetInt("JIT.MaxBlockSize"));
#else
    ui->chkEnableJIT->setDisabled(true);
@@ -541,9 +538,7 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled()
    bool disabled = !ui->chkEnableJIT->isChecked();
    ui->chkJITBranchOptimisations->setDisabled(disabled);
    ui->chkJITLiteralOptimisations->setDisabled(disabled);
-#ifndef __APPLE__
-    ui->chkJITFastMemory->setDisabled(disabled);
-#endif
+    ui->chkJITFastMemory->setDisabled(disabled || !ARMJIT_Memory::IsFastMemSupported());
    ui->spnJITMaximumBlockSize->setDisabled(disabled);
    on_cbGdbEnabled_toggled();