Multiinstance jit (#2201)
* works on Linux x64, still needs to be fixed for everything else
* use lots of PROT_NONE memory to reliably reserve virtual address space
* multi-instance fastmem on Linux
* Windows
* blarg
* disable fastmem if the page size is not 4 KB
* fix fastmem dialog option
* make aarch64 work as well
* fastmem: 16 KB page support
This commit is contained in:
parent
cb7af652f5
commit
99ce959913
|
@ -30,6 +30,7 @@
|
||||||
#include "ARMJIT_Internal.h"
|
#include "ARMJIT_Internal.h"
|
||||||
#include "ARMJIT_Memory.h"
|
#include "ARMJIT_Memory.h"
|
||||||
#include "ARMJIT_Compiler.h"
|
#include "ARMJIT_Compiler.h"
|
||||||
|
#include "ARMJIT_Global.h"
|
||||||
|
|
||||||
#include "ARMInterpreter_ALU.h"
|
#include "ARMInterpreter_ALU.h"
|
||||||
#include "ARMInterpreter_LoadStore.h"
|
#include "ARMInterpreter_LoadStore.h"
|
||||||
|
@ -467,6 +468,16 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
|
||||||
};
|
};
|
||||||
#undef F
|
#undef F
|
||||||
|
|
||||||
|
ARMJIT::ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
|
||||||
|
NDS(nds),
|
||||||
|
Memory(nds),
|
||||||
|
JITCompiler(nds),
|
||||||
|
MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
|
||||||
|
LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
|
||||||
|
BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
|
||||||
|
FastMemory((jit.has_value() ? jit->FastMemory : false) && ARMJIT_Memory::IsFastMemSupported())
|
||||||
|
{}
|
||||||
|
|
||||||
void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
|
void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
|
||||||
{
|
{
|
||||||
auto it = RestoreCandidates.find(block->InstrHash);
|
auto it = RestoreCandidates.find(block->InstrHash);
|
||||||
|
@ -483,6 +494,7 @@ void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
|
||||||
|
|
||||||
void ARMJIT::SetJITArgs(JITArgs args) noexcept
|
void ARMJIT::SetJITArgs(JITArgs args) noexcept
|
||||||
{
|
{
|
||||||
|
args.FastMemory = args.FastMemory && ARMJIT_Memory::IsFastMemSupported();
|
||||||
args.MaxBlockSize = std::clamp(args.MaxBlockSize, 1u, 32u);
|
args.MaxBlockSize = std::clamp(args.MaxBlockSize, 1u, 32u);
|
||||||
|
|
||||||
if (MaxBlockSize != args.MaxBlockSize
|
if (MaxBlockSize != args.MaxBlockSize
|
||||||
|
@ -499,36 +511,22 @@ void ARMJIT::SetJITArgs(JITArgs args) noexcept
|
||||||
|
|
||||||
void ARMJIT::SetMaxBlockSize(int size) noexcept
|
void ARMJIT::SetMaxBlockSize(int size) noexcept
|
||||||
{
|
{
|
||||||
size = std::clamp(size, 1, 32);
|
SetJITArgs(JITArgs{static_cast<unsigned>(size), LiteralOptimizations, LiteralOptimizations, FastMemory});
|
||||||
|
|
||||||
if (size != MaxBlockSize)
|
|
||||||
ResetBlockCache();
|
|
||||||
|
|
||||||
MaxBlockSize = size;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARMJIT::SetLiteralOptimizations(bool enabled) noexcept
|
void ARMJIT::SetLiteralOptimizations(bool enabled) noexcept
|
||||||
{
|
{
|
||||||
if (LiteralOptimizations != enabled)
|
SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), enabled, BranchOptimizations, FastMemory});
|
||||||
ResetBlockCache();
|
|
||||||
|
|
||||||
LiteralOptimizations = enabled;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARMJIT::SetBranchOptimizations(bool enabled) noexcept
|
void ARMJIT::SetBranchOptimizations(bool enabled) noexcept
|
||||||
{
|
{
|
||||||
if (BranchOptimizations != enabled)
|
SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), LiteralOptimizations, enabled, FastMemory});
|
||||||
ResetBlockCache();
|
|
||||||
|
|
||||||
BranchOptimizations = enabled;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARMJIT::SetFastMemory(bool enabled) noexcept
|
void ARMJIT::SetFastMemory(bool enabled) noexcept
|
||||||
{
|
{
|
||||||
if (FastMemory != enabled)
|
SetJITArgs(JITArgs{static_cast<unsigned>(MaxBlockSize), LiteralOptimizations, BranchOptimizations, enabled});
|
||||||
ResetBlockCache();
|
|
||||||
|
|
||||||
FastMemory = enabled;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARMJIT::CompileBlock(ARM* cpu) noexcept
|
void ARMJIT::CompileBlock(ARM* cpu) noexcept
|
||||||
|
@ -918,7 +916,7 @@ void ARMJIT::CompileBlock(ARM* cpu) noexcept
|
||||||
|
|
||||||
AddressRange* region = CodeMemRegions[addressRanges[j] >> 27];
|
AddressRange* region = CodeMemRegions[addressRanges[j] >> 27];
|
||||||
|
|
||||||
if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000) / 512]))
|
if (!PageContainsCode(&region[(addressRanges[j] & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
|
||||||
Memory.SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true);
|
Memory.SetCodeProtection(addressRanges[j] >> 27, addressRanges[j] & 0x7FFFFFF, true);
|
||||||
|
|
||||||
AddressRange* range = &region[(addressRanges[j] & 0x7FFFFFF) / 512];
|
AddressRange* range = &region[(addressRanges[j] & 0x7FFFFFF) / 512];
|
||||||
|
@ -971,7 +969,7 @@ void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept
|
||||||
range->Blocks.Remove(i);
|
range->Blocks.Remove(i);
|
||||||
|
|
||||||
if (range->Blocks.Length == 0
|
if (range->Blocks.Length == 0
|
||||||
&& !PageContainsCode(&region[(localAddr & 0x7FFF000) / 512]))
|
&& !PageContainsCode(&region[(localAddr & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
|
||||||
{
|
{
|
||||||
Memory.SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false);
|
Memory.SetCodeProtection(localAddr >> 27, localAddr & 0x7FFFFFF, false);
|
||||||
}
|
}
|
||||||
|
@ -1005,7 +1003,7 @@ void ARMJIT::InvalidateByAddr(u32 localAddr) noexcept
|
||||||
|
|
||||||
if (otherRange->Blocks.Length == 0)
|
if (otherRange->Blocks.Length == 0)
|
||||||
{
|
{
|
||||||
if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000) / 512]))
|
if (!PageContainsCode(&otherRegion[(addr & 0x7FFF000 & ~(Memory.PageSize - 1)) / 512], Memory.PageSize))
|
||||||
Memory.SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false);
|
Memory.SetCodeProtection(addr >> 27, addr & 0x7FFFFFF, false);
|
||||||
|
|
||||||
otherRange->Code = 0;
|
otherRange->Code = 0;
|
||||||
|
|
11
src/ARMJIT.h
11
src/ARMJIT.h
|
@ -44,15 +44,7 @@ class JitBlock;
|
||||||
class ARMJIT
|
class ARMJIT
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
|
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept;
|
||||||
NDS(nds),
|
|
||||||
Memory(nds),
|
|
||||||
JITCompiler(nds),
|
|
||||||
MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
|
|
||||||
LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
|
|
||||||
BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
|
|
||||||
FastMemory(jit.has_value() ? jit->FastMemory : false)
|
|
||||||
{}
|
|
||||||
~ARMJIT() noexcept;
|
~ARMJIT() noexcept;
|
||||||
void InvalidateByAddr(u32) noexcept;
|
void InvalidateByAddr(u32) noexcept;
|
||||||
void CheckAndInvalidateWVRAM(int) noexcept;
|
void CheckAndInvalidateWVRAM(int) noexcept;
|
||||||
|
@ -80,6 +72,7 @@ private:
|
||||||
bool LiteralOptimizations = false;
|
bool LiteralOptimizations = false;
|
||||||
bool BranchOptimizations = false;
|
bool BranchOptimizations = false;
|
||||||
bool FastMemory = false;
|
bool FastMemory = false;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
melonDS::NDS& NDS;
|
melonDS::NDS& NDS;
|
||||||
TinyVector<u32> InvalidLiterals {};
|
TinyVector<u32> InvalidLiterals {};
|
||||||
|
|
|
@ -22,17 +22,7 @@
|
||||||
#include "../ARMInterpreter.h"
|
#include "../ARMInterpreter.h"
|
||||||
#include "../ARMJIT.h"
|
#include "../ARMJIT.h"
|
||||||
#include "../NDS.h"
|
#include "../NDS.h"
|
||||||
|
#include "../ARMJIT_Global.h"
|
||||||
#if defined(__SWITCH__)
|
|
||||||
#include <switch.h>
|
|
||||||
|
|
||||||
extern char __start__;
|
|
||||||
#elif defined(_WIN32)
|
|
||||||
#include <windows.h>
|
|
||||||
#else
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
@ -66,11 +56,6 @@ const int RegisterCache<Compiler, ARM64Reg>::NativeRegsAvailable = 15;
|
||||||
|
|
||||||
const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});
|
const BitSet32 CallerSavedPushRegs({W8, W9, W10, W11, W12, W13, W14, W15});
|
||||||
|
|
||||||
const int JitMemSize = 16 * 1024 * 1024;
|
|
||||||
#ifndef __SWITCH__
|
|
||||||
u8 JitMem[JitMemSize];
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void Compiler::MovePC()
|
void Compiler::MovePC()
|
||||||
{
|
{
|
||||||
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
|
ADD(MapReg(15), MapReg(15), Thumb ? 2 : 4);
|
||||||
|
@ -260,29 +245,12 @@ Compiler::Compiler(melonDS::NDS& nds) : Arm64Gen::ARM64XEmitter(), NDS(nds)
|
||||||
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
|
SetCodeBase((u8*)JitRWStart, (u8*)JitRXStart);
|
||||||
JitMemMainSize = JitMemSize;
|
JitMemMainSize = JitMemSize;
|
||||||
#else
|
#else
|
||||||
#ifdef _WIN32
|
ARMJIT_Global::Init();
|
||||||
SYSTEM_INFO sysInfo;
|
|
||||||
GetSystemInfo(&sysInfo);
|
|
||||||
|
|
||||||
u64 pageSize = (u64)sysInfo.dwPageSize;
|
CodeMemBase = ARMJIT_Global::AllocateCodeMem();
|
||||||
#else
|
|
||||||
u64 pageSize = sysconf(_SC_PAGE_SIZE);
|
|
||||||
#endif
|
|
||||||
u8* pageAligned = (u8*)(((u64)JitMem & ~(pageSize - 1)) + pageSize);
|
|
||||||
u64 alignedSize = (((u64)JitMem + sizeof(JitMem)) & ~(pageSize - 1)) - (u64)pageAligned;
|
|
||||||
|
|
||||||
#if defined(_WIN32)
|
SetCodeBase(reinterpret_cast<u8*>(CodeMemBase), reinterpret_cast<u8*>(CodeMemBase));
|
||||||
DWORD dummy;
|
JitMemMainSize = ARMJIT_Global::CodeMemorySliceSize;
|
||||||
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
|
|
||||||
#elif defined(__APPLE__)
|
|
||||||
pageAligned = (u8*)mmap(NULL, 1024*1024*16, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_JIT,-1, 0);
|
|
||||||
nds.JIT.JitEnableWrite();
|
|
||||||
#else
|
|
||||||
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
SetCodeBase(pageAligned, pageAligned);
|
|
||||||
JitMemMainSize = alignedSize;
|
|
||||||
#endif
|
#endif
|
||||||
SetCodePtr(0);
|
SetCodePtr(0);
|
||||||
|
|
||||||
|
@ -493,6 +461,9 @@ Compiler::~Compiler()
|
||||||
free(JitRWBase);
|
free(JitRWBase);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
ARMJIT_Global::FreeCodeMem(CodeMemBase);
|
||||||
|
ARMJIT_Global::DeInit();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Compiler::LoadCycles()
|
void Compiler::LoadCycles()
|
||||||
|
|
|
@ -275,6 +275,7 @@ public:
|
||||||
void* JitRWStart;
|
void* JitRWStart;
|
||||||
void* JitRXStart;
|
void* JitRXStart;
|
||||||
#endif
|
#endif
|
||||||
|
void* CodeMemBase;
|
||||||
|
|
||||||
void* ReadBanked, *WriteBanked;
|
void* ReadBanked, *WriteBanked;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,118 @@
|
||||||
|
#include "ARMJIT_Global.h"
|
||||||
|
#include "ARMJIT_Memory.h"
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
#include <windows.h>
|
||||||
|
#else
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
namespace melonDS
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ARMJIT_Global
|
||||||
|
{
|
||||||
|
|
||||||
|
std::mutex globalMutex;
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
|
static constexpr size_t NumCodeMemSlices = 4;
|
||||||
|
static constexpr size_t CodeMemoryAlignedSize = NumCodeMemSlices * CodeMemorySliceSize;
|
||||||
|
|
||||||
|
// I haven't heard of pages larger than 16 KB
|
||||||
|
u8 CodeMemory[CodeMemoryAlignedSize + 16*1024];
|
||||||
|
|
||||||
|
u32 AvailableCodeMemSlices = (1 << NumCodeMemSlices) - 1;
|
||||||
|
|
||||||
|
// Returns the first address at or after the start of CodeMemory that is a
// multiple of 16 KB (the address is rounded up, never down).
u8* GetAlignedCodeMemoryStart()
{
    const intptr_t alignMask = static_cast<intptr_t>(16*1024-1);
    const intptr_t bufferStart = reinterpret_cast<intptr_t>(CodeMemory);
    const intptr_t roundedUp = (bufferStart + alignMask) & ~alignMask;
    return reinterpret_cast<u8*>(roundedUp);
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int RefCounter = 0;
|
||||||
|
|
||||||
|
void* AllocateCodeMem()
|
||||||
|
{
|
||||||
|
std::lock_guard guard(globalMutex);
|
||||||
|
|
||||||
|
#ifndef __APPLE__
|
||||||
|
if (AvailableCodeMemSlices)
|
||||||
|
{
|
||||||
|
int slice = __builtin_ctz(AvailableCodeMemSlices);
|
||||||
|
AvailableCodeMemSlices &= ~(1 << slice);
|
||||||
|
//printf("allocating slice %d\n", slice);
|
||||||
|
return &GetAlignedCodeMemoryStart()[slice * CodeMemorySliceSize];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// allocate
|
||||||
|
#ifdef _WIN32
|
||||||
|
return VirtualAlloc(nullptr, CodeMemorySliceSize, MEM_RESERVE|MEM_COMMIT, PAGE_EXECUTE_READWRITE);
|
||||||
|
#else
|
||||||
|
//printf("mmaping...\n");
|
||||||
|
return mmap(nullptr, CodeMemorySliceSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void FreeCodeMem(void* codeMem)
|
||||||
|
{
|
||||||
|
std::lock_guard guard(globalMutex);
|
||||||
|
|
||||||
|
for (int i = 0; i < NumCodeMemSlices; i++)
|
||||||
|
{
|
||||||
|
if (codeMem == &GetAlignedCodeMemoryStart()[CodeMemorySliceSize * i])
|
||||||
|
{
|
||||||
|
//printf("freeing slice\n");
|
||||||
|
AvailableCodeMemSlices |= 1 << i;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
VirtualFree(codeMem, CodeMemorySliceSize, MEM_RELEASE|MEM_DECOMMIT);
|
||||||
|
#else
|
||||||
|
munmap(codeMem, CodeMemorySliceSize);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void Init()
|
||||||
|
{
|
||||||
|
std::lock_guard guard(globalMutex);
|
||||||
|
|
||||||
|
RefCounter++;
|
||||||
|
if (RefCounter == 1)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
DWORD dummy;
|
||||||
|
VirtualProtect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PAGE_EXECUTE_READWRITE, &dummy);
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
// Apple always uses dynamic allocation
|
||||||
|
#else
|
||||||
|
mprotect(GetAlignedCodeMemoryStart(), CodeMemoryAlignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ARMJIT_Memory::RegisterFaultHandler();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void DeInit()
|
||||||
|
{
|
||||||
|
std::lock_guard guard(globalMutex);
|
||||||
|
|
||||||
|
RefCounter--;
|
||||||
|
if (RefCounter == 0)
|
||||||
|
{
|
||||||
|
ARMJIT_Memory::UnregisterFaultHandler();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
Copyright 2016-2024 melonDS team
|
||||||
|
|
||||||
|
This file is part of melonDS.
|
||||||
|
|
||||||
|
melonDS is free software: you can redistribute it and/or modify it under
|
||||||
|
the terms of the GNU General Public License as published by the Free
|
||||||
|
Software Foundation, either version 3 of the License, or (at your option)
|
||||||
|
any later version.
|
||||||
|
|
||||||
|
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||||
|
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||||
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with melonDS. If not, see http://www.gnu.org/licenses/.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef ARMJIT_GLOBAL_H
|
||||||
|
#define ARMJIT_GLOBAL_H
|
||||||
|
|
||||||
|
#include "types.h"
|
||||||
|
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
namespace melonDS
|
||||||
|
{
|
||||||
|
|
||||||
|
namespace ARMJIT_Global
|
||||||
|
{
|
||||||
|
|
||||||
|
static constexpr size_t CodeMemorySliceSize = 1024*1024*32;
|
||||||
|
|
||||||
|
void Init();
|
||||||
|
void DeInit();
|
||||||
|
|
||||||
|
void* AllocateCodeMem();
|
||||||
|
void FreeCodeMem(void* codeMem);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
|
@ -85,9 +85,9 @@ typedef void (*InterpreterFunc)(ARM* cpu);
|
||||||
extern InterpreterFunc InterpretARM[];
|
extern InterpreterFunc InterpretARM[];
|
||||||
extern InterpreterFunc InterpretTHUMB[];
|
extern InterpreterFunc InterpretTHUMB[];
|
||||||
|
|
||||||
inline bool PageContainsCode(const AddressRange* range)
|
inline bool PageContainsCode(const AddressRange* range, u32 pageSize)
|
||||||
{
|
{
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < pageSize / 512; i++)
|
||||||
{
|
{
|
||||||
if (range[i].Blocks.Length > 0)
|
if (range[i].Blocks.Length > 0)
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -39,6 +39,7 @@
|
||||||
|
|
||||||
#include "ARMJIT_Internal.h"
|
#include "ARMJIT_Internal.h"
|
||||||
#include "ARMJIT_Compiler.h"
|
#include "ARMJIT_Compiler.h"
|
||||||
|
#include "ARMJIT_Global.h"
|
||||||
|
|
||||||
#include "DSi.h"
|
#include "DSi.h"
|
||||||
#include "GPU.h"
|
#include "GPU.h"
|
||||||
|
@ -100,6 +101,9 @@
|
||||||
namespace melonDS
|
namespace melonDS
|
||||||
{
|
{
|
||||||
|
|
||||||
|
static constexpr u64 AddrSpaceSize = 0x100000000;
|
||||||
|
static constexpr u64 VirtmemAreaSize = AddrSpaceSize * 2 + MemoryTotalSize;
|
||||||
|
|
||||||
using Platform::Log;
|
using Platform::Log;
|
||||||
using Platform::LogLevel;
|
using Platform::LogLevel;
|
||||||
|
|
||||||
|
@ -152,6 +156,15 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
|
||||||
|
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
|
|
||||||
|
static LPVOID ExceptionHandlerHandle = nullptr;
|
||||||
|
static HMODULE KernelBaseDll = nullptr;
|
||||||
|
|
||||||
|
using VirtualAlloc2Type = PVOID WINAPI (*)(HANDLE Process, PVOID BaseAddress, SIZE_T Size, ULONG AllocationType, ULONG PageProtection, MEM_EXTENDED_PARAMETER* ExtendedParameters, ULONG ParameterCount);
|
||||||
|
using MapViewOfFile3Type = PVOID WINAPI (*)(HANDLE FileMapping, HANDLE Process, PVOID BaseAddress, ULONG64 Offset, SIZE_T ViewSize, ULONG AllocationType, ULONG PageProtection, MEM_EXTENDED_PARAMETER* ExtendedParameters, ULONG ParameterCount);
|
||||||
|
|
||||||
|
static VirtualAlloc2Type virtualAlloc2Ptr;
|
||||||
|
static MapViewOfFile3Type mapViewOfFile3Ptr;
|
||||||
|
|
||||||
LONG ARMJIT_Memory::ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
|
LONG ARMJIT_Memory::ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
|
||||||
{
|
{
|
||||||
if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
|
if (exceptionInfo->ExceptionRecord->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
|
||||||
|
@ -170,6 +183,7 @@ LONG ARMJIT_Memory::ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
|
||||||
return EXCEPTION_CONTINUE_EXECUTION;
|
return EXCEPTION_CONTINUE_EXECUTION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Log(LogLevel::Debug, "it all returns to nothing\n");
|
||||||
return EXCEPTION_CONTINUE_SEARCH;
|
return EXCEPTION_CONTINUE_SEARCH;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,18 +275,61 @@ enum
|
||||||
memstate_MappedProtected,
|
memstate_MappedProtected,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define CHECK_ALIGNED(value) assert(((value) & (PageSize-1)) == 0)
|
||||||
|
|
||||||
bool ARMJIT_Memory::MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexcept
|
bool ARMJIT_Memory::MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexcept
|
||||||
{
|
{
|
||||||
|
CHECK_ALIGNED(addr);
|
||||||
|
CHECK_ALIGNED(offset);
|
||||||
|
CHECK_ALIGNED(size);
|
||||||
|
|
||||||
u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
|
u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
|
||||||
#ifdef __SWITCH__
|
#ifdef __SWITCH__
|
||||||
Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(),
|
Result r = (svcMapProcessMemory(dst, envGetOwnProcessHandle(),
|
||||||
(u64)(MemoryBaseCodeMem + offset), size));
|
(u64)(MemoryBaseCodeMem + offset), size));
|
||||||
return R_SUCCEEDED(r);
|
return R_SUCCEEDED(r);
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
bool r = MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, offset, size, dst) == dst;
|
uintptr_t uintptrDst = reinterpret_cast<uintptr_t>(dst);
|
||||||
return r;
|
for (auto it = VirtmemPlaceholders.begin(); it != VirtmemPlaceholders.end(); it++)
|
||||||
|
{
|
||||||
|
if (uintptrDst >= it->Start && uintptrDst+size <= it->Start+it->Size)
|
||||||
|
{
|
||||||
|
//Log(LogLevel::Debug, "found mapping %llx %llx %llx %llx\n", uintptrDst, size, it->Start, it->Size);
|
||||||
|
// we split this place holder so that we have a fitting place holder for the mapping
|
||||||
|
if (uintptrDst != it->Start || size != it->Size)
|
||||||
|
{
|
||||||
|
if (!VirtualFree(dst, size, MEM_RELEASE|MEM_PRESERVE_PLACEHOLDER))
|
||||||
|
{
|
||||||
|
Log(LogLevel::Debug, "VirtualFree failed with %x\n", GetLastError());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
VirtmemPlaceholder splitPlaceholder = *it;
|
||||||
|
VirtmemPlaceholders.erase(it);
|
||||||
|
if (uintptrDst > splitPlaceholder.Start)
|
||||||
|
{
|
||||||
|
//Log(LogLevel::Debug, "splitting on the left %llx\n", uintptrDst - splitPlaceholder.Start);
|
||||||
|
VirtmemPlaceholders.push_back({splitPlaceholder.Start, uintptrDst - splitPlaceholder.Start});
|
||||||
|
}
|
||||||
|
if (uintptrDst+size < splitPlaceholder.Start+splitPlaceholder.Size)
|
||||||
|
{
|
||||||
|
//Log(LogLevel::Debug, "splitting on the right %llx\n", (splitPlaceholder.Start+splitPlaceholder.Size)-(uintptrDst+size));
|
||||||
|
VirtmemPlaceholders.push_back({uintptrDst+size, (splitPlaceholder.Start+splitPlaceholder.Size)-(uintptrDst+size)});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!mapViewOfFile3Ptr(MemoryFile, nullptr, dst, offset, size, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0))
|
||||||
|
{
|
||||||
|
Log(LogLevel::Debug, "MapViewOfFile3 failed with %x\n", GetLastError());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Log(LogLevel::Debug, "no mapping at all found??? %p %x %p\n", dst, size, MemoryBase);
|
||||||
|
return false;
|
||||||
#else
|
#else
|
||||||
return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED;
|
return mmap(dst, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, offset) != MAP_FAILED;
|
||||||
#endif
|
#endif
|
||||||
|
@ -280,21 +337,68 @@ bool ARMJIT_Memory::MapIntoRange(u32 addr, u32 num, u32 offset, u32 size) noexce
|
||||||
|
|
||||||
bool ARMJIT_Memory::UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) noexcept
|
bool ARMJIT_Memory::UnmapFromRange(u32 addr, u32 num, u32 offset, u32 size) noexcept
|
||||||
{
|
{
|
||||||
|
CHECK_ALIGNED(addr);
|
||||||
|
CHECK_ALIGNED(offset);
|
||||||
|
CHECK_ALIGNED(size);
|
||||||
|
|
||||||
u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
|
u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
|
||||||
#ifdef __SWITCH__
|
#ifdef __SWITCH__
|
||||||
Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(),
|
Result r = svcUnmapProcessMemory(dst, envGetOwnProcessHandle(),
|
||||||
(u64)(MemoryBaseCodeMem + offset), size);
|
(u64)(MemoryBaseCodeMem + offset), size);
|
||||||
return R_SUCCEEDED(r);
|
return R_SUCCEEDED(r);
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
return UnmapViewOfFile(dst);
|
if (!UnmapViewOfFileEx(dst, MEM_PRESERVE_PLACEHOLDER))
|
||||||
|
{
|
||||||
|
Log(LogLevel::Debug, "UnmapViewOfFileEx failed %x\n", GetLastError());
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
uintptr_t uintptrDst = reinterpret_cast<uintptr_t>(dst);
|
||||||
|
uintptr_t coalesceStart = uintptrDst;
|
||||||
|
size_t coalesceSize = size;
|
||||||
|
|
||||||
|
for (auto it = VirtmemPlaceholders.begin(); it != VirtmemPlaceholders.end();)
|
||||||
|
{
|
||||||
|
if (it->Start+it->Size == uintptrDst)
|
||||||
|
{
|
||||||
|
//Log(LogLevel::Debug, "Coalescing to the left\n");
|
||||||
|
coalesceStart = it->Start;
|
||||||
|
coalesceSize += it->Size;
|
||||||
|
it = VirtmemPlaceholders.erase(it);
|
||||||
|
}
|
||||||
|
else if (it->Start == uintptrDst+size)
|
||||||
|
{
|
||||||
|
//Log(LogLevel::Debug, "Coalescing to the right\n");
|
||||||
|
coalesceSize += it->Size;
|
||||||
|
it = VirtmemPlaceholders.erase(it);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
it++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (coalesceStart != uintptrDst || coalesceSize != size)
|
||||||
|
{
|
||||||
|
if (!VirtualFree(reinterpret_cast<void*>(coalesceStart), coalesceSize, MEM_RELEASE|MEM_COALESCE_PLACEHOLDERS))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
VirtmemPlaceholders.push_back({coalesceStart, coalesceSize});
|
||||||
|
//Log(LogLevel::Debug, "Adding coalesced region %llx %llx", coalesceStart, coalesceSize);
|
||||||
|
|
||||||
|
return true;
|
||||||
#else
|
#else
|
||||||
return munmap(dst, size) == 0;
|
return mmap(dst, size, PROT_NONE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0) != MAP_FAILED;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __SWITCH__
|
#ifndef __SWITCH__
|
||||||
void ARMJIT_Memory::SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) noexcept
|
void ARMJIT_Memory::SetCodeProtectionRange(u32 addr, u32 size, u32 num, int protection) noexcept
|
||||||
{
|
{
|
||||||
|
CHECK_ALIGNED(addr);
|
||||||
|
CHECK_ALIGNED(size);
|
||||||
|
|
||||||
u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
|
u8* dst = (u8*)(num == 0 ? FastMem9Start : FastMem7Start) + addr;
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
DWORD winProtection, oldProtection;
|
DWORD winProtection, oldProtection;
|
||||||
|
@ -305,6 +409,10 @@ void ARMJIT_Memory::SetCodeProtectionRange(u32 addr, u32 size, u32 num, int prot
|
||||||
else
|
else
|
||||||
winProtection = PAGE_READWRITE;
|
winProtection = PAGE_READWRITE;
|
||||||
bool success = VirtualProtect(dst, size, winProtection, &oldProtection);
|
bool success = VirtualProtect(dst, size, winProtection, &oldProtection);
|
||||||
|
if (!success)
|
||||||
|
{
|
||||||
|
Log(LogLevel::Debug, "VirtualProtect failed with %x\n", GetLastError());
|
||||||
|
}
|
||||||
assert(success);
|
assert(success);
|
||||||
#else
|
#else
|
||||||
int posixProt;
|
int posixProt;
|
||||||
|
@ -335,14 +443,14 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
u32 segmentOffset = offset;
|
u32 segmentOffset = offset;
|
||||||
u8 status = statuses[(Addr + offset) >> 12];
|
u8 status = statuses[(Addr + offset) >> PageShift];
|
||||||
while (statuses[(Addr + offset) >> 12] == status
|
while (statuses[(Addr + offset) >> PageShift] == status
|
||||||
&& offset < Size
|
&& offset < Size
|
||||||
&& (!skipDTCM || Addr + offset != dtcmStart))
|
&& (!skipDTCM || Addr + offset != dtcmStart))
|
||||||
{
|
{
|
||||||
assert(statuses[(Addr + offset) >> 12] != memstate_Unmapped);
|
assert(statuses[(Addr + offset) >> PageShift] != memstate_Unmapped);
|
||||||
statuses[(Addr + offset) >> 12] = memstate_Unmapped;
|
statuses[(Addr + offset) >> PageShift] = memstate_Unmapped;
|
||||||
offset += 0x1000;
|
offset += PageSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __SWITCH__
|
#ifdef __SWITCH__
|
||||||
|
@ -358,7 +466,6 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef __SWITCH__
|
#ifndef __SWITCH__
|
||||||
#ifndef _WIN32
|
|
||||||
u32 dtcmEnd = dtcmStart + dtcmSize;
|
u32 dtcmEnd = dtcmStart + dtcmSize;
|
||||||
if (Num == 0
|
if (Num == 0
|
||||||
&& dtcmEnd >= Addr
|
&& dtcmEnd >= Addr
|
||||||
|
@ -378,7 +485,6 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
bool succeded = nds.JIT.Memory.UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
|
bool succeded = nds.JIT.Memory.UnmapFromRange(Addr, Num, OffsetsPerRegion[region] + LocalOffset, Size);
|
||||||
assert(succeded);
|
assert(succeded);
|
||||||
|
@ -388,7 +494,7 @@ void ARMJIT_Memory::Mapping::Unmap(int region, melonDS::NDS& nds) noexcept
|
||||||
|
|
||||||
void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noexcept
|
void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noexcept
|
||||||
{
|
{
|
||||||
offset &= ~0xFFF;
|
offset &= ~(PageSize - 1);
|
||||||
//printf("set code protection %d %x %d\n", region, offset, protect);
|
//printf("set code protection %d %x %d\n", region, offset, protect);
|
||||||
|
|
||||||
for (int i = 0; i < Mappings[region].Length; i++)
|
for (int i = 0; i < Mappings[region].Length; i++)
|
||||||
|
@ -406,9 +512,9 @@ void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noex
|
||||||
|
|
||||||
u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7);
|
u8* states = (u8*)(mapping.Num == 0 ? MappingStatus9 : MappingStatus7);
|
||||||
|
|
||||||
//printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> 12]);
|
//printf("%x %d %x %x %x %d\n", effectiveAddr, mapping.Num, mapping.Addr, mapping.LocalOffset, mapping.Size, states[effectiveAddr >> PageShift]);
|
||||||
assert(states[effectiveAddr >> 12] == (protect ? memstate_MappedRW : memstate_MappedProtected));
|
assert(states[effectiveAddr >> PageShift] == (protect ? memstate_MappedRW : memstate_MappedProtected));
|
||||||
states[effectiveAddr >> 12] = protect ? memstate_MappedProtected : memstate_MappedRW;
|
states[effectiveAddr >> PageShift] = protect ? memstate_MappedProtected : memstate_MappedRW;
|
||||||
|
|
||||||
#if defined(__SWITCH__)
|
#if defined(__SWITCH__)
|
||||||
bool success;
|
bool success;
|
||||||
|
@ -418,7 +524,7 @@ void ARMJIT_Memory::SetCodeProtection(int region, u32 offset, bool protect) noex
|
||||||
success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
|
success = MapIntoRange(effectiveAddr, mapping.Num, OffsetsPerRegion[region] + offset, 0x1000);
|
||||||
assert(success);
|
assert(success);
|
||||||
#else
|
#else
|
||||||
SetCodeProtectionRange(effectiveAddr, 0x1000, mapping.Num, protect ? 1 : 2);
|
SetCodeProtectionRange(effectiveAddr, PageSize, mapping.Num, protect ? 1 : 2);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -543,11 +649,19 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
|
||||||
u32 dtcmSize = ~NDS.ARM9.DTCMMask + 1;
|
u32 dtcmSize = ~NDS.ARM9.DTCMMask + 1;
|
||||||
u32 dtcmEnd = dtcmStart + dtcmSize;
|
u32 dtcmEnd = dtcmStart + dtcmSize;
|
||||||
#ifndef __SWITCH__
|
#ifndef __SWITCH__
|
||||||
#ifndef _WIN32
|
|
||||||
if (num == 0
|
if (num == 0
|
||||||
&& dtcmEnd >= mirrorStart
|
&& dtcmEnd >= mirrorStart
|
||||||
&& dtcmStart < mirrorStart + mirrorSize)
|
&& dtcmStart < mirrorStart + mirrorSize)
|
||||||
{
|
{
|
||||||
|
if (dtcmSize < PageSize)
|
||||||
|
{
|
||||||
|
// we could technically mask out the DTCM by setting a hole to access permissions
|
||||||
|
// but realistically there isn't much of a point in mapping less than 16kb of DTCM
|
||||||
|
// so it isn't worth more complex support
|
||||||
|
Log(LogLevel::Info, "DTCM size smaller than 16kb skipping mapping entirely");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool success;
|
bool success;
|
||||||
if (dtcmStart > mirrorStart)
|
if (dtcmStart > mirrorStart)
|
||||||
{
|
{
|
||||||
|
@ -562,7 +676,6 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
#endif
|
|
||||||
{
|
{
|
||||||
bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
|
bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
|
||||||
assert(succeded);
|
assert(succeded);
|
||||||
|
@ -579,22 +692,19 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
|
||||||
{
|
{
|
||||||
if (skipDTCM && mirrorStart + offset == dtcmStart)
|
if (skipDTCM && mirrorStart + offset == dtcmStart)
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
|
||||||
SetCodeProtectionRange(dtcmStart, dtcmSize, 0, 0);
|
|
||||||
#endif
|
|
||||||
offset += dtcmSize;
|
offset += dtcmSize;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
u32 sectionOffset = offset;
|
u32 sectionOffset = offset;
|
||||||
bool hasCode = isExecutable && PageContainsCode(&range[offset / 512]);
|
bool hasCode = isExecutable && PageContainsCode(&range[offset / 512], PageSize);
|
||||||
while (offset < mirrorSize
|
while (offset < mirrorSize
|
||||||
&& (!isExecutable || PageContainsCode(&range[offset / 512]) == hasCode)
|
&& (!isExecutable || PageContainsCode(&range[offset / 512], PageSize) == hasCode)
|
||||||
&& (!skipDTCM || mirrorStart + offset != NDS.ARM9.DTCMBase))
|
&& (!skipDTCM || mirrorStart + offset != NDS.ARM9.DTCMBase))
|
||||||
{
|
{
|
||||||
assert(states[(mirrorStart + offset) >> 12] == memstate_Unmapped);
|
assert(states[(mirrorStart + offset) >> PageShift] == memstate_Unmapped);
|
||||||
states[(mirrorStart + offset) >> 12] = hasCode ? memstate_MappedProtected : memstate_MappedRW;
|
states[(mirrorStart + offset) >> PageShift] = hasCode ? memstate_MappedProtected : memstate_MappedRW;
|
||||||
offset += 0x1000;
|
offset += PageSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 sectionSize = offset - sectionOffset;
|
u32 sectionSize = offset - sectionOffset;
|
||||||
|
@ -624,6 +734,86 @@ bool ARMJIT_Memory::MapAtAddress(u32 addr) noexcept
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 ARMJIT_Memory::PageSize = 0;
|
||||||
|
u32 ARMJIT_Memory::PageShift = 0;
|
||||||
|
|
||||||
|
bool ARMJIT_Memory::IsFastMemSupported()
|
||||||
|
{
|
||||||
|
#ifdef __APPLE__
|
||||||
|
return false;
|
||||||
|
#else
|
||||||
|
static bool initialised = false;
|
||||||
|
static bool isSupported = false;
|
||||||
|
if (!initialised)
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
ARMJIT_Global::Init();
|
||||||
|
isSupported = virtualAlloc2Ptr != nullptr;
|
||||||
|
ARMJIT_Global::DeInit();
|
||||||
|
|
||||||
|
PageSize = RegularPageSize;
|
||||||
|
#else
|
||||||
|
PageSize = __sysconf(_SC_PAGESIZE);
|
||||||
|
isSupported = PageShift == RegularPageSize || PageSize == LargePageSize;
|
||||||
|
#endif
|
||||||
|
PageShift = __builtin_ctz(PageSize);
|
||||||
|
initialised = true;
|
||||||
|
}
|
||||||
|
return isSupported;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void ARMJIT_Memory::RegisterFaultHandler()
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler);
|
||||||
|
|
||||||
|
KernelBaseDll = LoadLibrary("KernelBase.dll");
|
||||||
|
if (KernelBaseDll)
|
||||||
|
{
|
||||||
|
virtualAlloc2Ptr = reinterpret_cast<VirtualAlloc2Type>(GetProcAddress(KernelBaseDll, "VirtualAlloc2"));
|
||||||
|
mapViewOfFile3Ptr = reinterpret_cast<MapViewOfFile3Type>(GetProcAddress(KernelBaseDll, "MapViewOfFile3"));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!virtualAlloc2Ptr)
|
||||||
|
{
|
||||||
|
Log(LogLevel::Error, "Could not load new Windows virtual memory functions, fast memory is disabled.\n");
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
struct sigaction sa;
|
||||||
|
sa.sa_handler = nullptr;
|
||||||
|
sa.sa_sigaction = &SigsegvHandler;
|
||||||
|
sa.sa_flags = SA_SIGINFO;
|
||||||
|
sigemptyset(&sa.sa_mask);
|
||||||
|
sigaction(SIGSEGV, &sa, &OldSaSegv);
|
||||||
|
#ifdef __APPLE__
|
||||||
|
sigaction(SIGBUS, &sa, &OldSaBus);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void ARMJIT_Memory::UnregisterFaultHandler()
|
||||||
|
{
|
||||||
|
#ifdef _WIN32
|
||||||
|
if (ExceptionHandlerHandle)
|
||||||
|
{
|
||||||
|
RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
|
||||||
|
ExceptionHandlerHandle = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (KernelBaseDll)
|
||||||
|
{
|
||||||
|
FreeLibrary(KernelBaseDll);
|
||||||
|
KernelBaseDll = nullptr;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
sigaction(SIGSEGV, &OldSaSegv, nullptr);
|
||||||
|
#ifdef __APPLE__
|
||||||
|
sigaction(SIGBUS, &OldSaBus, nullptr);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
|
bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
|
||||||
{
|
{
|
||||||
if (nds.JIT.JITCompiler.IsJITFault(faultDesc.FaultPC))
|
if (nds.JIT.JITCompiler.IsJITFault(faultDesc.FaultPC))
|
||||||
|
@ -632,7 +822,7 @@ bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
|
||||||
|
|
||||||
u8* memStatus = nds.CurCPU == 0 ? nds.JIT.Memory.MappingStatus9 : nds.JIT.Memory.MappingStatus7;
|
u8* memStatus = nds.CurCPU == 0 ? nds.JIT.Memory.MappingStatus9 : nds.JIT.Memory.MappingStatus7;
|
||||||
|
|
||||||
if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
|
if (memStatus[faultDesc.EmulatedFaultAddr >> PageShift] == memstate_Unmapped)
|
||||||
rewriteToSlowPath = !nds.JIT.Memory.MapAtAddress(faultDesc.EmulatedFaultAddr);
|
rewriteToSlowPath = !nds.JIT.Memory.MapAtAddress(faultDesc.EmulatedFaultAddr);
|
||||||
|
|
||||||
if (rewriteToSlowPath)
|
if (rewriteToSlowPath)
|
||||||
|
@ -643,10 +833,9 @@ bool ARMJIT_Memory::FaultHandler(FaultDescription& faultDesc, melonDS::NDS& nds)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u64 AddrSpaceSize = 0x100000000;
|
|
||||||
|
|
||||||
ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
|
ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
|
||||||
{
|
{
|
||||||
|
ARMJIT_Global::Init();
|
||||||
#if defined(__SWITCH__)
|
#if defined(__SWITCH__)
|
||||||
MemoryBase = (u8*)aligned_alloc(0x1000, MemoryTotalSize);
|
MemoryBase = (u8*)aligned_alloc(0x1000, MemoryTotalSize);
|
||||||
virtmemLock();
|
virtmemLock();
|
||||||
|
@ -671,33 +860,27 @@ ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
|
||||||
|
|
||||||
u8* basePtr = MemoryBaseCodeMem;
|
u8* basePtr = MemoryBaseCodeMem;
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
ExceptionHandlerHandle = AddVectoredExceptionHandler(1, ExceptionHandler);
|
if (virtualAlloc2Ptr)
|
||||||
|
{
|
||||||
|
MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, 0, MemoryTotalSize, nullptr);
|
||||||
|
|
||||||
MemoryFile = CreateFileMapping(INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, MemoryTotalSize, NULL);
|
MemoryBase = reinterpret_cast<u8*>(virtualAlloc2Ptr(nullptr, nullptr, VirtmemAreaSize,
|
||||||
|
MEM_RESERVE | MEM_RESERVE_PLACEHOLDER,
|
||||||
|
PAGE_NOACCESS,
|
||||||
|
nullptr, 0));
|
||||||
|
// split off placeholder and map base mapping
|
||||||
|
VirtualFree(MemoryBase, MemoryTotalSize, MEM_RELEASE|MEM_PRESERVE_PLACEHOLDER);
|
||||||
|
mapViewOfFile3Ptr(MemoryFile, nullptr, MemoryBase, 0, MemoryTotalSize, MEM_REPLACE_PLACEHOLDER, PAGE_READWRITE, nullptr, 0);
|
||||||
|
|
||||||
MemoryBase = (u8*)VirtualAlloc(NULL, AddrSpaceSize*4, MEM_RESERVE, PAGE_READWRITE);
|
VirtmemPlaceholders.push_back({reinterpret_cast<uintptr_t>(MemoryBase)+MemoryTotalSize, AddrSpaceSize*2});
|
||||||
VirtualFree(MemoryBase, 0, MEM_RELEASE);
|
}
|
||||||
// this is incredible hacky
|
else
|
||||||
// but someone else is trying to go into our address space!
|
{
|
||||||
// Windows will very likely give them virtual memory starting at the same address
|
// old Windows version
|
||||||
// as it is giving us now.
|
MemoryBase = new u8[MemoryTotalSize];
|
||||||
// That's why we don't use this address, but instead 4gb inwards
|
}
|
||||||
// I know this is terrible
|
|
||||||
FastMem9Start = MemoryBase + AddrSpaceSize;
|
|
||||||
FastMem7Start = MemoryBase + AddrSpaceSize*2;
|
|
||||||
MemoryBase = MemoryBase + AddrSpaceSize*3;
|
|
||||||
|
|
||||||
MapViewOfFileEx(MemoryFile, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, MemoryTotalSize, MemoryBase);
|
|
||||||
#else
|
#else
|
||||||
// this used to be allocated with three different mmaps
|
MemoryBase = (u8*)mmap(nullptr, VirtmemAreaSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
|
||||||
// The idea was to give the OS more freedom where to position the buffers,
|
|
||||||
// but something was bad about this so instead we take this vmem eating monster
|
|
||||||
// which seems to work better.
|
|
||||||
MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
|
|
||||||
munmap(MemoryBase, AddrSpaceSize*4);
|
|
||||||
FastMem9Start = MemoryBase;
|
|
||||||
FastMem7Start = MemoryBase + AddrSpaceSize;
|
|
||||||
MemoryBase = MemoryBase + AddrSpaceSize*2;
|
|
||||||
|
|
||||||
#if defined(__ANDROID__)
|
#if defined(__ANDROID__)
|
||||||
Libandroid = Platform::DynamicLibrary_Load("libandroid.so");
|
Libandroid = Platform::DynamicLibrary_Load("libandroid.so");
|
||||||
|
@ -730,20 +913,10 @@ ARMJIT_Memory::ARMJIT_Memory(melonDS::NDS& nds) : NDS(nds)
|
||||||
Log(LogLevel::Error, "Failed to allocate memory using ftruncate! (%s)", strerror(errno));
|
Log(LogLevel::Error, "Failed to allocate memory using ftruncate! (%s)", strerror(errno));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct sigaction sa;
|
|
||||||
sa.sa_handler = nullptr;
|
|
||||||
sa.sa_sigaction = &SigsegvHandler;
|
|
||||||
sa.sa_flags = SA_SIGINFO;
|
|
||||||
sigemptyset(&sa.sa_mask);
|
|
||||||
sigaction(SIGSEGV, &sa, &OldSaSegv);
|
|
||||||
#ifdef __APPLE__
|
|
||||||
sigaction(SIGBUS, &sa, &OldSaBus);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
|
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
|
||||||
|
|
||||||
u8* basePtr = MemoryBase;
|
|
||||||
#endif
|
#endif
|
||||||
|
FastMem9Start = MemoryBase+MemoryTotalSize;
|
||||||
|
FastMem7Start = static_cast<u8*>(FastMem9Start)+AddrSpaceSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
ARMJIT_Memory::~ARMJIT_Memory() noexcept
|
ARMJIT_Memory::~ARMJIT_Memory() noexcept
|
||||||
|
@ -764,34 +937,37 @@ ARMJIT_Memory::~ARMJIT_Memory() noexcept
|
||||||
free(MemoryBase);
|
free(MemoryBase);
|
||||||
MemoryBase = nullptr;
|
MemoryBase = nullptr;
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
if (MemoryBase)
|
if (virtualAlloc2Ptr)
|
||||||
{
|
{
|
||||||
bool viewUnmapped = UnmapViewOfFile(MemoryBase);
|
if (MemoryBase)
|
||||||
assert(viewUnmapped);
|
{
|
||||||
MemoryBase = nullptr;
|
bool viewUnmapped = UnmapViewOfFileEx(MemoryBase, MEM_PRESERVE_PLACEHOLDER);
|
||||||
FastMem9Start = nullptr;
|
assert(viewUnmapped);
|
||||||
FastMem7Start = nullptr;
|
bool viewCoalesced = VirtualFree(MemoryBase, VirtmemAreaSize, MEM_RELEASE|MEM_COALESCE_PLACEHOLDERS);
|
||||||
}
|
assert(viewCoalesced);
|
||||||
|
bool freeEverything = VirtualFree(MemoryBase, 0, MEM_RELEASE);
|
||||||
|
assert(freeEverything);
|
||||||
|
|
||||||
if (MemoryFile)
|
MemoryBase = nullptr;
|
||||||
{
|
FastMem9Start = nullptr;
|
||||||
CloseHandle(MemoryFile);
|
FastMem7Start = nullptr;
|
||||||
MemoryFile = INVALID_HANDLE_VALUE;
|
printf("unmappinged everything\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ExceptionHandlerHandle)
|
if (MemoryFile)
|
||||||
|
{
|
||||||
|
CloseHandle(MemoryFile);
|
||||||
|
MemoryFile = INVALID_HANDLE_VALUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
|
delete[] MemoryBase;
|
||||||
ExceptionHandlerHandle = nullptr;
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
sigaction(SIGSEGV, &OldSaSegv, nullptr);
|
|
||||||
#ifdef __APPLE__
|
|
||||||
sigaction(SIGBUS, &OldSaBus, nullptr);
|
|
||||||
#endif
|
|
||||||
if (MemoryBase)
|
if (MemoryBase)
|
||||||
{
|
{
|
||||||
munmap(MemoryBase, MemoryTotalSize);
|
munmap(MemoryBase, VirtmemAreaSize);
|
||||||
MemoryBase = nullptr;
|
MemoryBase = nullptr;
|
||||||
FastMem9Start = nullptr;
|
FastMem9Start = nullptr;
|
||||||
FastMem7Start = nullptr;
|
FastMem7Start = nullptr;
|
||||||
|
@ -812,6 +988,8 @@ ARMJIT_Memory::~ARMJIT_Memory() noexcept
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
ARMJIT_Global::DeInit();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ARMJIT_Memory::Reset() noexcept
|
void ARMJIT_Memory::Reset() noexcept
|
||||||
|
@ -834,17 +1012,6 @@ void ARMJIT_Memory::Reset() noexcept
|
||||||
|
|
||||||
bool ARMJIT_Memory::IsFastmemCompatible(int region) const noexcept
|
bool ARMJIT_Memory::IsFastmemCompatible(int region) const noexcept
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
|
||||||
/*
|
|
||||||
TODO: with some hacks, the smaller shared WRAM regions
|
|
||||||
could be mapped in some occaisons as well
|
|
||||||
*/
|
|
||||||
if (region == memregion_DTCM
|
|
||||||
|| region == memregion_SharedWRAM
|
|
||||||
|| region == memregion_NewSharedWRAM_B
|
|
||||||
|| region == memregion_NewSharedWRAM_C)
|
|
||||||
return false;
|
|
||||||
#endif
|
|
||||||
return OffsetsPerRegion[region] != UINT32_MAX;
|
return OffsetsPerRegion[region] != UINT32_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "MemConstants.h"
|
#include "MemConstants.h"
|
||||||
|
|
||||||
#ifdef JIT_ENABLED
|
#ifdef JIT_ENABLED
|
||||||
|
# include <mutex>
|
||||||
# include "TinyVector.h"
|
# include "TinyVector.h"
|
||||||
# include "ARM.h"
|
# include "ARM.h"
|
||||||
# if defined(__SWITCH__)
|
# if defined(__SWITCH__)
|
||||||
|
@ -48,23 +49,22 @@ class Compiler;
|
||||||
class ARMJIT;
|
class ARMJIT;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static constexpr u32 LargePageSize = 0x4000;
|
||||||
|
static constexpr u32 RegularPageSize = 0x1000;
|
||||||
|
|
||||||
constexpr u32 RoundUp(u32 size) noexcept
|
constexpr u32 RoundUp(u32 size) noexcept
|
||||||
{
|
{
|
||||||
#ifdef _WIN32
|
return (size + LargePageSize - 1) & ~(LargePageSize - 1);
|
||||||
return (size + 0xFFFF) & ~0xFFFF;
|
|
||||||
#else
|
|
||||||
return size;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 MemBlockMainRAMOffset = 0;
|
static constexpr u32 MemBlockMainRAMOffset = 0;
|
||||||
const u32 MemBlockSWRAMOffset = RoundUp(MainRAMMaxSize);
|
static constexpr u32 MemBlockSWRAMOffset = RoundUp(MainRAMMaxSize);
|
||||||
const u32 MemBlockARM7WRAMOffset = MemBlockSWRAMOffset + RoundUp(SharedWRAMSize);
|
static constexpr u32 MemBlockARM7WRAMOffset = MemBlockSWRAMOffset + RoundUp(SharedWRAMSize);
|
||||||
const u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(ARM7WRAMSize);
|
static constexpr u32 MemBlockDTCMOffset = MemBlockARM7WRAMOffset + RoundUp(ARM7WRAMSize);
|
||||||
const u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize);
|
static constexpr u32 MemBlockNWRAM_AOffset = MemBlockDTCMOffset + RoundUp(DTCMPhysicalSize);
|
||||||
const u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(NWRAMSize);
|
static constexpr u32 MemBlockNWRAM_BOffset = MemBlockNWRAM_AOffset + RoundUp(NWRAMSize);
|
||||||
const u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(NWRAMSize);
|
static constexpr u32 MemBlockNWRAM_COffset = MemBlockNWRAM_BOffset + RoundUp(NWRAMSize);
|
||||||
const u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(NWRAMSize);
|
static constexpr u32 MemoryTotalSize = MemBlockNWRAM_COffset + RoundUp(NWRAMSize);
|
||||||
|
|
||||||
class ARMJIT_Memory
|
class ARMJIT_Memory
|
||||||
{
|
{
|
||||||
|
@ -137,6 +137,14 @@ public:
|
||||||
bool IsFastmemCompatible(int region) const noexcept;
|
bool IsFastmemCompatible(int region) const noexcept;
|
||||||
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) const noexcept;
|
void* GetFuncForAddr(ARM* cpu, u32 addr, bool store, int size) const noexcept;
|
||||||
bool MapAtAddress(u32 addr) noexcept;
|
bool MapAtAddress(u32 addr) noexcept;
|
||||||
|
|
||||||
|
static bool IsFastMemSupported();
|
||||||
|
|
||||||
|
static void RegisterFaultHandler();
|
||||||
|
static void UnregisterFaultHandler();
|
||||||
|
|
||||||
|
static u32 PageSize;
|
||||||
|
static u32 PageShift;
|
||||||
private:
|
private:
|
||||||
friend class Compiler;
|
friend class Compiler;
|
||||||
struct Mapping
|
struct Mapping
|
||||||
|
@ -162,14 +170,22 @@ private:
|
||||||
void* FastMem9Start;
|
void* FastMem9Start;
|
||||||
void* FastMem7Start;
|
void* FastMem7Start;
|
||||||
u8* MemoryBase = nullptr;
|
u8* MemoryBase = nullptr;
|
||||||
|
|
||||||
#if defined(__SWITCH__)
|
#if defined(__SWITCH__)
|
||||||
VirtmemReservation* FastMem9Reservation, *FastMem7Reservation;
|
VirtmemReservation* FastMem9Reservation, *FastMem7Reservation;
|
||||||
u8* MemoryBaseCodeMem;
|
u8* MemoryBaseCodeMem;
|
||||||
#elif defined(_WIN32)
|
#elif defined(_WIN32)
|
||||||
|
struct VirtmemPlaceholder
|
||||||
|
{
|
||||||
|
uintptr_t Start;
|
||||||
|
size_t Size;
|
||||||
|
};
|
||||||
|
std::vector<VirtmemPlaceholder> VirtmemPlaceholders;
|
||||||
|
|
||||||
static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo);
|
static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo);
|
||||||
HANDLE MemoryFile = INVALID_HANDLE_VALUE;
|
HANDLE MemoryFile = INVALID_HANDLE_VALUE;
|
||||||
LPVOID ExceptionHandlerHandle = nullptr;
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext);
|
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext);
|
||||||
int MemoryFile = -1;
|
int MemoryFile = -1;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -176,9 +176,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
|
||||||
else
|
else
|
||||||
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
|
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
|
||||||
if (Num == 0)
|
if (Num == 0)
|
||||||
CALL((void*)&ARMv5JumpToTrampoline);
|
ABI_CallFunction(ARMv5JumpToTrampoline);
|
||||||
else
|
else
|
||||||
CALL((void*)&ARMv4JumpToTrampoline);
|
ABI_CallFunction(ARMv4JumpToTrampoline);
|
||||||
|
|
||||||
PopRegs(restoreCPSR, true);
|
PopRegs(restoreCPSR, true);
|
||||||
|
|
||||||
|
|
|
@ -21,19 +21,13 @@
|
||||||
#include "../ARMJIT.h"
|
#include "../ARMJIT.h"
|
||||||
#include "../ARMInterpreter.h"
|
#include "../ARMInterpreter.h"
|
||||||
#include "../NDS.h"
|
#include "../NDS.h"
|
||||||
|
#include "../ARMJIT_Global.h"
|
||||||
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
||||||
#include "../dolphin/CommonFuncs.h"
|
#include "../dolphin/CommonFuncs.h"
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
#include <windows.h>
|
|
||||||
#else
|
|
||||||
#include <sys/mman.h>
|
|
||||||
#include <unistd.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
using namespace Gen;
|
using namespace Gen;
|
||||||
using namespace Common;
|
using namespace Common;
|
||||||
|
|
||||||
|
@ -222,46 +216,21 @@ void Compiler::A_Comp_MSR()
|
||||||
MOV(32, R(ABI_PARAM3), R(RCPSR));
|
MOV(32, R(ABI_PARAM3), R(RCPSR));
|
||||||
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
|
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
|
||||||
MOV(64, R(ABI_PARAM1), R(RCPU));
|
MOV(64, R(ABI_PARAM1), R(RCPU));
|
||||||
CALL((void*)&UpdateModeTrampoline);
|
ABI_CallFunction(UpdateModeTrampoline);
|
||||||
|
|
||||||
PopRegs(true, true);
|
PopRegs(true, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
We'll repurpose this .bss memory
|
|
||||||
|
|
||||||
*/
|
|
||||||
u8 CodeMemory[1024 * 1024 * 32];
|
|
||||||
|
|
||||||
Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
|
Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
|
||||||
{
|
{
|
||||||
{
|
ARMJIT_Global::Init();
|
||||||
#ifdef _WIN32
|
|
||||||
SYSTEM_INFO sysInfo;
|
|
||||||
GetSystemInfo(&sysInfo);
|
|
||||||
|
|
||||||
u64 pageSize = (u64)sysInfo.dwPageSize;
|
CodeMemBase = static_cast<u8*>(ARMJIT_Global::AllocateCodeMem());
|
||||||
#else
|
CodeMemSize = ARMJIT_Global::CodeMemorySliceSize;
|
||||||
u64 pageSize = sysconf(_SC_PAGE_SIZE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize);
|
ResetStart = CodeMemBase;
|
||||||
u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned;
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
|
||||||
DWORD dummy;
|
|
||||||
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
|
|
||||||
#elif defined(__APPLE__)
|
|
||||||
pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
|
|
||||||
#else
|
|
||||||
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ResetStart = pageAligned;
|
|
||||||
CodeMemSize = alignedSize;
|
|
||||||
}
|
|
||||||
|
|
||||||
Reset();
|
Reset();
|
||||||
|
|
||||||
|
@ -475,6 +444,13 @@ Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
|
||||||
FarSize = (ResetStart + CodeMemSize) - FarStart;
|
FarSize = (ResetStart + CodeMemSize) - FarStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Tears down this compiler instance: hands the code memory referenced by
// CodeMemBase back to ARMJIT_Global, then calls ARMJIT_Global::DeInit().
Compiler::~Compiler()
{
    // The code memory is returned before the global JIT state is deinitialised.
    ARMJIT_Global::FreeCodeMem(CodeMemBase);
    ARMJIT_Global::DeInit();
}
|
||||||
|
|
||||||
void Compiler::LoadCPSR()
|
void Compiler::LoadCPSR()
|
||||||
{
|
{
|
||||||
assert(!CPSRDirty);
|
assert(!CPSRDirty);
|
||||||
|
@ -684,7 +660,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
|
||||||
|
|
||||||
if (ConstantCycles)
|
if (ConstantCycles)
|
||||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
|
||||||
JMP((u8*)&ARM_Ret, true);
|
ABI_TailCall(ARM_Ret);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -846,7 +822,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
|
||||||
|
|
||||||
if (ConstantCycles)
|
if (ConstantCycles)
|
||||||
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
|
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
|
||||||
JMP((u8*)ARM_Ret, true);
|
ABI_TailCall(ARM_Ret);
|
||||||
|
|
||||||
#ifdef JIT_PROFILING_ENABLED
|
#ifdef JIT_PROFILING_ENABLED
|
||||||
CreateMethod("JIT_Block_%d_%d_%08X", (void*)res, Num, Thumb, instrs[0].Addr);
|
CreateMethod("JIT_Block_%d_%d_%08X", (void*)res, Num, Thumb, instrs[0].Addr);
|
||||||
|
|
|
@ -84,6 +84,7 @@ class Compiler : public Gen::XEmitter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
explicit Compiler(melonDS::NDS& nds);
|
explicit Compiler(melonDS::NDS& nds);
|
||||||
|
~Compiler();
|
||||||
|
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
||||||
|
@ -256,6 +257,7 @@ public:
|
||||||
|
|
||||||
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {};
|
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {};
|
||||||
|
|
||||||
|
u8* CodeMemBase;
|
||||||
u8* ResetStart {};
|
u8* ResetStart {};
|
||||||
u32 CodeMemSize {};
|
u32 CodeMemSize {};
|
||||||
|
|
||||||
|
|
|
@ -316,24 +316,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
|
||||||
{
|
{
|
||||||
switch (size | NDS.ConsoleType)
|
switch (size | NDS.ConsoleType)
|
||||||
{
|
{
|
||||||
case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
|
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
|
||||||
case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
|
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
|
||||||
case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
|
case 8: ABI_CallFunction(&SlowWrite9<u8, 0>); break;
|
||||||
case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
|
case 33: ABI_CallFunction(&SlowWrite9<u32, 1>); break;
|
||||||
case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
|
case 17: ABI_CallFunction(&SlowWrite9<u16, 1>); break;
|
||||||
case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
|
case 9: ABI_CallFunction(&SlowWrite9<u8, 1>); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
switch (size | NDS.ConsoleType)
|
switch (size | NDS.ConsoleType)
|
||||||
{
|
{
|
||||||
case 32: CALL((void*)&SlowRead9<u32, 0>); break;
|
case 32: ABI_CallFunction(&SlowRead9<u32, 0>); break;
|
||||||
case 16: CALL((void*)&SlowRead9<u16, 0>); break;
|
case 16: ABI_CallFunction(&SlowRead9<u16, 0>); break;
|
||||||
case 8: CALL((void*)&SlowRead9<u8, 0>); break;
|
case 8: ABI_CallFunction(&SlowRead9<u8, 0>); break;
|
||||||
case 33: CALL((void*)&SlowRead9<u32, 1>); break;
|
case 33: ABI_CallFunction(&SlowRead9<u32, 1>); break;
|
||||||
case 17: CALL((void*)&SlowRead9<u16, 1>); break;
|
case 17: ABI_CallFunction(&SlowRead9<u16, 1>); break;
|
||||||
case 9: CALL((void*)&SlowRead9<u8, 1>); break;
|
case 9: ABI_CallFunction(&SlowRead9<u8, 1>); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -347,24 +347,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
|
||||||
|
|
||||||
switch (size | NDS.ConsoleType)
|
switch (size | NDS.ConsoleType)
|
||||||
{
|
{
|
||||||
case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
|
case 32: ABI_CallFunction(&SlowWrite7<u32, 0>); break;
|
||||||
case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
|
case 16: ABI_CallFunction(&SlowWrite7<u16, 0>); break;
|
||||||
case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
|
case 8: ABI_CallFunction(&SlowWrite7<u8, 0>); break;
|
||||||
case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
|
case 33: ABI_CallFunction(&SlowWrite7<u32, 1>); break;
|
||||||
case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
|
case 17: ABI_CallFunction(&SlowWrite7<u16, 1>); break;
|
||||||
case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
|
case 9: ABI_CallFunction(&SlowWrite7<u8, 1>); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
switch (size | NDS.ConsoleType)
|
switch (size | NDS.ConsoleType)
|
||||||
{
|
{
|
||||||
case 32: CALL((void*)&SlowRead7<u32, 0>); break;
|
case 32: ABI_CallFunction(&SlowRead7<u32, 0>); break;
|
||||||
case 16: CALL((void*)&SlowRead7<u16, 0>); break;
|
case 16: ABI_CallFunction(&SlowRead7<u16, 0>); break;
|
||||||
case 8: CALL((void*)&SlowRead7<u8, 0>); break;
|
case 8: ABI_CallFunction(&SlowRead7<u8, 0>); break;
|
||||||
case 33: CALL((void*)&SlowRead7<u32, 1>); break;
|
case 33: ABI_CallFunction(&SlowRead7<u32, 1>); break;
|
||||||
case 17: CALL((void*)&SlowRead7<u16, 1>); break;
|
case 17: ABI_CallFunction(&SlowRead7<u16, 1>); break;
|
||||||
case 9: CALL((void*)&SlowRead7<u8, 1>); break;
|
case 9: ABI_CallFunction(&SlowRead7<u8, 1>); break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -526,10 +526,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||||
|
|
||||||
switch (Num * 2 | NDS.ConsoleType)
|
switch (Num * 2 | NDS.ConsoleType)
|
||||||
{
|
{
|
||||||
case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
|
case 0: ABI_CallFunction(&SlowBlockTransfer9<false, 0>); break;
|
||||||
case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
|
case 1: ABI_CallFunction(&SlowBlockTransfer9<false, 1>); break;
|
||||||
case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
|
case 2: ABI_CallFunction(&SlowBlockTransfer7<false, 0>); break;
|
||||||
case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
|
case 3: ABI_CallFunction(&SlowBlockTransfer7<false, 1>); break;
|
||||||
}
|
}
|
||||||
|
|
||||||
PopRegs(false, false);
|
PopRegs(false, false);
|
||||||
|
@ -630,10 +630,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
|
||||||
|
|
||||||
switch (Num * 2 | NDS.ConsoleType)
|
switch (Num * 2 | NDS.ConsoleType)
|
||||||
{
|
{
|
||||||
case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
|
case 0: ABI_CallFunction(&SlowBlockTransfer9<true, 0>); break;
|
||||||
case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
|
case 1: ABI_CallFunction(&SlowBlockTransfer9<true, 1>); break;
|
||||||
case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
|
case 2: ABI_CallFunction(&SlowBlockTransfer7<true, 0>); break;
|
||||||
case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
|
case 3: ABI_CallFunction(&SlowBlockTransfer7<true, 1>); break;
|
||||||
}
|
}
|
||||||
|
|
||||||
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
|
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));
|
||||||
|
|
|
@ -97,8 +97,13 @@ if (ENABLE_JIT)
|
||||||
|
|
||||||
ARMJIT.cpp
|
ARMJIT.cpp
|
||||||
ARMJIT_Memory.cpp
|
ARMJIT_Memory.cpp
|
||||||
|
ARMJIT_Global.cpp
|
||||||
|
|
||||||
dolphin/CommonFuncs.cpp)
|
dolphin/CommonFuncs.cpp)
|
||||||
|
|
||||||
|
if (WIN32)
|
||||||
|
target_link_libraries(core PRIVATE onecore)
|
||||||
|
endif()
|
||||||
|
|
||||||
if (ARCHITECTURE STREQUAL x86_64)
|
if (ARCHITECTURE STREQUAL x86_64)
|
||||||
target_sources(core PRIVATE
|
target_sources(core PRIVATE
|
||||||
|
|
|
@ -74,7 +74,7 @@ const s32 kIterationCycleMargin = 8;
|
||||||
//
|
//
|
||||||
// timings for GBA slot and wifi are set up at runtime
|
// timings for GBA slot and wifi are set up at runtime
|
||||||
|
|
||||||
NDS* NDS::Current = nullptr;
|
thread_local NDS* NDS::Current = nullptr;
|
||||||
|
|
||||||
NDS::NDS() noexcept :
|
NDS::NDS() noexcept :
|
||||||
NDS(
|
NDS(
|
||||||
|
@ -128,6 +128,7 @@ NDS::NDS(NDSArgs&& args, int type, void* userdata) noexcept :
|
||||||
MainRAM = JIT.Memory.GetMainRAM();
|
MainRAM = JIT.Memory.GetMainRAM();
|
||||||
SharedWRAM = JIT.Memory.GetSharedWRAM();
|
SharedWRAM = JIT.Memory.GetSharedWRAM();
|
||||||
ARM7WRAM = JIT.Memory.GetARM7WRAM();
|
ARM7WRAM = JIT.Memory.GetARM7WRAM();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NDS::~NDS() noexcept
|
NDS::~NDS() noexcept
|
||||||
|
@ -894,6 +895,8 @@ void NDS::RunSystemSleep(u64 timestamp)
|
||||||
template <CPUExecuteMode cpuMode>
|
template <CPUExecuteMode cpuMode>
|
||||||
u32 NDS::RunFrame()
|
u32 NDS::RunFrame()
|
||||||
{
|
{
|
||||||
|
Current = this;
|
||||||
|
|
||||||
FrameStartTimestamp = SysTimestamp;
|
FrameStartTimestamp = SysTimestamp;
|
||||||
|
|
||||||
GPU.TotalScanlines = 0;
|
GPU.TotalScanlines = 0;
|
||||||
|
|
|
@ -541,8 +541,8 @@ public:
|
||||||
NDS& operator=(const NDS&) = delete;
|
NDS& operator=(const NDS&) = delete;
|
||||||
NDS(NDS&&) = delete;
|
NDS(NDS&&) = delete;
|
||||||
NDS& operator=(NDS&&) = delete;
|
NDS& operator=(NDS&&) = delete;
|
||||||
// The frontend should set and unset this manually after creating and destroying the NDS object.
|
|
||||||
[[deprecated("Temporary workaround until JIT code generation is revised to accommodate multiple NDS objects.")]] static NDS* Current;
|
static thread_local NDS* Current;
|
||||||
protected:
|
protected:
|
||||||
explicit NDS(NDSArgs&& args, int type, void* userdata) noexcept;
|
explicit NDS(NDSArgs&& args, int type, void* userdata) noexcept;
|
||||||
virtual void DoSavestateExtra(Savestate* file) {}
|
virtual void DoSavestateExtra(Savestate* file) {}
|
||||||
|
|
|
@ -1019,6 +1019,28 @@ public:
|
||||||
CALL(ptr);
|
CALL(ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
template <typename FunctionPointer>
|
||||||
|
void ABI_TailCall(FunctionPointer func)
|
||||||
|
{
|
||||||
|
static_assert(std::is_pointer<FunctionPointer>() &&
|
||||||
|
std::is_function<std::remove_pointer_t<FunctionPointer>>(),
|
||||||
|
"Supplied type must be a function pointer.");
|
||||||
|
|
||||||
|
const u8* ptr = reinterpret_cast<const u8*>(func);
|
||||||
|
const u64 address = reinterpret_cast<u64>(ptr);
|
||||||
|
const u64 distance = address - (reinterpret_cast<u64>(code) + 5);
|
||||||
|
|
||||||
|
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL)
|
||||||
|
{
|
||||||
|
// Far call
|
||||||
|
MOV(64, R(RAX), Imm64(address));
|
||||||
|
JMPptr(R(RAX));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
JMP(ptr, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename FunctionPointer>
|
template <typename FunctionPointer>
|
||||||
void ABI_CallFunctionC16(FunctionPointer func, u16 param1)
|
void ABI_CallFunctionC16(FunctionPointer func, u16 param1)
|
||||||
|
|
|
@ -165,7 +165,6 @@ EmuInstance::~EmuInstance()
|
||||||
audioDeInit();
|
audioDeInit();
|
||||||
inputDeInit();
|
inputDeInit();
|
||||||
|
|
||||||
NDS::Current = nullptr;
|
|
||||||
if (nds)
|
if (nds)
|
||||||
{
|
{
|
||||||
saveRTCData();
|
saveRTCData();
|
||||||
|
@ -1339,7 +1338,6 @@ bool EmuInstance::updateConsole() noexcept
|
||||||
renderLock.lock();
|
renderLock.lock();
|
||||||
if ((!nds) || (consoleType != nds->ConsoleType))
|
if ((!nds) || (consoleType != nds->ConsoleType))
|
||||||
{
|
{
|
||||||
NDS::Current = nullptr;
|
|
||||||
if (nds)
|
if (nds)
|
||||||
{
|
{
|
||||||
saveRTCData();
|
saveRTCData();
|
||||||
|
@ -1351,7 +1349,6 @@ bool EmuInstance::updateConsole() noexcept
|
||||||
else
|
else
|
||||||
nds = new NDS(std::move(ndsargs), this);
|
nds = new NDS(std::move(ndsargs), this);
|
||||||
|
|
||||||
NDS::Current = nds;
|
|
||||||
nds->Reset();
|
nds->Reset();
|
||||||
loadRTCData();
|
loadRTCData();
|
||||||
//emuThread->updateVideoRenderer(); // not actually needed?
|
//emuThread->updateVideoRenderer(); // not actually needed?
|
||||||
|
|
|
@ -82,9 +82,6 @@ EmuSettingsDialog::EmuSettingsDialog(QWidget* parent) : QDialog(parent), ui(new
|
||||||
ui->chkJITBranchOptimisations->setChecked(cfg.GetBool("JIT.BranchOptimisations"));
|
ui->chkJITBranchOptimisations->setChecked(cfg.GetBool("JIT.BranchOptimisations"));
|
||||||
ui->chkJITLiteralOptimisations->setChecked(cfg.GetBool("JIT.LiteralOptimisations"));
|
ui->chkJITLiteralOptimisations->setChecked(cfg.GetBool("JIT.LiteralOptimisations"));
|
||||||
ui->chkJITFastMemory->setChecked(cfg.GetBool("JIT.FastMemory"));
|
ui->chkJITFastMemory->setChecked(cfg.GetBool("JIT.FastMemory"));
|
||||||
#ifdef __APPLE__
|
|
||||||
ui->chkJITFastMemory->setDisabled(true);
|
|
||||||
#endif
|
|
||||||
ui->spnJITMaximumBlockSize->setValue(cfg.GetInt("JIT.MaxBlockSize"));
|
ui->spnJITMaximumBlockSize->setValue(cfg.GetInt("JIT.MaxBlockSize"));
|
||||||
#else
|
#else
|
||||||
ui->chkEnableJIT->setDisabled(true);
|
ui->chkEnableJIT->setDisabled(true);
|
||||||
|
@ -541,9 +538,7 @@ void EmuSettingsDialog::on_chkEnableJIT_toggled()
|
||||||
bool disabled = !ui->chkEnableJIT->isChecked();
|
bool disabled = !ui->chkEnableJIT->isChecked();
|
||||||
ui->chkJITBranchOptimisations->setDisabled(disabled);
|
ui->chkJITBranchOptimisations->setDisabled(disabled);
|
||||||
ui->chkJITLiteralOptimisations->setDisabled(disabled);
|
ui->chkJITLiteralOptimisations->setDisabled(disabled);
|
||||||
#ifndef __APPLE__
|
ui->chkJITFastMemory->setDisabled(disabled || !ARMJIT_Memory::IsFastMemSupported());
|
||||||
ui->chkJITFastMemory->setDisabled(disabled);
|
|
||||||
#endif
|
|
||||||
ui->spnJITMaximumBlockSize->setDisabled(disabled);
|
ui->spnJITMaximumBlockSize->setDisabled(disabled);
|
||||||
|
|
||||||
on_cbGdbEnabled_toggled();
|
on_cbGdbEnabled_toggled();
|
||||||
|
|
Loading…
Reference in New Issue