works on Linux x64

still needs to be fixed for everything else
This commit is contained in:
RSDuck 2024-10-28 01:52:06 +01:00
parent 58ab33210a
commit 2f6a740b65
13 changed files with 227 additions and 89 deletions

View File

@ -30,6 +30,7 @@
#include "ARMJIT_Internal.h"
#include "ARMJIT_Memory.h"
#include "ARMJIT_Compiler.h"
#include "ARMJIT_CodeMem.h"
#include "ARMInterpreter_ALU.h"
#include "ARMInterpreter_LoadStore.h"
@ -467,6 +468,18 @@ InterpreterFunc InterpretTHUMB[ARMInstrInfo::tk_Count] =
};
#undef F
ARMJIT::ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
NDS(nds),
Memory(nds),
JITCompiler(nds),
MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
FastMemory(jit.has_value() ? jit->FastMemory : false)
{
ARMJIT_CodeMem::Init();
}
void ARMJIT::RetireJitBlock(JitBlock* block) noexcept
{
auto it = RestoreCandidates.find(block->InstrHash);

View File

@ -44,15 +44,7 @@ class JitBlock;
class ARMJIT
{
public:
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept :
NDS(nds),
Memory(nds),
JITCompiler(nds),
MaxBlockSize(jit.has_value() ? std::clamp(jit->MaxBlockSize, 1u, 32u) : 32),
LiteralOptimizations(jit.has_value() ? jit->LiteralOptimizations : false),
BranchOptimizations(jit.has_value() ? jit->BranchOptimizations : false),
FastMemory(jit.has_value() ? jit->FastMemory : false)
{}
ARMJIT(melonDS::NDS& nds, std::optional<JITArgs> jit) noexcept;
~ARMJIT() noexcept;
void InvalidateByAddr(u32) noexcept;
void CheckAndInvalidateWVRAM(int) noexcept;

93
src/ARMJIT_CodeMem.cpp Normal file
View File

@ -0,0 +1,93 @@
#include "ARMJIT_CodeMem.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
#include <stdio.h>
#include <mutex>
namespace melonDS
{
namespace ARMJIT_CodeMem
{
std::mutex codeMemoryMutex;
static constexpr size_t NumCodeMemSlices = 4;
// I haven't heard of pages larger than 16 KB
alignas(16*1024) u8 CodeMemory[NumCodeMemSlices * CodeMemorySliceSize];
u32 AvailableCodeMemSlices = (1 << NumCodeMemSlices) - 1;
int RefCounter = 0;
void* Allocate()
{
std::lock_guard guard(codeMemoryMutex);
if (AvailableCodeMemSlices)
{
int slice = __builtin_ctz(AvailableCodeMemSlices);
AvailableCodeMemSlices &= ~(1 << slice);
//printf("allocating slice %d\n", slice);
return &CodeMemory[slice * CodeMemorySliceSize];
}
// allocate
#ifdef _WIN32
// FIXME
#else
//printf("mmaping...\n");
return mmap(NULL, CodeMemorySliceSize, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
#endif
}
void Free(void* codeMem)
{
std::lock_guard guard(codeMemoryMutex);
for (int i = 0; i < NumCodeMemSlices; i++)
{
if (codeMem == &CodeMemory[CodeMemorySliceSize * i])
{
//printf("freeing slice\n");
AvailableCodeMemSlices |= 1 << i;
return;
}
}
#ifdef _WIN32
// FIXME
#else
munmap(codeMem, CodeMemorySliceSize);
#endif
}
void Init()
{
std::lock_guard guard(codeMemoryMutex);
RefCounter++;
if (RefCounter == 1)
{
#ifdef _WIN32
DWORD dummy;
VirtualProtect(CodeMemory, sizeof(CodeMemory), PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(__APPLE__)
// Apple always uses dynamic allocation
#else
mprotect(CodeMemory, sizeof(CodeMemory), PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
}
}
}
}

43
src/ARMJIT_CodeMem.h Normal file
View File

@ -0,0 +1,43 @@
/*
Copyright 2016-2024 melonDS team
This file is part of melonDS.
melonDS is free software: you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation, either version 3 of the License, or (at your option)
any later version.
melonDS is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with melonDS. If not, see http://www.gnu.org/licenses/.
*/
#ifndef ARMJIT_CODEMEM_H
#define ARMJIT_CODEMEM_H
#include "types.h"
#include <stdlib.h>
namespace melonDS
{
namespace ARMJIT_CodeMem
{
static constexpr size_t CodeMemorySliceSize = 1024*1024*32;
void Init();
void* Allocate();
void Free(void* codeMem);
}
}
#endif

View File

@ -176,9 +176,9 @@ void Compiler::Comp_JumpTo(Gen::X64Reg addr, bool restoreCPSR)
else
MOV(32, R(ABI_PARAM3), Imm32(true)); // what a waste
if (Num == 0)
CALL((void*)&ARMv5JumpToTrampoline);
ABI_CallFunction(ARMv5JumpToTrampoline);
else
CALL((void*)&ARMv4JumpToTrampoline);
ABI_CallFunction(ARMv4JumpToTrampoline);
PopRegs(restoreCPSR, true);

View File

@ -21,19 +21,13 @@
#include "../ARMJIT.h"
#include "../ARMInterpreter.h"
#include "../NDS.h"
#include "../ARMJIT_CodeMem.h"
#include <assert.h>
#include <stdarg.h>
#include "../dolphin/CommonFuncs.h"
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <unistd.h>
#endif
using namespace Gen;
using namespace Common;
@ -222,46 +216,19 @@ void Compiler::A_Comp_MSR()
MOV(32, R(ABI_PARAM3), R(RCPSR));
MOV(32, R(ABI_PARAM2), R(RSCRATCH3));
MOV(64, R(ABI_PARAM1), R(RCPU));
CALL((void*)&UpdateModeTrampoline);
ABI_CallFunction(UpdateModeTrampoline);
PopRegs(true, true);
}
}
}
/*
We'll repurpose this .bss memory
*/
u8 CodeMemory[1024 * 1024 * 32];
Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
{
{
#ifdef _WIN32
SYSTEM_INFO sysInfo;
GetSystemInfo(&sysInfo);
CodeMemBase = static_cast<u8*>(ARMJIT_CodeMem::Allocate());
CodeMemSize = ARMJIT_CodeMem::CodeMemorySliceSize;
u64 pageSize = (u64)sysInfo.dwPageSize;
#else
u64 pageSize = sysconf(_SC_PAGE_SIZE);
#endif
u8* pageAligned = (u8*)(((u64)CodeMemory & ~(pageSize - 1)) + pageSize);
u64 alignedSize = (((u64)CodeMemory + sizeof(CodeMemory)) & ~(pageSize - 1)) - (u64)pageAligned;
#ifdef _WIN32
DWORD dummy;
VirtualProtect(pageAligned, alignedSize, PAGE_EXECUTE_READWRITE, &dummy);
#elif defined(__APPLE__)
pageAligned = (u8*)mmap(NULL, 1024*1024*32, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS ,-1, 0);
#else
mprotect(pageAligned, alignedSize, PROT_EXEC | PROT_READ | PROT_WRITE);
#endif
ResetStart = pageAligned;
CodeMemSize = alignedSize;
}
ResetStart = CodeMemBase;
Reset();
@ -475,6 +442,11 @@ Compiler::Compiler(melonDS::NDS& nds) : XEmitter(), NDS(nds)
FarSize = (ResetStart + CodeMemSize) - FarStart;
}
Compiler::~Compiler()
{
ARMJIT_CodeMem::Free(CodeMemBase);
}
void Compiler::LoadCPSR()
{
assert(!CPSRDirty);
@ -684,7 +656,7 @@ void Compiler::Comp_SpecialBranchBehaviour(bool taken)
if (ConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
JMP((u8*)&ARM_Ret, true);
ABI_TailCall(ARM_Ret);
}
}
@ -846,7 +818,7 @@ JitBlockEntry Compiler::CompileBlock(ARM* cpu, bool thumb, FetchedInstr instrs[]
if (ConstantCycles)
ADD(32, MDisp(RCPU, offsetof(ARM, Cycles)), Imm32(ConstantCycles));
JMP((u8*)ARM_Ret, true);
ABI_TailCall(ARM_Ret);
#ifdef JIT_PROFILING_ENABLED
CreateMethod("JIT_Block_%d_%d_%08X", (void*)res, Num, Thumb, instrs[0].Addr);

View File

@ -84,6 +84,7 @@ class Compiler : public Gen::XEmitter
{
public:
explicit Compiler(melonDS::NDS& nds);
~Compiler();
void Reset();
@ -256,6 +257,7 @@ public:
std::unordered_map<u8*, LoadStorePatch> LoadStorePatches {};
u8* CodeMemBase;
u8* ResetStart {};
u32 CodeMemSize {};

View File

@ -316,24 +316,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
{
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowWrite9<u32, 0>); break;
case 16: CALL((void*)&SlowWrite9<u16, 0>); break;
case 8: CALL((void*)&SlowWrite9<u8, 0>); break;
case 33: CALL((void*)&SlowWrite9<u32, 1>); break;
case 17: CALL((void*)&SlowWrite9<u16, 1>); break;
case 9: CALL((void*)&SlowWrite9<u8, 1>); break;
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 16: ABI_CallFunction(SlowWrite9<u16, 0>); break;
case 8: ABI_CallFunction(&SlowWrite9<u8, 0>); break;
case 33: ABI_CallFunction(&SlowWrite9<u32, 1>); break;
case 17: ABI_CallFunction(&SlowWrite9<u16, 1>); break;
case 9: ABI_CallFunction(&SlowWrite9<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowRead9<u32, 0>); break;
case 16: CALL((void*)&SlowRead9<u16, 0>); break;
case 8: CALL((void*)&SlowRead9<u8, 0>); break;
case 33: CALL((void*)&SlowRead9<u32, 1>); break;
case 17: CALL((void*)&SlowRead9<u16, 1>); break;
case 9: CALL((void*)&SlowRead9<u8, 1>); break;
case 32: ABI_CallFunction(&SlowRead9<u32, 0>); break;
case 16: ABI_CallFunction(&SlowRead9<u16, 0>); break;
case 8: ABI_CallFunction(&SlowRead9<u8, 0>); break;
case 33: ABI_CallFunction(&SlowRead9<u32, 1>); break;
case 17: ABI_CallFunction(&SlowRead9<u16, 1>); break;
case 9: ABI_CallFunction(&SlowRead9<u8, 1>); break;
}
}
}
@ -347,24 +347,24 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowWrite7<u32, 0>); break;
case 16: CALL((void*)&SlowWrite7<u16, 0>); break;
case 8: CALL((void*)&SlowWrite7<u8, 0>); break;
case 33: CALL((void*)&SlowWrite7<u32, 1>); break;
case 17: CALL((void*)&SlowWrite7<u16, 1>); break;
case 9: CALL((void*)&SlowWrite7<u8, 1>); break;
case 32: ABI_CallFunction(&SlowWrite7<u32, 0>); break;
case 16: ABI_CallFunction(&SlowWrite7<u16, 0>); break;
case 8: ABI_CallFunction(&SlowWrite7<u8, 0>); break;
case 33: ABI_CallFunction(&SlowWrite7<u32, 1>); break;
case 17: ABI_CallFunction(&SlowWrite7<u16, 1>); break;
case 9: ABI_CallFunction(&SlowWrite7<u8, 1>); break;
}
}
else
{
switch (size | NDS.ConsoleType)
{
case 32: CALL((void*)&SlowRead7<u32, 0>); break;
case 16: CALL((void*)&SlowRead7<u16, 0>); break;
case 8: CALL((void*)&SlowRead7<u8, 0>); break;
case 33: CALL((void*)&SlowRead7<u32, 1>); break;
case 17: CALL((void*)&SlowRead7<u16, 1>); break;
case 9: CALL((void*)&SlowRead7<u8, 1>); break;
case 32: ABI_CallFunction(&SlowRead7<u32, 0>); break;
case 16: ABI_CallFunction(&SlowRead7<u16, 0>); break;
case 8: ABI_CallFunction(&SlowRead7<u8, 0>); break;
case 33: ABI_CallFunction(&SlowRead7<u32, 1>); break;
case 17: ABI_CallFunction(&SlowRead7<u16, 1>); break;
case 9: ABI_CallFunction(&SlowRead7<u8, 1>); break;
}
}
}
@ -526,10 +526,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
switch (Num * 2 | NDS.ConsoleType)
{
case 0: CALL((void*)&SlowBlockTransfer9<false, 0>); break;
case 1: CALL((void*)&SlowBlockTransfer9<false, 1>); break;
case 2: CALL((void*)&SlowBlockTransfer7<false, 0>); break;
case 3: CALL((void*)&SlowBlockTransfer7<false, 1>); break;
case 0: ABI_CallFunction(&SlowBlockTransfer9<false, 0>); break;
case 1: ABI_CallFunction(&SlowBlockTransfer9<false, 1>); break;
case 2: ABI_CallFunction(&SlowBlockTransfer7<false, 0>); break;
case 3: ABI_CallFunction(&SlowBlockTransfer7<false, 1>); break;
}
PopRegs(false, false);
@ -630,10 +630,10 @@ s32 Compiler::Comp_MemAccessBlock(int rn, BitSet16 regs, bool store, bool preinc
switch (Num * 2 | NDS.ConsoleType)
{
case 0: CALL((void*)&SlowBlockTransfer9<true, 0>); break;
case 1: CALL((void*)&SlowBlockTransfer9<true, 1>); break;
case 2: CALL((void*)&SlowBlockTransfer7<true, 0>); break;
case 3: CALL((void*)&SlowBlockTransfer7<true, 1>); break;
case 0: ABI_CallFunction(&SlowBlockTransfer9<true, 0>); break;
case 1: ABI_CallFunction(&SlowBlockTransfer9<true, 1>); break;
case 2: ABI_CallFunction(&SlowBlockTransfer7<true, 0>); break;
case 3: ABI_CallFunction(&SlowBlockTransfer7<true, 1>); break;
}
ADD(64, R(RSP), stackAlloc <= INT8_MAX ? Imm8(stackAlloc) : Imm32(stackAlloc));

View File

@ -97,6 +97,7 @@ if (ENABLE_JIT)
ARMJIT.cpp
ARMJIT_Memory.cpp
ARMJIT_CodeMem.cpp
dolphin/CommonFuncs.cpp)

View File

@ -74,7 +74,7 @@ const s32 kIterationCycleMargin = 8;
//
// timings for GBA slot and wifi are set up at runtime
NDS* NDS::Current = nullptr;
thread_local NDS* NDS::Current = nullptr;
NDS::NDS() noexcept :
NDS(
@ -130,6 +130,7 @@ NDS::NDS(NDSArgs&& args, int type, void* userdata) noexcept :
MainRAM = JIT.Memory.GetMainRAM();
SharedWRAM = JIT.Memory.GetSharedWRAM();
ARM7WRAM = JIT.Memory.GetARM7WRAM();
}
NDS::~NDS() noexcept
@ -892,6 +893,8 @@ void NDS::RunSystemSleep(u64 timestamp)
template <CPUExecuteMode cpuMode>
u32 NDS::RunFrame()
{
Current = this;
FrameStartTimestamp = SysTimestamp;
GPU.TotalScanlines = 0;

View File

@ -536,8 +536,8 @@ public:
NDS& operator=(const NDS&) = delete;
NDS(NDS&&) = delete;
NDS& operator=(NDS&&) = delete;
// The frontend should set and unset this manually after creating and destroying the NDS object.
[[deprecated("Temporary workaround until JIT code generation is revised to accommodate multiple NDS objects.")]] static NDS* Current;
static thread_local NDS* Current;
protected:
explicit NDS(NDSArgs&& args, int type, void* userdata) noexcept;
virtual void DoSavestateExtra(Savestate* file) {}

View File

@ -1019,6 +1019,28 @@ public:
CALL(ptr);
}
}
template <typename FunctionPointer>
void ABI_TailCall(FunctionPointer func)
{
static_assert(std::is_pointer<FunctionPointer>() &&
std::is_function<std::remove_pointer_t<FunctionPointer>>(),
"Supplied type must be a function pointer.");
const u8* ptr = reinterpret_cast<const u8*>(func);
const u64 address = reinterpret_cast<u64>(ptr);
const u64 distance = address - (reinterpret_cast<u64>(code) + 5);
if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL)
{
// Far call
MOV(64, R(RAX), Imm64(address));
JMPptr(R(RAX));
}
else
{
JMP(ptr, true);
}
}
template <typename FunctionPointer>
void ABI_CallFunctionC16(FunctionPointer func, u16 param1)

View File

@ -165,7 +165,6 @@ EmuInstance::~EmuInstance()
audioDeInit();
inputDeInit();
NDS::Current = nullptr;
if (nds)
{
saveRTCData();
@ -1342,7 +1341,6 @@ bool EmuInstance::updateConsole(UpdateConsoleNDSArgs&& _ndsargs, UpdateConsoleGB
if ((!nds) || (consoleType != nds->ConsoleType))
{
NDS::Current = nullptr;
if (nds)
{
saveRTCData();
@ -1354,7 +1352,6 @@ bool EmuInstance::updateConsole(UpdateConsoleNDSArgs&& _ndsargs, UpdateConsoleGB
else
nds = new NDS(std::move(ndsargs), this);
NDS::Current = nds;
nds->Reset();
loadRTCData();
//emuThread->updateVideoRenderer(); // not actually needed?