JIT fixes

- fix fastmem problems on Linux
- fix a small memory leak
- SlowWrite functions always take in a 32-bit value, so that the compiler knows the values aren't necessarily zero extended (see the sketch below)
- a few other stylistic things
- handle SIGBUS as well (for macOS)
RSDuck 2020-11-09 20:43:31 +01:00
parent ec232a9365
commit 78839f862e
8 changed files with 140 additions and 91 deletions
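
For context on the SlowWrite change below: the JIT calls these functions directly from generated code, which doesn't guarantee that a value narrower than 32 bits arrives zero extended in its register. Taking the argument as u32 and truncating inside the function makes the narrowing explicit. A minimal sketch of the idea (standalone, simplified names, not code from the commit):

#include <cstdint>
using u8  = std::uint8_t;
using u32 = std::uint32_t;

static u8 Memory[0x1000]; // stand-in for the emulated bus

// Before: a u8 parameter lets the compiler assume the upper 24 bits were
// already cleared by the caller - a guarantee JIT-emitted code doesn't give.
void SlowWrite8_Before(u32 addr, u8 val) { Memory[addr & 0xFFF] = val; }

// After: accept the raw 32-bit register contents and truncate explicitly.
void SlowWrite8_After(u32 addr, u32 val) { Memory[addr & 0xFFF] = (u8)val; }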


@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu)
}
template <typename T, int ConsoleType>
void SlowWrite9(u32 addr, ARMv5* cpu, T val)
void SlowWrite9(u32 addr, ARMv5* cpu, u32 val)
{
addr &= ~(sizeof(T) - 1);
@ -224,7 +224,7 @@ T SlowRead7(u32 addr)
}
template <typename T, int ConsoleType>
void SlowWrite7(u32 addr, T val)
void SlowWrite7(u32 addr, u32 val)
{
addr &= ~(sizeof(T) - 1);
@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
#define INSTANTIATE_SLOWMEM(consoleType) \
template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \
template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \
template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \
template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u32); \
template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u32); \
\
template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \
template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \
template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \
\
template void SlowWrite7<u32, consoleType>(u32, u32); \
template void SlowWrite7<u16, consoleType>(u32, u16); \
template void SlowWrite7<u8, consoleType>(u32, u8); \
template void SlowWrite7<u16, consoleType>(u32, u32); \
template void SlowWrite7<u8, consoleType>(u32, u32); \
\
template u32 SlowRead7<u32, consoleType>(u32); \
template u16 SlowRead7<u16, consoleType>(u32); \
@ -298,6 +298,7 @@ void Init()
void DeInit()
{
ResetBlockCache();
ARMJIT_Memory::DeInit();
delete JITCompiler;
@ -1117,6 +1118,7 @@ void ResetBlockCache()
range->Blocks.Clear();
range->Code = 0;
}
delete block;
}
JitBlocks9.clear();
JitBlocks7.clear();
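
The memory-leak fix is the delete block added in ResetBlockCache above: the JitBlocks lists were cleared, but the heap-allocated blocks themselves were never freed. A minimal sketch of the ownership pattern (container type assumed, details simplified):

#include <vector>

struct JitBlock { /* compiled-code metadata */ };
static std::vector<JitBlock*> JitBlocks9; // assumed container; the real one tracks blocks per CPU

void ResetBlockCache()
{
    for (JitBlock* block : JitBlocks9)
    {
        // ... unlink the block from the code ranges it covers ...
        delete block; // the fix: free the block itself, not just the bookkeeping
    }
    JitBlocks9.clear();
}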


@ -214,8 +214,8 @@ public:
return (u8*)entry - GetRXBase();
}
bool IsJITFault(u64 pc);
s64 RewriteMemAccess(u64 pc);
bool IsJITFault(u8* pc);
u8* RewriteMemAccess(u8* pc);
void SwapCodeRegion()
{


@ -9,37 +9,34 @@ using namespace Arm64Gen;
namespace ARMJIT
{
bool Compiler::IsJITFault(u64 pc)
bool Compiler::IsJITFault(u8* pc)
{
return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
}
s64 Compiler::RewriteMemAccess(u64 pc)
u8* Compiler::RewriteMemAccess(u8* pc)
{
ptrdiff_t pcOffset = pc - (u64)GetRXBase();
ptrdiff_t pcOffset = pc - GetRXBase();
auto it = LoadStorePatches.find(pcOffset);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc);
for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP);
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset);
LoadStorePatches.erase(it);
return patch.PatchOffset;
return pc + (ptrdiff_t)patch.PatchOffset;
}
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
abort();
@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
else
{
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
if (size == 32)
if (size == 32 && !addrIsStatic)
{
UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0);
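
The ARM64 patching path shown above now deals purely in host pointers: when a fastmem access faults, RewriteMemAccess overwrites the emitted load/store with a branch to the slow-path thunk and hands back the address at which execution should resume, instead of returning an offset for the caller to apply. A rough sketch of that flow (helper names assumed, not the commit's exact code):

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <map>

struct LoadStorePatch
{
    std::ptrdiff_t PatchOffset; // start of the fastmem sequence, relative to the faulting pc
    std::size_t PatchSize;      // bytes occupied by the fast path
    void* PatchFunc;            // slow-path thunk to branch to instead
};

extern std::uint8_t* CodeBase;                                       // assumed: base of the JIT code region
extern std::map<std::ptrdiff_t, LoadStorePatch> LoadStorePatches;    // keyed by offset into that region
void EmitCallOver(std::uint8_t* at, void* target, std::size_t size); // assumed: BL + NOP padding + icache flush

std::uint8_t* RewriteMemAccess(std::uint8_t* pc)
{
    auto it = LoadStorePatches.find(pc - CodeBase);
    if (it == LoadStorePatches.end())
        std::abort(); // a fault in JIT code with no patch entry would be a JIT bug

    LoadStorePatch patch = it->second;
    LoadStorePatches.erase(it);

    // Replace the faulting fastmem sequence with a call into the slow path.
    EmitCallOver(pc + patch.PatchOffset, patch.PatchFunc, patch.PatchSize);

    // The fault handler sets the resume pc to this address.
    return pc + patch.PatchOffset;
}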


@ -216,9 +216,9 @@ template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset);
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);


@ -40,7 +40,8 @@
We handle this by only mapping those regions which are actually
used and by praying the games don't go wild.
Beware, this file is full of platform specific code.
Beware, this file is full of platform specific code and copied
from Dolphin, so enjoy the copied comments!
*/
@ -49,10 +50,10 @@ namespace ARMJIT_Memory
struct FaultDescription
{
u32 EmulatedFaultAddr;
u64 FaultPC;
u8* FaultPC;
};
bool FaultHandler(FaultDescription* faultDesc, s32& offset);
bool FaultHandler(FaultDescription& faultDesc);
}
#if defined(__SWITCH__)
@ -75,7 +76,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea;
desc.FaultPC = ctx->pc.x;
desc.FaultPC = (u8*)ctx->pc.x;
u64 integerRegisters[33];
memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29);
@ -84,10 +85,9 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
integerRegisters[31] = ctx->sp.x;
integerRegisters[32] = ctx->pc.x;
s32 offset;
if (ARMJIT_Memory::FaultHandler(&desc, offset))
if (ARMJIT_Memory::FaultHandler(desc, offset))
{
integerRegisters[32] += offset;
integerRegisters[32] = (u64)desc.FaultPC;
ARM_RestoreContext(integerRegisters);
}
@ -117,12 +117,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea;
desc.FaultPC = exceptionInfo->ContextRecord->Rip;
desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip;
s32 offset = 0;
if (ARMJIT_Memory::FaultHandler(&desc, offset))
if (ARMJIT_Memory::FaultHandler(desc))
{
exceptionInfo->ContextRecord->Rip += offset;
exceptionInfo->ContextRecord->Rip = (u8*)desc.FaultPC;
return EXCEPTION_CONTINUE_EXECUTION;
}
@ -131,50 +130,66 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
#else
struct sigaction NewSa;
struct sigaction OldSa;
static struct sigaction OldSaSegv;
static struct sigaction OldSaBus;
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
{
if (sig != SIGSEGV && sig != SIGBUS)
{
// We are not interested in other signals - handle it as usual.
return;
}
if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR)
{
// Huh? Return.
return;
}
ucontext_t* context = (ucontext_t*)rawContext;
ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
#ifdef __x86_64__
desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea;
desc.FaultPC = context->uc_mcontext.gregs[REG_RIP];
desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP];
#else
desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea;
desc.FaultPC = context->uc_mcontext.pc;
desc.FaultPC = (u8*)context->uc_mcontext.pc;
#endif
s32 offset = 0;
if (ARMJIT_Memory::FaultHandler(&desc, offset))
if (ARMJIT_Memory::FaultHandler(desc))
{
#ifdef __x86_64__
context->uc_mcontext.gregs[REG_RIP] += offset;
context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
#else
context->uc_mcontext.pc += offset;
context->uc_mcontext.pc = (u64)desc.FaultPC;
#endif
return;
}
if (OldSa.sa_flags & SA_SIGINFO)
struct sigaction* oldSa;
if (sig == SIGSEGV)
oldSa = &OldSaSegv;
else
oldSa = &OldSaBus;
if (oldSa->sa_flags & SA_SIGINFO)
{
OldSa.sa_sigaction(sig, info, rawContext);
oldSa->sa_sigaction(sig, info, rawContext);
return;
}
if (OldSa.sa_handler == SIG_DFL)
if (oldSa->sa_handler == SIG_DFL)
{
signal(sig, SIG_DFL);
return;
}
if (OldSa.sa_handler == SIG_IGN)
if (oldSa->sa_handler == SIG_IGN)
{
// Ignore signal
return;
}
OldSa.sa_handler(sig);
oldSa->sa_handler(sig);
}
#endif
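
The signal handling above now keeps one saved sigaction per signal so the handler can chain correctly, and it is installed for SIGBUS as well, since macOS can deliver these access faults as SIGBUS rather than SIGSEGV. A standalone sketch of the install-and-chain pattern (not the commit's exact code):

#include <signal.h>

static struct sigaction OldSaSegv;
static struct sigaction OldSaBus;

static void FaultSignalHandler(int sig, siginfo_t* info, void* rawContext)
{
    // ... try to resolve the fault here (map the page or patch the JIT code);
    // if that works, fix up the context's pc and simply return ...

    // Not ours: forward to whatever was installed before us.
    struct sigaction* oldSa = (sig == SIGSEGV) ? &OldSaSegv : &OldSaBus;
    if (oldSa->sa_flags & SA_SIGINFO)
        oldSa->sa_sigaction(sig, info, rawContext);
    else if (oldSa->sa_handler == SIG_DFL)
        signal(sig, SIG_DFL); // restore the default action; it fires when the fault repeats
    else if (oldSa->sa_handler != SIG_IGN)
        oldSa->sa_handler(sig);
}

void InstallFaultHandlers()
{
    struct sigaction sa;
    sa.sa_sigaction = FaultSignalHandler;
    sa.sa_flags = SA_SIGINFO;
    sigemptyset(&sa.sa_mask);
    sigaction(SIGSEGV, &sa, &OldSaSegv);
#ifdef __APPLE__
    sigaction(SIGBUS, &sa, &OldSaBus); // macOS reports many of these faults as SIGBUS
#endif
}
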
@ -231,7 +246,7 @@ enum
{
memstate_Unmapped,
memstate_MappedRW,
// on switch this is unmapped as well
// on Switch this is unmapped as well
memstate_MappedProtected,
};
@ -505,6 +520,21 @@ bool MapAtAddress(u32 addr)
bool isExecutable = ARMJIT::CodeMemRegions[region];
#ifndef __SWITCH__
if (num == 0)
{
// if a DTCM mapping is mapped before the mapping below it
// we unmap it, so it won't just be overriden
for (int i = 0; i < Mappings[memregion_DTCM].Length; i++)
{
Mapping& mapping = Mappings[memregion_DTCM][i];
if (mirrorStart < mapping.Addr + mapping.Size && mirrorStart + mirrorSize >= mapping.Addr)
{
mapping.Unmap(memregion_DTCM);
}
}
Mappings[memregion_DTCM].Clear();
}
bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
assert(succeded);
#endif
@ -562,21 +592,20 @@ bool MapAtAddress(u32 addr)
return true;
}
bool FaultHandler(FaultDescription* faultDesc, s32& offset)
bool FaultHandler(FaultDescription& faultDesc)
{
if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC))
if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC))
{
bool rewriteToSlowPath = true;
u32 addr = faultDesc->EmulatedFaultAddr;
u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7;
if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped)
rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr);
if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr);
if (rewriteToSlowPath)
{
offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC);
}
faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC);
return true;
}
return false;
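
With the reworked interface above, FaultHandler takes the descriptor by reference and writes the resume address back into FaultPC, so the platform handlers just copy that pointer into the saved instruction pointer instead of applying a signed offset. A condensed sketch of how the pieces fit together (helper names assumed, logic simplified):

#include <cstdint>

struct FaultDescription
{
    std::uint32_t EmulatedFaultAddr; // guest address the JIT code tried to access
    std::uint8_t* FaultPC;           // host pc of the faulting instruction
};

bool IsJITFault(std::uint8_t* pc);                // assumed: pc lies inside the JIT's code region
bool MapAtAddress(std::uint32_t addr);            // assumed: lazily back the page, if possible
std::uint8_t* RewriteMemAccess(std::uint8_t* pc); // assumed: patch to the slow path, return resume pc

bool FaultHandler(FaultDescription& desc)
{
    if (!IsJITFault(desc.FaultPC))
        return false; // not our fault; the platform handler chains to the old handler

    if (MapAtAddress(desc.EmulatedFaultAddr))
        return true; // page is backed now; FaultPC is unchanged, so the access is retried

    // Mapping wasn't possible: rewrite the access and resume at the patched code.
    desc.FaultPC = RewriteMemAccess(desc.FaultPC);
    return true;
}

// A platform handler then does, for example:
//     context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
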
@ -624,22 +653,28 @@ void Init()
u8* basePtr = MemoryBase;
#else
FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
// this used to be allocated with three different mmaps
// The idea was to give the OS more freedom where to position the buffers,
// but something was bad about this so instead we take this vmem eating monster
// which seems to work better.
MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
munmap(MemoryBase, AddrSpaceSize*4);
FastMem9Start = MemoryBase;
FastMem7Start = MemoryBase + AddrSpaceSize;
MemoryBase = MemoryBase + AddrSpaceSize*2;
MemoryFile = memfd_create("melondsfastmem", 0);
ftruncate(MemoryFile, MemoryTotalSize);
NewSa.sa_flags = SA_SIGINFO;
sigemptyset(&NewSa.sa_mask);
NewSa.sa_sigaction = SigsegvHandler;
sigaction(SIGSEGV, &NewSa, &OldSa);
munmap(MemoryBase, MemoryTotalSize);
munmap(FastMem9Start, AddrSpaceSize);
munmap(FastMem7Start, AddrSpaceSize);
struct sigaction sa;
sa.sa_handler = nullptr;
sa.sa_sigaction = &SigsegvHandler;
sa.sa_flags = SA_SIGINFO;
sigemptyset(&sa.sa_mask);
sigaction(SIGSEGV, &sa, &OldSaSegv);
#ifdef __APPLE__
sigaction(SIGBUS, &sa, &OldSaBus);
#endif
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
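
The new Linux/macOS setup above reserves one large PROT_NONE block, releases it, and carves both fastmem address spaces plus the backing area out of that range, while the memory itself lives in a memfd so it can be mapped into several mirrors. A stripped-down sketch of the scheme (Linux-flavoured; the sizes and the memfd name are placeholders):

#include <sys/mman.h>
#include <unistd.h>
#include <cstddef>
#include <cstdint>

constexpr std::size_t AddrSpaceSize   = 0x100000000;       // assumed: one full 4 GB guest address space
constexpr std::size_t MemoryTotalSize = 16 * 1024 * 1024;  // placeholder size for the backing memory

std::uint8_t *FastMem9Start, *FastMem7Start, *MemoryBase;
int MemoryFile;

void SetUpFastmem()
{
    // One contiguous reservation, immediately released; the commit's comment notes that
    // three separate mmaps caused problems, so all regions now come out of a single range.
    auto* base = (std::uint8_t*)mmap(nullptr, AddrSpaceSize * 4, PROT_NONE,
                                     MAP_ANON | MAP_PRIVATE, -1, 0);
    munmap(base, AddrSpaceSize * 4);

    FastMem9Start = base;
    FastMem7Start = base + AddrSpaceSize;
    MemoryBase    = base + AddrSpaceSize * 2;

    // Backing store: a memfd (glibc 2.27+) that individual regions are mapped from later.
    MemoryFile = memfd_create("fastmem-sketch", 0);
    (void)ftruncate(MemoryFile, MemoryTotalSize);
    mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE,
         MAP_SHARED | MAP_FIXED, MemoryFile, 0);
}
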
@ -657,8 +692,8 @@ void Init()
void DeInit()
{
#if defined(__SWITCH__)
virtmemFree(FastMem9Start, 0x100000000);
virtmemFree(FastMem7Start, 0x100000000);
virtmemFree(FastMem9Start, AddrSpaceSize);
virtmemFree(FastMem7Start, AddrSpaceSize);
svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
@ -668,6 +703,14 @@ void DeInit()
CloseHandle(MemoryFile);
RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
#else
sigaction(SIGSEGV, &OldSaSegv, nullptr);
#ifdef __APPLE__
sigaction(SIGBUS, &OldSaBus, nullptr);
#endif
munmap(MemoryBase, MemoryTotalSize);
close(MemoryFile);
#endif
}
@ -702,7 +745,15 @@ bool IsFastmemCompatible(int region)
|| region == memregion_NewSharedWRAM_C)
return false;
#endif
return OffsetsPerRegion[region] != UINT32_MAX;
if (region == memregion_DTCM
|| region == memregion_MainRAM
|| region == memregion_NewSharedWRAM_A
|| region == memregion_NewSharedWRAM_B
|| region == memregion_NewSharedWRAM_C
|| region == memregion_SharedWRAM)
return false;
//return OffsetsPerRegion[region] != UINT32_MAX;
return false;
}
bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize)
@ -997,9 +1048,11 @@ int ClassifyAddress7(u32 addr)
case 0x06000000:
case 0x06800000:
return memregion_VWRAM;
default:
return memregion_Other;
}
}
return memregion_Other;
}
void WifiWrite32(u32 addr, u32 val)


@ -345,7 +345,7 @@ Compiler::Compiler()
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 0>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 0>); break;
@ -357,7 +357,7 @@ Compiler::Compiler()
}
else
{
switch ((8 << size) | num)
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowWrite9<u32, 1>); break;
case 33: ABI_CallFunction(SlowWrite7<u32, 1>); break;
@ -380,7 +380,7 @@ Compiler::Compiler()
ABI_PushRegistersAndAdjustStack(CallerSavedPushRegs, 8);
if (consoleType == 0)
{
switch ((8 << size) | num)
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 0>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 0>); break;
@ -392,7 +392,7 @@ Compiler::Compiler()
}
else
{
switch ((8 << size) | num)
switch ((8 << size) | num)
{
case 32: ABI_CallFunction(SlowRead9<u32, 1>); break;
case 33: ABI_CallFunction(SlowRead7<u32, 1>); break;
@ -617,9 +617,9 @@ void Compiler::Reset()
LoadStorePatches.clear();
}
bool Compiler::IsJITFault(u64 addr)
bool Compiler::IsJITFault(u8* addr)
{
return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory);
return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize;
}
void Compiler::Comp_SpecialBranchBehaviour(bool taken)


@ -208,9 +208,9 @@ public:
SetCodePtr(FarCode);
}
bool IsJITFault(u64 addr);
bool IsJITFault(u8* addr);
s32 RewriteMemAccess(u64 pc);
u8* RewriteMemAccess(u8* pc);
u8* FarCode;
u8* NearCode;


@ -15,28 +15,24 @@ int squeezePointer(T* ptr)
return truncated;
}
s32 Compiler::RewriteMemAccess(u64 pc)
u8* Compiler::RewriteMemAccess(u8* pc)
{
auto it = LoadStorePatches.find((u8*)pc);
auto it = LoadStorePatches.find(pc);
if (it != LoadStorePatches.end())
{
LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
u8* curCodePtr = GetWritableCodePtr();
u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
SetCodePtr(rewritePtr);
//printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size);
CALL(patch.PatchFunc);
u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr);
XEmitter emitter(pc + (ptrdiff_t)patch.Offset);
emitter.CALL(patch.PatchFunc);
ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5;
assert(remainingSize >= 0);
if (remainingSize > 0)
NOP(remainingSize);
emitter.NOP(remainingSize);
//printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size);
SetCodePtr(curCodePtr);
return patch.Offset;
return pc + (ptrdiff_t)patch.Offset;
}
printf("this is a JIT bug %llx\n", pc);
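
On x64, RewriteMemAccess (above) no longer moves the compiler's own code pointer around; it builds a temporary emitter right at the patch site, writes a CALL to the slow-path thunk, and pads the rest of the original fastmem sequence with NOPs (a near CALL rel32 is 5 bytes, hence patch.Size - 5). A minimal sketch of that patching step with a toy emitter standing in for the real XEmitter:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>

// Toy emitter writing directly into already-writable code memory.
struct TinyEmitter
{
    std::uint8_t* Ptr;
    explicit TinyEmitter(std::uint8_t* at) : Ptr(at) {}

    void CALL(const void* target)
    {
        // E8 rel32: displacement is relative to the end of the 5-byte instruction.
        std::int32_t rel = (std::int32_t)((const std::uint8_t*)target - (Ptr + 5));
        *Ptr++ = 0xE8;
        std::memcpy(Ptr, &rel, 4);
        Ptr += 4;
    }

    void NOP(std::size_t bytes) { while (bytes--) *Ptr++ = 0x90; }
};

struct LoadStorePatch
{
    void* PatchFunc;       // slow-path thunk for this register/size combination
    std::ptrdiff_t Offset; // start of the fastmem sequence relative to the faulting pc
    std::size_t Size;      // total size of the fastmem sequence in bytes
};

std::uint8_t* RewritePatch(std::uint8_t* pc, const LoadStorePatch& patch)
{
    TinyEmitter emitter(pc + patch.Offset);
    emitter.CALL(patch.PatchFunc);

    std::ptrdiff_t remaining = (std::ptrdiff_t)patch.Size - 5; // the CALL just written is 5 bytes
    assert(remaining >= 0);
    if (remaining > 0)
        emitter.NOP((std::size_t)remaining); // keep the following instructions at their old addresses

    return pc + patch.Offset; // execution resumes at the freshly written CALL
}
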
@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch;
assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16);
patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];