JIT fixes

- fix fastmem problems on linux
- fix a small memory leak
- SlowWrite functions always take in a 32-bit variable so that the C compiler knows that the values aren't necessarily zero-extended
- a few other stylistic things
- handle SIGBUS as well (for macos)
This commit is contained in:
RSDuck 2020-11-09 20:43:31 +01:00
parent ec232a9365
commit 78839f862e
8 changed files with 140 additions and 91 deletions

View File

@ -176,7 +176,7 @@ T SlowRead9(u32 addr, ARMv5* cpu)
} }
template <typename T, int ConsoleType> template <typename T, int ConsoleType>
void SlowWrite9(u32 addr, ARMv5* cpu, T val) void SlowWrite9(u32 addr, ARMv5* cpu, u32 val)
{ {
addr &= ~(sizeof(T) - 1); addr &= ~(sizeof(T) - 1);
@ -224,7 +224,7 @@ T SlowRead7(u32 addr)
} }
template <typename T, int ConsoleType> template <typename T, int ConsoleType>
void SlowWrite7(u32 addr, T val) void SlowWrite7(u32 addr, u32 val)
{ {
addr &= ~(sizeof(T) - 1); addr &= ~(sizeof(T) - 1);
@ -266,16 +266,16 @@ void SlowBlockTransfer7(u32 addr, u64* data, u32 num)
#define INSTANTIATE_SLOWMEM(consoleType) \ #define INSTANTIATE_SLOWMEM(consoleType) \
template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \ template void SlowWrite9<u32, consoleType>(u32, ARMv5*, u32); \
template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u16); \ template void SlowWrite9<u16, consoleType>(u32, ARMv5*, u32); \
template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u8); \ template void SlowWrite9<u8, consoleType>(u32, ARMv5*, u32); \
\ \
template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \ template u32 SlowRead9<u32, consoleType>(u32, ARMv5*); \
template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \ template u16 SlowRead9<u16, consoleType>(u32, ARMv5*); \
template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \ template u8 SlowRead9<u8, consoleType>(u32, ARMv5*); \
\ \
template void SlowWrite7<u32, consoleType>(u32, u32); \ template void SlowWrite7<u32, consoleType>(u32, u32); \
template void SlowWrite7<u16, consoleType>(u32, u16); \ template void SlowWrite7<u16, consoleType>(u32, u32); \
template void SlowWrite7<u8, consoleType>(u32, u8); \ template void SlowWrite7<u8, consoleType>(u32, u32); \
\ \
template u32 SlowRead7<u32, consoleType>(u32); \ template u32 SlowRead7<u32, consoleType>(u32); \
template u16 SlowRead7<u16, consoleType>(u32); \ template u16 SlowRead7<u16, consoleType>(u32); \
@ -298,6 +298,7 @@ void Init()
void DeInit() void DeInit()
{ {
ResetBlockCache();
ARMJIT_Memory::DeInit(); ARMJIT_Memory::DeInit();
delete JITCompiler; delete JITCompiler;
@ -1117,6 +1118,7 @@ void ResetBlockCache()
range->Blocks.Clear(); range->Blocks.Clear();
range->Code = 0; range->Code = 0;
} }
delete block;
} }
JitBlocks9.clear(); JitBlocks9.clear();
JitBlocks7.clear(); JitBlocks7.clear();

View File

@ -214,8 +214,8 @@ public:
return (u8*)entry - GetRXBase(); return (u8*)entry - GetRXBase();
} }
bool IsJITFault(u64 pc); bool IsJITFault(u8* pc);
s64 RewriteMemAccess(u64 pc); u8* RewriteMemAccess(u8* pc);
void SwapCodeRegion() void SwapCodeRegion()
{ {

View File

@ -9,37 +9,34 @@ using namespace Arm64Gen;
namespace ARMJIT namespace ARMJIT
{ {
bool Compiler::IsJITFault(u64 pc) bool Compiler::IsJITFault(u8* pc)
{ {
return pc >= (u64)GetRXBase() && pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize); return (u64)pc >= (u64)GetRXBase() && (u64)pc - (u64)GetRXBase() < (JitMemMainSize + JitMemSecondarySize);
} }
s64 Compiler::RewriteMemAccess(u64 pc) u8* Compiler::RewriteMemAccess(u8* pc)
{ {
ptrdiff_t pcOffset = pc - (u64)GetRXBase(); ptrdiff_t pcOffset = pc - GetRXBase();
auto it = LoadStorePatches.find(pcOffset); auto it = LoadStorePatches.find(pcOffset);
if (it != LoadStorePatches.end()) if (it != LoadStorePatches.end())
{ {
LoadStorePatch patch = it->second; LoadStorePatch patch = it->second;
LoadStorePatches.erase(it);
ptrdiff_t curCodeOffset = GetCodeOffset(); ptrdiff_t curCodeOffset = GetCodeOffset();
SetCodePtrUnsafe(pcOffset + patch.PatchOffset); SetCodePtrUnsafe(pcOffset + patch.PatchOffset);
BL(patch.PatchFunc); BL(patch.PatchFunc);
for (int i = 0; i < patch.PatchSize / 4 - 1; i++) for (int i = 0; i < patch.PatchSize / 4 - 1; i++)
HINT(HINT_NOP); HINT(HINT_NOP);
FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr()); FlushIcacheSection((u8*)pc + patch.PatchOffset, (u8*)GetRXPtr());
SetCodePtrUnsafe(curCodeOffset); SetCodePtrUnsafe(curCodeOffset);
LoadStorePatches.erase(it); return pc + (ptrdiff_t)patch.PatchOffset;
return patch.PatchOffset;
} }
printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc)); printf("this is a JIT bug! %08x\n", __builtin_bswap32(*(u32*)pc));
abort(); abort();
@ -192,7 +189,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, Op2 offset, int size, int flags)
else else
{ {
LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7); LDRGeneric(size, flags & memop_SignExtend, rdMapped, size > 8 ? X1 : X0, X7);
if (size == 32) if (size == 32 && !addrIsStatic)
{ {
UBFIZ(W0, W0, 3, 2); UBFIZ(W0, W0, 3, 2);
RORV(rdMapped, rdMapped, W0); RORV(rdMapped, rdMapped, W0);

View File

@ -216,9 +216,9 @@ template <u32 Num>
void LinkBlock(ARM* cpu, u32 codeOffset); void LinkBlock(ARM* cpu, u32 codeOffset);
template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu); template <typename T, int ConsoleType> T SlowRead9(u32 addr, ARMv5* cpu);
template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, T val); template <typename T, int ConsoleType> void SlowWrite9(u32 addr, ARMv5* cpu, u32 val);
template <typename T, int ConsoleType> T SlowRead7(u32 addr); template <typename T, int ConsoleType> T SlowRead7(u32 addr);
template <typename T, int ConsoleType> void SlowWrite7(u32 addr, T val); template <typename T, int ConsoleType> void SlowWrite7(u32 addr, u32 val);
template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu); template <bool Write, int ConsoleType> void SlowBlockTransfer9(u32 addr, u64* data, u32 num, ARMv5* cpu);
template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num); template <bool Write, int ConsoleType> void SlowBlockTransfer7(u32 addr, u64* data, u32 num);

View File

@ -40,7 +40,8 @@
We handle this by only mapping those regions which are actually We handle this by only mapping those regions which are actually
used and by praying the games don't go wild. used and by praying the games don't go wild.
Beware, this file is full of platform specific code. Beware, this file is full of platform specific code and copied
from Dolphin, so enjoy the copied comments!
*/ */
@ -49,10 +50,10 @@ namespace ARMJIT_Memory
struct FaultDescription struct FaultDescription
{ {
u32 EmulatedFaultAddr; u32 EmulatedFaultAddr;
u64 FaultPC; u8* FaultPC;
}; };
bool FaultHandler(FaultDescription* faultDesc, s32& offset); bool FaultHandler(FaultDescription& faultDesc);
} }
#if defined(__SWITCH__) #if defined(__SWITCH__)
@ -75,7 +76,7 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
ARMJIT_Memory::FaultDescription desc; ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea; desc.EmulatedFaultAddr = (u8*)ctx->far.x - curArea;
desc.FaultPC = ctx->pc.x; desc.FaultPC = (u8*)ctx->pc.x;
u64 integerRegisters[33]; u64 integerRegisters[33];
memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29); memcpy(integerRegisters, &ctx->cpu_gprs[0].x, 8*29);
@ -84,10 +85,9 @@ void __libnx_exception_handler(ThreadExceptionDump* ctx)
integerRegisters[31] = ctx->sp.x; integerRegisters[31] = ctx->sp.x;
integerRegisters[32] = ctx->pc.x; integerRegisters[32] = ctx->pc.x;
s32 offset; if (ARMJIT_Memory::FaultHandler(desc, offset))
if (ARMJIT_Memory::FaultHandler(&desc, offset))
{ {
integerRegisters[32] += offset; integerRegisters[32] = (u64)desc.FaultPC;
ARM_RestoreContext(integerRegisters); ARM_RestoreContext(integerRegisters);
} }
@ -117,12 +117,11 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
ARMJIT_Memory::FaultDescription desc; ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea; desc.EmulatedFaultAddr = (u8*)exceptionInfo->ExceptionRecord->ExceptionInformation[1] - curArea;
desc.FaultPC = exceptionInfo->ContextRecord->Rip; desc.FaultPC = (u8*)exceptionInfo->ContextRecord->Rip;
s32 offset = 0; if (ARMJIT_Memory::FaultHandler(desc))
if (ARMJIT_Memory::FaultHandler(&desc, offset))
{ {
exceptionInfo->ContextRecord->Rip += offset; exceptionInfo->ContextRecord->Rip = (u8*)desc.FaultPC;
return EXCEPTION_CONTINUE_EXECUTION; return EXCEPTION_CONTINUE_EXECUTION;
} }
@ -131,50 +130,66 @@ static LONG ExceptionHandler(EXCEPTION_POINTERS* exceptionInfo)
#else #else
struct sigaction NewSa; static struct sigaction OldSaSegv;
struct sigaction OldSa; static struct sigaction OldSaBus;
static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext) static void SigsegvHandler(int sig, siginfo_t* info, void* rawContext)
{ {
if (sig != SIGSEGV && sig != SIGBUS)
{
// We are not interested in other signals - handle it as usual.
return;
}
if (info->si_code != SEGV_MAPERR && info->si_code != SEGV_ACCERR)
{
// Huh? Return.
return;
}
ucontext_t* context = (ucontext_t*)rawContext; ucontext_t* context = (ucontext_t*)rawContext;
ARMJIT_Memory::FaultDescription desc; ARMJIT_Memory::FaultDescription desc;
u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start); u8* curArea = (u8*)(NDS::CurCPU == 0 ? ARMJIT_Memory::FastMem9Start : ARMJIT_Memory::FastMem7Start);
#ifdef __x86_64__ #ifdef __x86_64__
desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea; desc.EmulatedFaultAddr = (u8*)info->si_addr - curArea;
desc.FaultPC = context->uc_mcontext.gregs[REG_RIP]; desc.FaultPC = (u8*)context->uc_mcontext.gregs[REG_RIP];
#else #else
desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea; desc.EmulatedFaultAddr = (u8*)context->uc_mcontext.fault_address - curArea;
desc.FaultPC = context->uc_mcontext.pc; desc.FaultPC = (u8*)context->uc_mcontext.pc;
#endif #endif
s32 offset = 0; if (ARMJIT_Memory::FaultHandler(desc))
if (ARMJIT_Memory::FaultHandler(&desc, offset))
{ {
#ifdef __x86_64__ #ifdef __x86_64__
context->uc_mcontext.gregs[REG_RIP] += offset; context->uc_mcontext.gregs[REG_RIP] = (u64)desc.FaultPC;
#else #else
context->uc_mcontext.pc += offset; context->uc_mcontext.pc = (u64)desc.FaultPC;
#endif #endif
return; return;
} }
if (OldSa.sa_flags & SA_SIGINFO) struct sigaction* oldSa;
if (sig == SIGSEGV)
oldSa = &OldSaSegv;
else
oldSa = &OldSaBus;
if (oldSa->sa_flags & SA_SIGINFO)
{ {
OldSa.sa_sigaction(sig, info, rawContext); oldSa->sa_sigaction(sig, info, rawContext);
return; return;
} }
if (OldSa.sa_handler == SIG_DFL) if (oldSa->sa_handler == SIG_DFL)
{ {
signal(sig, SIG_DFL); signal(sig, SIG_DFL);
return; return;
} }
if (OldSa.sa_handler == SIG_IGN) if (oldSa->sa_handler == SIG_IGN)
{ {
// Ignore signal // Ignore signal
return; return;
} }
OldSa.sa_handler(sig); oldSa->sa_handler(sig);
} }
#endif #endif
@ -231,7 +246,7 @@ enum
{ {
memstate_Unmapped, memstate_Unmapped,
memstate_MappedRW, memstate_MappedRW,
// on switch this is unmapped as well // on Switch this is unmapped as well
memstate_MappedProtected, memstate_MappedProtected,
}; };
@ -505,6 +520,21 @@ bool MapAtAddress(u32 addr)
bool isExecutable = ARMJIT::CodeMemRegions[region]; bool isExecutable = ARMJIT::CodeMemRegions[region];
#ifndef __SWITCH__ #ifndef __SWITCH__
if (num == 0)
{
// if a DTCM mapping is mapped before the mapping below it
// we unmap it, so it won't just be overriden
for (int i = 0; i < Mappings[memregion_DTCM].Length; i++)
{
Mapping& mapping = Mappings[memregion_DTCM][i];
if (mirrorStart < mapping.Addr + mapping.Size && mirrorStart + mirrorSize >= mapping.Addr)
{
mapping.Unmap(memregion_DTCM);
}
}
Mappings[memregion_DTCM].Clear();
}
bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize); bool succeded = MapIntoRange(mirrorStart, num, OffsetsPerRegion[region] + memoryOffset, mirrorSize);
assert(succeded); assert(succeded);
#endif #endif
@ -562,21 +592,20 @@ bool MapAtAddress(u32 addr)
return true; return true;
} }
bool FaultHandler(FaultDescription* faultDesc, s32& offset) bool FaultHandler(FaultDescription& faultDesc)
{ {
if (ARMJIT::JITCompiler->IsJITFault(faultDesc->FaultPC)) if (ARMJIT::JITCompiler->IsJITFault(faultDesc.FaultPC))
{ {
bool rewriteToSlowPath = true; bool rewriteToSlowPath = true;
u32 addr = faultDesc->EmulatedFaultAddr; u8* memStatus = NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7;
if ((NDS::CurCPU == 0 ? MappingStatus9 : MappingStatus7)[addr >> 12] == memstate_Unmapped) if (memStatus[faultDesc.EmulatedFaultAddr >> 12] == memstate_Unmapped)
rewriteToSlowPath = !MapAtAddress(faultDesc->EmulatedFaultAddr); rewriteToSlowPath = !MapAtAddress(faultDesc.EmulatedFaultAddr);
if (rewriteToSlowPath) if (rewriteToSlowPath)
{ faultDesc.FaultPC = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc.FaultPC);
offset = ARMJIT::JITCompiler->RewriteMemAccess(faultDesc->FaultPC);
}
return true; return true;
} }
return false; return false;
@ -624,22 +653,28 @@ void Init()
u8* basePtr = MemoryBase; u8* basePtr = MemoryBase;
#else #else
FastMem9Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); // this used to be allocated with three different mmaps
FastMem7Start = mmap(NULL, AddrSpaceSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); // The idea was to give the OS more freedom where to position the buffers,
// but something was bad about this so instead we take this vmem eating monster
MemoryBase = (u8*)mmap(NULL, MemoryTotalSize, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0); // which seems to work better.
MemoryBase = (u8*)mmap(NULL, AddrSpaceSize*4, PROT_NONE, MAP_ANON | MAP_PRIVATE, -1, 0);
munmap(MemoryBase, AddrSpaceSize*4);
FastMem9Start = MemoryBase;
FastMem7Start = MemoryBase + AddrSpaceSize;
MemoryBase = MemoryBase + AddrSpaceSize*2;
MemoryFile = memfd_create("melondsfastmem", 0); MemoryFile = memfd_create("melondsfastmem", 0);
ftruncate(MemoryFile, MemoryTotalSize); ftruncate(MemoryFile, MemoryTotalSize);
NewSa.sa_flags = SA_SIGINFO; struct sigaction sa;
sigemptyset(&NewSa.sa_mask); sa.sa_handler = nullptr;
NewSa.sa_sigaction = SigsegvHandler; sa.sa_sigaction = &SigsegvHandler;
sigaction(SIGSEGV, &NewSa, &OldSa); sa.sa_flags = SA_SIGINFO;
sigemptyset(&sa.sa_mask);
munmap(MemoryBase, MemoryTotalSize); sigaction(SIGSEGV, &sa, &OldSaSegv);
munmap(FastMem9Start, AddrSpaceSize); #ifdef __APPLE__
munmap(FastMem7Start, AddrSpaceSize); sigaction(SIGBUS, &sa, &OldSaBus);
#endif
mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0); mmap(MemoryBase, MemoryTotalSize, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, MemoryFile, 0);
@ -657,8 +692,8 @@ void Init()
void DeInit() void DeInit()
{ {
#if defined(__SWITCH__) #if defined(__SWITCH__)
virtmemFree(FastMem9Start, 0x100000000); virtmemFree(FastMem9Start, AddrSpaceSize);
virtmemFree(FastMem7Start, 0x100000000); virtmemFree(FastMem7Start, AddrSpaceSize);
svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize); svcUnmapProcessCodeMemory(envGetOwnProcessHandle(), (u64)MemoryBaseCodeMem, (u64)MemoryBase, MemoryTotalSize);
virtmemFree(MemoryBaseCodeMem, MemoryTotalSize); virtmemFree(MemoryBaseCodeMem, MemoryTotalSize);
@ -668,6 +703,14 @@ void DeInit()
CloseHandle(MemoryFile); CloseHandle(MemoryFile);
RemoveVectoredExceptionHandler(ExceptionHandlerHandle); RemoveVectoredExceptionHandler(ExceptionHandlerHandle);
#else
sigaction(SIGSEGV, &OldSaSegv, nullptr);
#ifdef __APPLE__
sigaction(SIGBUS, &OldSaBus, nullptr);
#endif
munmap(MemoryBase, MemoryTotalSize);
close(MemoryFile);
#endif #endif
} }
@ -702,7 +745,15 @@ bool IsFastmemCompatible(int region)
|| region == memregion_NewSharedWRAM_C) || region == memregion_NewSharedWRAM_C)
return false; return false;
#endif #endif
return OffsetsPerRegion[region] != UINT32_MAX; if (region == memregion_DTCM
|| region == memregion_MainRAM
|| region == memregion_NewSharedWRAM_A
|| region == memregion_NewSharedWRAM_B
|| region == memregion_NewSharedWRAM_C
|| region == memregion_SharedWRAM)
return false;
//return OffsetsPerRegion[region] != UINT32_MAX;
return false;
} }
bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize) bool GetMirrorLocation(int region, u32 num, u32 addr, u32& memoryOffset, u32& mirrorStart, u32& mirrorSize)
@ -997,9 +1048,11 @@ int ClassifyAddress7(u32 addr)
case 0x06000000: case 0x06000000:
case 0x06800000: case 0x06800000:
return memregion_VWRAM; return memregion_VWRAM;
}
} default:
return memregion_Other; return memregion_Other;
}
}
} }
void WifiWrite32(u32 addr, u32 val) void WifiWrite32(u32 addr, u32 val)

View File

@ -617,9 +617,9 @@ void Compiler::Reset()
LoadStorePatches.clear(); LoadStorePatches.clear();
} }
bool Compiler::IsJITFault(u64 addr) bool Compiler::IsJITFault(u8* addr)
{ {
return addr >= (u64)CodeMemory && addr < (u64)CodeMemory + sizeof(CodeMemory); return (u64)addr >= (u64)ResetStart && (u64)addr < (u64)ResetStart + CodeMemSize;
} }
void Compiler::Comp_SpecialBranchBehaviour(bool taken) void Compiler::Comp_SpecialBranchBehaviour(bool taken)

View File

@ -208,9 +208,9 @@ public:
SetCodePtr(FarCode); SetCodePtr(FarCode);
} }
bool IsJITFault(u64 addr); bool IsJITFault(u8* addr);
s32 RewriteMemAccess(u64 pc); u8* RewriteMemAccess(u8* pc);
u8* FarCode; u8* FarCode;
u8* NearCode; u8* NearCode;

View File

@ -15,28 +15,24 @@ int squeezePointer(T* ptr)
return truncated; return truncated;
} }
s32 Compiler::RewriteMemAccess(u64 pc) u8* Compiler::RewriteMemAccess(u8* pc)
{ {
auto it = LoadStorePatches.find((u8*)pc); auto it = LoadStorePatches.find(pc);
if (it != LoadStorePatches.end()) if (it != LoadStorePatches.end())
{ {
LoadStorePatch patch = it->second; LoadStorePatch patch = it->second;
LoadStorePatches.erase(it); LoadStorePatches.erase(it);
u8* curCodePtr = GetWritableCodePtr(); //printf("rewriting memory access %p %d %d\n", (u8*)pc-ResetStart, patch.Offset, patch.Size);
u8* rewritePtr = (u8*)pc + (ptrdiff_t)patch.Offset;
SetCodePtr(rewritePtr);
CALL(patch.PatchFunc); XEmitter emitter(pc + (ptrdiff_t)patch.Offset);
u32 remainingSize = patch.Size - (GetWritableCodePtr() - rewritePtr); emitter.CALL(patch.PatchFunc);
ptrdiff_t remainingSize = (ptrdiff_t)patch.Size - 5;
assert(remainingSize >= 0);
if (remainingSize > 0) if (remainingSize > 0)
NOP(remainingSize); emitter.NOP(remainingSize);
//printf("rewriting memory access %p %d %d\n", patch.PatchFunc, patch.Offset, patch.Size); return pc + (ptrdiff_t)patch.Offset;
SetCodePtr(curCodePtr);
return patch.Offset;
} }
printf("this is a JIT bug %llx\n", pc); printf("this is a JIT bug %llx\n", pc);
@ -192,6 +188,7 @@ void Compiler::Comp_MemAccess(int rd, int rn, const Op2& op2, int size, int flag
u8* memopStart = GetWritableCodePtr(); u8* memopStart = GetWritableCodePtr();
LoadStorePatch patch; LoadStorePatch patch;
assert(rdMapped.GetSimpleReg() >= 0 && rdMapped.GetSimpleReg() < 16);
patch.PatchFunc = flags & memop_Store patch.PatchFunc = flags & memop_Store
? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()] ? PatchedStoreFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][rdMapped.GetSimpleReg()]
: PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()]; : PatchedLoadFuncs[NDS::ConsoleType][Num][__builtin_ctz(size) - 3][!!(flags & memop_SignExtend)][rdMapped.GetSimpleReg()];