From 5cf41a41f34547bb686a54b5d96fe60fe4144668 Mon Sep 17 00:00:00 2001
From: Stenzek
Date: Fri, 24 Nov 2023 19:53:46 +1000
Subject: [PATCH] JitCodeBuffer: Allocate within 32-bit range

---
 src/common/align.h           | 32 ++++++++++++-
 src/util/jit_code_buffer.cpp | 91 ++++++++++++++++++++++++++++--------
 src/util/jit_code_buffer.h   |  2 +
 3 files changed, 106 insertions(+), 19 deletions(-)

diff --git a/src/common/align.h b/src/common/align.h
index 29a5e748f..6374a99f2 100644
--- a/src/common/align.h
+++ b/src/common/align.h
@@ -56,10 +56,38 @@ constexpr T PreviousPow2(T value)
   value |= (value >> 1);
   value |= (value >> 2);
   value |= (value >> 4);
-  value |= (value >> 8);
-  value |= (value >> 16);
+  // sizeof() counts bytes: only apply the wider shifts when T actually has that many bits.
+  if constexpr (sizeof(T) >= 2)
+    value |= (value >> 8);
+  if constexpr (sizeof(T) >= 4)
+    value |= (value >> 16);
+  if constexpr (sizeof(T) >= 8)
+    value |= (value >> 32);
   return value - (value >> 1);
 }
+
+// Rounds value up to the nearest power of two; powers of two are returned unchanged and 0 maps to 0.
+template<typename T>
+constexpr T NextPow2(T value)
+{
+  // https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+  if (value == static_cast<T>(0))
+    return 0;
+
+  value--;
+  value |= (value >> 1);
+  value |= (value >> 2);
+  value |= (value >> 4);
+  // sizeof() counts bytes: only apply the wider shifts when T actually has that many bits.
+  if constexpr (sizeof(T) >= 2)
+    value |= (value >> 8);
+  if constexpr (sizeof(T) >= 4)
+    value |= (value >> 16);
+  if constexpr (sizeof(T) >= 8)
+    value |= (value >> 32);
+  value++;
+  return value;
+}
 
 ALWAYS_INLINE static void* AlignedMalloc(size_t size, size_t alignment)
 {
diff --git a/src/util/jit_code_buffer.cpp b/src/util/jit_code_buffer.cpp
index dba350cbd..b915a0d50 100644
--- a/src/util/jit_code_buffer.cpp
+++ b/src/util/jit_code_buffer.cpp
@@ -44,28 +44,43 @@ bool JitCodeBuffer::Allocate(u32 size /* = 64 * 1024 * 1024 */, u32 far_code_siz
 
   m_total_size = size + far_code_size;
 
-#if defined(_WIN32)
-  m_code_ptr = static_cast<u8*>(VirtualAlloc(nullptr, m_total_size, MEM_COMMIT, PAGE_EXECUTE_READWRITE));
-  if (!m_code_ptr)
+#ifdef CPU_ARCH_X64
+  // Try to find a region in 32-bit range of ourselves.
+  // Assume that the DuckStation binary will at max be 256MB. Therefore the max offset is
+  // +/- 256MB + round_up_pow2(size). This'll be 512MB for the JITs.
+  static const u8 base_ptr = 0;
+  const u8* base =
+    reinterpret_cast<const u8*>(Common::AlignDownPow2(reinterpret_cast<uintptr_t>(&base_ptr), HOST_PAGE_SIZE));
+  const u32 max_displacement = 0x80000000u - Common::NextPow2(256 * 1024 * 1024 + m_total_size);
+  const u8* max_address = ((base + max_displacement) < base) ?
+                            reinterpret_cast<const u8*>(std::numeric_limits<uintptr_t>::max()) :
+                            (base + max_displacement);
+  const u8* min_address = ((base - max_displacement) > base) ? nullptr : (base - max_displacement);
+  const u32 step = 256 * 1024 * 1024;
+  const u32 steps = static_cast<u32>(max_address - min_address) / step;
+  // Probe downwards from max_address in 256MB steps, stopping at the first successful allocation.
+  for (u32 offset = 0; offset < steps; offset++)
   {
-    Log_ErrorPrintf("VirtualAlloc(RWX, %u) for internal buffer failed: %u", m_total_size, GetLastError());
-    return false;
+    const u8* addr = max_address - (offset * step);
+    Log_VerboseFmt("Trying {} (base {}, offset {}, displacement 0x{:X})", static_cast<const void*>(addr),
+                   static_cast<const void*>(base), offset, static_cast<ptrdiff_t>(addr - base));
+    if (TryAllocateAt(addr))
+      break;
   }
-#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__FreeBSD__)
-  int flags = MAP_PRIVATE | MAP_ANONYMOUS;
-#if defined(__APPLE__) && defined(__aarch64__)
-  // MAP_JIT and toggleable write protection is required on Apple Silicon.
-  flags |= MAP_JIT;
-#endif
-
-  m_code_ptr = static_cast<u8*>(mmap(nullptr, m_total_size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0));
-  if (!m_code_ptr)
+  if (m_code_ptr)
   {
-    Log_ErrorPrintf("mmap(RWX, %u) for internal buffer failed: %d", m_total_size, errno);
-    return false;
+    Log_InfoFmt("Allocated JIT buffer of size {} at {} (0x{:X} bytes away)", m_total_size,
+                static_cast<void*>(m_code_ptr), static_cast<ptrdiff_t>(m_code_ptr - base));
+  }
+  else
+  {
+    Log_ErrorPrint("Failed to allocate JIT buffer in range, expect crashes.");
+    if (!TryAllocateAt(nullptr))
+      return false;
   }
 #else
-  return false;
+  if (!TryAllocateAt(nullptr))
+    return false;
 #endif
 
   m_free_code_ptr = m_code_ptr;
@@ -82,6 +97,48 @@ bool JitCodeBuffer::Allocate(u32 size /* = 64 * 1024 * 1024 */, u32 far_code_siz
   return true;
 }
 
+// Tries to map m_total_size bytes of read/write/execute memory, handing addr (may be null) to the OS allocator.
+// On success the mapping is stored in m_code_ptr and true is returned; otherwise false is returned.
+bool JitCodeBuffer::TryAllocateAt(const void* addr)
+{
+#if defined(_WIN32)
+  m_code_ptr = static_cast<u8*>(VirtualAlloc(const_cast<void*>(addr), m_total_size,
+                                             addr ? (MEM_RESERVE | MEM_COMMIT) : MEM_COMMIT, PAGE_EXECUTE_READWRITE));
+  if (!m_code_ptr)
+  {
+    if (!addr)
+      Log_ErrorPrintf("VirtualAlloc(RWX, %u) for internal buffer failed: %u", m_total_size, GetLastError());
+    return false;
+  }
+
+  return true;
+#elif defined(__linux__) || defined(__ANDROID__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__FreeBSD__)
+  int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+#if defined(__APPLE__) && defined(__aarch64__)
+  // MAP_JIT and toggleable write protection is required on Apple Silicon.
+  flags |= MAP_JIT;
+#endif
+
+  // mmap() reports failure with MAP_FAILED, never with a null pointer.
+  void* const mapping =
+    mmap(const_cast<void*>(addr), m_total_size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
+  if (mapping == MAP_FAILED)
+  {
+    m_code_ptr = nullptr;
+    if (!addr)
+      Log_ErrorPrintf("mmap(RWX, %u) for internal buffer failed: %d", m_total_size, errno);
+
+    return false;
+  }
+
+  // NOTE: without MAP_FIXED, addr is only a placement hint and the mapping may land elsewhere.
+  m_code_ptr = static_cast<u8*>(mapping);
+  return true;
+#else
+  return false;
+#endif
+}
+
 bool JitCodeBuffer::Initialize(void* buffer, u32 size, u32 far_code_size /* = 0 */, u32 guard_size /* = 0 */)
 {
   Destroy();
diff --git a/src/util/jit_code_buffer.h b/src/util/jit_code_buffer.h
index 88c0f767a..08e586201 100644
--- a/src/util/jit_code_buffer.h
+++ b/src/util/jit_code_buffer.h
@@ -48,6 +48,8 @@ public:
   static void FlushInstructionCache(void* address, u32 size);
 
 private:
+  bool TryAllocateAt(const void* addr);
+
   u8* m_code_ptr = nullptr;
   u8* m_free_code_ptr = nullptr;
   u32 m_code_size = 0;