From e3fdb08ad72762b86067bdcfb4f92a9849e51778 Mon Sep 17 00:00:00 2001 From: "Dr. Chat" Date: Thu, 18 Aug 2016 18:26:55 -0500 Subject: [PATCH] x64 JIT: Allocate constant data outside of the guest heap. --- src/xenia/cpu/backend/x64/x64_backend.cc | 9 +- src/xenia/cpu/backend/x64/x64_backend.h | 5 +- src/xenia/cpu/backend/x64/x64_emitter.cc | 199 ++++++++++++--------- src/xenia/cpu/backend/x64/x64_emitter.h | 5 +- src/xenia/cpu/backend/x64/x64_sequences.cc | 2 + 5 files changed, 127 insertions(+), 93 deletions(-) diff --git a/src/xenia/cpu/backend/x64/x64_backend.cc b/src/xenia/cpu/backend/x64/x64_backend.cc index 3bfae53ce..6c8415790 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.cc +++ b/src/xenia/cpu/backend/x64/x64_backend.cc @@ -41,7 +41,7 @@ class X64ThunkEmitter : public X64Emitter { }; X64Backend::X64Backend(Processor* processor) - : Backend(processor), code_cache_(nullptr), emitter_data_(0) { + : Backend(processor), code_cache_(nullptr) { if (cs_open(CS_ARCH_X86, CS_MODE_64, &capstone_handle_) != CS_ERR_OK) { assert_always("Failed to initialize capstone"); } @@ -51,14 +51,11 @@ X64Backend::X64Backend(Processor* processor) } X64Backend::~X64Backend() { - if (emitter_data_) { - processor()->memory()->SystemHeapFree(emitter_data_); - emitter_data_ = 0; - } if (capstone_handle_) { cs_close(&capstone_handle_); } + X64Emitter::FreeConstData(emitter_data_); ExceptionHandler::Uninstall(&ExceptionCallbackThunk, this); } @@ -114,7 +111,7 @@ bool X64Backend::Initialize() { code_cache_->CommitExecutableRange(0x9FFF0000, 0x9FFFFFFF); // Allocate emitter constant data. - emitter_data_ = X64Emitter::PlaceData(processor()->memory()); + emitter_data_ = X64Emitter::PlaceConstData(); // Setup exception callback ExceptionHandler::Install(&ExceptionCallbackThunk, this); diff --git a/src/xenia/cpu/backend/x64/x64_backend.h b/src/xenia/cpu/backend/x64/x64_backend.h index 5c060ae86..d0e9e2627 100644 --- a/src/xenia/cpu/backend/x64/x64_backend.h +++ b/src/xenia/cpu/backend/x64/x64_backend.h @@ -42,7 +42,7 @@ class X64Backend : public Backend { ~X64Backend() override; X64CodeCache* code_cache() const { return code_cache_.get(); } - uint32_t emitter_data() const { return emitter_data_; } + uintptr_t emitter_data() const { return emitter_data_; } // Call a generated function, saving all stack parameters. HostToGuestThunk host_to_guest_thunk() const { return host_to_guest_thunk_; } @@ -76,8 +76,7 @@ class X64Backend : public Backend { uintptr_t capstone_handle_ = 0; std::unique_ptr code_cache_; - - uint32_t emitter_data_; + uintptr_t emitter_data_ = 0; HostToGuestThunk host_to_guest_thunk_; GuestToHostThunk guest_to_host_thunk_; diff --git a/src/xenia/cpu/backend/x64/x64_emitter.cc b/src/xenia/cpu/backend/x64/x64_emitter.cc index 79f7f1cbd..be9b53229 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.cc +++ b/src/xenia/cpu/backend/x64/x64_emitter.cc @@ -296,7 +296,7 @@ uint64_t TrapDebugPrint(void* raw_context, uint64_t address) { XELOGD("(DebugPrint) %s", str); if (FLAGS_enable_debugprint_log) { - debugging::DebugPrint("(DebugPrint) %s\n", str); + debugging::DebugPrint("(DebugPrint) %s", str); } return 0; @@ -556,6 +556,7 @@ void X64Emitter::ReloadEDX() { // Len Assembly Byte Sequence // ============================================================================ +// 1b NOP 90H // 2b 66 NOP 66 90H // 3b NOP DWORD ptr [EAX] 0F 1F 00H // 4b NOP DWORD ptr [EAX + 00H] 0F 1F 40 00H @@ -602,90 +603,124 @@ void X64Emitter::MovMem64(const Xbyak::RegExp& addr, uint64_t v) { } } -uint32_t X64Emitter::PlaceData(Memory* memory) { - static const vec128_t xmm_consts[] = { - /* XMMZero */ vec128f(0.0f), - /* XMMOne */ vec128f(1.0f), - /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f), - /* XMMFFFF */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, - 0xFFFFFFFFu, 0xFFFFFFFFu), - /* XMMMaskX16Y16 */ vec128i(0x0000FFFFu, 0xFFFF0000u, - 0x00000000u, 0x00000000u), - /* XMMFlipX16Y16 */ vec128i(0x00008000u, 0x00000000u, - 0x00000000u, 0x00000000u), - /* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f), - /* XMMNormalizeX16Y16 */ vec128f( - 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), - /* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f), - /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), - /* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f), - /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, - 0x80000000u, 0x80000000u), - /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, - 0x00000000u, 0x80000000u), - /* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, - 0x7FFFFFFFu, 0x7FFFFFFFu), - /* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, - 0xFFFFFFFFu, 0x7FFFFFFFu), - /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, - 0x08090A0Bu, 0x0C0D0E0Fu), - /* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u, - 0x09080B0Au, 0x0D0C0F0Eu), - /* XMMPermuteControl15 */ vec128b(15), - /* XMMPermuteByteMask */ vec128b(0x1F), - /* XMMPackD3DCOLORSat */ vec128i(0x404000FFu), - /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, - 0xFFFFFFFFu, 0x0C000408u), - /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, - 0xFFFFFF0Cu, 0xFFFFFF0Fu), - /* XMMPackFLOAT16_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, - 0xFFFFFFFFu, 0x01000302u), - /* XMMUnpackFLOAT16_2 */ vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, - 0xFFFFFFFFu, 0xFFFFFFFFu), - /* XMMPackFLOAT16_4 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, - 0x05040706u, 0x01000302u), - /* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, - 0xFFFFFFFFu, 0xFFFFFFFFu), - /* XMMPackSHORT_2Min */ vec128i(0x403F8001u), - /* XMMPackSHORT_2Max */ vec128i(0x40407FFFu), - /* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, - 0xFFFFFFFFu, 0x01000504u), - /* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, - 0xFFFFFFFFu, 0xFFFFFFFFu), - /* XMMOneOver255 */ vec128f(1.0f / 255.0f), - /* XMMMaskEvenPI16 */ vec128i(0x0000FFFFu, 0x0000FFFFu, - 0x0000FFFFu, 0x0000FFFFu), - /* XMMShiftMaskEvenPI16 */ vec128i(0x0000000Fu, 0x0000000Fu, - 0x0000000Fu, 0x0000000Fu), - /* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, - 0x0000001Fu, 0x0000001Fu), - /* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, - 0x000000FFu, 0x000000FFu), - /* XMMSwapWordMask */ vec128i(0x03030303u, 0x03030303u, - 0x03030303u, 0x03030303u), - /* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, - 0xFFFFFFFFu, 0x00000000u), - /* XMM255 */ vec128f(255.0f), - /* XMMPI32 */ vec128i(32), - /* XMMSignMaskI8 */ vec128i(0x80808080u, 0x80808080u, - 0x80808080u, 0x80808080u), - /* XMMSignMaskI16 */ vec128i(0x80008000u, 0x80008000u, - 0x80008000u, 0x80008000u), - /* XMMSignMaskI32 */ vec128i(0x80000000u, 0x80000000u, - 0x80000000u, 0x80000000u), - /* XMMSignMaskF32 */ vec128i(0x80000000u, 0x80000000u, - 0x80000000u, 0x80000000u), - /* XMMShortMinPS */ vec128f(SHRT_MIN), - /* XMMShortMaxPS */ vec128f(SHRT_MAX), - }; - uint32_t ptr = memory->SystemHeapAlloc(sizeof(xmm_consts)); - std::memcpy(memory->TranslateVirtual(ptr), xmm_consts, sizeof(xmm_consts)); - return ptr; +static const vec128_t xmm_consts[] = { + /* XMMZero */ vec128f(0.0f), + /* XMMOne */ vec128f(1.0f), + /* XMMNegativeOne */ vec128f(-1.0f, -1.0f, -1.0f, -1.0f), + /* XMMFFFF */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0xFFFFFFFFu), + /* XMMMaskX16Y16 */ vec128i(0x0000FFFFu, 0xFFFF0000u, 0x00000000u, + 0x00000000u), + /* XMMFlipX16Y16 */ vec128i(0x00008000u, 0x00000000u, 0x00000000u, + 0x00000000u), + /* XMMFixX16Y16 */ vec128f(-32768.0f, 0.0f, 0.0f, 0.0f), + /* XMMNormalizeX16Y16 */ vec128f( + 1.0f / 32767.0f, 1.0f / (32767.0f * 65536.0f), 0.0f, 0.0f), + /* XMM0001 */ vec128f(0.0f, 0.0f, 0.0f, 1.0f), + /* XMM3301 */ vec128f(3.0f, 3.0f, 0.0f, 1.0f), + /* XMM3333 */ vec128f(3.0f, 3.0f, 3.0f, 3.0f), + /* XMMSignMaskPS */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, + 0x80000000u), + /* XMMSignMaskPD */ vec128i(0x00000000u, 0x80000000u, 0x00000000u, + 0x80000000u), + /* XMMAbsMaskPS */ vec128i(0x7FFFFFFFu, 0x7FFFFFFFu, 0x7FFFFFFFu, + 0x7FFFFFFFu), + /* XMMAbsMaskPD */ vec128i(0xFFFFFFFFu, 0x7FFFFFFFu, 0xFFFFFFFFu, + 0x7FFFFFFFu), + /* XMMByteSwapMask */ vec128i(0x00010203u, 0x04050607u, 0x08090A0Bu, + 0x0C0D0E0Fu), + /* XMMByteOrderMask */ vec128i(0x01000302u, 0x05040706u, 0x09080B0Au, + 0x0D0C0F0Eu), + /* XMMPermuteControl15 */ vec128b(15), + /* XMMPermuteByteMask */ vec128b(0x1F), + /* XMMPackD3DCOLORSat */ vec128i(0x404000FFu), + /* XMMPackD3DCOLOR */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0x0C000408u), + /* XMMUnpackD3DCOLOR */ vec128i(0xFFFFFF0Eu, 0xFFFFFF0Du, 0xFFFFFF0Cu, + 0xFFFFFF0Fu), + /* XMMPackFLOAT16_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0x01000302u), + /* XMMUnpackFLOAT16_2 */ vec128i(0x0D0C0F0Eu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0xFFFFFFFFu), + /* XMMPackFLOAT16_4 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0x05040706u, + 0x01000302u), + /* XMMUnpackFLOAT16_4 */ vec128i(0x09080B0Au, 0x0D0C0F0Eu, 0xFFFFFFFFu, + 0xFFFFFFFFu), + /* XMMPackSHORT_2Min */ vec128i(0x403F8001u), + /* XMMPackSHORT_2Max */ vec128i(0x40407FFFu), + /* XMMPackSHORT_2 */ vec128i(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu, + 0x01000504u), + /* XMMUnpackSHORT_2 */ vec128i(0xFFFF0F0Eu, 0xFFFF0D0Cu, 0xFFFFFFFFu, + 0xFFFFFFFFu), + /* XMMOneOver255 */ vec128f(1.0f / 255.0f), + /* XMMMaskEvenPI16 */ vec128i(0x0000FFFFu, 0x0000FFFFu, 0x0000FFFFu, + 0x0000FFFFu), + /* XMMShiftMaskEvenPI16 */ vec128i(0x0000000Fu, 0x0000000Fu, 0x0000000Fu, + 0x0000000Fu), + /* XMMShiftMaskPS */ vec128i(0x0000001Fu, 0x0000001Fu, 0x0000001Fu, + 0x0000001Fu), + /* XMMShiftByteMask */ vec128i(0x000000FFu, 0x000000FFu, 0x000000FFu, + 0x000000FFu), + /* XMMSwapWordMask */ vec128i(0x03030303u, 0x03030303u, 0x03030303u, + 0x03030303u), + /* XMMUnsignedDwordMax */ vec128i(0xFFFFFFFFu, 0x00000000u, 0xFFFFFFFFu, + 0x00000000u), + /* XMM255 */ vec128f(255.0f), + /* XMMPI32 */ vec128i(32), + /* XMMSignMaskI8 */ vec128i(0x80808080u, 0x80808080u, 0x80808080u, + 0x80808080u), + /* XMMSignMaskI16 */ vec128i(0x80008000u, 0x80008000u, 0x80008000u, + 0x80008000u), + /* XMMSignMaskI32 */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, + 0x80000000u), + /* XMMSignMaskF32 */ vec128i(0x80000000u, 0x80000000u, 0x80000000u, + 0x80000000u), + /* XMMShortMinPS */ vec128f(SHRT_MIN), + /* XMMShortMaxPS */ vec128f(SHRT_MAX), +}; + +// First location to try and place constants. +static const uintptr_t kConstDataLocation = 0x20000000; +static const uintptr_t kConstDataSize = sizeof(xmm_consts); + +// Increment the location by this amount for every allocation failure. +static const uintptr_t kConstDataIncrement = 0x00010000; + +// This function places constant data that is used by the emitter later on. +// Only called once and used by multiple instances of the emitter. +// +// TODO(DrChat): This should be placed in the code cache with the code, but +// doing so requires RIP-relative addressing, which is difficult to support +// given the current setup. +uintptr_t X64Emitter::PlaceConstData() { + uint8_t* ptr = reinterpret_cast(kConstDataLocation); + void* mem = nullptr; + while (!mem) { + mem = memory::AllocFixed( + ptr, xe::round_up(kConstDataSize, memory::page_size()), + memory::AllocationType::kReserveCommit, memory::PageAccess::kReadWrite); + + ptr += kConstDataIncrement; + } + + // The pointer must not be greater than 31 bits. + assert_zero(reinterpret_cast(mem) & ~0x7FFFFFFF); + std::memcpy(mem, xmm_consts, sizeof(xmm_consts)); + memory::Protect(mem, kConstDataSize, memory::PageAccess::kReadOnly, nullptr); + + return reinterpret_cast(mem); +} + +void X64Emitter::FreeConstData(uintptr_t data) { + memory::DeallocFixed(reinterpret_cast(data), 0, + memory::DeallocationType::kDecommitRelease); } Xbyak::Address X64Emitter::GetXmmConstPtr(XmmConst id) { - // Load through fixed constant table setup by PlaceData. - return ptr[rdx + backend_->emitter_data() + sizeof(vec128_t) * id]; + // Load through fixed constant table setup by PlaceConstData. + // It's important that the pointer is not signed, as it will be sign-extended. + return ptr[reinterpret_cast(backend_->emitter_data() + + sizeof(vec128_t) * id)]; } void X64Emitter::LoadConstantXmm(Xbyak::Xmm dest, const vec128_t& v) { diff --git a/src/xenia/cpu/backend/x64/x64_emitter.h b/src/xenia/cpu/backend/x64/x64_emitter.h index 110ca4315..50401ab83 100644 --- a/src/xenia/cpu/backend/x64/x64_emitter.h +++ b/src/xenia/cpu/backend/x64/x64_emitter.h @@ -114,13 +114,14 @@ class X64Emitter : public Xbyak::CodeGenerator { Processor* processor() const { return processor_; } X64Backend* backend() const { return backend_; } + static uintptr_t PlaceConstData(); + static void FreeConstData(uintptr_t data); + bool Emit(GuestFunction* function, hir::HIRBuilder* builder, uint32_t debug_info_flags, FunctionDebugInfo* debug_info, void** out_code_address, size_t* out_code_size, std::vector* out_source_map); - static uint32_t PlaceData(Memory* memory); - public: // Reserved: rsp // Scratch: rax/rcx/rdx diff --git a/src/xenia/cpu/backend/x64/x64_sequences.cc b/src/xenia/cpu/backend/x64/x64_sequences.cc index 4f659732c..83bfcd7d6 100644 --- a/src/xenia/cpu/backend/x64/x64_sequences.cc +++ b/src/xenia/cpu/backend/x64/x64_sequences.cc @@ -610,6 +610,7 @@ struct Sequence { reg_reg_fn(e, i.src2, temp); } } else if (i.src2.is_constant) { + assert_true(!i.src1.is_constant); if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.src1, static_cast(i.src2.constant())); } else { @@ -636,6 +637,7 @@ struct Sequence { reg_reg_fn(e, i.dest, i.src2, temp, true); } } else if (i.src2.is_constant) { + assert_true(!i.src1.is_constant); if (i.src2.ConstantFitsIn32Reg()) { reg_const_fn(e, i.dest, i.src1, static_cast(i.src2.constant()), false);