From f32cede08674031d0fb5b49d5a13c9dbe08d6727 Mon Sep 17 00:00:00 2001
From: Matthew Parlane
Date: Sun, 14 Jun 2015 16:06:26 +1200
Subject: [PATCH] Revert "x64: build a Position-Independent Executable (PIE)"

---
 CMakeLists.txt                                 |   6 -
 Source/Core/Common/CodeBlock.h                 |   4 +-
 Source/Core/Common/MemoryUtil.cpp              | 120 +++++++++++++++---
 Source/Core/Common/MemoryUtil.h                |   2 +-
 Source/Core/Common/x64Emitter.h                |  21 ++-
 Source/Core/Core/DSP/Jit/DSPJitUtil.cpp        |  16 +--
 Source/Core/Core/PowerPC/Jit64/JitAsm.cpp      |  39 +++---
 Source/Core/Core/PowerPC/Jit64/JitAsm.h        |   2 +-
 .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp    |   2 +-
 .../PowerPC/Jit64/Jit_LoadStorePaired.cpp      |   6 +-
 .../PowerPC/Jit64/Jit_SystemRegisters.cpp      |   4 +-
 .../PowerPC/Jit64Common/Jit64AsmCommon.cpp     |  46 +++----
 Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp    |   4 +-
 Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp     |   2 +-
 Source/Core/Core/PowerPC/JitCommon/JitBase.h   |  12 --
 .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp   |   4 +-
 Source/Core/Core/PowerPC/JitCommon/Jit_Util.h  |  10 +-
 .../PowerPC/JitCommon/TrampolineCache.cpp      |   2 +-
 Source/UnitTests/Common/x64EmitterTest.cpp     |   3 +-
 19 files changed, 186 insertions(+), 119 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91071aefdb..81bea32039 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,6 @@ option(USE_UPNP "Enables UPnP port mapping support" ON)
 option(DISABLE_WX "Disable wxWidgets (use Qt or CLI interface)" OFF)
 option(ENABLE_QT "Enable Qt (use the experimental Qt interface)" OFF)
 option(ENABLE_PCH "Use PCH to speed up compilation" ON)
-option(ENABLE_PIE "Build a Position-Independent Executable (PIE)" ON)
 option(ENABLE_LTO "Enables Link Time Optimization" OFF)
 option(ENABLE_GENERIC "Enables generic build that should run on any little-endian host" OFF)
 if(APPLE)
@@ -215,11 +214,6 @@ if(UNIX AND NOT APPLE)
 	check_and_add_flag(VISIBILITY_HIDDEN -fvisibility=hidden)
 endif()

-if(ENABLE_PIE)
-	add_definitions(-fPIE)
-	set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie")
-endif()
-
 if(ENABLE_LTO)
 	check_and_add_flag(LTO -flto)
 	if(CMAKE_CXX_COMPILER_ID STREQUAL GNU)
diff --git a/Source/Core/Common/CodeBlock.h b/Source/Core/Common/CodeBlock.h
index ed1f464904..972fc707d6 100644
--- a/Source/Core/Common/CodeBlock.h
+++ b/Source/Core/Common/CodeBlock.h
@@ -28,10 +28,10 @@ public:
 	virtual ~CodeBlock() { if (region) FreeCodeSpace(); }

 	// Call this before you generate any code.
-	void AllocCodeSpace(int size, void* hint = nullptr)
+	void AllocCodeSpace(int size)
 	{
 		region_size = size;
-		region = (u8*)AllocateExecutableMemory(region_size, hint);
+		region = (u8*)AllocateExecutableMemory(region_size);
 		T::SetCodePtr(region);
 	}
diff --git a/Source/Core/Common/MemoryUtil.cpp b/Source/Core/Common/MemoryUtil.cpp
index e4563e3409..510b258b09 100644
--- a/Source/Core/Common/MemoryUtil.cpp
+++ b/Source/Core/Common/MemoryUtil.cpp
@@ -27,29 +27,71 @@
 #endif
 #endif

-void* AllocateExecutableMemory(size_t size, void* map_hint)
+// Valgrind doesn't support MAP_32BIT.
+// Uncomment the following line to be able to run Dolphin in Valgrind.
+//#undef MAP_32BIT
+
+#if !defined(_WIN32) && defined(_M_X86_64) && !defined(MAP_32BIT)
+#include <unistd.h>
+#define PAGE_MASK (getpagesize() - 1)
+#define round_page(x) ((((unsigned long)(x)) + PAGE_MASK) & ~(PAGE_MASK))
+#endif
+
+// This is purposely not a full wrapper for virtualalloc/mmap, but it
+// provides exactly the primitive operations that Dolphin needs.
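+//
+// A rough sketch of the policy implemented below, for orientation only
+// (the names are the ones defined above and in the function that follows,
+// nothing new): on Unix x86-64 without MAP_32BIT, start hinting at 0.5 GB
+// and bump the hint past each mapping,
+//
+//   map_hint = (char*)round_page(512*1024*1024);
+//   void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
+//                    MAP_ANON | MAP_PRIVATE, -1, 0);
+//   map_hint = (char*)round_page(map_hint + size);
+//
+// so executable mappings tend to land in the low 2 GB, where the JIT can
+// still reach them with 32-bit displacements.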
+
+void* AllocateExecutableMemory(size_t size, bool low)
 {
 #if defined(_WIN32)
 	void* ptr = VirtualAlloc(0, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
 #else
+	static char *map_hint = nullptr;
+#if defined(_M_X86_64) && !defined(MAP_32BIT)
+	// This OS has no flag to enforce allocation below the 4 GB boundary,
+	// but if we hint that we want a low address it is very likely we will
+	// get one.
+	// An older version of this code used MAP_FIXED, but that has the side
+	// effect of discarding already mapped pages that happen to be in the
+	// requested virtual memory range (such as the emulated RAM, sometimes).
+	if (low && (!map_hint))
+		map_hint = (char*)round_page(512*1024*1024); /* 0.5 GB rounded up to the next page */
+#endif
 	void* ptr = mmap(map_hint, size, PROT_READ | PROT_WRITE | PROT_EXEC,
-		MAP_ANON | MAP_PRIVATE, -1, 0);
+		MAP_ANON | MAP_PRIVATE
+#if defined(_M_X86_64) && defined(MAP_32BIT)
+		| (low ? MAP_32BIT : 0)
+#endif
+		, -1, 0);
 #endif /* defined(_WIN32) */

+	// printf("Mapped executable memory at %p (size %ld)\n", ptr,
+	//	(unsigned long)size);
+
 #ifdef _WIN32
 	if (ptr == nullptr)
+	{
 #else
 	if (ptr == MAP_FAILED)
-#endif
 	{
 		ptr = nullptr;
-		PanicAlert("Failed to allocate executable memory.");
+#endif
+		PanicAlert("Failed to allocate executable memory. If you are running Dolphin in Valgrind, try '#undef MAP_32BIT'.");
 	}
+#if !defined(_WIN32) && defined(_M_X86_64) && !defined(MAP_32BIT)
+	else
+	{
+		if (low)
+		{
+			map_hint += size;
+			map_hint = (char*)round_page(map_hint); /* round up to the next page */
+			// printf("Next map will (hopefully) be at %p\n", map_hint);
+		}
+	}
+#endif

-#ifdef _X86_64
-	ptrdiff_t ofs = (u8*)ptr - (u8*)map_hint;
-	if (ofs < -0x80000000ll || ofs + size > 0x80000000ll)
-		PanicAlert("Executable range can't be used for RIP-relative addressing.");
+#if _M_X86_64
+	if ((u64)ptr >= 0x80000000 && low == true)
+		PanicAlert("Executable memory ended up above 2GB!");
 #endif

 	return ptr;
@@ -75,12 +117,18 @@ void* AllocateMemoryPages(size_t size)

 void* AllocateAlignedMemory(size_t size, size_t alignment)
 {
-	void* ptr = nullptr;
 #ifdef _WIN32
-	if (!(ptr = _aligned_malloc(size, alignment)))
+	void* ptr = _aligned_malloc(size, alignment);
 #else
-	if (posix_memalign(&ptr, alignment, size))
+	void* ptr = nullptr;
+	if (posix_memalign(&ptr, alignment, size) != 0)
+		ERROR_LOG(MEMMAP, "Failed to allocate aligned memory");
 #endif
+
+	// printf("Mapped memory at %p (size %ld)\n", ptr,
+	//	(unsigned long)size);
+
+	if (ptr == nullptr)
 		PanicAlert("Failed to allocate aligned memory");

 	return ptr;
@@ -88,12 +136,23 @@ void* AllocateAlignedMemory(size_t size, size_t alignment)

 void FreeMemoryPages(void* ptr, size_t size)
 {
+	if (ptr)
+	{
+		bool error_occurred = false;
+
 #ifdef _WIN32
-	if (ptr && !VirtualFree(ptr, 0, MEM_RELEASE))
+		if (!VirtualFree(ptr, 0, MEM_RELEASE))
+			error_occurred = true;
 #else
-	if (ptr && munmap(ptr, size))
+		int retval = munmap(ptr, size);
+
+		if (retval != 0)
+			error_occurred = true;
 #endif
-		PanicAlert("FreeMemoryPages failed!\n%s", GetLastErrorMsg().c_str());
+
+		if (error_occurred)
+			PanicAlert("FreeMemoryPages failed!\n%s", GetLastErrorMsg().c_str());
+	}
 }

 void FreeAlignedMemory(void* ptr)
@@ -110,34 +169,58 @@ void FreeAlignedMemory(void* ptr)

 void ReadProtectMemory(void* ptr, size_t size)
 {
+	bool error_occurred = false;
+
 #ifdef _WIN32
 	DWORD oldValue;
 	if (!VirtualProtect(ptr, size, PAGE_NOACCESS, &oldValue))
+		error_occurred = true;
 #else
-	if (mprotect(ptr, size, PROT_NONE))
+	int retval = mprotect(ptr, size, PROT_NONE);
+
+	if (retval != 0)
+		error_occurred = true;
 #endif
+
+	if (error_occurred)
 		PanicAlert("ReadProtectMemory failed!\n%s", GetLastErrorMsg().c_str());
 }

 void WriteProtectMemory(void* ptr, size_t size, bool allowExecute)
 {
+	bool error_occurred = false;
+
 #ifdef _WIN32
 	DWORD oldValue;
 	if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READ : PAGE_READONLY, &oldValue))
+		error_occurred = true;
 #else
-	if (mprotect(ptr, size, PROT_READ | (allowExecute ? PROT_EXEC : 0)))
+	int retval = mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_EXEC) : PROT_READ);
+
+	if (retval != 0)
+		error_occurred = true;
 #endif
+
+	if (error_occurred)
 		PanicAlert("WriteProtectMemory failed!\n%s", GetLastErrorMsg().c_str());
 }

 void UnWriteProtectMemory(void* ptr, size_t size, bool allowExecute)
 {
+	bool error_occurred = false;
+
 #ifdef _WIN32
 	DWORD oldValue;
 	if (!VirtualProtect(ptr, size, allowExecute ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE, &oldValue))
+		error_occurred = true;
 #else
-	if (mprotect(ptr, size, PROT_READ | PROT_WRITE | (allowExecute ? PROT_EXEC : 0)))
+	int retval = mprotect(ptr, size, allowExecute ? (PROT_READ | PROT_WRITE | PROT_EXEC) : PROT_WRITE | PROT_READ);
+
+	if (retval != 0)
+		error_occurred = true;
 #endif
+
+	if (error_occurred)
 		PanicAlert("UnWriteProtectMemory failed!\n%s", GetLastErrorMsg().c_str());
 }

@@ -153,8 +236,7 @@ std::string MemUsage()
 	// Print information about the memory usage of the process.

 	hProcess = OpenProcess(PROCESS_QUERY_INFORMATION | PROCESS_VM_READ, FALSE, processID);
-	if (nullptr == hProcess)
-		return "MemUsage Error";
+	if (nullptr == hProcess) return "MemUsage Error";

 	if (GetProcessMemoryInfo(hProcess, &pmc, sizeof(pmc)))
 		Ret = StringFromFormat("%s K", ThousandSeparate(pmc.WorkingSetSize / 1024, 7).c_str());
diff --git a/Source/Core/Common/MemoryUtil.h b/Source/Core/Common/MemoryUtil.h
index 248937611d..e986069d24 100644
--- a/Source/Core/Common/MemoryUtil.h
+++ b/Source/Core/Common/MemoryUtil.h
@@ -7,7 +7,7 @@
 #include <cstddef>
 #include <string>

-void* AllocateExecutableMemory(size_t size, void* map_hint);
+void* AllocateExecutableMemory(size_t size, bool low = true);
 void* AllocateMemoryPages(size_t size);
 void FreeMemoryPages(void* ptr, size_t size);
 void* AllocateAlignedMemory(size_t size,size_t alignment);
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index c62254aec4..7e3b64f49e 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -218,17 +218,17 @@ inline OpArg M(const T* ptr)    {return OpArg((u64)(const void*)ptr, (int)SCALE_
 inline OpArg R(X64Reg value)    {return OpArg(0, SCALE_NONE, value);}
 inline OpArg MatR(X64Reg value) {return OpArg(0, SCALE_ATREG, value);}

-inline OpArg MDisp(X64Reg value, ptrdiff_t offset)
+inline OpArg MDisp(X64Reg value, int offset)
 {
-	return OpArg(offset, SCALE_ATREG, value);
+	return OpArg((u32)offset, SCALE_ATREG, value);
 }

-inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, ptrdiff_t offset)
+inline OpArg MComplex(X64Reg base, X64Reg scaled, int scale, int offset)
 {
 	return OpArg(offset, scale, base, scaled);
 }

-inline OpArg MScaled(X64Reg scaled, int scale, ptrdiff_t offset)
+inline OpArg MScaled(X64Reg scaled, int scale, int offset)
 {
 	if (scale == SCALE_1)
 		return OpArg(offset, SCALE_ATREG, scaled);
@@ -247,10 +247,17 @@ inline OpArg Imm32(u32 imm) {return OpArg(imm, SCALE_IMM32);}
 inline OpArg Imm64(u64 imm) {return OpArg(imm, SCALE_IMM64);}
 inline OpArg ImmPtr(const void* imm) {return Imm64((u64)imm);}

-inline bool FitsInS32(const ptrdiff_t distance)
+inline u32 PtrOffset(const void* ptr, const void* base)
 {
-	return distance < 0x80000000LL &&
-	       distance >= -0x80000000LL;
+	s64 distance = (s64)ptr-(s64)base;
+	if (distance >= 0x80000000LL ||
+	    distance < -0x80000000LL)
+	{
+		_assert_msg_(DYNA_REC, 0, "pointer offset out of range");
+		return 0;
+	}
+
+	return (u32)distance;
 }

 //usage: int a[]; ARRAY_OFFSET(a,10)
diff --git a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp
index 64761455e3..51b0ffc7d0 100644
--- a/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp
+++ b/Source/Core/Core/DSP/Jit/DSPJitUtil.cpp
@@ -20,16 +20,14 @@ void DSPEmitter::dsp_reg_stack_push(int stack_reg)
 	AND(8, R(AL), Imm8(DSP_STACK_MASK));
 	MOV(8, M(&g_dsp.reg_stack_ptr[stack_reg]), R(AL));

-	X64Reg tmp1, tmp2;
+	X64Reg tmp1;
 	gpr.getFreeXReg(tmp1);
-	gpr.getFreeXReg(tmp2);
 	//g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]] = g_dsp.r[DSP_REG_ST0 + stack_reg];
 	MOV(16, R(tmp1), M(&g_dsp.r.st[stack_reg]));
 	MOVZX(64, 8, RAX, R(AL));
-	MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[stack_reg]));
-	MOV(16, MComplex(tmp2, EAX, SCALE_2, 0), R(tmp1));
+	MOV(16, MComplex(EAX, EAX, 1,
+			 PtrOffset(&g_dsp.reg_stack[stack_reg][0],nullptr)), R(tmp1));
 	gpr.putXReg(tmp1);
-	gpr.putXReg(tmp2);
 }

 //clobbers:
@@ -39,15 +37,13 @@ void DSPEmitter::dsp_reg_stack_pop(int stack_reg)
 {
 	//g_dsp.r[DSP_REG_ST0 + stack_reg] = g_dsp.reg_stack[stack_reg][g_dsp.reg_stack_ptr[stack_reg]];
 	MOV(8, R(AL), M(&g_dsp.reg_stack_ptr[stack_reg]));
-	X64Reg tmp1, tmp2;
+	X64Reg tmp1;
 	gpr.getFreeXReg(tmp1);
-	gpr.getFreeXReg(tmp2);
 	MOVZX(64, 8, RAX, R(AL));
-	MOV(64, R(tmp2), ImmPtr(g_dsp.reg_stack[stack_reg]));
-	MOV(16, R(tmp1), MComplex(tmp2, EAX, SCALE_2, 0));
+	MOV(16, R(tmp1), MComplex(EAX, EAX, 1,
+				  PtrOffset(&g_dsp.reg_stack[stack_reg][0],nullptr)));
 	MOV(16, M(&g_dsp.r.st[stack_reg]), R(tmp1));
 	gpr.putXReg(tmp1);
-	gpr.putXReg(tmp2);

 	//g_dsp.reg_stack_ptr[stack_reg]--;
 	//g_dsp.reg_stack_ptr[stack_reg] &= DSP_STACK_MASK;
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
index 7bdcab2b43..f9384b2df6 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp
@@ -40,7 +40,7 @@ void Jit64AsmRoutineManager::Generate()

 	// Two statically allocated registers.
 	//MOV(64, R(RMEM), Imm64((u64)Memory::physical_base));
-	MOV(64, R(RPPCSTATE), ImmPtr(PPCSTATE_BASE));
+	MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80));

 	const u8* outerLoop = GetCodePtr();
 		ABI_PushRegistersAndAdjustStack({}, 0);
@@ -103,9 +103,9 @@ void Jit64AsmRoutineManager::Generate()
 			// optimizations safe, because IR and DR are usually set/cleared together.
 			// TODO: Branching based on the 20 most significant bits of instruction
 			// addresses without translating them is wrong.
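 			// (A sketch of the addressing pattern this revert restores; "table"
 			// stands for any of the icache arrays below, not a real variable.
 			// When the table lies in the low 2 GB its address fits a 32-bit
 			// displacement:
 			//     MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)table));
 			// otherwise it must be materialized in a register first:
 			//     MOV(64, R(RSCRATCH2), Imm64(table));
 			//     MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 			// which is why AllocateExecutableMemory() prefers low addresses.)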
-	u8* icache = jit->GetBlockCache()->iCache.data();
-	u8* icacheVmem = jit->GetBlockCache()->iCacheVMEM.data();
-	u8* icacheEx = jit->GetBlockCache()->iCacheEx.data();
+	u64 icache = (u64)jit->GetBlockCache()->iCache.data();
+	u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data();
+	u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data();
 	u32 mask = 0;
 	FixupBranch no_mem;
 	FixupBranch exit_mem;
 	no_mem = J_CC(CC_NZ);
 	AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));

-	if (FitsInS32(PPCSTATE_OFS(icache)))
+	if (icache <= INT_MAX)
 	{
-		MOV(32, R(RSCRATCH), MPIC(icache, RSCRATCH));
+		MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), ImmPtr(icache));
+		MOV(64, R(RSCRATCH2), Imm64(icache));
 		MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 	}
@@ -132,14 +132,13 @@ void Jit64AsmRoutineManager::Generate()
 	TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT));
 	FixupBranch no_vmem = J_CC(CC_Z);
 	AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK));
-
-	if (FitsInS32(PPCSTATE_OFS(icacheVmem)))
+	if (icacheVmem <= INT_MAX)
 	{
-		MOV(32, R(RSCRATCH), MPIC(icacheVmem, RSCRATCH));
+		MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), ImmPtr(icacheVmem));
+		MOV(64, R(RSCRATCH2), Imm64(icacheVmem));
 		MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 	}
@@ -150,13 +149,14 @@ void Jit64AsmRoutineManager::Generate()
 	TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT));
 	FixupBranch no_exram = J_CC(CC_Z);
 	AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK));
-	if (FitsInS32(PPCSTATE_OFS(icacheEx)))
+
+	if (icacheEx <= INT_MAX)
 	{
-		MOV(32, R(RSCRATCH), MPIC(icacheEx, RSCRATCH));
+		MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), ImmPtr(icacheEx));
+		MOV(64, R(RSCRATCH2), Imm64(icacheEx));
 		MOV(32, R(RSCRATCH), MRegSum(RSCRATCH2, RSCRATCH));
 	}
@@ -169,17 +169,16 @@ void Jit64AsmRoutineManager::Generate()
 	TEST(32, R(RSCRATCH), R(RSCRATCH));
 	FixupBranch notfound = J_CC(CC_L);
 	//grab from list and jump to it
-	const u8** codePointers = jit->GetBlockCache()->GetCodePointers();
-	if (FitsInS32(PPCSTATE_OFS(codePointers)))
+	u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers();
+	if (codePointers <= INT_MAX)
 	{
-		JMPptr(MPIC(codePointers, RSCRATCH, SCALE_8));
+		JMPptr(MScaled(RSCRATCH, SCALE_8, (s32)codePointers));
 	}
 	else
 	{
-		MOV(64, R(RSCRATCH2), ImmPtr(codePointers));
+		MOV(64, R(RSCRATCH2), Imm64(codePointers));
 		JMPptr(MComplex(RSCRATCH2, RSCRATCH, SCALE_8, 0));
 	}
-
 	SetJumpTarget(notfound);

 	//Ok, no block, let's jit
@@ -272,7 +271,7 @@ void Jit64AsmRoutineManager::GenerateCommon()
 	CMP(32, R(ABI_PARAM2), Imm32(0xCC008000));
 	FixupBranch skip_fast_write = J_CC(CC_NE, false);
 	MOV(32, RSCRATCH, M(&m_gatherPipeCount));
-	MOV(8, MPIC(&m_gatherPipe, RSCRATCH), ABI_PARAM1);
+	MOV(8, MDisp(RSCRATCH, (u32)&m_gatherPipe), ABI_PARAM1);
 	ADD(32, 1, M(&m_gatherPipeCount));
 	RET();
 	SetJumpTarget(skip_fast_write);
diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.h b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
index 6b0ae28ebe..8c33c8bace 100644
--- a/Source/Core/Core/PowerPC/Jit64/JitAsm.h
+++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.h
@@ -34,7 +34,7 @@ public:
 		m_stack_top = stack_top;
 		// NOTE: When making large additions to the AsmCommon code, you might
 		// want to ensure this number is big enough.
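 		// (With this revert, AllocCodeSpace() takes no mapping hint;
 		// AllocateExecutableMemory(size, /*low=*/true) is relied on to
 		// return memory below 2 GB by itself, e.g.
 		//     AllocCodeSpace(16384);
 		// rather than AllocCodeSpace(16384, PPCSTATE_BASE).)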
-		AllocCodeSpace(16384, PPCSTATE_BASE);
+		AllocCodeSpace(16384);
 		Generate();
 		WriteProtect();
 	}
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
index 00d8fd52b3..407a39b867 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp
@@ -45,7 +45,7 @@ void Jit64::GenerateOverflow()
 	//rare).
 	static const u8 ovtable[4] = {0, 0, XER_SO_MASK, XER_SO_MASK};
 	MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov));
-	MOV(8, R(RSCRATCH), MPIC(ovtable, RSCRATCH));
+	MOV(8, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)ovtable));
 	MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH));
 	SetJumpTarget(exit);
 }
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index 824e7f96e6..4d875323eb 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -132,13 +132,13 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
 	{
 		// One value
 		CVTSD2SS(XMM0, fpr.R(s));
-		CALLptr(MPIC(asm_routines.singleStoreQuantized, RSCRATCH, SCALE_8));
+		CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.singleStoreQuantized));
 	}
 	else
 	{
 		// Pair of values
 		CVTPD2PS(XMM0, fpr.R(s));
-		CALLptr(MPIC(asm_routines.pairedStoreQuantized, RSCRATCH, SCALE_8));
+		CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)asm_routines.pairedStoreQuantized));
 	}

 	if (update && jo.memcheck)
@@ -306,7 +306,7 @@ void Jit64::psq_lXX(UGeckoInstruction inst)
 	AND(32, R(RSCRATCH2), gqr);
 	MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));

-	CALLptr(MPIC(&asm_routines.pairedLoadQuantized[w * 8], RSCRATCH, SCALE_8));
+	CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(&asm_routines.pairedLoadQuantized[w * 8])));

 	MemoryExceptionCheck();
 	CVTPS2PD(fpr.RX(s), R(XMM0));
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
index e798237311..d7c56fbe10 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -458,7 +458,7 @@ void Jit64::mtcrf(UGeckoInstruction inst)
 					SHR(32, R(RSCRATCH), Imm8(28 - (i * 4)));
 				if (i != 0)
 					AND(32, R(RSCRATCH), Imm8(0xF));
-				MOV(64, R(RSCRATCH), MPIC(m_crTable, RSCRATCH, SCALE_8));
+				MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable));
 				MOV(64, PPCSTATE(cr_val[i]), R(RSCRATCH));
 			}
 		}
@@ -493,7 +493,7 @@ void Jit64::mcrxr(UGeckoInstruction inst)

 	// [SO OV CA 0] << 3
 	SHL(32, R(RSCRATCH), Imm8(4));
-	MOV(64, R(RSCRATCH), MPIC(m_crTable, RSCRATCH));
+	MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable));
 	MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));

 	// Clear XER[0-3]
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index d7fb70f5fe..64adf9b4de 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -24,8 +24,10 @@ void CommonAsmRoutines::GenFifoWrite(int size)
 	const void* start = GetCodePtr();

 	// Assume value in RSCRATCH
+	u32 gather_pipe = (u32)(u64)GPFifo::m_gatherPipe;
+	_assert_msg_(DYNA_REC, gather_pipe <= 0x7FFFFFFF, "Gather pipe not in low 2GB of memory!");
 	MOV(32, R(RSCRATCH2), M(&GPFifo::m_gatherPipeCount));
-	SwapAndStore(size, MPIC(GPFifo::m_gatherPipe, RSCRATCH2), RSCRATCH);
+	SwapAndStore(size, MDisp(RSCRATCH2, gather_pipe), RSCRATCH);
 	ADD(32, R(RSCRATCH2), Imm8(size >> 3));
 	MOV(32, M(&GPFifo::m_gatherPipeCount), R(RSCRATCH2));
 	RET();
@@ -66,8 +68,8 @@ void CommonAsmRoutines::GenFrsqrte()
 	SHR(64, R(RSCRATCH), Imm8(37));
 	AND(32, R(RSCRATCH), Imm32(0x7FF));

-	IMUL(32, RSCRATCH, MPIC(MathUtil::frsqrte_expected_dec, RSCRATCH_EXTRA, SCALE_4));
-	MOV(32, R(RSCRATCH_EXTRA), MPIC(MathUtil::frsqrte_expected_base, RSCRATCH_EXTRA, SCALE_4));
+	IMUL(32, RSCRATCH, MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_dec));
+	MOV(32, R(RSCRATCH_EXTRA), MScaled(RSCRATCH_EXTRA, SCALE_4, (u32)(u64)MathUtil::frsqrte_expected_base));
 	SUB(32, R(RSCRATCH_EXTRA), R(RSCRATCH));
 	SHL(64, R(RSCRATCH_EXTRA), Imm8(26));
 	OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA));     // vali |= (s64)(frsqrte_expected_base[index] - frsqrte_expected_dec[index] * (i % 2048)) << 26;
@@ -134,11 +136,11 @@ void CommonAsmRoutines::GenFres()
 	AND(32, R(RSCRATCH), Imm32(0x3FF)); // i % 1024
 	AND(32, R(RSCRATCH2), Imm8(0x1F));  // i / 1024

-	IMUL(32, RSCRATCH, MPIC(MathUtil::fres_expected_dec, RSCRATCH2, SCALE_4));
+	IMUL(32, RSCRATCH, MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_dec));
 	ADD(32, R(RSCRATCH), Imm8(1));
 	SHR(32, R(RSCRATCH), Imm8(1));

-	MOV(32, R(RSCRATCH2), MPIC(MathUtil::fres_expected_base, RSCRATCH2, SCALE_4));
+	MOV(32, R(RSCRATCH2), MScaled(RSCRATCH2, SCALE_4, (u32)(u64)MathUtil::fres_expected_base));
 	SUB(32, R(RSCRATCH2), R(RSCRATCH));
 	SHL(64, R(RSCRATCH2), Imm8(29));
 	OR(64, R(RSCRATCH2), R(RSCRATCH_EXTRA)); // vali |= (s64)(fres_expected_base[i / 1024] - (fres_expected_dec[i / 1024] * (i % 1024) + 1) / 2) << 29
@@ -197,7 +199,7 @@ void CommonAsmRoutines::GenMfcr()
 		// SO: Bit 61 set; set flag bit 0
 		// LT: Bit 62 set; set flag bit 3
 		SHR(64, R(cr_val), Imm8(61));
-		OR(32, R(dst), MPIC(m_flagTable, cr_val, SCALE_4));
+		OR(32, R(dst), MScaled(cr_val, SCALE_4, (u32)(u64)m_flagTable));
 	}
 	RET();
@@ -245,7 +247,7 @@ void CommonAsmRoutines::GenQuantizedStores()

 	const u8* storePairedU8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -260,7 +262,7 @@ void CommonAsmRoutines::GenQuantizedStores()

 	const u8* storePairedS8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	MULPS(XMM0, R(XMM1));
 #ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -276,7 +278,7 @@ void CommonAsmRoutines::GenQuantizedStores()

 	const u8* storePairedU16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	MULPS(XMM0, R(XMM1));

 	if (cpu_info.bSSE4_1)
@@ -308,7 +310,7 @@ void CommonAsmRoutines::GenQuantizedStores()

 	const u8* storePairedS16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_quantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	MULPS(XMM0, R(XMM1));
#ifdef QUANTIZE_OVERFLOW_SAFE
 	MINPS(XMM0, M(m_65535));
@@ -353,7 +355,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()

 	const u8* storeSingleU8 = AlignCode4();  // Used by MKWii
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	XORPS(XMM1, R(XMM1));
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M(&m_255));
@@ -363,7 +365,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()

 	const u8* storeSingleS8 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	MAXSS(XMM0, M(&m_m128));
 	MINSS(XMM0, M(&m_127));
 	CVTTSS2SI(RSCRATCH, R(XMM0));
@@ -372,7 +374,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()

 	const u8* storeSingleU16 = AlignCode4();  // Used by MKWii
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	XORPS(XMM1, R(XMM1));
 	MAXSS(XMM0, R(XMM1));
 	MINSS(XMM0, M(m_65535));
@@ -382,7 +384,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()

 	const u8* storeSingleS16 = AlignCode4();
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_quantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_quantizeTableS));
 	MAXSS(XMM0, M(&m_m32768));
 	MINSS(XMM0, M(&m_32767));
 	CVTTSS2SI(RSCRATCH, R(XMM0));
@@ -482,7 +484,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	MULPS(XMM0, R(XMM1));
 	RET();
@@ -493,7 +495,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0); // RSCRATCH_EXTRA = 0x000000xx
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
@@ -521,7 +523,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	MULPS(XMM0, R(XMM1));
 	RET();
@@ -532,7 +534,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToRegNoSwap(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 8, 0, true);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
@@ -555,7 +557,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	MULPS(XMM0, R(XMM1));
 	RET();
@@ -566,7 +568,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, false);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
@@ -588,7 +590,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	}
 	CVTDQ2PS(XMM0, R(XMM0));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MOVQ_xmm(XMM1, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MOVQ_xmm(XMM1, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	MULPS(XMM0, R(XMM1));
 	RET();
@@ -599,7 +601,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
 	UnsafeLoadRegToReg(RSCRATCH_EXTRA, RSCRATCH_EXTRA, 16, 0, true);
 	CVTSI2SS(XMM0, R(RSCRATCH_EXTRA));
 	SHR(32, R(RSCRATCH2), Imm8(5));
-	MULSS(XMM0, MPIC(m_dequantizeTableS, RSCRATCH2));
+	MULSS(XMM0, MDisp(RSCRATCH2, (u32)(u64)m_dequantizeTableS));
 	UNPCKLPS(XMM0, M(m_one));
 	RET();
diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
index 51763de8a9..17007be31f 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp
@@ -1611,7 +1611,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 			Jit->OR(32, R(RSCRATCH), Imm8(w << 3));

 			Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp1(I)));
-			Jit->CALLptr(MPIC(Jit->asm_routines.pairedLoadQuantized, RSCRATCH, SCALE_8));
+			Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedLoadQuantized)));
 			Jit->MOVAPD(reg, R(XMM0));
 			RI.fregs[reg] = I;
 			regNormalRegClear(RI, I);
@@ -1669,7 +1669,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress)
 			Jit->MOV(32, R(RSCRATCH_EXTRA), regLocForInst(RI, getOp2(I)));
 			Jit->MOVAPD(XMM0, fregLocForInst(RI, getOp1(I)));
-			Jit->CALLptr(MPIC(Jit->asm_routines.pairedStoreQuantized, RSCRATCH, SCALE_8));
+			Jit->CALLptr(MScaled(RSCRATCH, SCALE_8, (u32)(u64)(Jit->asm_routines.pairedStoreQuantized)));
 			if (RI.IInfo[I - RI.FirstI] & 4)
 				fregClearInst(RI, getOp1(I));
 			if (RI.IInfo[I - RI.FirstI] & 8)
diff --git a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
index e42001077d..0c82b234a7 100644
--- a/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
+++ b/Source/Core/Core/PowerPC/Jit64IL/JitIL.cpp
@@ -250,7 +250,7 @@ void JitIL::Init()
 	UpdateMemoryOptions();

 	trampolines.Init(jo.memcheck ? TRAMPOLINE_CODE_SIZE_MMU : TRAMPOLINE_CODE_SIZE);
-	AllocCodeSpace(CODE_SIZE, PPCSTATE_BASE);
+	AllocCodeSpace(CODE_SIZE);
 	blocks.Init();
 	asm_routines.Init(nullptr);
diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
index c699d00591..366a7fbcd6 100644
--- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h
+++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h
@@ -43,18 +43,6 @@
 // to address as much as possible in a one-byte offset form.
 #define RPPCSTATE RBP

-namespace Gen
-{
-
-inline OpArg MPIC(const void* address, X64Reg scale_reg, int scale = SCALE_1)
-{
-	ptrdiff_t offset = PPCSTATE_OFS(address);
-	_dbg_assert_(DYNA_REC, FitsInS32(offset));
-	return MComplex(RPPCSTATE, scale_reg, scale, offset);
-}
-
-}
-
 // Use these to control the instruction selection
 // #define INSTRUCTION_START FallBackToInterpreter(inst); return;
 // #define INSTRUCTION_START PPCTables::CountInstruction(inst);
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
index bc4e00bdb8..bca1c1faed 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp
@@ -173,11 +173,11 @@ private:
 		u32 all_ones = (1ULL << sbits) - 1;
 		if ((all_ones & mask) == all_ones)
 		{
-			MoveOpArgToReg(sbits, MatR(RSCRATCH));
+			MoveOpArgToReg(sbits, MDisp(RSCRATCH, 0));
 		}
 		else
 		{
-			m_code->MOVZX(32, sbits, m_dst_reg, MatR(RSCRATCH));
+			m_code->MOVZX(32, sbits, m_dst_reg, MDisp(RSCRATCH, 0));
 			m_code->AND(32, R(m_dst_reg), Imm32(mask));
 			if (m_sign_extend)
 				m_code->MOVSX(32, sbits, m_dst_reg, R(m_dst_reg));
diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
index 5dc67d9f3c..21d19a88f6 100644
--- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
+++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h
@@ -9,15 +9,15 @@
 #include "Common/BitSet.h"
 #include "Common/CPUDetect.h"
 #include "Common/x64Emitter.h"
-#include "Core/PowerPC/PowerPC.h"

 namespace MMIO { class Mapping; }

 // We offset by 0x80 because the range of one byte memory offsets is
 // -0x80..0x7f.
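 // A worked example of that bias (illustrative only): with
 // RPPCSTATE = (u8*)&PowerPC::ppcState + 0x80, any field in the first
 // 0x100 bytes of ppcState gets a displacement in -0x80..0x7f, e.g. a
 // field at offset 0x20 is reached as
 //     MDisp(RPPCSTATE, 0x20 - 0x80)   // disp8 = -0x60, one-byte encoding
 // where an unbiased base would need a four-byte displacement for
 // anything past offset 0x7f.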
-#define PPCSTATE_BASE ((u8*)&PowerPC::ppcState + 0x80)
-#define PPCSTATE_OFS(x) ((u8*)(x) - PPCSTATE_BASE)
-#define PPCSTATE(x) MDisp(RPPCSTATE, PPCSTATE_OFS(&PowerPC::ppcState.x))
+#define PPCSTATE(x) MDisp(RPPCSTATE, \
+	(int) ((char *) &PowerPC::ppcState.x - (char *) &PowerPC::ppcState) - 0x80)
+// In case you want to disable the ppcstate register:
+// #define PPCSTATE(x) M(&PowerPC::ppcState.x)
 #define PPCSTATE_LR PPCSTATE(spr[SPR_LR])
 #define PPCSTATE_CTR PPCSTATE(spr[SPR_CTR])
 #define PPCSTATE_SRR0 PPCSTATE(spr[SPR_SRR0])
@@ -31,7 +31,7 @@ private:
 	bool m_enabled = false;
 public:
 	bool Enabled() { return m_enabled; }
-	void Init(int size) { AllocCodeSpace(size, PPCSTATE_BASE); m_enabled = true; }
+	void Init(int size) { AllocCodeSpace(size); m_enabled = true; }
 	void Shutdown() { FreeCodeSpace(); m_enabled = false; }
 };
diff --git a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
index 0a02764142..f9b7e219e2 100644
--- a/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
+++ b/Source/Core/Core/PowerPC/JitCommon/TrampolineCache.cpp
@@ -22,7 +22,7 @@ using namespace Gen;

 void TrampolineCache::Init(int size)
 {
-	AllocCodeSpace(size, PPCSTATE_BASE);
+	AllocCodeSpace(size);
 }

 void TrampolineCache::ClearCodeSpace()
diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp
index 6659542dee..766fc0436c 100644
--- a/Source/UnitTests/Common/x64EmitterTest.cpp
+++ b/Source/UnitTests/Common/x64EmitterTest.cpp
@@ -19,7 +19,6 @@
 #include "Common/CPUDetect.h"
 #include "Common/x64Emitter.h"

-#include "Core/PowerPC/JitCommon/Jit_Util.h"

 namespace Gen
 {
@@ -95,7 +94,7 @@ protected:
 		memset(&cpu_info, 0xFF, sizeof (cpu_info));

 		emitter.reset(new X64CodeBlock());
-		emitter->AllocCodeSpace(4096, PPCSTATE_BASE);
+		emitter->AllocCodeSpace(4096);
 		code_buffer = emitter->GetWritableCodePtr();

 		disasm.reset(new disassembler);