diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp
index 34965a6a24..7db8beac51 100644
--- a/Source/Core/Common/x64Emitter.cpp
+++ b/Source/Core/Common/x64Emitter.cpp
@@ -120,28 +120,29 @@ void XEmitter::ReserveCodeSpace(int bytes)
     *code++ = 0xCC;
 }
 
+const u8* XEmitter::AlignCodeTo(size_t alignment)
+{
+  _assert_msg_(DYNA_REC, alignment != 0 && (alignment & (alignment - 1)) == 0,
+               "Alignment must be power of two");
+  u64 c = reinterpret_cast<u64>(code) & (alignment - 1);
+  if (c)
+    ReserveCodeSpace(static_cast<int>(alignment - c));
+  return code;
+}
+
 const u8* XEmitter::AlignCode4()
 {
-  int c = int((u64)code & 3);
-  if (c)
-    ReserveCodeSpace(4 - c);
-  return code;
+  return AlignCodeTo(4);
 }
 
 const u8* XEmitter::AlignCode16()
 {
-  int c = int((u64)code & 15);
-  if (c)
-    ReserveCodeSpace(16 - c);
-  return code;
+  return AlignCodeTo(16);
 }
 
 const u8* XEmitter::AlignCodePage()
 {
-  int c = int((u64)code & 4095);
-  if (c)
-    ReserveCodeSpace(4096 - c);
-  return code;
+  return AlignCodeTo(4096);
 }
 
 // This operation modifies flags; check to see the flags are locked.
diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h
index b294ed1358..1879d6b16c 100644
--- a/Source/Core/Common/x64Emitter.h
+++ b/Source/Core/Common/x64Emitter.h
@@ -412,6 +412,7 @@ public:
   virtual ~XEmitter() {}
   void SetCodePtr(u8* ptr);
   void ReserveCodeSpace(int bytes);
+  const u8* AlignCodeTo(size_t alignment);
   const u8* AlignCode4();
   const u8* AlignCode16();
   const u8* AlignCodePage();
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
index b260de56dc..26cb06c37e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStorePaired.cpp
@@ -89,12 +89,12 @@ void Jit64::psq_stXX(UGeckoInstruction inst)
     // 0b0011111100000111, or 0x3F07.
     MOV(32, R(RSCRATCH2), Imm32(0x3F07));
     AND(32, R(RSCRATCH2), PPCSTATE(spr[SPR_GQR0 + i]));
-    MOVZX(32, 8, RSCRATCH, R(RSCRATCH2));
-
-    if (w)
-      CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(asm_routines.singleStoreQuantized)));
-    else
-      CALLptr(MScaled(RSCRATCH, SCALE_8, PtrOffset(asm_routines.pairedStoreQuantized)));
+    LEA(64, RSCRATCH, M(w ? asm_routines.singleStoreQuantized : asm_routines.pairedStoreQuantized));
+    // 8-bit operations do not zero upper 32-bits of 64-bit registers.
+    // Here we know that RSCRATCH's least significant byte is zero.
+    OR(8, R(RSCRATCH), R(RSCRATCH2));
+    SHL(8, R(RSCRATCH), Imm8(3));
+    CALLptr(MatR(RSCRATCH));
   }
 
   if (update && jo.memcheck)
diff --git a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
index dce31b4d4b..9180be67f4 100644
--- a/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
+++ b/Source/Core/Core/PowerPC/Jit64Common/Jit64AsmCommon.cpp
@@ -243,7 +243,8 @@ constexpr std::array<u8, 8> sizes{{32, 0, 0, 0, 8, 16, 8, 16}};
 
 void CommonAsmRoutines::GenQuantizedStores()
 {
-  pairedStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
+  // Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_stXX).
+  pairedStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
   ReserveCodeSpace(8 * sizeof(u8*));
 
   for (int type = 0; type < 8; type++)
@@ -253,7 +254,8 @@ // See comment in header for in/outs.
 
 void CommonAsmRoutines::GenQuantizedSingleStores()
 {
-  singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCode16()));
+  // Aligned to 256 bytes as least significant byte needs to be zero (See: Jit64::psq_stXX).
+  singleStoreQuantized = reinterpret_cast<const u8**>(const_cast<u8*>(AlignCodeTo(256)));
   ReserveCodeSpace(8 * sizeof(u8*));
 
   for (int type = 0; type < 8; type++)
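
The two sketches below are not part of the patch. The first is a minimal standalone model of the round-up arithmetic inside the new XEmitter::AlignCodeTo(): because the alignment is a power of two, address & (alignment - 1) yields the misalignment, and padding by alignment - misalignment reaches the next boundary. AlignUp and the sample addresses are illustrative names, not Dolphin code.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for the arithmetic inside XEmitter::AlignCodeTo():
// round an address up to the next multiple of a power-of-two alignment.
static uintptr_t AlignUp(uintptr_t address, size_t alignment)
{
  // Same precondition the patch asserts via _assert_msg_.
  assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
  const uintptr_t misalignment = address & (alignment - 1);
  return misalignment ? address + (alignment - misalignment) : address;
}

int main()
{
  // 0x1234 & 0xFF == 0x34, so 256 - 0x34 == 0xCC padding bytes are needed
  // to reach the next 256-byte boundary at 0x1300.
  std::printf("%#zx\n", static_cast<size_t>(AlignUp(0x1234, 256)));  // 0x1300
  std::printf("%#zx\n", static_cast<size_t>(AlignUp(0x1300, 256)));  // 0x1300, already aligned
  return 0;
}
```

In the emitter itself the padding is emitted through ReserveCodeSpace(), which fills the gap with 0xCC (INT3) bytes, as the context line in the first hunk shows.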
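
The second sketch covers why GenQuantizedStores() and GenQuantizedSingleStores() now request 256-byte alignment: once the table's least significant address byte is known to be zero, Jit64::psq_stXX can build the address of entry type (0..7) using only 8-bit operations, OR-ing the type into the low byte and shifting that byte left by 3 to form type * 8. The model below mirrors the address arithmetic in ordinary C++; DispatchSlot and the sample table address are hypothetical, and this is not the emitted x64 code.

```cpp
#include <cassert>
#include <cstdint>
#include <cstdio>

// Model of the indirect-call address computed in Jit64::psq_stXX.
// `table` stands in for pairedStoreQuantized/singleStoreQuantized, an array
// of eight routine pointers that the generators align to 256 bytes.
static uintptr_t DispatchSlot(uintptr_t table, uint32_t gqr_bits)
{
  assert((table & 0xFF) == 0);  // low byte must be zero (256-byte aligned)

  const uint32_t type = gqr_bits & 7;  // store type lives in the low 3 bits

  // Equivalent of OR(8, ...) followed by SHL(8, ..., 3): only the low byte
  // of the pointer changes, becoming type * 8 (0..56); the upper bits from
  // the LEA are left untouched.
  uint8_t low = static_cast<uint8_t>(table & 0xFF);
  low = static_cast<uint8_t>((low | type) << 3);
  return (table & ~static_cast<uintptr_t>(0xFF)) | low;
}

int main()
{
  const uintptr_t table = 0x7f3400;  // hypothetical 256-byte-aligned table
  for (uint32_t type = 0; type < 8; ++type)
  {
    // Matches table + type * 8, i.e. the address of the type-th pointer slot.
    assert(DispatchSlot(table, 0x3F00 | type) == table + type * 8);
  }
  std::puts("dispatch slots match table + type * 8");
  return 0;
}
```

This is why the previous MOVZX + MScaled sequence can be dropped: the 8-bit OR and SHL never modify the upper bits of RSCRATCH, which already holds the table base from the LEA.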