From ee4a12ffe2cccb41c1cfc136e55903dc03a3e17c Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Fri, 21 Aug 2015 21:49:09 +0200 Subject: [PATCH] Jit64: some byte-swapping changes --- Source/Core/Common/x64Emitter.cpp | 45 +++++++++++--- Source/Core/Common/x64Emitter.h | 4 +- .../Core/PowerPC/JitCommon/JitBackpatch.cpp | 45 +++++++------- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 61 ++----------------- Source/Core/VideoCommon/VertexLoaderX64.cpp | 17 ++---- 5 files changed, 69 insertions(+), 103 deletions(-) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 280e58d82a..f2763d83a8 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -887,21 +887,46 @@ void XEmitter::WriteMOVBE(int bits, u8 op, X64Reg reg, const OpArg& arg) void XEmitter::MOVBE(int bits, X64Reg dest, const OpArg& src) {WriteMOVBE(bits, 0xF0, dest, src);} void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src) {WriteMOVBE(bits, 0xF1, src, dest);} -void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src) +void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend) { - if (cpu_info.bMOVBE) + switch (size) { - MOVBE(size, dst, src); - } - else - { - MOV(size, R(dst), src); - BSWAP(size, dst); + case 8: + if (sign_extend) + MOVSX(32, 8, dst, src); + else + MOVZX(32, 8, dst, src); + break; + case 16: + MOVZX(32, 16, dst, src); + if (sign_extend) + { + BSWAP(32, dst); + SAR(32, R(dst), Imm8(16)); + } + else + { + ROL(16, R(dst), Imm8(8)); + } + break; + case 32: + case 64: + if (cpu_info.bMOVBE) + { + MOVBE(size, dst, src); + } + else + { + MOV(size, R(dst), src); + BSWAP(size, dst); + } + break; } } -void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src) +u8* XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src) { + u8* mov_location = GetWritableCodePtr(); if (cpu_info.bMOVBE) { MOVBE(size, dst, src); @@ -909,8 +934,10 @@ void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src) else { BSWAP(size, src); + mov_location = GetWritableCodePtr(); MOV(size, dst, R(src)); } + return mov_location; } diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index aa21956453..d444724705 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -480,8 +480,8 @@ public: // Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE. void MOVBE(int bits, X64Reg dest, const OpArg& src); void MOVBE(int bits, const OpArg& dest, X64Reg src); - void LoadAndSwap(int size, X64Reg dst, const OpArg& src); - void SwapAndStore(int size, const OpArg& dst, X64Reg src); + void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false); + u8* SwapAndStore(int size, const OpArg& dst, X64Reg src); // Available only on AMD >= Phenom or Intel >= Haswell void LZCNT(int bits, X64Reg dest, const OpArg& src); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index 81b22972f4..fcfb6d4db1 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -86,33 +86,35 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) // Compute the start and length of the memory operation, including // any byteswapping. - int totalSize; + int totalSize = info.instructionSize; u8 *start = codePtr; if (!info.isMemoryWrite) { - int bswapNopCount; - if (info.byteSwap || info.operandSize == 1) - bswapNopCount = 0; - // Check the following BSWAP for REX byte - else if ((codePtr[info.instructionSize] & 0xF0) == 0x40) - bswapNopCount = 3; - else - bswapNopCount = 2; - - totalSize = info.instructionSize + bswapNopCount; - if (info.operandSize == 2 && !info.byteSwap) + // MOVBE and single bytes don't need to be swapped. + if (!info.byteSwap && info.operandSize > 1) { + // REX if ((codePtr[totalSize] & 0xF0) == 0x40) + totalSize++; + + // BSWAP + if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8) + totalSize += 2; + + if (info.operandSize == 2) { - ++totalSize; + // operand size override + if (codePtr[totalSize] == 0x66) + totalSize++; + // REX + if ((codePtr[totalSize] & 0xF0) == 0x40) + totalSize++; + // SAR/ROL + _assert_(codePtr[totalSize] == 0xC1 && (codePtr[totalSize + 2] == 0x10 || + codePtr[totalSize + 2] == 0x08)); + info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0; + totalSize += 3; } - if (codePtr[totalSize] != 0xc1 || codePtr[totalSize + 2] != 0x10) - { - PanicAlert("BackPatch: didn't find expected shift %p", codePtr); - return false; - } - info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0; - totalSize += 3; } } else @@ -120,7 +122,6 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) if (info.byteSwap || info.hasImmediate) { // The instruction is a MOVBE but it failed so the value is still in little-endian byte order. - totalSize = info.instructionSize; } else { @@ -146,7 +147,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx) break; } start = codePtr - bswapSize; - totalSize = info.instructionSize + bswapSize; + totalSize += bswapSize; } } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index a4a8a6212c..c9911ae853 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -24,24 +24,8 @@ void EmuCodeBlock::MemoryExceptionCheck() void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) { - MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset)); - if (accessSize == 32) - { - BSWAP(32, reg_value); - } - else if (accessSize == 16) - { - BSWAP(32, reg_value); - if (signExtend) - SAR(32, R(reg_value), Imm8(16)); - else - SHR(32, R(reg_value), Imm8(16)); - } - else if (signExtend) - { - // TODO: bake 8-bit into the original load. - MOVSX(32, accessSize, reg_value, R(reg_value)); - } + OpArg src = MComplex(RMEM, reg_addr, SCALE_1, offset); + LoadAndSwap(accessSize, reg_value, src, signExtend); } void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend) @@ -84,34 +68,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS } result = GetWritableCodePtr(); - if (accessSize == 8 && signExtend) - MOVSX(32, accessSize, reg_value, memOperand); - else - MOVZX(64, accessSize, reg_value, memOperand); - - switch (accessSize) - { - case 8: - _dbg_assert_(DYNA_REC, BACKPATCH_SIZE - (GetCodePtr() - result <= 0)); - break; - - case 16: - BSWAP(32, reg_value); - if (signExtend) - SAR(32, R(reg_value), Imm8(16)); - else - SHR(32, R(reg_value), Imm8(16)); - break; - - case 32: - BSWAP(32, reg_value); - break; - - case 64: - BSWAP(64, reg_value); - break; - } - + LoadAndSwap(accessSize, reg_value, memOperand, signExtend); return result; } @@ -415,17 +372,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce } else if (swap) { - if (cpu_info.bMOVBE) - { - MOVBE(accessSize, dest, reg_value.GetSimpleReg()); - } - else - { - if (accessSize > 8) - BSWAP(accessSize, reg_value.GetSimpleReg()); - result = GetWritableCodePtr(); - MOV(accessSize, dest, reg_value); - } + result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg()); } else { diff --git a/Source/Core/VideoCommon/VertexLoaderX64.cpp b/Source/Core/VideoCommon/VertexLoaderX64.cpp index a298d7e1dd..dd9cd0528d 100644 --- a/Source/Core/VideoCommon/VertexLoaderX64.cpp +++ b/Source/Core/VideoCommon/VertexLoaderX64.cpp @@ -53,21 +53,12 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute) OpArg data = MDisp(src_reg, m_src_ofs); if (attribute & MASK_INDEXED) { - if (attribute == INDEX8) - { - MOVZX(64, 8, scratch1, data); - m_src_ofs += 1; - } - else - { - MOV(16, R(scratch1), data); - m_src_ofs += 2; - BSWAP(16, scratch1); - MOVZX(64, 16, scratch1, R(scratch1)); - } + int bits = attribute == INDEX8 ? 8 : 16; + LoadAndSwap(bits, scratch1, data); + m_src_ofs += bits / 8; if (array == ARRAY_POSITION) { - CMP(attribute == INDEX8 ? 8 : 16, R(scratch1), Imm8(-1)); + CMP(bits, R(scratch1), Imm8(-1)); m_skip_vertex = J_CC(CC_E, true); } IMUL(32, scratch1, MPIC(&g_main_cp_state.array_strides[array]));