diff --git a/Source/Core/Common/x64Analyzer.cpp b/Source/Core/Common/x64Analyzer.cpp index 72ff1105a5..4878f9dea8 100644 --- a/Source/Core/Common/x64Analyzer.cpp +++ b/Source/Core/Common/x64Analyzer.cpp @@ -8,8 +8,8 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info) { unsigned const char *startCodePtr = codePtr; u8 rex = 0; - u8 codeByte = 0; - u8 codeByte2 = 0; + u32 opcode; + int opcode_length; //Check for regular prefix info->operandSize = 4; @@ -17,6 +17,7 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info) info->signExtend = false; info->hasImmediate = false; info->isMemoryWrite = false; + info->byteSwap = false; u8 modRMbyte = 0; u8 sibByte = 0; @@ -45,41 +46,53 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info) codePtr++; } - codeByte = *codePtr++; - - // Skip two-byte opcode byte - bool twoByte = false; - if (codeByte == 0x0F) + opcode = *codePtr++; + opcode_length = 1; + if (opcode == 0x0F) { - twoByte = true; - codeByte2 = *codePtr++; - } - - if (!twoByte) - { - if ((codeByte & 0xF0) == 0x80 || - ((codeByte & 0xF8) == 0xC0 && (codeByte & 0x0E) != 0x02)) + opcode = (opcode << 8) | *codePtr++; + opcode_length = 2; + if ((opcode & 0xFB) == 0x38) { - modRMbyte = *codePtr++; - hasModRM = true; + opcode = (opcode << 8) | *codePtr++; + opcode_length = 3; } } - else + + switch (opcode_length) { - if (((codeByte2 & 0xF0) == 0x00 && (codeByte2 & 0x0F) >= 0x04 && (codeByte2 & 0x0D) != 0x0D) || - (codeByte2 & 0xF0) == 0x30 || - codeByte2 == 0x77 || - (codeByte2 & 0xF0) == 0x80 || - ((codeByte2 & 0xF0) == 0xA0 && (codeByte2 & 0x07) <= 0x02) || - (codeByte2 & 0xF8) == 0xC8) - { - // No mod R/M byte - } - else - { - modRMbyte = *codePtr++; - hasModRM = true; - } + case 1: + if ((opcode & 0xF0) == 0x80 || + ((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02)) + { + modRMbyte = *codePtr++; + hasModRM = true; + } + break; + case 2: + if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) || + ((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) || + (opcode & 0xF0) == 0x30 || + (opcode & 0xFF) == 0x77 || + (opcode & 0xF0) == 0x80 || + (opcode & 0xF8) == 0xC8) + { + // No mod R/M byte + } + else + { + modRMbyte = *codePtr++; + hasModRM = true; + } + break; + case 3: + // TODO: support more 3-byte opcode instructions + if ((opcode & 0xFE) == 0xF0) + { + modRMbyte = *codePtr++; + hasModRM = true; + } + break; } if (hasModRM) @@ -114,109 +127,92 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info) if (displacementSize == 1) info->displacement = (s32)(s8)*codePtr; else - info->displacement = *((s32 *)codePtr); + info->displacement = *((s32*)codePtr); codePtr += displacementSize; - - switch (codeByte) + switch (opcode) { - // writes case 0xC6: // mem <- imm8 - { - info->isMemoryWrite = true; - info->hasImmediate = true; - info->immediate = *codePtr; - codePtr++; //move past immediate - } + info->isMemoryWrite = true; + info->hasImmediate = true; + info->immediate = *codePtr; + codePtr++; break; case 0xC7: // mem <- imm16/32 + info->isMemoryWrite = true; + switch (info->operandSize) { - info->isMemoryWrite = true; - if (info->operandSize == 2) - { - info->hasImmediate = true; - info->immediate = *(u16*)codePtr; - codePtr += 2; - } - else if (info->operandSize == 4) - { - info->hasImmediate = true; - info->immediate = *(u32*)codePtr; - codePtr += 4; - } - else if (info->operandSize == 8) - { - info->zeroExtend = true; - info->immediate = *(u32*)codePtr; - codePtr += 4; - } + case 2: + info->hasImmediate = true; + info->immediate = *(u16*)codePtr; + codePtr += 2; + break; + + case 4: + info->hasImmediate = true; + info->immediate = *(u32*)codePtr; + codePtr += 4; + break; + case 8: + info->zeroExtend = true; + info->immediate = *(u32*)codePtr; + codePtr += 4; + break; } + break; case 0x88: // mem <- r8 + info->isMemoryWrite = true; + if (info->operandSize != 4) { - info->isMemoryWrite = true; - if (info->operandSize == 4) - { - info->operandSize = 1; - break; - } - else - return false; - break; + return false; } + info->operandSize = 1; + break; case 0x89: // mem <- r16/32/64 - { - info->isMemoryWrite = true; - break; - } - - case 0x0F: // two-byte escape - { - info->isMemoryWrite = false; - switch (codeByte2) - { - case 0xB6: // movzx on byte - info->zeroExtend = true; - info->operandSize = 1; - break; - case 0xB7: // movzx on short - info->zeroExtend = true; - info->operandSize = 2; - break; - case 0xBE: // movsx on byte - info->signExtend = true; - info->operandSize = 1; - break; - case 0xBF: // movsx on short - info->signExtend = true; - info->operandSize = 2; - break; - default: - return false; - } - break; - } + info->isMemoryWrite = true; + break; case 0x8A: // r8 <- mem + if (info->operandSize != 4) { - info->isMemoryWrite = false; - if (info->operandSize == 4) - { - info->operandSize = 1; - break; - } - else - return false; + return false; } + info->operandSize = 1; + break; case 0x8B: // r16/32/64 <- mem - { - info->isMemoryWrite = false; - break; - } + break; + case 0x0FB6: // movzx on byte + info->zeroExtend = true; + info->operandSize = 1; + break; + + case 0x0FB7: // movzx on short + info->zeroExtend = true; + info->operandSize = 2; + break; + + case 0x0FBE: // movsx on byte + info->signExtend = true; + info->operandSize = 1; + break; + + case 0x0FBF: // movsx on short + info->signExtend = true; + info->operandSize = 2; + break; + + case 0x0F38F0: // movbe read + info->byteSwap = true; + break; + + case 0x0F38F1: // movbe write + info->byteSwap = true; + info->isMemoryWrite = true; break; default: diff --git a/Source/Core/Common/x64Analyzer.h b/Source/Core/Common/x64Analyzer.h index 613aafad73..4858f099da 100644 --- a/Source/Core/Common/x64Analyzer.h +++ b/Source/Core/Common/x64Analyzer.h @@ -17,6 +17,7 @@ struct InstructionInfo bool signExtend; bool hasImmediate; bool isMemoryWrite; + bool byteSwap; u64 immediate; s32 displacement; }; diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index b30a847181..1473d21dd3 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -368,8 +368,7 @@ void Jit64::stX(UGeckoInstruction inst) // Fast and daring - requires 64-bit MOV(32, R(EAX), gpr.R(s)); gpr.BindToRegister(a, true, false); - BSWAP(32, EAX); - MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX)); + SwapAndStore(32, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), EAX); return; } #endif*/ diff --git a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp index 5ef17670b9..4c5e4e3395 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitAsmCommon.cpp @@ -404,8 +404,7 @@ void CommonAsmRoutines::GenQuantizedLoads() UNPCKLPS(XMM0, M((void*)m_one)); } else { #if _M_X86_64 - MOV(32, R(RCX), MComplex(RBX, RCX, 1, 0)); - BSWAP(32, RCX); + LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0)); MOVD_xmm(XMM0, R(RCX)); UNPCKLPS(XMM0, M((void*)m_one)); #else diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp index ce374f050d..7f557734e4 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/JitBackpatch.cpp @@ -187,6 +187,12 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) return nullptr; } + if (info.byteSwap && info.instructionSize < 5) + { + PanicAlert("BackPatch: MOVBE is too small"); + return nullptr; + } + auto it = registersInUseAtLoc.find(codePtr); if (it == registersInUseAtLoc.end()) { @@ -200,8 +206,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) { XEmitter emitter(codePtr); int bswapNopCount; + if (info.byteSwap) + // MOVBE -> no BSWAP following + bswapNopCount = 0; // Check the following BSWAP for REX byte - if ((codePtr[info.instructionSize] & 0xF0) == 0x40) + else if ((codePtr[info.instructionSize] & 0xF0) == 0x40) bswapNopCount = 3; else bswapNopCount = 2; @@ -214,29 +223,38 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void) else { // TODO: special case FIFO writes. Also, support 32-bit mode. - // We entered here with a BSWAP-ed register. We'll have to swap it back. - u64 *ptr = ContextRN(ctx, info.regOperandReg); - int bswapSize = 0; - switch (info.operandSize) - { - case 1: - bswapSize = 0; - break; - case 2: - bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0); - *ptr = Common::swap16((u16) *ptr); - break; - case 4: - bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0); - *ptr = Common::swap32((u32) *ptr); - break; - case 8: - bswapSize = 3; - *ptr = Common::swap64(*ptr); - break; - } - u8 *start = codePtr - bswapSize; + u8 *start; + if (info.byteSwap) + { + // The instruction is a MOVBE but it failed so the value is still in little-endian byte order. + start = codePtr; + } + else + { + // We entered here with a BSWAP-ed register. We'll have to swap it back. + u64 *ptr = ContextRN(ctx, info.regOperandReg); + int bswapSize = 0; + switch (info.operandSize) + { + case 1: + bswapSize = 0; + break; + case 2: + bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0); + *ptr = Common::swap16((u16) *ptr); + break; + case 4: + bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0); + *ptr = Common::swap32((u32) *ptr); + break; + case 8: + bswapSize = 3; + *ptr = Common::swap64(*ptr); + break; + } + start = codePtr - bswapSize; + } XEmitter emitter(start); const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse); emitter.CALL((void *)trampoline); diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index ca5d533fc2..ef849dd1f7 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -426,11 +426,21 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc if (accessSize == 8 && reg_value >= 4) { PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!"); } - if (swap) BSWAP(accessSize, reg_value); #if _M_X86_64 result = GetWritableCodePtr(); - MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); + if (swap) + { + SwapAndStore(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), reg_value); + } + else + { + MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value)); + } #else + if (swap) + { + BSWAP(accessSize, reg_value); + } AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK)); result = GetWritableCodePtr(); MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value)); @@ -502,6 +512,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags) { + // FIXME if (false && cpu_info.bSSSE3) { // This path should be faster but for some reason it causes errors so I've disabled it. u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS; @@ -516,8 +527,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re TEST(32, R(reg_addr), Imm32(mem_mask)); FixupBranch argh = J_CC(CC_Z); MOVSS(M(&float_buffer), xmm_value); - MOV(32, R(EAX), M(&float_buffer)); - BSWAP(32, EAX); + LoadAndSwap(32, EAX, M(&float_buffer)); MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write ABI_PushRegistersAndAdjustStack(registersInUse, false); ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);