Merge pull request #325 from Tilka/nop

Fix NOP padding
Pierre Bourdon 2014-04-30 18:55:36 +02:00
commit 951612f08e
5 changed files with 52 additions and 30 deletions

Common/x64Emitter.cpp

@ -6,6 +6,7 @@
#include "Common/Common.h"
#include "Common/CPUDetect.h"
#include "Common/Log.h"
#include "Common/x64Emitter.h"
namespace Gen
@ -516,8 +517,9 @@ void XEmitter::RET() {Write8(0xC3);}
void XEmitter::RET_FAST() {Write8(0xF3); Write8(0xC3);} //two-byte return (rep ret) - recommended by AMD optimization manual for the case of jumping to a ret
// The first sign of decadence: optimized NOPs.
void XEmitter::NOP(int size)
void XEmitter::NOP(size_t size)
{
_dbg_assert_(DYNA_REC, (int)size > 0);
while (true)
{
switch (size)
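
The switch that follows (its body is not shown in this hunk) picks an optimized multi-byte NOP encoding for each size. A minimal sketch of the idea, assuming a table of the multi-byte NOPs recommended in Intel's optimization manual — EmitNops and the table are illustrative, not Dolphin's exact code:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Emit 'size' bytes of padding, using the longest recommended NOP each pass.
    static void EmitNops(uint8_t*& code, size_t size)
    {
        // nops[n] is an n-byte NOP encoding (Intel optimization manual).
        static const uint8_t nops[10][9] = {
            {},
            { 0x90 },                                     // nop
            { 0x66, 0x90 },                               // 66 nop
            { 0x0F, 0x1F, 0x00 },                         // nop [rax]
            { 0x0F, 0x1F, 0x40, 0x00 },                   // nop [rax+0]
            { 0x0F, 0x1F, 0x44, 0x00, 0x00 },             // nop [rax+rax+0]
            { 0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00 },
            { 0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00 }, // nop [rax+imm32]
            { 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
            { 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00 },
        };
        while (size > 0)
        {
            const size_t chunk = size < 9 ? size : 9;
            std::memcpy(code, nops[chunk], chunk);
            code += chunk;
            size -= chunk;
        }
    }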

Common/x64Emitter.h

@ -290,7 +290,7 @@ public:
void INT3();
// Do nothing
void NOP(int count = 1);
void NOP(size_t count = 1);
// Save energy in wait-loops on P4 only. Probably not too useful.
void PAUSE();
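
The signature change from int to size_t means a negative pad width can no longer slip through quietly: it would convert to an enormous unsigned value and trip the new assert. A toy illustration of the guard the call sites now need (the values are invented):

    int instructionSize = 4;            // invented: shorter than the 5-byte CALL
    int padding = instructionSize - 5;  // -1: nothing to pad, nothing to emit
    // NOP(padding);                    // int -1 would convert to a huge size_t
    //                                  // and trip the (int)size > 0 assert
    if (padding > 0)                    // the pattern used at every call site below
        NOP(padding);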

JitBackpatch.cpp

@ -187,7 +187,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
return nullptr;
}
if (info.byteSwap && info.instructionSize < 5)
if (info.byteSwap && info.instructionSize < BACKPATCH_SIZE)
{
PanicAlert("BackPatch: MOVBE is too small");
return nullptr;
@ -206,8 +206,7 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{
XEmitter emitter(codePtr);
int bswapNopCount;
if (info.byteSwap)
// MOVBE -> no BSWAP following
if (info.byteSwap || info.operandSize == 1)
bswapNopCount = 0;
// Check the following BSWAP for REX byte
else if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
@ -217,7 +216,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
const u8 *trampoline = trampolines.GetReadTrampoline(info, registersInUse);
emitter.CALL((void *)trampoline);
emitter.NOP((int)info.instructionSize + bswapNopCount - 5);
int padding = info.instructionSize + bswapNopCount - BACKPATCH_SIZE;
if (padding > 0)
{
emitter.NOP(padding);
}
return codePtr;
}
else
@ -258,11 +261,14 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
emitter.CALL((void *)trampoline);
emitter.NOP((int)(codePtr + info.instructionSize - emitter.GetCodePtr()));
int padding = codePtr + info.instructionSize - emitter.GetCodePtr();
if (padding > 0)
{
emitter.NOP(padding);
}
return start;
}
#else
return 0;
#endif
}
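
Both branches compute the same thing: how many bytes of the original access sequence the 5-byte trampoline CALL leaves uncovered. A toy recomputation for the read path (sizes invented for illustration; note that NOP(0) is exactly the call the old code could make and the new assert forbids):

    #include <cassert>

    const int BACKPATCH_SIZE = 5;
    int instructionSize = 3;  // invented: a 3-byte MOV being backpatched
    int bswapNopCount   = 2;  // invented: a 2-byte BSWAP with no REX prefix
    int padding = instructionSize + bswapNopCount - BACKPATCH_SIZE;
    assert(padding == 0);     // exactly covered by the CALL: the old code
                              // still called NOP(0) here, the new guard skips it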

JitBackpatch.h

@ -8,6 +8,9 @@
#include "Common/x64Analyzer.h"
#include "Common/x64Emitter.h"
// We need at least this many bytes for backpatching.
const int BACKPATCH_SIZE = 5;
// meh.
#if defined(_WIN32)
#include <windows.h>
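
BACKPATCH_SIZE is the size of a near CALL, the instruction the backpatcher overwrites the faulting access with: one E8 opcode byte plus a 32-bit relative displacement. A hedged sketch of that encoding (WriteCall is illustrative, not Dolphin's emitter):

    #include <cstdint>
    #include <cstring>

    static void WriteCall(uint8_t* at, const uint8_t* target)
    {
        // rel32 is measured from the end of the 5-byte instruction.
        const int32_t rel = (int32_t)(target - (at + 5));
        at[0] = 0xE8;                           // CALL rel32 opcode
        std::memcpy(at + 1, &rel, sizeof(rel)); // 1 + 4 = BACKPATCH_SIZE bytes
    }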

Jit_Util.cpp

@ -98,19 +98,28 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
}
result = GetWritableCodePtr();
MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
if (accessSize == 8 && signExtend)
MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
else
MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
}
else
{
MOV(32, R(reg_value), opAddress);
result = GetWritableCodePtr();
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
if (accessSize == 8 && signExtend)
MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
else
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
}
#else
if (opAddress.IsImm())
{
result = GetWritableCodePtr();
MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
if (accessSize == 8 && signExtend)
MOVSX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
else
MOVZX(32, accessSize, reg_value, M(Memory::base + (((u32)opAddress.offset + offset) & Memory::MEMVIEW32_MASK)));
}
else
{
@ -118,31 +127,32 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
MOV(32, R(reg_value), opAddress);
AND(32, R(reg_value), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
if (accessSize == 8 && signExtend)
MOVSX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
else
MOVZX(32, accessSize, reg_value, MDisp(reg_value, (u32)Memory::base + offset));
}
#endif
// Add a 2 bytes NOP to have some space for the backpatching
if (accessSize == 8)
NOP(2);
if (accessSize == 32)
{
BSWAP(32, reg_value);
}
else if (accessSize == 16)
{
BSWAP(32, reg_value);
if (signExtend)
SAR(32, R(reg_value), Imm8(16));
else
SHR(32, R(reg_value), Imm8(16));
}
else if (signExtend)
{
// TODO: bake 8-bit into the original load.
MOVSX(32, accessSize, reg_value, R(reg_value));
}
switch (accessSize)
{
case 8:
_dbg_assert_(DYNA_REC, GetCodePtr() - result >= BACKPATCH_SIZE);
break;
case 16:
BSWAP(32, reg_value);
if (signExtend)
SAR(32, R(reg_value), Imm8(16));
else
SHR(32, R(reg_value), Imm8(16));
break;
case 32:
BSWAP(32, reg_value);
break;
}
return result;
}
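
The 16-bit case above avoids a separate fix-up instruction: the halfword is zero-extended into a 32-bit register, byte-swapped as a full 32-bit value, then shifted back down — arithmetically when sign extension is wanted. A C++ analogue of that sequence (a sketch; __builtin_bswap32 is the GCC/Clang intrinsic, assumed here for brevity):

    #include <cstdint>

    static int32_t LoadSwapped16(uint16_t halfword, bool signExtend)
    {
        uint32_t r = halfword;              // MOVZX 32, 16
        r = __builtin_bswap32(r);           // BSWAP 32: halfword now in bits 31..16
        if (signExtend)
            return (int32_t)r >> 16;        // SAR 32, 16
        return (int32_t)(r >> 16);          // SHR 32, 16
    }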
@ -472,11 +482,12 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
)
{
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
u8 *mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP));
if (accessSize == 8)
{
NOP(1);
NOP(1);
}
const u8* backpatchStart = GetCodePtr();
u8* mov = UnsafeWriteRegToReg(reg_value, reg_addr, accessSize, offset, !(flags & SAFE_LOADSTORE_NO_SWAP));
int padding = BACKPATCH_SIZE - (GetCodePtr() - backpatchStart);
if (padding > 0)
{
NOP(padding);
}
registersInUseAtLoc[mov] = registersInUse;
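
The same measure-then-pad pattern now appears at every backpatchable site, so it generalizes naturally. A hedged sketch of the shared shape (EnsureBackpatchable and the callable parameter are illustrative names, not Dolphin API):

    template <typename EmitFn>
    const u8* EnsureBackpatchable(XEmitter& emitter, EmitFn emitAccess)
    {
        const u8* start = emitter.GetCodePtr();
        const u8* result = emitAccess();  // emit the unsafe load/store
        int padding = BACKPATCH_SIZE - (int)(emitter.GetCodePtr() - start);
        if (padding > 0)
            emitter.NOP(padding);         // room for a 5-byte CALL later
        return result;
    }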