Merge pull request #303 from Tilka/movbe

Add remaining possible uses of MOVBE
This commit is contained in:
Pierre Bourdon 2014-04-24 17:23:34 +02:00
commit 47373af9d9
6 changed files with 167 additions and 144 deletions

View File

@ -8,8 +8,8 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
{
unsigned const char *startCodePtr = codePtr;
u8 rex = 0;
u8 codeByte = 0;
u8 codeByte2 = 0;
u32 opcode;
int opcode_length;
//Check for regular prefix
info->operandSize = 4;
@ -17,6 +17,7 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
info->signExtend = false;
info->hasImmediate = false;
info->isMemoryWrite = false;
info->byteSwap = false;
u8 modRMbyte = 0;
u8 sibByte = 0;
@ -45,33 +46,36 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
codePtr++;
}
codeByte = *codePtr++;
// Skip two-byte opcode byte
bool twoByte = false;
if (codeByte == 0x0F)
opcode = *codePtr++;
opcode_length = 1;
if (opcode == 0x0F)
{
twoByte = true;
codeByte2 = *codePtr++;
opcode = (opcode << 8) | *codePtr++;
opcode_length = 2;
if ((opcode & 0xFB) == 0x38)
{
opcode = (opcode << 8) | *codePtr++;
opcode_length = 3;
}
}
if (!twoByte)
switch (opcode_length)
{
if ((codeByte & 0xF0) == 0x80 ||
((codeByte & 0xF8) == 0xC0 && (codeByte & 0x0E) != 0x02))
case 1:
if ((opcode & 0xF0) == 0x80 ||
((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
{
modRMbyte = *codePtr++;
hasModRM = true;
}
}
else
{
if (((codeByte2 & 0xF0) == 0x00 && (codeByte2 & 0x0F) >= 0x04 && (codeByte2 & 0x0D) != 0x0D) ||
(codeByte2 & 0xF0) == 0x30 ||
codeByte2 == 0x77 ||
(codeByte2 & 0xF0) == 0x80 ||
((codeByte2 & 0xF0) == 0xA0 && (codeByte2 & 0x07) <= 0x02) ||
(codeByte2 & 0xF8) == 0xC8)
break;
case 2:
if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) ||
(opcode & 0xF0) == 0x30 ||
(opcode & 0xFF) == 0x77 ||
(opcode & 0xF0) == 0x80 ||
(opcode & 0xF8) == 0xC8)
{
// No mod R/M byte
}
@ -80,6 +84,15 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
case 3:
// TODO: support more 3-byte opcode instructions
if ((opcode & 0xFE) == 0xF0)
{
modRMbyte = *codePtr++;
hasModRM = true;
}
break;
}
if (hasModRM)
@ -114,109 +127,92 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
if (displacementSize == 1)
info->displacement = (s32)(s8)*codePtr;
else
info->displacement = *((s32 *)codePtr);
info->displacement = *((s32*)codePtr);
codePtr += displacementSize;
switch (codeByte)
switch (opcode)
{
// writes
case 0xC6: // mem <- imm8
{
info->isMemoryWrite = true;
info->hasImmediate = true;
info->immediate = *codePtr;
codePtr++; //move past immediate
}
codePtr++;
break;
case 0xC7: // mem <- imm16/32
{
info->isMemoryWrite = true;
if (info->operandSize == 2)
switch (info->operandSize)
{
case 2:
info->hasImmediate = true;
info->immediate = *(u16*)codePtr;
codePtr += 2;
}
else if (info->operandSize == 4)
{
break;
case 4:
info->hasImmediate = true;
info->immediate = *(u32*)codePtr;
codePtr += 4;
}
else if (info->operandSize == 8)
{
break;
case 8:
info->zeroExtend = true;
info->immediate = *(u32*)codePtr;
codePtr += 4;
break;
}
}
break;
case 0x88: // mem <- r8
{
info->isMemoryWrite = true;
if (info->operandSize == 4)
if (info->operandSize != 4)
{
return false;
}
info->operandSize = 1;
break;
}
else
return false;
break;
}
case 0x89: // mem <- r16/32/64
{
info->isMemoryWrite = true;
break;
}
case 0x0F: // two-byte escape
{
info->isMemoryWrite = false;
switch (codeByte2)
{
case 0xB6: // movzx on byte
info->zeroExtend = true;
info->operandSize = 1;
break;
case 0xB7: // movzx on short
info->zeroExtend = true;
info->operandSize = 2;
break;
case 0xBE: // movsx on byte
info->signExtend = true;
info->operandSize = 1;
break;
case 0xBF: // movsx on short
info->signExtend = true;
info->operandSize = 2;
break;
default:
return false;
}
break;
}
case 0x8A: // r8 <- mem
if (info->operandSize != 4)
{
info->isMemoryWrite = false;
if (info->operandSize == 4)
{
info->operandSize = 1;
break;
}
else
return false;
}
info->operandSize = 1;
break;
case 0x8B: // r16/32/64 <- mem
{
info->isMemoryWrite = false;
break;
}
case 0x0FB6: // movzx on byte
info->zeroExtend = true;
info->operandSize = 1;
break;
case 0x0FB7: // movzx on short
info->zeroExtend = true;
info->operandSize = 2;
break;
case 0x0FBE: // movsx on byte
info->signExtend = true;
info->operandSize = 1;
break;
case 0x0FBF: // movsx on short
info->signExtend = true;
info->operandSize = 2;
break;
case 0x0F38F0: // movbe read
info->byteSwap = true;
break;
case 0x0F38F1: // movbe write
info->byteSwap = true;
info->isMemoryWrite = true;
break;
default:

View File

@ -17,6 +17,7 @@ struct InstructionInfo
bool signExtend;
bool hasImmediate;
bool isMemoryWrite;
bool byteSwap;
u64 immediate;
s32 displacement;
};

View File

@ -368,8 +368,7 @@ void Jit64::stX(UGeckoInstruction inst)
// Fast and daring - requires 64-bit
MOV(32, R(EAX), gpr.R(s));
gpr.BindToRegister(a, true, false);
BSWAP(32, EAX);
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
SwapAndStore(32, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), EAX);
return;
}
#endif*/

View File

@ -404,8 +404,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
UNPCKLPS(XMM0, M((void*)m_one));
} else {
#if _M_X86_64
MOV(32, R(RCX), MComplex(RBX, RCX, 1, 0));
BSWAP(32, RCX);
LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0));
MOVD_xmm(XMM0, R(RCX));
UNPCKLPS(XMM0, M((void*)m_one));
#else

View File

@ -187,6 +187,12 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
return nullptr;
}
if (info.byteSwap && info.instructionSize < 5)
{
PanicAlert("BackPatch: MOVBE is too small");
return nullptr;
}
auto it = registersInUseAtLoc.find(codePtr);
if (it == registersInUseAtLoc.end())
{
@ -200,8 +206,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
{
XEmitter emitter(codePtr);
int bswapNopCount;
if (info.byteSwap)
// MOVBE -> no BSWAP following
bswapNopCount = 0;
// Check the following BSWAP for REX byte
if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
else if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
bswapNopCount = 3;
else
bswapNopCount = 2;
@ -214,6 +223,15 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
else
{
// TODO: special case FIFO writes. Also, support 32-bit mode.
u8 *start;
if (info.byteSwap)
{
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
start = codePtr;
}
else
{
// We entered here with a BSWAP-ed register. We'll have to swap it back.
u64 *ptr = ContextRN(ctx, info.regOperandReg);
int bswapSize = 0;
@ -235,8 +253,8 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
*ptr = Common::swap64(*ptr);
break;
}
u8 *start = codePtr - bswapSize;
start = codePtr - bswapSize;
}
XEmitter emitter(start);
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
emitter.CALL((void *)trampoline);

View File

@ -426,11 +426,21 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
if (accessSize == 8 && reg_value >= 4) {
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
}
if (swap) BSWAP(accessSize, reg_value);
#if _M_X86_64
result = GetWritableCodePtr();
if (swap)
{
SwapAndStore(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), reg_value);
}
else
{
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
}
#else
if (swap)
{
BSWAP(accessSize, reg_value);
}
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
result = GetWritableCodePtr();
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
@ -502,6 +512,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
{
// FIXME
if (false && cpu_info.bSSSE3) {
// This path should be faster but for some reason it causes errors so I've disabled it.
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
@ -516,8 +527,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re
TEST(32, R(reg_addr), Imm32(mem_mask));
FixupBranch argh = J_CC(CC_Z);
MOVSS(M(&float_buffer), xmm_value);
MOV(32, R(EAX), M(&float_buffer));
BSWAP(32, EAX);
LoadAndSwap(32, EAX, M(&float_buffer));
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
ABI_PushRegistersAndAdjustStack(registersInUse, false);
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);