Merge pull request #303 from Tilka/movbe
Add remaining possible uses of MOVBE
This commit is contained in:
commit
47373af9d9
|
@ -8,8 +8,8 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
|
|||
{
|
||||
unsigned const char *startCodePtr = codePtr;
|
||||
u8 rex = 0;
|
||||
u8 codeByte = 0;
|
||||
u8 codeByte2 = 0;
|
||||
u32 opcode;
|
||||
int opcode_length;
|
||||
|
||||
//Check for regular prefix
|
||||
info->operandSize = 4;
|
||||
|
@ -17,6 +17,7 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
|
|||
info->signExtend = false;
|
||||
info->hasImmediate = false;
|
||||
info->isMemoryWrite = false;
|
||||
info->byteSwap = false;
|
||||
|
||||
u8 modRMbyte = 0;
|
||||
u8 sibByte = 0;
|
||||
|
@ -45,41 +46,53 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
|
|||
codePtr++;
|
||||
}
|
||||
|
||||
codeByte = *codePtr++;
|
||||
|
||||
// Skip two-byte opcode byte
|
||||
bool twoByte = false;
|
||||
if (codeByte == 0x0F)
|
||||
opcode = *codePtr++;
|
||||
opcode_length = 1;
|
||||
if (opcode == 0x0F)
|
||||
{
|
||||
twoByte = true;
|
||||
codeByte2 = *codePtr++;
|
||||
}
|
||||
|
||||
if (!twoByte)
|
||||
{
|
||||
if ((codeByte & 0xF0) == 0x80 ||
|
||||
((codeByte & 0xF8) == 0xC0 && (codeByte & 0x0E) != 0x02))
|
||||
opcode = (opcode << 8) | *codePtr++;
|
||||
opcode_length = 2;
|
||||
if ((opcode & 0xFB) == 0x38)
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
opcode = (opcode << 8) | *codePtr++;
|
||||
opcode_length = 3;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
switch (opcode_length)
|
||||
{
|
||||
if (((codeByte2 & 0xF0) == 0x00 && (codeByte2 & 0x0F) >= 0x04 && (codeByte2 & 0x0D) != 0x0D) ||
|
||||
(codeByte2 & 0xF0) == 0x30 ||
|
||||
codeByte2 == 0x77 ||
|
||||
(codeByte2 & 0xF0) == 0x80 ||
|
||||
((codeByte2 & 0xF0) == 0xA0 && (codeByte2 & 0x07) <= 0x02) ||
|
||||
(codeByte2 & 0xF8) == 0xC8)
|
||||
{
|
||||
// No mod R/M byte
|
||||
}
|
||||
else
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
case 1:
|
||||
if ((opcode & 0xF0) == 0x80 ||
|
||||
((opcode & 0xF8) == 0xC0 && (opcode & 0x0E) != 0x02))
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (((opcode & 0xF0) == 0x00 && (opcode & 0x0F) >= 0x04 && (opcode & 0x0D) != 0x0D) ||
|
||||
((opcode & 0xF0) == 0xA0 && (opcode & 0x07) <= 0x02) ||
|
||||
(opcode & 0xF0) == 0x30 ||
|
||||
(opcode & 0xFF) == 0x77 ||
|
||||
(opcode & 0xF0) == 0x80 ||
|
||||
(opcode & 0xF8) == 0xC8)
|
||||
{
|
||||
// No mod R/M byte
|
||||
}
|
||||
else
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
// TODO: support more 3-byte opcode instructions
|
||||
if ((opcode & 0xFE) == 0xF0)
|
||||
{
|
||||
modRMbyte = *codePtr++;
|
||||
hasModRM = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (hasModRM)
|
||||
|
@ -114,109 +127,92 @@ bool DisassembleMov(const unsigned char *codePtr, InstructionInfo *info)
|
|||
if (displacementSize == 1)
|
||||
info->displacement = (s32)(s8)*codePtr;
|
||||
else
|
||||
info->displacement = *((s32 *)codePtr);
|
||||
info->displacement = *((s32*)codePtr);
|
||||
codePtr += displacementSize;
|
||||
|
||||
|
||||
switch (codeByte)
|
||||
switch (opcode)
|
||||
{
|
||||
// writes
|
||||
case 0xC6: // mem <- imm8
|
||||
{
|
||||
info->isMemoryWrite = true;
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *codePtr;
|
||||
codePtr++; //move past immediate
|
||||
}
|
||||
info->isMemoryWrite = true;
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *codePtr;
|
||||
codePtr++;
|
||||
break;
|
||||
|
||||
case 0xC7: // mem <- imm16/32
|
||||
info->isMemoryWrite = true;
|
||||
switch (info->operandSize)
|
||||
{
|
||||
info->isMemoryWrite = true;
|
||||
if (info->operandSize == 2)
|
||||
{
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *(u16*)codePtr;
|
||||
codePtr += 2;
|
||||
}
|
||||
else if (info->operandSize == 4)
|
||||
{
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *(u32*)codePtr;
|
||||
codePtr += 4;
|
||||
}
|
||||
else if (info->operandSize == 8)
|
||||
{
|
||||
info->zeroExtend = true;
|
||||
info->immediate = *(u32*)codePtr;
|
||||
codePtr += 4;
|
||||
}
|
||||
case 2:
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *(u16*)codePtr;
|
||||
codePtr += 2;
|
||||
break;
|
||||
|
||||
case 4:
|
||||
info->hasImmediate = true;
|
||||
info->immediate = *(u32*)codePtr;
|
||||
codePtr += 4;
|
||||
break;
|
||||
case 8:
|
||||
info->zeroExtend = true;
|
||||
info->immediate = *(u32*)codePtr;
|
||||
codePtr += 4;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x88: // mem <- r8
|
||||
info->isMemoryWrite = true;
|
||||
if (info->operandSize != 4)
|
||||
{
|
||||
info->isMemoryWrite = true;
|
||||
if (info->operandSize == 4)
|
||||
{
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x89: // mem <- r16/32/64
|
||||
{
|
||||
info->isMemoryWrite = true;
|
||||
break;
|
||||
}
|
||||
|
||||
case 0x0F: // two-byte escape
|
||||
{
|
||||
info->isMemoryWrite = false;
|
||||
switch (codeByte2)
|
||||
{
|
||||
case 0xB6: // movzx on byte
|
||||
info->zeroExtend = true;
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
case 0xB7: // movzx on short
|
||||
info->zeroExtend = true;
|
||||
info->operandSize = 2;
|
||||
break;
|
||||
case 0xBE: // movsx on byte
|
||||
info->signExtend = true;
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
case 0xBF: // movsx on short
|
||||
info->signExtend = true;
|
||||
info->operandSize = 2;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
break;
|
||||
}
|
||||
info->isMemoryWrite = true;
|
||||
break;
|
||||
|
||||
case 0x8A: // r8 <- mem
|
||||
if (info->operandSize != 4)
|
||||
{
|
||||
info->isMemoryWrite = false;
|
||||
if (info->operandSize == 4)
|
||||
{
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
return false;
|
||||
}
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x8B: // r16/32/64 <- mem
|
||||
{
|
||||
info->isMemoryWrite = false;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case 0x0FB6: // movzx on byte
|
||||
info->zeroExtend = true;
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x0FB7: // movzx on short
|
||||
info->zeroExtend = true;
|
||||
info->operandSize = 2;
|
||||
break;
|
||||
|
||||
case 0x0FBE: // movsx on byte
|
||||
info->signExtend = true;
|
||||
info->operandSize = 1;
|
||||
break;
|
||||
|
||||
case 0x0FBF: // movsx on short
|
||||
info->signExtend = true;
|
||||
info->operandSize = 2;
|
||||
break;
|
||||
|
||||
case 0x0F38F0: // movbe read
|
||||
info->byteSwap = true;
|
||||
break;
|
||||
|
||||
case 0x0F38F1: // movbe write
|
||||
info->byteSwap = true;
|
||||
info->isMemoryWrite = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
|
@ -17,6 +17,7 @@ struct InstructionInfo
|
|||
bool signExtend;
|
||||
bool hasImmediate;
|
||||
bool isMemoryWrite;
|
||||
bool byteSwap;
|
||||
u64 immediate;
|
||||
s32 displacement;
|
||||
};
|
||||
|
|
|
@ -368,8 +368,7 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
// Fast and daring - requires 64-bit
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
gpr.BindToRegister(a, true, false);
|
||||
BSWAP(32, EAX);
|
||||
MOV(accessSize, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), R(EAX));
|
||||
SwapAndStore(32, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), EAX);
|
||||
return;
|
||||
}
|
||||
#endif*/
|
||||
|
|
|
@ -404,8 +404,7 @@ void CommonAsmRoutines::GenQuantizedLoads()
|
|||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
} else {
|
||||
#if _M_X86_64
|
||||
MOV(32, R(RCX), MComplex(RBX, RCX, 1, 0));
|
||||
BSWAP(32, RCX);
|
||||
LoadAndSwap(32, RCX, MComplex(RBX, RCX, 1, 0));
|
||||
MOVD_xmm(XMM0, R(RCX));
|
||||
UNPCKLPS(XMM0, M((void*)m_one));
|
||||
#else
|
||||
|
|
|
@ -187,6 +187,12 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
if (info.byteSwap && info.instructionSize < 5)
|
||||
{
|
||||
PanicAlert("BackPatch: MOVBE is too small");
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
auto it = registersInUseAtLoc.find(codePtr);
|
||||
if (it == registersInUseAtLoc.end())
|
||||
{
|
||||
|
@ -200,8 +206,11 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
|
|||
{
|
||||
XEmitter emitter(codePtr);
|
||||
int bswapNopCount;
|
||||
if (info.byteSwap)
|
||||
// MOVBE -> no BSWAP following
|
||||
bswapNopCount = 0;
|
||||
// Check the following BSWAP for REX byte
|
||||
if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
|
||||
else if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
|
||||
bswapNopCount = 3;
|
||||
else
|
||||
bswapNopCount = 2;
|
||||
|
@ -214,29 +223,38 @@ const u8 *Jitx86Base::BackPatch(u8 *codePtr, u32 emAddress, void *ctx_void)
|
|||
else
|
||||
{
|
||||
// TODO: special case FIFO writes. Also, support 32-bit mode.
|
||||
// We entered here with a BSWAP-ed register. We'll have to swap it back.
|
||||
u64 *ptr = ContextRN(ctx, info.regOperandReg);
|
||||
int bswapSize = 0;
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 1:
|
||||
bswapSize = 0;
|
||||
break;
|
||||
case 2:
|
||||
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap16((u16) *ptr);
|
||||
break;
|
||||
case 4:
|
||||
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap32((u32) *ptr);
|
||||
break;
|
||||
case 8:
|
||||
bswapSize = 3;
|
||||
*ptr = Common::swap64(*ptr);
|
||||
break;
|
||||
}
|
||||
|
||||
u8 *start = codePtr - bswapSize;
|
||||
u8 *start;
|
||||
if (info.byteSwap)
|
||||
{
|
||||
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
|
||||
start = codePtr;
|
||||
}
|
||||
else
|
||||
{
|
||||
// We entered here with a BSWAP-ed register. We'll have to swap it back.
|
||||
u64 *ptr = ContextRN(ctx, info.regOperandReg);
|
||||
int bswapSize = 0;
|
||||
switch (info.operandSize)
|
||||
{
|
||||
case 1:
|
||||
bswapSize = 0;
|
||||
break;
|
||||
case 2:
|
||||
bswapSize = 4 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap16((u16) *ptr);
|
||||
break;
|
||||
case 4:
|
||||
bswapSize = 2 + (info.regOperandReg >= 8 ? 1 : 0);
|
||||
*ptr = Common::swap32((u32) *ptr);
|
||||
break;
|
||||
case 8:
|
||||
bswapSize = 3;
|
||||
*ptr = Common::swap64(*ptr);
|
||||
break;
|
||||
}
|
||||
start = codePtr - bswapSize;
|
||||
}
|
||||
XEmitter emitter(start);
|
||||
const u8 *trampoline = trampolines.GetWriteTrampoline(info, registersInUse);
|
||||
emitter.CALL((void *)trampoline);
|
||||
|
|
|
@ -426,11 +426,21 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acc
|
|||
if (accessSize == 8 && reg_value >= 4) {
|
||||
PanicAlert("WARNING: likely incorrect use of UnsafeWriteRegToReg!");
|
||||
}
|
||||
if (swap) BSWAP(accessSize, reg_value);
|
||||
#if _M_X86_64
|
||||
result = GetWritableCodePtr();
|
||||
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
|
||||
if (swap)
|
||||
{
|
||||
SwapAndStore(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), reg_value);
|
||||
}
|
||||
else
|
||||
{
|
||||
MOV(accessSize, MComplex(RBX, reg_addr, SCALE_1, offset), R(reg_value));
|
||||
}
|
||||
#else
|
||||
if (swap)
|
||||
{
|
||||
BSWAP(accessSize, reg_value);
|
||||
}
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
result = GetWritableCodePtr();
|
||||
MOV(accessSize, MDisp(reg_addr, (u32)Memory::base + offset), R(reg_value));
|
||||
|
@ -502,6 +512,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||
|
||||
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
|
||||
{
|
||||
// FIXME
|
||||
if (false && cpu_info.bSSSE3) {
|
||||
// This path should be faster but for some reason it causes errors so I've disabled it.
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
@ -516,8 +527,7 @@ void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 re
|
|||
TEST(32, R(reg_addr), Imm32(mem_mask));
|
||||
FixupBranch argh = J_CC(CC_Z);
|
||||
MOVSS(M(&float_buffer), xmm_value);
|
||||
MOV(32, R(EAX), M(&float_buffer));
|
||||
BSWAP(32, EAX);
|
||||
LoadAndSwap(32, EAX, M(&float_buffer));
|
||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);
|
||||
|
|
Loading…
Reference in New Issue