Merge pull request #2887 from Tilka/swap
Jit64: some byte-swapping changes
This commit is contained in:
commit
99e88a7af7
|
@ -887,21 +887,46 @@ void XEmitter::WriteMOVBE(int bits, u8 op, X64Reg reg, const OpArg& arg)
|
||||||
void XEmitter::MOVBE(int bits, X64Reg dest, const OpArg& src) {WriteMOVBE(bits, 0xF0, dest, src);}
|
void XEmitter::MOVBE(int bits, X64Reg dest, const OpArg& src) {WriteMOVBE(bits, 0xF0, dest, src);}
|
||||||
void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src) {WriteMOVBE(bits, 0xF1, src, dest);}
|
void XEmitter::MOVBE(int bits, const OpArg& dest, X64Reg src) {WriteMOVBE(bits, 0xF1, src, dest);}
|
||||||
|
|
||||||
void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src)
|
void XEmitter::LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend)
|
||||||
{
|
{
|
||||||
if (cpu_info.bMOVBE)
|
switch (size)
|
||||||
{
|
{
|
||||||
MOVBE(size, dst, src);
|
case 8:
|
||||||
}
|
if (sign_extend)
|
||||||
else
|
MOVSX(32, 8, dst, src);
|
||||||
{
|
else
|
||||||
MOV(size, R(dst), src);
|
MOVZX(32, 8, dst, src);
|
||||||
BSWAP(size, dst);
|
break;
|
||||||
|
case 16:
|
||||||
|
MOVZX(32, 16, dst, src);
|
||||||
|
if (sign_extend)
|
||||||
|
{
|
||||||
|
BSWAP(32, dst);
|
||||||
|
SAR(32, R(dst), Imm8(16));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
ROL(16, R(dst), Imm8(8));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
case 64:
|
||||||
|
if (cpu_info.bMOVBE)
|
||||||
|
{
|
||||||
|
MOVBE(size, dst, src);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOV(size, R(dst), src);
|
||||||
|
BSWAP(size, dst);
|
||||||
|
}
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src)
|
u8* XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src)
|
||||||
{
|
{
|
||||||
|
u8* mov_location = GetWritableCodePtr();
|
||||||
if (cpu_info.bMOVBE)
|
if (cpu_info.bMOVBE)
|
||||||
{
|
{
|
||||||
MOVBE(size, dst, src);
|
MOVBE(size, dst, src);
|
||||||
|
@ -909,8 +934,10 @@ void XEmitter::SwapAndStore(int size, const OpArg& dst, X64Reg src)
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
BSWAP(size, src);
|
BSWAP(size, src);
|
||||||
|
mov_location = GetWritableCodePtr();
|
||||||
MOV(size, dst, R(src));
|
MOV(size, dst, R(src));
|
||||||
}
|
}
|
||||||
|
return mov_location;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -480,8 +480,8 @@ public:
|
||||||
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
|
// Available only on Atom or >= Haswell so far. Test with cpu_info.bMOVBE.
|
||||||
void MOVBE(int bits, X64Reg dest, const OpArg& src);
|
void MOVBE(int bits, X64Reg dest, const OpArg& src);
|
||||||
void MOVBE(int bits, const OpArg& dest, X64Reg src);
|
void MOVBE(int bits, const OpArg& dest, X64Reg src);
|
||||||
void LoadAndSwap(int size, X64Reg dst, const OpArg& src);
|
void LoadAndSwap(int size, X64Reg dst, const OpArg& src, bool sign_extend = false);
|
||||||
void SwapAndStore(int size, const OpArg& dst, X64Reg src);
|
u8* SwapAndStore(int size, const OpArg& dst, X64Reg src);
|
||||||
|
|
||||||
// Available only on AMD >= Phenom or Intel >= Haswell
|
// Available only on AMD >= Phenom or Intel >= Haswell
|
||||||
void LZCNT(int bits, X64Reg dest, const OpArg& src);
|
void LZCNT(int bits, X64Reg dest, const OpArg& src);
|
||||||
|
|
|
@ -86,33 +86,35 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
|
|
||||||
// Compute the start and length of the memory operation, including
|
// Compute the start and length of the memory operation, including
|
||||||
// any byteswapping.
|
// any byteswapping.
|
||||||
int totalSize;
|
int totalSize = info.instructionSize;
|
||||||
u8 *start = codePtr;
|
u8 *start = codePtr;
|
||||||
if (!info.isMemoryWrite)
|
if (!info.isMemoryWrite)
|
||||||
{
|
{
|
||||||
int bswapNopCount;
|
// MOVBE and single bytes don't need to be swapped.
|
||||||
if (info.byteSwap || info.operandSize == 1)
|
if (!info.byteSwap && info.operandSize > 1)
|
||||||
bswapNopCount = 0;
|
|
||||||
// Check the following BSWAP for REX byte
|
|
||||||
else if ((codePtr[info.instructionSize] & 0xF0) == 0x40)
|
|
||||||
bswapNopCount = 3;
|
|
||||||
else
|
|
||||||
bswapNopCount = 2;
|
|
||||||
|
|
||||||
totalSize = info.instructionSize + bswapNopCount;
|
|
||||||
if (info.operandSize == 2 && !info.byteSwap)
|
|
||||||
{
|
{
|
||||||
|
// REX
|
||||||
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
||||||
|
totalSize++;
|
||||||
|
|
||||||
|
// BSWAP
|
||||||
|
if (codePtr[totalSize] == 0x0F && (codePtr[totalSize + 1] & 0xF8) == 0xC8)
|
||||||
|
totalSize += 2;
|
||||||
|
|
||||||
|
if (info.operandSize == 2)
|
||||||
{
|
{
|
||||||
++totalSize;
|
// operand size override
|
||||||
|
if (codePtr[totalSize] == 0x66)
|
||||||
|
totalSize++;
|
||||||
|
// REX
|
||||||
|
if ((codePtr[totalSize] & 0xF0) == 0x40)
|
||||||
|
totalSize++;
|
||||||
|
// SAR/ROL
|
||||||
|
_assert_(codePtr[totalSize] == 0xC1 && (codePtr[totalSize + 2] == 0x10 ||
|
||||||
|
codePtr[totalSize + 2] == 0x08));
|
||||||
|
info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
|
||||||
|
totalSize += 3;
|
||||||
}
|
}
|
||||||
if (codePtr[totalSize] != 0xc1 || codePtr[totalSize + 2] != 0x10)
|
|
||||||
{
|
|
||||||
PanicAlert("BackPatch: didn't find expected shift %p", codePtr);
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
info.signExtend = (codePtr[totalSize + 1] & 0x10) != 0;
|
|
||||||
totalSize += 3;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -120,7 +122,6 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
if (info.byteSwap || info.hasImmediate)
|
if (info.byteSwap || info.hasImmediate)
|
||||||
{
|
{
|
||||||
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
|
// The instruction is a MOVBE but it failed so the value is still in little-endian byte order.
|
||||||
totalSize = info.instructionSize;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -146,7 +147,7 @@ bool Jitx86Base::BackPatch(u32 emAddress, SContext* ctx)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
start = codePtr - bswapSize;
|
start = codePtr - bswapSize;
|
||||||
totalSize = info.instructionSize + bswapSize;
|
totalSize += bswapSize;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,24 +24,8 @@ void EmuCodeBlock::MemoryExceptionCheck()
|
||||||
|
|
||||||
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
void EmuCodeBlock::UnsafeLoadRegToReg(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||||
{
|
{
|
||||||
MOVZX(32, accessSize, reg_value, MComplex(RMEM, reg_addr, SCALE_1, offset));
|
OpArg src = MComplex(RMEM, reg_addr, SCALE_1, offset);
|
||||||
if (accessSize == 32)
|
LoadAndSwap(accessSize, reg_value, src, signExtend);
|
||||||
{
|
|
||||||
BSWAP(32, reg_value);
|
|
||||||
}
|
|
||||||
else if (accessSize == 16)
|
|
||||||
{
|
|
||||||
BSWAP(32, reg_value);
|
|
||||||
if (signExtend)
|
|
||||||
SAR(32, R(reg_value), Imm8(16));
|
|
||||||
else
|
|
||||||
SHR(32, R(reg_value), Imm8(16));
|
|
||||||
}
|
|
||||||
else if (signExtend)
|
|
||||||
{
|
|
||||||
// TODO: bake 8-bit into the original load.
|
|
||||||
MOVSX(32, accessSize, reg_value, R(reg_value));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, int accessSize, s32 offset, bool signExtend)
|
||||||
|
@ -84,34 +68,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessS
|
||||||
}
|
}
|
||||||
|
|
||||||
result = GetWritableCodePtr();
|
result = GetWritableCodePtr();
|
||||||
if (accessSize == 8 && signExtend)
|
LoadAndSwap(accessSize, reg_value, memOperand, signExtend);
|
||||||
MOVSX(32, accessSize, reg_value, memOperand);
|
|
||||||
else
|
|
||||||
MOVZX(64, accessSize, reg_value, memOperand);
|
|
||||||
|
|
||||||
switch (accessSize)
|
|
||||||
{
|
|
||||||
case 8:
|
|
||||||
_dbg_assert_(DYNA_REC, BACKPATCH_SIZE - (GetCodePtr() - result <= 0));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 16:
|
|
||||||
BSWAP(32, reg_value);
|
|
||||||
if (signExtend)
|
|
||||||
SAR(32, R(reg_value), Imm8(16));
|
|
||||||
else
|
|
||||||
SHR(32, R(reg_value), Imm8(16));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 32:
|
|
||||||
BSWAP(32, reg_value);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 64:
|
|
||||||
BSWAP(64, reg_value);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,17 +372,7 @@ u8 *EmuCodeBlock::UnsafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acce
|
||||||
}
|
}
|
||||||
else if (swap)
|
else if (swap)
|
||||||
{
|
{
|
||||||
if (cpu_info.bMOVBE)
|
result = SwapAndStore(accessSize, dest, reg_value.GetSimpleReg());
|
||||||
{
|
|
||||||
MOVBE(accessSize, dest, reg_value.GetSimpleReg());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (accessSize > 8)
|
|
||||||
BSWAP(accessSize, reg_value.GetSimpleReg());
|
|
||||||
result = GetWritableCodePtr();
|
|
||||||
MOV(accessSize, dest, reg_value);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -53,21 +53,12 @@ OpArg VertexLoaderX64::GetVertexAddr(int array, u64 attribute)
|
||||||
OpArg data = MDisp(src_reg, m_src_ofs);
|
OpArg data = MDisp(src_reg, m_src_ofs);
|
||||||
if (attribute & MASK_INDEXED)
|
if (attribute & MASK_INDEXED)
|
||||||
{
|
{
|
||||||
if (attribute == INDEX8)
|
int bits = attribute == INDEX8 ? 8 : 16;
|
||||||
{
|
LoadAndSwap(bits, scratch1, data);
|
||||||
MOVZX(64, 8, scratch1, data);
|
m_src_ofs += bits / 8;
|
||||||
m_src_ofs += 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV(16, R(scratch1), data);
|
|
||||||
m_src_ofs += 2;
|
|
||||||
BSWAP(16, scratch1);
|
|
||||||
MOVZX(64, 16, scratch1, R(scratch1));
|
|
||||||
}
|
|
||||||
if (array == ARRAY_POSITION)
|
if (array == ARRAY_POSITION)
|
||||||
{
|
{
|
||||||
CMP(attribute == INDEX8 ? 8 : 16, R(scratch1), Imm8(-1));
|
CMP(bits, R(scratch1), Imm8(-1));
|
||||||
m_skip_vertex = J_CC(CC_E, true);
|
m_skip_vertex = J_CC(CC_E, true);
|
||||||
}
|
}
|
||||||
IMUL(32, scratch1, MPIC(&g_main_cp_state.array_strides[array]));
|
IMUL(32, scratch1, MPIC(&g_main_cp_state.array_strides[array]));
|
||||||
|
|
Loading…
Reference in New Issue