x64Emitter: add BMI1/BMI2 support

TZCNT and LZCNT use a completely different encoding scheme, so they should
probably go in a separate patch.

Also add some tests.
This commit is contained in:
Fiora 2014-08-24 17:39:30 -07:00
parent b583879c2a
commit 5088a2b4e2
3 changed files with 203 additions and 25 deletions

View File

@ -152,22 +152,13 @@ void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const
}
}
void OpArg::WriteVex(XEmitter* emit, int size, bool packed, X64Reg regOp1, X64Reg regOp2) const
void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const
{
int R = !(regOp1 & 8);
int X = !(indexReg & 8);
int B = !(offsetOrBaseReg & 8);
// not so sure about this one...
int W = 0;
// aka map_select in AMD manuals
// only support VEX opcode map 1 for now (analog to secondary opcode map)
int mmmmm = 1;
int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf);
int L = size == 256;
int pp = (packed << 1) | (size == 64);
// do we need any VEX fields that only appear in the three-byte form?
if (X == 1 && B == 1 && W == 0 && mmmmm == 1)
@ -189,7 +180,7 @@ void OpArg::WriteVex(XEmitter* emit, int size, bool packed, X64Reg regOp1, X64Re
void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg,
bool warn_64bit_offset) const
{
if (_operandReg == 0xff)
if (_operandReg == INVALID_REG)
_operandReg = (X64Reg)this->operandReg;
int mod = 0;
int ireg = indexReg;
@ -878,7 +869,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src)
if (bits == 16) Write8(0x66); //TODO: performance warning
src.WriteRex(this, bits, bits);
Write8(0x8D);
src.WriteRest(this, 0, (X64Reg)0xFF, bits == 64);
src.WriteRest(this, 0, INVALID_REG, bits == 64);
}
//shift can be either imm8 or cl
@ -1284,11 +1275,7 @@ void XEmitter::WriteSSEOp(int size, u16 sseOp, bool packed, X64Reg regOp, OpArg
arg.WriteRex(this, 0, 0);
Write8(0x0F);
if (sseOp > 0xFF)
{
// Currently, only 0x38 and 0x3A are used as secondary escape byte.
_assert_msg_(DYNA_REC, ((sseOp >> 8) & 0xFD) == 0x38, "Invalid SSE opcode: 0F%04X", sseOp);
Write8((sseOp >> 8) & 0xFF);
}
Write8(sseOp & 0xFF);
arg.WriteRest(this, extrabytes);
}
@ -1302,17 +1289,59 @@ void XEmitter::WriteAVXOp(int size, u16 sseOp, bool packed, X64Reg regOp1, X64Re
{
if (!cpu_info.bAVX)
PanicAlert("Trying to use AVX on a system that doesn't support it. Bad programmer.");
arg.WriteVex(this, size, packed, regOp1, regOp2);
if (sseOp > 0xFF)
{
// Currently, only 0x38 and 0x3A are used as secondary escape byte.
_assert_msg_(DYNA_REC, ((sseOp >> 8) & 0xFD) == 0x38, "Invalid SSE opcode: 0F%04X", sseOp);
Write8((sseOp >> 8) & 0xFF);
}
// Currently, only 0x38 and 0x3A are used as secondary escape byte.
int mmmmm;
if ((sseOp >> 8) == 0x3A)
mmmmm = 3;
else if ((sseOp >> 8) == 0x38)
mmmmm = 2;
else
mmmmm = 1;
// FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here
arg.WriteVex(this, regOp1, regOp2, 0, (packed << 1) | (size == 64), mmmmm);
Write8(sseOp & 0xFF);
arg.WriteRest(this, extrabytes, regOp1);
}
// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2
void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
if (size != 32 && size != 64)
PanicAlert("VEX GPR instructions only support 32-bit and 64-bit modes!");
int mmmmm, pp;
if ((op >> 8) == 0x3A)
mmmmm = 3;
else if ((op >> 8) == 0x38)
mmmmm = 2;
else
mmmmm = 1;
if (opPrefix == 0x66)
pp = 1;
else if (opPrefix == 0xF3)
pp = 2;
else if (opPrefix == 0xF2)
pp = 3;
else
pp = 0;
arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64);
Write8(op & 0xFF);
arg.WriteRest(this, extrabytes, regOp1);
}
void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
if (!cpu_info.bBMI1)
PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer.");
WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}
void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes)
{
if (!cpu_info.bBMI2)
PanicAlert("Trying to use BMI2 on a system that doesn't support it. Bad programmer.");
WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes);
}
void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);}
void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);}
@ -1663,6 +1692,20 @@ void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64,
void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseAND, false, regOp1, regOp2, arg);}
void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseANDN, false, regOp1, regOp2, arg);}
void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);}
void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);}
void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);}
void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);}
void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);}
void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);}
void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);}
void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);}
void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);}
void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);}
// Prefixes
void XEmitter::LOCK() { Write8(0xF0); }

View File

@ -127,8 +127,8 @@ struct OpArg
offset = _offset;
}
void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const;
void WriteVex(XEmitter* emit, int size, bool packed, X64Reg regOp1, X64Reg regOp2) const;
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const;
void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const;
void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const;
void WriteFloatModRM(XEmitter *emit, FloatOp op);
void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits);
// This one is public - must be written to
@ -275,6 +275,9 @@ private:
void WriteSSE41Op(int size, u16 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(int size, u16 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0);
void WriteAVXOp(int size, u16 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0);
void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg);
void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2);
@ -708,6 +711,21 @@ public:
void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg);
void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg);
// VEX GPR instructions
void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate);
void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void BLSR(int bits, X64Reg regOp, OpArg arg);
void BLSMSK(int bits, X64Reg regOp, OpArg arg);
void BLSI(int bits, X64Reg regOp, OpArg arg);
void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2);
void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg);
void RDTSC();
// Utility functions

View File

@ -833,4 +833,121 @@ TWO_OP_SSE_TEST(PMOVZXDQ, "qword")
// TODO: AVX
// for VEX GPR instructions that take the form op reg, r/m, reg
#define VEX_RMR_TEST(Name) \
TEST_F(x64EmitterTest, Name) \
{ \
struct { \
int bits; \
std::vector<NamedReg> regs; \
std::string out_name; \
std::string size; \
} regsets[] = { \
{ 32, reg32names, "eax", "dword" }, \
{ 64, reg64names, "rax", "qword" }, \
}; \
for (const auto& regset : regsets) \
for (const auto& r : regset.regs) \
{ \
emitter->Name(regset.bits, r.reg, R(RAX), RAX); \
emitter->Name(regset.bits, RAX, R(r.reg), RAX); \
emitter->Name(regset.bits, RAX, MatR(R12), r.reg); \
ExpectDisassembly(#Name " " + r.name + ", " + regset.out_name + ", " + regset.out_name + " " \
#Name " " + regset.out_name + ", " + r.name + ", " + regset.out_name + " " \
#Name " " + regset.out_name + ", " + regset.size + " ptr ds:[r12], " + r.name + " "); \
} \
}
VEX_RMR_TEST(SHRX)
VEX_RMR_TEST(SARX)
VEX_RMR_TEST(SHLX)
VEX_RMR_TEST(BEXTR)
VEX_RMR_TEST(BZHI)
// for VEX GPR instructions that take the form op reg, reg, r/m
#define VEX_RRM_TEST(Name) \
TEST_F(x64EmitterTest, Name) \
{ \
struct { \
int bits; \
std::vector<NamedReg> regs; \
std::string out_name; \
std::string size; \
} regsets[] = { \
{ 32, reg32names, "eax", "dword" }, \
{ 64, reg64names, "rax", "qword" }, \
}; \
for (const auto& regset : regsets) \
for (const auto& r : regset.regs) \
{ \
emitter->Name(regset.bits, r.reg, RAX, R(RAX)); \
emitter->Name(regset.bits, RAX, RAX, R(r.reg)); \
emitter->Name(regset.bits, RAX, r.reg, MatR(R12)); \
ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + ", " + regset.out_name + " " \
#Name " " + regset.out_name + ", " + regset.out_name + ", " + r.name + " " \
#Name " " + regset.out_name + ", " + r.name + ", " + regset.size + " ptr ds:[r12] "); \
} \
}
VEX_RRM_TEST(PEXT)
VEX_RRM_TEST(PDEP)
VEX_RRM_TEST(MULX)
VEX_RRM_TEST(ANDN)
// for VEX GPR instructions that take the form op reg, r/m
#define VEX_RM_TEST(Name) \
TEST_F(x64EmitterTest, Name) \
{ \
struct { \
int bits; \
std::vector<NamedReg> regs; \
std::string out_name; \
std::string size; \
} regsets[] = { \
{ 32, reg32names, "eax", "dword" }, \
{ 64, reg64names, "rax", "qword" }, \
}; \
for (const auto& regset : regsets) \
for (const auto& r : regset.regs) \
{ \
emitter->Name(regset.bits, r.reg, R(RAX)); \
emitter->Name(regset.bits, RAX, R(r.reg)); \
emitter->Name(regset.bits, r.reg, MatR(R12)); \
ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + " " \
#Name " " + regset.out_name + ", " + r.name + " " \
#Name " " + r.name + ", " + regset.size + " ptr ds:[r12] "); \
} \
}
VEX_RM_TEST(BLSR)
VEX_RM_TEST(BLSMSK)
VEX_RM_TEST(BLSI)
// for VEX GPR instructions that take the form op reg, r/m, imm
#define VEX_RMI_TEST(Name) \
TEST_F(x64EmitterTest, Name) \
{ \
struct { \
int bits; \
std::vector<NamedReg> regs; \
std::string out_name; \
std::string size; \
} regsets[] = { \
{ 32, reg32names, "eax", "dword" }, \
{ 64, reg64names, "rax", "qword" }, \
}; \
for (const auto& regset : regsets) \
for (const auto& r : regset.regs) \
{ \
emitter->Name(regset.bits, r.reg, R(RAX), 4); \
emitter->Name(regset.bits, RAX, R(r.reg), 4); \
emitter->Name(regset.bits, r.reg, MatR(R12), 4); \
ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + ", 0x04 " \
#Name " " + regset.out_name + ", " + r.name + ", 0x04 " \
#Name " " + r.name + ", " + regset.size + " ptr ds:[r12], 0x04 "); \
} \
}
VEX_RMI_TEST(RORX)
} // namespace Gen