From 5088a2b4e24527ddf0280ffbc935b4ee8605f326 Mon Sep 17 00:00:00 2001 From: Fiora Date: Sun, 24 Aug 2014 17:39:30 -0700 Subject: [PATCH] x64Emitter: add BMI1/BMI2 support TZCNT and LZCNT use a completely different encoding scheme, so they should probably go in a separate patch. Also add some tests. --- Source/Core/Common/x64Emitter.cpp | 89 ++++++++++++---- Source/Core/Common/x64Emitter.h | 22 +++- Source/UnitTests/Common/x64EmitterTest.cpp | 117 +++++++++++++++++++++ 3 files changed, 203 insertions(+), 25 deletions(-) diff --git a/Source/Core/Common/x64Emitter.cpp b/Source/Core/Common/x64Emitter.cpp index 4371763060..da8363d314 100644 --- a/Source/Core/Common/x64Emitter.cpp +++ b/Source/Core/Common/x64Emitter.cpp @@ -152,22 +152,13 @@ void OpArg::WriteRex(XEmitter *emit, int opBits, int bits, int customOp) const } } -void OpArg::WriteVex(XEmitter* emit, int size, bool packed, X64Reg regOp1, X64Reg regOp2) const +void OpArg::WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W) const { int R = !(regOp1 & 8); int X = !(indexReg & 8); int B = !(offsetOrBaseReg & 8); - // not so sure about this one... - int W = 0; - - // aka map_select in AMD manuals - // only support VEX opcode map 1 for now (analog to secondary opcode map) - int mmmmm = 1; - int vvvv = (regOp2 == X64Reg::INVALID_REG) ? 0xf : (regOp2 ^ 0xf); - int L = size == 256; - int pp = (packed << 1) | (size == 64); // do we need any VEX fields that only appear in the three-byte form? if (X == 1 && B == 1 && W == 0 && mmmmm == 1) @@ -189,7 +180,7 @@ void OpArg::WriteVex(XEmitter* emit, int size, bool packed, X64Reg regOp1, X64Re void OpArg::WriteRest(XEmitter *emit, int extraBytes, X64Reg _operandReg, bool warn_64bit_offset) const { - if (_operandReg == 0xff) + if (_operandReg == INVALID_REG) _operandReg = (X64Reg)this->operandReg; int mod = 0; int ireg = indexReg; @@ -878,7 +869,7 @@ void XEmitter::LEA(int bits, X64Reg dest, OpArg src) if (bits == 16) Write8(0x66); //TODO: performance warning src.WriteRex(this, bits, bits); Write8(0x8D); - src.WriteRest(this, 0, (X64Reg)0xFF, bits == 64); + src.WriteRest(this, 0, INVALID_REG, bits == 64); } //shift can be either imm8 or cl @@ -1284,11 +1275,7 @@ void XEmitter::WriteSSEOp(int size, u16 sseOp, bool packed, X64Reg regOp, OpArg arg.WriteRex(this, 0, 0); Write8(0x0F); if (sseOp > 0xFF) - { - // Currently, only 0x38 and 0x3A are used as secondary escape byte. - _assert_msg_(DYNA_REC, ((sseOp >> 8) & 0xFD) == 0x38, "Invalid SSE opcode: 0F%04X", sseOp); Write8((sseOp >> 8) & 0xFF); - } Write8(sseOp & 0xFF); arg.WriteRest(this, extrabytes); } @@ -1302,17 +1289,59 @@ void XEmitter::WriteAVXOp(int size, u16 sseOp, bool packed, X64Reg regOp1, X64Re { if (!cpu_info.bAVX) PanicAlert("Trying to use AVX on a system that doesn't support it. Bad programmer."); - arg.WriteVex(this, size, packed, regOp1, regOp2); - if (sseOp > 0xFF) - { - // Currently, only 0x38 and 0x3A are used as secondary escape byte. - _assert_msg_(DYNA_REC, ((sseOp >> 8) & 0xFD) == 0x38, "Invalid SSE opcode: 0F%04X", sseOp); - Write8((sseOp >> 8) & 0xFF); - } + // Currently, only 0x38 and 0x3A are used as secondary escape byte. + int mmmmm; + if ((sseOp >> 8) == 0x3A) + mmmmm = 3; + else if ((sseOp >> 8) == 0x38) + mmmmm = 2; + else + mmmmm = 1; + // FIXME: we currently don't support 256-bit instructions, and "size" is not the vector size here + arg.WriteVex(this, regOp1, regOp2, 0, (packed << 1) | (size == 64), mmmmm); Write8(sseOp & 0xFF); arg.WriteRest(this, extrabytes, regOp1); } +// Like the above, but more general; covers GPR-based VEX operations, like BMI1/2 +void XEmitter::WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (size != 32 && size != 64) + PanicAlert("VEX GPR instructions only support 32-bit and 64-bit modes!"); + int mmmmm, pp; + if ((op >> 8) == 0x3A) + mmmmm = 3; + else if ((op >> 8) == 0x38) + mmmmm = 2; + else + mmmmm = 1; + if (opPrefix == 0x66) + pp = 1; + else if (opPrefix == 0xF3) + pp = 2; + else if (opPrefix == 0xF2) + pp = 3; + else + pp = 0; + arg.WriteVex(this, regOp1, regOp2, 0, pp, mmmmm, size == 64); + Write8(op & 0xFF); + arg.WriteRest(this, extrabytes, regOp1); +} + +void XEmitter::WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (!cpu_info.bBMI1) + PanicAlert("Trying to use BMI1 on a system that doesn't support it. Bad programmer."); + WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); +} + +void XEmitter::WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes) +{ + if (!cpu_info.bBMI2) + PanicAlert("Trying to use BMI2 on a system that doesn't support it. Bad programmer."); + WriteVEXOp(size, opPrefix, op, regOp1, regOp2, arg, extrabytes); +} + void XEmitter::MOVD_xmm(X64Reg dest, const OpArg &arg) {WriteSSEOp(64, 0x6E, true, dest, arg, 0);} void XEmitter::MOVD_xmm(const OpArg &arg, X64Reg src) {WriteSSEOp(64, 0x7E, true, src, arg, 0);} @@ -1663,6 +1692,20 @@ void XEmitter::VSQRTSD(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, void XEmitter::VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseAND, false, regOp1, regOp2, arg);} void XEmitter::VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteAVXOp(64, sseANDN, false, regOp1, regOp2, arg);} +void XEmitter::SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF3, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x66, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0xF2, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate) {WriteBMI2Op(bits, 0xF2, 0x3AF0, regOp, INVALID_REG, arg, 1); Write8(rotate);} +void XEmitter::PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF3, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI2Op(bits, 0xF2, 0x38F6, regOp2, regOp1, arg);} +void XEmitter::BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2) {WriteBMI2Op(bits, 0x00, 0x38F5, regOp1, regOp2, arg);} +void XEmitter::BLSR(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x1, regOp, arg);} +void XEmitter::BLSMSK(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x2, regOp, arg);} +void XEmitter::BLSI(int bits, X64Reg regOp, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F3, (X64Reg)0x3, regOp, arg);} +void XEmitter::BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2){WriteBMI1Op(bits, 0x00, 0x38F7, regOp1, regOp2, arg);} +void XEmitter::ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg) {WriteBMI1Op(bits, 0x00, 0x38F2, regOp1, regOp2, arg);} + // Prefixes void XEmitter::LOCK() { Write8(0xF0); } diff --git a/Source/Core/Common/x64Emitter.h b/Source/Core/Common/x64Emitter.h index 58dc313419..558af41767 100644 --- a/Source/Core/Common/x64Emitter.h +++ b/Source/Core/Common/x64Emitter.h @@ -127,8 +127,8 @@ struct OpArg offset = _offset; } void WriteRex(XEmitter *emit, int opBits, int bits, int customOp = -1) const; - void WriteVex(XEmitter* emit, int size, bool packed, X64Reg regOp1, X64Reg regOp2) const; - void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=(X64Reg)0xFF, bool warn_64bit_offset = true) const; + void WriteVex(XEmitter* emit, X64Reg regOp1, X64Reg regOp2, int L, int pp, int mmmmm, int W = 0) const; + void WriteRest(XEmitter *emit, int extraBytes=0, X64Reg operandReg=INVALID_REG, bool warn_64bit_offset = true) const; void WriteFloatModRM(XEmitter *emit, FloatOp op); void WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg operandReg, int bits); // This one is public - must be written to @@ -275,6 +275,9 @@ private: void WriteSSE41Op(int size, u16 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); void WriteAVXOp(int size, u16 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); void WriteAVXOp(int size, u16 sseOp, bool packed, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteVEXOp(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteBMI1Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); + void WriteBMI2Op(int size, u8 opPrefix, u16 op, X64Reg regOp1, X64Reg regOp2, OpArg arg, int extrabytes = 0); void WriteFloatLoadStore(int bits, FloatOp op, FloatOp op_80b, OpArg arg); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); @@ -708,6 +711,21 @@ public: void VPAND(X64Reg regOp1, X64Reg regOp2, OpArg arg); void VPANDN(X64Reg regOp1, X64Reg regOp2, OpArg arg); + // VEX GPR instructions + void SARX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void SHLX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void SHRX(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void RORX(int bits, X64Reg regOp, OpArg arg, u8 rotate); + void PEXT(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void PDEP(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void MULX(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void BZHI(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void BLSR(int bits, X64Reg regOp, OpArg arg); + void BLSMSK(int bits, X64Reg regOp, OpArg arg); + void BLSI(int bits, X64Reg regOp, OpArg arg); + void BEXTR(int bits, X64Reg regOp1, OpArg arg, X64Reg regOp2); + void ANDN(int bits, X64Reg regOp1, X64Reg regOp2, OpArg arg); + void RDTSC(); // Utility functions diff --git a/Source/UnitTests/Common/x64EmitterTest.cpp b/Source/UnitTests/Common/x64EmitterTest.cpp index fc47d5a20c..8051e10b07 100644 --- a/Source/UnitTests/Common/x64EmitterTest.cpp +++ b/Source/UnitTests/Common/x64EmitterTest.cpp @@ -833,4 +833,121 @@ TWO_OP_SSE_TEST(PMOVZXDQ, "qword") // TODO: AVX +// for VEX GPR instructions that take the form op reg, r/m, reg +#define VEX_RMR_TEST(Name) \ + TEST_F(x64EmitterTest, Name) \ + { \ + struct { \ + int bits; \ + std::vector regs; \ + std::string out_name; \ + std::string size; \ + } regsets[] = { \ + { 32, reg32names, "eax", "dword" }, \ + { 64, reg64names, "rax", "qword" }, \ + }; \ + for (const auto& regset : regsets) \ + for (const auto& r : regset.regs) \ + { \ + emitter->Name(regset.bits, r.reg, R(RAX), RAX); \ + emitter->Name(regset.bits, RAX, R(r.reg), RAX); \ + emitter->Name(regset.bits, RAX, MatR(R12), r.reg); \ + ExpectDisassembly(#Name " " + r.name + ", " + regset.out_name + ", " + regset.out_name + " " \ + #Name " " + regset.out_name + ", " + r.name + ", " + regset.out_name + " " \ + #Name " " + regset.out_name + ", " + regset.size + " ptr ds:[r12], " + r.name + " "); \ + } \ + } + +VEX_RMR_TEST(SHRX) +VEX_RMR_TEST(SARX) +VEX_RMR_TEST(SHLX) +VEX_RMR_TEST(BEXTR) +VEX_RMR_TEST(BZHI) + +// for VEX GPR instructions that take the form op reg, reg, r/m +#define VEX_RRM_TEST(Name) \ + TEST_F(x64EmitterTest, Name) \ + { \ + struct { \ + int bits; \ + std::vector regs; \ + std::string out_name; \ + std::string size; \ + } regsets[] = { \ + { 32, reg32names, "eax", "dword" }, \ + { 64, reg64names, "rax", "qword" }, \ + }; \ + for (const auto& regset : regsets) \ + for (const auto& r : regset.regs) \ + { \ + emitter->Name(regset.bits, r.reg, RAX, R(RAX)); \ + emitter->Name(regset.bits, RAX, RAX, R(r.reg)); \ + emitter->Name(regset.bits, RAX, r.reg, MatR(R12)); \ + ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + ", " + regset.out_name + " " \ + #Name " " + regset.out_name + ", " + regset.out_name + ", " + r.name + " " \ + #Name " " + regset.out_name + ", " + r.name + ", " + regset.size + " ptr ds:[r12] "); \ + } \ + } + +VEX_RRM_TEST(PEXT) +VEX_RRM_TEST(PDEP) +VEX_RRM_TEST(MULX) +VEX_RRM_TEST(ANDN) + +// for VEX GPR instructions that take the form op reg, r/m +#define VEX_RM_TEST(Name) \ + TEST_F(x64EmitterTest, Name) \ + { \ + struct { \ + int bits; \ + std::vector regs; \ + std::string out_name; \ + std::string size; \ + } regsets[] = { \ + { 32, reg32names, "eax", "dword" }, \ + { 64, reg64names, "rax", "qword" }, \ + }; \ + for (const auto& regset : regsets) \ + for (const auto& r : regset.regs) \ + { \ + emitter->Name(regset.bits, r.reg, R(RAX)); \ + emitter->Name(regset.bits, RAX, R(r.reg)); \ + emitter->Name(regset.bits, r.reg, MatR(R12)); \ + ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + " " \ + #Name " " + regset.out_name + ", " + r.name + " " \ + #Name " " + r.name + ", " + regset.size + " ptr ds:[r12] "); \ + } \ + } + +VEX_RM_TEST(BLSR) +VEX_RM_TEST(BLSMSK) +VEX_RM_TEST(BLSI) + +// for VEX GPR instructions that take the form op reg, r/m, imm +#define VEX_RMI_TEST(Name) \ + TEST_F(x64EmitterTest, Name) \ + { \ + struct { \ + int bits; \ + std::vector regs; \ + std::string out_name; \ + std::string size; \ + } regsets[] = { \ + { 32, reg32names, "eax", "dword" }, \ + { 64, reg64names, "rax", "qword" }, \ + }; \ + for (const auto& regset : regsets) \ + for (const auto& r : regset.regs) \ + { \ + emitter->Name(regset.bits, r.reg, R(RAX), 4); \ + emitter->Name(regset.bits, RAX, R(r.reg), 4); \ + emitter->Name(regset.bits, r.reg, MatR(R12), 4); \ + ExpectDisassembly(#Name " " + r.name+ ", " + regset.out_name + ", 0x04 " \ + #Name " " + regset.out_name + ", " + r.name + ", 0x04 " \ + #Name " " + r.name + ", " + regset.size + " ptr ds:[r12], 0x04 "); \ + } \ + } + +VEX_RMI_TEST(RORX) + } // namespace Gen