From 3d6a550f23e4d5beb187377d47c9e6a4e0052b40 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 23 Oct 2022 18:58:46 +1000 Subject: [PATCH] x86Emitter: Add 8-bit variants for spl..r15b Also fixes PINSR/PEXTR. --- common/emitter/implement/simd_shufflepack.h | 41 ++++++++--------- common/emitter/simd.cpp | 39 ++++++++-------- common/emitter/x86emitter.cpp | 20 ++++++--- common/emitter/x86types.h | 45 +++++++++++++++++-- tests/ctest/x86emitter/codegen_tests_main.cpp | 28 ++++++++++++ 5 files changed, 121 insertions(+), 52 deletions(-) diff --git a/common/emitter/implement/simd_shufflepack.h b/common/emitter/implement/simd_shufflepack.h index 070087af2d..21ec1084d9 100644 --- a/common/emitter/implement/simd_shufflepack.h +++ b/common/emitter/implement/simd_shufflepack.h @@ -184,17 +184,6 @@ namespace x86Emitter }; - struct xImplSimd_InsertExtractHelper - { - u16 Opcode; - - // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid) - void operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const; - - // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid) - void operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const; - }; - // -------------------------------------------------------------------------------------- // SimdImpl_PInsert // -------------------------------------------------------------------------------------- @@ -202,17 +191,19 @@ namespace x86Emitter // struct xImplSimd_PInsert { + void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const; + void B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const; + void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const; void W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const; - // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid) - xImplSimd_InsertExtractHelper B; + void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const; + void D(const xRegisterSSE& to, const 
xIndirectVoid& from, u8 imm8) const; - // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid) - xImplSimd_InsertExtractHelper D; + void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const; + void Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const; }; - ////////////////////////////////////////////////////////////////////////////////////////// // PEXTRW/B/D [all but Word form are SSE4.1 only!] // @@ -220,6 +211,12 @@ namespace x86Emitter // struct SimdImpl_PExtract { + // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits + // of dest are zero-extended (cleared). This can be used to extract any single packed + // byte value from src into an x86 32 bit register. + void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const; + void B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const; + // Copies the word element specified by imm8 from src to dest. The upper bits // of dest are zero-extended (cleared). This can be used to extract any single packed // word value from src into an x86 32 bit register. @@ -229,13 +226,13 @@ namespace x86Emitter void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const; void W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const; - // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits - // of dest are zero-extended (cleared). This can be used to extract any single packed - // byte value from src into an x86 32 bit register. - const xImplSimd_InsertExtractHelper B; - // [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be // used to extract any single packed dword value from src into an x86 32 bit register. 
- const xImplSimd_InsertExtractHelper D; + void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const; + void D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const; + + // [SSE-4.1] Copies the qword element specified by imm8 from src to dest (a 64 bit register or memory). + void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const; + void Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const; }; } // namespace x86Emitter diff --git a/common/emitter/simd.cpp b/common/emitter/simd.cpp index f1679e1d8d..46f9cbaba9 100644 --- a/common/emitter/simd.cpp +++ b/common/emitter/simd.cpp @@ -473,22 +473,30 @@ namespace x86Emitter xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3); } - void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const - { - xOpWrite0F(0x66, Opcode, to, from, imm8); - } - - void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const - { - xOpWrite0F(0x66, Opcode, to, from, imm8); - } + void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); } + void xImplSimd_PInsert::B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); } void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); } void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); } + void xImplSimd_PInsert::D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); } + void xImplSimd_PInsert::D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); } + + void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const { 
xOpWrite0F(0x66, 0x223a, to, from, imm8); } + void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); } + + void SimdImpl_PExtract::B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, to, from, imm8); } + void SimdImpl_PExtract::B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, dest, imm8); } + void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, to, from, imm8); } void SimdImpl_PExtract::W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); } + void SimdImpl_PExtract::D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, to, from, imm8); } + void SimdImpl_PExtract::D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); } + + void SimdImpl_PExtract::Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, to, from, imm8); } + void SimdImpl_PExtract::Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); } + const xImplSimd_Shuffle xSHUF = {}; const xImplSimd_PShuffle xPSHUF = @@ -529,17 +537,8 @@ namespace x86Emitter {0x66, 0x14}, // LPD }; - const xImplSimd_PInsert xPINSR = - { - {0x203a}, // B - {0x223a}, // D - }; - - const SimdImpl_PExtract xPEXTR = - { - {0x143a}, // B - {0x163a}, // D - }; + const xImplSimd_PInsert xPINSR; + const SimdImpl_PExtract xPEXTR; // ===================================================================================================== // SIMD Move And Blend Instructions diff --git a/common/emitter/x86emitter.cpp b/common/emitter/x86emitter.cpp index 8bd9119ee6..562e75d0a4 100644 --- a/common/emitter/x86emitter.cpp +++ b/common/emitter/x86emitter.cpp 
@@ -160,7 +160,13 @@ const xRegister8 al(0), dl(2), bl(3), ah(4), ch(5), - dh(6), bh(7); + dh(6), bh(7), + spl(4, true), bpl(5, true), + sil(6, true), dil(7, true), + r8b(8), r9b(9), + r10b(10), r11b(11), + r12b(12), r13b(13), + r14b(14), r15b(15); #if defined(_WIN32) const xAddressReg @@ -436,10 +442,10 @@ const xRegister32 } ////////////////////////////////////////////////////////////////////////////////////////// - __emitinline static void EmitRex(bool w, bool r, bool x, bool b) + __emitinline static void EmitRex(bool w, bool r, bool x, bool b, bool ext8bit = false) { const u8 rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | (u8)b; - if (rex != 0x40) + if (rex != 0x40 || ext8bit) xWrite8(rex); } @@ -473,7 +479,7 @@ const xRegister32 bool r = false; bool x = false; bool b = reg2.IsExtended(); - EmitRex(w, r, x, b); + EmitRex(w, r, x, b, reg2.IsExtended8Bit()); } void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2) @@ -482,7 +488,7 @@ const xRegister32 bool r = reg1.IsExtended(); bool x = false; bool b = reg2.IsExtended(); - EmitRex(w, r, x, b); + EmitRex(w, r, x, b, reg2.IsExtended8Bit()); } void EmitRex(const xRegisterBase& reg1, const void* src) @@ -492,7 +498,7 @@ const xRegister32 bool r = reg1.IsExtended(); bool x = false; bool b = false; // FIXME src.IsExtended(); - EmitRex(w, r, x, b); + EmitRex(w, r, x, b, reg1.IsExtended8Bit()); } void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib) @@ -506,7 +512,7 @@ const xRegister32 b = x; x = false; } - EmitRex(w, r, x, b); + EmitRex(w, r, x, b, reg1.IsExtended8Bit()); } // For use by instructions that are implicitly wide diff --git a/common/emitter/x86types.h b/common/emitter/x86types.h index 9c41a52642..e628b6ddfd 100644 --- a/common/emitter/x86types.h +++ b/common/emitter/x86types.h @@ -268,7 +268,8 @@ namespace x86Emitter bool IsEmpty() const { return Id < 0; } bool IsInvalid() const { return Id == xRegId_Invalid; } - bool IsExtended() const { return Id > 7; } // Register 8-15 need an 
extra bit to be selected + bool IsExtended() const { return (Id >= 0 && (Id & 0x0F) > 7); } // Register 8-15 need an extra bit to be selected + bool IsExtended8Bit() const { return (Is8BitOp() && Id >= 0x10); } bool IsMem() const { return false; } bool IsReg() const { return true; } @@ -290,6 +291,9 @@ namespace x86Emitter // is a valid non-null string for any Id, valid or invalid. No assertions are generated. const char* GetName(); int GetId() const { return Id; } + + /// Returns true if the specified register is caller-saved (volatile). + static inline bool IsCallerSaved(uint id); }; class xRegisterInt : public xRegisterBase @@ -347,7 +351,14 @@ namespace x86Emitter explicit xRegister8(const xRegisterInt& other) : _parent(1, other.Id) { - pxAssertDev(other.canMapIDTo(1), "spl, bpl, sil, dil not yet supported"); + if (!other.canMapIDTo(1)) + Id |= 0x10; + } + xRegister8(int regId, bool ext8bit) + : _parent(1, regId) + { + if (ext8bit) + Id |= 0x10; } bool operator==(const xRegister8& src) const { return Id == src.Id; } @@ -447,6 +458,9 @@ namespace x86Emitter /// arg_number is the argument position from the left, starting with 0. /// sse_number is the argument position relative to the number of vector registers. static const inline xRegisterSSE& GetArgRegister(uint arg_number, uint sse_number, bool ymm = false); + + /// Returns true if the specified register is caller-saved (volatile). + static inline bool IsCallerSaved(uint id); }; class xRegisterCL : public xRegister8 @@ -617,7 +631,10 @@ extern const xRegister16 extern const xRegister8 al, dl, bl, - ah, ch, dh, bh; + ah, ch, dh, bh, + spl, bpl, sil, dil, + r8b, r9b, r10b, r11b, + r12b, r13b, r14b, r15b; extern const xAddressReg arg1reg, arg2reg, @@ -636,6 +653,28 @@ extern const xRegister32 extern const xRegisterCL cl; // I'm special! + bool xRegisterBase::IsCallerSaved(uint id) + { +#ifdef _WIN32 + // The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile. 
+ return (id <= 2 || (id >= 8 && id <= 11)); +#else + // rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers. + return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11)); +#endif + } + + bool xRegisterSSE::IsCallerSaved(uint id) + { +#ifdef _WIN32 + // XMM6 through XMM15 are saved. Upper 128 bits is always volatile. + return (id < 6); +#else + // All vector registers are volatile. + return true; +#endif + } + const xRegisterSSE& xRegisterSSE::GetInstance(uint id) { static const xRegisterSSE* const m_tbl_xmmRegs[] = diff --git a/tests/ctest/x86emitter/codegen_tests_main.cpp b/tests/ctest/x86emitter/codegen_tests_main.cpp index 109c80f619..fa5ab417f9 100644 --- a/tests/ctest/x86emitter/codegen_tests_main.cpp +++ b/tests/ctest/x86emitter/codegen_tests_main.cpp @@ -172,6 +172,14 @@ TEST(CodegenTests, SSETest) CODEGEN_TEST_64(xMOVD(r10, xmm1), "66 49 0f 7e ca"); CODEGEN_TEST_64(xMOVD(rax, xmm10), "66 4c 0f 7e d0"); CODEGEN_TEST_64(xMOVD(r10, xmm10), "66 4d 0f 7e d2"); + CODEGEN_TEST_64(xPINSR.B(xmm0, eax, 1), "66 0f 3a 20 c0 01"); + CODEGEN_TEST_64(xPINSR.W(xmm0, eax, 1), "66 0f c4 c0 01"); + CODEGEN_TEST_64(xPINSR.D(xmm0, eax, 1), "66 0f 3a 22 c0 01"); + CODEGEN_TEST_64(xPINSR.Q(xmm0, rax, 1), "66 48 0f 3a 22 c0 01"); + CODEGEN_TEST_64(xPEXTR.B(eax, xmm0, 1), "66 0f 3a 14 c0 01"); + CODEGEN_TEST_64(xPEXTR.W(eax, xmm0, 1), "66 0f c5 c0 01"); + CODEGEN_TEST_64(xPEXTR.D(eax, xmm0, 1), "66 0f 3a 16 c0 01"); + CODEGEN_TEST_64(xPEXTR.Q(rax, xmm0, 1), "66 48 0f 3a 16 c0 01"); } TEST(CodegenTests, AVXTest) @@ -264,3 +272,23 @@ TEST(CodegenTests, AVX256Test) CODEGEN_TEST_64(xVMOVMSKPS(eax, ymm1), "c5 fc 50 c1"); CODEGEN_TEST_64(xVMOVMSKPD(eax, ymm1), "c5 fd 50 c1"); } + +TEST(CodegenTests, Extended8BitTest) +{ + CODEGEN_TEST_64(xSETL(al), "0f 9c c0"); + CODEGEN_TEST_64(xSETL(cl), "0f 9c c1"); + CODEGEN_TEST_64(xSETL(dl), "0f 9c c2"); + CODEGEN_TEST_64(xSETL(bl), "0f 9c c3"); + CODEGEN_TEST_64(xSETL(spl), "40 0f 9c c4"); + CODEGEN_TEST_64(xSETL(bpl), "40 0f 9c 
c5"); + CODEGEN_TEST_64(xSETL(sil), "40 0f 9c c6"); + CODEGEN_TEST_64(xSETL(dil), "40 0f 9c c7"); + CODEGEN_TEST_64(xSETL(r8b), "41 0f 9c c0"); + CODEGEN_TEST_64(xSETL(r9b), "41 0f 9c c1"); + CODEGEN_TEST_64(xSETL(r10b), "41 0f 9c c2"); + CODEGEN_TEST_64(xSETL(r11b), "41 0f 9c c3"); + CODEGEN_TEST_64(xSETL(r12b), "41 0f 9c c4"); + CODEGEN_TEST_64(xSETL(r13b), "41 0f 9c c5"); + CODEGEN_TEST_64(xSETL(r14b), "41 0f 9c c6"); + CODEGEN_TEST_64(xSETL(r15b), "41 0f 9c c7"); +}