x86Emitter: Add 8-bit variants for spl..r15b

Also fixes PINSR/PEXTR.
This commit is contained in:
Connor McLaughlin 2022-10-23 18:58:46 +10:00 committed by refractionpcsx2
parent 8de4e190dc
commit 3d6a550f23
5 changed files with 121 additions and 52 deletions

View File

@ -184,17 +184,6 @@ namespace x86Emitter
}; };
struct xImplSimd_InsertExtractHelper
{
u16 Opcode;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
void operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
void operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
};
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
// SimdImpl_PInsert // SimdImpl_PInsert
// -------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------
@ -202,17 +191,19 @@ namespace x86Emitter
// //
struct xImplSimd_PInsert struct xImplSimd_PInsert
{ {
void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
void B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const; void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
void W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const; void W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid) void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
xImplSimd_InsertExtractHelper B; void D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid) void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const;
xImplSimd_InsertExtractHelper D; void Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
}; };
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// PEXTRW/B/D [all but Word form are SSE4.1 only!] // PEXTRW/B/D [all but Word form are SSE4.1 only!]
// //
@ -220,6 +211,12 @@ namespace x86Emitter
// //
struct SimdImpl_PExtract struct SimdImpl_PExtract
{ {
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// byte value from src into an x86 32 bit register.
void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
void B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
// Copies the word element specified by imm8 from src to dest. The upper bits // Copies the word element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed // of dest are zero-extended (cleared). This can be used to extract any single packed
// word value from src into an x86 32 bit register. // word value from src into an x86 32 bit register.
@ -229,13 +226,13 @@ namespace x86Emitter
void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const; void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
void W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const; void W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// byte value from src into an x86 32 bit register.
const xImplSimd_InsertExtractHelper B;
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be // [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
// used to extract any single packed dword value from src into an x86 32 bit register. // used to extract any single packed dword value from src into an x86 32 bit register.
const xImplSimd_InsertExtractHelper D; void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
void D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
// [SSE-4.1] Copies the qword element specified by imm8 from src to dest. This can be
// used to extract any single packed qword value from src into an x86 64 bit register.
void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const;
void Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
}; };
} // namespace x86Emitter } // namespace x86Emitter

View File

@ -473,22 +473,30 @@ namespace x86Emitter
xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3); xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
} }
void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
{ void xImplSimd_PInsert::B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
xOpWrite0F(0x66, Opcode, to, from, imm8);
}
void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
{
xOpWrite0F(0x66, Opcode, to, from, imm8);
}
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); } void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); } void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
// PEXTRB (66 0F 3A 14 /r ib): the destination (register or memory) is encoded in ModRM.r/m
// and the XMM source in ModRM.reg. The XMM register is therefore passed first in both
// overloads; passing the GPR first only happens to work when both register ids are equal.
void SimdImpl_PExtract::B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, to, imm8); }
void SimdImpl_PExtract::B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, dest, imm8); }
void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, to, from, imm8); } void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, to, from, imm8); }
void SimdImpl_PExtract::W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); } void SimdImpl_PExtract::W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); }
// PEXTRD / PEXTRQ (66 [REX.W] 0F 3A 16 /r ib): the destination (register or memory) is
// encoded in ModRM.r/m and the XMM source in ModRM.reg. The XMM register is therefore
// passed first in every overload; passing the GPR first only happens to work when both
// register ids are equal.
void SimdImpl_PExtract::D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
void SimdImpl_PExtract::D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
void SimdImpl_PExtract::Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
// FIXME(review): this call has the same opcode and the same argument types as the D memory
// overload above, so it emits the same bytes as D. Nothing here marks the access as 64-bit.
void SimdImpl_PExtract::Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
const xImplSimd_Shuffle xSHUF = {}; const xImplSimd_Shuffle xSHUF = {};
const xImplSimd_PShuffle xPSHUF = const xImplSimd_PShuffle xPSHUF =
@ -529,17 +537,8 @@ namespace x86Emitter
{0x66, 0x14}, // LPD {0x66, 0x14}, // LPD
}; };
const xImplSimd_PInsert xPINSR = const xImplSimd_PInsert xPINSR;
{ const SimdImpl_PExtract xPEXTR;
{0x203a}, // B
{0x223a}, // D
};
const SimdImpl_PExtract xPEXTR =
{
{0x143a}, // B
{0x163a}, // D
};
// ===================================================================================================== // =====================================================================================================
// SIMD Move And Blend Instructions // SIMD Move And Blend Instructions

View File

@ -160,7 +160,13 @@ const xRegister8
al(0), al(0),
dl(2), bl(3), dl(2), bl(3),
ah(4), ch(5), ah(4), ch(5),
dh(6), bh(7); dh(6), bh(7),
spl(4, true), bpl(5, true),
sil(6, true), dil(7, true),
r8b(8), r9b(9),
r10b(10), r11b(11),
r12b(12), r13b(13),
r14b(14), r15b(15);
#if defined(_WIN32) #if defined(_WIN32)
const xAddressReg const xAddressReg
@ -436,10 +442,10 @@ const xRegister32
} }
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
__emitinline static void EmitRex(bool w, bool r, bool x, bool b) __emitinline static void EmitRex(bool w, bool r, bool x, bool b, bool ext8bit = false)
{ {
const u8 rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | (u8)b; const u8 rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | (u8)b;
if (rex != 0x40) if (rex != 0x40 || ext8bit)
xWrite8(rex); xWrite8(rex);
} }
@ -473,7 +479,7 @@ const xRegister32
bool r = false; bool r = false;
bool x = false; bool x = false;
bool b = reg2.IsExtended(); bool b = reg2.IsExtended();
EmitRex(w, r, x, b); EmitRex(w, r, x, b, reg2.IsExtended8Bit());
} }
void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2) void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2)
@ -482,7 +488,7 @@ const xRegister32
bool r = reg1.IsExtended(); bool r = reg1.IsExtended();
bool x = false; bool x = false;
bool b = reg2.IsExtended(); bool b = reg2.IsExtended();
EmitRex(w, r, x, b); EmitRex(w, r, x, b, reg2.IsExtended8Bit());
} }
void EmitRex(const xRegisterBase& reg1, const void* src) void EmitRex(const xRegisterBase& reg1, const void* src)
@ -492,7 +498,7 @@ const xRegister32
bool r = reg1.IsExtended(); bool r = reg1.IsExtended();
bool x = false; bool x = false;
bool b = false; // FIXME src.IsExtended(); bool b = false; // FIXME src.IsExtended();
EmitRex(w, r, x, b); EmitRex(w, r, x, b, reg1.IsExtended8Bit());
} }
void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib) void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib)
@ -506,7 +512,7 @@ const xRegister32
b = x; b = x;
x = false; x = false;
} }
EmitRex(w, r, x, b); EmitRex(w, r, x, b, reg1.IsExtended8Bit());
} }
// For use by instructions that are implicitly wide // For use by instructions that are implicitly wide

View File

@ -268,7 +268,8 @@ namespace x86Emitter
bool IsEmpty() const { return Id < 0; } bool IsEmpty() const { return Id < 0; }
bool IsInvalid() const { return Id == xRegId_Invalid; } bool IsInvalid() const { return Id == xRegId_Invalid; }
bool IsExtended() const { return Id > 7; } // Register 8-15 need an extra bit to be selected bool IsExtended() const { return (Id >= 0 && (Id & 0x0F) > 7); } // Register 8-15 need an extra bit to be selected
// True for an 8-bit register whose Id carries the 0x10 marker bit or anything above it.
bool IsExtended8Bit() const
{
	if (!Is8BitOp())
		return false;
	return Id >= 0x10;
}
bool IsMem() const { return false; } bool IsMem() const { return false; }
bool IsReg() const { return true; } bool IsReg() const { return true; }
@ -290,6 +291,9 @@ namespace x86Emitter
// is a valid non-null string for any Id, valid or invalid. No assertions are generated. // is a valid non-null string for any Id, valid or invalid. No assertions are generated.
const char* GetName(); const char* GetName();
int GetId() const { return Id; } int GetId() const { return Id; }
/// Returns true if the specified register is caller-saved (volatile).
static inline bool IsCallerSaved(uint id);
}; };
class xRegisterInt : public xRegisterBase class xRegisterInt : public xRegisterBase
@ -347,7 +351,14 @@ namespace x86Emitter
explicit xRegister8(const xRegisterInt& other) explicit xRegister8(const xRegisterInt& other)
: _parent(1, other.Id) : _parent(1, other.Id)
{ {
pxAssertDev(other.canMapIDTo(1), "spl, bpl, sil, dil not yet supported"); if (!other.canMapIDTo(1))
Id |= 0x10;
}
xRegister8(int regId, bool ext8bit)
: _parent(1, regId)
{
if (ext8bit)
Id |= 0x10;
} }
bool operator==(const xRegister8& src) const { return Id == src.Id; } bool operator==(const xRegister8& src) const { return Id == src.Id; }
@ -447,6 +458,9 @@ namespace x86Emitter
/// arg_number is the argument position from the left, starting with 0. /// arg_number is the argument position from the left, starting with 0.
/// sse_number is the argument position relative to the number of vector registers. /// sse_number is the argument position relative to the number of vector registers.
static const inline xRegisterSSE& GetArgRegister(uint arg_number, uint sse_number, bool ymm = false); static const inline xRegisterSSE& GetArgRegister(uint arg_number, uint sse_number, bool ymm = false);
/// Returns true if the specified register is caller-saved (volatile).
static inline bool IsCallerSaved(uint id);
}; };
class xRegisterCL : public xRegister8 class xRegisterCL : public xRegister8
@ -617,7 +631,10 @@ extern const xRegister16
extern const xRegister8 extern const xRegister8
al, dl, bl, al, dl, bl,
ah, ch, dh, bh; ah, ch, dh, bh,
spl, bpl, sil, dil,
r8b, r9b, r10b, r11b,
r12b, r13b, r14b, r15b;
extern const xAddressReg extern const xAddressReg
arg1reg, arg2reg, arg1reg, arg2reg,
@ -636,6 +653,28 @@ extern const xRegister32
extern const xRegisterCL cl; // I'm special! extern const xRegisterCL cl; // I'm special!
/// Returns true if the general-purpose register with the given id is caller-saved (volatile).
/// Ids of 16 and above are never reported as caller-saved.
bool xRegisterBase::IsCallerSaved(uint id)
{
	// One bit per register id: bit N set means register N is volatile.
#ifdef _WIN32
	// The x64 ABI treats RAX, RCX, RDX (ids 0-2) and R8-R11 (ids 8-11) as volatile.
	const unsigned volatile_mask = 0x0F07u;
#else
	// rax, rcx, rdx (ids 0-2), rsi, rdi (ids 6-7) and r8-r11 (ids 8-11) are scratch registers.
	const unsigned volatile_mask = 0x0FC7u;
#endif
	return id < 16 && ((volatile_mask >> id) & 1u) != 0;
}
/// Returns true if the vector register with the given id is caller-saved (volatile).
bool xRegisterSSE::IsCallerSaved(uint id)
{
#ifdef _WIN32
	// XMM6 through XMM15 are preserved across calls, so only ids below 6 are volatile.
	// (The upper 128 bits are always volatile.)
	const uint first_preserved_id = 6;
	return id < first_preserved_id;
#else
	// Every vector register is volatile here, whatever the id.
	return true;
#endif
}
const xRegisterSSE& xRegisterSSE::GetInstance(uint id) const xRegisterSSE& xRegisterSSE::GetInstance(uint id)
{ {
static const xRegisterSSE* const m_tbl_xmmRegs[] = static const xRegisterSSE* const m_tbl_xmmRegs[] =

View File

@ -172,6 +172,14 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST_64(xMOVD(r10, xmm1), "66 49 0f 7e ca"); CODEGEN_TEST_64(xMOVD(r10, xmm1), "66 49 0f 7e ca");
CODEGEN_TEST_64(xMOVD(rax, xmm10), "66 4c 0f 7e d0"); CODEGEN_TEST_64(xMOVD(rax, xmm10), "66 4c 0f 7e d0");
CODEGEN_TEST_64(xMOVD(r10, xmm10), "66 4d 0f 7e d2"); CODEGEN_TEST_64(xMOVD(r10, xmm10), "66 4d 0f 7e d2");
CODEGEN_TEST_64(xPINSR.B(xmm0, eax, 1), "66 0f 3a 20 c0 01");
CODEGEN_TEST_64(xPINSR.W(xmm0, eax, 1), "66 0f c4 c0 01");
CODEGEN_TEST_64(xPINSR.D(xmm0, eax, 1), "66 0f 3a 22 c0 01");
CODEGEN_TEST_64(xPINSR.Q(xmm0, rax, 1), "66 48 0f 3a 22 c0 01");
CODEGEN_TEST_64(xPEXTR.B(eax, xmm0, 1), "66 0f 3a 14 c0 01");
CODEGEN_TEST_64(xPEXTR.W(eax, xmm0, 1), "66 0f c5 c0 01");
CODEGEN_TEST_64(xPEXTR.D(eax, xmm0, 1), "66 0f 3a 16 c0 01");
CODEGEN_TEST_64(xPEXTR.Q(rax, xmm0, 1), "66 48 0f 3a 16 c0 01");
} }
TEST(CodegenTests, AVXTest) TEST(CodegenTests, AVXTest)
@ -264,3 +272,23 @@ TEST(CodegenTests, AVX256Test)
CODEGEN_TEST_64(xVMOVMSKPS(eax, ymm1), "c5 fc 50 c1"); CODEGEN_TEST_64(xVMOVMSKPS(eax, ymm1), "c5 fc 50 c1");
CODEGEN_TEST_64(xVMOVMSKPD(eax, ymm1), "c5 fd 50 c1"); CODEGEN_TEST_64(xVMOVMSKPD(eax, ymm1), "c5 fd 50 c1");
} }
// Checks the bytes emitted for SETL on each 8-bit register name, in particular which
// REX prefix (if any) precedes the 0f 9c opcode.
TEST(CodegenTests, Extended8BitTest)
{
// al/cl/dl/bl: no REX prefix expected.
CODEGEN_TEST_64(xSETL(al), "0f 9c c0");
CODEGEN_TEST_64(xSETL(cl), "0f 9c c1");
CODEGEN_TEST_64(xSETL(dl), "0f 9c c2");
CODEGEN_TEST_64(xSETL(bl), "0f 9c c3");
// spl/bpl/sil/dil: a bare 0x40 REX prefix is expected even though no W/R/X/B bit is set.
// These registers use ids 4-7, the same ModRM values as ah/ch/dh/bh.
CODEGEN_TEST_64(xSETL(spl), "40 0f 9c c4");
CODEGEN_TEST_64(xSETL(bpl), "40 0f 9c c5");
CODEGEN_TEST_64(xSETL(sil), "40 0f 9c c6");
CODEGEN_TEST_64(xSETL(dil), "40 0f 9c c7");
// r8b-r15b: 0x41 (REX with the B bit set) is expected, with the low three id bits in ModRM.
CODEGEN_TEST_64(xSETL(r8b), "41 0f 9c c0");
CODEGEN_TEST_64(xSETL(r9b), "41 0f 9c c1");
CODEGEN_TEST_64(xSETL(r10b), "41 0f 9c c2");
CODEGEN_TEST_64(xSETL(r11b), "41 0f 9c c3");
CODEGEN_TEST_64(xSETL(r12b), "41 0f 9c c4");
CODEGEN_TEST_64(xSETL(r13b), "41 0f 9c c5");
CODEGEN_TEST_64(xSETL(r14b), "41 0f 9c c6");
CODEGEN_TEST_64(xSETL(r15b), "41 0f 9c c7");
}