x86Emitter: Add 8-bit variants for spl..r15b

Also fixes PINSR/PEXTR.
This commit is contained in:
Connor McLaughlin 2022-10-23 18:58:46 +10:00 committed by refractionpcsx2
parent 8de4e190dc
commit 3d6a550f23
5 changed files with 121 additions and 52 deletions

View File

@ -184,17 +184,6 @@ namespace x86Emitter
};
struct xImplSimd_InsertExtractHelper
{
u16 Opcode;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
void operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
void operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
};
// --------------------------------------------------------------------------------------
// SimdImpl_PInsert
// --------------------------------------------------------------------------------------
@ -202,17 +191,19 @@ namespace x86Emitter
//
struct xImplSimd_PInsert
{
void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
void B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
void W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
xImplSimd_InsertExtractHelper B;
void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
void D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
xImplSimd_InsertExtractHelper D;
void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const;
void Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
};
//////////////////////////////////////////////////////////////////////////////////////////
// PEXTRW/B/D [all but Word form are SSE4.1 only!]
//
@ -220,6 +211,12 @@ namespace x86Emitter
//
struct SimdImpl_PExtract
{
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// byte value from src into an x86 32 bit register.
void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
void B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
// Copies the word element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// word value from src into an x86 32 bit register.
@ -229,13 +226,13 @@ namespace x86Emitter
void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
void W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
// of dest are zero-extended (cleared). This can be used to extract any single packed
// byte value from src into an x86 32 bit register.
const xImplSimd_InsertExtractHelper B;
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
// used to extract any single packed dword value from src into an x86 32 bit register.
const xImplSimd_InsertExtractHelper D;
void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
void D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
// [SSE-4.1] Copies the qword element specified by imm8 from src to dest. This can be
// used to extract any single packed qword value from src into an x86 64 bit register.
void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const;
void Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
};
} // namespace x86Emitter

View File

@ -473,22 +473,30 @@ namespace x86Emitter
xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
}
void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const
{
xOpWrite0F(0x66, Opcode, to, from, imm8);
}
void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
{
xOpWrite0F(0x66, Opcode, to, from, imm8);
}
// ------------------------------------------------------------------------
// PINSRB / PINSRW / PINSRD / PINSRQ
//
// Every overload hands the destination XMM register to xOpWrite0F as the first operand
// and the integer source (general purpose register or memory) as the second operand,
// followed by the imm8 element selector.
// ------------------------------------------------------------------------

// PINSRB (66 0F 3A 20), register source.
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0x203a, to, from, imm8);
}

// PINSRB (66 0F 3A 20), memory source.
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0x203a, to, from, imm8);
}

// PINSRW (66 0F C4), register source.
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0xc4, to, from, imm8);
}

// PINSRW (66 0F C4), memory source.
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0xc4, to, from, imm8);
}

// PINSRD (66 0F 3A 22), register source.
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0x223a, to, from, imm8);
}

// PINSRD (66 0F 3A 22), memory source.
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0x223a, to, from, imm8);
}

// PINSRQ: same opcode bytes as PINSRD, taking a 64-bit source register.
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0x223a, to, from, imm8);
}

// PINSRQ, memory source.
// NOTE(review): this call is identical to the D memory form and neither operand is a 64-bit
// register -- confirm that the REX.W prefix required for PINSRQ is actually emitted here.
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
{
	xOpWrite0F(0x66, 0x223a, to, from, imm8);
}
// ------------------------------------------------------------------------
// PEXTRB / PEXTRW / PEXTRD / PEXTRQ
//
// Operand order matters here: the second operand given to xOpWrite0F is the one that may
// be a memory reference (i.e. it is encoded in ModRM.r/m), so the first operand lands in
// ModRM.reg. The SSE4.1 extract encodings (66 0F 3A 14/15/16) keep the *source* XMM
// register in ModRM.reg and the *destination* (GPR or memory) in ModRM.r/m, so the XMM
// register must be passed first for both the register and the memory forms.
//
// The only exception is the legacy register form of PEXTRW (66 0F C5), whose destination
// GPR lives in ModRM.reg and whose source XMM register lives in ModRM.r/m.
// ------------------------------------------------------------------------

// PEXTRB r32, xmm, imm8 (66 0F 3A 14): XMM source first, GPR destination in r/m.
void SimdImpl_PExtract::B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, to, imm8); }
// PEXTRB m8, xmm, imm8 (66 0F 3A 14).
void SimdImpl_PExtract::B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, dest, imm8); }
// PEXTRW r32, xmm, imm8 -- legacy encoding (66 0F C5): GPR destination in ModRM.reg.
void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, to, from, imm8); }
// PEXTRW m16, xmm, imm8 -- SSE4.1 encoding (66 0F 3A 15), the only form with a memory destination.
void SimdImpl_PExtract::W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); }
// PEXTRD r32, xmm, imm8 (66 0F 3A 16): XMM source first, GPR destination in r/m.
void SimdImpl_PExtract::D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
// PEXTRD m32, xmm, imm8 (66 0F 3A 16).
void SimdImpl_PExtract::D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
// PEXTRQ r64, xmm, imm8: same opcode bytes as PEXTRD, XMM source first, 64-bit GPR destination in r/m.
void SimdImpl_PExtract::Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, to, imm8); }
// PEXTRQ m64, xmm, imm8.
// NOTE(review): this call is identical to the D memory form and neither operand is a 64-bit
// register -- confirm that the REX.W prefix required for PEXTRQ is actually emitted here.
void SimdImpl_PExtract::Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
const xImplSimd_Shuffle xSHUF = {};
const xImplSimd_PShuffle xPSHUF =
@ -529,17 +537,8 @@ namespace x86Emitter
{0x66, 0x14}, // LPD
};
const xImplSimd_PInsert xPINSR =
{
{0x203a}, // B
{0x223a}, // D
};
const SimdImpl_PExtract xPEXTR =
{
{0x143a}, // B
{0x163a}, // D
};
const xImplSimd_PInsert xPINSR;
const SimdImpl_PExtract xPEXTR;
// =====================================================================================================
// SIMD Move And Blend Instructions

View File

@ -160,7 +160,13 @@ const xRegister8
al(0),
dl(2), bl(3),
ah(4), ch(5),
dh(6), bh(7);
dh(6), bh(7),
spl(4, true), bpl(5, true),
sil(6, true), dil(7, true),
r8b(8), r9b(9),
r10b(10), r11b(11),
r12b(12), r13b(13),
r14b(14), r15b(15);
#if defined(_WIN32)
const xAddressReg
@ -436,10 +442,10 @@ const xRegister32
}
//////////////////////////////////////////////////////////////////////////////////////////
__emitinline static void EmitRex(bool w, bool r, bool x, bool b)
__emitinline static void EmitRex(bool w, bool r, bool x, bool b, bool ext8bit = false)
{
const u8 rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | (u8)b;
if (rex != 0x40)
if (rex != 0x40 || ext8bit)
xWrite8(rex);
}
@ -473,7 +479,7 @@ const xRegister32
bool r = false;
bool x = false;
bool b = reg2.IsExtended();
EmitRex(w, r, x, b);
EmitRex(w, r, x, b, reg2.IsExtended8Bit());
}
void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2)
@ -482,7 +488,7 @@ const xRegister32
bool r = reg1.IsExtended();
bool x = false;
bool b = reg2.IsExtended();
EmitRex(w, r, x, b);
EmitRex(w, r, x, b, reg2.IsExtended8Bit());
}
void EmitRex(const xRegisterBase& reg1, const void* src)
@ -492,7 +498,7 @@ const xRegister32
bool r = reg1.IsExtended();
bool x = false;
bool b = false; // FIXME src.IsExtended();
EmitRex(w, r, x, b);
EmitRex(w, r, x, b, reg1.IsExtended8Bit());
}
void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib)
@ -506,7 +512,7 @@ const xRegister32
b = x;
x = false;
}
EmitRex(w, r, x, b);
EmitRex(w, r, x, b, reg1.IsExtended8Bit());
}
// For use by instructions that are implicitly wide

View File

@ -268,7 +268,8 @@ namespace x86Emitter
bool IsEmpty() const { return Id < 0; }
bool IsInvalid() const { return Id == xRegId_Invalid; }
bool IsExtended() const { return Id > 7; } // Register 8-15 need an extra bit to be selected
bool IsExtended() const { return (Id >= 0 && (Id & 0x0F) > 7); } // Register 8-15 need an extra bit to be selected
bool IsExtended8Bit() const { return (Is8BitOp() && Id >= 0x10); }
bool IsMem() const { return false; }
bool IsReg() const { return true; }
@ -290,6 +291,9 @@ namespace x86Emitter
// is a valid non-null string for any Id, valid or invalid. No assertions are generated.
const char* GetName();
int GetId() const { return Id; }
/// Returns true if the specified register is caller-saved (volatile).
static inline bool IsCallerSaved(uint id);
};
class xRegisterInt : public xRegisterBase
@ -347,7 +351,14 @@ namespace x86Emitter
explicit xRegister8(const xRegisterInt& other)
: _parent(1, other.Id)
{
pxAssertDev(other.canMapIDTo(1), "spl, bpl, sil, dil not yet supported");
if (!other.canMapIDTo(1))
Id |= 0x10;
}
xRegister8(int regId, bool ext8bit)
: _parent(1, regId)
{
if (ext8bit)
Id |= 0x10;
}
bool operator==(const xRegister8& src) const { return Id == src.Id; }
@ -447,6 +458,9 @@ namespace x86Emitter
/// arg_number is the argument position from the left, starting with 0.
/// sse_number is the argument position relative to the number of vector registers.
static const inline xRegisterSSE& GetArgRegister(uint arg_number, uint sse_number, bool ymm = false);
/// Returns true if the specified register is caller-saved (volatile).
static inline bool IsCallerSaved(uint id);
};
class xRegisterCL : public xRegister8
@ -617,7 +631,10 @@ extern const xRegister16
extern const xRegister8
al, dl, bl,
ah, ch, dh, bh;
ah, ch, dh, bh,
spl, bpl, sil, dil,
r8b, r9b, r10b, r11b,
r12b, r13b, r14b, r15b;
extern const xAddressReg
arg1reg, arg2reg,
@ -636,6 +653,28 @@ extern const xRegister32
extern const xRegisterCL cl; // I'm special!
/// Reports whether the general purpose register with the given id is caller-saved
/// (volatile) under the calling convention of the platform being compiled for.
bool xRegisterBase::IsCallerSaved(uint id)
{
	// Ids 0-2 (rax, rcx, rdx) and 8-11 (r8-r11) are scratch registers on both ABIs.
	const bool shared_scratch = (id <= 2) || (id >= 8 && id <= 11);

#ifdef _WIN32
	// Windows x64: RAX, RCX, RDX, R8, R9, R10 and R11 are the volatile integer registers.
	return shared_scratch;
#else
	// Non-Windows targets additionally treat ids 6 and 7 (rsi, rdi) as scratch.
	return shared_scratch || (id == 6) || (id == 7);
#endif
}
/// Reports whether the vector register with the given id is caller-saved (volatile)
/// under the calling convention of the platform being compiled for.
bool xRegisterSSE::IsCallerSaved(uint id)
{
#ifdef _WIN32
	// Only XMM0-XMM5 are volatile; XMM6 through XMM15 are preserved across calls.
	// (The upper 128 bits are always volatile.)
	const bool is_volatile = (id < 6);
#else
	// Every vector register is volatile.
	const bool is_volatile = true;
#endif
	return is_volatile;
}
const xRegisterSSE& xRegisterSSE::GetInstance(uint id)
{
static const xRegisterSSE* const m_tbl_xmmRegs[] =

View File

@ -172,6 +172,14 @@ TEST(CodegenTests, SSETest)
CODEGEN_TEST_64(xMOVD(r10, xmm1), "66 49 0f 7e ca");
CODEGEN_TEST_64(xMOVD(rax, xmm10), "66 4c 0f 7e d0");
CODEGEN_TEST_64(xMOVD(r10, xmm10), "66 4d 0f 7e d2");
CODEGEN_TEST_64(xPINSR.B(xmm0, eax, 1), "66 0f 3a 20 c0 01");
CODEGEN_TEST_64(xPINSR.W(xmm0, eax, 1), "66 0f c4 c0 01");
CODEGEN_TEST_64(xPINSR.D(xmm0, eax, 1), "66 0f 3a 22 c0 01");
CODEGEN_TEST_64(xPINSR.Q(xmm0, rax, 1), "66 48 0f 3a 22 c0 01");
CODEGEN_TEST_64(xPEXTR.B(eax, xmm0, 1), "66 0f 3a 14 c0 01");
CODEGEN_TEST_64(xPEXTR.W(eax, xmm0, 1), "66 0f c5 c0 01");
CODEGEN_TEST_64(xPEXTR.D(eax, xmm0, 1), "66 0f 3a 16 c0 01");
CODEGEN_TEST_64(xPEXTR.Q(rax, xmm0, 1), "66 48 0f 3a 16 c0 01");
}
TEST(CodegenTests, AVXTest)
@ -264,3 +272,23 @@ TEST(CodegenTests, AVX256Test)
CODEGEN_TEST_64(xVMOVMSKPS(eax, ymm1), "c5 fc 50 c1");
CODEGEN_TEST_64(xVMOVMSKPD(eax, ymm1), "c5 fd 50 c1");
}
// Checks the byte encoding of SETL for every 8-bit register name exercised below,
// covering the three prefix situations an 8-bit operand can be in.
TEST(CodegenTests, Extended8BitTest)
{
	// Legacy low-byte registers: no REX prefix expected.
	CODEGEN_TEST_64(xSETL(al), "0f 9c c0");
	CODEGEN_TEST_64(xSETL(cl), "0f 9c c1");
	CODEGEN_TEST_64(xSETL(dl), "0f 9c c2");
	CODEGEN_TEST_64(xSETL(bl), "0f 9c c3");
	// spl/bpl/sil/dil share register numbers 4-7 with ah/ch/dh/bh, so an otherwise
	// empty REX prefix (0x40) must be emitted to select them.
	CODEGEN_TEST_64(xSETL(spl), "40 0f 9c c4");
	CODEGEN_TEST_64(xSETL(bpl), "40 0f 9c c5");
	CODEGEN_TEST_64(xSETL(sil), "40 0f 9c c6");
	CODEGEN_TEST_64(xSETL(dil), "40 0f 9c c7");
	// r8b-r15b: the REX prefix carries the extension bit (0x41) and the ModRM byte
	// holds the low three bits of the register number.
	CODEGEN_TEST_64(xSETL(r8b), "41 0f 9c c0");
	CODEGEN_TEST_64(xSETL(r9b), "41 0f 9c c1");
	CODEGEN_TEST_64(xSETL(r10b), "41 0f 9c c2");
	CODEGEN_TEST_64(xSETL(r11b), "41 0f 9c c3");
	CODEGEN_TEST_64(xSETL(r12b), "41 0f 9c c4");
	CODEGEN_TEST_64(xSETL(r13b), "41 0f 9c c5");
	CODEGEN_TEST_64(xSETL(r14b), "41 0f 9c c6");
	CODEGEN_TEST_64(xSETL(r15b), "41 0f 9c c7");
}