mirror of https://github.com/PCSX2/pcsx2.git
x86Emitter: Add 8-bit variants for spl..r15b
Also fixes PINSR/PEXTR.
This commit is contained in:
parent
8de4e190dc
commit
3d6a550f23
|
@ -184,17 +184,6 @@ namespace x86Emitter
|
|||
};
|
||||
|
||||
|
||||
struct xImplSimd_InsertExtractHelper
|
||||
{
|
||||
u16 Opcode;
|
||||
|
||||
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||
void operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
|
||||
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||
void operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// SimdImpl_PInsert
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
@ -202,17 +191,19 @@ namespace x86Emitter
|
|||
//
|
||||
struct xImplSimd_PInsert
|
||||
{
|
||||
void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
void B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
|
||||
|
||||
void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
void W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
|
||||
|
||||
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||
xImplSimd_InsertExtractHelper B;
|
||||
void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
||||
void D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
|
||||
|
||||
// [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
|
||||
xImplSimd_InsertExtractHelper D;
|
||||
void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const;
|
||||
void Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const;
|
||||
};
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
// PEXTRW/B/D/Q [all but the register-destination Word form are SSE4.1 only!]
|
||||
//
|
||||
|
@ -220,6 +211,12 @@ namespace x86Emitter
|
|||
//
|
||||
struct SimdImpl_PExtract
|
||||
{
|
||||
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||
// byte value from src into an x86 32 bit register.
|
||||
void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
|
||||
// Copies the word element specified by imm8 from src to dest. The upper bits
|
||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||
// word value from src into an x86 32 bit register.
|
||||
|
@ -229,13 +226,13 @@ namespace x86Emitter
|
|||
void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
|
||||
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
||||
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
||||
// byte value from src into an x86 32 bit register.
|
||||
const xImplSimd_InsertExtractHelper B;
|
||||
|
||||
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
|
||||
// used to extract any single packed dword value from src into an x86 32 bit register.
|
||||
const xImplSimd_InsertExtractHelper D;
|
||||
void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
|
||||
// [SSE-4.1] Copies the qword element specified by imm8 from src to dest. This can be used to extract any single packed qword value from src into an x86 64 bit register.
|
||||
void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const;
|
||||
void Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const;
|
||||
};
|
||||
} // namespace x86Emitter
|
||||
|
|
|
@ -473,22 +473,30 @@ namespace x86Emitter
|
|||
xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3);
|
||||
}
|
||||
|
||||
void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const
|
||||
{
|
||||
xOpWrite0F(0x66, Opcode, to, from, imm8);
|
||||
}
|
||||
|
||||
void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const
|
||||
{
|
||||
xOpWrite0F(0x66, Opcode, to, from, imm8);
|
||||
}
|
||||
// xPINSR.{B,W,D,Q} definitions. Every overload forwards to xOpWrite0F with the 0x66 prefix,
// passing the destination XMM register first, the source (GPR or memory operand) second and
// imm8 last. Opcode constants used here: B = 0x203a, W = 0xc4, D and Q = 0x223a.
// NOTE(review): the D and Q xIndirectVoid overloads below make byte-identical xOpWrite0F
// calls with identical argument types. Unless xIndirectVoid itself carries the operand
// width, Q(mem) would be emitted exactly like D(mem) -- confirm that a REX.W prefix is
// produced for the 64-bit memory form.
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::B(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x203a, to, from, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
|
||||
void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::D(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
void xImplSimd_PInsert::Q(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0x223a, to, from, imm8); }
|
||||
|
||||
// xPEXTR.{B,W,D,Q} definitions. Every overload forwards to xOpWrite0F with the 0x66 prefix.
// Opcode constants used here: B = 0x143a, W = 0xc5 (register destination) or 0x153a (memory
// destination), D and Q = 0x163a.
// Operand order differs between overloads: the memory overloads pass (from, dest) while the
// register overloads pass (to, from).
// NOTE(review): Intel documents the 0F 3A 14/16 forms with the XMM source in ModRM.reg and
// the destination in ModRM.rm (only the 0F C5 word form puts the GPR in ModRM.reg). If the
// first register argument of xOpWrite0F is what lands in ModRM.reg, the register overloads
// of B, D and Q look swapped. The existing codegen tests only use eax/rax with xmm0 (both
// id 0) and cannot detect this -- confirm with non-zero register ids.
// NOTE(review): the D and Q xIndirectVoid overloads make byte-identical xOpWrite0F calls;
// unless xIndirectVoid carries the operand width, Q(mem) would be emitted exactly like
// D(mem) -- confirm that a REX.W prefix is produced for the 64-bit memory form.
void SimdImpl_PExtract::B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, to, from, imm8); }
|
||||
void SimdImpl_PExtract::B(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x143a, from, dest, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, to, from, imm8); }
|
||||
void SimdImpl_PExtract::W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, to, from, imm8); }
|
||||
void SimdImpl_PExtract::D(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
|
||||
|
||||
void SimdImpl_PExtract::Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, to, from, imm8); }
|
||||
void SimdImpl_PExtract::Q(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x163a, from, dest, imm8); }
|
||||
|
||||
const xImplSimd_Shuffle xSHUF = {};
|
||||
|
||||
const xImplSimd_PShuffle xPSHUF =
|
||||
|
@ -529,17 +537,8 @@ namespace x86Emitter
|
|||
{0x66, 0x14}, // LPD
|
||||
};
|
||||
|
||||
const xImplSimd_PInsert xPINSR =
|
||||
{
|
||||
{0x203a}, // B
|
||||
{0x223a}, // D
|
||||
};
|
||||
|
||||
const SimdImpl_PExtract xPEXTR =
|
||||
{
|
||||
{0x143a}, // B
|
||||
{0x163a}, // D
|
||||
};
|
||||
const xImplSimd_PInsert xPINSR;
|
||||
const SimdImpl_PExtract xPEXTR;
|
||||
|
||||
// =====================================================================================================
|
||||
// SIMD Move And Blend Instructions
|
||||
|
|
|
@ -160,7 +160,13 @@ const xRegister8
|
|||
al(0),
|
||||
dl(2), bl(3),
|
||||
ah(4), ch(5),
|
||||
dh(6), bh(7);
|
||||
dh(6), bh(7),
|
||||
spl(4, true), bpl(5, true),
|
||||
sil(6, true), dil(7, true),
|
||||
r8b(8), r9b(9),
|
||||
r10b(10), r11b(11),
|
||||
r12b(12), r13b(13),
|
||||
r14b(14), r15b(15);
|
||||
|
||||
#if defined(_WIN32)
|
||||
const xAddressReg
|
||||
|
@ -436,10 +442,10 @@ const xRegister32
|
|||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
__emitinline static void EmitRex(bool w, bool r, bool x, bool b)
|
||||
__emitinline static void EmitRex(bool w, bool r, bool x, bool b, bool ext8bit = false)
|
||||
{
|
||||
const u8 rex = 0x40 | (w << 3) | (r << 2) | (x << 1) | (u8)b;
|
||||
if (rex != 0x40)
|
||||
if (rex != 0x40 || ext8bit)
|
||||
xWrite8(rex);
|
||||
}
|
||||
|
||||
|
@ -473,7 +479,7 @@ const xRegister32
|
|||
bool r = false;
|
||||
bool x = false;
|
||||
bool b = reg2.IsExtended();
|
||||
EmitRex(w, r, x, b);
|
||||
EmitRex(w, r, x, b, reg2.IsExtended8Bit());
|
||||
}
|
||||
|
||||
void EmitRex(const xRegisterBase& reg1, const xRegisterBase& reg2)
|
||||
|
@ -482,7 +488,7 @@ const xRegister32
|
|||
bool r = reg1.IsExtended();
|
||||
bool x = false;
|
||||
bool b = reg2.IsExtended();
|
||||
EmitRex(w, r, x, b);
|
||||
EmitRex(w, r, x, b, reg2.IsExtended8Bit());
|
||||
}
|
||||
|
||||
void EmitRex(const xRegisterBase& reg1, const void* src)
|
||||
|
@ -492,7 +498,7 @@ const xRegister32
|
|||
bool r = reg1.IsExtended();
|
||||
bool x = false;
|
||||
bool b = false; // FIXME src.IsExtended();
|
||||
EmitRex(w, r, x, b);
|
||||
EmitRex(w, r, x, b, reg1.IsExtended8Bit());
|
||||
}
|
||||
|
||||
void EmitRex(const xRegisterBase& reg1, const xIndirectVoid& sib)
|
||||
|
@ -506,7 +512,7 @@ const xRegister32
|
|||
b = x;
|
||||
x = false;
|
||||
}
|
||||
EmitRex(w, r, x, b);
|
||||
EmitRex(w, r, x, b, reg1.IsExtended8Bit());
|
||||
}
|
||||
|
||||
// For use by instructions that are implicitly wide
|
||||
|
|
|
@ -268,7 +268,8 @@ namespace x86Emitter
|
|||
|
||||
bool IsEmpty() const { return Id < 0; }
|
||||
bool IsInvalid() const { return Id == xRegId_Invalid; }
|
||||
bool IsExtended() const { return Id > 7; } // Register 8-15 need an extra bit to be selected
|
||||
bool IsExtended() const { return (Id >= 0 && (Id & 0x0F) > 7); } // Register 8-15 need an extra bit to be selected
|
||||
// True when this is an 8-bit operand whose Id has the 0x10 marker bit set (the xRegister8
// constructors OR 0x10 into Id for spl/bpl/sil/dil). The EmitRex helpers pass this flag on
// so that a REX prefix byte is written even when none of the W/R/X/B bits are set.
bool IsExtended8Bit() const { return (Is8BitOp() && Id >= 0x10); }
|
||||
bool IsMem() const { return false; }
|
||||
bool IsReg() const { return true; }
|
||||
|
||||
|
@ -290,6 +291,9 @@ namespace x86Emitter
|
|||
// is a valid non-null string for any Id, valid or invalid. No assertions are generated.
|
||||
const char* GetName();
|
||||
int GetId() const { return Id; }
|
||||
|
||||
/// Returns true if the specified register is caller-saved (volatile).
|
||||
static inline bool IsCallerSaved(uint id);
|
||||
};
|
||||
|
||||
class xRegisterInt : public xRegisterBase
|
||||
|
@ -347,7 +351,14 @@ namespace x86Emitter
|
|||
explicit xRegister8(const xRegisterInt& other)
|
||||
: _parent(1, other.Id)
|
||||
{
|
||||
pxAssertDev(other.canMapIDTo(1), "spl, bpl, sil, dil not yet supported");
|
||||
if (!other.canMapIDTo(1))
|
||||
Id |= 0x10;
|
||||
}
|
||||
// Constructs an 8-bit register from a raw register id.
//   regId   - register id forwarded to the parent constructor (with operand size 1).
//   ext8bit - when true, the 0x10 marker bit is OR'd into Id; the spl/bpl/sil/dil
//             definitions pass true so they can be told apart from ah/ch/dh/bh, which
//             share the ids 4-7.
xRegister8(int regId, bool ext8bit)
|
||||
: _parent(1, regId)
|
||||
{
|
||||
if (ext8bit)
|
||||
Id |= 0x10;
|
||||
}
|
||||
|
||||
bool operator==(const xRegister8& src) const { return Id == src.Id; }
|
||||
|
@ -447,6 +458,9 @@ namespace x86Emitter
|
|||
/// arg_number is the argument position from the left, starting with 0.
|
||||
/// sse_number is the argument position relative to the number of vector registers.
|
||||
static const inline xRegisterSSE& GetArgRegister(uint arg_number, uint sse_number, bool ymm = false);
|
||||
|
||||
/// Returns true if the specified register is caller-saved (volatile).
|
||||
static inline bool IsCallerSaved(uint id);
|
||||
};
|
||||
|
||||
class xRegisterCL : public xRegister8
|
||||
|
@ -617,7 +631,10 @@ extern const xRegister16
|
|||
|
||||
extern const xRegister8
|
||||
al, dl, bl,
|
||||
ah, ch, dh, bh;
|
||||
ah, ch, dh, bh,
|
||||
spl, bpl, sil, dil,
|
||||
r8b, r9b, r10b, r11b,
|
||||
r12b, r13b, r14b, r15b;
|
||||
|
||||
extern const xAddressReg
|
||||
arg1reg, arg2reg,
|
||||
|
@ -636,6 +653,28 @@ extern const xRegister32
|
|||
|
||||
extern const xRegisterCL cl; // I'm special!
|
||||
|
||||
// Returns true if the general-purpose register with the given id is caller-saved
// (volatile) under the calling convention selected at compile time by _WIN32.
// The id is compared as a plain register number: 0-2 and 8-11 are volatile on both
// platforms; 6 and 7 are additionally volatile on the non-Windows path.
// NOTE(review): ids are assumed to be in the 0-15 range with no 0x10 marker bit set --
// confirm at call sites that pass xRegister8 ids.
bool xRegisterBase::IsCallerSaved(uint id)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// The x64 ABI considers the registers RAX, RCX, RDX, R8, R9, R10, R11, and XMM0-XMM5 volatile.
|
||||
return (id <= 2 || (id >= 8 && id <= 11));
|
||||
#else
|
||||
// rax, rdi, rsi, rdx, rcx, r8, r9, r10, r11 are scratch registers.
|
||||
return (id <= 2 || id == 6 || id == 7 || (id >= 8 && id <= 11));
|
||||
#endif
|
||||
}
|
||||
|
||||
// Returns true if the vector register with the given id is caller-saved (volatile) under
// the calling convention selected at compile time by _WIN32: ids below 6 on Windows,
// every id otherwise.
bool xRegisterSSE::IsCallerSaved(uint id)
|
||||
{
|
||||
#ifdef _WIN32
|
||||
// XMM6 through XMM15 are saved. Upper 128 bits is always volatile.
|
||||
return (id < 6);
|
||||
#else
|
||||
// All vector registers are volatile.
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
const xRegisterSSE& xRegisterSSE::GetInstance(uint id)
|
||||
{
|
||||
static const xRegisterSSE* const m_tbl_xmmRegs[] =
|
||||
|
|
|
@ -172,6 +172,14 @@ TEST(CodegenTests, SSETest)
|
|||
CODEGEN_TEST_64(xMOVD(r10, xmm1), "66 49 0f 7e ca");
|
||||
CODEGEN_TEST_64(xMOVD(rax, xmm10), "66 4c 0f 7e d0");
|
||||
CODEGEN_TEST_64(xMOVD(r10, xmm10), "66 4d 0f 7e d2");
|
||||
CODEGEN_TEST_64(xPINSR.B(xmm0, eax, 1), "66 0f 3a 20 c0 01");
|
||||
CODEGEN_TEST_64(xPINSR.W(xmm0, eax, 1), "66 0f c4 c0 01");
|
||||
CODEGEN_TEST_64(xPINSR.D(xmm0, eax, 1), "66 0f 3a 22 c0 01");
|
||||
CODEGEN_TEST_64(xPINSR.Q(xmm0, rax, 1), "66 48 0f 3a 22 c0 01");
|
||||
CODEGEN_TEST_64(xPEXTR.B(eax, xmm0, 1), "66 0f 3a 14 c0 01");
|
||||
CODEGEN_TEST_64(xPEXTR.W(eax, xmm0, 1), "66 0f c5 c0 01");
|
||||
CODEGEN_TEST_64(xPEXTR.D(eax, xmm0, 1), "66 0f 3a 16 c0 01");
|
||||
CODEGEN_TEST_64(xPEXTR.Q(rax, xmm0, 1), "66 48 0f 3a 16 c0 01");
|
||||
}
|
||||
|
||||
TEST(CodegenTests, AVXTest)
|
||||
|
@ -264,3 +272,23 @@ TEST(CodegenTests, AVX256Test)
|
|||
CODEGEN_TEST_64(xVMOVMSKPS(eax, ymm1), "c5 fc 50 c1");
|
||||
CODEGEN_TEST_64(xVMOVMSKPD(eax, ymm1), "c5 fd 50 c1");
|
||||
}
|
||||
|
||||
// Checks the byte encoding of SETL for every 8-bit register name.
// Expected prefixes: none for al/cl/dl/bl, a bare REX byte (0x40) for spl/bpl/sil/dil,
// and REX.B (0x41) for r8b-r15b.
TEST(CodegenTests, Extended8BitTest)
|
||||
{
|
||||
// Legacy low-byte registers: no REX prefix expected.
CODEGEN_TEST_64(xSETL(al), "0f 9c c0");
|
||||
CODEGEN_TEST_64(xSETL(cl), "0f 9c c1");
|
||||
CODEGEN_TEST_64(xSETL(dl), "0f 9c c2");
|
||||
CODEGEN_TEST_64(xSETL(bl), "0f 9c c3");
|
||||
// spl/bpl/sil/dil: ModRM ids 4-7 with an otherwise empty REX prefix (0x40).
CODEGEN_TEST_64(xSETL(spl), "40 0f 9c c4");
|
||||
CODEGEN_TEST_64(xSETL(bpl), "40 0f 9c c5");
|
||||
CODEGEN_TEST_64(xSETL(sil), "40 0f 9c c6");
|
||||
CODEGEN_TEST_64(xSETL(dil), "40 0f 9c c7");
|
||||
// r8b-r15b: REX.B set (0x41), low three id bits in ModRM.
CODEGEN_TEST_64(xSETL(r8b), "41 0f 9c c0");
|
||||
CODEGEN_TEST_64(xSETL(r9b), "41 0f 9c c1");
|
||||
CODEGEN_TEST_64(xSETL(r10b), "41 0f 9c c2");
|
||||
CODEGEN_TEST_64(xSETL(r11b), "41 0f 9c c3");
|
||||
CODEGEN_TEST_64(xSETL(r12b), "41 0f 9c c4");
|
||||
CODEGEN_TEST_64(xSETL(r13b), "41 0f 9c c5");
|
||||
CODEGEN_TEST_64(xSETL(r14b), "41 0f 9c c6");
|
||||
CODEGEN_TEST_64(xSETL(r15b), "41 0f 9c c7");
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue