2023-12-22 11:57:49 +00:00
|
|
|
// SPDX-FileCopyrightText: 2002-2023 PCSX2 Dev Team
|
|
|
|
// SPDX-License-Identifier: LGPL-3.0+
|
2009-04-21 05:29:14 +00:00
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
2016-11-12 15:28:37 +00:00
|
|
|
namespace x86Emitter
|
|
|
|
{
|
2009-11-05 23:39:45 +00:00
|
|
|
|
2021-09-06 18:28:26 +00:00
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// xImplSimd_Shuffle
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
struct xImplSimd_Shuffle
|
|
|
|
{
|
|
|
|
inline void _selector_assertion_check(u8 selector) const;
|
|
|
|
|
|
|
|
void PS(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const;
|
|
|
|
void PS(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const;
|
|
|
|
|
|
|
|
void PD(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const;
|
|
|
|
void PD(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const;
|
|
|
|
};
|
|
|
|
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// xImplSimd_PShuffle
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
struct xImplSimd_PShuffle
|
|
|
|
{
|
|
|
|
// Copies doublewords from src and inserts them into dest at dword locations selected
|
|
|
|
// with the order operand (8 bit immediate).
|
|
|
|
const xImplSimd_DestRegImmSSE D;
|
|
|
|
|
|
|
|
// Copies words from the low quadword of src and inserts them into the low quadword
|
|
|
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
|
|
|
// The high quadword of src is copied to the high quadword of dest.
|
|
|
|
const xImplSimd_DestRegImmSSE LW;
|
|
|
|
|
|
|
|
// Copies words from the high quadword of src and inserts them into the high quadword
|
|
|
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
|
|
|
// The low quadword of src is copied to the low quadword of dest.
|
|
|
|
const xImplSimd_DestRegImmSSE HW;
|
|
|
|
|
|
|
|
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
|
|
|
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
|
|
|
// shuffle control mask is set, then constant zero is written in the result byte.
|
|
|
|
// Each byte in the shuffle control mask forms an index to permute the corresponding
|
|
|
|
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
|
|
|
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
|
|
|
//
|
|
|
|
const xImplSimd_DestRegEither B;
|
|
|
|
|
|
|
|
// below is my test bed for a new system, free of subclasses. Was supposed to improve intellisense
|
|
|
|
// but it doesn't (makes it worse). Will try again in MSVC 2010. --air
|
2016-11-12 15:28:37 +00:00
|
|
|
|
|
|
|
#if 0
|
2010-07-05 19:15:19 +00:00
|
|
|
// Copies words from src and inserts them into dest at word locations selected with
|
|
|
|
// the order operand (8 bit immediate).
|
|
|
|
|
|
|
|
// Copies doublewords from src and inserts them into dest at dword locations selected
|
|
|
|
// with the order operand (8 bit immediate).
|
|
|
|
void D( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
|
|
|
|
void D( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
|
|
|
|
|
|
|
|
// Copies words from the low quadword of src and inserts them into the low quadword
|
|
|
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
|
|
|
// The high quadword of src is copied to the high quadword of dest.
|
|
|
|
void LW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
|
|
|
|
void LW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
|
|
|
|
|
|
|
|
// Copies words from the high quadword of src and inserts them into the high quadword
|
|
|
|
// of dest at word locations selected with the order operand (8 bit immediate).
|
|
|
|
// The low quadword of src is copied to the low quadword of dest.
|
|
|
|
void HW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
|
|
|
|
void HW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
|
|
|
|
|
|
|
|
// [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
|
|
|
|
// control mask in src. If the most significant bit (bit[7]) of each byte of the
|
|
|
|
// shuffle control mask is set, then constant zero is written in the result byte.
|
|
|
|
// Each byte in the shuffle control mask forms an index to permute the corresponding
|
|
|
|
// byte in dest. The value of each index is the least significant 4 bits (128-bit
|
|
|
|
// operation) or 3 bits (64-bit operation) of the shuffle control byte.
|
|
|
|
//
|
|
|
|
void B( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
|
|
|
|
void B( const xRegisterSSE& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
|
2016-11-12 15:28:37 +00:00
|
|
|
#endif
|
2021-09-06 18:28:26 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// SimdImpl_PUnpack
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
struct SimdImpl_PUnpack
|
|
|
|
{
|
|
|
|
// Unpack and interleave low-order bytes from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegEither LBW;
|
|
|
|
// Unpack and interleave low-order words from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegEither LWD;
|
|
|
|
// Unpack and interleave low-order doublewords from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegEither LDQ;
|
|
|
|
// Unpack and interleave low-order quadwords from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegSSE LQDQ;
|
|
|
|
|
|
|
|
// Unpack and interleave high-order bytes from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegEither HBW;
|
|
|
|
// Unpack and interleave high-order words from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegEither HWD;
|
|
|
|
// Unpack and interleave high-order doublewords from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegEither HDQ;
|
|
|
|
// Unpack and interleave high-order quadwords from src and dest into dest.
|
|
|
|
const xImplSimd_DestRegSSE HQDQ;
|
|
|
|
};
|
|
|
|
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// SimdImpl_Pack
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// Pack with Signed or Unsigned Saturation
|
|
|
|
//
|
|
|
|
struct SimdImpl_Pack
|
|
|
|
{
|
|
|
|
// Converts packed signed word integers from src and dest into packed signed
|
|
|
|
// byte integers in dest, using signed saturation.
|
|
|
|
const xImplSimd_DestRegEither SSWB;
|
|
|
|
|
|
|
|
// Converts packed signed dword integers from src and dest into packed signed
|
|
|
|
// word integers in dest, using signed saturation.
|
|
|
|
const xImplSimd_DestRegEither SSDW;
|
|
|
|
|
|
|
|
// Converts packed unsigned word integers from src and dest into packed unsigned
|
|
|
|
// byte integers in dest, using unsigned saturation.
|
|
|
|
const xImplSimd_DestRegEither USWB;
|
|
|
|
|
|
|
|
// [SSE-4.1] Converts packed unsigned dword integers from src and dest into packed
|
|
|
|
// unsigned word integers in dest, using signed saturation.
|
|
|
|
const xImplSimd_DestRegSSE USDW;
|
|
|
|
};
|
|
|
|
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// SimdImpl_Unpack
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
struct xImplSimd_Unpack
|
|
|
|
{
|
|
|
|
// Unpacks the high doubleword [single-precision] values from src and dest into
|
|
|
|
// dest, such that the result of dest looks like this:
|
|
|
|
// dest[0] <- dest[2]
|
|
|
|
// dest[1] <- src[2]
|
|
|
|
// dest[2] <- dest[3]
|
|
|
|
// dest[3] <- src[3]
|
|
|
|
//
|
|
|
|
const xImplSimd_DestRegSSE HPS;
|
|
|
|
|
|
|
|
// Unpacks the high quadword [double-precision] values from src and dest into
|
|
|
|
// dest, such that the result of dest looks like this:
|
|
|
|
// dest.lo <- dest.hi
|
|
|
|
// dest.hi <- src.hi
|
|
|
|
//
|
|
|
|
const xImplSimd_DestRegSSE HPD;
|
|
|
|
|
|
|
|
// Unpacks the low doubleword [single-precision] values from src and dest into
|
|
|
|
// dest, such that the result of dest looks like this:
|
|
|
|
// dest[3] <- src[1]
|
|
|
|
// dest[2] <- dest[1]
|
|
|
|
// dest[1] <- src[0]
|
|
|
|
// dest[0] <- dest[0]
|
|
|
|
//
|
|
|
|
const xImplSimd_DestRegSSE LPS;
|
|
|
|
|
|
|
|
// Unpacks the low quadword [double-precision] values from src and dest into
|
|
|
|
// dest, effectively moving the low portion of src into the upper portion of dest.
|
|
|
|
// The result of dest is loaded as such:
|
|
|
|
// dest.hi <- src.lo
|
|
|
|
// dest.lo <- dest.lo [remains unchanged!]
|
|
|
|
//
|
|
|
|
const xImplSimd_DestRegSSE LPD;
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// SimdImpl_PInsert
|
|
|
|
// --------------------------------------------------------------------------------------
|
|
|
|
// PINSRW/B/D [all but Word form are SSE4.1 only!]
|
|
|
|
//
|
|
|
|
struct xImplSimd_PInsert
|
|
|
|
{
|
2022-10-23 08:58:46 +00:00
|
|
|
void B(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void B(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;
|
2022-10-23 08:58:46 +00:00
|
|
|
|
2021-09-06 18:28:26 +00:00
|
|
|
void W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void W(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;
|
2009-04-21 05:29:14 +00:00
|
|
|
|
2022-10-23 08:58:46 +00:00
|
|
|
void D(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void D(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) const;
|
2009-04-21 05:29:14 +00:00
|
|
|
|
2022-10-23 08:58:46 +00:00
|
|
|
void Q(const xRegisterSSE& to, const xRegister64& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void Q(const xRegisterSSE& to, const xIndirect64& from, u8 imm8) const;
|
2021-09-06 18:28:26 +00:00
|
|
|
};
|
2009-04-21 05:29:14 +00:00
|
|
|
|
2021-09-06 18:28:26 +00:00
|
|
|
//////////////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// PEXTRW/B/D [all but Word form are SSE4.1 only!]
|
|
|
|
//
|
|
|
|
// Note: Word form's indirect memory form is only available in SSE4.1.
|
|
|
|
//
|
|
|
|
struct SimdImpl_PExtract
|
|
|
|
{
|
2022-10-23 08:58:46 +00:00
|
|
|
// [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
|
|
|
|
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
|
|
|
// byte value from src into an x86 32 bit register.
|
|
|
|
void B(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void B(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;
|
2022-10-23 08:58:46 +00:00
|
|
|
|
2021-09-06 18:28:26 +00:00
|
|
|
// Copies the word element specified by imm8 from src to dest. The upper bits
|
|
|
|
// of dest are zero-extended (cleared). This can be used to extract any single packed
|
|
|
|
// word value from src into an x86 32 bit register.
|
|
|
|
//
|
|
|
|
// [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
|
|
|
|
//
|
|
|
|
void W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void W(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;
|
2021-09-06 18:28:26 +00:00
|
|
|
|
|
|
|
// [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
|
|
|
|
// used to extract any single packed dword value from src into an x86 32 bit register.
|
2022-10-23 08:58:46 +00:00
|
|
|
void D(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void D(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) const;
|
2022-10-23 08:58:46 +00:00
|
|
|
|
|
|
|
// Insert a qword integer value from r/m64 into the xmm1 at the destination element specified by imm8.
|
|
|
|
void Q(const xRegister64& to, const xRegisterSSE& from, u8 imm8) const;
|
2022-11-06 07:22:00 +00:00
|
|
|
void Q(const xIndirect64& dest, const xRegisterSSE& from, u8 imm8) const;
|
2021-09-06 18:28:26 +00:00
|
|
|
};
|
|
|
|
} // namespace x86Emitter
|