mirror of https://github.com/PCSX2/pcsx2.git
x86emitter: Add some AVX/AVX2 instructions and YMM registers
This commit is contained in:
parent
ac10e00d7c
commit
375c0a02bb
|
@ -32,6 +32,7 @@ target_sources(common PRIVATE
|
|||
Timer.cpp
|
||||
ThreadPool.cpp
|
||||
WindowInfo.cpp
|
||||
emitter/avx.cpp
|
||||
emitter/bmi.cpp
|
||||
emitter/cpudetect.cpp
|
||||
emitter/fpu.cpp
|
||||
|
|
|
@ -105,6 +105,7 @@
|
|||
<ClCompile Include="Windows\WinThreads.cpp" />
|
||||
<ClCompile Include="Misc.cpp" />
|
||||
<ClCompile Include="Semaphore.cpp" />
|
||||
<ClCompile Include="emitter\avx.cpp" />
|
||||
<ClCompile Include="emitter\bmi.cpp" />
|
||||
<ClCompile Include="emitter\cpudetect.cpp" />
|
||||
<ClCompile Include="emitter\fpu.cpp" />
|
||||
|
@ -188,6 +189,7 @@
|
|||
<ClInclude Include="Vulkan\Util.h" />
|
||||
<ClInclude Include="WindowInfo.h" />
|
||||
<ClInclude Include="Threading.h" />
|
||||
<ClInclude Include="emitter\implement\avx.h" />
|
||||
<ClInclude Include="emitter\implement\bmi.h" />
|
||||
<ClInclude Include="emitter\cpudetect_internal.h" />
|
||||
<ClInclude Include="emitter\instructions.h" />
|
||||
|
|
|
@ -34,6 +34,9 @@
|
|||
<ClCompile Include="emitter\LnxCpuDetect.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="emitter\avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Linux\LnxHostSys.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
@ -297,6 +300,9 @@
|
|||
<ClInclude Include="emitter\implement\movs.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="emitter\implement\avx.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Threading.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -0,0 +1,174 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2022 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "common/emitter/internal.h"
|
||||
#include "common/emitter/tools.h"
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
// Packed single-precision moves.
// Initializer layout follows xImplAVX_Move: { Prefix, LoadOpcode, StoreOpcode }.
const xImplAVX_Move xVMOVAPS = {0x00, 0x28, 0x29};
const xImplAVX_Move xVMOVUPS = {0x00, 0x10, 0x11};
|
||||
|
||||
// Floating-point arithmetic tables.
// Each row is { Prefix, Opcode } and the rows are consumed positionally as the
// PS, PD, SS and SD members of xImplAVX_ArithFloat, so their order must not change.
const xImplAVX_ArithFloat xVADD = {
	/* PS: VADDPS */ {0x00, 0x58},
	/* PD: VADDPD */ {0x66, 0x58},
	/* SS: VADDSS */ {0xF3, 0x58},
	/* SD: VADDSD */ {0xF2, 0x58},
};

const xImplAVX_ArithFloat xVSUB = {
	/* PS: VSUBPS */ {0x00, 0x5C},
	/* PD: VSUBPD */ {0x66, 0x5C},
	/* SS: VSUBSS */ {0xF3, 0x5C},
	/* SD: VSUBSD */ {0xF2, 0x5C},
};

const xImplAVX_ArithFloat xVMUL = {
	/* PS: VMULPS */ {0x00, 0x59},
	/* PD: VMULPD */ {0x66, 0x59},
	/* SS: VMULSS */ {0xF3, 0x59},
	/* SD: VMULSD */ {0xF2, 0x59},
};

const xImplAVX_ArithFloat xVDIV = {
	/* PS: VDIVPS */ {0x00, 0x5E},
	/* PD: VDIVPD */ {0x66, 0x5E},
	/* SS: VDIVSS */ {0xF3, 0x5E},
	/* SD: VDIVSD */ {0xF2, 0x5E},
};
|
||||
// Floating-point compare helpers, one per comparison predicate.
// Rows are bound positionally to the members of xImplAVX_CmpFloat.
// Note that GE and GT are expressed through the "not less" / "not less-or-equal" predicates.
const xImplAVX_CmpFloat xVCMP = {
	/* EQ */ {SSE2_Equal},
	/* LT */ {SSE2_Less},
	/* LE */ {SSE2_LessOrEqual},
	/* UO */ {SSE2_Unordered},
	/* NE */ {SSE2_NotEqual},
	/* GE */ {SSE2_NotLess},
	/* GT */ {SSE2_NotLessOrEqual},
	/* OR */ {SSE2_Ordered},
};
|
||||
// Packed integer bitwise operations, each described as { Prefix, Opcode }.
const xImplAVX_ThreeArgYMM xVPAND  = {0x66, 0xDB};
const xImplAVX_ThreeArgYMM xVPANDN = {0x66, 0xDF};
const xImplAVX_ThreeArgYMM xVPOR   = {0x66, 0xEB};
const xImplAVX_ThreeArgYMM xVPXOR  = {0x66, 0xEF};
|
||||
// Packed integer compares. Rows are { Prefix, Opcode } and are bound positionally
// to the EQB, EQW, EQD, GTB, GTW and GTD members of xImplAVX_CmpInt.
const xImplAVX_CmpInt xVPCMP = {
	/* EQB: VPCMPEQB */ {0x66, 0x74},
	/* EQW: VPCMPEQW */ {0x66, 0x75},
	/* EQD: VPCMPEQD */ {0x66, 0x76},
	/* GTB: VPCMPGTB */ {0x66, 0x64},
	/* GTW: VPCMPGTW */ {0x66, 0x65},
	/* GTD: VPCMPGTD */ {0x66, 0x66},
};
|
||||
|
||||
void xVMOVMSKPS(const xRegister32& to, const xRegisterSSE& from)
|
||||
{
|
||||
xOpWriteC5(0x00, 0x50, to, xRegister32(), from);
|
||||
}
|
||||
|
||||
void xVMOVMSKPD(const xRegister32& to, const xRegisterSSE& from)
|
||||
{
|
||||
xOpWriteC5(0x66, 0x50, to, xRegister32(), from);
|
||||
}
|
||||
|
||||
void xVZEROUPPER()
|
||||
{
|
||||
// rather than dealing with nonexistant operands..
|
||||
xWrite8(0xc5);
|
||||
xWrite8(0xf8);
|
||||
xWrite8(0x77);
|
||||
}
|
||||
|
||||
void xImplAVX_Move::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const
|
||||
{
|
||||
if (to != from)
|
||||
xOpWriteC5(Prefix, LoadOpcode, to, xRegisterSSE(), from);
|
||||
}
|
||||
|
||||
void xImplAVX_Move::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const
|
||||
{
|
||||
xOpWriteC5(Prefix, LoadOpcode, to, xRegisterSSE(), from);
|
||||
}
|
||||
|
||||
void xImplAVX_Move::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const
|
||||
{
|
||||
xOpWriteC5(Prefix, StoreOpcode, from, xRegisterSSE(), to);
|
||||
}
|
||||
|
||||
void xImplAVX_ThreeArg::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
|
||||
{
|
||||
pxAssert(!to.IsWideSIMD() && !from1.IsWideSIMD() && !from2.IsWideSIMD());
|
||||
xOpWriteC5(Prefix, Opcode, to, from1, from2);
|
||||
}
|
||||
|
||||
void xImplAVX_ThreeArg::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
|
||||
{
|
||||
pxAssert(!to.IsWideSIMD() && !from1.IsWideSIMD());
|
||||
xOpWriteC5(Prefix, Opcode, to, from1, from2);
|
||||
}
|
||||
|
||||
void xImplAVX_ThreeArgYMM::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
|
||||
{
|
||||
xOpWriteC5(Prefix, Opcode, to, from1, from2);
|
||||
}
|
||||
|
||||
void xImplAVX_ThreeArgYMM::operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
|
||||
{
|
||||
xOpWriteC5(Prefix, Opcode, to, from1, from2);
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
|
||||
{
|
||||
xOpWriteC5(0x00, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
|
||||
{
|
||||
xOpWriteC5(0x00, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
|
||||
{
|
||||
xOpWriteC5(0x66, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
|
||||
{
|
||||
xOpWriteC5(0x66, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
|
||||
{
|
||||
xOpWriteC5(0xF3, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
|
||||
{
|
||||
xOpWriteC5(0xF3, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const
|
||||
{
|
||||
xOpWriteC5(0xF2, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
|
||||
void xImplAVX_CmpFloatHelper::SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const
|
||||
{
|
||||
xOpWriteC5(0xF2, 0xC2, to, from1, from2);
|
||||
xWrite8(static_cast<u8>(CType));
|
||||
}
|
||||
} // namespace x86Emitter
|
|
@ -0,0 +1,113 @@
|
|||
/* PCSX2 - PS2 Emulator for PCs
|
||||
* Copyright (C) 2002-2022 PCSX2 Dev Team
|
||||
*
|
||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||
* ation, either version 3 of the License, or (at your option) any later version.
|
||||
*
|
||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||
* PURPOSE. See the GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||
* If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace x86Emitter
|
||||
{
|
||||
struct xImplAVX_Move
|
||||
{
|
||||
u8 Prefix;
|
||||
u8 LoadOpcode;
|
||||
u8 StoreOpcode;
|
||||
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from) const;
|
||||
void operator()(const xRegisterSSE& to, const xIndirectVoid& from) const;
|
||||
void operator()(const xIndirectVoid& to, const xRegisterSSE& from) const;
|
||||
};
|
||||
|
||||
struct xImplAVX_ThreeArg
|
||||
{
|
||||
u8 Prefix;
|
||||
u8 Opcode;
|
||||
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
|
||||
};
|
||||
|
||||
struct xImplAVX_ThreeArgYMM : xImplAVX_ThreeArg
|
||||
{
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
|
||||
void operator()(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
|
||||
};
|
||||
|
||||
struct xImplAVX_ArithFloat
|
||||
{
|
||||
xImplAVX_ThreeArgYMM PS;
|
||||
xImplAVX_ThreeArgYMM PD;
|
||||
xImplAVX_ThreeArg SS;
|
||||
xImplAVX_ThreeArg SD;
|
||||
};
|
||||
|
||||
struct xImplAVX_CmpFloatHelper
|
||||
{
|
||||
SSE2_ComparisonType CType;
|
||||
|
||||
void PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
|
||||
void PS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
|
||||
void PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
|
||||
void PD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
|
||||
|
||||
void SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
|
||||
void SS(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
|
||||
void SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xRegisterSSE& from2) const;
|
||||
void SD(const xRegisterSSE& to, const xRegisterSSE& from1, const xIndirectVoid& from2) const;
|
||||
};
|
||||
|
||||
struct xImplAVX_CmpFloat
|
||||
{
|
||||
xImplAVX_CmpFloatHelper EQ;
|
||||
xImplAVX_CmpFloatHelper LT;
|
||||
xImplAVX_CmpFloatHelper LE;
|
||||
xImplAVX_CmpFloatHelper UO;
|
||||
xImplAVX_CmpFloatHelper NE;
|
||||
xImplAVX_CmpFloatHelper GE;
|
||||
xImplAVX_CmpFloatHelper GT;
|
||||
xImplAVX_CmpFloatHelper OR;
|
||||
};
|
||||
|
||||
struct xImplAVX_CmpInt
|
||||
{
|
||||
// Compare packed bytes for equality.
|
||||
// If a data element in dest is equal to the corresponding date element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplAVX_ThreeArgYMM EQB;
|
||||
|
||||
// Compare packed words for equality.
|
||||
// If a data element in dest is equal to the corresponding date element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplAVX_ThreeArgYMM EQW;
|
||||
|
||||
// Compare packed doublewords [32-bits] for equality.
|
||||
// If a data element in dest is equal to the corresponding date element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplAVX_ThreeArgYMM EQD;
|
||||
|
||||
// Compare packed signed bytes for greater than.
|
||||
// If a data element in dest is greater than the corresponding date element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplAVX_ThreeArgYMM GTB;
|
||||
|
||||
// Compare packed signed words for greater than.
|
||||
// If a data element in dest is greater than the corresponding date element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplAVX_ThreeArgYMM GTW;
|
||||
|
||||
// Compare packed signed doublewords [32-bits] for greater than.
|
||||
// If a data element in dest is greater than the corresponding date element src, the
|
||||
// corresponding data element in dest is set to all 1s; otherwise, it is set to all 0s.
|
||||
const xImplAVX_ThreeArgYMM GTD;
|
||||
};
|
||||
} // namespace x86Emitter
|
|
@ -622,4 +622,24 @@ namespace x86Emitter
|
|||
extern const SimdImpl_Pack xPACK;
|
||||
extern const xImplSimd_PInsert xPINSR;
|
||||
extern const SimdImpl_PExtract xPEXTR;
|
||||
|
||||
// ------------------------------------------------------------------------
|
||||
|
||||
extern const xImplAVX_Move xVMOVAPS;
|
||||
extern const xImplAVX_Move xVMOVUPS;
|
||||
extern const xImplAVX_ArithFloat xVADD;
|
||||
extern const xImplAVX_ArithFloat xVSUB;
|
||||
extern const xImplAVX_ArithFloat xVMUL;
|
||||
extern const xImplAVX_ArithFloat xVDIV;
|
||||
extern const xImplAVX_CmpFloat xVCMP;
|
||||
extern const xImplAVX_ThreeArgYMM xVPAND;
|
||||
extern const xImplAVX_ThreeArgYMM xVPANDN;
|
||||
extern const xImplAVX_ThreeArgYMM xVPOR;
|
||||
extern const xImplAVX_ThreeArgYMM xVPXOR;
|
||||
extern const xImplAVX_CmpInt xVPCMP;
|
||||
|
||||
extern void xVMOVMSKPS(const xRegister32& to, const xRegisterSSE& from);
|
||||
extern void xVMOVMSKPD(const xRegister32& to, const xRegisterSSE& from);
|
||||
extern void xVZEROUPPER();
|
||||
|
||||
} // namespace x86Emitter
|
||||
|
|
|
@ -123,12 +123,18 @@ namespace x86Emitter
|
|||
{
|
||||
pxAssert(prefix == 0 || prefix == 0x66 || prefix == 0xF3 || prefix == 0xF2);
|
||||
|
||||
const xRegisterInt& reg = param1.IsReg() ? param1 : param2;
|
||||
const xRegisterBase& reg = param1.IsReg() ? param1 : param2;
|
||||
|
||||
u8 nR = reg.IsExtended() ? 0x00 : 0x80;
|
||||
u8 L = reg.IsWideSIMD() ? 4 : 0;
|
||||
u8 L;
|
||||
|
||||
u8 nv = (~param2.GetId() & 0xF) << 3;
|
||||
// Needed for 256-bit movemask.
|
||||
if constexpr (std::is_same_v<T3, xRegisterSSE>)
|
||||
L = param3.IsWideSIMD() ? 4 : 0;
|
||||
else
|
||||
L = reg.IsWideSIMD() ? 4 : 0;
|
||||
|
||||
u8 nv = (param2.IsEmpty() ? 0xF : ((~param2.GetId() & 0xF))) << 3;
|
||||
|
||||
u8 p =
|
||||
prefix == 0xF2 ? 3 :
|
||||
|
|
|
@ -120,6 +120,16 @@ const xRegisterSSE
|
|||
xmm12(12), xmm13(13),
|
||||
xmm14(14), xmm15(15);
|
||||
|
||||
const xRegisterSSE
|
||||
ymm0(0, xRegisterYMMTag()), ymm1(1, xRegisterYMMTag()),
|
||||
ymm2(2, xRegisterYMMTag()), ymm3(3, xRegisterYMMTag()),
|
||||
ymm4(4, xRegisterYMMTag()), ymm5(5, xRegisterYMMTag()),
|
||||
ymm6(6, xRegisterYMMTag()), ymm7(7, xRegisterYMMTag()),
|
||||
ymm8(8, xRegisterYMMTag()), ymm9(9, xRegisterYMMTag()),
|
||||
ymm10(10, xRegisterYMMTag()), ymm11(11, xRegisterYMMTag()),
|
||||
ymm12(12, xRegisterYMMTag()), ymm13(13, xRegisterYMMTag()),
|
||||
ymm14(14, xRegisterYMMTag()), ymm15(15, xRegisterYMMTag());
|
||||
|
||||
const xAddressReg
|
||||
rax(0), rbx(3),
|
||||
rcx(1), rdx(2),
|
||||
|
|
|
@ -420,6 +420,8 @@ namespace x86Emitter
|
|||
// This register type is provided to allow legal syntax for instructions that accept
|
||||
// an XMM register as a parameter, but do not allow for a GPR.
|
||||
|
||||
// Empty tag type. Passing it to the xRegisterSSE constructor selects the overload that
// hands 32 (instead of 16) to the base register constructor, i.e. a YMM register.
struct xRegisterYMMTag {};
|
||||
|
||||
class xRegisterSSE : public xRegisterBase
|
||||
{
|
||||
typedef xRegisterBase _parent;
|
||||
|
@ -430,11 +432,16 @@ namespace x86Emitter
|
|||
: _parent(16, regId)
|
||||
{
|
||||
}
|
||||
xRegisterSSE(int regId, xRegisterYMMTag)
|
||||
: _parent(32, regId)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator==(const xRegisterSSE& src) const { return this->Id == src.Id; }
|
||||
bool operator!=(const xRegisterSSE& src) const { return this->Id != src.Id; }
|
||||
|
||||
static const inline xRegisterSSE& GetInstance(uint id);
|
||||
static const inline xRegisterSSE& GetYMMInstance(uint id);
|
||||
};
|
||||
|
||||
class xRegisterCL : public xRegister8
|
||||
|
@ -570,13 +577,19 @@ namespace x86Emitter
|
|||
extern const xRegisterEmpty xEmptyReg;
|
||||
|
||||
// clang-format off
|
||||
|
||||
extern const xRegisterSSE
|
||||
extern const xRegisterSSE
|
||||
xmm0, xmm1, xmm2, xmm3,
|
||||
xmm4, xmm5, xmm6, xmm7,
|
||||
xmm8, xmm9, xmm10, xmm11,
|
||||
xmm12, xmm13, xmm14, xmm15;
|
||||
|
||||
// TODO: This needs to be _M_SSE >= 0x500'ed, but we can't do it atm because common doesn't have variants.
|
||||
extern const xRegisterSSE
|
||||
ymm0, ymm1, ymm2, ymm3,
|
||||
ymm4, ymm5, ymm6, ymm7,
|
||||
ymm8, ymm9, ymm10, ymm11,
|
||||
ymm12, ymm13, ymm14, ymm15;
|
||||
|
||||
extern const xAddressReg
|
||||
rax, rbx, rcx, rdx,
|
||||
rsi, rdi, rbp, rsp,
|
||||
|
@ -627,6 +640,19 @@ extern const xRegister32
|
|||
return *m_tbl_xmmRegs[id];
|
||||
}
|
||||
|
||||
// Returns the global ymmN register object for the given index.
// The index is asserted to be below iREGCNT_XMM.
const xRegisterSSE& xRegisterSSE::GetYMMInstance(uint id)
{
	// Lookup table indexed by register id.
	static const xRegisterSSE* const ymm_lookup[] =
		{
			&ymm0, &ymm1, &ymm2, &ymm3,
			&ymm4, &ymm5, &ymm6, &ymm7,
			&ymm8, &ymm9, &ymm10, &ymm11,
			&ymm12, &ymm13, &ymm14, &ymm15};

	pxAssert(id < iREGCNT_XMM);

	const xRegisterSSE* const selected = ymm_lookup[id];
	return *selected;
}
|
||||
|
||||
// --------------------------------------------------------------------------------------
|
||||
// xAddressVoid
|
||||
// --------------------------------------------------------------------------------------
|
||||
|
@ -949,3 +975,4 @@ extern const xRegister32
|
|||
#include "implement/jmpcall.h"
|
||||
|
||||
#include "implement/bmi.h"
|
||||
#include "implement/avx.h"
|
|
@ -167,3 +167,94 @@ TEST(CodegenTests, SSETest)
|
|||
CODEGEN_TEST_64(xBLEND.PD(xmm8, xmm9, 0xaa), "66 45 0f 3a 0d c1 aa");
|
||||
CODEGEN_TEST_64(xEXTRACTPS(ptr32[base], xmm1, 2), "66 0f 3a 17 0d f6 ff ff ff 02");
|
||||
}
|
||||
|
||||
// Checks the bytes emitted for each AVX emitter when used with 128-bit (XMM) operands.
TEST(CodegenTests, AVXTest)
{
	// Moves: register, load and store forms.
	CODEGEN_TEST_64(xVMOVAPS(xmm0, xmm1), "c5 f8 28 c1");
	CODEGEN_TEST_64(xVMOVAPS(xmm0, ptr32[rdi]), "c5 f8 28 07");
	CODEGEN_TEST_64(xVMOVAPS(ptr32[rdi], xmm0), "c5 f8 29 07");
	CODEGEN_TEST_64(xVMOVUPS(xmm0, ptr32[rdi]), "c5 f8 10 07");
	CODEGEN_TEST_64(xVMOVUPS(ptr32[rdi], xmm0), "c5 f8 11 07");

	// Floating-point arithmetic in all four flavours.
	CODEGEN_TEST_64(xVADD.PS(xmm0, xmm1, xmm2), "c5 f0 58 c2");
	CODEGEN_TEST_64(xVADD.PD(xmm0, xmm1, xmm2), "c5 f1 58 c2");
	CODEGEN_TEST_64(xVADD.SS(xmm0, xmm1, xmm2), "c5 f2 58 c2");
	CODEGEN_TEST_64(xVADD.SD(xmm0, xmm1, xmm2), "c5 f3 58 c2");
	CODEGEN_TEST_64(xVSUB.PS(xmm0, xmm1, xmm2), "c5 f0 5c c2");
	CODEGEN_TEST_64(xVSUB.PD(xmm0, xmm1, xmm2), "c5 f1 5c c2");
	CODEGEN_TEST_64(xVSUB.SS(xmm0, xmm1, xmm2), "c5 f2 5c c2");
	CODEGEN_TEST_64(xVSUB.SD(xmm0, xmm1, xmm2), "c5 f3 5c c2");
	CODEGEN_TEST_64(xVMUL.PS(xmm0, xmm1, xmm2), "c5 f0 59 c2");
	CODEGEN_TEST_64(xVMUL.PD(xmm0, xmm1, xmm2), "c5 f1 59 c2");
	CODEGEN_TEST_64(xVMUL.SS(xmm0, xmm1, xmm2), "c5 f2 59 c2");
	CODEGEN_TEST_64(xVMUL.SD(xmm0, xmm1, xmm2), "c5 f3 59 c2");
	CODEGEN_TEST_64(xVDIV.PS(xmm0, xmm1, xmm2), "c5 f0 5e c2");
	CODEGEN_TEST_64(xVDIV.PD(xmm0, xmm1, xmm2), "c5 f1 5e c2");
	CODEGEN_TEST_64(xVDIV.SS(xmm0, xmm1, xmm2), "c5 f2 5e c2");
	CODEGEN_TEST_64(xVDIV.SD(xmm0, xmm1, xmm2), "c5 f3 5e c2");

	// Floating-point compares. Two predicates are enough because the others
	// only differ in the trailing condition immediate.
	CODEGEN_TEST_64(xVCMP.EQ.PS(xmm0, xmm1, xmm2), "c5 f0 c2 c2 00");
	CODEGEN_TEST_64(xVCMP.EQ.PD(xmm0, xmm1, xmm2), "c5 f1 c2 c2 00");
	CODEGEN_TEST_64(xVCMP.EQ.SS(xmm0, xmm1, xmm2), "c5 f2 c2 c2 00");
	CODEGEN_TEST_64(xVCMP.EQ.SD(xmm0, xmm1, xmm2), "c5 f3 c2 c2 00");
	CODEGEN_TEST_64(xVCMP.LE.PS(xmm0, xmm1, xmm2), "c5 f0 c2 c2 02");
	CODEGEN_TEST_64(xVCMP.LE.PD(xmm0, xmm1, xmm2), "c5 f1 c2 c2 02");
	CODEGEN_TEST_64(xVCMP.LE.SS(xmm0, xmm1, xmm2), "c5 f2 c2 c2 02");
	CODEGEN_TEST_64(xVCMP.LE.SD(xmm0, xmm1, xmm2), "c5 f3 c2 c2 02");

	// Packed integer compares.
	CODEGEN_TEST_64(xVPCMP.EQB(xmm0, xmm1, xmm2), "c5 f1 74 c2");
	CODEGEN_TEST_64(xVPCMP.EQW(xmm0, xmm1, xmm2), "c5 f1 75 c2");
	CODEGEN_TEST_64(xVPCMP.EQD(xmm0, xmm1, xmm2), "c5 f1 76 c2");
	CODEGEN_TEST_64(xVPCMP.GTB(xmm0, xmm1, xmm2), "c5 f1 64 c2");
	CODEGEN_TEST_64(xVPCMP.GTW(xmm0, xmm1, xmm2), "c5 f1 65 c2");
	CODEGEN_TEST_64(xVPCMP.GTD(xmm0, xmm1, xmm2), "c5 f1 66 c2");

	// Packed integer logic.
	CODEGEN_TEST_64(xVPAND(xmm0, xmm1, xmm2), "c5 f1 db c2");
	CODEGEN_TEST_64(xVPANDN(xmm0, xmm1, xmm2), "c5 f1 df c2");
	CODEGEN_TEST_64(xVPOR(xmm0, xmm1, xmm2), "c5 f1 eb c2");
	CODEGEN_TEST_64(xVPXOR(xmm0, xmm1, xmm2), "c5 f1 ef c2");

	// Mask extraction into a general-purpose register.
	CODEGEN_TEST_64(xVMOVMSKPS(eax, xmm1), "c5 f8 50 c1");
	CODEGEN_TEST_64(xVMOVMSKPD(eax, xmm1), "c5 f9 50 c1");
}
|
||||
|
||||
// Checks the bytes emitted for each AVX emitter when used with 256-bit (YMM) operands.
TEST(CodegenTests, AVX256Test)
{
	// Moves: register, load and store forms.
	CODEGEN_TEST_64(xVMOVAPS(ymm0, ymm1), "c5 fc 28 c1");
	CODEGEN_TEST_64(xVMOVAPS(ymm0, ptr32[rdi]), "c5 fc 28 07");
	CODEGEN_TEST_64(xVMOVAPS(ptr32[rdi], ymm0), "c5 fc 29 07");
	CODEGEN_TEST_64(xVMOVUPS(ymm0, ptr32[rdi]), "c5 fc 10 07");
	CODEGEN_TEST_64(xVMOVUPS(ptr32[rdi], ymm0), "c5 fc 11 07");

	// Operand-less instruction with a fixed encoding.
	CODEGEN_TEST_64(xVZEROUPPER(), "c5 f8 77");

	// Packed floating-point arithmetic.
	CODEGEN_TEST_64(xVADD.PS(ymm0, ymm1, ymm2), "c5 f4 58 c2");
	CODEGEN_TEST_64(xVADD.PD(ymm0, ymm1, ymm2), "c5 f5 58 c2");
	CODEGEN_TEST_64(xVSUB.PS(ymm0, ymm1, ymm2), "c5 f4 5c c2");
	CODEGEN_TEST_64(xVSUB.PD(ymm0, ymm1, ymm2), "c5 f5 5c c2");
	CODEGEN_TEST_64(xVMUL.PS(ymm0, ymm1, ymm2), "c5 f4 59 c2");
	CODEGEN_TEST_64(xVMUL.PD(ymm0, ymm1, ymm2), "c5 f5 59 c2");
	CODEGEN_TEST_64(xVDIV.PS(ymm0, ymm1, ymm2), "c5 f4 5e c2");
	CODEGEN_TEST_64(xVDIV.PD(ymm0, ymm1, ymm2), "c5 f5 5e c2");

	// Packed floating-point compares.
	CODEGEN_TEST_64(xVCMP.EQ.PS(ymm0, ymm1, ymm2), "c5 f4 c2 c2 00");
	CODEGEN_TEST_64(xVCMP.EQ.PD(ymm0, ymm1, ymm2), "c5 f5 c2 c2 00");
	CODEGEN_TEST_64(xVCMP.LE.PS(ymm0, ymm1, ymm2), "c5 f4 c2 c2 02");
	CODEGEN_TEST_64(xVCMP.LE.PD(ymm0, ymm1, ymm2), "c5 f5 c2 c2 02");

	// Packed integer compares.
	CODEGEN_TEST_64(xVPCMP.EQB(ymm0, ymm1, ymm2), "c5 f5 74 c2");
	CODEGEN_TEST_64(xVPCMP.EQW(ymm0, ymm1, ymm2), "c5 f5 75 c2");
	CODEGEN_TEST_64(xVPCMP.EQD(ymm0, ymm1, ymm2), "c5 f5 76 c2");
	CODEGEN_TEST_64(xVPCMP.GTB(ymm0, ymm1, ymm2), "c5 f5 64 c2");
	CODEGEN_TEST_64(xVPCMP.GTW(ymm0, ymm1, ymm2), "c5 f5 65 c2");
	CODEGEN_TEST_64(xVPCMP.GTD(ymm0, ymm1, ymm2), "c5 f5 66 c2");

	// Packed integer logic.
	CODEGEN_TEST_64(xVPAND(ymm0, ymm1, ymm2), "c5 f5 db c2");
	CODEGEN_TEST_64(xVPANDN(ymm0, ymm1, ymm2), "c5 f5 df c2");
	CODEGEN_TEST_64(xVPOR(ymm0, ymm1, ymm2), "c5 f5 eb c2");
	CODEGEN_TEST_64(xVPXOR(ymm0, ymm1, ymm2), "c5 f5 ef c2");

	// Mask extraction from a 256-bit source into a general-purpose register.
	CODEGEN_TEST_64(xVMOVMSKPS(eax, ymm1), "c5 fc 50 c1");
	CODEGEN_TEST_64(xVMOVMSKPD(eax, ymm1), "c5 fd 50 c1");
}
|
||||
|
|
Loading…
Reference in New Issue