/*  PCSX2 - PS2 Emulator for PCs
 *  Copyright (C) 2002-2010  PCSX2 Dev Team
 *
 *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU Lesser General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with PCSX2.
 *  If not, see http://www.gnu.org/licenses/.
 */

#include "common/emitter/internal.h"
#include "common/emitter/tools.h"

// Mask of valid bit fields for the target CPU.  Typically this is either 0xFFFF (SSE2
// or better) or 0xFFBF (SSE1 and earlier).  Code can ensure a safe/valid MXCSR by
// AND'ing this mask against an MXCSR prior to LDMXCSR.
SSE_MXCSR MXCSR_Mask;

// Returns a human-readable name for an SSE rounding mode; any value that is not one of
// the four known modes yields "Invalid".
const char* EnumToString(SSE_RoundMode sse)
{
    const char* name = "Invalid";

    switch (sse)
    {
        case SSEround_Nearest:
            name = "Nearest";
            break;
        case SSEround_NegInf:
            name = "NegativeInfinity";
            break;
        case SSEround_PosInf:
            name = "PositiveInfinity";
            break;
        case SSEround_Chop:
            name = "Chop";
            break;
        default:
            break;
    }

    return name;
}

// Snapshots the MXCSR of the calling thread (via _mm_getcsr) into a new SSE_MXCSR.
SSE_MXCSR SSE_MXCSR::GetCurrent()
{
    SSE_MXCSR current;
    current.bitmask = _mm_getcsr();
    return current;
}

// Loads the given value into the MXCSR of the calling thread (via _mm_setcsr).
void SSE_MXCSR::SetCurrent(const SSE_MXCSR& value)
{
    _mm_setcsr(value.bitmask);
}

// Returns the RoundingControl field reinterpreted as an SSE_RoundMode.
SSE_RoundMode SSE_MXCSR::GetRoundMode() const
{
    return static_cast<SSE_RoundMode>(RoundingControl);
}

// Stores the given rounding mode into the RoundingControl field.  Only the values 0..3
// are accepted (asserted).  Returns *this so calls can be chained.
SSE_MXCSR& SSE_MXCSR::SetRoundMode(SSE_RoundMode mode)
{
    pxAssert((uint)mode < 4);
    RoundingControl = static_cast<u32>(mode);
    return *this;
}

// Clears the low six bits of the register image (mask 0x3f).  Returns *this.
SSE_MXCSR& SSE_MXCSR::ClearExceptionFlags()
{
    bitmask = bitmask & ~0x3f;
    return *this;
}

// Clears the six bits starting at bit 7 (mask 0x3f << 7).  Returns *this.
SSE_MXCSR& SSE_MXCSR::EnableExceptions()
{
    const int exceptionMaskBits = 0x3f << 7;
    bitmask &= ~exceptionMaskBits;
    return *this;
}

// Sets the six bits starting at bit 7 (mask 0x3f << 7).  Returns *this.
SSE_MXCSR& SSE_MXCSR::DisableExceptions()
{
    const int exceptionMaskBits = 0x3f << 7;
    bitmask |= exceptionMaskBits;
    return *this;
}

// Applies the reserve bits mask for the current running cpu, as fetched from the CPU
// during CPU
init/detection. SSE_MXCSR& SSE_MXCSR::ApplyReserveMask() { bitmask &= MXCSR_Mask.bitmask; return *this; } SSE_MXCSR::operator x86Emitter::xIndirect32() const { return x86Emitter::ptr32[&bitmask]; } namespace x86Emitter { // ------------------------------------------------------------------------ // SimdPrefix - If the lower byte of the opcode is 0x38 or 0x3a, then the opcode is // treated as a 16 bit value (in SSE 0x38 and 0x3a denote prefixes for extended SSE3/4 // instructions). Any other lower value assumes the upper value is 0 and ignored. // Non-zero upper bytes, when the lower byte is not the 0x38 or 0x3a prefix, will // generate an assertion. // __emitinline void SimdPrefix(u8 prefix, u16 opcode) { pxAssertMsg(prefix == 0, "REX prefix must be just before the opcode"); const bool is16BitOpcode = ((opcode & 0xff) == 0x38) || ((opcode & 0xff) == 0x3a); // If the lower byte is not a valid prefix and the upper byte is non-zero it // means we made a mistake! if (!is16BitOpcode) pxAssert((opcode >> 8) == 0); if (prefix != 0) { if (is16BitOpcode) xWrite32((opcode << 16) | 0x0f00 | prefix); else { xWrite16(0x0f00 | prefix); xWrite8(opcode); } } else { if (is16BitOpcode) { xWrite8(0x0f); xWrite16(opcode); } else xWrite16((opcode << 8) | 0x0f); } } const xImplSimd_DestRegEither xPAND = {0x66, 0xdb}; const xImplSimd_DestRegEither xPANDN = {0x66, 0xdf}; const xImplSimd_DestRegEither xPOR = {0x66, 0xeb}; const xImplSimd_DestRegEither xPXOR = {0x66, 0xef}; // [SSE-4.1] Performs a bitwise AND of dest against src, and sets the ZF flag // only if all bits in the result are 0. PTEST also sets the CF flag according // to the following condition: (xmm2/m128 AND NOT xmm1) == 0; const xImplSimd_DestRegSSE xPTEST = {0x66, 0x1738}; // ===================================================================================================== // SSE Conversion Operations, as looney as they are. 
// ===================================================================================================== // These enforce pointer strictness for Indirect forms, due to the otherwise completely confusing // nature of the functions. (so if a function expects an m32, you must use (u32*) or ptr32[]). // __fi void xCVTDQ2PD(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0xe6); } __fi void xCVTDQ2PD(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0xf3, 0xe6); } __fi void xCVTDQ2PS(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x00, 0x5b); } __fi void xCVTDQ2PS(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x00, 0x5b); } __fi void xCVTPD2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0xe6); } __fi void xCVTPD2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0xf2, 0xe6); } __fi void xCVTPD2PS(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x66, 0x5a); } __fi void xCVTPD2PS(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x66, 0x5a); } __fi void xCVTPI2PD(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0x66, 0x2a); } __fi void xCVTPI2PS(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0x00, 0x2a); } __fi void xCVTPS2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x66, 0x5b); } __fi void xCVTPS2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x66, 0x5b); } __fi void xCVTPS2PD(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x00, 0x5a); } __fi void xCVTPS2PD(const xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0x00, 0x5a); } __fi void xCVTSD2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0x2d); } __fi void xCVTSD2SI(const xRegister32or64& to, const xIndirect64& from) { OpWriteSSE(0xf2, 0x2d); } __fi void xCVTSD2SS(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0x5a); } __fi void xCVTSD2SS(const 
xRegisterSSE& to, const xIndirect64& from) { OpWriteSSE(0xf2, 0x5a); } __fi void xCVTSI2SS(const xRegisterSSE& to, const xRegister32or64& from) { OpWriteSSE(0xf3, 0x2a); } __fi void xCVTSI2SS(const xRegisterSSE& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x2a); } __fi void xCVTSS2SD(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x5a); } __fi void xCVTSS2SD(const xRegisterSSE& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x5a); } __fi void xCVTSS2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x2d); } __fi void xCVTSS2SI(const xRegister32or64& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x2d); } __fi void xCVTTPD2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0x66, 0xe6); } __fi void xCVTTPD2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0x66, 0xe6); } __fi void xCVTTPS2DQ(const xRegisterSSE& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x5b); } __fi void xCVTTPS2DQ(const xRegisterSSE& to, const xIndirect128& from) { OpWriteSSE(0xf3, 0x5b); } __fi void xCVTTSD2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf2, 0x2c); } __fi void xCVTTSD2SI(const xRegister32or64& to, const xIndirect64& from) { OpWriteSSE(0xf2, 0x2c); } __fi void xCVTTSS2SI(const xRegister32or64& to, const xRegisterSSE& from) { OpWriteSSE(0xf3, 0x2c); } __fi void xCVTTSS2SI(const xRegister32or64& to, const xIndirect32& from) { OpWriteSSE(0xf3, 0x2c); } // ------------------------------------------------------------------------ void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); } void xImplSimd_DestRegSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); } void xImplSimd_DestRegImmSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); } void xImplSimd_DestRegImmSSE::operator()(const 
xRegisterSSE& to, const xIndirectVoid& from, u8 imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); } void xImplSimd_DestRegEither::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); } void xImplSimd_DestRegEither::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); } void xImplSimd_DestSSE_CmpImm::operator()(const xRegisterSSE& to, const xRegisterSSE& from, SSE2_ComparisonType imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); } void xImplSimd_DestSSE_CmpImm::operator()(const xRegisterSSE& to, const xIndirectVoid& from, SSE2_ComparisonType imm) const { xOpWrite0F(Prefix, Opcode, to, from, imm); } // ===================================================================================================== // SIMD Arithmetic Instructions // ===================================================================================================== void _SimdShiftHelper::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(Prefix, Opcode); } void _SimdShiftHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { OpWriteSSE(Prefix, Opcode); } void _SimdShiftHelper::operator()(const xRegisterSSE& to, u8 imm8) const { xOpWrite0F(0x66, OpcodeImm, (int)Modcode, to); xWrite8(imm8); } void xImplSimd_Shift::DQ(const xRegisterSSE& to, u8 imm8) const { xOpWrite0F(0x66, 0x73, (int)Q.Modcode + 1, to, imm8); } const xImplSimd_ShiftWithoutQ xPSRA = { {0x66, 0xe1, 0x71, 4}, // W {0x66, 0xe2, 0x72, 4} // D }; const xImplSimd_Shift xPSRL = { {0x66, 0xd1, 0x71, 2}, // W {0x66, 0xd2, 0x72, 2}, // D {0x66, 0xd3, 0x73, 2}, // Q }; const xImplSimd_Shift xPSLL = { {0x66, 0xf1, 0x71, 6}, // W {0x66, 0xf2, 0x72, 6}, // D {0x66, 0xf3, 0x73, 6}, // Q }; const xImplSimd_AddSub xPADD = { {0x66, 0xdc + 0x20}, // B {0x66, 0xdc + 0x21}, // W {0x66, 0xdc + 0x22}, // D {0x66, 0xd4}, // Q {0x66, 0xdc + 0x10}, // SB {0x66, 0xdc + 0x11}, // SW {0x66, 0xdc}, // USB 
{0x66, 0xdc + 1}, // USW }; const xImplSimd_AddSub xPSUB = { {0x66, 0xd8 + 0x20}, // B {0x66, 0xd8 + 0x21}, // W {0x66, 0xd8 + 0x22}, // D {0x66, 0xfb}, // Q {0x66, 0xd8 + 0x10}, // SB {0x66, 0xd8 + 0x11}, // SW {0x66, 0xd8}, // USB {0x66, 0xd8 + 1}, // USW }; const xImplSimd_PMul xPMUL = { {0x66, 0xd5}, // LW {0x66, 0xe5}, // HW {0x66, 0xe4}, // HUW {0x66, 0xf4}, // UDQ {0x66, 0x0b38}, // HRSW {0x66, 0x4038}, // LD {0x66, 0x2838}, // DQ }; const xImplSimd_rSqrt xRSQRT = { {0x00, 0x52}, // PS {0xf3, 0x52} // SS }; const xImplSimd_rSqrt xRCP = { {0x00, 0x53}, // PS {0xf3, 0x53} // SS }; const xImplSimd_Sqrt xSQRT = { {0x00, 0x51}, // PS {0xf3, 0x51}, // SS {0xf2, 0x51} // SS }; const xImplSimd_AndNot xANDN = { {0x00, 0x55}, // PS {0x66, 0x55} // PD }; const xImplSimd_PAbsolute xPABS = { {0x66, 0x1c38}, // B {0x66, 0x1d38}, // W {0x66, 0x1e38} // D }; const xImplSimd_PSign xPSIGN = { {0x66, 0x0838}, // B {0x66, 0x0938}, // W {0x66, 0x0a38}, // D }; const xImplSimd_PMultAdd xPMADD = { {0x66, 0xf5}, // WD {0x66, 0xf438}, // UBSW }; const xImplSimd_HorizAdd xHADD = { {0xf2, 0x7c}, // PS {0x66, 0x7c}, // PD }; const xImplSimd_DotProduct xDP = { {0x66, 0x403a}, // PS {0x66, 0x413a}, // PD }; const xImplSimd_Round xROUND = { {0x66, 0x083a}, // PS {0x66, 0x093a}, // PD {0x66, 0x0a3a}, // SS {0x66, 0x0b3a}, // SD }; // ===================================================================================================== // SIMD Comparison Instructions // ===================================================================================================== void xImplSimd_Compare::PS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::PS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x00, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::PD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::PD(const 
xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x66, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::SS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::SS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf3, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::SD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); } void xImplSimd_Compare::SD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0xf2, 0xc2, to, from, (u8)CType); } const xImplSimd_MinMax xMIN = { {0x00, 0x5d}, // PS {0x66, 0x5d}, // PD {0xf3, 0x5d}, // SS {0xf2, 0x5d}, // SD }; const xImplSimd_MinMax xMAX = { {0x00, 0x5f}, // PS {0x66, 0x5f}, // PD {0xf3, 0x5f}, // SS {0xf2, 0x5f}, // SD }; // [TODO] : Merge this into the xCMP class, so that they are notation as: xCMP.EQ const xImplSimd_Compare xCMPEQ = {SSE2_Equal}; const xImplSimd_Compare xCMPLT = {SSE2_Less}; const xImplSimd_Compare xCMPLE = {SSE2_LessOrEqual}; const xImplSimd_Compare xCMPUNORD = {SSE2_LessOrEqual}; const xImplSimd_Compare xCMPNE = {SSE2_NotEqual}; const xImplSimd_Compare xCMPNLT = {SSE2_NotLess}; const xImplSimd_Compare xCMPNLE = {SSE2_NotLessOrEqual}; const xImplSimd_Compare xCMPORD = {SSE2_Ordered}; const xImplSimd_COMI xCOMI = { {0x00, 0x2f}, // SS {0x66, 0x2f}, // SD }; const xImplSimd_COMI xUCOMI = { {0x00, 0x2e}, // SS {0x66, 0x2e}, // SD }; const xImplSimd_PCompare xPCMP = { {0x66, 0x74}, // EQB {0x66, 0x75}, // EQW {0x66, 0x76}, // EQD {0x66, 0x64}, // GTB {0x66, 0x65}, // GTW {0x66, 0x66}, // GTD }; const xImplSimd_PMinMax xPMIN = { {0x66, 0xda}, // UB {0x66, 0xea}, // SW {0x66, 0x3838}, // SB {0x66, 0x3938}, // SD {0x66, 0x3a38}, // UW {0x66, 0x3b38}, // UD }; const xImplSimd_PMinMax xPMAX = { {0x66, 0xde}, // UB {0x66, 0xee}, // SW {0x66, 0x3c38}, // SB {0x66, 0x3d38}, // SD {0x66, 0x3e38}, // UW {0x66, 0x3f38}, // UD }; // 
===================================================================================================== // SIMD Shuffle/Pack (Shuffle puck?) // ===================================================================================================== __fi void xImplSimd_Shuffle::_selector_assertion_check(u8 selector) const { pxAssertMsg((selector & ~3) == 0, "Invalid immediate operand on SSE Shuffle: Upper 6 bits of the SSE Shuffle-PD Selector are reserved and must be zero."); } void xImplSimd_Shuffle::PS(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const { xOpWrite0F(0xc6, to, from, selector); } void xImplSimd_Shuffle::PS(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const { xOpWrite0F(0xc6, to, from, selector); } void xImplSimd_Shuffle::PD(const xRegisterSSE& to, const xRegisterSSE& from, u8 selector) const { _selector_assertion_check(selector); xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3); } void xImplSimd_Shuffle::PD(const xRegisterSSE& to, const xIndirectVoid& from, u8 selector) const { _selector_assertion_check(selector); xOpWrite0F(0x66, 0xc6, to, from, selector & 0x3); } void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, Opcode, to, from, imm8); } void xImplSimd_InsertExtractHelper::operator()(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, Opcode, to, from, imm8); } void xImplSimd_PInsert::W(const xRegisterSSE& to, const xRegister32& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); } void xImplSimd_PInsert::W(const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8) const { xOpWrite0F(0x66, 0xc4, to, from, imm8); } void SimdImpl_PExtract::W(const xRegister32& to, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0xc5, to, from, imm8); } void SimdImpl_PExtract::W(const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8) const { xOpWrite0F(0x66, 0x153a, from, dest, imm8); } const 
xImplSimd_Shuffle xSHUF = {}; const xImplSimd_PShuffle xPSHUF = { {0x66, 0x70}, // D {0xf2, 0x70}, // LW {0xf3, 0x70}, // HW {0x66, 0x0038}, // B }; const SimdImpl_PUnpack xPUNPCK = { {0x66, 0x60}, // LBW {0x66, 0x61}, // LWD {0x66, 0x62}, // LDQ {0x66, 0x6c}, // LQDQ {0x66, 0x68}, // HBW {0x66, 0x69}, // HWD {0x66, 0x6a}, // HDQ {0x66, 0x6d}, // HQDQ }; const SimdImpl_Pack xPACK = { {0x66, 0x63}, // SSWB {0x66, 0x6b}, // SSDW {0x66, 0x67}, // USWB {0x66, 0x2b38}, // USDW }; const xImplSimd_Unpack xUNPCK = { {0x00, 0x15}, // HPS {0x66, 0x15}, // HPD {0x00, 0x14}, // LPS {0x66, 0x14}, // LPD }; const xImplSimd_PInsert xPINSR = { {0x203a}, // B {0x223a}, // D }; const SimdImpl_PExtract xPEXTR = { {0x143a}, // B {0x163a}, // D }; // ===================================================================================================== // SIMD Move And Blend Instructions // ===================================================================================================== void xImplSimd_MovHL::PS(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(Opcode, to, from); } void xImplSimd_MovHL::PS(const xIndirectVoid& to, const xRegisterSSE& from) const { xOpWrite0F(Opcode + 1, from, to); } void xImplSimd_MovHL::PD(const xRegisterSSE& to, const xIndirectVoid& from) const { xOpWrite0F(0x66, Opcode, to, from); } void xImplSimd_MovHL::PD(const xIndirectVoid& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, Opcode + 1, from, to); } void xImplSimd_MovHL_RtoR::PS(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(Opcode, to, from); } void xImplSimd_MovHL_RtoR::PD(const xRegisterSSE& to, const xRegisterSSE& from) const { xOpWrite0F(0x66, Opcode, to, from); } static const u16 MovPS_OpAligned = 0x28; // Aligned [aps] form static const u16 MovPS_OpUnaligned = 0x10; // unaligned [ups] form void xImplSimd_MoveSSE::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { if (to != from) xOpWrite0F(Prefix, MovPS_OpAligned, to, from); 
} void xImplSimd_MoveSSE::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: bool isReallyAligned = isAligned || (((from.Displacement & 0x0f) == 0) && from.Index.IsEmpty() && from.Base.IsEmpty()); xOpWrite0F(Prefix, isReallyAligned ? MovPS_OpAligned : MovPS_OpUnaligned, to, from); } void xImplSimd_MoveSSE::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: bool isReallyAligned = isAligned || ((to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty()); xOpWrite0F(Prefix, isReallyAligned ? MovPS_OpAligned + 1 : MovPS_OpUnaligned + 1, from, to); } static const u8 MovDQ_PrefixAligned = 0x66; // Aligned [dqa] form static const u8 MovDQ_PrefixUnaligned = 0xf3; // unaligned [dqu] form void xImplSimd_MoveDQ::operator()(const xRegisterSSE& to, const xRegisterSSE& from) const { if (to != from) xOpWrite0F(MovDQ_PrefixAligned, 0x6f, to, from); } void xImplSimd_MoveDQ::operator()(const xRegisterSSE& to, const xIndirectVoid& from) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: bool isReallyAligned = isAligned || ((from.Displacement & 0x0f) == 0 && from.Index.IsEmpty() && from.Base.IsEmpty()); xOpWrite0F(isReallyAligned ? MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x6f, to, from); } void xImplSimd_MoveDQ::operator()(const xIndirectVoid& to, const xRegisterSSE& from) const { // ModSib form is aligned if it's displacement-only and the displacement is aligned: bool isReallyAligned = isAligned || ((to.Displacement & 0x0f) == 0 && to.Index.IsEmpty() && to.Base.IsEmpty()); // use opcode 0x7f : alternate ModRM encoding (reverse src/dst) xOpWrite0F(isReallyAligned ? 
MovDQ_PrefixAligned : MovDQ_PrefixUnaligned, 0x7f, from, to); } void xImplSimd_PMove::BW(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase); } void xImplSimd_PMove::BW(const xRegisterSSE& to, const xIndirect64& from) const { OpWriteSSE(0x66, OpcodeBase); } void xImplSimd_PMove::BD(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x100); } void xImplSimd_PMove::BD(const xRegisterSSE& to, const xIndirect32& from) const { OpWriteSSE(0x66, OpcodeBase + 0x100); } void xImplSimd_PMove::BQ(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x200); } void xImplSimd_PMove::BQ(const xRegisterSSE& to, const xIndirect16& from) const { OpWriteSSE(0x66, OpcodeBase + 0x200); } void xImplSimd_PMove::WD(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x300); } void xImplSimd_PMove::WD(const xRegisterSSE& to, const xIndirect64& from) const { OpWriteSSE(0x66, OpcodeBase + 0x300); } void xImplSimd_PMove::WQ(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x400); } void xImplSimd_PMove::WQ(const xRegisterSSE& to, const xIndirect32& from) const { OpWriteSSE(0x66, OpcodeBase + 0x400); } void xImplSimd_PMove::DQ(const xRegisterSSE& to, const xRegisterSSE& from) const { OpWriteSSE(0x66, OpcodeBase + 0x500); } void xImplSimd_PMove::DQ(const xRegisterSSE& to, const xIndirect64& from) const { OpWriteSSE(0x66, OpcodeBase + 0x500); } const xImplSimd_MoveSSE xMOVAPS = {0x00, true}; const xImplSimd_MoveSSE xMOVUPS = {0x00, false}; #ifdef ALWAYS_USE_MOVAPS const xImplSimd_MoveSSE xMOVDQA = {0x00, true}; const xImplSimd_MoveSSE xMOVAPD = {0x00, true}; const xImplSimd_MoveSSE xMOVDQU = {0x00, false}; const xImplSimd_MoveSSE xMOVUPD = {0x00, false}; #else const xImplSimd_MoveDQ xMOVDQA = {0x66, true}; const xImplSimd_MoveSSE xMOVAPD = {0x66, true}; const xImplSimd_MoveDQ xMOVDQU = {0xf3, false}; const 
xImplSimd_MoveSSE xMOVUPD = {0x66, false}; #endif const xImplSimd_MovHL xMOVH = {0x16}; const xImplSimd_MovHL xMOVL = {0x12}; const xImplSimd_MovHL_RtoR xMOVLH = {0x16}; const xImplSimd_MovHL_RtoR xMOVHL = {0x12}; const xImplSimd_Blend xBLEND = { {0x66, 0x0c3a}, // PS {0x66, 0x0d3a}, // PD {0x66, 0x1438}, // VPS {0x66, 0x1538}, // VPD }; const xImplSimd_PMove xPMOVSX = {0x2038}; const xImplSimd_PMove xPMOVZX = {0x3038}; // [SSE-3] const xImplSimd_DestRegSSE xMOVSLDUP = {0xf3, 0x12}; // [SSE-3] const xImplSimd_DestRegSSE xMOVSHDUP = {0xf3, 0x16}; ////////////////////////////////////////////////////////////////////////////////////////// // MMX Mov Instructions (MOVD, MOVQ, MOVSS). // // Notes: // * Some of the functions have been renamed to more clearly reflect what they actually // do. Namely we've affixed "ZX" to several MOVs that take a register as a destination // since that's what they do (MOVD clears upper 32/96 bits, etc). // // * MOVD has valid forms for MMX and XMM registers. // __fi void xMOVDZX(const xRegisterSSE& to, const xRegister32or64& from) { xOpWrite0F(0x66, 0x6e, to, from); } __fi void xMOVDZX(const xRegisterSSE& to, const xIndirectVoid& src) { xOpWrite0F(0x66, 0x6e, to, src); } __fi void xMOVD(const xRegister32or64& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0x7e, from, to); } __fi void xMOVD(const xIndirectVoid& dest, const xRegisterSSE& from) { xOpWrite0F(0x66, 0x7e, from, dest); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. __fi void xMOVQZX(const xRegisterSSE& to, const xRegisterSSE& from) { xOpWrite0F(0xf3, 0x7e, to, from); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. __fi void xMOVQZX(const xRegisterSSE& to, const xIndirectVoid& src) { xOpWrite0F(0xf3, 0x7e, to, src); } // Moves from XMM to XMM, with the *upper 64 bits* of the destination register // being cleared to zero. 
__fi void xMOVQZX(const xRegisterSSE& to, const void* src)
{
    xOpWrite0F(0xf3, 0x7e, to, src);
}

// Moves lower quad of XMM to ptr64 (no bits are cleared)
__fi void xMOVQ(const xIndirectVoid& dest, const xRegisterSSE& from)
{
    xOpWrite0F(0x66, 0xd6, from, dest);
}

//////////////////////////////////////////////////////////////////////////////////////////
// IMPLEMENT_xMOVS(ssd, prefix) generates three scalar-move emitters for the given suffix:
//   * xMOV<ssd>(reg, reg)    - opcode 0x10; emits nothing when both registers are the same.
//   * xMOV<ssd>ZX(reg, mem)  - opcode 0x10, load form.
//   * xMOV<ssd>(mem, reg)    - opcode 0x11, store form (operands passed in swapped order).
//
#define IMPLEMENT_xMOVS(ssd, prefix) \
    __fi void xMOV##ssd(const xRegisterSSE& to, const xRegisterSSE& from) \
    { \
        if (to != from) \
        { \
            xOpWrite0F(prefix, 0x10, to, from); \
        } \
    } \
    __fi void xMOV##ssd##ZX(const xRegisterSSE& to, const xIndirectVoid& from) \
    { \
        xOpWrite0F(prefix, 0x10, to, from); \
    } \
    __fi void xMOV##ssd(const xIndirectVoid& to, const xRegisterSSE& from) \
    { \
        xOpWrite0F(prefix, 0x11, from, to); \
    }

IMPLEMENT_xMOVS(SS, 0xf3)
IMPLEMENT_xMOVS(SD, 0xf2)

//////////////////////////////////////////////////////////////////////////////////////////
// Non-temporal moves.  The first overload is the load form (two-byte opcode 0x2a38, register
// destination); the remaining functions are store forms with a memory destination.
//

__fi void xMOVNTDQA(const xRegisterSSE& to, const xIndirectVoid& from)
{
    xOpWrite0F(0x66, 0x2a38, to.Id, from);
}

__fi void xMOVNTDQA(const xIndirectVoid& to, const xRegisterSSE& from)
{
    xOpWrite0F(0x66, 0xe7, from, to);
}

__fi void xMOVNTPD(const xIndirectVoid& to, const xRegisterSSE& from)
{
    xOpWrite0F(0x66, 0x2b, from, to);
}

__fi void xMOVNTPS(const xIndirectVoid& to, const xRegisterSSE& from)
{
    xOpWrite0F(0x2b, from, to);
}

// ------------------------------------------------------------------------

__fi void xMOVMSKPS(const xRegister32& to, const xRegisterSSE& from)
{
    xOpWrite0F(0x50, to, from);
}

// NOTE(review): the trailing `true` selects a five-argument xOpWrite0F overload that is
// declared outside this file; if that overload takes an imm8, this appends an extra
// byte to the instruction -- confirm against the xOpWrite0F declarations.
__fi void xMOVMSKPD(const xRegister32& to, const xRegisterSSE& from)
{
    xOpWrite0F(0x66, 0x50, to, from, true);
}

// xMASKMOV:
// Selectively write bytes from mm1/xmm1 to memory location using the byte mask in mm2/xmm2.
// The default memory location is specified by DS:EDI.
The most significant bit in each byte // of the mask operand determines whether the corresponding byte in the source operand is // written to the corresponding byte location in memory. __fi void xMASKMOV(const xRegisterSSE& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0xf7, to, from); } // xPMOVMSKB: // Creates a mask made up of the most significant bit of each byte of the source // operand and stores the result in the low byte or word of the destination operand. // Upper bits of the destination are cleared to zero. // // When operating on a 64-bit (MMX) source, the byte mask is 8 bits; when operating on // 128-bit (SSE) source, the byte mask is 16-bits. // __fi void xPMOVMSKB(const xRegister32or64& to, const xRegisterSSE& from) { xOpWrite0F(0x66, 0xd7, to, from); } // [sSSE-3] Concatenates dest and source operands into an intermediate composite, // shifts the composite at byte granularity to the right by a constant immediate, // and extracts the right-aligned result into the destination. // __fi void xPALIGNR(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x0f3a, to, from, imm8); } // -------------------------------------------------------------------------------------- // INSERTPS / EXTRACTPS [SSE4.1 only!] // -------------------------------------------------------------------------------------- // [TODO] these might be served better as classes, especially if other instructions use // the M32,sse,imm form (I forget offhand if any do). // [SSE-4.1] Insert a single-precision floating-point value from src into a specified // location in dest, and selectively zero out the data elements in dest according to // the mask field in the immediate byte. The source operand can be a memory location // (32 bits) or an XMM register (lower 32 bits used). // // Imm8 provides three fields: // * COUNT_S: The value of Imm8[7:6] selects the dword element from src. It is 0 if // the source is a memory operand. 
// * COUNT_D: The value of Imm8[5:4] selects the target dword element in dest. // * ZMASK: Each bit of Imm8[3:0] selects a dword element in dest to be written // with 0.0 if set to 1. // __emitinline void xINSERTPS(const xRegisterSSE& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x213a, to, from, imm8); } __emitinline void xINSERTPS(const xRegisterSSE& to, const xIndirect32& from, u8 imm8) { xOpWrite0F(0x66, 0x213a, to, from, imm8); } // [SSE-4.1] Extract a single-precision floating-point value from src at an offset // determined by imm8[1-0]*32. The extracted single precision floating-point value // is stored into the low 32-bits of dest (or at a 32-bit memory pointer). // __emitinline void xEXTRACTPS(const xRegister32or64& to, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x173a, to, from, imm8); } __emitinline void xEXTRACTPS(const xIndirect32& dest, const xRegisterSSE& from, u8 imm8) { xOpWrite0F(0x66, 0x173a, from, dest, imm8); } // ===================================================================================================== // Ungrouped Instructions! // ===================================================================================================== // Store Streaming SIMD Extension Control/Status to Mem32. __emitinline void xSTMXCSR(const xIndirect32& dest) { xOpWrite0F(0, 0xae, 3, dest); } // Load Streaming SIMD Extension Control/Status from Mem32. __emitinline void xLDMXCSR(const xIndirect32& src) { xOpWrite0F(0, 0xae, 2, src); } // Save x87 FPU, MMX Technology, and SSE State to buffer // Target buffer must be at least 512 bytes in length to hold the result. __emitinline void xFXSAVE(const xIndirectVoid& dest) { xOpWrite0F(0, 0xae, 0, dest); } // Restore x87 FPU, MMX , XMM, and MXCSR State. // Source buffer should be 512 bytes in length. __emitinline void xFXRSTOR(const xIndirectVoid& src) { xOpWrite0F(0, 0xae, 1, src); } } // namespace x86Emitter