//------------------------------------------------------------------------------------- // DirectXMathMisc.inl -- SIMD C++ Math library // // THIS CODE AND INFORMATION IS PROVIDED "AS IS" WITHOUT WARRANTY OF // ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING BUT NOT LIMITED TO // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND/OR FITNESS FOR A // PARTICULAR PURPOSE. // // Copyright (c) Microsoft Corporation. All rights reserved. // // http://go.microsoft.com/fwlink/?LinkID=615560 //------------------------------------------------------------------------------------- #pragma once /**************************************************************************** * * Quaternion * ****************************************************************************/ //------------------------------------------------------------------------------ // Comparison operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMQuaternionEqual ( FXMVECTOR Q1, FXMVECTOR Q2 ) { return XMVector4Equal(Q1, Q2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMQuaternionNotEqual ( FXMVECTOR Q1, FXMVECTOR Q2 ) { return XMVector4NotEqual(Q1, Q2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMQuaternionIsNaN ( FXMVECTOR Q ) { return XMVector4IsNaN(Q); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMQuaternionIsInfinite ( FXMVECTOR Q ) { return XMVector4IsInfinite(Q); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMQuaternionIsIdentity ( FXMVECTOR Q ) { return XMVector4Equal(Q, g_XMIdentityR3.v); } //------------------------------------------------------------------------------ // Computation operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionDot ( FXMVECTOR Q1, FXMVECTOR Q2 ) { return XMVector4Dot(Q1, Q2); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionMultiply ( FXMVECTOR Q1, FXMVECTOR Q2 ) { // Returns the product Q2*Q1 (which is the concatenation of a rotation Q1 followed by the rotation Q2) // [ (Q2.w * Q1.x) + (Q2.x * Q1.w) + (Q2.y * Q1.z) - (Q2.z * Q1.y), // (Q2.w * Q1.y) - (Q2.x * Q1.z) + (Q2.y * Q1.w) + (Q2.z * Q1.x), // (Q2.w * Q1.z) + (Q2.x * Q1.y) - (Q2.y * Q1.x) + (Q2.z * Q1.w), // (Q2.w * Q1.w) - (Q2.x * Q1.x) - (Q2.y * Q1.y) - (Q2.z * Q1.z) ] #if defined(_XM_NO_INTRINSICS_) XMVECTORF32 Result = { { { (Q2.vector4_f32[3] * Q1.vector4_f32[0]) + (Q2.vector4_f32[0] * Q1.vector4_f32[3]) + (Q2.vector4_f32[1] * Q1.vector4_f32[2]) - (Q2.vector4_f32[2] * Q1.vector4_f32[1]), (Q2.vector4_f32[3] * Q1.vector4_f32[1]) - (Q2.vector4_f32[0] * Q1.vector4_f32[2]) + (Q2.vector4_f32[1] * Q1.vector4_f32[3]) + (Q2.vector4_f32[2] * Q1.vector4_f32[0]), (Q2.vector4_f32[3] * Q1.vector4_f32[2]) + (Q2.vector4_f32[0] * Q1.vector4_f32[1]) - (Q2.vector4_f32[1] * Q1.vector4_f32[0]) + (Q2.vector4_f32[2] * Q1.vector4_f32[3]), (Q2.vector4_f32[3] * Q1.vector4_f32[3]) - (Q2.vector4_f32[0] * Q1.vector4_f32[0]) - (Q2.vector4_f32[1] * Q1.vector4_f32[1]) - (Q2.vector4_f32[2] * Q1.vector4_f32[2]) } } }; return Result.v; #elif defined(_XM_ARM_NEON_INTRINSICS_) static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } }; static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } }; static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } }; float32x2_t Q2L = vget_low_f32(Q2); float32x2_t Q2H = vget_high_f32(Q2); float32x4_t Q2X = vdupq_lane_f32( Q2L, 0 ); float32x4_t Q2Y = vdupq_lane_f32( Q2L, 1 ); float32x4_t Q2Z = vdupq_lane_f32( Q2H, 0 ); XMVECTOR vResult = vmulq_lane_f32(Q1, Q2H, 1); // Mul by Q1WZYX float32x4_t vTemp = vrev64q_f32(Q1); vTemp = vcombine_f32( vget_high_f32(vTemp), vget_low_f32(vTemp) ); Q2X = vmulq_f32(Q2X,vTemp); vResult = vmlaq_f32( vResult, Q2X, ControlWZYX ); // Mul by Q1ZWXY vTemp = vrev64q_u32(vTemp); Q2Y = vmulq_f32(Q2Y,vTemp); vResult = vmlaq_f32(vResult, Q2Y, ControlZWXY); // Mul by Q1YXWZ vTemp = vrev64q_u32(vTemp); vTemp = vcombine_f32(vget_high_f32(vTemp), vget_low_f32(vTemp)); Q2Z = vmulq_f32(Q2Z,vTemp); vResult = vmlaq_f32(vResult, Q2Z, ControlYXWZ); return vResult; #elif defined(_XM_SSE_INTRINSICS_) static const XMVECTORF32 ControlWZYX = { { { 1.0f, -1.0f, 1.0f, -1.0f } } }; static const XMVECTORF32 ControlZWXY = { { { 1.0f, 1.0f, -1.0f, -1.0f } } }; static const XMVECTORF32 ControlYXWZ = { { { -1.0f, 1.0f, 1.0f, -1.0f } } }; // Copy to SSE registers and use as few as possible for x86 XMVECTOR Q2X = Q2; XMVECTOR Q2Y = Q2; XMVECTOR Q2Z = Q2; XMVECTOR vResult = Q2; // Splat with one instruction vResult = XM_PERMUTE_PS(vResult,_MM_SHUFFLE(3,3,3,3)); Q2X = XM_PERMUTE_PS(Q2X,_MM_SHUFFLE(0,0,0,0)); Q2Y = XM_PERMUTE_PS(Q2Y,_MM_SHUFFLE(1,1,1,1)); Q2Z = XM_PERMUTE_PS(Q2Z,_MM_SHUFFLE(2,2,2,2)); // Retire Q1 and perform Q1*Q2W vResult = _mm_mul_ps(vResult,Q1); XMVECTOR Q1Shuffle = Q1; // Shuffle the copies of Q1 Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); // Mul by Q1WZYX Q2X = _mm_mul_ps(Q2X,Q1Shuffle); Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(2,3,0,1)); // Flip the signs on y and z Q2X = _mm_mul_ps(Q2X,ControlWZYX); // Mul by Q1ZWXY Q2Y = _mm_mul_ps(Q2Y,Q1Shuffle); Q1Shuffle = XM_PERMUTE_PS(Q1Shuffle,_MM_SHUFFLE(0,1,2,3)); // Flip the signs on z and w Q2Y = _mm_mul_ps(Q2Y,ControlZWXY); // Mul by Q1YXWZ Q2Z = _mm_mul_ps(Q2Z,Q1Shuffle); vResult = _mm_add_ps(vResult,Q2X); // Flip the signs on x and w Q2Z = _mm_mul_ps(Q2Z,ControlYXWZ); Q2Y = _mm_add_ps(Q2Y,Q2Z); vResult = _mm_add_ps(vResult,Q2Y); return vResult; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionLengthSq ( FXMVECTOR Q ) { return XMVector4LengthSq(Q); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionReciprocalLength ( FXMVECTOR Q ) { return XMVector4ReciprocalLength(Q); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionLength ( FXMVECTOR Q ) { return XMVector4Length(Q); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionNormalizeEst ( FXMVECTOR Q ) { return XMVector4NormalizeEst(Q); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionNormalize ( FXMVECTOR Q ) { return XMVector4Normalize(Q); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionConjugate ( FXMVECTOR Q ) { #if defined(_XM_NO_INTRINSICS_) XMVECTORF32 Result = { { { -Q.vector4_f32[0], -Q.vector4_f32[1], -Q.vector4_f32[2], Q.vector4_f32[3] } } }; return Result.v; #elif defined(_XM_ARM_NEON_INTRINSICS_) static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } }; return vmulq_f32(Q, NegativeOne3.v ); #elif defined(_XM_SSE_INTRINSICS_) static const XMVECTORF32 NegativeOne3 = { { { -1.0f, -1.0f, -1.0f, 1.0f } } }; return _mm_mul_ps(Q,NegativeOne3); #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionInverse ( FXMVECTOR Q ) { const XMVECTOR Zero = XMVectorZero(); XMVECTOR L = XMVector4LengthSq(Q); XMVECTOR Conjugate = XMQuaternionConjugate(Q); XMVECTOR Control = XMVectorLessOrEqual(L, g_XMEpsilon.v); XMVECTOR Result = XMVectorDivide(Conjugate, L); Result = XMVectorSelect(Result, Zero, Control); return Result; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionLn ( FXMVECTOR Q ) { static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } }; XMVECTOR QW = XMVectorSplatW(Q); XMVECTOR Q0 = XMVectorSelect(g_XMSelect1110.v, Q, g_XMSelect1110.v); XMVECTOR ControlW = XMVectorInBounds(QW, OneMinusEpsilon.v); XMVECTOR Theta = XMVectorACos(QW); XMVECTOR SinTheta = XMVectorSin(Theta); XMVECTOR S = XMVectorDivide(Theta,SinTheta); XMVECTOR Result = XMVectorMultiply(Q0, S); Result = XMVectorSelect(Q0, Result, ControlW); return Result; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionExp ( FXMVECTOR Q ) { XMVECTOR Theta = XMVector3Length(Q); XMVECTOR SinTheta, CosTheta; XMVectorSinCos(&SinTheta, &CosTheta, Theta); XMVECTOR S = XMVectorDivide(SinTheta, Theta); XMVECTOR Result = XMVectorMultiply(Q, S); const XMVECTOR Zero = XMVectorZero(); XMVECTOR Control = XMVectorNearEqual(Theta, Zero, g_XMEpsilon.v); Result = XMVectorSelect(Result, Q, Control); Result = XMVectorSelect(CosTheta, Result, g_XMSelect1110.v); return Result; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionSlerp ( FXMVECTOR Q0, FXMVECTOR Q1, float t ) { XMVECTOR T = XMVectorReplicate(t); return XMQuaternionSlerpV(Q0, Q1, T); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionSlerpV ( FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR T ) { assert((XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T))); // Result = Q0 * sin((1.0 - t) * Omega) / sin(Omega) + Q1 * sin(t * Omega) / sin(Omega) #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } }; XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); const XMVECTOR Zero = XMVectorZero(); XMVECTOR Control = XMVectorLess(CosOmega, Zero); XMVECTOR Sign = XMVectorSelect(g_XMOne.v, g_XMNegativeOne.v, Control); CosOmega = XMVectorMultiply(CosOmega, Sign); Control = XMVectorLess(CosOmega, OneMinusEpsilon); XMVECTOR SinOmega = XMVectorNegativeMultiplySubtract(CosOmega, CosOmega, g_XMOne.v); SinOmega = XMVectorSqrt(SinOmega); XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); XMVECTOR SignMask = XMVectorSplatSignMask(); XMVECTOR V01 = XMVectorShiftLeft(T, Zero, 2); SignMask = XMVectorShiftLeft(SignMask, Zero, 3); V01 = XMVectorXorInt(V01, SignMask); V01 = XMVectorAdd(g_XMIdentityR0.v, V01); XMVECTOR InvSinOmega = XMVectorReciprocal(SinOmega); XMVECTOR S0 = XMVectorMultiply(V01, Omega); S0 = XMVectorSin(S0); S0 = XMVectorMultiply(S0, InvSinOmega); S0 = XMVectorSelect(V01, S0, Control); XMVECTOR S1 = XMVectorSplatY(S0); S0 = XMVectorSplatX(S0); S1 = XMVectorMultiply(S1, Sign); XMVECTOR Result = XMVectorMultiply(Q0, S0); Result = XMVectorMultiplyAdd(Q1, S1, Result); return Result; #elif defined(_XM_SSE_INTRINSICS_) static const XMVECTORF32 OneMinusEpsilon = { { { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f } } }; static const XMVECTORU32 SignMask2 = { { { 0x80000000, 0x00000000, 0x00000000, 0x00000000 } } }; XMVECTOR CosOmega = XMQuaternionDot(Q0, Q1); const XMVECTOR Zero = XMVectorZero(); XMVECTOR Control = XMVectorLess(CosOmega, Zero); XMVECTOR Sign = XMVectorSelect(g_XMOne, g_XMNegativeOne, Control); CosOmega = _mm_mul_ps(CosOmega, Sign); Control = XMVectorLess(CosOmega, OneMinusEpsilon); XMVECTOR SinOmega = _mm_mul_ps(CosOmega,CosOmega); SinOmega = _mm_sub_ps(g_XMOne,SinOmega); SinOmega = _mm_sqrt_ps(SinOmega); XMVECTOR Omega = XMVectorATan2(SinOmega, CosOmega); XMVECTOR V01 = XM_PERMUTE_PS(T,_MM_SHUFFLE(2,3,0,1)); V01 = _mm_and_ps(V01,g_XMMaskXY); V01 = _mm_xor_ps(V01,SignMask2); V01 = _mm_add_ps(g_XMIdentityR0, V01); XMVECTOR S0 = _mm_mul_ps(V01, Omega); S0 = XMVectorSin(S0); S0 = _mm_div_ps(S0, SinOmega); S0 = XMVectorSelect(V01, S0, Control); XMVECTOR S1 = XMVectorSplatY(S0); S0 = XMVectorSplatX(S0); S1 = _mm_mul_ps(S1, Sign); XMVECTOR Result = _mm_mul_ps(Q0, S0); S1 = _mm_mul_ps(S1, Q1); Result = _mm_add_ps(Result,S1); return Result; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionSquad ( FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3, float t ) { XMVECTOR T = XMVectorReplicate(t); return XMQuaternionSquadV(Q0, Q1, Q2, Q3, T); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionSquadV ( FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3, HXMVECTOR T ) { assert( (XMVectorGetY(T) == XMVectorGetX(T)) && (XMVectorGetZ(T) == XMVectorGetX(T)) && (XMVectorGetW(T) == XMVectorGetX(T)) ); XMVECTOR TP = T; const XMVECTOR Two = XMVectorSplatConstant(2, 0); XMVECTOR Q03 = XMQuaternionSlerpV(Q0, Q3, T); XMVECTOR Q12 = XMQuaternionSlerpV(Q1, Q2, T); TP = XMVectorNegativeMultiplySubtract(TP, TP, TP); TP = XMVectorMultiply(TP, Two); XMVECTOR Result = XMQuaternionSlerpV(Q03, Q12, TP); return Result; } //------------------------------------------------------------------------------ _Use_decl_annotations_ inline void XM_CALLCONV XMQuaternionSquadSetup ( XMVECTOR* pA, XMVECTOR* pB, XMVECTOR* pC, FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR Q3 ) { assert(pA); assert(pB); assert(pC); XMVECTOR LS12 = XMQuaternionLengthSq(XMVectorAdd(Q1, Q2)); XMVECTOR LD12 = XMQuaternionLengthSq(XMVectorSubtract(Q1, Q2)); XMVECTOR SQ2 = XMVectorNegate(Q2); XMVECTOR Control1 = XMVectorLess(LS12, LD12); SQ2 = XMVectorSelect(Q2, SQ2, Control1); XMVECTOR LS01 = XMQuaternionLengthSq(XMVectorAdd(Q0, Q1)); XMVECTOR LD01 = XMQuaternionLengthSq(XMVectorSubtract(Q0, Q1)); XMVECTOR SQ0 = XMVectorNegate(Q0); XMVECTOR LS23 = XMQuaternionLengthSq(XMVectorAdd(SQ2, Q3)); XMVECTOR LD23 = XMQuaternionLengthSq(XMVectorSubtract(SQ2, Q3)); XMVECTOR SQ3 = XMVectorNegate(Q3); XMVECTOR Control0 = XMVectorLess(LS01, LD01); XMVECTOR Control2 = XMVectorLess(LS23, LD23); SQ0 = XMVectorSelect(Q0, SQ0, Control0); SQ3 = XMVectorSelect(Q3, SQ3, Control2); XMVECTOR InvQ1 = XMQuaternionInverse(Q1); XMVECTOR InvQ2 = XMQuaternionInverse(SQ2); XMVECTOR LnQ0 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ0)); XMVECTOR LnQ2 = XMQuaternionLn(XMQuaternionMultiply(InvQ1, SQ2)); XMVECTOR LnQ1 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, Q1)); XMVECTOR LnQ3 = XMQuaternionLn(XMQuaternionMultiply(InvQ2, SQ3)); const XMVECTOR NegativeOneQuarter = XMVectorSplatConstant(-1, 2); XMVECTOR ExpQ02 = XMVectorMultiply(XMVectorAdd(LnQ0, LnQ2), NegativeOneQuarter); XMVECTOR ExpQ13 = XMVectorMultiply(XMVectorAdd(LnQ1, LnQ3), NegativeOneQuarter); ExpQ02 = XMQuaternionExp(ExpQ02); ExpQ13 = XMQuaternionExp(ExpQ13); *pA = XMQuaternionMultiply(Q1, ExpQ02); *pB = XMQuaternionMultiply(SQ2, ExpQ13); *pC = SQ2; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentric ( FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, float f, float g ) { float s = f + g; XMVECTOR Result; if ((s < 0.00001f) && (s > -0.00001f)) { Result = Q0; } else { XMVECTOR Q01 = XMQuaternionSlerp(Q0, Q1, s); XMVECTOR Q02 = XMQuaternionSlerp(Q0, Q2, s); Result = XMQuaternionSlerp(Q01, Q02, g / s); } return Result; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionBaryCentricV ( FXMVECTOR Q0, FXMVECTOR Q1, FXMVECTOR Q2, GXMVECTOR F, HXMVECTOR G ) { assert( (XMVectorGetY(F) == XMVectorGetX(F)) && (XMVectorGetZ(F) == XMVectorGetX(F)) && (XMVectorGetW(F) == XMVectorGetX(F)) ); assert( (XMVectorGetY(G) == XMVectorGetX(G)) && (XMVectorGetZ(G) == XMVectorGetX(G)) && (XMVectorGetW(G) == XMVectorGetX(G)) ); const XMVECTOR Epsilon = XMVectorSplatConstant(1, 16); XMVECTOR S = XMVectorAdd(F, G); XMVECTOR Result; if (XMVector4InBounds(S, Epsilon)) { Result = Q0; } else { XMVECTOR Q01 = XMQuaternionSlerpV(Q0, Q1, S); XMVECTOR Q02 = XMQuaternionSlerpV(Q0, Q2, S); XMVECTOR GS = XMVectorReciprocal(S); GS = XMVectorMultiply(G, GS); Result = XMQuaternionSlerpV(Q01, Q02, GS); } return Result; } //------------------------------------------------------------------------------ // Transformation operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionIdentity() { return g_XMIdentityR3.v; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYaw ( float Pitch, float Yaw, float Roll ) { XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f); XMVECTOR Q = XMQuaternionRotationRollPitchYawFromVector(Angles); return Q; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionRotationRollPitchYawFromVector ( FXMVECTOR Angles // ) { static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, -1.0f, 1.0f } } }; XMVECTOR HalfAngles = XMVectorMultiply(Angles, g_XMOneHalf.v); XMVECTOR SinAngles, CosAngles; XMVectorSinCos(&SinAngles, &CosAngles, HalfAngles); XMVECTOR P0 = XMVectorPermute(SinAngles, CosAngles); XMVECTOR Y0 = XMVectorPermute(SinAngles, CosAngles); XMVECTOR R0 = XMVectorPermute(SinAngles, CosAngles); XMVECTOR P1 = XMVectorPermute(CosAngles, SinAngles); XMVECTOR Y1 = XMVectorPermute(CosAngles, SinAngles); XMVECTOR R1 = XMVectorPermute(CosAngles, SinAngles); XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v); XMVECTOR Q0 = XMVectorMultiply(P0, Y0); Q1 = XMVectorMultiply(Q1, Y1); Q0 = XMVectorMultiply(Q0, R0); XMVECTOR Q = XMVectorMultiplyAdd(Q1, R1, Q0); return Q; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionRotationNormal ( FXMVECTOR NormalAxis, float Angle ) { #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR N = XMVectorSelect(g_XMOne.v, NormalAxis, g_XMSelect1110.v); float SinV, CosV; XMScalarSinCos(&SinV, &CosV, 0.5f * Angle); XMVECTOR Scale = XMVectorSet( SinV, SinV, SinV, CosV ); return XMVectorMultiply(N, Scale); #elif defined(_XM_SSE_INTRINSICS_) XMVECTOR N = _mm_and_ps(NormalAxis,g_XMMask3); N = _mm_or_ps(N,g_XMIdentityR3); XMVECTOR Scale = _mm_set_ps1(0.5f * Angle); XMVECTOR vSine; XMVECTOR vCosine; XMVectorSinCos(&vSine,&vCosine,Scale); Scale = _mm_and_ps(vSine,g_XMMask3); vCosine = _mm_and_ps(vCosine,g_XMMaskW); Scale = _mm_or_ps(Scale,vCosine); N = _mm_mul_ps(N,Scale); return N; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionRotationAxis ( FXMVECTOR Axis, float Angle ) { assert(!XMVector3Equal(Axis, XMVectorZero())); assert(!XMVector3IsInfinite(Axis)); XMVECTOR Normal = XMVector3Normalize(Axis); XMVECTOR Q = XMQuaternionRotationNormal(Normal, Angle); return Q; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMQuaternionRotationMatrix ( FXMMATRIX M ) { #if defined(_XM_NO_INTRINSICS_) XMVECTORF32 q; float r22 = M.m[2][2]; if (r22 <= 0.f) // x^2 + y^2 >= z^2 + w^2 { float dif10 = M.m[1][1] - M.m[0][0]; float omr22 = 1.f - r22; if (dif10 <= 0.f) // x^2 >= y^2 { float fourXSqr = omr22 - dif10; float inv4x = 0.5f / sqrtf(fourXSqr); q.f[0] = fourXSqr*inv4x; q.f[1] = (M.m[0][1] + M.m[1][0])*inv4x; q.f[2] = (M.m[0][2] + M.m[2][0])*inv4x; q.f[3] = (M.m[1][2] - M.m[2][1])*inv4x; } else // y^2 >= x^2 { float fourYSqr = omr22 + dif10; float inv4y = 0.5f / sqrtf(fourYSqr); q.f[0] = (M.m[0][1] + M.m[1][0])*inv4y; q.f[1] = fourYSqr*inv4y; q.f[2] = (M.m[1][2] + M.m[2][1])*inv4y; q.f[3] = (M.m[2][0] - M.m[0][2])*inv4y; } } else // z^2 + w^2 >= x^2 + y^2 { float sum10 = M.m[1][1] + M.m[0][0]; float opr22 = 1.f + r22; if (sum10 <= 0.f) // z^2 >= w^2 { float fourZSqr = opr22 - sum10; float inv4z = 0.5f / sqrtf(fourZSqr); q.f[0] = (M.m[0][2] + M.m[2][0])*inv4z; q.f[1] = (M.m[1][2] + M.m[2][1])*inv4z; q.f[2] = fourZSqr*inv4z; q.f[3] = (M.m[0][1] - M.m[1][0])*inv4z; } else // w^2 >= z^2 { float fourWSqr = opr22 + sum10; float inv4w = 0.5f / sqrtf(fourWSqr); q.f[0] = (M.m[1][2] - M.m[2][1])*inv4w; q.f[1] = (M.m[2][0] - M.m[0][2])*inv4w; q.f[2] = (M.m[0][1] - M.m[1][0])*inv4w; q.f[3] = fourWSqr*inv4w; } } return q.v; #elif defined(_XM_ARM_NEON_INTRINSICS_) static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } }; static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } }; static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } }; static const XMVECTORU32 Select0110 = { { { XM_SELECT_0, XM_SELECT_1, XM_SELECT_1, XM_SELECT_0 } } }; static const XMVECTORU32 Select0010 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_1, XM_SELECT_0 } } }; XMVECTOR r0 = M.r[0]; XMVECTOR r1 = M.r[1]; XMVECTOR r2 = M.r[2]; XMVECTOR r00 = vdupq_lane_f32(vget_low_f32(r0), 0); XMVECTOR r11 = vdupq_lane_f32(vget_low_f32(r1), 1); XMVECTOR r22 = vdupq_lane_f32(vget_high_f32(r2), 0); // x^2 >= y^2 equivalent to r11 - r00 <= 0 XMVECTOR r11mr00 = vsubq_f32(r11, r00); XMVECTOR x2gey2 = vcleq_f32(r11mr00, g_XMZero); // z^2 >= w^2 equivalent to r11 + r00 <= 0 XMVECTOR r11pr00 = vaddq_f32(r11, r00); XMVECTOR z2gew2 = vcleq_f32(r11pr00, g_XMZero); // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 XMVECTOR x2py2gez2pw2 = vcleq_f32(r22, g_XMZero); // (4*x^2, 4*y^2, 4*z^2, 4*w^2) XMVECTOR t0 = vmulq_f32( XMPMMP, r00 ); XMVECTOR x2y2z2w2 = vmlaq_f32( t0, XMMPMP, r11 ); x2y2z2w2 = vmlaq_f32( x2y2z2w2, XMMMPP, r22 ); x2y2z2w2 = vaddq_f32( x2y2z2w2, g_XMOne ); // (r01, r02, r12, r11) t0 = vextq_f32(r0, r0, 1); XMVECTOR t1 = vextq_f32(r1, r1, 1); t0 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_low_f32( t1 ) ) ); // (r10, r20, r21, r10) t1 = vextq_f32(r2, r2, 3); XMVECTOR r10 = vdupq_lane_f32( vget_low_f32(r1), 0 ); t1 = vbslq_f32( Select0110, t1, r10 ); // (4*x*y, 4*x*z, 4*y*z, unused) XMVECTOR xyxzyz = vaddq_f32(t0, t1); // (r21, r20, r10, r10) t0 = vcombine_f32( vrev64_f32( vget_low_f32(r2) ), vget_low_f32(r10) ); // (r12, r02, r01, r12) XMVECTOR t2 = vcombine_f32( vrev64_f32( vget_high_f32(r0) ), vrev64_f32( vget_low_f32(r0) ) ); XMVECTOR t3 = vdupq_lane_f32( vget_high_f32(r1), 0 ); t1 = vbslq_f32( Select0110, t2, t3 ); // (4*x*w, 4*y*w, 4*z*w, unused) XMVECTOR xwywzw = vsubq_f32(t0, t1); xwywzw = vmulq_f32(XMMPMP, xwywzw); // (4*x*x, 4*x*y, 4*x*z, 4*x*w) t0 = vextq_f32( xyxzyz, xyxzyz, 3 ); t1 = vbslq_f32( Select0110, t0, x2y2z2w2 ); t2 = vdupq_lane_f32( vget_low_f32(xwywzw), 0 ); XMVECTOR tensor0 = vbslq_f32( g_XMSelect1110, t1, t2 ); // (4*y*x, 4*y*y, 4*y*z, 4*y*w) t0 = vbslq_f32( g_XMSelect1011, xyxzyz, x2y2z2w2 ); t1 = vdupq_lane_f32( vget_low_f32(xwywzw), 1 ); XMVECTOR tensor1 = vbslq_f32( g_XMSelect1110, t0, t1 ); // (4*z*x, 4*z*y, 4*z*z, 4*z*w) t0 = vextq_f32(xyxzyz, xyxzyz, 1); t1 = vcombine_f32( vget_low_f32(t0), vrev64_f32( vget_high_f32(xwywzw) ) ); XMVECTOR tensor2 = vbslq_f32( Select0010, x2y2z2w2, t1 ); // (4*w*x, 4*w*y, 4*w*z, 4*w*w) XMVECTOR tensor3 = vbslq_f32( g_XMSelect1110, xwywzw, x2y2z2w2 ); // Select the row of the tensor-product matrix that has the largest // magnitude. t0 = vbslq_f32( x2gey2, tensor0, tensor1 ); t1 = vbslq_f32( z2gew2, tensor2, tensor3 ); t2 = vbslq_f32( x2py2gez2pw2, t0, t1 ); // Normalize the row. No division by zero is possible because the // quaternion is unit-length (and the row is a nonzero multiple of // the quaternion). t0 = XMVector4Length(t2); return XMVectorDivide(t2, t0); #elif defined(_XM_SSE_INTRINSICS_) static const XMVECTORF32 XMPMMP = { { { +1.0f, -1.0f, -1.0f, +1.0f } } }; static const XMVECTORF32 XMMPMP = { { { -1.0f, +1.0f, -1.0f, +1.0f } } }; static const XMVECTORF32 XMMMPP = { { { -1.0f, -1.0f, +1.0f, +1.0f } } }; XMVECTOR r0 = M.r[0]; // (r00, r01, r02, 0) XMVECTOR r1 = M.r[1]; // (r10, r11, r12, 0) XMVECTOR r2 = M.r[2]; // (r20, r21, r22, 0) // (r00, r00, r00, r00) XMVECTOR r00 = XM_PERMUTE_PS(r0, _MM_SHUFFLE(0,0,0,0)); // (r11, r11, r11, r11) XMVECTOR r11 = XM_PERMUTE_PS(r1, _MM_SHUFFLE(1,1,1,1)); // (r22, r22, r22, r22) XMVECTOR r22 = XM_PERMUTE_PS(r2, _MM_SHUFFLE(2,2,2,2)); // x^2 >= y^2 equivalent to r11 - r00 <= 0 // (r11 - r00, r11 - r00, r11 - r00, r11 - r00) XMVECTOR r11mr00 = _mm_sub_ps(r11, r00); XMVECTOR x2gey2 = _mm_cmple_ps(r11mr00, g_XMZero); // z^2 >= w^2 equivalent to r11 + r00 <= 0 // (r11 + r00, r11 + r00, r11 + r00, r11 + r00) XMVECTOR r11pr00 = _mm_add_ps(r11, r00); XMVECTOR z2gew2 = _mm_cmple_ps(r11pr00, g_XMZero); // x^2 + y^2 >= z^2 + w^2 equivalent to r22 <= 0 XMVECTOR x2py2gez2pw2 = _mm_cmple_ps(r22, g_XMZero); // (+r00, -r00, -r00, +r00) XMVECTOR t0 = _mm_mul_ps(XMPMMP, r00); // (-r11, +r11, -r11, +r11) XMVECTOR t1 = _mm_mul_ps(XMMPMP, r11); // (-r22, -r22, +r22, +r22) XMVECTOR t2 = _mm_mul_ps(XMMMPP, r22); // (4*x^2, 4*y^2, 4*z^2, 4*w^2) XMVECTOR x2y2z2w2 = _mm_add_ps(t0, t1); x2y2z2w2 = _mm_add_ps(t2, x2y2z2w2); x2y2z2w2 = _mm_add_ps(x2y2z2w2, g_XMOne); // (r01, r02, r12, r11) t0 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1,2,2,1)); // (r10, r10, r20, r21) t1 = _mm_shuffle_ps(r1, r2, _MM_SHUFFLE(1,0,0,0)); // (r10, r20, r21, r10) t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0)); // (4*x*y, 4*x*z, 4*y*z, unused) XMVECTOR xyxzyz = _mm_add_ps(t0, t1); // (r21, r20, r10, r10) t0 = _mm_shuffle_ps(r2, r1, _MM_SHUFFLE(0,0,0,1)); // (r12, r12, r02, r01) t1 = _mm_shuffle_ps(r1, r0, _MM_SHUFFLE(1,2,2,2)); // (r12, r02, r01, r12) t1 = XM_PERMUTE_PS(t1, _MM_SHUFFLE(1,3,2,0)); // (4*x*w, 4*y*w, 4*z*w, unused) XMVECTOR xwywzw = _mm_sub_ps(t0, t1); xwywzw = _mm_mul_ps(XMMPMP, xwywzw); // (4*x^2, 4*y^2, 4*x*y, unused) t0 = _mm_shuffle_ps(x2y2z2w2, xyxzyz, _MM_SHUFFLE(0,0,1,0)); // (4*z^2, 4*w^2, 4*z*w, unused) t1 = _mm_shuffle_ps(x2y2z2w2, xwywzw, _MM_SHUFFLE(0,2,3,2)); // (4*x*z, 4*y*z, 4*x*w, 4*y*w) t2 = _mm_shuffle_ps(xyxzyz, xwywzw, _MM_SHUFFLE(1,0,2,1)); // (4*x*x, 4*x*y, 4*x*z, 4*x*w) XMVECTOR tensor0 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(2,0,2,0)); // (4*y*x, 4*y*y, 4*y*z, 4*y*w) XMVECTOR tensor1 = _mm_shuffle_ps(t0, t2, _MM_SHUFFLE(3,1,1,2)); // (4*z*x, 4*z*y, 4*z*z, 4*z*w) XMVECTOR tensor2 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(2,0,1,0)); // (4*w*x, 4*w*y, 4*w*z, 4*w*w) XMVECTOR tensor3 = _mm_shuffle_ps(t2, t1, _MM_SHUFFLE(1,2,3,2)); // Select the row of the tensor-product matrix that has the largest // magnitude. t0 = _mm_and_ps(x2gey2, tensor0); t1 = _mm_andnot_ps(x2gey2, tensor1); t0 = _mm_or_ps(t0, t1); t1 = _mm_and_ps(z2gew2, tensor2); t2 = _mm_andnot_ps(z2gew2, tensor3); t1 = _mm_or_ps(t1, t2); t0 = _mm_and_ps(x2py2gez2pw2, t0); t1 = _mm_andnot_ps(x2py2gez2pw2, t1); t2 = _mm_or_ps(t0, t1); // Normalize the row. No division by zero is possible because the // quaternion is unit-length (and the row is a nonzero multiple of // the quaternion). t0 = XMVector4Length(t2); return _mm_div_ps(t2, t0); #endif } //------------------------------------------------------------------------------ // Conversion operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ _Use_decl_annotations_ inline void XM_CALLCONV XMQuaternionToAxisAngle ( XMVECTOR* pAxis, float* pAngle, FXMVECTOR Q ) { assert(pAxis); assert(pAngle); *pAxis = Q; *pAngle = 2.0f * XMScalarACos(XMVectorGetW(Q)); } /**************************************************************************** * * Plane * ****************************************************************************/ //------------------------------------------------------------------------------ // Comparison operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMPlaneEqual ( FXMVECTOR P1, FXMVECTOR P2 ) { return XMVector4Equal(P1, P2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMPlaneNearEqual ( FXMVECTOR P1, FXMVECTOR P2, FXMVECTOR Epsilon ) { XMVECTOR NP1 = XMPlaneNormalize(P1); XMVECTOR NP2 = XMPlaneNormalize(P2); return XMVector4NearEqual(NP1, NP2, Epsilon); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMPlaneNotEqual ( FXMVECTOR P1, FXMVECTOR P2 ) { return XMVector4NotEqual(P1, P2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMPlaneIsNaN ( FXMVECTOR P ) { return XMVector4IsNaN(P); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMPlaneIsInfinite ( FXMVECTOR P ) { return XMVector4IsInfinite(P); } //------------------------------------------------------------------------------ // Computation operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneDot ( FXMVECTOR P, FXMVECTOR V ) { return XMVector4Dot(P, V); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneDotCoord ( FXMVECTOR P, FXMVECTOR V ) { // Result = P[0] * V[0] + P[1] * V[1] + P[2] * V[2] + P[3] XMVECTOR V3 = XMVectorSelect(g_XMOne.v, V, g_XMSelect1110.v); XMVECTOR Result = XMVector4Dot(P, V3); return Result; } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneDotNormal ( FXMVECTOR P, FXMVECTOR V ) { return XMVector3Dot(P, V); } //------------------------------------------------------------------------------ // XMPlaneNormalizeEst uses a reciprocal estimate and // returns QNaN on zero and infinite vectors. inline XMVECTOR XM_CALLCONV XMPlaneNormalizeEst ( FXMVECTOR P ) { #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR Result = XMVector3ReciprocalLengthEst(P); return XMVectorMultiply(P, Result); #elif defined(_XM_SSE4_INTRINSICS_) XMVECTOR vTemp = _mm_dp_ps( P, P, 0x7f ); XMVECTOR vResult = _mm_rsqrt_ps( vTemp ); return _mm_mul_ps(vResult, P); #elif defined(_XM_SSE_INTRINSICS_) // Perform the dot product XMVECTOR vDot = _mm_mul_ps(P,P); // x=Dot.y, y=Dot.z XMVECTOR vTemp = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(2,1,2,1)); // Result.x = x+y vDot = _mm_add_ss(vDot,vTemp); // x=Dot.z vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1)); // Result.x = (x+y)+z vDot = _mm_add_ss(vDot,vTemp); // Splat x vDot = XM_PERMUTE_PS(vDot,_MM_SHUFFLE(0,0,0,0)); // Get the reciprocal vDot = _mm_rsqrt_ps(vDot); // Get the reciprocal vDot = _mm_mul_ps(vDot,P); return vDot; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneNormalize ( FXMVECTOR P ) { #if defined(_XM_NO_INTRINSICS_) float fLengthSq = sqrtf((P.vector4_f32[0]*P.vector4_f32[0])+(P.vector4_f32[1]*P.vector4_f32[1])+(P.vector4_f32[2]*P.vector4_f32[2])); // Prevent divide by zero if (fLengthSq) { fLengthSq = 1.0f/fLengthSq; } XMVECTORF32 vResult = { { { P.vector4_f32[0] * fLengthSq, P.vector4_f32[1] * fLengthSq, P.vector4_f32[2] * fLengthSq, P.vector4_f32[3] * fLengthSq } } }; return vResult.v; #elif defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR vLength = XMVector3ReciprocalLength(P); return XMVectorMultiply( P, vLength ); #elif defined(_XM_SSE4_INTRINSICS_) XMVECTOR vLengthSq = _mm_dp_ps( P, P, 0x7f ); // Prepare for the division XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); // Reciprocal mul to perform the normalization vResult = _mm_div_ps(P,vResult); // Any that are infinity, set to zero vResult = _mm_and_ps(vResult,vLengthSq); return vResult; #elif defined(_XM_SSE_INTRINSICS_) // Perform the dot product on x,y and z only XMVECTOR vLengthSq = _mm_mul_ps(P,P); XMVECTOR vTemp = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(2,1,2,1)); vLengthSq = _mm_add_ss(vLengthSq,vTemp); vTemp = XM_PERMUTE_PS(vTemp,_MM_SHUFFLE(1,1,1,1)); vLengthSq = _mm_add_ss(vLengthSq,vTemp); vLengthSq = XM_PERMUTE_PS(vLengthSq,_MM_SHUFFLE(0,0,0,0)); // Prepare for the division XMVECTOR vResult = _mm_sqrt_ps(vLengthSq); // Failsafe on zero (Or epsilon) length planes // If the length is infinity, set the elements to zero vLengthSq = _mm_cmpneq_ps(vLengthSq,g_XMInfinity); // Reciprocal mul to perform the normalization vResult = _mm_div_ps(P,vResult); // Any that are infinity, set to zero vResult = _mm_and_ps(vResult,vLengthSq); return vResult; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneIntersectLine ( FXMVECTOR P, FXMVECTOR LinePoint1, FXMVECTOR LinePoint2 ) { XMVECTOR V1 = XMVector3Dot(P, LinePoint1); XMVECTOR V2 = XMVector3Dot(P, LinePoint2); XMVECTOR D = XMVectorSubtract(V1, V2); XMVECTOR VT = XMPlaneDotCoord(P, LinePoint1); VT = XMVectorDivide(VT, D); XMVECTOR Point = XMVectorSubtract(LinePoint2, LinePoint1); Point = XMVectorMultiplyAdd(Point, VT, LinePoint1); const XMVECTOR Zero = XMVectorZero(); XMVECTOR Control = XMVectorNearEqual(D, Zero, g_XMEpsilon.v); return XMVectorSelect(Point, g_XMQNaN.v, Control); } //------------------------------------------------------------------------------ _Use_decl_annotations_ inline void XM_CALLCONV XMPlaneIntersectPlane ( XMVECTOR* pLinePoint1, XMVECTOR* pLinePoint2, FXMVECTOR P1, FXMVECTOR P2 ) { assert(pLinePoint1); assert(pLinePoint2); XMVECTOR V1 = XMVector3Cross(P2, P1); XMVECTOR LengthSq = XMVector3LengthSq(V1); XMVECTOR V2 = XMVector3Cross(P2, V1); XMVECTOR P1W = XMVectorSplatW(P1); XMVECTOR Point = XMVectorMultiply(V2, P1W); XMVECTOR V3 = XMVector3Cross(V1, P1); XMVECTOR P2W = XMVectorSplatW(P2); Point = XMVectorMultiplyAdd(V3, P2W, Point); XMVECTOR LinePoint1 = XMVectorDivide(Point, LengthSq); XMVECTOR LinePoint2 = XMVectorAdd(LinePoint1, V1); XMVECTOR Control = XMVectorLessOrEqual(LengthSq, g_XMEpsilon.v); *pLinePoint1 = XMVectorSelect(LinePoint1,g_XMQNaN.v, Control); *pLinePoint2 = XMVectorSelect(LinePoint2,g_XMQNaN.v, Control); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneTransform ( FXMVECTOR P, FXMMATRIX M ) { XMVECTOR W = XMVectorSplatW(P); XMVECTOR Z = XMVectorSplatZ(P); XMVECTOR Y = XMVectorSplatY(P); XMVECTOR X = XMVectorSplatX(P); XMVECTOR Result = XMVectorMultiply(W, M.r[3]); Result = XMVectorMultiplyAdd(Z, M.r[2], Result); Result = XMVectorMultiplyAdd(Y, M.r[1], Result); Result = XMVectorMultiplyAdd(X, M.r[0], Result); return Result; } //------------------------------------------------------------------------------ _Use_decl_annotations_ inline XMFLOAT4* XM_CALLCONV XMPlaneTransformStream ( XMFLOAT4* pOutputStream, size_t OutputStride, const XMFLOAT4* pInputStream, size_t InputStride, size_t PlaneCount, FXMMATRIX M ) { return XMVector4TransformStream(pOutputStream, OutputStride, pInputStream, InputStride, PlaneCount, M); } //------------------------------------------------------------------------------ // Conversion operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneFromPointNormal ( FXMVECTOR Point, FXMVECTOR Normal ) { XMVECTOR W = XMVector3Dot(Point, Normal); W = XMVectorNegate(W); return XMVectorSelect(W, Normal, g_XMSelect1110.v); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMPlaneFromPoints ( FXMVECTOR Point1, FXMVECTOR Point2, FXMVECTOR Point3 ) { XMVECTOR V21 = XMVectorSubtract(Point1, Point2); XMVECTOR V31 = XMVectorSubtract(Point1, Point3); XMVECTOR N = XMVector3Cross(V21, V31); N = XMVector3Normalize(N); XMVECTOR D = XMPlaneDotNormal(N, Point1); D = XMVectorNegate(D); XMVECTOR Result = XMVectorSelect(D, N, g_XMSelect1110.v); return Result; } /**************************************************************************** * * Color * ****************************************************************************/ //------------------------------------------------------------------------------ // Comparison operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorEqual ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVector4Equal(C1, C2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorNotEqual ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVector4NotEqual(C1, C2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorGreater ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVector4Greater(C1, C2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorGreaterOrEqual ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVector4GreaterOrEqual(C1, C2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorLess ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVector4Less(C1, C2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorLessOrEqual ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVector4LessOrEqual(C1, C2); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorIsNaN ( FXMVECTOR C ) { return XMVector4IsNaN(C); } //------------------------------------------------------------------------------ inline bool XM_CALLCONV XMColorIsInfinite ( FXMVECTOR C ) { return XMVector4IsInfinite(C); } //------------------------------------------------------------------------------ // Computation operations //------------------------------------------------------------------------------ //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorNegative ( FXMVECTOR vColor ) { #if defined(_XM_NO_INTRINSICS_) XMVECTORF32 vResult = { { { 1.0f - vColor.vector4_f32[0], 1.0f - vColor.vector4_f32[1], 1.0f - vColor.vector4_f32[2], vColor.vector4_f32[3] } } }; return vResult.v; #elif defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR vTemp = veorq_u32(vColor,g_XMNegate3); return vaddq_f32(vTemp,g_XMOne3); #elif defined(_XM_SSE_INTRINSICS_) // Negate only x,y and z. XMVECTOR vTemp = _mm_xor_ps(vColor,g_XMNegate3); // Add 1,1,1,0 to -x,-y,-z,w return _mm_add_ps(vTemp,g_XMOne3); #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorModulate ( FXMVECTOR C1, FXMVECTOR C2 ) { return XMVectorMultiply(C1, C2); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorAdjustSaturation ( FXMVECTOR vColor, float fSaturation ) { // Luminance = 0.2125f * C[0] + 0.7154f * C[1] + 0.0721f * C[2]; // Result = (C - Luminance) * Saturation + Luminance; const XMVECTORF32 gvLuminance = { { { 0.2125f, 0.7154f, 0.0721f, 0.0f } } }; #if defined(_XM_NO_INTRINSICS_) float fLuminance = (vColor.vector4_f32[0]*gvLuminance.f[0])+(vColor.vector4_f32[1]*gvLuminance.f[1])+(vColor.vector4_f32[2]*gvLuminance.f[2]); XMVECTOR vResult; vResult.vector4_f32[0] = ((vColor.vector4_f32[0] - fLuminance)*fSaturation)+fLuminance; vResult.vector4_f32[1] = ((vColor.vector4_f32[1] - fLuminance)*fSaturation)+fLuminance; vResult.vector4_f32[2] = ((vColor.vector4_f32[2] - fLuminance)*fSaturation)+fLuminance; vResult.vector4_f32[3] = vColor.vector4_f32[3]; return vResult; #elif defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance ); XMVECTOR vResult = vsubq_f32(vColor, vLuminance); vResult = vmlaq_n_f32( vLuminance, vResult, fSaturation ); return vbslq_f32( g_XMSelect1110, vResult, vColor ); #elif defined(_XM_SSE_INTRINSICS_) XMVECTOR vLuminance = XMVector3Dot( vColor, gvLuminance ); // Splat fSaturation XMVECTOR vSaturation = _mm_set_ps1(fSaturation); // vResult = ((vColor-vLuminance)*vSaturation)+vLuminance; XMVECTOR vResult = _mm_sub_ps(vColor,vLuminance); vResult = _mm_mul_ps(vResult,vSaturation); vResult = _mm_add_ps(vResult,vLuminance); // Retain w from the source color vLuminance = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w vResult = _mm_shuffle_ps(vResult,vLuminance,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w return vResult; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorAdjustContrast ( FXMVECTOR vColor, float fContrast ) { // Result = (vColor - 0.5f) * fContrast + 0.5f; #if defined(_XM_NO_INTRINSICS_) XMVECTORF32 vResult = { { { ((vColor.vector4_f32[0] - 0.5f) * fContrast) + 0.5f, ((vColor.vector4_f32[1] - 0.5f) * fContrast) + 0.5f, ((vColor.vector4_f32[2] - 0.5f) * fContrast) + 0.5f, vColor.vector4_f32[3] // Leave W untouched } } }; return vResult.v; #elif defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR vResult = vsubq_f32(vColor, g_XMOneHalf.v); vResult = vmlaq_n_f32( g_XMOneHalf.v, vResult, fContrast ); return vbslq_f32( g_XMSelect1110, vResult, vColor ); #elif defined(_XM_SSE_INTRINSICS_) XMVECTOR vScale = _mm_set_ps1(fContrast); // Splat the scale XMVECTOR vResult = _mm_sub_ps(vColor,g_XMOneHalf); // Subtract 0.5f from the source (Saving source) vResult = _mm_mul_ps(vResult,vScale); // Mul by scale vResult = _mm_add_ps(vResult,g_XMOneHalf); // Add 0.5f // Retain w from the source color vScale = _mm_shuffle_ps(vResult,vColor,_MM_SHUFFLE(3,2,2,2)); // x = vResult.z,y = vResult.z,z = vColor.z,w=vColor.w vResult = _mm_shuffle_ps(vResult,vScale,_MM_SHUFFLE(3,0,1,0)); // x = vResult.x,y = vResult.y,z = vResult.z,w=vColor.w return vResult; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorRGBToHSL( FXMVECTOR rgb ) { XMVECTOR r = XMVectorSplatX( rgb ); XMVECTOR g = XMVectorSplatY( rgb ); XMVECTOR b = XMVectorSplatZ( rgb ); XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) ); XMVECTOR max = XMVectorMax( r, XMVectorMax( g, b ) ); XMVECTOR l = XMVectorMultiply( XMVectorAdd( min, max ), g_XMOneHalf ); XMVECTOR d = XMVectorSubtract( max, min ); XMVECTOR la = XMVectorSelect( rgb, l, g_XMSelect1110 ); if ( XMVector3Less( d, g_XMEpsilon ) ) { // Achromatic, assume H and S of 0 return XMVectorSelect( la, g_XMZero, g_XMSelect1100 ); } else { XMVECTOR s, h; XMVECTOR d2 = XMVectorAdd( min, max ); if ( XMVector3Greater( l, g_XMOneHalf ) ) { // d / (2-max-min) s = XMVectorDivide( d, XMVectorSubtract( g_XMTwo, d2 ) ); } else { // d / (max+min) s = XMVectorDivide( d, d2 ); } if ( XMVector3Equal( r, max ) ) { // Red is max h = XMVectorDivide( XMVectorSubtract( g, b ), d ); } else if ( XMVector3Equal( g, max ) ) { // Green is max h = XMVectorDivide( XMVectorSubtract( b, r ), d ); h = XMVectorAdd( h, g_XMTwo ); } else { // Blue is max h = XMVectorDivide( XMVectorSubtract( r, g ), d ); h = XMVectorAdd( h, g_XMFour ); } h = XMVectorDivide( h, g_XMSix ); if ( XMVector3Less( h, g_XMZero ) ) h = XMVectorAdd( h, g_XMOne ); XMVECTOR lha = XMVectorSelect( la, h, g_XMSelect1100 ); return XMVectorSelect( s, lha, g_XMSelect1011 ); } } //------------------------------------------------------------------------------ namespace Internal { inline XMVECTOR XM_CALLCONV XMColorHue2Clr( FXMVECTOR p, FXMVECTOR q, FXMVECTOR h ) { static const XMVECTORF32 oneSixth = { { { 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f } } }; static const XMVECTORF32 twoThirds = { { { 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f, 2.0f / 3.0f } } }; XMVECTOR t = h; if ( XMVector3Less( t, g_XMZero ) ) t = XMVectorAdd( t, g_XMOne ); if ( XMVector3Greater( t, g_XMOne ) ) t = XMVectorSubtract( t, g_XMOne ); if ( XMVector3Less( t, oneSixth ) ) { // p + (q - p) * 6 * t XMVECTOR t1 = XMVectorSubtract( q, p ); XMVECTOR t2 = XMVectorMultiply( g_XMSix, t ); return XMVectorMultiplyAdd( t1, t2, p ); } if ( XMVector3Less( t, g_XMOneHalf ) ) return q; if ( XMVector3Less( t, twoThirds ) ) { // p + (q - p) * 6 * (2/3 - t) XMVECTOR t1 = XMVectorSubtract( q, p ); XMVECTOR t2 = XMVectorMultiply( g_XMSix, XMVectorSubtract( twoThirds, t ) ); return XMVectorMultiplyAdd( t1, t2, p ); } return p; } }; // namespace Internal inline XMVECTOR XM_CALLCONV XMColorHSLToRGB( FXMVECTOR hsl ) { static const XMVECTORF32 oneThird = { { { 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f } } }; XMVECTOR s = XMVectorSplatY( hsl ); XMVECTOR l = XMVectorSplatZ( hsl ); if ( XMVector3NearEqual( s, g_XMZero, g_XMEpsilon ) ) { // Achromatic return XMVectorSelect( hsl, l, g_XMSelect1110 ); } else { XMVECTOR h = XMVectorSplatX( hsl ); XMVECTOR q; if ( XMVector3Less( l, g_XMOneHalf ) ) { q = XMVectorMultiply( l, XMVectorAdd ( g_XMOne, s ) ); } else { q = XMVectorSubtract( XMVectorAdd( l, s ), XMVectorMultiply( l, s ) ); } XMVECTOR p = XMVectorSubtract( XMVectorMultiply( g_XMTwo, l ), q ); XMVECTOR r = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorAdd( h, oneThird ) ); XMVECTOR g = DirectX::Internal::XMColorHue2Clr( p, q, h ); XMVECTOR b = DirectX::Internal::XMColorHue2Clr( p, q, XMVectorSubtract( h, oneThird ) ); XMVECTOR rg = XMVectorSelect( g, r, g_XMSelect1000 ); XMVECTOR ba = XMVectorSelect( hsl, b, g_XMSelect1110 ); return XMVectorSelect( ba, rg, g_XMSelect1100 ); } } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorRGBToHSV( FXMVECTOR rgb ) { XMVECTOR r = XMVectorSplatX( rgb ); XMVECTOR g = XMVectorSplatY( rgb ); XMVECTOR b = XMVectorSplatZ( rgb ); XMVECTOR min = XMVectorMin( r, XMVectorMin( g, b ) ); XMVECTOR v = XMVectorMax( r, XMVectorMax( g, b ) ); XMVECTOR d = XMVectorSubtract( v, min ); XMVECTOR s = ( XMVector3NearEqual( v, g_XMZero, g_XMEpsilon ) ) ? g_XMZero : XMVectorDivide( d, v ); if ( XMVector3Less( d, g_XMEpsilon ) ) { // Achromatic, assume H of 0 XMVECTOR hv = XMVectorSelect( v, g_XMZero, g_XMSelect1000 ); XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 ); return XMVectorSelect( s, hva, g_XMSelect1011 ); } else { XMVECTOR h; if ( XMVector3Equal( r, v ) ) { // Red is max h = XMVectorDivide( XMVectorSubtract( g, b ), d ); if ( XMVector3Less( g, b ) ) h = XMVectorAdd( h, g_XMSix ); } else if ( XMVector3Equal( g, v ) ) { // Green is max h = XMVectorDivide( XMVectorSubtract( b, r ), d ); h = XMVectorAdd( h, g_XMTwo ); } else { // Blue is max h = XMVectorDivide( XMVectorSubtract( r, g ), d ); h = XMVectorAdd( h, g_XMFour ); } h = XMVectorDivide( h, g_XMSix ); XMVECTOR hv = XMVectorSelect( v, h, g_XMSelect1000 ); XMVECTOR hva = XMVectorSelect( rgb, hv, g_XMSelect1110 ); return XMVectorSelect( s, hva, g_XMSelect1011 ); } } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorHSVToRGB( FXMVECTOR hsv ) { XMVECTOR h = XMVectorSplatX( hsv ); XMVECTOR s = XMVectorSplatY( hsv ); XMVECTOR v = XMVectorSplatZ( hsv ); XMVECTOR h6 = XMVectorMultiply( h, g_XMSix ); XMVECTOR i = XMVectorFloor( h6 ); XMVECTOR f = XMVectorSubtract( h6, i ); // p = v* (1-s) XMVECTOR p = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, s ) ); // q = v*(1-f*s) XMVECTOR q = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( f, s ) ) ); // t = v*(1 - (1-f)*s) XMVECTOR t = XMVectorMultiply( v, XMVectorSubtract( g_XMOne, XMVectorMultiply( XMVectorSubtract( g_XMOne, f ), s ) ) ); int ii = static_cast( XMVectorGetX( XMVectorMod( i, g_XMSix ) ) ); XMVECTOR _rgb; switch (ii) { case 0: // rgb = vtp { XMVECTOR vt = XMVectorSelect( t, v, g_XMSelect1000 ); _rgb = XMVectorSelect( p, vt, g_XMSelect1100 ); } break; case 1: // rgb = qvp { XMVECTOR qv = XMVectorSelect( v, q, g_XMSelect1000 ); _rgb = XMVectorSelect( p, qv, g_XMSelect1100 ); } break; case 2: // rgb = pvt { XMVECTOR pv = XMVectorSelect( v, p, g_XMSelect1000 ); _rgb = XMVectorSelect( t, pv, g_XMSelect1100 ); } break; case 3: // rgb = pqv { XMVECTOR pq = XMVectorSelect( q, p, g_XMSelect1000 ); _rgb = XMVectorSelect( v, pq, g_XMSelect1100 ); } break; case 4: // rgb = tpv { XMVECTOR tp = XMVectorSelect( p, t, g_XMSelect1000 ); _rgb = XMVectorSelect( v, tp, g_XMSelect1100 ); } break; default: // rgb = vpq { XMVECTOR vp = XMVectorSelect( p, v, g_XMSelect1000 ); _rgb = XMVectorSelect( q, vp, g_XMSelect1100 ); } break; } return XMVectorSelect( hsv, _rgb, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorRGBToYUV( FXMVECTOR rgb ) { static const XMVECTORF32 Scale0 = { { { 0.299f, -0.147f, 0.615f, 0.0f } } }; static const XMVECTORF32 Scale1 = { { { 0.587f, -0.289f, -0.515f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { 0.114f, 0.436f, -0.100f, 0.0f } } }; XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVector3Transform( rgb, M ); return XMVectorSelect( rgb, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorYUVToRGB( FXMVECTOR yuv ) { static const XMVECTORF32 Scale1 = { { { 0.0f, -0.395f, 2.032f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { 1.140f, -0.581f, 0.0f, 0.0f } } }; XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVector3Transform( yuv, M ); return XMVectorSelect( yuv, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorRGBToYUV_HD( FXMVECTOR rgb ) { static const XMVECTORF32 Scale0 = { { { 0.2126f, -0.0997f, 0.6150f, 0.0f } } }; static const XMVECTORF32 Scale1 = { { { 0.7152f, -0.3354f, -0.5586f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { 0.0722f, 0.4351f, -0.0564f, 0.0f } } }; XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVector3Transform( rgb, M ); return XMVectorSelect( rgb, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorYUVToRGB_HD( FXMVECTOR yuv ) { static const XMVECTORF32 Scale1 = { { { 0.0f, -0.2153f, 2.1324f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { 1.2803f, -0.3806f, 0.0f, 0.0f } } }; XMMATRIX M( g_XMOne, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVector3Transform( yuv, M ); return XMVectorSelect( yuv, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorRGBToXYZ( FXMVECTOR rgb ) { static const XMVECTORF32 Scale0 = { { { 0.4887180f, 0.1762044f, 0.0000000f, 0.0f } } }; static const XMVECTORF32 Scale1 = { { { 0.3106803f, 0.8129847f, 0.0102048f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { 0.2006017f, 0.0108109f, 0.9897952f, 0.0f } } }; static const XMVECTORF32 Scale = { { { 1.f / 0.17697f, 1.f / 0.17697f, 1.f / 0.17697f, 0.0f } } }; XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVectorMultiply( XMVector3Transform( rgb, M ), Scale ); return XMVectorSelect( rgb, clr, g_XMSelect1110 ); } inline XMVECTOR XM_CALLCONV XMColorXYZToRGB( FXMVECTOR xyz ) { static const XMVECTORF32 Scale0 = { { { 2.3706743f, -0.5138850f, 0.0052982f, 0.0f } } }; static const XMVECTORF32 Scale1 = { { { -0.9000405f, 1.4253036f, -0.0146949f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { -0.4706338f, 0.0885814f, 1.0093968f, 0.0f } } }; static const XMVECTORF32 Scale = { { { 0.17697f, 0.17697f, 0.17697f, 0.0f } } }; XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVector3Transform( XMVectorMultiply( xyz, Scale ), M ); return XMVectorSelect( xyz, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorXYZToSRGB( FXMVECTOR xyz ) { static const XMVECTORF32 Scale0 = { { { 3.2406f, -0.9689f, 0.0557f, 0.0f } } }; static const XMVECTORF32 Scale1 = { { { -1.5372f, 1.8758f, -0.2040f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { -0.4986f, 0.0415f, 1.0570f, 0.0f } } }; static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 0.0f } } }; static const XMVECTORF32 Exp = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.0f } } }; XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); XMVECTOR lclr = XMVector3Transform( xyz, M ); XMVECTOR sel = XMVectorGreater( lclr, Cutoff ); // clr = 12.92 * lclr for lclr <= 0.0031308f XMVECTOR smallC = XMVectorMultiply( lclr, g_XMsrgbScale ); // clr = (1+a)*pow(lclr, 1/2.4) - a for lclr > 0.0031308 (where a = 0.055) XMVECTOR largeC = XMVectorSubtract( XMVectorMultiply( g_XMsrgbA1, XMVectorPow( lclr, Exp ) ), g_XMsrgbA ); XMVECTOR clr = XMVectorSelect( smallC, largeC, sel ); return XMVectorSelect( xyz, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorSRGBToXYZ( FXMVECTOR srgb ) { static const XMVECTORF32 Scale0 = { { { 0.4124f, 0.2126f, 0.0193f, 0.0f } } }; static const XMVECTORF32 Scale1 = { { { 0.3576f, 0.7152f, 0.1192f, 0.0f } } }; static const XMVECTORF32 Scale2 = { { { 0.1805f, 0.0722f, 0.9505f, 0.0f } } }; static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 0.0f } } }; static const XMVECTORF32 Exp = { { { 2.4f, 2.4f, 2.4f, 1.0f } } }; XMVECTOR sel = XMVectorGreater( srgb, Cutoff ); // lclr = clr / 12.92 XMVECTOR smallC = XMVectorDivide( srgb, g_XMsrgbScale ); // lclr = pow( (clr + a) / (1+a), 2.4 ) XMVECTOR largeC = XMVectorPow( XMVectorDivide( XMVectorAdd( srgb, g_XMsrgbA ), g_XMsrgbA1 ), Exp ); XMVECTOR lclr = XMVectorSelect( smallC, largeC, sel ); XMMATRIX M( Scale0, Scale1, Scale2, g_XMZero ); XMVECTOR clr = XMVector3Transform( lclr, M ); return XMVectorSelect( srgb, clr, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorRGBToSRGB( FXMVECTOR rgb ) { static const XMVECTORF32 Cutoff = { { { 0.0031308f, 0.0031308f, 0.0031308f, 1.f } } }; static const XMVECTORF32 Linear = { { { 12.92f, 12.92f, 12.92f, 1.f } } }; static const XMVECTORF32 Scale = { { { 1.055f, 1.055f, 1.055f, 1.f } } }; static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } }; static const XMVECTORF32 InvGamma = { { { 1.0f / 2.4f, 1.0f / 2.4f, 1.0f / 2.4f, 1.f } } }; XMVECTOR V = XMVectorSaturate(rgb); XMVECTOR V0 = XMVectorMultiply( V, Linear ); XMVECTOR V1 = XMVectorSubtract( XMVectorMultiply( Scale, XMVectorPow( V, InvGamma ) ), Bias ); XMVECTOR select = XMVectorLess( V, Cutoff ); V = XMVectorSelect( V1, V0, select ); return XMVectorSelect( rgb, V, g_XMSelect1110 ); } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMColorSRGBToRGB( FXMVECTOR srgb ) { static const XMVECTORF32 Cutoff = { { { 0.04045f, 0.04045f, 0.04045f, 1.f } } }; static const XMVECTORF32 ILinear = { { { 1.f / 12.92f, 1.f / 12.92f, 1.f / 12.92f, 1.f } } }; static const XMVECTORF32 Scale = { { { 1.f / 1.055f, 1.f / 1.055f, 1.f / 1.055f, 1.f } } }; static const XMVECTORF32 Bias = { { { 0.055f, 0.055f, 0.055f, 0.f } } }; static const XMVECTORF32 Gamma = { { { 2.4f, 2.4f, 2.4f, 1.f } } }; XMVECTOR V = XMVectorSaturate(srgb); XMVECTOR V0 = XMVectorMultiply( V, ILinear ); XMVECTOR V1 = XMVectorPow( XMVectorMultiply( XMVectorAdd( V, Bias ), Scale ), Gamma ); XMVECTOR select = XMVectorGreater( V, Cutoff ); V = XMVectorSelect( V0, V1, select ); return XMVectorSelect( srgb, V, g_XMSelect1110 ); } /**************************************************************************** * * Miscellaneous * ****************************************************************************/ //------------------------------------------------------------------------------ inline bool XMVerifyCPUSupport() { #if defined(_XM_SSE_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) int CPUInfo[4] = { -1 }; __cpuid(CPUInfo, 0); #ifdef __AVX2__ if (CPUInfo[0] < 7) return false; #else if (CPUInfo[0] < 1) return false; #endif __cpuid(CPUInfo, 1); #if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_) // The compiler can emit FMA3 instructions even without explicit intrinsics use if ((CPUInfo[2] & 0x38081001) != 0x38081001) return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support #elif defined(_XM_FMA3_INTRINSICS_) && defined(_XM_F16C_INTRINSICS_) if ((CPUInfo[2] & 0x38081001) != 0x38081001) return false; // No F16C/AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support #elif defined(_XM_FMA3_INTRINSICS_) if ((CPUInfo[2] & 0x18081001) != 0x18081001) return false; // No AVX/OSXSAVE/SSE4.1/FMA3/SSE3 support #elif defined(_XM_F16C_INTRINSICS_) if ((CPUInfo[2] & 0x38080001) != 0x38080001) return false; // No F16C/AVX/OSXSAVE/SSE4.1/SSE3 support #elif defined(__AVX__) || defined(_XM_AVX_INTRINSICS_) if ((CPUInfo[2] & 0x18080001) != 0x18080001) return false; // No AVX/OSXSAVE/SSE4.1/SSE3 support #elif defined(_XM_SSE4_INTRINSICS_) if ((CPUInfo[2] & 0x80001) != 0x80001) return false; // No SSE3/SSE4.1 support #elif defined(_XM_SSE3_INTRINSICS_) if (!(CPUInfo[2] & 0x1)) return false; // No SSE3 support #endif // The x64 processor model requires SSE2 support, but no harm in checking if ((CPUInfo[3] & 0x6000000) != 0x6000000) return false; // No SSE2/SSE support #if defined(__AVX2__) || defined(_XM_AVX2_INTRINSICS_) __cpuidex(CPUInfo, 7, 0); if (!(CPUInfo[1] & 0x20)) return false; // No AVX2 support #endif return true; #elif defined(_XM_ARM_NEON_INTRINSICS_) && !defined(_XM_NO_INTRINSICS_) // ARM-NEON support is required for the Windows on ARM platform return true; #else // No intrinsics path always supported return true; #endif } //------------------------------------------------------------------------------ inline XMVECTOR XM_CALLCONV XMFresnelTerm ( FXMVECTOR CosIncidentAngle, FXMVECTOR RefractionIndex ) { assert(!XMVector4IsInfinite(CosIncidentAngle)); // Result = 0.5f * (g - c)^2 / (g + c)^2 * ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) where // c = CosIncidentAngle // g = sqrt(c^2 + RefractionIndex^2 - 1) #if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_) XMVECTOR G = XMVectorMultiplyAdd(RefractionIndex, RefractionIndex, g_XMNegativeOne.v); G = XMVectorMultiplyAdd(CosIncidentAngle, CosIncidentAngle, G); G = XMVectorAbs(G); G = XMVectorSqrt(G); XMVECTOR S = XMVectorAdd(G, CosIncidentAngle); XMVECTOR D = XMVectorSubtract(G, CosIncidentAngle); XMVECTOR V0 = XMVectorMultiply(D, D); XMVECTOR V1 = XMVectorMultiply(S, S); V1 = XMVectorReciprocal(V1); V0 = XMVectorMultiply(g_XMOneHalf.v, V0); V0 = XMVectorMultiply(V0, V1); XMVECTOR V2 = XMVectorMultiplyAdd(CosIncidentAngle, S, g_XMNegativeOne.v); XMVECTOR V3 = XMVectorMultiplyAdd(CosIncidentAngle, D, g_XMOne.v); V2 = XMVectorMultiply(V2, V2); V3 = XMVectorMultiply(V3, V3); V3 = XMVectorReciprocal(V3); V2 = XMVectorMultiplyAdd(V2, V3, g_XMOne.v); XMVECTOR Result = XMVectorMultiply(V0, V2); Result = XMVectorSaturate(Result); return Result; #elif defined(_XM_SSE_INTRINSICS_) // G = sqrt(abs((RefractionIndex^2-1) + CosIncidentAngle^2)) XMVECTOR G = _mm_mul_ps(RefractionIndex,RefractionIndex); XMVECTOR vTemp = _mm_mul_ps(CosIncidentAngle,CosIncidentAngle); G = _mm_sub_ps(G,g_XMOne); vTemp = _mm_add_ps(vTemp,G); // max((0-vTemp),vTemp) == abs(vTemp) // The abs is needed to deal with refraction and cosine being zero G = _mm_setzero_ps(); G = _mm_sub_ps(G,vTemp); G = _mm_max_ps(G,vTemp); // Last operation, the sqrt() G = _mm_sqrt_ps(G); // Calc G-C and G+C XMVECTOR GAddC = _mm_add_ps(G,CosIncidentAngle); XMVECTOR GSubC = _mm_sub_ps(G,CosIncidentAngle); // Perform the term (0.5f *(g - c)^2) / (g + c)^2 XMVECTOR vResult = _mm_mul_ps(GSubC,GSubC); vTemp = _mm_mul_ps(GAddC,GAddC); vResult = _mm_mul_ps(vResult,g_XMOneHalf); vResult = _mm_div_ps(vResult,vTemp); // Perform the term ((c * (g + c) - 1)^2 / (c * (g - c) + 1)^2 + 1) GAddC = _mm_mul_ps(GAddC,CosIncidentAngle); GSubC = _mm_mul_ps(GSubC,CosIncidentAngle); GAddC = _mm_sub_ps(GAddC,g_XMOne); GSubC = _mm_add_ps(GSubC,g_XMOne); GAddC = _mm_mul_ps(GAddC,GAddC); GSubC = _mm_mul_ps(GSubC,GSubC); GAddC = _mm_div_ps(GAddC,GSubC); GAddC = _mm_add_ps(GAddC,g_XMOne); // Multiply the two term parts vResult = _mm_mul_ps(vResult,GAddC); // Clamp to 0.0 - 1.0f vResult = _mm_max_ps(vResult,g_XMZero); vResult = _mm_min_ps(vResult,g_XMOne); return vResult; #endif } //------------------------------------------------------------------------------ inline bool XMScalarNearEqual ( float S1, float S2, float Epsilon ) { float Delta = S1 - S2; return (fabsf(Delta) <= Epsilon); } //------------------------------------------------------------------------------ // Modulo the range of the given angle such that -XM_PI <= Angle < XM_PI inline float XMScalarModAngle ( float Angle ) { // Note: The modulo is performed with unsigned math only to work // around a precision error on numbers that are close to PI // Normalize the range from 0.0f to XM_2PI Angle = Angle + XM_PI; // Perform the modulo, unsigned float fTemp = fabsf(Angle); fTemp = fTemp - (XM_2PI * (float)((int32_t)(fTemp/XM_2PI))); // Restore the number to the range of -XM_PI to XM_PI-epsilon fTemp = fTemp - XM_PI; // If the modulo'd value was negative, restore negation if (Angle<0.0f) { fTemp = -fTemp; } return fTemp; } //------------------------------------------------------------------------------ inline float XMScalarSin ( float Value ) { // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. float quotient = XM_1DIV2PI*Value; if (Value >= 0.0f) { quotient = (float)((int)(quotient + 0.5f)); } else { quotient = (float)((int)(quotient - 0.5f)); } float y = Value - XM_2PI*quotient; // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). if (y > XM_PIDIV2) { y = XM_PI - y; } else if (y < -XM_PIDIV2) { y = -XM_PI - y; } // 11-degree minimax approximation float y2 = y * y; return ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y; } //------------------------------------------------------------------------------ inline float XMScalarSinEst ( float Value ) { // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. float quotient = XM_1DIV2PI*Value; if (Value >= 0.0f) { quotient = (float)((int)(quotient + 0.5f)); } else { quotient = (float)((int)(quotient - 0.5f)); } float y = Value - XM_2PI*quotient; // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). if (y > XM_PIDIV2) { y = XM_PI - y; } else if (y < -XM_PIDIV2) { y = -XM_PI - y; } // 7-degree minimax approximation float y2 = y * y; return ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y; } //------------------------------------------------------------------------------ inline float XMScalarCos ( float Value ) { // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. float quotient = XM_1DIV2PI*Value; if (Value >= 0.0f) { quotient = (float)((int)(quotient + 0.5f)); } else { quotient = (float)((int)(quotient - 0.5f)); } float y = Value - XM_2PI*quotient; // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). float sign; if (y > XM_PIDIV2) { y = XM_PI - y; sign = -1.0f; } else if (y < -XM_PIDIV2) { y = -XM_PI - y; sign = -1.0f; } else { sign = +1.0f; } // 10-degree minimax approximation float y2 = y*y; float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f; return sign*p; } //------------------------------------------------------------------------------ inline float XMScalarCosEst ( float Value ) { // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. float quotient = XM_1DIV2PI*Value; if (Value >= 0.0f) { quotient = (float)((int)(quotient + 0.5f)); } else { quotient = (float)((int)(quotient - 0.5f)); } float y = Value - XM_2PI*quotient; // Map y to [-pi/2,pi/2] with cos(y) = sign*cos(x). float sign; if (y > XM_PIDIV2) { y = XM_PI - y; sign = -1.0f; } else if (y < -XM_PIDIV2) { y = -XM_PI - y; sign = -1.0f; } else { sign = +1.0f; } // 6-degree minimax approximation float y2 = y * y; float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f; return sign*p; } //------------------------------------------------------------------------------ _Use_decl_annotations_ inline void XMScalarSinCos ( float* pSin, float* pCos, float Value ) { assert(pSin); assert(pCos); // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. float quotient = XM_1DIV2PI*Value; if (Value >= 0.0f) { quotient = (float)((int)(quotient + 0.5f)); } else { quotient = (float)((int)(quotient - 0.5f)); } float y = Value - XM_2PI*quotient; // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). float sign; if (y > XM_PIDIV2) { y = XM_PI - y; sign = -1.0f; } else if (y < -XM_PIDIV2) { y = -XM_PI - y; sign = -1.0f; } else { sign = +1.0f; } float y2 = y * y; // 11-degree minimax approximation *pSin = ( ( ( ( (-2.3889859e-08f * y2 + 2.7525562e-06f) * y2 - 0.00019840874f ) * y2 + 0.0083333310f ) * y2 - 0.16666667f ) * y2 + 1.0f ) * y; // 10-degree minimax approximation float p = ( ( ( ( -2.6051615e-07f * y2 + 2.4760495e-05f ) * y2 - 0.0013888378f ) * y2 + 0.041666638f ) * y2 - 0.5f ) * y2 + 1.0f; *pCos = sign*p; } //------------------------------------------------------------------------------ _Use_decl_annotations_ inline void XMScalarSinCosEst ( float* pSin, float* pCos, float Value ) { assert(pSin); assert(pCos); // Map Value to y in [-pi,pi], x = 2*pi*quotient + remainder. float quotient = XM_1DIV2PI*Value; if (Value >= 0.0f) { quotient = (float)((int)(quotient + 0.5f)); } else { quotient = (float)((int)(quotient - 0.5f)); } float y = Value - XM_2PI*quotient; // Map y to [-pi/2,pi/2] with sin(y) = sin(Value). float sign; if (y > XM_PIDIV2) { y = XM_PI - y; sign = -1.0f; } else if (y < -XM_PIDIV2) { y = -XM_PI - y; sign = -1.0f; } else { sign = +1.0f; } float y2 = y * y; // 7-degree minimax approximation *pSin = ( ( ( -0.00018524670f * y2 + 0.0083139502f ) * y2 - 0.16665852f ) * y2 + 1.0f ) * y; // 6-degree minimax approximation float p = ( ( -0.0012712436f * y2 + 0.041493919f ) * y2 - 0.49992746f ) * y2 + 1.0f; *pCos = sign*p; } //------------------------------------------------------------------------------ inline float XMScalarASin ( float Value ) { // Clamp input to [-1,1]. bool nonnegative = (Value >= 0.0f); float x = fabsf(Value); float omx = 1.0f - x; if (omx < 0.0f) { omx = 0.0f; } float root = sqrtf(omx); // 7-degree minimax approximation float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f; result *= root; // acos(|x|) // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); } //------------------------------------------------------------------------------ inline float XMScalarASinEst ( float Value ) { // Clamp input to [-1,1]. bool nonnegative = (Value >= 0.0f); float x = fabsf(Value); float omx = 1.0f - x; if (omx < 0.0f) { omx = 0.0f; } float root = sqrtf(omx); // 3-degree minimax approximation float result = ((-0.0187293f*x+0.0742610f)*x-0.2121144f)*x+1.5707288f; result *= root; // acos(|x|) // acos(x) = pi - acos(-x) when x < 0, asin(x) = pi/2 - acos(x) return (nonnegative ? XM_PIDIV2 - result : result - XM_PIDIV2); } //------------------------------------------------------------------------------ inline float XMScalarACos ( float Value ) { // Clamp input to [-1,1]. bool nonnegative = (Value >= 0.0f); float x = fabsf(Value); float omx = 1.0f - x; if (omx < 0.0f) { omx = 0.0f; } float root = sqrtf(omx); // 7-degree minimax approximation float result = ( ( ( ( ( ( -0.0012624911f * x + 0.0066700901f ) * x - 0.0170881256f ) * x + 0.0308918810f ) * x - 0.0501743046f ) * x + 0.0889789874f ) * x - 0.2145988016f ) * x + 1.5707963050f; result *= root; // acos(x) = pi - acos(-x) when x < 0 return (nonnegative ? result : XM_PI - result); } //------------------------------------------------------------------------------ inline float XMScalarACosEst ( float Value ) { // Clamp input to [-1,1]. bool nonnegative = (Value >= 0.0f); float x = fabsf(Value); float omx = 1.0f - x; if (omx < 0.0f) { omx = 0.0f; } float root = sqrtf(omx); // 3-degree minimax approximation float result = ( ( -0.0187293f * x + 0.0742610f ) * x - 0.2121144f ) * x + 1.5707288f; result *= root; // acos(x) = pi - acos(-x) when x < 0 return (nonnegative ? result : XM_PI - result); }