/* * Copyright (C) 2007-2009 Gabest * http://www.gabest.org * * This Program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * * This Program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * http://www.gnu.org/copyleft/gpl.html * */ #pragma once // sse2 #if _M_SSE >= 0x200 #include #include #ifndef _MM_DENORMALS_ARE_ZERO #define _MM_DENORMALS_ARE_ZERO 0x0040 #endif #define MXCSR (_MM_DENORMALS_ARE_ZERO | _MM_MASK_MASK | _MM_ROUND_NEAREST | _MM_FLUSH_ZERO_ON) #if _MSC_VER < 1500 __forceinline __m128i _mm_castps_si128(__m128 a) {return *(__m128i*)&a;} __forceinline __m128 _mm_castsi128_ps(__m128i a) {return *(__m128*)&a;} __forceinline __m128i _mm_castpd_si128(__m128d a) {return *(__m128i*)&a;} __forceinline __m128d _mm_castsi128_pd(__m128i a) {return *(__m128d*)&a;} __forceinline __m128d _mm_castps_pd(__m128 a) {return *(__m128d*)&a;} __forceinline __m128 _mm_castpd_ps(__m128d a) {return *(__m128*)&a;} #endif const __m128 ps_3f800000 = _mm_castsi128_ps(_mm_set1_epi32(0x3f800000)); const __m128 ps_4b000000 = _mm_castsi128_ps(_mm_set1_epi32(0x4b000000)); const __m128 ps_7fffffff = _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)); const __m128 ps_80000000 = _mm_castsi128_ps(_mm_set1_epi32(0x80000000)); const __m128 ps_ffffffff = _mm_castsi128_ps(_mm_set1_epi32(0xffffffff)); __forceinline __m128 _mm_neg_ps(__m128 r) { return _mm_xor_ps(ps_80000000, r); } __forceinline __m128 _mm_abs_ps(__m128 r) { return _mm_and_ps(ps_7fffffff, r); } #define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \ { \ __m128 tmp0 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0x44); \ __m128 tmp2 = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), 0xEE); \ __m128 tmp1 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0x44); \ __m128 tmp3 = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), 0xEE); \ (row0) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0x88)); \ (row1) = _mm_castps_si128(_mm_shuffle_ps(tmp0, tmp1, 0xDD)); \ (row2) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0x88)); \ (row3) = _mm_castps_si128(_mm_shuffle_ps(tmp2, tmp3, 0xDD)); \ } __forceinline __m128 _mm_rcpnr_ps(__m128 r) { __m128 t = _mm_rcp_ps(r); return _mm_sub_ps(_mm_add_ps(t, t), _mm_mul_ps(_mm_mul_ps(t, t), r)); } #else #error TODO: GSVector4 and GSRasterizer needs SSE2 #endif // sse3 #if _M_SSE >= 0x301 #include #endif // sse4 #if _M_SSE >= 0x401 #include #else // not an equal replacement for sse4's blend but for our needs it is ok #define _mm_blendv_ps(a, b, mask) _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, b)) #define _mm_blendv_epi8(a, b, mask) _mm_or_si128(_mm_andnot_si128(mask, a), _mm_and_si128(mask, b)) __forceinline __m128 _mm_round_ps(__m128 x) { __m128 t = _mm_or_ps(_mm_and_ps(ps_80000000, x), ps_4b000000); return _mm_sub_ps(_mm_add_ps(x, t), t); } __forceinline __m128 _mm_floor_ps(__m128 x) { __m128 t = _mm_round_ps(x); return _mm_sub_ps(t, _mm_and_ps(_mm_cmplt_ps(x, t), ps_3f800000)); } __forceinline __m128 _mm_ceil_ps(__m128 x) { __m128 t = _mm_round_ps(x); return _mm_add_ps(t, _mm_and_ps(_mm_cmpgt_ps(x, t), ps_3f800000)); } #endif