mirror of https://github.com/PCSX2/pcsx2.git
133 lines
3.8 KiB
C
133 lines
3.8 KiB
C
/*
|
|
* Copyright (C) 2007-2009 Gabest
|
|
* http://www.gabest.org
|
|
*
|
|
* This Program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This Program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with GNU Make; see the file COPYING. If not, write to
|
|
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
* http://www.gnu.org/copyleft/gpl.html
|
|
*
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
// sse2
|
|
|
|
#if _M_SSE >= 0x200
|
|
|
|
#include <xmmintrin.h>
|
|
#include <emmintrin.h>
|
|
|
|
#ifndef _MM_DENORMALS_ARE_ZERO
|
|
// Fallback used only when the intrinsics headers do not already define it:
// 0x0040 is bit 6 of the MXCSR register, the DAZ (denormals-are-zero) flag.
#define _MM_DENORMALS_ARE_ZERO 0x0040
|
|
#endif
|
|
|
|
// MXCSR control word built from the standard flag macros:
// all SSE floating-point exceptions masked, round-to-nearest,
// flush-to-zero enabled and denormals-are-zero enabled.
#define MXCSR \
	(_MM_MASK_MASK \
	| _MM_ROUND_NEAREST \
	| _MM_FLUSH_ZERO_ON \
	| _MM_DENORMALS_ARE_ZERO)
|
|
|
|
// Replacement _mm_cast* intrinsics for MSVC releases older than Visual C++ 2008
// (_MSC_VER 1500), presumably because their <emmintrin.h> lacks them.
// Each shim reinterprets the 128 bits of its argument as another vector type
// without changing any bit.
//
// The guard must test defined(_MSC_VER): an undefined macro evaluates to 0 in
// #if, so a bare "_MSC_VER < 1500" is true on every non-Microsoft compiler and
// the shims would then clash with the real intrinsics from <emmintrin.h>.
#if defined(_MSC_VER) && _MSC_VER < 1500

__forceinline __m128i _mm_castps_si128(__m128 a) {return *(__m128i*)&a;}
__forceinline __m128 _mm_castsi128_ps(__m128i a) {return *(__m128*)&a;}
__forceinline __m128i _mm_castpd_si128(__m128d a) {return *(__m128i*)&a;}
__forceinline __m128d _mm_castsi128_pd(__m128i a) {return *(__m128d*)&a;}
__forceinline __m128d _mm_castps_pd(__m128 a) {return *(__m128d*)&a;}
__forceinline __m128 _mm_castpd_ps(__m128d a) {return *(__m128*)&a;}

#endif
|
|
|
|
// Bit-pattern constants: each 32-bit pattern is broadcast to all four lanes
// and then reinterpreted (not converted) as packed single-precision floats.
const __m128 ps_3f800000 = _mm_castsi128_ps(_mm_set1_epi32((int)0x3f800000)); // 1.0f
const __m128 ps_4b000000 = _mm_castsi128_ps(_mm_set1_epi32((int)0x4b000000)); // 8388608.0f (2^23)
const __m128 ps_7fffffff = _mm_castsi128_ps(_mm_set1_epi32((int)0x7fffffff)); // every bit except the sign bit
const __m128 ps_80000000 = _mm_castsi128_ps(_mm_set1_epi32((int)0x80000000)); // sign bit only
const __m128 ps_ffffffff = _mm_castsi128_ps(_mm_set1_epi32((int)0xffffffff)); // all bits set
|
|
|
|
// Negates each of the four floats in r by flipping its sign bit
// (XOR with the 0x80000000 pattern held in ps_80000000).
__forceinline __m128 _mm_neg_ps(__m128 r)
{
	const __m128 sign_bits = ps_80000000;

	return _mm_xor_ps(r, sign_bits);
}
|
|
|
|
// Absolute value of each of the four floats in r: clears the sign bit by
// AND-ing with the 0x7fffffff pattern held in ps_7fffffff.
__forceinline __m128 _mm_abs_ps(__m128 r)
{
	const __m128 magnitude_bits = ps_7fffffff;

	return _mm_and_ps(r, magnitude_bits);
}
|
|
|
|
// Transposes in place the 4x4 matrix of 32-bit elements held in four __m128i
// rows: afterwards rowN contains element N of each original row, in row order.
// The arguments must be modifiable __m128i lvalues; each one is evaluated
// more than once.
//
// The scratch variables carry a macro-specific prefix so that rows with common
// names such as tmp0..tmp3 can be passed without being shadowed inside the
// expansion. The bare-brace form (instead of do/while) is kept so that
// existing call sites keep compiling unchanged.
#define _MM_TRANSPOSE4_SI128(row0, row1, row2, row3) \
{ \
	__m128 transpose4_si128_lo01_ = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), _MM_SHUFFLE(1, 0, 1, 0)); \
	__m128 transpose4_si128_hi01_ = _mm_shuffle_ps(_mm_castsi128_ps(row0), _mm_castsi128_ps(row1), _MM_SHUFFLE(3, 2, 3, 2)); \
	__m128 transpose4_si128_lo23_ = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), _MM_SHUFFLE(1, 0, 1, 0)); \
	__m128 transpose4_si128_hi23_ = _mm_shuffle_ps(_mm_castsi128_ps(row2), _mm_castsi128_ps(row3), _MM_SHUFFLE(3, 2, 3, 2)); \
	(row0) = _mm_castps_si128(_mm_shuffle_ps(transpose4_si128_lo01_, transpose4_si128_lo23_, _MM_SHUFFLE(2, 0, 2, 0))); \
	(row1) = _mm_castps_si128(_mm_shuffle_ps(transpose4_si128_lo01_, transpose4_si128_lo23_, _MM_SHUFFLE(3, 1, 3, 1))); \
	(row2) = _mm_castps_si128(_mm_shuffle_ps(transpose4_si128_hi01_, transpose4_si128_hi23_, _MM_SHUFFLE(2, 0, 2, 0))); \
	(row3) = _mm_castps_si128(_mm_shuffle_ps(transpose4_si128_hi01_, transpose4_si128_hi23_, _MM_SHUFFLE(3, 1, 3, 1))); \
}
|
|
|
|
// Approximate per-lane reciprocal of r: takes the _mm_rcp_ps estimate t and
// refines it with one Newton-Raphson step, computed as (t + t) - (t * t) * r.
__forceinline __m128 _mm_rcpnr_ps(__m128 r)
{
	const __m128 estimate = _mm_rcp_ps(r);
	const __m128 doubled = _mm_add_ps(estimate, estimate);
	const __m128 correction = _mm_mul_ps(_mm_mul_ps(estimate, estimate), r);

	return _mm_sub_ps(doubled, correction);
}
|
|
|
|
|
|
#else
|
|
|
|
#error TODO: GSVector4 and GSRasterizer needs SSE2
|
|
|
|
#endif
|
|
|
|
// ssse3
|
|
|
|
#if _M_SSE >= 0x301
|
|
|
|
#include <tmmintrin.h>
|
|
|
|
#endif
|
|
|
|
// sse4.1
|
|
|
|
#if _M_SSE >= 0x401
|
|
|
|
#include <smmintrin.h>
|
|
|
|
#else
|
|
|
|
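// Not an exact replacement for SSE4.1's blendv: the real instructions pick whole
// elements using only the most significant bit of each mask element, whereas
// these macros select bit by bit. The results agree whenever every mask element
// is all ones or all zeros, which is sufficient for our needs.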
|
|
|
|
// Bitwise select on packed floats: each result bit comes from b where the
// corresponding mask bit is set and from a where it is clear.
// Note that mask is evaluated twice.
#define _mm_blendv_ps(a, b, mask) \
	_mm_or_ps(_mm_and_ps((mask), (b)), _mm_andnot_ps((mask), (a)))
|
|
// Bitwise select on 128-bit integers: each result bit comes from b where the
// corresponding mask bit is set and from a where it is clear.
// Note that mask is evaluated twice.
#define _mm_blendv_epi8(a, b, mask) \
	_mm_or_si128(_mm_and_si128((mask), (b)), _mm_andnot_si128((mask), (a)))
|
|
|
|
// SSE2 emulation of rounding each lane of x to an integral value.
// Unlike SSE4.1's _mm_round_ps it takes no rounding-mode argument.
//
// Adding and then subtracting 2^23 (ps_4b000000) carrying the sign of x pushes
// the fraction bits out of the 24-bit significand, so the hardware rounds them
// away according to the current MXCSR rounding mode.
//
// That trick is only valid while |x| < 2^23. Larger magnitudes are already
// integral, but x + 2^23 may not be exactly representable (8388609.0f used to
// come back as 8388608.0f), so those lanes are returned unchanged. NaN lanes
// fail the comparison and are passed through as well.
__forceinline __m128 _mm_round_ps(__m128 x)
{
	// 2^23 with the sign of x, so negative inputs are biased away from zero too.
	__m128 magic = _mm_or_ps(_mm_and_ps(ps_80000000, x), ps_4b000000);

	__m128 rounded = _mm_sub_ps(_mm_add_ps(x, magic), magic);

	// All-ones in lanes where the add/subtract trick is exact.
	__m128 in_range = _mm_cmplt_ps(_mm_and_ps(ps_7fffffff, x), ps_4b000000);

	return _mm_or_ps(_mm_and_ps(in_range, rounded), _mm_andnot_ps(in_range, x));
}
|
|
|
|
// Per-lane floor built on the one-argument _mm_round_ps fallback: start from
// the rounded value and subtract 1.0f in every lane where rounding ended up
// above x.
__forceinline __m128 _mm_floor_ps(__m128 x)
{
	const __m128 rounded = _mm_round_ps(x);
	const __m128 went_up = _mm_cmplt_ps(x, rounded); // all-ones where x < rounded
	const __m128 one_or_zero = _mm_and_ps(went_up, ps_3f800000); // 1.0f in those lanes, else 0.0f

	return _mm_sub_ps(rounded, one_or_zero);
}
|
|
|
|
// Per-lane ceiling built on the one-argument _mm_round_ps fallback: start from
// the rounded value and add 1.0f in every lane where rounding ended up
// below x.
__forceinline __m128 _mm_ceil_ps(__m128 x)
{
	const __m128 rounded = _mm_round_ps(x);
	const __m128 went_down = _mm_cmpgt_ps(x, rounded); // all-ones where x > rounded
	const __m128 one_or_zero = _mm_and_ps(went_down, ps_3f800000); // 1.0f in those lanes, else 0.0f

	return _mm_add_ps(rounded, one_or_zero);
}
|
|
|
|
#endif
|