rsx: Allow only sse4.1 capable CPUs to take the accelerated index path

- Older instruction sets (e.g. SSSE3) lack the unsigned min/max instructions that the accelerated path requires
This commit is contained in:
kd-11 2019-09-13 02:53:45 +03:00 committed by kd-11
parent dadfdc35f4
commit c59cb1bdd3
1 changed file with 43 additions and 92 deletions

View File

@ -627,8 +627,6 @@ namespace
_mm_storeu_si128(dst_stream++, value); _mm_storeu_si128(dst_stream++, value);
} }
if (s_use_sse4_1)
{
const __m128i mask_step1 = _mm_set_epi8( const __m128i mask_step1 = _mm_set_epi8(
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0xF, 0xE, 0xD, 0xC, 0xB, 0xA, 0x9, 0x8); 0xF, 0xE, 0xD, 0xC, 0xB, 0xA, 0x9, 0x8);
@ -660,27 +658,6 @@ namespace
return std::make_tuple(min_index, max_index, count); return std::make_tuple(min_index, max_index, count);
} }
else
{
// Manual min-max
alignas(16) u16 _min[8];
alignas(16) u16 _max[8];
_mm_store_si128((__m128i*)_min, min);
_mm_store_si128((__m128i*)_max, max);
u16 min_index = _min[0];
u16 max_index = _max[0];
for (int i = 1; i < 8; ++i)
{
min_index = std::min(min_index, _min[i]);
max_index = std::max(max_index, _max[i]);
}
return std::make_tuple(min_index, max_index, count);
}
}
static static
std::tuple<u32, u32, u32> upload_u32_swapped(const void *src, void *dst, u32 count) std::tuple<u32, u32, u32> upload_u32_swapped(const void *src, void *dst, u32 count)
@ -707,8 +684,6 @@ namespace
_mm_storeu_si128(dst_stream++, value); _mm_storeu_si128(dst_stream++, value);
} }
if (s_use_sse4_1)
{
// Aggregate min-max // Aggregate min-max
const __m128i mask_step1 = _mm_set_epi8( const __m128i mask_step1 = _mm_set_epi8(
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -718,9 +693,6 @@ namespace
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0x7, 0x6, 0x5, 0x4); 0, 0, 0, 0, 0x7, 0x6, 0x5, 0x4);
// a1, a2, a3, a4
// a1, a2, a1, a2
// mAX
__m128i tmp = __mm_shuffle_epi8(min, mask_step1); __m128i tmp = __mm_shuffle_epi8(min, mask_step1);
min = __mm_min_epu32(min, tmp); min = __mm_min_epu32(min, tmp);
tmp = __mm_shuffle_epi8(min, mask_step2); tmp = __mm_shuffle_epi8(min, mask_step2);
@ -736,27 +708,6 @@ namespace
return std::make_tuple(min_index, max_index, count); return std::make_tuple(min_index, max_index, count);
} }
else
{
// Manual min-max
alignas(16) u32 _min[4];
alignas(16) u32 _max[4];
_mm_store_si128((__m128i*)_min, min);
_mm_store_si128((__m128i*)_max, max);
u32 min_index = _min[0];
u32 max_index = _max[0];
for (int i = 1; i < 4; ++i)
{
min_index = std::min(min_index, _min[i]);
max_index = std::max(max_index, _max[i]);
}
return std::make_tuple(min_index, max_index, count);
}
}
template<typename T> template<typename T>
static static
@ -766,7 +717,7 @@ namespace
u32 written; u32 written;
u32 remaining = src.size(); u32 remaining = src.size();
if (s_use_ssse3 && remaining >= 32) if (s_use_sse4_1 && remaining >= 32)
{ {
if constexpr (std::is_same<T, u32>::value) if constexpr (std::is_same<T, u32>::value)
{ {