From fea115a6f42eed9e6d6760d823422a4dfb72883e Mon Sep 17 00:00:00 2001
From: raven02
Date: Thu, 2 Jul 2015 23:04:48 +0800
Subject: [PATCH] Use SSE intrinsics for uint16_t byte swapping

Part of https://github.com/benvanik/xenia/issues/308
---
 src/xenia/base/memory_generic.cc | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/xenia/base/memory_generic.cc b/src/xenia/base/memory_generic.cc
index a48fc6c13..b25327770 100644
--- a/src/xenia/base/memory_generic.cc
+++ b/src/xenia/base/memory_generic.cc
@@ -43,7 +43,14 @@ void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
 
 void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src,
                                 size_t count) {
-  for (size_t i = 0; i < count; ++i) {
+  size_t i;
+  for (i = 0; i + 8 <= count; i += 8) {
+    __m128i s = _mm_loadu_si128((__m128i*)&src[i]);
+    __m128i d = _mm_or_si128(_mm_slli_epi16(s, 8), _mm_srli_epi16(s, 8));
+    _mm_storeu_si128((__m128i*)&dest[i], d);
+  }
+
+  for (; i < count; ++i) {  // handle residual elements
     dest[i] = byte_swap(src[i]);
   }
 }