Merge pull request #321 from raven02/patch-2
gl4: Swap high and low 16 bits within a 32 bit word
This commit is contained in:
commit
87ea7a0233
|
@ -46,6 +46,8 @@ void copy_and_swap_64_aligned(uint64_t* dest, const uint64_t* src,
|
||||||
size_t count);
|
size_t count);
|
||||||
void copy_and_swap_64_unaligned(uint64_t* dest, const uint64_t* src,
|
void copy_and_swap_64_unaligned(uint64_t* dest, const uint64_t* src,
|
||||||
size_t count);
|
size_t count);
|
||||||
|
// Copies count 32-bit words from src to dest, swapping the high and low
// 16-bit halves within each word. count is a number of uint32_t elements,
// not a byte length.
void copy_and_swap_16_in_32_aligned(uint32_t* dest, const uint32_t* src,
|
||||||
|
size_t count);
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void copy_and_swap(T* dest, const T* src, size_t count) {
|
void copy_and_swap(T* dest, const T* src, size_t count) {
|
||||||
|
|
|
@ -132,4 +132,18 @@ void copy_and_swap_64_unaligned(uint64_t* dest, const uint64_t* src,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies `count` 32-bit words from `src` to `dest`, exchanging the upper and
// lower 16-bit halves of every word (0xAAAABBBB becomes 0xBBBBAAAA).
//
// dest:  destination buffer; must have room for `count` uint32_t values.
// src:   source buffer holding `count` uint32_t values.
// count: number of uint32_t elements to process - NOT a byte length.
//
// Despite the `_aligned` suffix, the vector path uses the unaligned SSE2
// load/store intrinsics, so the pointers are not required to be 16-byte
// aligned.
void copy_and_swap_16_in_32_aligned(uint32_t* dest, const uint32_t* src,
                                    size_t count) {
  size_t i = 0;
  // Vector path: four 32-bit words per iteration.
  for (; i + 4 <= count; i += 4) {
    // Named casts keep `src` const-qualified instead of silently dropping it.
    const __m128i input =
        _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
    // Rotate each 32-bit lane by 16 bits: (x << 16) | (x >> 16).
    const __m128i output =
        _mm_or_si128(_mm_slli_epi32(input, 16), _mm_srli_epi32(input, 16));
    _mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
  }
  for (; i < count; ++i) {  // handle residual elements
    dest[i] = (src[i] >> 16) | (src[i] << 16);
  }
}
|
||||||
|
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
@ -687,13 +687,10 @@ void TextureSwap(Endian endianness, void* dest, const void* src,
|
||||||
reinterpret_cast<const uint32_t*>(src),
|
reinterpret_cast<const uint32_t*>(src),
|
||||||
length / 4);
|
length / 4);
|
||||||
break;
|
break;
|
||||||
case Endian::k16in32:
|
case Endian::k16in32: // Swap high and low 16 bits within a 32 bit word
|
||||||
// TODO(benvanik): make more efficient.
|
xe::copy_and_swap_16_in_32_aligned(reinterpret_cast<uint32_t*>(dest),
|
||||||
/*for (uint32_t i = 0; i < length; i += 4, src += 4, dest += 4) {
|
reinterpret_cast<const uint32_t*>(src),
|
||||||
uint32_t value = *(uint32_t*)src;
|
length);
|
||||||
*(uint32_t*)dest = ((value >> 16) & 0xFFFF) | (value << 16);
|
|
||||||
}*/
|
|
||||||
assert_always("16in32 not supported");
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
case Endian::kUnspecified:
|
case Endian::kUnspecified:
|
||||||
|
|
Loading…
Reference in New Issue