Revert "Fix buffer overrun issues with custom memset functions."

This reverts commit 53c4a27aef.

I forgot that these functions take an element count, not a byte count.
Rename “length” to “elementCount” for clarity.
This commit is contained in:
rogerman 2017-01-26 01:23:42 -08:00
parent 62f230ef44
commit fc3570a18a
1 changed files with 16 additions and 16 deletions

View File

@ -120,50 +120,50 @@ FORCEINLINE s32 s32floor(double d)
//-------------
#ifdef ENABLE_SSE2
static void memset_u16(void *dst, const u16 val, const size_t length)
static void memset_u16(void *dst, const u16 val, const size_t elementCount)
{
__m128i *dst_vec128 = (__m128i *)dst;
const __m128i val_vec128 = _mm_set1_epi16(val);
const size_t length_vec128 = length / sizeof(val_vec128);
const size_t length_vec128 = elementCount / (sizeof(val_vec128) / sizeof(val));
for (size_t i = 0; i < length_vec128; i++)
_mm_stream_si128(dst_vec128 + i, val_vec128);
}
template <size_t LENGTH>
template <size_t ELEMENTCOUNT>
static void memset_u16_fast(void *dst, const u16 val)
{
__m128i *dst_vec128 = (__m128i *)dst;
const __m128i val_vec128 = _mm_set1_epi16(val);
MACRODO_N(LENGTH / sizeof(val_vec128), _mm_store_si128(dst_vec128 + (X), val_vec128));
MACRODO_N(ELEMENTCOUNT / (sizeof(val_vec128) / sizeof(val)), _mm_store_si128(dst_vec128 + (X), val_vec128));
}
static void memset_u32(void *dst, const u32 val, const size_t length)
static void memset_u32(void *dst, const u32 val, const size_t elementCount)
{
__m128i *dst_vec128 = (__m128i *)dst;
const __m128i val_vec128 = _mm_set1_epi32(val);
const size_t length_vec128 = length / sizeof(val_vec128);
const size_t length_vec128 = elementCount / (sizeof(val_vec128) / sizeof(val));
for (size_t i = 0; i < length_vec128; i++)
_mm_stream_si128(dst_vec128 + i, val_vec128);
}
template <size_t LENGTH>
template <size_t ELEMENTCOUNT>
static void memset_u32_fast(void *dst, const u32 val)
{
__m128i *dst_vec128 = (__m128i *)dst;
const __m128i val_vec128 = _mm_set1_epi32(val);
MACRODO_N(LENGTH / sizeof(val_vec128), _mm_store_si128(dst_vec128 + (X), val_vec128));
MACRODO_N(ELEMENTCOUNT / (sizeof(val_vec128) / sizeof(val)), _mm_store_si128(dst_vec128 + (X), val_vec128));
}
#else //no sse2
static void memset_u16(void *dst, const u16 val, const size_t length)
static void memset_u16(void *dst, const u16 val, const size_t elementCount)
{
#ifdef HOST_64
u64 *dst_u64 = (u64 *)dst;
const u64 val_u64 = ((u64)val << 48) | ((u64)val << 32) | ((u64)val << 16) | (u64)val;
const size_t length_u64 = length / sizeof(val_u64);
const size_t length_u64 = elementCount / (sizeof(val_u64) / sizeof(val));
for (size_t i = 0; i < length_u64; i++)
dst_u64[i] = val_u64;
@ -173,25 +173,25 @@ static void memset_u16(void *dst, const u16 val, const size_t length)
#endif
}
template <size_t LENGTH>
template <size_t ELEMENTCOUNT>
static void memset_u16_fast(void *dst, const u16 val)
{
#ifdef HOST_64
u64 *dst_u64 = (u64 *)dst;
const u64 val_u64 = ((u64)val << 48) | ((u64)val << 32) | ((u64)val << 16) | (u64)val;
MACRODO_N(LENGTH / sizeof(val_u64), (dst_u64[(X)] = val_u64));
MACRODO_N(ELEMENTCOUNT / (sizeof(val_u64) / sizeof(val)), (dst_u64[(X)] = val_u64));
#else
for (size_t i = 0; i < LENGTH; i++)
((u16 *)dst)[i] = val;
#endif
}
static void memset_u32(void *dst, const u32 val, const size_t length)
static void memset_u32(void *dst, const u32 val, const size_t elementCount)
{
#ifdef HOST_64
u64 *dst_u64 = (u64 *)dst;
const u64 val_u64 = ((u64)val << 32) | (u64)val;
const size_t length_u64 = length / sizeof(val_u64));
const size_t length_u64 = elementCount / (sizeof(val_u64) / sizeof(val));
for (size_t i = 0; i < length_u64; i++)
dst_u64[i] = val_u64;
@ -201,13 +201,13 @@ static void memset_u32(void *dst, const u32 val, const size_t length)
#endif
}
template <size_t LENGTH>
template <size_t ELEMENTCOUNT>
static void memset_u32_fast(void *dst, const u32 val)
{
#ifdef HOST_64
u64 *dst_u64 = (u64 *)dst;
const u64 val_u64 = ((u64)val << 32) | (u64)val;
MACRODO_N(LENGTH / sizeof(val_u64), (dst_u64[(X)] = val_u64));
MACRODO_N(ELEMENTCOUNT / (sizeof(val_u64) / sizeof(val)), (dst_u64[(X)] = val_u64));
#else
for (size_t i = 0; i < LENGTH; i++)
((u16 *)dst)[i] = val;