From e2379a66d670bedc29d0140bfba044c00038b569 Mon Sep 17 00:00:00 2001 From: rogerman Date: Sun, 16 Mar 2025 16:13:12 -0700 Subject: [PATCH] matrix.h: Fix const-correctness for some *_fast function parameters. - In practice, this only affected compiling for NEON on certain compilers. Other SIMD ISAs should remain unaffected. --- desmume/src/matrix.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/desmume/src/matrix.h b/desmume/src/matrix.h index def1faa71..b8d08025d 100644 --- a/desmume/src/matrix.h +++ b/desmume/src/matrix.h @@ -185,13 +185,13 @@ static void memset_u32_fast(void *dst, const u32 val) } template -static void stream_copy_fast(void *__restrict dst, void *__restrict src) +static void stream_copy_fast(void *__restrict dst, const void *__restrict src) { MACRODO_N( LENGTH / sizeof(v512s8), _mm512_stream_si512((v512s8 *)dst + (X), _mm512_stream_load_si512((v512s8 *)src + (X))) ); } template -static void buffer_copy_fast(void *__restrict dst, void *__restrict src) +static void buffer_copy_fast(void *__restrict dst, const void *__restrict src) { MACRODO_N( LENGTH / sizeof(v512s8), _mm512_store_si512((v512s8 *)dst + (X), _mm512_load_si512((v512s8 *)src + (X))) ); } @@ -479,7 +479,7 @@ static void memset_u32_fast(void *dst, const u32 val) } template -static void stream_copy_fast(void *__restrict dst, void *__restrict src) +static void stream_copy_fast(void *__restrict dst, const void *__restrict src) { #ifdef ENABLE_SSE4_1 MACRODO_N( VECLENGTH / sizeof(v128s8), _mm_stream_si128((v128s8 *)dst + (X), _mm_stream_load_si128((v128s8 *)src + (X))) ); @@ -489,7 +489,7 @@ static void stream_copy_fast(void *__restrict dst, void *__restrict src) } template -static void buffer_copy_fast(void *__restrict dst, void *__restrict src) +static void buffer_copy_fast(void *__restrict dst, const void *__restrict src) { MACRODO_N( VECLENGTH / sizeof(v128s8), _mm_store_si128((v128s8 *)dst + (X), _mm_load_si128((v128s8 *)src + (X))) ); } @@ -606,13 +606,13 @@ static void memset_u32_fast(void *dst, const u32 val) } template -static void buffer_copy_fast(void *__restrict dst, void *__restrict src) +static void buffer_copy_fast(void *__restrict dst, const void *__restrict src) { MACRODO_N( VECLENGTH / sizeof(uint8x16x4_t), vst1q_u8_x4((u8 *)dst + ((X) * sizeof(uint8x16x4_t)), vld1q_u8_x4((u8 *)src + ((X) * sizeof(uint8x16x4_t)))) ); } template -static void stream_copy_fast(void *__restrict dst, void *__restrict src) +static void stream_copy_fast(void *__restrict dst, const void *__restrict src) { // NEON doesn't have the same temporal/caching distinctions that SSE and AVX do, // so just use buffer_copy_fast() for this function too. @@ -656,10 +656,10 @@ static void buffer_copy_or_constant_s8(void *__restrict dst, const void *__restr } template -static void buffer_copy_or_constant_s8_fast(void *__restrict dst, void *__restrict src, const s8 c) +static void buffer_copy_or_constant_s8_fast(void *__restrict dst, const void *__restrict src, const s8 c) { const v128u8 c_vec = vreinterpretq_u8_s8( vdupq_n_s8(c) ); - __buffer_copy_or_constant_fast(dst, src, c_vec); + __buffer_copy_or_constant_fast(dst, src, c_vec); } template @@ -670,7 +670,7 @@ static void buffer_copy_or_constant_s16(void *__restrict dst, const void *__rest } template -static void buffer_copy_or_constant_s16_fast(void *__restrict dst, void *__restrict src, const s16 c) +static void buffer_copy_or_constant_s16_fast(void *__restrict dst, const void *__restrict src, const s16 c) { const v128u8 c_vec = vreinterpretq_u8_s16( vdupq_n_s16(c) ); __buffer_copy_or_constant_fast(dst, src, c_vec); @@ -684,7 +684,7 @@ static void buffer_copy_or_constant_s32(void *__restrict dst, const void *__rest } template -static void buffer_copy_or_constant_s32_fast(void *__restrict dst, void *__restrict src, const s32 c) +static void buffer_copy_or_constant_s32_fast(void *__restrict dst, const void *__restrict src, const s32 c) { const v128u8 c_vec = vreinterpretq_u8_s32( vdupq_n_s32(c) ); __buffer_copy_or_constant_fast(dst, src, c_vec); @@ -731,13 +731,13 @@ static void memset_u32_fast(void *dst, const u32 val) } template -static void buffer_copy_fast(void *__restrict dst, void *__restrict src) +static void buffer_copy_fast(void *__restrict dst, const void *__restrict src) { MACRODO_N( VECLENGTH / sizeof(v128s8), vec_st(vec_ld((X)*sizeof(v128s8),(u8 *__restrict)src), (X)*sizeof(v128s8), (u8 *__restrict)dst) ); } template -static void stream_copy_fast(void *__restrict dst, void *__restrict src) +static void stream_copy_fast(void *__restrict dst, const void *__restrict src) { // AltiVec doesn't have the same temporal/caching distinctions that SSE and AVX do, // so just use buffer_copy_fast() for this function too. @@ -782,7 +782,7 @@ static void buffer_copy_or_constant_s8(void *__restrict dst, const void *__restr } template -static void buffer_copy_or_constant_s8_fast(void *__restrict dst, void *__restrict src, const s8 c) +static void buffer_copy_or_constant_s8_fast(void *__restrict dst, const void *__restrict src, const s8 c) { const v128s8 c_vec = {c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c}; __buffer_copy_or_constant_fast(dst, src, c_vec); @@ -797,7 +797,7 @@ static void buffer_copy_or_constant_s16(void *__restrict dst, const void *__rest } template -static void buffer_copy_or_constant_s16_fast(void *__restrict dst, void *__restrict src, const s16 c) +static void buffer_copy_or_constant_s16_fast(void *__restrict dst, const void *__restrict src, const s16 c) { const s16 c_16 = (NEEDENDIANSWAP) ? LE_TO_LOCAL_16(c) : c; const v128s16 c_vec = {c_16, c_16, c_16, c_16, c_16, c_16, c_16, c_16}; @@ -813,7 +813,7 @@ static void buffer_copy_or_constant_s32(void *__restrict dst, const void *__rest } template -static void buffer_copy_or_constant_s32_fast(void *__restrict dst, void *__restrict src, const s32 c) +static void buffer_copy_or_constant_s32_fast(void *__restrict dst, const void *__restrict src, const s32 c) { const s32 c_32 = (NEEDENDIANSWAP) ? LE_TO_LOCAL_32(c) : c; const v128s32 c_vec = {c_32, c_32, c_32, c_32}; @@ -889,13 +889,13 @@ static void memset_u32_fast(void *dst, const u32 val) // vector intrinsics to control the temporal/caching behavior. template -static void stream_copy_fast(void *__restrict dst, void *__restrict src) +static void stream_copy_fast(void *__restrict dst, const void *__restrict src) { memcpy(dst, src, VECLENGTH); } template -static void buffer_copy_fast(void *__restrict dst, void *__restrict src) +static void buffer_copy_fast(void *__restrict dst, const void *__restrict src) { memcpy(dst, src, VECLENGTH); } @@ -920,7 +920,7 @@ static void buffer_copy_or_constant_s8(void *__restrict dst, const void *__restr } template -static void buffer_copy_or_constant_s8_fast(void *__restrict dst, void *__restrict src, const s8 c) +static void buffer_copy_or_constant_s8_fast(void *__restrict dst, const void *__restrict src, const s8 c) { #ifdef HOST_64 s64 *src_64 = (s64 *)src; @@ -980,7 +980,7 @@ static void buffer_copy_or_constant_s16(void *__restrict dst, const void *__rest } template -static void buffer_copy_or_constant_s16_fast(void *__restrict dst, void *__restrict src, const s16 c) +static void buffer_copy_or_constant_s16_fast(void *__restrict dst, const void *__restrict src, const s16 c) { #ifdef HOST_64 s64 *src_64 = (s64 *)src; @@ -1049,7 +1049,7 @@ static void buffer_copy_or_constant_s32(void *__restrict dst, const void *__rest } template -static void buffer_copy_or_constant_s32_fast(void *__restrict dst, void *__restrict src, const s32 c) +static void buffer_copy_or_constant_s32_fast(void *__restrict dst, const void *__restrict src, const s32 c) { #ifdef HOST_64 s64 *src_64 = (s64 *)src;