diff --git a/Makefile b/Makefile index 9e973548d0..873710c2d3 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ include config.mk TARGET = ssnes tools/ssnes-joyconfig -OBJ = ssnes.o file.o driver.o settings.o dynamic.o message.o rewind.o movie.o gfx/gfx_common.o ups.o bps.o strl.o getopt.o screenshot.o audio/hermite.o +OBJ = ssnes.o file.o driver.o settings.o dynamic.o message.o rewind.o movie.o gfx/gfx_common.o ups.o bps.o strl.o getopt.o screenshot.o audio/hermite.o audio/utils.o JOYCONFIG_OBJ = tools/ssnes-joyconfig.o conf/config_file.o strl.o HEADERS = $(wildcard */*.h) $(wildcard *.h) diff --git a/Makefile.win b/Makefile.win index 5d72dd6519..4eea4f3945 100644 --- a/Makefile.win +++ b/Makefile.win @@ -1,6 +1,6 @@ TARGET = ssnes.exe JTARGET = ssnes-joyconfig.exe -OBJ = ssnes.o file.o driver.o conf/config_file.o settings.o dynamic.o message.o rewind.o movie.o gfx/gfx_common.o bps.o ups.o strl.o screenshot.o audio/hermite.o getopt.o +OBJ = ssnes.o file.o driver.o conf/config_file.o settings.o dynamic.o message.o rewind.o movie.o gfx/gfx_common.o bps.o ups.o strl.o screenshot.o audio/hermite.o getopt.o audio/utils.o JOBJ = conf/config_file.o tools/main-stub.o tools/ssnes-joyconfig.o strl.o CC = gcc diff --git a/audio/utils.c b/audio/utils.c new file mode 100644 index 0000000000..9c68c23166 --- /dev/null +++ b/audio/utils.c @@ -0,0 +1,119 @@ +#include "utils.h" + +#if __SSE2__ +#include <emmintrin.h> +#elif __ALTIVEC__ +#include <altivec.h> +#endif + +void audio_convert_s16_to_float_C(float *out, + const int16_t *in, size_t samples) +{ + for (size_t i = 0; i < samples; i++) + out[i] = (float)in[i] / 0x8000; +} + +void audio_convert_float_to_s16_C(int16_t *out, + const float *in, size_t samples) +{ + for (size_t i = 0; i < samples; i++) + { + int32_t val = in[i] * 0x8000; + out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? 
-0x8000 : (int16_t)val); + } +} + +#if __SSE2__ +void audio_convert_s16_to_float_SSE2(float *out, + const int16_t *in, size_t samples) +{ + __m128 factor = _mm_set1_ps(1.0f / (0x7fff * 0x10000)); + size_t i; + for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) + { + __m128i input = _mm_loadu_si128((const __m128i *)in); + __m128i regs[2] = { + _mm_unpacklo_epi16(_mm_setzero_si128(), input), + _mm_unpackhi_epi16(_mm_setzero_si128(), input), + }; + + __m128 output[2] = { + _mm_mul_ps(_mm_cvtepi32_ps(regs[0]), factor), + _mm_mul_ps(_mm_cvtepi32_ps(regs[1]), factor), + }; + + _mm_storeu_ps(out + 0, output[0]); + _mm_storeu_ps(out + 4, output[1]); + } + + audio_convert_s16_to_float_C(out, in, samples - i); +} + +void audio_convert_float_to_s16_SSE2(int16_t *out, + const float *in, size_t samples) +{ + __m128 factor = _mm_set1_ps((float)0x7fff); + size_t i; + for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) + { + __m128 input[2] = { _mm_loadu_ps(in + 0), _mm_loadu_ps(in + 4) }; + __m128 res[2] = { _mm_mul_ps(input[0], factor), _mm_mul_ps(input[1], factor) }; + + __m128i ints[2] = { _mm_cvtps_epi32(res[0]), _mm_cvtps_epi32(res[1]) }; + __m128i packed = _mm_packs_epi32(ints[0], ints[1]); + + _mm_storeu_si128((__m128i *)out, packed); + } + + audio_convert_float_to_s16_C(out, in, samples - i); +} +#elif __ALTIVEC__ +void audio_convert_s16_to_float_altivec(float *out, + const int16_t *in, size_t samples) +{ + // Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely). 
+ if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0) + { + size_t i; + for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) + { + vector signed short input = vec_ld(0, in); + vector signed int hi = vec_unpackh(input); + vector signed int lo = vec_unpackl(input); + vector float out_hi = vec_ctf(hi, 15); + vector float out_lo = vec_ctf(lo, 15); + + vec_st(out_hi, 0, out); + vec_st(out_lo, 16, out); + } + + audio_convert_s16_to_float_C(out, in, samples - i); + } + else + audio_convert_s16_to_float_C(out, in, samples); +} + +void audio_convert_float_to_s16_altivec(int16_t *out, + const float *in, size_t samples) +{ + // Unaligned loads/store is a bit expensive, so we optimize for the good path (very likely). + if (((uintptr_t)out & 15) + ((uintptr_t)in & 15) == 0) + { + size_t i; + for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) + { + vector float input0 = vec_ld( 0, in); + vector float input1 = vec_ld(16, in); + vector signed int result0 = vec_cts(input0, 15); + vector signed int result1 = vec_cts(input1, 15); + vec_st(vec_packs(result0, result1), 0, out); + } + + audio_convert_float_to_s16_C(out, in, samples - i); + } + else + audio_convert_float_to_s16_C(out, in, samples); +} + +#endif + diff --git a/audio/utils.h b/audio/utils.h index bc0c48b6f3..cd8d280896 100644 --- a/audio/utils.h +++ b/audio/utils.h @@ -2,81 +2,37 @@ #define AUDIO_UTILS_H #include +#include #if __SSE2__ -#include -#include #define audio_convert_s16_to_float audio_convert_s16_to_float_SSE2 #define audio_convert_float_to_s16 audio_convert_float_to_s16_SSE2 + +void audio_convert_s16_to_float_SSE2(float *out, + const int16_t *in, size_t samples); + +void audio_convert_float_to_s16_SSE2(int16_t *out, + const float *in, size_t samples); + +#elif __ALTIVEC__ +#define audio_convert_s16_to_float audio_convert_s16_to_float_altivec +#define audio_convert_float_to_s16 audio_convert_float_to_s16_altivec + +void audio_convert_s16_to_float_altivec(float *out, + const int16_t *in, size_t 
samples); + +void audio_convert_float_to_s16_altivec(int16_t *out, + const float *in, size_t samples); + #else #define audio_convert_s16_to_float audio_convert_s16_to_float_C #define audio_convert_float_to_s16 audio_convert_float_to_s16_C #endif -static inline void audio_convert_s16_to_float_C(float *out, - const int16_t *in, unsigned samples) -{ - for (unsigned i = 0; i < samples; i++) - out[i] = (float)in[i] / 0x8000; -} - -static inline void audio_convert_float_to_s16_C(int16_t *out, - const float *in, unsigned samples) -{ - for (unsigned i = 0; i < samples; i++) - { - int32_t val = in[i] * 0x8000; - out[i] = (val > 0x7FFF) ? 0x7FFF : (val < -0x8000 ? -0x8000 : (int16_t)val); - } -} - -#if __SSE2__ -static inline void audio_convert_s16_to_float_SSE2(float *out, - const int16_t *in, unsigned samples) -{ - __m128 factor = _mm_set1_ps(1.0f / (0x7fff * 0x10000)); - unsigned i; - for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) - { - __m128i input = _mm_loadu_si128((const __m128i *)in); - __m128i regs[2] = { - _mm_unpacklo_epi16(_mm_setzero_si128(), input), - _mm_unpackhi_epi16(_mm_setzero_si128(), input), - }; - - __m128 output[2] = { - _mm_mul_ps(_mm_cvtepi32_ps(regs[0]), factor), - _mm_mul_ps(_mm_cvtepi32_ps(regs[1]), factor), - }; - - _mm_storeu_ps(out + 0, output[0]); - _mm_storeu_ps(out + 4, output[1]); - } - - audio_convert_s16_to_float_C(out, in, samples - i); -} - -static inline void audio_convert_float_to_s16_SSE2(int16_t *out, - const float *in, unsigned samples) -{ - __m128 factor = _mm_set1_ps((float)0x7fff); - unsigned i; - for (i = 0; i + 8 <= samples; i += 8, in += 8, out += 8) - { - __m128 input[2] = { _mm_loadu_ps(in + 0), _mm_loadu_ps(in + 4) }; - __m128 res[2] = { _mm_mul_ps(input[0], factor), _mm_mul_ps(input[1], factor) }; - - __m128i ints[2] = { _mm_cvtps_epi32(res[0]), _mm_cvtps_epi32(res[1]) }; - __m128i packed = _mm_packs_epi32(ints[0], ints[1]); - - _mm_storeu_si128((__m128i *)out, packed); - } - - audio_convert_float_to_s16_C(out, 
in, samples - i); -} - - -#endif +void audio_convert_s16_to_float_C(float *out, + const int16_t *in, size_t samples); +void audio_convert_float_to_s16_C(int16_t *out, + const float *in, size_t samples); #endif diff --git a/ssnes.c b/ssnes.c index 2a711546b0..e54f6b3416 100644 --- a/ssnes.c +++ b/ssnes.c @@ -26,13 +26,13 @@ #include "file.h" #include "general.h" #include "dynamic.h" +#include "audio/utils.h" #include "record/ffemu.h" #include "rewind.h" #include "movie.h" #include "strl.h" #include "screenshot.h" #include "cheats.h" -#include "audio/utils.h" #include "getopt_ssnes.h" #include