diff --git a/android/native/jni/Android.mk b/android/native/jni/Android.mk
index de472c0a25..b36ac2b57f 100644
--- a/android/native/jni/Android.mk
+++ b/android/native/jni/Android.mk
@@ -20,6 +20,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
 
 ifeq ($(HAVE_NEON),1)
 LOCAL_CFLAGS += -DHAVE_NEON
+LOCAL_SRC_FILES += ../../../audio/utils_neon.S.neon
 endif
 
 ifeq ($(HAVE_SINC),1)
diff --git a/audio/utils.c b/audio/utils.c
index 4fa8a3f89e..1781a4a06a 100644
--- a/audio/utils.c
+++ b/audio/utils.c
@@ -133,6 +133,43 @@ void audio_convert_float_to_s16_altivec(int16_t *out,
    else
       audio_convert_float_to_s16_C(out, in, samples);
 }
+#elif defined(HAVE_NEON)
+void audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples);
+
+// Converts signed 16-bit samples to float. Whole groups of 8 samples go
+// through the NEON routine; the remaining 0-7 samples use the C fallback.
+// NOTE(review): gain is accepted but not applied by this path yet.
+void audio_convert_s16_to_float_neon(float *out, const int16_t *in, size_t samples,
+      float gain)
+{
+   size_t aligned_samples = samples & ~(size_t)7; // Largest multiple of 8 <= samples.
+
+   (void)gain; // gain is ignored for now.
+
+   // Skip the ASM call when there is no full group of 8 to convert.
+   if (aligned_samples)
+      audio_convert_s16_float_asm(out, in, aligned_samples);
+
+   // Could do all conversion in ASM, but keep it simple for now.
+   audio_convert_s16_to_float_C(out + aligned_samples, in + aligned_samples,
+         samples - aligned_samples);
+}
+
+void audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples);
+
+// Converts float samples to signed 16-bit. Whole groups of 8 samples go
+// through the NEON routine; the remaining 0-7 samples use the C fallback.
+void audio_convert_float_to_s16_neon(int16_t *out, const float *in, size_t samples)
+{
+   size_t aligned_samples = samples & ~(size_t)7; // Largest multiple of 8 <= samples.
+
+   // Skip the ASM call when there is no full group of 8 to convert.
+   if (aligned_samples)
+      audio_convert_float_s16_asm(out, in, aligned_samples);
+
+   audio_convert_float_to_s16_C(out + aligned_samples, in + aligned_samples,
+         samples - aligned_samples);
+}
 #endif
 
 
diff --git a/audio/utils.h b/audio/utils.h
index 26238881cc..ac7ca65caf 100644
--- a/audio/utils.h
+++ b/audio/utils.h
@@ -39,6 +39,16 @@ void audio_convert_s16_to_float_altivec(float *out,
 void audio_convert_float_to_s16_altivec(int16_t *out,
       const float *in, size_t samples);
 
+#elif defined(HAVE_NEON)
+#define audio_convert_s16_to_float audio_convert_s16_to_float_neon
+#define audio_convert_float_to_s16 audio_convert_float_to_s16_neon
+
+void audio_convert_s16_to_float_neon(float *out,
+      const int16_t *in, size_t samples, float gain);
+
+void audio_convert_float_to_s16_neon(int16_t *out,
+      const float *in, size_t samples);
+
 #else
 #define audio_convert_s16_to_float audio_convert_s16_to_float_C
 #define audio_convert_float_to_s16 audio_convert_float_to_s16_C
diff --git a/audio/utils_neon.S b/audio/utils_neon.S
new file mode 100644
index 0000000000..c4834a9249
--- /dev/null
+++ b/audio/utils_neon.S
@@ -0,0 +1,86 @@
+/* RetroArch - A frontend for libretro.
+ * Copyright (C) 2010-2012 - Hans-Kristian Arntzen
+ *
+ * RetroArch is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * RetroArch is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with RetroArch.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+.arm
+
+.align 4
+.global audio_convert_s16_float_asm
+# audio_convert_s16_float_asm(float *out, const int16_t *in, size_t samples)
+# Converts whole groups of 8 samples; a trailing partial group is not touched.
+# Clobbers only q0-q2 and q8, leaving the callee-saved q4-q7 (AAPCS) intact.
+audio_convert_s16_float_asm:
+   # r2 becomes the number of 8-sample groups; return early when it is zero.
+   movs r2, r2, lsr #3
+   bxeq lr
+
+   # Build 2^-15 from an encodable immediate: ((0.25^2)^2)^2 * 2.
+   vmov.f32 q8, #0.25
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vadd.f32 q8, q8, q8
+
+1:
+   vld1.s16 {q0}, [r1]!
+   # Widen to 32-bit
+   vmovl.s16 q1, d0
+   vmovl.s16 q2, d1
+
+   # Convert to float and scale by 2^-15
+   vcvt.f32.s32 q1, q1
+   vcvt.f32.s32 q2, q2
+   vmul.f32 q1, q1, q8
+   vmul.f32 q2, q2, q8
+
+   vst1.f32 {q1-q2}, [r0]!
+
+   subs r2, r2, #1
+   bne 1b
+
+   bx lr
+
+.align 4
+.global audio_convert_float_s16_asm
+# audio_convert_float_s16_asm(int16_t *out, const float *in, size_t samples)
+# Converts whole groups of 8 samples; a trailing partial group is not touched.
+# Clobbers only q0-q2, q8 and q9, leaving the callee-saved q4-q7 (AAPCS) intact.
+audio_convert_float_s16_asm:
+   # r2 becomes the number of 8-sample groups; return early when it is zero.
+   movs r2, r2, lsr #3
+   bxeq lr
+   # Build 2^15 from encodable immediates: ((2^4)^2)^2 * 0.5.
+   vmov.f32 q8, #16.0
+   vmov.f32 q9, #0.5
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q8
+   vmul.f32 q8, q8, q9
+
+1:
+   vld1.f32 {q0-q1}, [r1]!
+   vmul.f32 q0, q0, q8
+   vmul.f32 q1, q1, q8
+
+   # Truncate toward zero, then narrow with saturation to the int16 range.
+   vcvt.s32.f32 q0, q0
+   vcvt.s32.f32 q1, q1
+   vqmovn.s32 d4, q0
+   vqmovn.s32 d5, q1
+   vst1.s16 {d4-d5}, [r0]!
+
+   subs r2, r2, #1
+   bne 1b
+
+   bx lr
+