diff --git a/audio/sinc.c b/audio/sinc.c
index 145a69ddc3..fb912a3371 100644
--- a/audio/sinc.c
+++ b/audio/sinc.c
@@ -224,15 +224,17 @@ static void process_sinc(rarch_sinc_resampler_t *resamp, float *out_buffer)
 }
 #elif defined(HAVE_NEON)
 
-#if TAPS != 16
-#error "NEON sinc is for now only implemented with 16 taps. Cannot continue."
+#if (TAPS < 8) || ((TAPS % 8) != 0)
+#error "NEON sinc requires TAPS to be >= 8 and a multiple of 8. Cannot continue."
 #endif
 
 // Need to make this function pointer as Android doesn't have built-in targets
 // for NEON and plain ARMv7a.
 static void (*process_sinc_func)(rarch_sinc_resampler_t *resamp, float *out_buffer);
 
-void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff);
+// Assumes that taps >= 8, and that taps is a multiple of 8 (checked for TAPS at compile time above).
+void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff, unsigned taps);
+
 static void process_sinc_neon(rarch_sinc_resampler_t *resamp, float *out_buffer)
 {
    const float *buffer_l = resamp->buffer_l + resamp->ptr;
@@ -241,7 +243,7 @@ static void process_sinc_neon(rarch_sinc_resampler_t *resamp, float *out_buffer)
    unsigned phase = resamp->time >> SUBPHASE_BITS;
    const float *phase_table = resamp->phase_table[phase];
 
-   process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table);
+   process_sinc_neon_asm(out_buffer, buffer_l, buffer_r, phase_table, TAPS);
 }
 #else // Plain ol' C99
 #define process_sinc_func process_sinc_C
diff --git a/audio/sinc_neon.S b/audio/sinc_neon.S
index 0db4a4e3d0..d32ebf7a7b 100644
--- a/audio/sinc_neon.S
+++ b/audio/sinc_neon.S
@@ -1,5 +1,5 @@
 /*  RetroArch - A frontend for libretro.
- *  Copyright (C) 2010-2012 - Hans-Kristian Arntzen
+ *  Copyright (C) 2010-2013 - Hans-Kristian Arntzen
  *
  *  RetroArch is free software: you can redistribute it and/or modify it under the terms
  *  of the GNU General Public License as published by the Free Software Found-
@@ -16,37 +16,40 @@
 .arm
 .align 4
 .global process_sinc_neon_asm
-# void process_sinc_neon(float *out, const float *left, const float *right, const float *coeff)
-# Hardcoded to 16 taps.
+# void process_sinc_neon_asm(float *out, const float *left, const float *right, const float *coeff, unsigned taps)
+# Sums left[i] * coeff[i] and right[i] * coeff[i] over all taps and stores the two results to out[0] (left) and out[1] (right).
+# Assumes taps is >= 8 and a multiple of 8 (8 taps are consumed per pass and the loop only exits on exactly 0), and that coeff is 128-bit aligned.
 process_sinc_neon_asm:
+
+   push {r4, lr}
+   vmov.f32 q0, #0.0
+   vmov.f32 q8, #0.0
+
+   # taps is the fifth argument, so it is passed on the stack; after the 8-byte push above it sits at [sp, #8].
+   ldr r4, [sp, #8]
+
+1:
    # Left
-   vld1.f32 {q0-q1}, [r1]!
-   vld1.f32 {q2-q3}, [r1]!
+   vld1.f32 {q2-q3}, [r1]!
    # Right
-   vld1.f32 {q8-q9}, [r2]!
    vld1.f32 {q10-q11}, [r2]!
    # Coeff
    vld1.f32 {q12-q13}, [r3, :128]!
-   vld1.f32 {q14-q15}, [r3, :128]!
 
-   # Left
-   vmul.f32 q0, q0, q12
-   vmul.f32 q1, q1, q13
-   vmla.f32 q0, q2, q14
-   vmla.f32 q1, q3, q15
+   # Left / Right
+   vmla.f32 q0, q2, q12
+   vmla.f32 q8, q10, q12
+   vmla.f32 q0, q3, q13
+   vmla.f32 q8, q11, q13
 
-   # Right
-   vmul.f32 q8, q8, q12
-   vmul.f32 q9, q9, q13
-   vmla.f32 q8, q10, q14
-   vmla.f32 q9, q11, q15
+   subs r4, r4, #8
+   bne 1b
 
    # Add everything together
-   vadd.f32 q0, q0, q1
-   vadd.f32 q8, q8, q9
    vadd.f32 d0, d0, d1
    vadd.f32 d16, d16, d17
    vpadd.f32 d0, d0, d16
    vst1.f32 d0, [r0]
 
-   bx lr
+   pop {r4, pc}
+
diff --git a/audio/utils_neon.S b/audio/utils_neon.S
index 4c45cd5235..4c7ef7c6f2 100644
--- a/audio/utils_neon.S
+++ b/audio/utils_neon.S
@@ -1,5 +1,5 @@
 /*  RetroArch - A frontend for libretro.
- *  Copyright (C) 2010-2012 - Hans-Kristian Arntzen
+ *  Copyright (C) 2010-2013 - Hans-Kristian Arntzen
  *
  *  RetroArch is free software: you can redistribute it and/or modify it under the terms
  *  of the GNU General Public License as published by the Free Software Found-