Move code back

This commit is contained in:
libretroadmin 2025-07-15 23:35:37 +02:00
parent 3162b6a438
commit d37068cfb8
9 changed files with 426 additions and 327 deletions

View File

@ -314,8 +314,6 @@ static void gfx_animation_ticker_loop(uint64_t idx,
size_t *offset2, size_t *width2,
size_t *offset3, size_t *width3)
{
int offset2_val;
/* Calculate ticker period and phase */
int ticker_period = (int)(str_width + spacer_width);
int phase = idx % ticker_period;
@ -334,30 +332,42 @@ static void gfx_animation_ticker_loop(uint64_t idx,
* all the string manipulation that has to happen afterwards...
*/
/* Initialize offsets and widths */
*offset1 = (phase < (int)str_width) ? (size_t)phase : 0;
*width1 = (size_t)((phase < (int)str_width) ? (str_width - phase) : 0);
/* String 1 */
int offset = 0;
int width = (int)(str_width - phase);
if (width < 0)
width = 0;
else if ((width > (int)max_width))
width = (int)max_width;
/* Clamp width1 to max_width */
if (*width1 > max_width)
*width1 = max_width;
if (phase < (int)str_width)
offset = phase;
/* Calculate offset2 and width2 */
offset2_val = (int)(phase - str_width);
*offset2 = (offset2_val < 0) ? 0 : (size_t)offset2_val;
*offset1 = offset;
*width1 = width;
*width2 = (size_t)(max_width - *width1);
if (*width2 > spacer_width)
*width2 = spacer_width;
*width2 -= *offset2;
/* String 2 */
offset = (int)(phase - str_width);
if (offset < 0)
offset = 0;
width = (int)(max_width - *width1);
if (width > (int)spacer_width)
width = (int)spacer_width;
width -= offset;
/* Calculate width3 */
*width3 = (size_t)(max_width - (*width1 + *width2));
if (*width3 < 0)
*width3 = 0;
*offset2 = offset;
*width2 = width;
/* offset3 is always zero */
*offset3 = 0;
/* String 3 */
width = (int)(max_width - (*width1 + *width2));
if (width < 0)
width = 0;
/* Note: offset is always zero here so offset3 is
* unnecessary - but include it anyway to preserve
* symmetry... */
*offset3 = 0;
*width3 = width;
}
static unsigned get_ticker_smooth_generic_scroll_offset(

View File

@ -2113,71 +2113,73 @@ void video_viewport_get_scaled_aspect2(struct video_viewport *vp,
float device_aspect, float desired_aspect)
{
settings_t *settings = config_get_ptr();
video_driver_state_t *video_st = &video_driver_st;
int x = 0, y = 0;
float vp_bias_x = settings->floats.video_vp_bias_x;
float vp_bias_y = settings->floats.video_vp_bias_y;
video_driver_state_t
*video_st = &video_driver_st;
int x = 0;
int y = 0;
float vp_bias_x = settings->floats.video_vp_bias_x;
float vp_bias_y = settings->floats.video_vp_bias_y;
#if defined(RARCH_MOBILE)
if (vp_width < vp_height)
{
vp_bias_x = settings->floats.video_vp_bias_portrait_x;
vp_bias_y = settings->floats.video_vp_bias_portrait_y;
vp_bias_x = settings->floats.video_vp_bias_portrait_x;
vp_bias_y = settings->floats.video_vp_bias_portrait_y;
}
#endif
if (!y_down)
vp_bias_y = 1.0f - vp_bias_y;
vp_bias_y = 1.0 - vp_bias_y;
if (settings->uints.video_aspect_ratio_idx == ASPECT_RATIO_CUSTOM)
{
video_viewport_t *custom_vp = &settings->video_vp_custom;
int padding_x = vp_width - custom_vp->width;
int padding_y = vp_height - custom_vp->height;
video_viewport_t
*custom_vp = &settings->video_vp_custom;
int padding_x = 0;
int padding_y = 0;
x = custom_vp->x;
y = custom_vp->y;
x = custom_vp->x;
y = custom_vp->y;
if (!y_down)
y = vp->full_height - (y + custom_vp->height);
/* Adjust padding directly without checking negative */
y = vp->full_height - (y + custom_vp->height);
padding_x += (vp_width - custom_vp->width);
if (padding_x < 0)
padding_x = -padding_x;
padding_x *= 2;
padding_y = vp_height - custom_vp->height;
if (padding_y < 0)
padding_y = -padding_y;
vp_width = custom_vp->width;
vp_height = custom_vp->height;
x += (int)(padding_x * vp_bias_x);
y += (int)(padding_y * vp_bias_y);
padding_y *= 2;
vp_width = custom_vp->width;
vp_height = custom_vp->height;
x += padding_x * vp_bias_x;
y += padding_y * vp_bias_y;
}
else
{
float delta;
float aspect_diff = fabsf(device_aspect - desired_aspect);
if (aspect_diff >= 0.0001f)
if (fabsf(device_aspect - desired_aspect) < 0.0001f)
{
if (device_aspect > desired_aspect)
{
delta = (desired_aspect / device_aspect - 1.0f) / 2.0f + 0.5f;
x += (int)roundf(vp_width * (0.5f - delta) * vp_bias_x * 2.0f);
vp_width = (unsigned)roundf(vp_width * delta * 2.0f);
}
else
{
delta = (device_aspect / desired_aspect - 1.0f) / 2.0f + 0.5f;
y += (int)roundf(vp_height * (0.5f - delta) * vp_bias_y * 2.0f);
vp_height = (unsigned)roundf(vp_height * delta * 2.0f);
}
/* If the aspect ratios of screen and desired aspect
* ratio are sufficiently equal (floating point stuff),
* assume they are actually equal.
*/
}
else if (device_aspect > desired_aspect)
{
delta = (desired_aspect / device_aspect - 1.0f) / 2.0f + 0.5f;
x += (int)roundf(vp_width * ((0.5f - delta) * (vp_bias_x * 2.0f)));
vp_width = (unsigned)roundf(2.0f * vp_width * delta);
}
else
{
delta = (device_aspect / desired_aspect - 1.0f) / 2.0f + 0.5f;
y += (int)roundf(vp_height * ((0.5f - delta) * (vp_bias_y * 2.0f)));
vp_height = (unsigned)roundf(2.0f * vp_height * delta);
}
}
vp->x = x;
vp->y = y;
vp->width = vp_width;
vp->x = x;
vp->y = y;
vp->width = vp_width;
vp->height = vp_height;
/* Statistics */

View File

@ -109,11 +109,13 @@ void convert_float_to_s16(int16_t *s, const float *in, size_t len)
_mm_storeu_si128((__m128i *)s, packed); /* Then put the result in the output array */
}
len -= i;
len = len - i;
i = 0;
/* If there are any stray samples at the end, we need to convert them
* (maybe the original array didn't contain a multiple of 8 samples) */
#elif defined(__ALTIVEC__)
int samples_in = len;
/* Unaligned loads/store is a bit expensive,
* so we optimize for the good path (very likely). */
if (((uintptr_t)s & 15) + ((uintptr_t)in & 15) == 0)
@ -128,11 +130,20 @@ void convert_float_to_s16(int16_t *s, const float *in, size_t len)
vec_st(vec_packs(result0, result1), 0, s);
}
len -= i;
samples_in -= i;
}
len = samples_in;
i = 0;
#elif defined(_MIPS_ARCH_ALLEGREX)
#ifdef DEBUG
/* Make sure the buffers are 16 byte aligned, this should be
* the default behaviour of malloc in the PSPSDK.
* Assume alignment. */
retro_assert(((uintptr_t)in & 0xf) == 0);
retro_assert(((uintptr_t)s & 0xf) == 0);
#endif
for (i = 0; i + 8 <= len; i += 8)
{
__asm__ (

View File

@ -111,9 +111,11 @@ void convert_s16_to_float(float *s,
_mm_storeu_ps(s + 4, output_r);
}
len -= i;
len = len - i;
i = 0;
#elif defined(__ALTIVEC__)
size_t samples_in = len;
/* Unaligned loads/store is a bit expensive, so we
* optimize for the good path (very likely). */
if (((uintptr_t)s & 15) + ((uintptr_t)in & 15) == 0)
@ -133,15 +135,23 @@ void convert_s16_to_float(float *s,
vec_st(out_lo, 16, s);
}
len -= i;
samples_in -= i;
}
len = samples_in;
i = 0;
#endif
gain /= 0x8000;
#if defined(_MIPS_ARCH_ALLEGREX)
#ifdef DEBUG
/* Make sure the buffer is 16 byte aligned, this should be the
* default behaviour of malloc in the PSPSDK.
* Only the output buffer can be assumed to be 16-byte aligned. */
retro_assert(((uintptr_t)s & 0xf) == 0);
#endif
__asm__ (
".set push \n"
".set noreorder \n"

View File

@ -103,43 +103,43 @@ void process_sinc_neon_asm(float *out, const float *left,
*/
static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
unsigned phase = re->time >> re->subphase_bits;
const float *phase_table = re->phase_table + phase * taps * 2;
unsigned phase = resamp->time >> resamp->subphase_bits;
const float *phase_table = resamp->phase_table + phase * taps * 2;
const float *delta_table = phase_table + taps;
float32x4_t delta = vdupq_n_f32((re->time & re->subphase_mask) * re->subphase_mod);
float32x4_t delta = vdupq_n_f32((resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
int i;
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
float32x2_t p3, p4;
@ -164,7 +164,7 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
vst1_f32(output, vpadd_f32(p3, p4));
output += 2;
out_frames++;
re->time += ratio;
resamp->time += ratio;
}
}
}
@ -176,47 +176,47 @@ static void resampler_sinc_process_neon_kaiser(void *re_, struct resampler_data
/* Assumes that taps >= 8, and that taps is a multiple of 8. */
static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
unsigned phase = re->time >> re->subphase_bits;
const float *phase_table = re->phase_table + phase * taps;
unsigned phase = resamp->time >> resamp->subphase_bits;
const float *phase_table = resamp->phase_table + phase * taps;
#ifdef HAVE_ARM_NEON_ASM_OPTIMIZATIONS
process_sinc_neon_asm(output, buffer_l, buffer_r, phase_table, taps);
#else
int i;
float32x2_t p3, p4;
float32x4_t p1 = {0, 0, 0, 0}, p2 = {0, 0, 0, 0};
float32x2_t p3, p4;
for (i = 0; i < (int)taps; i += 8)
{
@ -234,9 +234,9 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
p4 = vadd_f32(vget_low_f32(p2), vget_high_f32(p2));
vst1_f32(output, vpadd_f32(p3, p4));
#endif
output += 2;
out_frames++;
output += 2;
re->time += ratio;
resamp->time += ratio;
}
}
}
@ -248,47 +248,49 @@ static void resampler_sinc_process_neon(void *re_, struct resampler_data *data)
#if defined(__AVX__)
static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
{
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
unsigned phase = re->time >> re->subphase_bits;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = re->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
__m256 delta = _mm256_set1_ps((float)
(re->time & re->subphase_mask) * re->subphase_mod);
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
__m256 sum_l = _mm256_setzero_ps();
__m256 sum_r = _mm256_setzero_ps();
@ -318,56 +320,58 @@ static void resampler_sinc_process_avx_kaiser(void *re_, struct resampler_data *
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
output += 2;
out_frames++;
output += 2;
re->time += ratio;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
{
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
__m256 delta;
unsigned phase = re->time >> re->subphase_bits;
float *phase_table = re->phase_table + phase * taps;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
__m256 sum_l = _mm256_setzero_ps();
__m256 sum_r = _mm256_setzero_ps();
@ -396,12 +400,13 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
_mm_store_ss(output + 0, _mm256_extractf128_ps(res_l, 0));
_mm_store_ss(output + 1, _mm256_extractf128_ps(res_r, 0));
output += 2;
out_frames++;
output += 2;
re->time += ratio;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
@ -410,47 +415,48 @@ static void resampler_sinc_process_avx(void *re_, struct resampler_data *data)
#if defined(__SSE__)
static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
{
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
__m128 sum;
unsigned phase = re->time >> re->subphase_bits;
float *phase_table = re->phase_table + phase * taps * 2;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
__m128 delta = _mm_set1_ps((float)
(re->time & re->subphase_mask) * re->subphase_mod);
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod);
__m128 sum_l = _mm_setzero_ps();
__m128 sum_r = _mm_setzero_ps();
@ -491,56 +497,58 @@ static void resampler_sinc_process_sse_kaiser(void *re_, struct resampler_data *
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
output += 2;
re->time += ratio;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
{
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
__m128 sum;
unsigned phase = re->time >> re->subphase_bits;
float *phase_table = re->phase_table + phase * taps;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
__m128 sum_l = _mm_setzero_ps();
__m128 sum_r = _mm_setzero_ps();
@ -559,9 +567,9 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
* sum_r = { r3, r2, r1, r0 }
*/
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
sum = _mm_add_ps(_mm_shuffle_ps(sum_l, sum_r,
_MM_SHUFFLE(1, 0, 1, 0)),
_mm_shuffle_ps(sum_l, sum_r, _MM_SHUFFLE(3, 2, 3, 2)));
/* sum = { r1, r0, l1, l0 } + { r3, r2, l3, l2 }
* sum = { R1, R0, L1, L0 }
@ -579,12 +587,13 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
/* movehl { X, R, X, L } == { X, R, X, R } */
_mm_store_ss(output + 1, _mm_movehl_ps(sum, sum));
output += 2;
out_frames++;
output += 2;
re->time += ratio;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
@ -592,48 +601,49 @@ static void resampler_sinc_process_sse(void *re_, struct resampler_data *data)
static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
while (frames)
{
while (frames && re->time >= phases)
while (frames)
{
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = re->time >> re->subphase_bits;
float *phase_table = re->phase_table + phase * taps * 2;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps * 2;
float *delta_table = phase_table + taps;
float delta = (float)
(re->time & re->subphase_mask) * re->subphase_mod;
(resamp->time & resamp->subphase_mask) * resamp->subphase_mod;
for (i = 0; i < (int)taps; i++)
{
@ -648,9 +658,11 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
output += 2;
out_frames++;
re->time += ratio;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
@ -658,45 +670,46 @@ static void resampler_sinc_process_c_kaiser(void *re_, struct resampler_data *da
static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
{
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (re->phase_bits + re->subphase_bits);
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)re_;
unsigned phases = 1 << (resamp->phase_bits + resamp->subphase_bits);
uint32_t ratio = phases / data->ratio;
const float *input = data->data_in;
float *output = data->data_out;
size_t frames = data->input_frames;
size_t out_frames = 0;
unsigned taps = re->taps;
unsigned taps = resamp->taps;
{
while (frames)
{
while (frames && re->time >= phases)
while (frames && resamp->time >= phases)
{
/* Push in reverse to make filter more obvious. */
if (!re->ptr)
re->ptr = taps;
re->ptr--;
if (!resamp->ptr)
resamp->ptr = taps;
resamp->ptr--;
re->buffer_l[re->ptr + taps] =
re->buffer_l[re->ptr ] = *input++;
resamp->buffer_l[resamp->ptr + taps] =
resamp->buffer_l[resamp->ptr] = *input++;
re->buffer_r[re->ptr + taps] =
re->buffer_r[re->ptr ] = *input++;
resamp->buffer_r[resamp->ptr + taps] =
resamp->buffer_r[resamp->ptr] = *input++;
re->time -= phases;
resamp->time -= phases;
frames--;
}
{
const float *buffer_l = re->buffer_l + re->ptr;
const float *buffer_r = re->buffer_r + re->ptr;
while (re->time < phases)
const float *buffer_l = resamp->buffer_l + resamp->ptr;
const float *buffer_r = resamp->buffer_r + resamp->ptr;
while (resamp->time < phases)
{
int i;
float sum_l = 0.0f;
float sum_r = 0.0f;
unsigned phase = re->time >> re->subphase_bits;
float *phase_table = re->phase_table + phase * taps;
unsigned phase = resamp->time >> resamp->subphase_bits;
float *phase_table = resamp->phase_table + phase * taps;
for (i = 0; i < (int)taps; i++)
{
@ -711,30 +724,31 @@ static void resampler_sinc_process_c(void *re_, struct resampler_data *data)
output += 2;
out_frames++;
re->time += ratio;
resamp->time += ratio;
}
}
}
}
data->output_frames = out_frames;
}
/* Releases a sinc resampler handle: main_buffer is handed to
 * memalign_free() when the handle is non-NULL, and the handle itself is
 * then passed to free() unconditionally.
 *
 * NOTE(review): this span reads like diff output with the +/- markers
 * stripped. The statements using `re` and those using `resamp` are the same
 * logic under two local names; taken literally they would declare two
 * locals from the same `data` pointer and free it twice. Treat them as
 * alternatives and confirm against the real file. */
static void resampler_sinc_free(void *data)
{
/* Variant with the local named `re`. */
rarch_sinc_resampler_t *re = (rarch_sinc_resampler_t*)data;
if (re)
memalign_free(re->main_buffer);
free(re);
/* Variant with the local named `resamp`; otherwise identical. */
rarch_sinc_resampler_t *resamp = (rarch_sinc_resampler_t*)data;
if (resamp)
memalign_free(resamp->main_buffer);
free(resamp);
}
static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re,
static void sinc_init_table_kaiser(rarch_sinc_resampler_t *resamp,
double cutoff,
float *phase_table, int phases, int taps, bool calculate_delta)
{
int i, j;
/* Kaiser window function - need to normalize w(0) to 1.0f */
float kaiser_beta = re->kaiser_beta;
float kaiser_beta = resamp->kaiser_beta;
double window_mod = besseli0(kaiser_beta);
int stride = calculate_delta ? 2 : 1;
double sidelobes = taps / 2.0;
@ -743,10 +757,13 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re,
{
for (j = 0; j < taps; j++)
{
float val;
double sinc_phase;
int n = j * phases + i;
double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0;
double sinc_phase = sidelobes * window_phase;
float val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
double window_phase = (double)n / (phases * taps); /* [0, 1). */
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
besseli0(kaiser_beta * sqrtf(1 - window_phase * window_phase))
/ window_mod;
phase_table[i * stride * taps + j] = val;
@ -771,20 +788,24 @@ static void sinc_init_table_kaiser(rarch_sinc_resampler_t *re,
phase = phases - 1;
for (j = 0; j < taps; j++)
{
float val, delta;
double sinc_phase;
int n = j * phases + (phase + 1);
double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0;
double sinc_phase = sidelobes * window_phase;
float val = cutoff * sinc(M_PI * sinc_phase * cutoff)
* besseli0(re->kaiser_beta * sqrtf(1 - window_phase
* window_phase)) / window_mod;
float delta = (val - phase_table[phase * stride * taps + j]);
double window_phase = (double)n / (phases * taps); /* (0, 1]. */
window_phase = 2.0 * window_phase - 1.0; /* (-1, 1] */
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
besseli0(resamp->kaiser_beta * sqrtf(1 - window_phase *
window_phase)) / window_mod;
delta = (val - phase_table[phase * stride * taps + j]);
phase_table[(phase * stride + 1) * taps + j] = delta;
}
}
}
static void sinc_init_table_lanczos(
rarch_sinc_resampler_t *re, double cutoff,
rarch_sinc_resampler_t *resamp, double cutoff,
float *phase_table, int phases, int taps, bool calculate_delta)
{
int i, j;
@ -797,10 +818,13 @@ static void sinc_init_table_lanczos(
{
for (j = 0; j < taps; j++)
{
double sinc_phase;
float val;
int n = j * phases + i;
double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0; /* [-1, 1) */
double sinc_phase = sidelobes * window_phase;
float val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
double window_phase = (double)n / (phases * taps); /* [0, 1). */
window_phase = 2.0 * window_phase - 1.0; /* [-1, 1) */
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
sinc(M_PI * window_phase) / window_mod;
phase_table[i * stride * taps + j] = val;
}
@ -824,12 +848,16 @@ static void sinc_init_table_lanczos(
phase = phases - 1;
for (j = 0; j < taps; j++)
{
float val, delta;
double sinc_phase;
int n = j * phases + (phase + 1);
double window_phase = 2.0 * ((double)n / (phases * taps)) - 1.0;
double sinc_phase = sidelobes * window_phase;
float val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
double window_phase = (double)n / (phases * taps); /* (0, 1]. */
window_phase = 2.0 * window_phase - 1.0; /* (-1, 1] */
sinc_phase = sidelobes * window_phase;
val = cutoff * sinc(M_PI * sinc_phase * cutoff) *
sinc(M_PI * window_phase) / window_mod;
float delta = (val - phase_table[phase * stride * taps + j]);
delta = (val - phase_table[phase * stride * taps + j]);
phase_table[(phase * stride + 1) * taps + j] = delta;
}
}

View File

@ -20,14 +20,14 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
/* Implementation of strlcpy()/strlcat() based on OpenBSD. */
#ifndef __MACH__
#include <stdlib.h>
#include <ctype.h>
#include <compat/strl.h>
/* Implementation of strlcpy()/strlcat() based on OpenBSD. */
#ifndef __MACH__
size_t strlcpy(char *s, const char *source, size_t len)
{
size_t _len = len;
@ -52,4 +52,4 @@ size_t strlcat(char *s, const char *source, size_t len)
len -= _len;
return _len + strlcpy(s, source, len);
}
#endif
#endif

View File

@ -353,25 +353,10 @@ size_t fill_pathname(char *s, const char *in_path,
**/
char *find_last_slash(const char *str)
{
/* NOTE(review): two implementations appear back to back in this span,
 * which reads like diff output with the +/- markers stripped. The hunk
 * header above shrinks the region from 25 to 10 lines, which suggests the
 * single-pass scan is the removed code and the strrchr() form is what
 * remains - confirm against the real file. */
const char *p;
const char *last_slash = NULL;
const char *last_backslash = NULL;
/* Traverse the string once */
for (p = str; *p != '\0'; ++p)
{
if (*p == '/')
last_slash = p; /* Update last forward slash */
else if (*p == '\\')
last_backslash = p; /* Update last backslash */
}
/* Determine which one is last */
if (!last_slash) /* Backslash */
return (char*)last_backslash;
if (!last_backslash) /* Forward slash */
return (char*)last_slash;
return (last_backslash > last_slash) ? (char*)last_backslash : (char*)last_slash;
/* strrchr() variant: locate the last '/' and the last '\\' separately and
 * return whichever one sits later in the string. With no '/' present the
 * result is simply the backslash lookup, which is NULL when neither
 * separator exists. */
const char *slash = strrchr(str, '/');
const char *backslash = strrchr(str, '\\');
/* NOTE(review): when only '/' is present, `backslash > slash` compares a
 * null pointer against a valid one. ISO C does not define relational
 * comparison for that case; it presumably evaluates to false on the
 * targets this code is built for - verify, or test backslash first. */
char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
return last_slash;
}
/**
@ -384,15 +369,14 @@ char *find_last_slash(const char *str)
**/
size_t fill_pathname_slash(char *s, size_t len)
{
char *last_slash = find_last_slash(s);
len = strlen(s);
const char *slash = strrchr(s, '/');
const char *backslash = strrchr(s, '\\');
char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
if (!last_slash)
{
s[ len] = PATH_DEFAULT_SLASH_C();
s[++len] = '\0';
}
return strlcat(s, PATH_DEFAULT_SLASH(), len);
len = strlen(s);
/* Try to preserve slash type. */
else if (last_slash != (s + len - 1))
if (last_slash != (s + len - 1))
{
s[ len] = last_slash[0];
s[++len] = '\0';
@ -478,12 +462,16 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len)
{
size_t _len = 0;
char *tmp = strdup(in_dir);
char *last_slash = find_last_slash(tmp);
const char *slash = strrchr(tmp, '/');
const char *backslash = strrchr(tmp, '\\');
char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
if (last_slash && last_slash[1] == 0)
{
*last_slash = '\0';
last_slash = find_last_slash(tmp);
slash = strrchr(tmp, '/');
backslash = strrchr(tmp, '\\');
last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
}
/* Cut the last part of the string (the filename) after the slash,
@ -494,7 +482,9 @@ size_t fill_pathname_parent_dir_name(char *s, const char *in_dir, size_t len)
/* Point in_dir to the address of the last slash.
* If in_dir is NULL, it means there was no slash in tmp,
* so use tmp as-is. */
in_dir = find_last_slash(tmp);
slash = strrchr(tmp, '/');
backslash = strrchr(tmp, '\\');
in_dir = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
if (!in_dir)
in_dir = tmp;
@ -602,19 +592,23 @@ size_t fill_str_dated_filename(char *s,
**/
size_t path_basedir(char *s)
{
   /* Reduces @s, in place, to its directory part.
    *
    * - NULL or empty string : nothing is written, returns 0.
    * - single character     : left untouched, returns 1.
    * - separator found      : the string is cut right after the last
    *                          '/' or '\\' and the new length is returned.
    * - no separator         : @s becomes "." followed by the character
    *                          produced by PATH_DEFAULT_SLASH_C(), and
    *                          2 is returned.
    */
   const char *slash;
   const char *backslash;
   char *last_slash;

   if (!s || s[0] == '\0')
      return 0;
   if (s[1] == '\0')
      return 1;

   slash      = strrchr(s, '/');
   backslash  = strrchr(s, '\\');

   /* Pick whichever separator occurs last; NULL if there is none. */
   if (!slash)
      last_slash = (char*)backslash;
   else if (!backslash)
      last_slash = (char*)slash;
   else
      last_slash = (char*)((backslash > slash) ? backslash : slash);

   if (last_slash)
   {
      /* Keep the separator itself, drop everything after it. */
      last_slash[1] = '\0';
      return (size_t)(last_slash + 1 - s);
   }

   /* No separator at all. The string holds at least two characters
    * plus the terminator at this point, so three bytes are writable. */
   s[0] = '.';
   s[1] = PATH_DEFAULT_SLASH_C();
   s[2] = '\0';
   return 2;
}
/**
@ -636,9 +630,15 @@ size_t path_parent_dir(char *s, size_t len)
if (len && PATH_CHAR_IS_SLASH(s[len - 1]))
{
char *last_slash;
const char *slash;
const char *backslash;
bool was_absolute = path_is_absolute(s);
s[len - 1] = '\0';
last_slash = find_last_slash(s);
slash = strrchr(s, '/');
backslash = strrchr(s, '\\');
last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
if (was_absolute && !last_slash)
{
@ -667,7 +667,9 @@ const char *path_basename(const char *path)
/* We cut either at the first compression-related hash,
* or we cut at the last slash */
const char *ptr = NULL;
char *last_slash = find_last_slash(path);
const char *slash = strrchr(path, '/');
const char *backslash = strrchr(path, '\\');
char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
return ((ptr = path_get_archive_delim(path)) || (ptr = last_slash))
? (ptr + 1) : path;
}
@ -685,7 +687,9 @@ const char *path_basename(const char *path)
/* Returns a pointer to the part of @path that follows its last path
 * separator ('/' or '\\', whichever occurs later). When @path holds
 * no separator, @path itself is returned. The result always points
 * into @path; nothing is allocated or modified. @path must not be
 * NULL. */
const char *path_basename_nocompression(const char *path)
{
   const char *slash     = strrchr(path, '/');
   const char *backslash = strrchr(path, '\\');
   const char *last_sep;

   /* We cut at the last slash, of either kind. NULL is handled
    * explicitly so it never enters a pointer comparison. */
   if (!slash)
      last_sep = backslash;
   else if (!backslash)
      last_sep = slash;
   else
      last_sep = (backslash > slash) ? backslash : slash;

   return last_sep ? (last_sep + 1) : path;
}
@ -995,18 +999,22 @@ size_t fill_pathname_join_special(char *s,
if (*s)
{
char *last_slash = find_last_slash(s);
if (!last_slash)
const char *slash = strrchr(s, '/');
const char *backslash = strrchr(s, '\\');
char *last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
if (last_slash)
{
s[ _len] = PATH_DEFAULT_SLASH_C();
s[++_len] = '\0';
/* Try to preserve slash type. */
if (last_slash != (s + _len - 1))
{
s[ _len] = last_slash[0];
s[++_len] = '\0';
}
}
/* Try to preserve slash type. */
else if (last_slash != (s + _len - 1))
else
{
s[ _len] = last_slash[0];
s[++_len] = '\0';
s[ _len] = PATH_DEFAULT_SLASH_C();
s[++_len] = '\0';
}
}
@ -1319,6 +1327,8 @@ size_t fill_pathname_abbreviated_or_relative(char *s,
**/
void path_basedir_wrapper(char *s)
{
const char *slash;
const char *backslash;
char *last_slash = NULL;
if (!s || s[0] == '\0' || s[1] == '\0')
return;
@ -1327,7 +1337,9 @@ void path_basedir_wrapper(char *s)
if ((last_slash = (char*)path_get_archive_delim(s)))
*last_slash = '\0';
#endif
last_slash = find_last_slash(s);
slash = strrchr(s, '/');
backslash = strrchr(s, '\\');
last_slash = (!slash || (backslash > slash)) ? (char*)backslash : (char*)slash;
if (last_slash)
last_slash[1] = '\0';
else

View File

@ -628,6 +628,7 @@ void path_basedir_wrapper(char *s);
* if not already there.
* Hidden non-leaf function cost:
* - can call strlcat once if it returns false
* - calls strlen
**/
size_t fill_pathname_slash(char *s, size_t len);

View File

@ -57,17 +57,16 @@ static bool file_list_deinitialize_internal(file_list_t *list)
bool file_list_reserve(file_list_t *list, size_t nitems)
{
struct item_file *new_data;
const size_t item_size = sizeof(struct item_file);
struct item_file *new_data;
if (nitems < list->capacity || nitems > (size_t)-1 / item_size)
if (nitems < list->capacity || nitems > (size_t)-1/item_size)
return false;
/* Allocate the new memory block */
if (!(new_data = (struct item_file*)realloc(list->list, nitems * item_size)))
return false;
memset(new_data + list->capacity, 0, (nitems - list->capacity) * item_size);
memset(&new_data[list->capacity], 0, item_size * (nitems - list->capacity));
list->list = new_data;
list->capacity = nitems;
@ -78,31 +77,57 @@ bool file_list_reserve(file_list_t *list, size_t nitems)
/**
 * file_list_insert:
 * @list          : list to insert into
 * @path          : path string for the new entry; duplicated, may be NULL
 * @label         : label string for the new entry; duplicated, may be NULL
 * @type          : value stored in the entry's 'type' field
 * @directory_ptr : value stored in the entry's 'directory_ptr' field
 * @entry_idx     : value stored in the entry's 'entry_idx' field
 * @idx           : position at which the new entry is placed
 *
 * Inserts a new entry at position @idx, moving every entry from
 * @idx onwards up by one slot. The list is grown through
 * file_list_reserve() when it is full.
 *
 * @return true on success; false when @idx lies beyond the current
 * end of the list or when the list could not be grown.
 **/
bool file_list_insert(file_list_t *list,
      const char *path, const char *label,
      unsigned type, size_t directory_ptr,
      size_t entry_idx,
      size_t idx)
{
   struct item_file *new_item;

   /* Inserting past the end would make (size - idx) wrap around
    * and turn the shift below into an out-of-bounds move. */
   if (idx > list->size)
      return false;

   /* Expand file list if needed */
   if (     list->size >= list->capacity
         && !file_list_reserve(list, list->capacity * 2 + 1))
      return false;

   /* Shift elements to make room for the new item. The regions
    * overlap, hence memmove rather than memcpy. */
   memmove(&list->list[idx + 1],
         &list->list[idx],
         (list->size - idx) * sizeof(struct item_file));

   /* Initialize the new item */
   new_item                = &list->list[idx];
   new_item->path          = path  ? strdup(path)  : NULL;
   new_item->label         = label ? strdup(label) : NULL;
   new_item->alt           = NULL;
   new_item->type          = type;
   new_item->directory_ptr = directory_ptr;
   new_item->entry_idx     = entry_idx;
   new_item->userdata      = NULL;
   new_item->actiondata    = NULL;

   list->size++;

   return true;
}