// Source: project64/Source/Project64-rsp-core/Hle/alist.cpp (C++, 966 lines, 29 KiB)

// Project64 - A Nintendo 64 emulator
// https://www.pj64-emu.com/
// Copyright(C) 2001-2021 Project64
// Copyright(C) 2014 Bobby Smiles
// Copyright(C) 2009 Richard Goedeken
// Copyright(C) 2002 Hacktarux
// GNU/GPLv2 licensed: https://gnu.org/licenses/gpl-2.0.html
#include "alist.h"
#include "arithmetics.h"
#include "audio.h"
#include "hle.h"
#include "mem.h"
#include <memory.h>
#include <stdint.h>
// State of one volume ramp used by the envelope mixers.
// ramp_step() adds `step` to `value` and returns `value >> 16`, so the fields
// are fixed-point quantities whose low 16 bits are the fractional part.
struct ramp_t
{
int64_t value; // Current ramp position
int64_t step; // Increment applied to `value` on each ramp_step() call (0 once the target is reached)
int64_t target; // Position at which the ramp stops; ramp_step() clamps `value` to it
};
// Local functions
// Exchanges the two sample pointers referenced by `a` and `b`.
static void swap(int16_t ** a, int16_t ** b)
{
    int16_t * const first = *a;
    *a = *b;
    *b = first;
}
// Returns a pointer to the 16-bit sample with index `pos` in the audio list
// buffer. The index is XOR-ed with S (defined elsewhere) before it is applied.
static int16_t * sample(CHle * hle, unsigned pos)
{
    int16_t * const samples = (int16_t *)hle->alist_buffer();
    return &samples[pos ^ S];
}
// Returns a pointer to the byte at offset `dmem` of the audio list buffer.
// The offset-to-pointer translation is delegated to u8() (defined elsewhere).
static uint8_t * alist_u8(CHle * hle, uint16_t dmem)
{
return u8(hle->alist_buffer(), dmem);
}
// Returns a pointer to the 16-bit value at byte offset `dmem` of the audio list
// buffer, viewed as signed. The translation is delegated to u16() (defined elsewhere).
static int16_t * alist_s16(CHle * hle, uint16_t dmem)
{
return (int16_t *)u16(hle->alist_buffer(), dmem);
}
// Adds `src` scaled by `gain` (product shifted right by 15 bits) to `*dst`,
// passing the sum through clamp_s16() before storing it.
static void sample_mix(int16_t * dst, int16_t src, int16_t gain)
{
    const int32_t scaled = (src * gain) >> 15;
    *dst = clamp_s16(*dst + scaled);
}
// Mixes the input sample `src` into each of the `n` destinations, using
// gains[k] as the gain for dst[k].
static void alist_envmix_mix(size_t n, int16_t ** dst, const int16_t * gains, int16_t src)
{
    for (size_t ch = 0; ch != n; ++ch)
    {
        sample_mix(dst[ch], src, gains[ch]);
    }
}
// Advances the ramp by one step and returns the integer part (value >> 16)
// of the new position. Once the position reaches or passes the target in the
// direction of travel, it is pinned to the target and the step is zeroed.
static int16_t ramp_step(struct ramp_t * ramp)
{
    ramp->value += ramp->step;

    // A non-positive step ramps downwards (or is idle); a positive one ramps upwards
    const bool rising = ramp->step > 0;
    const bool target_reached = rising ? (ramp->value >= ramp->target)
                                       : (ramp->value <= ramp->target);
    if (target_reached)
    {
        ramp->step = 0;
        ramp->value = ramp->target;
    }
    return (int16_t)(ramp->value >> 16);
}
// Global functions
// Walks the audio command list of the current task and dispatches each command.
//
// The list starts at the DRAM address stored at TASK_DATA_PTR and is
// TASK_DATA_SIZE bytes long. Every command is a pair of 32-bit words (w1, w2);
// bits 24..30 of w1 select the handler.
//
// hle:      HLE context giving access to DMEM/DRAM and message reporting
// abi:      table of command handlers, indexed by command number
// abi_size: number of entries in `abi`; commands outside the table only raise a warning
void alist_process(CHle * hle, const acmd_callback_t abi[], unsigned int abi_size)
{
    const uint32_t * alist = dram_u32(hle, *dmem_u32(hle, TASK_DATA_PTR));

    // Count whole 8-byte commands instead of comparing against an end pointer:
    // two words are consumed per iteration, so an end pointer at an odd word
    // offset would be stepped over and the loop would never terminate.
    // A trailing partial command is ignored.
    uint32_t commands_left = *dmem_u32(hle, TASK_DATA_SIZE) >> 3;

    for (; commands_left != 0; --commands_left)
    {
        const uint32_t w1 = *(alist++);
        const uint32_t w2 = *(alist++);
        const unsigned int acmd = (w1 >> 24) & 0x7f;

        if (acmd < abi_size)
        {
            (*abi[acmd])(hle, w1, w2);
        }
        else
        {
            hle->WarnMessage("Invalid ABI command %u", acmd);
        }
    }
}
// Resolves a segmented address: bits 24..29 of `so` select an entry of
// `segments` (which has `n` entries) and the low 24 bits are the offset.
// Returns segment base + offset; for an out-of-range segment a warning is
// emitted and the bare offset is returned.
uint32_t alist_get_address(CHle * hle, uint32_t so, const uint32_t * segments, size_t n)
{
    const uint32_t offset = so & 0xffffff;
    const uint8_t segment = (so >> 24) & 0x3f;

    if (segment < n)
    {
        return segments[segment] + offset;
    }

    hle->WarnMessage("Invalid segment %u", segment);
    return offset;
}
// Sets a segment base: bits 24..29 of `so` select the entry of `segments`
// (which has `n` entries) and the low 24 bits become its new value.
// An out-of-range segment only produces a warning.
void alist_set_address(CHle * hle, uint32_t so, uint32_t * segments, size_t n)
{
    const uint32_t offset = so & 0xffffff;
    const uint8_t segment = (so >> 24) & 0x3f;

    if (segment < n)
    {
        segments[segment] = offset;
        return;
    }

    hle->WarnMessage("Invalid segment %u", segment);
}
// Zeroes `count` bytes of the audio list buffer starting at offset `dmem`,
// one byte at a time through alist_u8().
void alist_clear(CHle * hle, uint16_t dmem, uint16_t count)
{
    for (; count != 0; --count)
    {
        *alist_u8(hle, dmem++) = 0;
    }
}
// Copies data from DRAM into the audio list buffer.
// As for a DMA transfer, the buffer offset is rounded down to a multiple of 4,
// the DRAM address down to a multiple of 8, and the length is passed through
// align(count, 8).
void alist_load(CHle * hle, uint16_t dmem, uint32_t address, uint16_t count)
{
    const uint16_t dst_offset = dmem & ~3;
    const uint32_t src_address = address & ~7;
    const uint16_t length = (uint16_t)align(count, 8);

    memcpy(hle->alist_buffer() + dst_offset, hle->dram() + src_address, length);
}
// Copies data from the audio list buffer back to DRAM.
// As for a DMA transfer, the buffer offset is rounded down to a multiple of 4,
// the DRAM address down to a multiple of 8, and the length is passed through
// align(count, 8).
void alist_save(CHle * hle, uint16_t dmem, uint32_t address, uint16_t count)
{
    const uint16_t src_offset = dmem & ~3;
    const uint32_t dst_address = address & ~7;
    const uint16_t length = (uint16_t)align(count, 8);

    memcpy(hle->dram() + dst_address, hle->alist_buffer() + src_offset, length);
}
// Copies `count` bytes inside the audio list buffer from offset `dmemi` to
// offset `dmemo`, byte by byte in ascending order through alist_u8().
void alist_move(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
{
    for (; count != 0; --count)
    {
        const uint8_t value = *alist_u8(hle, dmemi++);
        *alist_u8(hle, dmemo++) = value;
    }
}
// Copies `count` 16-bit samples from `dmemi` to `dmemo`, taking only every
// second input sample: the input advances 4 bytes per sample, the output 2.
void alist_copy_every_other_sample(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
{
    for (; count != 0; --count, dmemo += 2, dmemi += 4)
    {
        *alist_s16(hle, dmemo) = *alist_s16(hle, dmemi);
    }
}
// Takes a snapshot of the 128 bytes (64 halfwords) at `dmemi` and writes it
// `count` times back-to-back starting at `dmemo`. The snapshot is taken first,
// so later writes cannot alter the repeated pattern.
void alist_repeat64(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint8_t count)
{
    uint16_t pattern[64];
    memcpy(pattern, hle->alist_buffer() + dmemi, sizeof(pattern));

    for (; count != 0; --count)
    {
        memcpy(hle->alist_buffer() + dmemo, pattern, sizeof(pattern));
        dmemo = (uint16_t)(dmemo + sizeof(pattern));
    }
}
// Copies `count` blocks of `block_size` bytes inside the audio list buffer,
// from `dmemi` to `dmemo`, in chunks of 0x20 bytes.
//
// Both loops are do/while loops: at least one 0x20-byte chunk is copied even
// when `count` or `block_size` is 0, and each block is effectively rounded up
// to a multiple of 0x20 bytes. The source and destination offsets keep
// advancing from one block to the next (they are not reset per block).
void alist_copy_blocks(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t block_size, uint8_t count)
{
int block_left = count;
do
{
// Signed so the subtraction below can go negative and end the inner loop
int bytes_left = block_size;
do
{
memcpy(hle->alist_buffer() + dmemo, hle->alist_buffer() + dmemi, 0x20);
bytes_left -= 0x20;
dmemi += 0x20;
dmemo += 0x20;
} while (bytes_left > 0);
--block_left;
} while (block_left > 0);
}
// Interleaves the halfwords of two buffers (`left`, `right`) into `dmemo`.
//
// `count` is divided by 4; each iteration then reads two halfwords from each
// source and writes four halfwords to the destination.
// The order in which the four values are written depends on M64P_BIG_ENDIAN.
// NOTE(review): the non-big-endian order (r2, l2, r1, l1) presumably accounts
// for how samples are laid out in the host copy of the buffer - confirm.
void alist_interleave(CHle * hle, uint16_t dmemo, uint16_t left, uint16_t right, uint16_t count)
{
uint16_t * dst = (uint16_t *)(hle->alist_buffer() + dmemo);
const uint16_t * srcL = (uint16_t *)(hle->alist_buffer() + left);
const uint16_t * srcR = (uint16_t *)(hle->alist_buffer() + right);
count >>= 2;
while (count != 0)
{
uint16_t l1 = *(srcL++);
uint16_t l2 = *(srcL++);
uint16_t r1 = *(srcR++);
uint16_t r2 = *(srcR++);
#if M64P_BIG_ENDIAN
*(dst++) = l1;
*(dst++) = r1;
*(dst++) = l2;
*(dst++) = r2;
#else
*(dst++) = r2;
*(dst++) = l2;
*(dst++) = r1;
*(dst++) = l1;
#endif
--count;
}
}
// Envelope mixer with exponentially updated volume ramps.
//
// Mixes the input at `dmemi` into the dry left/right buffers (`dmem_dl`,
// `dmem_dr`) and, when `aux` is set, also into the wet left/right buffers
// (`dmem_wl`, `dmem_wr`). The left and right volumes follow two ramps whose
// step is recomputed for every group of 8 samples from exp_seq/exp_rates.
//
// An 80-byte state block is read from DRAM at `address` and written back at the
// end. When `init` is set the ramps are set up from `vol`, `target` and `rate`
// and the `dry`/`wet` arguments are used; otherwise dry, wet and the ramp state
// all come from the state block.
//
// State block layout, in 16-bit units of save_buffer:
//   0      wet            2      dry
//   4-5    target (L)     6-7    target (R)
//   8-9    exp_rate (L)   10-11  exp_rate (R)
//   12-13  exp_seq (L)    14-15  exp_seq (R)
//   16-17  value (L)      18-19  value (R)
void alist_envmix_exp(CHle * hle, bool init, bool aux, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t * vol, const int16_t * target, const int32_t * rate, uint32_t address)
{
// Number of destination buffers: dry L/R only, or dry + wet L/R
size_t n = (aux) ? 4 : 2;
const int16_t * const in = (int16_t *)(hle->alist_buffer() + dmemi);
int16_t * const dl = (int16_t *)(hle->alist_buffer() + dmem_dl);
int16_t * const dr = (int16_t *)(hle->alist_buffer() + dmem_dr);
int16_t * const wl = (int16_t *)(hle->alist_buffer() + dmem_wl);
int16_t * const wr = (int16_t *)(hle->alist_buffer() + dmem_wr);
struct ramp_t ramps[2];
int32_t exp_seq[2];
int32_t exp_rates[2];
uint32_t ptr = 0;
int x, y;
short save_buffer[40];
memcpy((uint8_t *)save_buffer, (hle->dram() + address), sizeof(save_buffer));
if (init)
{
ramps[0].value = (vol[0] << 16);
ramps[1].value = (vol[1] << 16);
ramps[0].target = (target[0] << 16);
ramps[1].target = (target[1] << 16);
exp_rates[0] = rate[0];
exp_rates[1] = rate[1];
exp_seq[0] = (vol[0] * rate[0]);
exp_seq[1] = (vol[1] * rate[1]);
}
else
{
wet = *(int16_t *)(save_buffer + 0); // 0-1
dry = *(int16_t *)(save_buffer + 2); // 2-3
ramps[0].target = *(int32_t *)(save_buffer + 4); // 4-5
ramps[1].target = *(int32_t *)(save_buffer + 6); // 6-7
exp_rates[0] = *(int32_t *)(save_buffer + 8); // 8-9 (save_buffer is a 16-bit pointer)
exp_rates[1] = *(int32_t *)(save_buffer + 10); // 10-11
exp_seq[0] = *(int32_t *)(save_buffer + 12); // 12-13
exp_seq[1] = *(int32_t *)(save_buffer + 14); // 14-15
ramps[0].value = *(int32_t *)(save_buffer + 16); // 16-17
ramps[1].value = *(int32_t *)(save_buffer + 18); // 18-19
}
// Initialize the steps so that ramp.step != 0 iff ramp.value != ramp.target
ramps[0].step = ramps[0].target - ramps[0].value;
ramps[1].step = ramps[1].target - ramps[1].value;
// `y` advances by 16 per group while 8 samples are mixed per group
for (y = 0; y < count; y += 16)
{
// While a ramp is still moving, advance its exponential sequence and
// derive the per-sample step for the next 8 samples from it
if (ramps[0].step != 0)
{
exp_seq[0] = ((int64_t)exp_seq[0] * (int64_t)exp_rates[0]) >> 16;
ramps[0].step = (exp_seq[0] - ramps[0].value) >> 3;
}
if (ramps[1].step != 0)
{
exp_seq[1] = ((int64_t)exp_seq[1] * (int64_t)exp_rates[1]) >> 16;
ramps[1].step = (exp_seq[1] - ramps[1].value) >> 3;
}
for (x = 0; x < 8; ++x)
{
int16_t gains[4];
int16_t * buffers[4];
int16_t l_vol = ramp_step(&ramps[0]);
int16_t r_vol = ramp_step(&ramps[1]);
buffers[0] = dl + (ptr ^ S);
buffers[1] = dr + (ptr ^ S);
buffers[2] = wl + (ptr ^ S);
buffers[3] = wr + (ptr ^ S);
// Per-buffer gain = channel volume * dry/wet level, rounded (+0x4000) and shifted right by 15
gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
alist_envmix_mix(n, buffers, gains, in[ptr ^ S]);
++ptr;
}
}
*(int16_t *)(save_buffer + 0) = wet; // 0-1
*(int16_t *)(save_buffer + 2) = dry; // 2-3
*(int32_t *)(save_buffer + 4) = (int32_t)ramps[0].target; // 4-5
*(int32_t *)(save_buffer + 6) = (int32_t)ramps[1].target; // 6-7
*(int32_t *)(save_buffer + 8) = exp_rates[0]; // 8-9 (save_buffer is a 16-bit pointer)
*(int32_t *)(save_buffer + 10) = exp_rates[1]; // 10-11
*(int32_t *)(save_buffer + 12) = exp_seq[0]; // 12-13
*(int32_t *)(save_buffer + 14) = exp_seq[1]; // 14-15
*(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; // 16-17
*(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; // 18-19
memcpy(hle->dram() + address, (uint8_t *)save_buffer, sizeof(save_buffer));
}
// Envelope mixer with linear volume ramps and optional wet (aux) outputs.
//
// Mixes the input at `dmemi` into the dry left/right buffers and, when `aux`
// is set, also into the wet left/right buffers. `count` is halved before the
// loop, i.e. it is a byte count of 16-bit samples.
//
// An 80-byte state block is read from DRAM at `address` and written back at the
// end. When `init` is set the ramps are set up from `vol`, `target` and
// `rate` (step = rate / 8) and the `dry`/`wet` arguments are used; otherwise
// dry, wet and the ramp state come from the state block.
//
// State block layout, in 16-bit units of save_buffer:
//   0      wet          2      dry
//   4-5    target (L)   6-7    target (R)
//   8-9    step (L)     10-11  step (R)
//   12-15  not used by this mixer
//   16-17  value (L)    18-19  value (R)
void alist_envmix_ge(CHle * hle, bool init, bool aux, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t * vol, const int16_t * target, const int32_t * rate, uint32_t address)
{
unsigned k;
// Number of destination buffers: dry L/R only, or dry + wet L/R
size_t n = (aux) ? 4 : 2;
const int16_t * const in = (int16_t *)(hle->alist_buffer() + dmemi);
int16_t * const dl = (int16_t *)(hle->alist_buffer() + dmem_dl);
int16_t * const dr = (int16_t *)(hle->alist_buffer() + dmem_dr);
int16_t * const wl = (int16_t *)(hle->alist_buffer() + dmem_wl);
int16_t * const wr = (int16_t *)(hle->alist_buffer() + dmem_wr);
struct ramp_t ramps[2];
short save_buffer[40];
memcpy((uint8_t *)save_buffer, (hle->dram() + address), 80);
if (init)
{
ramps[0].value = (vol[0] << 16);
ramps[1].value = (vol[1] << 16);
ramps[0].target = (target[0] << 16);
ramps[1].target = (target[1] << 16);
ramps[0].step = rate[0] / 8;
ramps[1].step = rate[1] / 8;
}
else
{
wet = *(int16_t *)(save_buffer + 0); // 0-1
dry = *(int16_t *)(save_buffer + 2); // 2-3
ramps[0].target = *(int32_t *)(save_buffer + 4); // 4-5
ramps[1].target = *(int32_t *)(save_buffer + 6); // 6-7
ramps[0].step = *(int32_t *)(save_buffer + 8); // 8-9 (save_buffer is a 16-bit pointer)
ramps[1].step = *(int32_t *)(save_buffer + 10); // 10-11
/* *(int32_t *)(save_buffer + 12);*/ // 12-13
/* *(int32_t *)(save_buffer + 14);*/ // 14-15
ramps[0].value = *(int32_t *)(save_buffer + 16); // 16-17
ramps[1].value = *(int32_t *)(save_buffer + 18); // 18-19
}
// Convert the byte count into a number of 16-bit samples
count >>= 1;
for (k = 0; k < count; ++k)
{
int16_t gains[4];
int16_t * buffers[4];
int16_t l_vol = ramp_step(&ramps[0]);
int16_t r_vol = ramp_step(&ramps[1]);
buffers[0] = dl + (k ^ S);
buffers[1] = dr + (k ^ S);
buffers[2] = wl + (k ^ S);
buffers[3] = wr + (k ^ S);
// Per-buffer gain = channel volume * dry/wet level, rounded (+0x4000) and shifted right by 15
gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
alist_envmix_mix(n, buffers, gains, in[k ^ S]);
}
*(int16_t *)(save_buffer + 0) = wet; // 0-1
*(int16_t *)(save_buffer + 2) = dry; // 2-3
*(int32_t *)(save_buffer + 4) = (int32_t)ramps[0].target; // 4-5
*(int32_t *)(save_buffer + 6) = (int32_t)ramps[1].target; // 6-7
*(int32_t *)(save_buffer + 8) = (int32_t)ramps[0].step; // 8-9 (save_buffer is a 16-bit pointer)
*(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step; // 10-11
/**(int32_t *)(save_buffer + 12);*/ // 12-13
/**(int32_t *)(save_buffer + 14);*/ // 14-15
*(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; // 16-17
*(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; // 18-19
memcpy(hle->dram() + address, (uint8_t *)save_buffer, 80);
}
// Envelope mixer with linear volume ramps; always mixes into all four buffers
// (dry left/right and wet left/right).
//
// `count` is halved before the loop, i.e. it is a byte count of 16-bit samples.
//
// An 80-byte state block is read from DRAM at `address` and written back at the
// end. When `init` is set the ramps are set up from `vol`, `target` and
// `rate` (step = rate / 8) and the `dry`/`wet` arguments are used; otherwise
// dry, wet and the ramp state come from the state block.
//
// State block layout, in 16-bit units of save_buffer:
//   0      wet                              2      dry
//   4      target (L), integer part only    6      target (R), integer part only
//   8-9    step (L)                         10-11  step (R)
//   16-17  value (L)                        18-19  value (R)
void alist_envmix_lin(CHle * hle, bool init, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, uint16_t count, int16_t dry, int16_t wet, const int16_t * vol, const int16_t * target, const int32_t * rate, uint32_t address)
{
size_t k;
struct ramp_t ramps[2];
int16_t save_buffer[40];
const int16_t * const in = (int16_t *)(hle->alist_buffer() + dmemi);
int16_t * const dl = (int16_t *)(hle->alist_buffer() + dmem_dl);
int16_t * const dr = (int16_t *)(hle->alist_buffer() + dmem_dr);
int16_t * const wl = (int16_t *)(hle->alist_buffer() + dmem_wl);
int16_t * const wr = (int16_t *)(hle->alist_buffer() + dmem_wr);
memcpy((uint8_t *)save_buffer, hle->dram() + address, 80);
if (init)
{
ramps[0].step = rate[0] / 8;
ramps[0].value = (vol[0] << 16);
ramps[0].target = (target[0] << 16);
ramps[1].step = rate[1] / 8;
ramps[1].value = (vol[1] << 16);
ramps[1].target = (target[1] << 16);
}
else
{
wet = *(int16_t *)(save_buffer + 0); // 0-1
dry = *(int16_t *)(save_buffer + 2); // 2-3
// Unlike the other mixers, only the upper 16 bits of the targets are saved
ramps[0].target = *(int16_t *)(save_buffer + 4) << 16; // 4-5
ramps[1].target = *(int16_t *)(save_buffer + 6) << 16; // 6-7
ramps[0].step = *(int32_t *)(save_buffer + 8); // 8-9 (save_buffer is a 16-bit pointer)
ramps[1].step = *(int32_t *)(save_buffer + 10); // 10-11
ramps[0].value = *(int32_t *)(save_buffer + 16); // 16-17
ramps[1].value = *(int32_t *)(save_buffer + 18); // 18-19
}
// Convert the byte count into a number of 16-bit samples
count >>= 1;
for (k = 0; k < count; ++k)
{
int16_t gains[4];
int16_t * buffers[4];
int16_t l_vol = ramp_step(&ramps[0]);
int16_t r_vol = ramp_step(&ramps[1]);
buffers[0] = dl + (k ^ S);
buffers[1] = dr + (k ^ S);
buffers[2] = wl + (k ^ S);
buffers[3] = wr + (k ^ S);
// Per-buffer gain = channel volume * dry/wet level, rounded (+0x4000) and shifted right by 15
gains[0] = clamp_s16((l_vol * dry + 0x4000) >> 15);
gains[1] = clamp_s16((r_vol * dry + 0x4000) >> 15);
gains[2] = clamp_s16((l_vol * wet + 0x4000) >> 15);
gains[3] = clamp_s16((r_vol * wet + 0x4000) >> 15);
alist_envmix_mix(4, buffers, gains, in[k ^ S]);
}
*(int16_t *)(save_buffer + 0) = wet; // 0-1
*(int16_t *)(save_buffer + 2) = dry; // 2-3
*(int16_t *)(save_buffer + 4) = (int16_t)(ramps[0].target >> 16); // 4-5
*(int16_t *)(save_buffer + 6) = (int16_t)(ramps[1].target >> 16); // 6-7
*(int32_t *)(save_buffer + 8) = (int32_t)ramps[0].step; // 8-9 (save_buffer is a 16-bit pointer)
*(int32_t *)(save_buffer + 10) = (int32_t)ramps[1].step; // 10-11
*(int32_t *)(save_buffer + 16) = (int32_t)ramps[0].value; // 16-17
*(int32_t *)(save_buffer + 18) = (int32_t)ramps[1].value; // 18-19
memcpy(hle->dram() + address, (uint8_t *)save_buffer, 80);
}
// Mixes the buffer at `dmemi`, scaled by `gain`, into the buffer at `dmemo`.
// `count` is a byte count; count / 2 16-bit samples are processed.
void alist_mix(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t gain)
{
    int16_t * const dst = (int16_t *)(hle->alist_buffer() + dmemo);
    const int16_t * const src = (int16_t *)(hle->alist_buffer() + dmemi);
    const size_t samples = count >> 1;

    for (size_t k = 0; k < samples; ++k)
    {
        sample_mix(&dst[k], src[k], gain);
    }
}
// Envelope mixer working on groups of 8 samples with per-group envelope updates.
//
// For every input sample, a left and a right value are derived from
// env_values[0] and env_values[1]; those are added to the dry buffers. Each is
// then scaled again by env_values[2] and added to the wet buffers. Every
// product is shifted right by 16 and XOR-ed with the matching `xors` entry.
// NOTE(review): the XOR masks presumably implement an optional sign inversion - confirm with the caller.
//
// `count` is passed through align(count, 8) and consumed 8 samples at a time.
// After each group the three entries of `env_values` are advanced by
// `env_steps`; the caller's `env_values` array is modified in place.
// When `swap_wet_LR` is set the wet left and right destinations are exchanged.
void alist_envmix_nead(CHle * hle, bool swap_wet_LR, uint16_t dmem_dl, uint16_t dmem_dr, uint16_t dmem_wl, uint16_t dmem_wr, uint16_t dmemi, unsigned count, uint16_t * env_values, uint16_t * env_steps, const int16_t * xors)
{
int16_t * in = (int16_t *)(hle->alist_buffer() + dmemi);
int16_t * dl = (int16_t *)(hle->alist_buffer() + dmem_dl);
int16_t * dr = (int16_t *)(hle->alist_buffer() + dmem_dr);
int16_t * wl = (int16_t *)(hle->alist_buffer() + dmem_wl);
int16_t * wr = (int16_t *)(hle->alist_buffer() + dmem_wr);
// Make sure count is a multiple of 8
count = align(count, 8);
if (swap_wet_LR)
{
swap(&wl, &wr);
}
while (count != 0)
{
size_t i;
for (i = 0; i < 8; ++i)
{
// Dry contributions: input scaled by the left/right envelope values
int16_t l = (((int32_t)in[i ^ S] * (uint32_t)env_values[0]) >> 16) ^ xors[0];
int16_t r = (((int32_t)in[i ^ S] * (uint32_t)env_values[1]) >> 16) ^ xors[1];
// Wet contributions: the dry values scaled once more by env_values[2]
int16_t l2 = (((int32_t)l * (uint32_t)env_values[2]) >> 16) ^ xors[2];
int16_t r2 = (((int32_t)r * (uint32_t)env_values[2]) >> 16) ^ xors[3];
dl[i ^ S] = clamp_s16(dl[i ^ S] + l);
dr[i ^ S] = clamp_s16(dr[i ^ S] + r);
wl[i ^ S] = clamp_s16(wl[i ^ S] + l2);
wr[i ^ S] = clamp_s16(wr[i ^ S] + r2);
}
env_values[0] += env_steps[0];
env_values[1] += env_steps[1];
env_values[2] += env_steps[2];
dl += 8;
dr += 8;
wl += 8;
wr += 8;
in += 8;
count -= 8;
}
}
// Adds the buffer at `dmemi` to the buffer at `dmemo`, sample by sample,
// passing each sum through clamp_s16().
// `count` is a byte count; count / 2 16-bit samples are processed.
void alist_add(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count)
{
    int16_t * const dst = (int16_t *)(hle->alist_buffer() + dmemo);
    const int16_t * const src = (int16_t *)(hle->alist_buffer() + dmemi);
    const size_t samples = count >> 1;

    for (size_t k = 0; k < samples; ++k)
    {
        dst[k] = clamp_s16(dst[k] + src[k]);
    }
}
// Multiplies every sample of the buffer at `dmem` by `gain` and shifts the
// product right by 4 bits (so `gain` acts as a value with 4 fractional bits),
// passing the result through clamp_s16().
// `count` is a byte count; count / 2 16-bit samples are processed.
void alist_multQ44(CHle * hle, uint16_t dmem, uint16_t count, int8_t gain)
{
    int16_t * const dst = (int16_t *)(hle->alist_buffer() + dmem);
    const size_t samples = count >> 1;

    for (size_t k = 0; k < samples; ++k)
    {
        dst[k] = clamp_s16((dst[k] * gain) >> 4);
    }
}
// Starts a resampler from scratch: zeroes the four samples beginning at
// sample index `pos` and clears the pitch accumulator.
static void alist_resample_reset(CHle * hle, uint16_t pos, uint32_t * pitch_accu)
{
    const unsigned first = pos;
    for (unsigned tap = 0; tap != 4; ++tap)
    {
        *sample(hle, first + tap) = 0;
    }
    *pitch_accu = 0;
}
// Restores the resampler state from DRAM at `address`: four consecutive
// halfwords go to the samples beginning at sample index `pos`, and the
// halfword at address + 8 becomes the pitch accumulator.
static void alist_resample_load(CHle * hle, uint32_t address, uint16_t pos, uint32_t * pitch_accu)
{
    for (unsigned tap = 0; tap < 4; ++tap)
    {
        *sample(hle, pos + tap) = *dram_u16(hle, address + 2 * tap);
    }
    *pitch_accu = *dram_u16(hle, address + 8);
}
// Saves the resampler state to DRAM at `address`: the four samples beginning
// at sample index `pos` as consecutive halfwords, followed at address + 8 by
// the pitch accumulator (stored as a halfword).
static void alist_resample_save(CHle * hle, uint32_t address, uint16_t pos, uint32_t pitch_accu)
{
    for (unsigned tap = 0; tap < 4; ++tap)
    {
        *dram_u16(hle, address + 2 * tap) = *sample(hle, pos + tap);
    }
    *dram_u16(hle, address + 8) = pitch_accu;
}
// Resamples the buffer at `dmemi` into the buffer at `dmemo` with a 4-tap
// filter whose coefficients come from RESAMPLE_LUT.
//
// init:    start from a zeroed history and accumulator instead of loading
//          the state saved in DRAM at `address`
// flag2:   not implemented; only triggers a warning
// count:   output size in bytes (halved to get the number of 16-bit samples)
// pitch:   input advance per output sample, Q16.16
// address: DRAM location of the saved state (4 samples + accumulator); it is
//          always written back at the end
//
// The four samples located just before `dmemi` serve as history: they are
// zeroed or reloaded before the loop starts.
void alist_resample(CHle * hle, bool init, bool flag2, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint32_t pitch /* Q16.16 */, uint32_t address)
{
uint32_t pitch_accu;
// Work with sample indices rather than byte offsets
uint16_t ipos = dmemi >> 1;
uint16_t opos = dmemo >> 1;
count >>= 1;
// Step back over the four history samples
ipos -= 4;
if (flag2)
{
hle->WarnMessage("alist_resample: flag2 is not implemented");
}
if (init)
{
alist_resample_reset(hle, ipos, &pitch_accu);
}
else
{
alist_resample_load(hle, address, ipos, &pitch_accu);
}
while (count != 0)
{
// The top 6 bits of the 16-bit fraction select a group of 4 coefficients
const int16_t * lut = RESAMPLE_LUT + ((pitch_accu & 0xfc00) >> 8);
*sample(hle, opos++) = clamp_s16((
(*sample(hle, ipos) * lut[0]) +
(*sample(hle, ipos + 1) * lut[1]) +
(*sample(hle, ipos + 2) * lut[2]) +
(*sample(hle, ipos + 3) * lut[3])) >>
15);
// Advance the input position by the integer part, keep the fraction
pitch_accu += pitch;
ipos += (pitch_accu >> 16);
pitch_accu &= 0xffff;
--count;
}
alist_resample_save(hle, address, ipos, pitch_accu);
}
// Zero-order-hold resampler: every output sample is a plain copy of the input
// sample at the current position, which advances by the integer part of the
// accumulated `pitch` (16 fractional bits are kept in `pitch_accu`).
// `count` is the output size in bytes; count / 2 samples are produced.
void alist_resample_zoh(CHle * hle, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint32_t pitch, uint32_t pitch_accu)
{
    uint16_t in_index = dmemi >> 1;
    uint16_t out_index = dmemo >> 1;

    for (count >>= 1; count != 0; --count)
    {
        *sample(hle, out_index++) = *sample(hle, in_index);

        pitch_accu += pitch;
        in_index += (pitch_accu >> 16);
        pitch_accu &= 0xffff;
    }
}
// Signature shared by the ADPCM frame decoders below: decodes the packed data at
// `dmemi` into `dst` using `scale`, and returns the number of input bytes read.
typedef unsigned int (*adpcm_predict_frame_t)(CHle * hle, int16_t * dst, uint16_t dmemi, unsigned char scale);
// Decodes one frame packed as 4 bits per sample: reads 8 bytes from `dmemi`
// and writes 16 values to `dst` (high nibble first, then low nibble).
// Returns the number of input bytes read (8).
static unsigned int adpcm_predict_frame_4bits(CHle * hle, int16_t * dst, uint16_t dmemi, unsigned char scale)
{
    const unsigned int rshift = (scale >= 12) ? 0 : 12 - scale;

    for (unsigned int n = 0; n < 8; ++n, dst += 2)
    {
        const uint8_t packed = *alist_u8(hle, dmemi++);
        dst[0] = adpcm_predict_sample(packed, 0xf0, 8, rshift);
        dst[1] = adpcm_predict_sample(packed, 0x0f, 12, rshift);
    }
    return 8;
}
// Decodes one frame packed as 2 bits per sample: reads 4 bytes from `dmemi`
// and writes 16 values to `dst` (most significant bit pair first).
// Returns the number of input bytes read (4).
static unsigned int adpcm_predict_frame_2bits(CHle * hle, int16_t * dst, uint16_t dmemi, unsigned char scale)
{
    const unsigned int rshift = (scale >= 14) ? 0 : 14 - scale;

    for (unsigned int n = 0; n < 4; ++n, dst += 4)
    {
        const uint8_t packed = *alist_u8(hle, dmemi++);
        dst[0] = adpcm_predict_sample(packed, 0xc0, 8, rshift);
        dst[1] = adpcm_predict_sample(packed, 0x30, 10, rshift);
        dst[2] = adpcm_predict_sample(packed, 0x0c, 12, rshift);
        dst[3] = adpcm_predict_sample(packed, 0x03, 14, rshift);
    }
    return 4;
}
// Decodes ADPCM data from `dmemi` into 16-bit samples at `dmemo`.
//
// init:               start from an all-zero previous frame
// loop:               when not `init`, load the previous frame from
//                     `loop_address` instead of `last_frame_address`
// two_bit_per_sample: select the 2-bit frame decoder instead of the 4-bit one
// count:              output size in bytes; each decoded frame is 16 samples
//                     (32 bytes) and is expected to divide `count` evenly
// codebook:           predictor table; the low nibble of each frame header
//                     selects a 16-entry slice
// last_frame_address: DRAM location where the last decoded frame is stored
//
// The previous frame is written to the output first, followed by the decoded frames.
void alist_adpcm(CHle * hle, bool init, bool loop, bool two_bit_per_sample, uint16_t dmemo, uint16_t dmemi, uint16_t count, const int16_t * codebook, uint32_t loop_address, uint32_t last_frame_address)
{
    int16_t last_frame[16];
    size_t i;
    adpcm_predict_frame_t predict_frame = (two_bit_per_sample)
                                              ? adpcm_predict_frame_2bits
                                              : adpcm_predict_frame_4bits;

    // Signed remaining-byte counter: when asserts are compiled out, a count
    // that is not a multiple of 32 would otherwise wrap around on `count -= 32`
    // and never hit 0. With a signed counter the last, partial frame is
    // decoded in full and the loop terminates.
    int bytes_left = count;

    assert((count & 0x1f) == 0);

    if (init)
    {
        memset(last_frame, 0, 16 * sizeof(last_frame[0]));
    }
    else
    {
        dram_load_u16(hle, (uint16_t *)last_frame, (loop) ? loop_address : last_frame_address, 16);
    }

    // Emit the previous frame ahead of the newly decoded data
    for (i = 0; i < 16; ++i, dmemo += 2)
    {
        *alist_s16(hle, dmemo) = last_frame[i];
    }

    while (bytes_left > 0)
    {
        int16_t frame[16];
        // Frame header: high nibble = scale, low nibble = codebook entry
        uint8_t code = *alist_u8(hle, dmemi++);
        unsigned char scale = (code & 0xf0) >> 4;
        const int16_t * const cb_entry = codebook + ((code & 0xf) << 4);

        dmemi += predict_frame(hle, frame, dmemi, scale);

        // Each half of the frame is predicted from the two samples before it
        adpcm_compute_residuals(last_frame, frame, cb_entry, last_frame + 14, 8);
        adpcm_compute_residuals(last_frame + 8, frame + 8, cb_entry, last_frame + 6, 8);

        for (i = 0; i < 16; ++i, dmemo += 2)
        {
            *alist_s16(hle, dmemo) = last_frame[i];
        }
        bytes_left -= 32;
    }

    dram_store_u16(hle, (uint16_t *)last_frame, last_frame_address, 16);
}
// Applies an 8-tap filter to `count` bytes of samples at `dmem`, in place.
//
// dmem:        byte offset of the samples in the audio list buffer
// count:       number of bytes to filter; processed in groups of 8 samples
//              (16 bytes). Values larger than the internal output buffer
//              (0x3c0 samples) are clamped, with a warning.
// address:     DRAM location of the 8 samples preceding the buffer; it is
//              updated with the last 8 input samples on exit
// lut_address: DRAM addresses of two coefficient tables. Their first 8
//              entries are averaged and BOTH tables are overwritten in DRAM
//              with the result before filtering.
void alist_filter(CHle * hle, uint16_t dmem, uint16_t count, uint32_t address, const uint32_t * lut_address)
{
    int x;
    int16_t outbuff[0x3c0];
    int16_t * outp = outbuff;
    int16_t * const lutt6 = (int16_t *)(hle->dram() + lut_address[0]);
    int16_t * const lutt5 = (int16_t *)(hle->dram() + lut_address[1]);
    int16_t * in1 = (int16_t *)(hle->dram() + address);
    int16_t * in2 = (int16_t *)(hle->alist_buffer() + dmem);

    // The loop below writes 8 samples per 16 bytes of `count` into `outbuff`,
    // so an unchecked count could write past the end of that stack array
    if (count > sizeof(outbuff))
    {
        hle->WarnMessage("alist_filter: count %u exceeds output buffer, clamping", (unsigned)count);
        count = (uint16_t)sizeof(outbuff);
    }

    for (x = 0; x < 8; ++x)
    {
        int32_t v = (lutt5[x] + lutt6[x]) >> 1;
        lutt5[x] = lutt6[x] = v;
    }

    for (x = 0; x < count; x += 16)
    {
        // in1 holds the previous 8 samples, in2 the current 8
        int32_t v[8];
        v[1] = in1[0] * lutt6[6];
        v[1] += in1[3] * lutt6[7];
        v[1] += in1[2] * lutt6[4];
        v[1] += in1[5] * lutt6[5];
        v[1] += in1[4] * lutt6[2];
        v[1] += in1[7] * lutt6[3];
        v[1] += in1[6] * lutt6[0];
        v[1] += in2[1] * lutt6[1]; // 1
        v[0] = in1[3] * lutt6[6];
        v[0] += in1[2] * lutt6[7];
        v[0] += in1[5] * lutt6[4];
        v[0] += in1[4] * lutt6[5];
        v[0] += in1[7] * lutt6[2];
        v[0] += in1[6] * lutt6[3];
        v[0] += in2[1] * lutt6[0];
        v[0] += in2[0] * lutt6[1];
        v[3] = in1[2] * lutt6[6];
        v[3] += in1[5] * lutt6[7];
        v[3] += in1[4] * lutt6[4];
        v[3] += in1[7] * lutt6[5];
        v[3] += in1[6] * lutt6[2];
        v[3] += in2[1] * lutt6[3];
        v[3] += in2[0] * lutt6[0];
        v[3] += in2[3] * lutt6[1];
        v[2] = in1[5] * lutt6[6];
        v[2] += in1[4] * lutt6[7];
        v[2] += in1[7] * lutt6[4];
        v[2] += in1[6] * lutt6[5];
        v[2] += in2[1] * lutt6[2];
        v[2] += in2[0] * lutt6[3];
        v[2] += in2[3] * lutt6[0];
        v[2] += in2[2] * lutt6[1];
        v[5] = in1[4] * lutt6[6];
        v[5] += in1[7] * lutt6[7];
        v[5] += in1[6] * lutt6[4];
        v[5] += in2[1] * lutt6[5];
        v[5] += in2[0] * lutt6[2];
        v[5] += in2[3] * lutt6[3];
        v[5] += in2[2] * lutt6[0];
        v[5] += in2[5] * lutt6[1];
        v[4] = in1[7] * lutt6[6];
        v[4] += in1[6] * lutt6[7];
        v[4] += in2[1] * lutt6[4];
        v[4] += in2[0] * lutt6[5];
        v[4] += in2[3] * lutt6[2];
        v[4] += in2[2] * lutt6[3];
        v[4] += in2[5] * lutt6[0];
        v[4] += in2[4] * lutt6[1];
        v[7] = in1[6] * lutt6[6];
        v[7] += in2[1] * lutt6[7];
        v[7] += in2[0] * lutt6[4];
        v[7] += in2[3] * lutt6[5];
        v[7] += in2[2] * lutt6[2];
        v[7] += in2[5] * lutt6[3];
        v[7] += in2[4] * lutt6[0];
        v[7] += in2[7] * lutt6[1];
        v[6] = in2[1] * lutt6[6];
        v[6] += in2[0] * lutt6[7];
        v[6] += in2[3] * lutt6[4];
        v[6] += in2[2] * lutt6[5];
        v[6] += in2[5] * lutt6[2];
        v[6] += in2[4] * lutt6[3];
        v[6] += in2[7] * lutt6[0];
        v[6] += in2[6] * lutt6[1];

        // Round (+0x4000) and drop 15 fractional bits
        outp[1] = ((v[1] + 0x4000) >> 15);
        outp[0] = ((v[0] + 0x4000) >> 15);
        outp[3] = ((v[3] + 0x4000) >> 15);
        outp[2] = ((v[2] + 0x4000) >> 15);
        outp[5] = ((v[5] + 0x4000) >> 15);
        outp[4] = ((v[4] + 0x4000) >> 15);
        outp[7] = ((v[7] + 0x4000) >> 15);
        outp[6] = ((v[6] + 0x4000) >> 15);

        in1 = in2;
        in2 += 8;
        outp += 8;
    }

    // Save the last 8 input samples as history for the next call, then
    // replace the input with the filtered output
    memcpy(hle->dram() + address, in2 - 8, 16);
    memcpy(hle->alist_buffer() + dmem, outbuff, count);
}
// Pole filter: filters the samples at `dmemi` into `dmemo`, 8 samples per frame.
//
// init:    start with zeroed feedback values instead of loading them from
//          DRAM (halfwords at address + 4 and address + 6)
// count:   byte count, passed through align(count, 16); 16 bytes are consumed
//          per frame. The do/while loop runs at least one frame.
// gain:    input gain, also used to scale the second half of `table`
// table:   16 coefficients; entries 0-7 (h1) are read only, entries 8-15
//          (h2) are rescaled IN PLACE by gain >> 14
// address: DRAM location of the saved state; on exit the last four output
//          samples are stored there
void alist_polef(CHle * hle, bool init, uint16_t dmemo, uint16_t dmemi, uint16_t count, uint16_t gain, int16_t * table, uint32_t address)
{
int16_t * dst = (int16_t *)(hle->alist_buffer() + dmemo);
const int16_t * const h1 = table;
int16_t * const h2 = table + 8;
unsigned i;
// Feedback values carried from frame to frame (dst[6 ^ S] and dst[7 ^ S] of the previous frame)
int16_t l1, l2;
// Copy of h2 as it was before being scaled by the gain
int16_t h2_before[8];
count = align(count, 16);
if (init)
{
l1 = 0;
l2 = 0;
}
else
{
l1 = *dram_u16(hle, address + 4);
l2 = *dram_u16(hle, address + 6);
}
for (i = 0; i < 8; ++i)
{
h2_before[i] = h2[i];
h2[i] = (((int32_t)h2[i] * gain) >> 14);
}
do
{
int16_t frame[8];
for (i = 0; i < 8; ++i, dmemi += 2)
{
frame[i] = *alist_s16(hle, dmemi);
}
for (i = 0; i < 8; ++i)
{
// Input term + feedback from the previous frame + rdot() term over the current frame
int32_t accu = frame[i] * gain;
accu += h1[i] * l1 + h2_before[i] * l2 + rdot(i, h2, frame);
dst[i ^ S] = clamp_s16(accu >> 14);
}
l1 = dst[6 ^ S];
l2 = dst[7 ^ S];
dst += 8;
count -= 16;
} while (count != 0);
// Store the last four output samples (two 32-bit words) as state
dram_store_u32(hle, (uint32_t *)(dst - 4), address, 2);
}
// IIR filter: filters the samples at `dmemi` into `dmemo`, 8 samples per frame.
//
// init:    start from zeroed history instead of loading it from DRAM
// count:   byte count, passed through align(count, 16); 16 bytes are consumed
//          per frame. The do/while loop runs at least one frame.
// table:   coefficients; entries 0 and 1 weight the inputs, entries 8 and 9
//          weight previous outputs
// address: DRAM location of the saved state, as halfwords:
//            address + 4, + 6   last two outputs (frame[6], frame[7])
//            address + 8, + 10  last two inputs
void alist_iirf(CHle * hle, bool init, uint16_t dmemo, uint16_t dmemi, uint16_t count, int16_t * table, uint32_t address)
{
    int16_t * dst = (int16_t *)(hle->alist_buffer() + dmemo);
    int32_t i, prev;
    int16_t frame[8];
    int16_t ibuf[4];
    // Running index over the 8-entry output ring and, masked with 3, the 4-entry input ring
    uint16_t index = 7;

    count = align(count, 16);

    if (init)
    {
        for (i = 0; i < 8; ++i)
        {
            frame[i] = 0;
        }
        ibuf[1] = 0;
        ibuf[2] = 0;
    }
    else
    {
        frame[6] = *dram_u16(hle, address + 4);
        frame[7] = *dram_u16(hle, address + 6);
        ibuf[1] = (int16_t)*dram_u16(hle, address + 8);
        ibuf[2] = (int16_t)*dram_u16(hle, address + 10);
    }

    prev = vmulf(table[9], frame[6]) * 2;
    do
    {
        for (i = 0; i < 8; ++i)
        {
            int32_t accu;
            ibuf[index & 3] = *alist_s16(hle, dmemi);

            // Current and two previous inputs, plus the two previous outputs
            // (`prev` carries the older output's contribution)
            accu = prev + vmulf(table[0], ibuf[index & 3]) + vmulf(table[1], ibuf[(index - 1) & 3]) + vmulf(table[0], ibuf[(index - 2) & 3]);
            accu += vmulf(table[8], frame[index]) * 2;
            prev = vmulf(table[9], frame[index]) * 2;
            dst[i ^ S] = frame[i] = accu;

            index = (index + 1) & 7;
            dmemi += 2;
        }
        dst += 8;
        count -= 0x10;
    } while (count > 0);

    // dram_store_u16() takes a count of 16-bit elements (see its use with the
    // 16-entry last_frame in alist_adpcm), not bytes. Store exactly the four
    // halfwords that are loaded above; the previous counts read past the end
    // of `frame` and wrote one halfword beyond the saved state.
    dram_store_u16(hle, (uint16_t *)&frame[6], address + 4, 2);
    dram_store_u16(hle, (uint16_t *)&ibuf[(index - 2) & 3], address + 8, 1);
    dram_store_u16(hle, (uint16_t *)&ibuf[(index - 1) & 3], address + 10, 1);
}