// Project64 - A Nintendo 64 emulator // https://www.pj64-emu.com/ // Copyright(C) 2001-2021 Project64 // Copyright(C) 2013 Bobby Smiles // GNU/GPLv2 licensed: https://gnu.org/licenses/gpl-2.0.html #include #include #include "arithmetics.h" #include "audio.h" #include "mem.h" // Various constants enum { SUBFRAME_SIZE = 192 }; enum { MAX_VOICES = 32 }; enum { SAMPLE_BUFFER_SIZE = 0x200 }; enum { SFD_SFX_INDEX = 0x2, SFD_VOICE_BITMASK = 0x4, SFD_STATE_PTR = 0x8, SFD_SFX_PTR = 0xc, SFD_VOICES = 0x10, // v2 only SFD2_10_PTR = 0x10, SFD2_14_BITMASK = 0x14, SFD2_15_BITMASK = 0x15, SFD2_16_BITMASK = 0x16, SFD2_18_PTR = 0x18, SFD2_1C_PTR = 0x1c, SFD2_20_PTR = 0x20, SFD2_24_PTR = 0x24, SFD2_VOICES = 0x28 }; enum { VOICE_ENV_BEGIN = 0x00, VOICE_ENV_STEP = 0x10, VOICE_PITCH_Q16 = 0x20, VOICE_PITCH_SHIFT = 0x22, VOICE_CATSRC_0 = 0x24, VOICE_CATSRC_1 = 0x30, VOICE_ADPCM_FRAMES = 0x3c, VOICE_SKIP_SAMPLES = 0x3e, // For PCM16 VOICE_U16_40 = 0x40, VOICE_U16_42 = 0x42, // For ADPCM VOICE_ADPCM_TABLE_PTR = 0x40, VOICE_INTERLEAVED_PTR = 0x44, VOICE_END_POINT = 0x48, VOICE_RESTART_POINT = 0x4a, VOICE_U16_4E = 0x4e, VOICE_SIZE = 0x50 }; enum { CATSRC_PTR1 = 0x00, CATSRC_PTR2 = 0x04, CATSRC_SIZE1 = 0x08, CATSRC_SIZE2 = 0x0a }; enum { STATE_LAST_SAMPLE = 0x0, STATE_BASE_VOL = 0x100, STATE_CC0 = 0x110, STATE_740_LAST4_V1 = 0x290, STATE_740_LAST4_V2 = 0x110 }; enum { SFX_CBUFFER_PTR = 0x00, SFX_CBUFFER_LENGTH = 0x04, SFX_TAP_COUNT = 0x08, SFX_FIR4_HGAIN = 0x0a, SFX_TAP_DELAYS = 0x0c, SFX_TAP_GAINS = 0x2c, SFX_U16_3C = 0x3c, SFX_U16_3E = 0x3e, SFX_FIR4_HCOEFFS = 0x40 }; // Struct definition typedef struct { // Internal sub-frames int16_t left[SUBFRAME_SIZE]; int16_t right[SUBFRAME_SIZE]; int16_t cc0[SUBFRAME_SIZE]; int16_t e50[SUBFRAME_SIZE]; // Internal sub-frames base volumes int32_t base_vol[4]; // TODO: comment? int16_t subframe_740_last4[4]; } musyx_t; typedef void (*mix_sfx_with_main_subframes_t)(musyx_t * musyx, const int16_t * subframe, const uint16_t * gains); // Helper functions prototypes static void load_base_vol(CHle * hle, int32_t * base_vol, uint32_t address); static void save_base_vol(CHle * hle, const int32_t * base_vol, uint32_t address); static void update_base_vol(CHle * hle, int32_t * base_vol, uint32_t voice_mask, uint32_t last_sample_ptr, uint8_t mask_15, uint32_t ptr_24); static void init_subframes_v1(musyx_t * musyx); static void init_subframes_v2(musyx_t * musyx); static uint32_t voice_stage(CHle * hle, musyx_t * musyx, uint32_t voice_ptr, uint32_t last_sample_ptr); static void dma_cat8(CHle * hle, uint8_t * dst, uint32_t catsrc_ptr); static void dma_cat16(CHle * hle, uint16_t * dst, uint32_t catsrc_ptr); static void sfx_stage(CHle * hle, mix_sfx_with_main_subframes_t mix_sfx_with_main_subframes, musyx_t * musyx, uint32_t sfx_ptr, uint16_t idx); static void load_samples_PCM16(CHle * hle, uint32_t voice_ptr, int16_t * samples, unsigned * segbase, unsigned * offset); static void load_samples_ADPCM(CHle * hle, uint32_t voice_ptr, int16_t * samples, unsigned * segbase, unsigned * offset); static void mix_voice_samples(CHle * hle, musyx_t * musyx, uint32_t voice_ptr, const int16_t * samples, unsigned segbase, unsigned offset, uint32_t last_sample_ptr); static void adpcm_decode_frames(CHle * hle, int16_t * dst, const uint8_t * src, const int16_t * table, uint8_t count, uint8_t skip_samples); static void adpcm_predict_frame(int16_t * dst, const uint8_t * src, const uint8_t * nibbles, unsigned int rshift); static void mix_sfx_with_main_subframes_v1(musyx_t * musyx, const int16_t * subframe, const uint16_t * gains); static void mix_sfx_with_main_subframes_v2(musyx_t * musyx, const int16_t * subframe, const uint16_t * gains); static void mix_samples(int16_t * y, int16_t x, int16_t hgain); static void mix_subframes(int16_t * y, const int16_t * x, int16_t hgain); static void mix_fir4(int16_t * y, const int16_t * x, int16_t hgain, const int16_t * hcoeffs); static void interleave_stage_v1(CHle * hle, musyx_t * musyx, uint32_t output_ptr); static void interleave_stage_v2(CHle * hle, musyx_t * musyx, uint16_t mask_16, uint32_t ptr_18, uint32_t ptr_1c, uint32_t output_ptr); static int32_t dot4(const int16_t * x, const int16_t * y) { int32_t accu = 0; for (size_t i = 0; i < 4; ++i) { accu = clamp_s16(accu + (((int32_t)x[i] * (int32_t)y[i]) >> 15)); } return accu; } // MusyX v1 audio microcode void musyx_v1_task(CHle * hle) { uint32_t sfd_ptr = *dmem_u32(hle, TASK_DATA_PTR); uint32_t sfd_count = *dmem_u32(hle, TASK_DATA_SIZE); uint32_t state_ptr; musyx_t musyx; hle->VerboseMessage("musyx_v1_task: *data=%x, #SF=%d", sfd_ptr, sfd_count); state_ptr = *dram_u32(hle, sfd_ptr + SFD_STATE_PTR); // Load initial state load_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); dram_load_u16(hle, (uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); dram_load_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V1, 4); for (;;) { // Parse SFD structure uint16_t sfx_index = *dram_u16(hle, sfd_ptr + SFD_SFX_INDEX); uint32_t voice_mask = *dram_u32(hle, sfd_ptr + SFD_VOICE_BITMASK); uint32_t sfx_ptr = *dram_u32(hle, sfd_ptr + SFD_SFX_PTR); uint32_t voice_ptr = sfd_ptr + SFD_VOICES; uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; uint32_t output_ptr; // Initialize internal sub-frames using updated base volumes update_base_vol(hle, musyx.base_vol, voice_mask, last_sample_ptr, 0, 0); init_subframes_v1(&musyx); // Active voices get mixed into L,R,cc0,e50 subframes (optional) output_ptr = voice_stage(hle, &musyx, voice_ptr, last_sample_ptr); // Apply delay-based effects (optional) sfx_stage(hle, mix_sfx_with_main_subframes_v1, &musyx, sfx_ptr, sfx_index); // Emit interleaved L,R sub-frames interleave_stage_v1(hle, &musyx, output_ptr); --sfd_count; if (sfd_count == 0) { break; } sfd_ptr += SFD_VOICES + MAX_VOICES * VOICE_SIZE; state_ptr = *dram_u32(hle, sfd_ptr + SFD_STATE_PTR); } // Writeback updated state save_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); dram_store_u16(hle, (uint16_t *)musyx.cc0, state_ptr + STATE_CC0, SUBFRAME_SIZE); dram_store_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V1, 4); } // MusyX v2 audio microcode void musyx_v2_task(CHle * hle) { uint32_t sfd_ptr = *dmem_u32(hle, TASK_DATA_PTR); uint32_t sfd_count = *dmem_u32(hle, TASK_DATA_SIZE); musyx_t musyx; hle->VerboseMessage("musyx_v2_task: *data=%x, #SF=%d", sfd_ptr, sfd_count); for (;;) { // Parse SFD structure uint16_t sfx_index = *dram_u16(hle, sfd_ptr + SFD_SFX_INDEX); uint32_t voice_mask = *dram_u32(hle, sfd_ptr + SFD_VOICE_BITMASK); uint32_t state_ptr = *dram_u32(hle, sfd_ptr + SFD_STATE_PTR); uint32_t sfx_ptr = *dram_u32(hle, sfd_ptr + SFD_SFX_PTR); uint32_t voice_ptr = sfd_ptr + SFD2_VOICES; uint32_t ptr_10 = *dram_u32(hle, sfd_ptr + SFD2_10_PTR); uint8_t mask_14 = *dram_u8(hle, sfd_ptr + SFD2_14_BITMASK); uint8_t mask_15 = *dram_u8(hle, sfd_ptr + SFD2_15_BITMASK); uint16_t mask_16 = *dram_u16(hle, sfd_ptr + SFD2_16_BITMASK); uint32_t ptr_18 = *dram_u32(hle, sfd_ptr + SFD2_18_PTR); uint32_t ptr_1c = *dram_u32(hle, sfd_ptr + SFD2_1C_PTR); uint32_t ptr_20 = *dram_u32(hle, sfd_ptr + SFD2_20_PTR); uint32_t ptr_24 = *dram_u32(hle, sfd_ptr + SFD2_24_PTR); uint32_t last_sample_ptr = state_ptr + STATE_LAST_SAMPLE; uint32_t output_ptr; // Load state load_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); dram_load_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V2, 4); // Initialize internal sub-frames using updated base volumes update_base_vol(hle, musyx.base_vol, voice_mask, last_sample_ptr, mask_15, ptr_24); init_subframes_v2(&musyx); if (ptr_10) { hle->WarnMessage("ptr_10=%08x mask_14=%02x ptr_24=%08x", ptr_10, mask_14, ptr_24); } // Active voices get mixed into L,R,cc0,e50 sub-frames (optional) output_ptr = voice_stage(hle, &musyx, voice_ptr, last_sample_ptr); // Apply delay-based effects (optional) sfx_stage(hle, mix_sfx_with_main_subframes_v2, &musyx, sfx_ptr, sfx_index); dram_store_u16(hle, (uint16_t *)musyx.left, output_ptr, SUBFRAME_SIZE); dram_store_u16(hle, (uint16_t *)musyx.right, output_ptr + 2 * SUBFRAME_SIZE, SUBFRAME_SIZE); dram_store_u16(hle, (uint16_t *)musyx.cc0, output_ptr + 4 * SUBFRAME_SIZE, SUBFRAME_SIZE); // Store state save_base_vol(hle, musyx.base_vol, state_ptr + STATE_BASE_VOL); dram_store_u16(hle, (uint16_t *)musyx.subframe_740_last4, state_ptr + STATE_740_LAST4_V2, 4); if (mask_16) { interleave_stage_v2(hle, &musyx, mask_16, ptr_18, ptr_1c, ptr_20); } --sfd_count; if (sfd_count == 0) { break; } sfd_ptr += SFD2_VOICES + MAX_VOICES * VOICE_SIZE; } } static void load_base_vol(CHle * hle, int32_t * base_vol, uint32_t address) { base_vol[0] = ((uint32_t)(*dram_u16(hle, address)) << 16) | (*dram_u16(hle, address + 8)); base_vol[1] = ((uint32_t)(*dram_u16(hle, address + 2)) << 16) | (*dram_u16(hle, address + 10)); base_vol[2] = ((uint32_t)(*dram_u16(hle, address + 4)) << 16) | (*dram_u16(hle, address + 12)); base_vol[3] = ((uint32_t)(*dram_u16(hle, address + 6)) << 16) | (*dram_u16(hle, address + 14)); } static void save_base_vol(CHle * hle, const int32_t * base_vol, uint32_t address) { unsigned k; for (k = 0; k < 4; ++k) { *dram_u16(hle, address) = (uint16_t)(base_vol[k] >> 16); address += 2; } for (k = 0; k < 4; ++k) { *dram_u16(hle, address) = (uint16_t)(base_vol[k]); address += 2; } } static void update_base_vol(CHle * hle, int32_t * base_vol, uint32_t voice_mask, uint32_t last_sample_ptr, uint8_t mask_15, uint32_t ptr_24) { unsigned i, k; uint32_t mask; hle->VerboseMessage("base_vol voice_mask = %08x", voice_mask); hle->VerboseMessage("BEFORE: base_vol = %08x %08x %08x %08x", base_vol[0], base_vol[1], base_vol[2], base_vol[3]); // Optimization: skip voices contributions entirely if voice_mask is empty if (voice_mask != 0) { for (i = 0, mask = 1; i < MAX_VOICES; ++i, mask <<= 1, last_sample_ptr += 8) { if ((voice_mask & mask) == 0) { continue; } for (k = 0; k < 4; ++k) { base_vol[k] += (int16_t)*dram_u16(hle, last_sample_ptr + k * 2); } } } // Optimization: skip contributions entirely if mask_15 is empty if (mask_15 != 0) { for (i = 0, mask = 1; i < 4; ++i, mask <<= 1, ptr_24 += 8) { if ((mask_15 & mask) == 0) { continue; } for (k = 0; k < 4; ++k) { base_vol[k] += (int16_t)*dram_u16(hle, ptr_24 + k * 2); } } } // Apply 3% decay for (k = 0; k < 4; ++k) { base_vol[k] = (base_vol[k] * 0x0000f850) >> 16; } hle->VerboseMessage("AFTER: base_vol = %08x %08x %08x %08x", base_vol[0], base_vol[1], base_vol[2], base_vol[3]); } static void init_subframes_v1(musyx_t * musyx) { unsigned i; int16_t base_cc0 = clamp_s16(musyx->base_vol[2]); int16_t base_e50 = clamp_s16(musyx->base_vol[3]); int16_t * left = musyx->left; int16_t * right = musyx->right; int16_t * cc0 = musyx->cc0; int16_t * e50 = musyx->e50; for (i = 0; i < SUBFRAME_SIZE; ++i) { *(e50++) = base_e50; *(left++) = clamp_s16(*cc0 + base_cc0); *(right++) = clamp_s16(-*cc0 - base_cc0); *(cc0++) = 0; } } static void init_subframes_v2(musyx_t * musyx) { unsigned i, k; int16_t values[4]; int16_t * subframes[4]; for (k = 0; k < 4; ++k) { values[k] = clamp_s16(musyx->base_vol[k]); } subframes[0] = musyx->left; subframes[1] = musyx->right; subframes[2] = musyx->cc0; subframes[3] = musyx->e50; for (i = 0; i < SUBFRAME_SIZE; ++i) { for (k = 0; k < 4; ++k) { *(subframes[k]++) = values[k]; } } } // Process voices, and returns interleaved sub-frame destination address static uint32_t voice_stage(CHle * hle, musyx_t * musyx, uint32_t voice_ptr, uint32_t last_sample_ptr) { uint32_t output_ptr; int i = 0; // Voice stage can be skipped if first voice has no samples if (*dram_u16(hle, voice_ptr + VOICE_CATSRC_0 + CATSRC_SIZE1) == 0) { hle->VerboseMessage("Skipping voice stage"); output_ptr = *dram_u32(hle, voice_ptr + VOICE_INTERLEAVED_PTR); } else { // Otherwise process voices until a non null output_ptr is encountered for (;;) { // Load voice samples (PCM16 or APDCM) int16_t samples[SAMPLE_BUFFER_SIZE]; unsigned segbase; unsigned offset; hle->VerboseMessage("Processing voice #%d", i); if (*dram_u8(hle, voice_ptr + VOICE_ADPCM_FRAMES) == 0) { load_samples_PCM16(hle, voice_ptr, samples, &segbase, &offset); } else { load_samples_ADPCM(hle, voice_ptr, samples, &segbase, &offset); } // Mix them with each internal sub-frame mix_voice_samples(hle, musyx, voice_ptr, samples, segbase, offset, last_sample_ptr + i * 8); // Check break condition output_ptr = *dram_u32(hle, voice_ptr + VOICE_INTERLEAVED_PTR); if (output_ptr != 0) { break; } // Next voice ++i; voice_ptr += VOICE_SIZE; } } return output_ptr; } static void dma_cat8(CHle * hle, uint8_t * dst, uint32_t catsrc_ptr) { uint32_t ptr1 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR1); uint32_t ptr2 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR2); uint16_t size1 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE1); uint16_t size2 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE2); size_t count1 = size1; size_t count2 = size2; hle->VerboseMessage("dma_cat: %08x %08x %04x %04x", ptr1, ptr2, size1, size2); dram_load_u8(hle, dst, ptr1, count1); if (size2 == 0) { return; } dram_load_u8(hle, dst + count1, ptr2, count2); } static void dma_cat16(CHle * hle, uint16_t * dst, uint32_t catsrc_ptr) { uint32_t ptr1 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR1); uint32_t ptr2 = *dram_u32(hle, catsrc_ptr + CATSRC_PTR2); uint16_t size1 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE1); uint16_t size2 = *dram_u16(hle, catsrc_ptr + CATSRC_SIZE2); size_t count1 = size1 >> 1; size_t count2 = size2 >> 1; hle->VerboseMessage("dma_cat: %08x %08x %04x %04x", ptr1, ptr2, size1, size2); dram_load_u16(hle, dst, ptr1, count1); if (size2 == 0) { return; } dram_load_u16(hle, dst + count1, ptr2, count2); } static void load_samples_PCM16(CHle * hle, uint32_t voice_ptr, int16_t * samples, unsigned * segbase, unsigned * offset) { uint8_t u8_3e = *dram_u8(hle, voice_ptr + VOICE_SKIP_SAMPLES); uint16_t u16_40 = *dram_u16(hle, voice_ptr + VOICE_U16_40); uint16_t u16_42 = *dram_u16(hle, voice_ptr + VOICE_U16_42); unsigned count = align(u16_40 + u8_3e, 4); hle->VerboseMessage("Format: PCM16"); *segbase = SAMPLE_BUFFER_SIZE - count; *offset = u8_3e; dma_cat16(hle, (uint16_t *)samples + *segbase, voice_ptr + VOICE_CATSRC_0); if (u16_42 != 0) { dma_cat16(hle, (uint16_t *)samples, voice_ptr + VOICE_CATSRC_1); } } static void load_samples_ADPCM(CHle * hle, uint32_t voice_ptr, int16_t * samples, unsigned * segbase, unsigned * offset) { // Decompressed samples cannot exceed 0x400 bytes // ADPCM has a compression ratio of 5/16 uint8_t buffer[SAMPLE_BUFFER_SIZE * 2 * 5 / 16]; int16_t adpcm_table[128]; uint8_t u8_3c = *dram_u8(hle, voice_ptr + VOICE_ADPCM_FRAMES); uint8_t u8_3d = *dram_u8(hle, voice_ptr + VOICE_ADPCM_FRAMES + 1); uint8_t u8_3e = *dram_u8(hle, voice_ptr + VOICE_SKIP_SAMPLES); uint8_t u8_3f = *dram_u8(hle, voice_ptr + VOICE_SKIP_SAMPLES + 1); uint32_t adpcm_table_ptr = *dram_u32(hle, voice_ptr + VOICE_ADPCM_TABLE_PTR); unsigned count; hle->VerboseMessage("Format: ADPCM"); hle->VerboseMessage("Loading ADPCM table: %08x", adpcm_table_ptr); dram_load_u16(hle, (uint16_t *)adpcm_table, adpcm_table_ptr, 128); count = u8_3c << 5; *segbase = SAMPLE_BUFFER_SIZE - count; *offset = u8_3e & 0x1f; dma_cat8(hle, buffer, voice_ptr + VOICE_CATSRC_0); adpcm_decode_frames(hle, samples + *segbase, buffer, adpcm_table, u8_3c, u8_3e); if (u8_3d != 0) { dma_cat8(hle, buffer, voice_ptr + VOICE_CATSRC_1); adpcm_decode_frames(hle, samples, buffer, adpcm_table, u8_3d, u8_3f); } } static void adpcm_decode_frames(CHle * hle, int16_t * dst, const uint8_t * src, const int16_t * table, uint8_t count, uint8_t skip_samples) { int16_t frame[32]; const uint8_t * nibbles = src + 8; unsigned i; bool jump_gap = false; hle->VerboseMessage("ADPCM decode: count=%d, skip=%d", count, skip_samples); if (skip_samples >= 32) { jump_gap = true; nibbles += 16; src += 4; } for (i = 0; i < count; ++i) { uint8_t c2 = nibbles[0]; const int16_t * book = (c2 & 0xf0) + table; unsigned int rshift = (c2 & 0x0f); adpcm_predict_frame(frame, src, nibbles, rshift); memcpy(dst, frame, 2 * sizeof(frame[0])); adpcm_compute_residuals(dst + 2, frame + 2, book, dst, 6); adpcm_compute_residuals(dst + 8, frame + 8, book, dst + 6, 8); adpcm_compute_residuals(dst + 16, frame + 16, book, dst + 14, 8); adpcm_compute_residuals(dst + 24, frame + 24, book, dst + 22, 8); if (jump_gap) { nibbles += 8; src += 32; } jump_gap = !jump_gap; nibbles += 16; src += 4; dst += 32; } } static void adpcm_predict_frame(int16_t * dst, const uint8_t * src, const uint8_t * nibbles, unsigned int rshift) { unsigned int i; *(dst++) = (src[0] << 8) | src[1]; *(dst++) = (src[2] << 8) | src[3]; for (i = 1; i < 16; ++i) { uint8_t byte = nibbles[i]; *(dst++) = adpcm_predict_sample(byte, 0xf0, 8, rshift); *(dst++) = adpcm_predict_sample(byte, 0x0f, 12, rshift); } } static void mix_voice_samples(CHle * hle, musyx_t * musyx, uint32_t voice_ptr, const int16_t * samples, unsigned segbase, unsigned offset, uint32_t last_sample_ptr) { int i, k; // Parse VOICE structure const uint16_t pitch_q16 = *dram_u16(hle, voice_ptr + VOICE_PITCH_Q16); const uint16_t pitch_shift = *dram_u16(hle, voice_ptr + VOICE_PITCH_SHIFT); // Q4.12 const uint16_t end_point = *dram_u16(hle, voice_ptr + VOICE_END_POINT); const uint16_t restart_point = *dram_u16(hle, voice_ptr + VOICE_RESTART_POINT); const uint16_t u16_4e = *dram_u16(hle, voice_ptr + VOICE_U16_4E); // Initialize values and pointers const int16_t * sample = samples + segbase + offset + u16_4e; const int16_t * const sample_end = samples + segbase + end_point; const int16_t * const sample_restart = samples + (restart_point & 0x7fff) + (((restart_point & 0x8000) != 0) ? 0x000 : segbase); uint32_t pitch_accu = pitch_q16; uint32_t pitch_step = pitch_shift << 4; int32_t v4_env[4]; int32_t v4_env_step[4]; int16_t * v4_dst[4]; int16_t v4[4]; dram_load_u32(hle, (uint32_t *)v4_env, voice_ptr + VOICE_ENV_BEGIN, 4); dram_load_u32(hle, (uint32_t *)v4_env_step, voice_ptr + VOICE_ENV_STEP, 4); v4_dst[0] = musyx->left; v4_dst[1] = musyx->right; v4_dst[2] = musyx->cc0; v4_dst[3] = musyx->e50; hle->VerboseMessage("Voice debug: segbase=%d" "\tu16_4e=%04x\n" "\tpitch: frac0=%04x shift=%04x\n" "\tend_point=%04x restart_point=%04x\n" "\tenv = %08x %08x %08x %08x\n" "\tenv_step = %08x %08x %08x %08x\n", segbase, u16_4e, pitch_q16, pitch_shift, end_point, restart_point, v4_env[0], v4_env[1], v4_env[2], v4_env[3], v4_env_step[0], v4_env_step[1], v4_env_step[2], v4_env_step[3]); for (i = 0; i < SUBFRAME_SIZE; ++i) { // Update sample and LUT pointers and then pitch_accu const int16_t * lut = (RESAMPLE_LUT + ((pitch_accu & 0xfc00) >> 8)); int dist; int16_t v; sample += (pitch_accu >> 16); pitch_accu &= 0xffff; pitch_accu += pitch_step; // Handle end/restart points dist = sample - sample_end; if (dist >= 0) { sample = sample_restart + dist; } // Apply resample filter v = clamp_s16(dot4(sample, lut)); for (k = 0; k < 4; ++k) { // Envmix int32_t accu = (v * (v4_env[k] >> 16)) >> 15; v4[k] = clamp_s16(accu); *(v4_dst[k]) = clamp_s16(accu + *(v4_dst[k])); // Update envelopes and DST pointers ++(v4_dst[k]); v4_env[k] += v4_env_step[k]; } } // Save last resampled sample dram_store_u16(hle, (uint16_t *)v4, last_sample_ptr, 4); hle->VerboseMessage("last_sample = %04x %04x %04x %04x", v4[0], v4[1], v4[2], v4[3]); } static void sfx_stage(CHle * hle, mix_sfx_with_main_subframes_t mix_sfx_with_main_subframes, musyx_t * musyx, uint32_t sfx_ptr, uint16_t idx) { unsigned int i; int16_t buffer[SUBFRAME_SIZE + 4]; int16_t * subframe = buffer + 4; uint32_t tap_delays[8]; int16_t tap_gains[8]; int16_t fir4_hcoeffs[4]; int16_t delayed[SUBFRAME_SIZE]; int dpos, dlength; const uint32_t pos = idx * SUBFRAME_SIZE; uint32_t cbuffer_ptr; uint32_t cbuffer_length; uint16_t tap_count; int16_t fir4_hgain; uint16_t sfx_gains[2]; hle->VerboseMessage("SFX: %08x, idx=%d", sfx_ptr, idx); if (sfx_ptr == 0) { return; } // Load SFX parameters cbuffer_ptr = *dram_u32(hle, sfx_ptr + SFX_CBUFFER_PTR); cbuffer_length = *dram_u32(hle, sfx_ptr + SFX_CBUFFER_LENGTH); tap_count = *dram_u16(hle, sfx_ptr + SFX_TAP_COUNT); dram_load_u32(hle, tap_delays, sfx_ptr + SFX_TAP_DELAYS, 8); dram_load_u16(hle, (uint16_t *)tap_gains, sfx_ptr + SFX_TAP_GAINS, 8); fir4_hgain = *dram_u16(hle, sfx_ptr + SFX_FIR4_HGAIN); dram_load_u16(hle, (uint16_t *)fir4_hcoeffs, sfx_ptr + SFX_FIR4_HCOEFFS, 4); sfx_gains[0] = *dram_u16(hle, sfx_ptr + SFX_U16_3C); sfx_gains[1] = *dram_u16(hle, sfx_ptr + SFX_U16_3E); hle->VerboseMessage("cbuffer: ptr=%08x length=%x", cbuffer_ptr, cbuffer_length); hle->VerboseMessage("fir4: hgain=%04x hcoeff=%04x %04x %04x %04x", fir4_hgain, fir4_hcoeffs[0], fir4_hcoeffs[1], fir4_hcoeffs[2], fir4_hcoeffs[3]); hle->VerboseMessage("tap count=%d\n" "delays: %08x %08x %08x %08x %08x %08x %08x %08x\n" "gains: %04x %04x %04x %04x %04x %04x %04x %04x", tap_count, tap_delays[0], tap_delays[1], tap_delays[2], tap_delays[3], tap_delays[4], tap_delays[5], tap_delays[6], tap_delays[7], tap_gains[0], tap_gains[1], tap_gains[2], tap_gains[3], tap_gains[4], tap_gains[5], tap_gains[6], tap_gains[7]); hle->VerboseMessage("sfx_gains=%04x %04x", sfx_gains[0], sfx_gains[1]); // Mix up to 8 delayed sub-frames memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0])); for (i = 0; i < tap_count; ++i) { dpos = pos - tap_delays[i]; if (dpos <= 0) { dpos += cbuffer_length; } dlength = SUBFRAME_SIZE; if ((uint32_t)(dpos + SUBFRAME_SIZE) > cbuffer_length) { dlength = cbuffer_length - dpos; dram_load_u16(hle, (uint16_t *)delayed + dlength, cbuffer_ptr, SUBFRAME_SIZE - dlength); } dram_load_u16(hle, (uint16_t *)delayed, cbuffer_ptr + dpos * 2, dlength); mix_subframes(subframe, delayed, tap_gains[i]); } // Add resulting sub-frame to main sub-frames mix_sfx_with_main_subframes(musyx, subframe, sfx_gains); // Apply FIR4 filter and writeback filtered result memcpy(buffer, musyx->subframe_740_last4, 4 * sizeof(int16_t)); memcpy(musyx->subframe_740_last4, subframe + SUBFRAME_SIZE - 4, 4 * sizeof(int16_t)); mix_fir4(musyx->e50, buffer + 1, fir4_hgain, fir4_hcoeffs); dram_store_u16(hle, (uint16_t *)musyx->e50, cbuffer_ptr + pos * 2, SUBFRAME_SIZE); } static void mix_sfx_with_main_subframes_v1(musyx_t * musyx, const int16_t * subframe, const uint16_t * UNUSED(gains)) { unsigned i; for (i = 0; i < SUBFRAME_SIZE; ++i) { int16_t v = subframe[i]; musyx->left[i] = clamp_s16(musyx->left[i] + v); musyx->right[i] = clamp_s16(musyx->right[i] + v); } } static void mix_sfx_with_main_subframes_v2(musyx_t * musyx, const int16_t * subframe, const uint16_t * gains) { unsigned i; for (i = 0; i < SUBFRAME_SIZE; ++i) { int16_t v = subframe[i]; int16_t v1 = (int32_t)(v * gains[0]) >> 16; int16_t v2 = (int32_t)(v * gains[1]) >> 16; musyx->left[i] = clamp_s16(musyx->left[i] + v1); musyx->right[i] = clamp_s16(musyx->right[i] + v1); musyx->cc0[i] = clamp_s16(musyx->cc0[i] + v2); } } static void mix_samples(int16_t * y, int16_t x, int16_t hgain) { *y = clamp_s16(*y + ((x * hgain + 0x4000) >> 15)); } static void mix_subframes(int16_t * y, const int16_t * x, int16_t hgain) { for (unsigned int i = 0; i < SUBFRAME_SIZE; ++i) { mix_samples(&y[i], x[i], hgain); } } static void mix_fir4(int16_t * y, const int16_t * x, int16_t hgain, const int16_t * hcoeffs) { unsigned int i; int32_t h[4]; h[0] = (hgain * hcoeffs[0]) >> 15; h[1] = (hgain * hcoeffs[1]) >> 15; h[2] = (hgain * hcoeffs[2]) >> 15; h[3] = (hgain * hcoeffs[3]) >> 15; for (i = 0; i < SUBFRAME_SIZE; ++i) { int32_t v = (h[0] * x[i] + h[1] * x[i + 1] + h[2] * x[i + 2] + h[3] * x[i + 3]) >> 15; y[i] = clamp_s16(y[i] + v); } } static void interleave_stage_v1(CHle * hle, musyx_t * musyx, uint32_t output_ptr) { size_t i; int16_t base_left; int16_t base_right; int16_t * left; int16_t * right; uint32_t * dst; hle->VerboseMessage("Interleave: %08x", output_ptr); base_left = clamp_s16(musyx->base_vol[0]); base_right = clamp_s16(musyx->base_vol[1]); left = musyx->left; right = musyx->right; dst = dram_u32(hle, output_ptr); for (i = 0; i < SUBFRAME_SIZE; ++i) { uint16_t l = clamp_s16(*(left++) + base_left); uint16_t r = clamp_s16(*(right++) + base_right); *(dst++) = (l << 16) | r; } } static void interleave_stage_v2(CHle * hle, musyx_t * musyx, uint16_t mask_16, uint32_t ptr_18, uint32_t ptr_1c, uint32_t output_ptr) { unsigned i, k; int16_t subframe[SUBFRAME_SIZE]; uint32_t * dst; uint16_t mask; hle->VerboseMessage("mask_16=%04x ptr_18=%08x ptr_1c=%08x output_ptr=%08x", mask_16, ptr_18, ptr_1c, output_ptr); // Compute L_total, R_total and update sub-frame @ptr_1c memset(subframe, 0, SUBFRAME_SIZE * sizeof(subframe[0])); for (i = 0; i < SUBFRAME_SIZE; ++i) { int16_t v = *dram_u16(hle, ptr_1c + i * 2); musyx->left[i] = v; musyx->right[i] = clamp_s16(-v); } for (k = 0, mask = 1; k < 8; ++k, mask <<= 1, ptr_18 += 8) { int16_t hgain; uint32_t address; if ((mask_16 & mask) == 0) { continue; } address = *dram_u32(hle, ptr_18); hgain = *dram_u16(hle, ptr_18 + 4); for (i = 0; i < SUBFRAME_SIZE; ++i, address += 2) { mix_samples(&musyx->left[i], *dram_u16(hle, address), hgain); mix_samples(&musyx->right[i], *dram_u16(hle, address + 2 * SUBFRAME_SIZE), hgain); mix_samples(&subframe[i], *dram_u16(hle, address + 4 * SUBFRAME_SIZE), hgain); } } // Interleave L_total and R_total dst = dram_u32(hle, output_ptr); for (i = 0; i < SUBFRAME_SIZE; ++i) { uint16_t l = musyx->left[i]; uint16_t r = musyx->right[i]; *(dst++) = (l << 16) | r; } // Writeback sub-frame @ptr_1c dram_store_u16(hle, (uint16_t *)subframe, ptr_1c, SUBFRAME_SIZE); }