From ee538ae0ea5db79c57a85b41fed47b9e9dba2043 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 4 Aug 2024 15:55:17 +1000 Subject: [PATCH] CDROM: Improve resampling quality of 18900hz XA --- src/core/cdrom.cpp | 203 ++++++++++++++++++++++++++++----------------- 1 file changed, 129 insertions(+), 74 deletions(-) diff --git a/src/core/cdrom.cpp b/src/core/cdrom.cpp index f10b440cd..35a1d80de 100644 --- a/src/core/cdrom.cpp +++ b/src/core/cdrom.cpp @@ -352,9 +352,10 @@ static void CheckForSectorBufferReadComplete(); // Decodes XA-ADPCM samples in an audio sector. Stereo samples are interleaved with left first. template static void DecodeXAADPCMChunks(const u8* chunk_ptr, s16* samples); -template +template static void ResampleXAADPCM(const s16* frames_in, u32 num_frames_in); -static s16 ZigZagInterpolate(const s16* ringbuf, const s16* table, u8 p); +template +static void ResampleXAADPCM18900(const s16* frames_in, u32 num_frames_in); static TinyString LBAToMSFString(CDImage::LBA lba); @@ -3117,44 +3118,6 @@ ALWAYS_INLINE_RELEASE void CDROM::ProcessDataSector(const u8* raw_sector, const SetAsyncInterrupt(Interrupt::DataReady); } -static constexpr std::array s_xa_adpcm_filter_table_pos = { - {0, 60, 115, 98, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; - -static constexpr std::array s_xa_adpcm_filter_table_neg = { - {0, 0, -52, -55, -60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; - -static std::array, 7> s_zigzag_table = { - {{0, 0x0, 0x0, 0x0, 0x0, -0x0002, 0x000A, -0x0022, 0x0041, -0x0054, - 0x0034, 0x0009, -0x010A, 0x0400, -0x0A78, 0x234C, 0x6794, -0x1780, 0x0BCD, -0x0623, - 0x0350, -0x016D, 0x006B, 0x000A, -0x0010, 0x0011, -0x0008, 0x0003, -0x0001}, - {0, 0x0, 0x0, -0x0002, 0x0, 0x0003, -0x0013, 0x003C, -0x004B, 0x00A2, - -0x00E3, 0x0132, -0x0043, -0x0267, 0x0C9D, 0x74BB, -0x11B4, 0x09B8, -0x05BF, 0x0372, - -0x01A8, 0x00A6, -0x001B, 0x0005, 0x0006, -0x0008, 0x0003, -0x0001, 0x0}, - {0, 0x0, -0x0001, 0x0003, -0x0002, -0x0005, 0x001F, -0x004A, 0x00B3, -0x0192, - 0x02B1, -0x039E, 0x04F8, -0x05A6, 0x7939, -0x05A6, 0x04F8, -0x039E, 0x02B1, -0x0192, - 0x00B3, -0x004A, 0x001F, -0x0005, -0x0002, 0x0003, -0x0001, 0x0, 0x0}, - {0, -0x0001, 0x0003, -0x0008, 0x0006, 0x0005, -0x001B, 0x00A6, -0x01A8, 0x0372, - -0x05BF, 0x09B8, -0x11B4, 0x74BB, 0x0C9D, -0x0267, -0x0043, 0x0132, -0x00E3, 0x00A2, - -0x004B, 0x003C, -0x0013, 0x0003, 0x0, -0x0002, 0x0, 0x0, 0x0}, - {-0x0001, 0x0003, -0x0008, 0x0011, -0x0010, 0x000A, 0x006B, -0x016D, 0x0350, -0x0623, - 0x0BCD, -0x1780, 0x6794, 0x234C, -0x0A78, 0x0400, -0x010A, 0x0009, 0x0034, -0x0054, - 0x0041, -0x0022, 0x000A, -0x0001, 0x0, 0x0001, 0x0, 0x0, 0x0}, - {0x0002, -0x0008, 0x0010, -0x0023, 0x002B, 0x001A, -0x00EB, 0x027B, -0x0548, 0x0AFA, - -0x16FA, 0x53E0, 0x3C07, -0x1249, 0x080E, -0x0347, 0x015B, -0x0044, -0x0017, 0x0046, - -0x0023, 0x0011, -0x0005, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, - {-0x0005, 0x0011, -0x0023, 0x0046, -0x0017, -0x0044, 0x015B, -0x0347, 0x080E, -0x1249, - 0x3C07, 0x53E0, -0x16FA, 0x0AFA, -0x0548, 0x027B, -0x00EB, 0x001A, 0x002B, -0x0023, - 0x0010, -0x0008, 0x0002, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}}; - -ALWAYS_INLINE_RELEASE s16 CDROM::ZigZagInterpolate(const s16* ringbuf, const s16* table, u8 p) -{ - s32 sum = 0; - for (u8 i = 0; i < 29; i++) - sum += (static_cast(ringbuf[(p - i) & 0x1F]) * static_cast(table[i])) >> 15; - - return static_cast(std::clamp(sum, -0x8000, 0x7FFF)); -} - std::tuple CDROM::GetAudioFrame() { const u32 frame = s_audio_fifo.IsEmpty() ? 0u : s_audio_fifo.Pop(); @@ -3185,6 +3148,12 @@ s16 CDROM::SaturateVolume(s32 volume) template void CDROM::DecodeXAADPCMChunks(const u8* chunk_ptr, s16* samples) { + static constexpr std::array s_xa_adpcm_filter_table_pos = { + {0, 60, 115, 98, 122, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + + static constexpr std::array s_xa_adpcm_filter_table_neg = { + {0, 0, -52, -55, -60, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}; + // The data layout is annoying here. Each word of data is interleaved with the other blocks, requiring multiple // passes to decode the whole chunk. constexpr u32 NUM_CHUNKS = 18; @@ -3240,46 +3209,132 @@ void CDROM::DecodeXAADPCMChunks(const u8* chunk_ptr, s16* samples) } } -template +template void CDROM::ResampleXAADPCM(const s16* frames_in, u32 num_frames_in) { - s16* left_ringbuf = s_xa_resample_ring_buffer[0].data(); - s16* right_ringbuf = s_xa_resample_ring_buffer[1].data(); - u8 p = s_xa_resample_p; - u8 sixstep = s_xa_resample_sixstep; + static constexpr auto zigzag_interpolate = [](const s16* ringbuf, u32 table_index, u32 p) -> s16 { + static std::array, 7> tables = { + {{0, 0x0, 0x0, 0x0, 0x0, -0x0002, 0x000A, -0x0022, 0x0041, -0x0054, + 0x0034, 0x0009, -0x010A, 0x0400, -0x0A78, 0x234C, 0x6794, -0x1780, 0x0BCD, -0x0623, + 0x0350, -0x016D, 0x006B, 0x000A, -0x0010, 0x0011, -0x0008, 0x0003, -0x0001}, + {0, 0x0, 0x0, -0x0002, 0x0, 0x0003, -0x0013, 0x003C, -0x004B, 0x00A2, + -0x00E3, 0x0132, -0x0043, -0x0267, 0x0C9D, 0x74BB, -0x11B4, 0x09B8, -0x05BF, 0x0372, + -0x01A8, 0x00A6, -0x001B, 0x0005, 0x0006, -0x0008, 0x0003, -0x0001, 0x0}, + {0, 0x0, -0x0001, 0x0003, -0x0002, -0x0005, 0x001F, -0x004A, 0x00B3, -0x0192, + 0x02B1, -0x039E, 0x04F8, -0x05A6, 0x7939, -0x05A6, 0x04F8, -0x039E, 0x02B1, -0x0192, + 0x00B3, -0x004A, 0x001F, -0x0005, -0x0002, 0x0003, -0x0001, 0x0, 0x0}, + {0, -0x0001, 0x0003, -0x0008, 0x0006, 0x0005, -0x001B, 0x00A6, -0x01A8, 0x0372, + -0x05BF, 0x09B8, -0x11B4, 0x74BB, 0x0C9D, -0x0267, -0x0043, 0x0132, -0x00E3, 0x00A2, + -0x004B, 0x003C, -0x0013, 0x0003, 0x0, -0x0002, 0x0, 0x0, 0x0}, + {-0x0001, 0x0003, -0x0008, 0x0011, -0x0010, 0x000A, 0x006B, -0x016D, 0x0350, -0x0623, + 0x0BCD, -0x1780, 0x6794, 0x234C, -0x0A78, 0x0400, -0x010A, 0x0009, 0x0034, -0x0054, + 0x0041, -0x0022, 0x000A, -0x0001, 0x0, 0x0001, 0x0, 0x0, 0x0}, + {0x0002, -0x0008, 0x0010, -0x0023, 0x002B, 0x001A, -0x00EB, 0x027B, -0x0548, 0x0AFA, + -0x16FA, 0x53E0, 0x3C07, -0x1249, 0x080E, -0x0347, 0x015B, -0x0044, -0x0017, 0x0046, + -0x0023, 0x0011, -0x0005, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}, + {-0x0005, 0x0011, -0x0023, 0x0046, -0x0017, -0x0044, 0x015B, -0x0347, 0x080E, -0x1249, + 0x3C07, 0x53E0, -0x16FA, 0x0AFA, -0x0548, 0x027B, -0x00EB, 0x001A, 0x002B, -0x0023, + 0x0010, -0x0008, 0x0002, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0}}}; + + const s16* table = tables[table_index].data(); + s32 sum = 0; + for (u32 i = 0; i < 29; i++) + sum += (static_cast(ringbuf[(p - i) & 0x1F]) * static_cast(table[i])) >> 15; + + return static_cast(std::clamp(sum, -0x8000, 0x7FFF)); + }; + + s16* const left_ringbuf = s_xa_resample_ring_buffer[0].data(); + [[maybe_unused]] s16* const right_ringbuf = s_xa_resample_ring_buffer[1].data(); + u32 p = s_xa_resample_p; + u32 sixstep = s_xa_resample_sixstep; + for (u32 in_sample_index = 0; in_sample_index < num_frames_in; in_sample_index++) { - const s16 left = *(frames_in++); - const s16 right = STEREO ? *(frames_in++) : left; + // TODO: We can vectorize the multiplications in zigzag_interpolate by duplicating the sample in the ringbuffer at + // offset +32, allowing it to wrap once. + left_ringbuf[p] = *(frames_in++); + if constexpr (STEREO) + right_ringbuf[p] = *(frames_in++); + p = (p + 1) % 32; + sixstep--; - if constexpr (!STEREO) + if (sixstep == 0) { - UNREFERENCED_VARIABLE(right); - } - - for (u32 sample_dup = 0; sample_dup < (SAMPLE_RATE ? 2 : 1); sample_dup++) - { - left_ringbuf[p] = left; - if constexpr (STEREO) - right_ringbuf[p] = right; - p = (p + 1) % 32; - sixstep--; - - if (sixstep == 0) + sixstep = 6; + for (u32 j = 0; j < 7; j++) { - sixstep = 6; - for (u32 j = 0; j < 7; j++) - { - const s16 left_interp = ZigZagInterpolate(left_ringbuf, s_zigzag_table[j].data(), p); - const s16 right_interp = STEREO ? ZigZagInterpolate(right_ringbuf, s_zigzag_table[j].data(), p) : left_interp; - AddCDAudioFrame(left_interp, right_interp); - } + const s16 left_interp = zigzag_interpolate(left_ringbuf, j, p); + const s16 right_interp = STEREO ? zigzag_interpolate(right_ringbuf, j, p) : left_interp; + AddCDAudioFrame(left_interp, right_interp); } } } - s_xa_resample_p = p; - s_xa_resample_sixstep = sixstep; + s_xa_resample_p = Truncate8(p); + s_xa_resample_sixstep = Truncate8(sixstep); +} + +template +void CDROM::ResampleXAADPCM18900(const s16* frames_in, u32 num_frames_in) +{ + // Weights originally from Mednafen's interpolator. It's unclear where these came from, perhaps it was calculated + // somehow. This doesn't appear to use a zigzag pattern like psx-spx suggests, therefore it is restricted to only + // 18900hz resampling. Duplicating the 18900hz samples to 37800hz sounds even more awful than lower sample rate audio + // should, with a big spike at ~16KHz, especially with music in FMVs. Fortunately, few games actually use 18900hz XA. + static constexpr auto interpolate = [](const s16* ringbuf, u32 table_index, u32 p) -> s16 { + static std::array, 7> tables = {{ + {{0x0, -0x5, 0x11, -0x23, 0x46, -0x17, -0x44, 0x15b, -0x347, 0x80e, -0x1249, 0x3c07, 0x53e0, + -0x16fa, 0xafa, -0x548, 0x27b, -0xeb, 0x1a, 0x2b, -0x23, 0x10, -0x8, 0x2, 0x0}}, + {{0x0, -0x2, 0xa, -0x22, 0x41, -0x54, 0x34, 0x9, -0x10a, 0x400, -0xa78, 0x234c, 0x6794, + -0x1780, 0xbcd, -0x623, 0x350, -0x16d, 0x6b, 0xa, -0x10, 0x11, -0x8, 0x3, -0x1}}, + {{-0x2, 0x0, 0x3, -0x13, 0x3c, -0x4b, 0xa2, -0xe3, 0x132, -0x43, -0x267, 0xc9d, 0x74bb, + -0x11b4, 0x9b8, -0x5bf, 0x372, -0x1a8, 0xa6, -0x1b, 0x5, 0x6, -0x8, 0x3, -0x1}}, + {{-0x1, 0x3, -0x2, -0x5, 0x1f, -0x4a, 0xb3, -0x192, 0x2b1, -0x39e, 0x4f8, -0x5a6, 0x7939, + -0x5a6, 0x4f8, -0x39e, 0x2b1, -0x192, 0xb3, -0x4a, 0x1f, -0x5, -0x2, 0x3, -0x1}}, + {{-0x1, 0x3, -0x8, 0x6, 0x5, -0x1b, 0xa6, -0x1a8, 0x372, -0x5bf, 0x9b8, -0x11b4, 0x74bb, + 0xc9d, -0x267, -0x43, 0x132, -0xe3, 0xa2, -0x4b, 0x3c, -0x13, 0x3, 0x0, -0x2}}, + {{-0x1, 0x3, -0x8, 0x11, -0x10, 0xa, 0x6b, -0x16d, 0x350, -0x623, 0xbcd, -0x1780, 0x6794, + 0x234c, -0xa78, 0x400, -0x10a, 0x9, 0x34, -0x54, 0x41, -0x22, 0xa, -0x2, 0x0}}, + {{0x0, 0x2, -0x8, 0x10, -0x23, 0x2b, 0x1a, -0xeb, 0x27b, -0x548, 0xafa, -0x16fa, 0x53e0, + 0x3c07, -0x1249, 0x80e, -0x347, 0x15b, -0x44, -0x17, 0x46, -0x23, 0x11, -0x5, 0x0}}, + }}; + + const s16* table = tables[table_index].data(); + s32 sum = 0; + for (u32 i = 0; i < 25; i++) + sum += (static_cast(ringbuf[(p + 32 - 25 + i) & 0x1F]) * static_cast(table[i])); + + return static_cast(std::clamp(sum >> 15, -0x8000, 0x7FFF)); + }; + + s16* const left_ringbuf = s_xa_resample_ring_buffer[0].data(); + [[maybe_unused]] s16* const right_ringbuf = s_xa_resample_ring_buffer[1].data(); + u32 p = s_xa_resample_p; + u32 sixstep = s_xa_resample_sixstep; + + for (u32 in_sample_index = 0; in_sample_index < num_frames_in;) + { + if (sixstep >= 7) + { + sixstep -= 7; + p = (p + 1) % 32; + + left_ringbuf[p] = *(frames_in++); + if constexpr (STEREO) + right_ringbuf[p] = *(frames_in++); + + in_sample_index++; + } + + const s16 left_interp = interpolate(left_ringbuf, sixstep, p); + const s16 right_interp = STEREO ? interpolate(right_ringbuf, sixstep, p) : left_interp; + AddCDAudioFrame(left_interp, right_interp); + sixstep += 3; + } + + s_xa_resample_p = Truncate8(p); + s_xa_resample_sixstep = Truncate8(sixstep); } void CDROM::ResetCurrentXAFile() @@ -3318,7 +3373,7 @@ ALWAYS_INLINE_RELEASE void CDROM::ProcessXAADPCMSector(const u8* raw_sector, con // is read. Fixes audio in Tomb Raider III menu. if (!s_xa_current_set) { - // Some games (Taxi 2 and Blues Blues) have junk audio sectors with a channel number of 255. + // Some games (Taxi 2 and Blues Clues) have junk audio sectors with a channel number of 255. // We need to skip them otherwise it ends up playing the incorrect file. // TODO: Verify with a hardware test. if (s_last_sector_subheader.channel_number == 255 && (!s_mode.xa_filter || s_xa_filter_channel_number != 255)) @@ -3387,16 +3442,16 @@ ALWAYS_INLINE_RELEASE void CDROM::ProcessXAADPCMSector(const u8* raw_sector, con if (s_last_sector_subheader.codinginfo.IsStereo()) { if (s_last_sector_subheader.codinginfo.IsHalfSampleRate()) - ResampleXAADPCM(sample_buffer.data(), num_frames); + ResampleXAADPCM18900(sample_buffer.data(), num_frames); else - ResampleXAADPCM(sample_buffer.data(), num_frames); + ResampleXAADPCM(sample_buffer.data(), num_frames); } else { if (s_last_sector_subheader.codinginfo.IsHalfSampleRate()) - ResampleXAADPCM(sample_buffer.data(), num_frames); + ResampleXAADPCM18900(sample_buffer.data(), num_frames); else - ResampleXAADPCM(sample_buffer.data(), num_frames); + ResampleXAADPCM(sample_buffer.data(), num_frames); } }