From 6215418dc746d9a33d61c09b85db1785da7815e3 Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Sun, 12 Jun 2022 18:36:37 +1000 Subject: [PATCH 1/7] sampcnt/sampinc as .32fxp, add catmull-spline interpolation --- desmume/src/SPU.cpp | 496 ++++++++++----------- desmume/src/SPU.h | 33 +- desmume/src/frontend/windows/main.cpp | 1 + desmume/src/frontend/windows/soundView.cpp | 2 +- 4 files changed, 253 insertions(+), 279 deletions(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index d113e7f97..4d2a9f6f3 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -49,7 +49,14 @@ static inline u8 read08(u32 addr) { return _MMU_read08 static inline s8 read_s8(u32 addr) { return (s8)_MMU_read08(addr); } #define K_ADPCM_LOOPING_RECOVERY_INDEX 99999 -#define COSINE_INTERPOLATION_RESOLUTION 8192 + +#define CATMULLROM_INTERPOLATION_RESOLUTION_BITS 11 +#define CATMULLROM_INTERPOLATION_RESOLUTION (1<sampinc = (((double)ARM7_CLOCK) / (DESMUME_SAMPLE_RATE * 2)) / (double)(0x10000 - chan->timer); + // ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2) / (2^16 - Timer) + // = ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2 * (2^16 - Timer)) + // ... 
and then round up for good measure + chan->sampinc = ((u32)ARM7_CLOCK*(1ull<<32)-1) / (DESMUME_SAMPLE_RATE*2ull * (0x10000 - chan->timer)) + 1; } void SPU_struct::KeyProbe(int chan_num) @@ -399,6 +413,12 @@ void SPU_struct::KeyOn(int channel) thischan.totlength = thischan.length + thischan.loopstart; adjust_channel_timer(&thischan); + thischan.pcm16bOffs = 0; + for(int i=0;i> 0; case 0x505: return regs.soundbias >> 8; - + //SNDCAP0CNT/SNDCAP1CNT case 0x508: case 0x509: @@ -1018,161 +1032,113 @@ void SPU_struct::WriteLong(u32 addr, u32 val) } //switch on address } -template static FORCEINLINE s32 Interpolate(s32 a, s32 b, double ratio) +////////////////////////////////////////////////////////////////////////////// + +template static FORCEINLINE s32 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos) { - double sampleA = (double)a; - double sampleB = (double)b; - ratio = ratio - sputrunc(ratio); - switch (INTERPOLATE_MODE) { + case SPUInterpolation_CatmullRom: + { + // Catmull-Rom spline + // Delay: 2 samples + s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 3)]; + s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 2)]; + s32 c = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; + s32 d = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; + const s16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)]; + return (a*w[0] + b*w[1] + c*w[2] + d*w[3]) >> 15; + } + case SPUInterpolation_Cosine: + { // Cosine Interpolation Formula: // ratio2 = (1 - cos(ratio * M_PI)) / 2 // sampleI = sampleA * (1 - ratio2) + sampleB * ratio2 - return s32floor((cos_lut[(unsigned int)(ratio * (double)COSINE_INTERPOLATION_RESOLUTION)] * (sampleB - sampleA)) + sampleA); - break; - + // Delay: 1 sample + s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; + s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; + return a + ((b - a)*cos_lut[subPos >> (32 - COSINE_INTERPOLATION_RESOLUTION_BITS)] >> 15); + } + case SPUInterpolation_Linear: + { // Linear Interpolation Formula: // sampleI = 
sampleA * (1 - ratio) + sampleB * ratio - return s32floor((ratio * (sampleB - sampleA)) + sampleA); - break; - + // Delay: 1 sample + s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; + s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; + s32 subPos15 = subPos >> (32 - 15); + return a + ((b - a)*subPos15 >> 15); + } + default: - break; + // Delay: 0 samples + return pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs)]; } - - return a; } -////////////////////////////////////////////////////////////////////////////// - -template static FORCEINLINE void Fetch8BitData(channel_struct *chan, s32 *data) +static FORCEINLINE s32 Fetch8BitData(channel_struct *chan, s32 pos) { - if (chan->sampcnt < 0) - { - *data = 0; - return; + if(pos < 0) return 0; + + return read_s8(chan->addr + pos*1) << 8; +} + +static FORCEINLINE s32 Fetch16BitData(channel_struct *chan, s32 pos) +{ + if(pos < 0) return 0; + + return read16(chan->addr + pos*2); +} + +static FORCEINLINE s32 FetchADPCMData(channel_struct *chan, s32 pos) +{ + if(pos < 8) return 0; + + const u32 shift = (pos&1) * 4; + const u32 data4bit = ((u32)read08(chan->addr + (pos>>1))) >> shift; + const s32 diff = precalcdifftbl [chan->index][data4bit & 0xF]; + chan->index = precalcindextbl[chan->index][data4bit & 0x7]; + + s16 last = chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)]; + + if(pos == (chan->loopstart<<3)) { + //if(chan->loop_index != K_ADPCM_LOOPING_RECOVERY_INDEX) printf("over-snagging\n"); + chan->loop_pcm16b = last; + chan->loop_index = chan->index; } - u32 loc = sputrunc(chan->sampcnt); - if(INTERPOLATE_MODE != SPUInterpolation_None) + return MinMax(last + diff, -0x8000, 0x7FFF); +} + +static FORCEINLINE s32 FetchPSGData(channel_struct *chan, s32 pos) +{ + if(pos < 0 || chan->num < 8) return 0; + + // Chan 8..13: Square wave, Chan 14..15: Noise + if(chan->num < 14) { - s32 a = (s32)(read_s8(chan->addr + loc) << 8), b = a; - if(loc < (chan->totlength << 2) - 1) - b = (s32)(read_s8(chan->addr + loc + 1) << 8); - else 
if(chan->repeat == 1) - b = (s32)(read_s8(chan->addr + chan->loopstart*4) << 8); - *data = Interpolate(a, b, chan->sampcnt); + // Doing this avoids using a LUT + // Duty==0 (12.5%): -_______ + // Duty==1 (25.0%): --______ + // Duty==2 (50.0%): ----____ + // Duty==3 (75.0%): ------__ + u32 wavepos = (pos%8u) + (chan->waveduty != 0); + return (wavepos > chan->waveduty*2) ? (-0x7FFF) : (+0x7FFF); } else - *data = (s32)read_s8(chan->addr + loc)<< 8; -} - -template static FORCEINLINE void Fetch16BitData(const channel_struct * const chan, s32 *data) -{ - if (chan->sampcnt < 0) { - *data = 0; - return; - } - - u32 loc = sputrunc(chan->sampcnt); - if(INTERPOLATE_MODE != SPUInterpolation_None) - { - s32 a = (s32)read16(loc*2 + chan->addr), b = a; - if(loc < (chan->totlength << 1) - 1) - b = (s32)read16(chan->addr + loc*2 + 2); - else if(chan->repeat == 1) - b = (s32)read16(chan->addr + chan->loopstart*2); - *data = Interpolate(a, b, chan->sampcnt); - } - else - *data = read16(chan->addr + loc*2); -} - -template static FORCEINLINE void FetchADPCMData(channel_struct * const chan, s32 * const data) -{ - if (chan->sampcnt < 8) - { - *data = 0; - return; - } - - // No sense decoding, just return the last sample - if (chan->lastsampcnt != sputrunc(chan->sampcnt)){ - - const u32 endExclusive = sputrunc(chan->sampcnt+1); - for (u32 i = chan->lastsampcnt+1; i < endExclusive; i++) + if(chan->x & 0x1) { - const u32 shift = (i&1)<<2; - const u32 data4bit = ((u32)read08(chan->addr + (i>>1))) >> shift; - - const s32 diff = precalcdifftbl[chan->index][data4bit & 0xF]; - chan->index = precalcindextbl[chan->index][data4bit & 0x7]; - - chan->pcm16b_last = chan->pcm16b; - chan->pcm16b = MinMax(chan->pcm16b+diff, -0x8000, 0x7FFF); - - if(i == (chan->loopstart<<3)) { - if(chan->loop_index != K_ADPCM_LOOPING_RECOVERY_INDEX) printf("over-snagging\n"); - chan->loop_pcm16b = chan->pcm16b; - chan->loop_index = chan->index; - } + chan->x = (chan->x >> 1) ^ 0x6000; + return -0x7FFF; } - - 
chan->lastsampcnt = sputrunc(chan->sampcnt); - } - - if(INTERPOLATE_MODE != SPUInterpolation_None) - *data = Interpolate((s32)chan->pcm16b_last,(s32)chan->pcm16b,chan->sampcnt); - else - *data = (s32)chan->pcm16b; -} - -static FORCEINLINE void FetchPSGData(channel_struct *chan, s32 *data) -{ - if (chan->sampcnt < 0) - { - *data = 0; - return; - } - - if(chan->num < 8) - { - *data = 0; - } - else if(chan->num < 14) - { - *data = (s32)wavedutytbl[chan->waveduty][(sputrunc(chan->sampcnt)) & 0x7]; - } - else - { - if(chan->lastsampcnt == sputrunc(chan->sampcnt)) + else { - *data = (s32)chan->psgnoise_last; - return; + chan->x >>= 1; + return +0x7FFF; } - - u32 max = sputrunc(chan->sampcnt); - for(u32 i = chan->lastsampcnt; i < max; i++) - { - if(chan->x & 0x1) - { - chan->x = (chan->x >> 1) ^ 0x6000; - chan->psgnoise_last = -0x7FFF; - } - else - { - chan->x >>= 1; - chan->psgnoise_last = 0x7FFF; - } - } - - chan->lastsampcnt = sputrunc(chan->sampcnt); - - *data = (s32)chan->psgnoise_last; } } @@ -1201,66 +1167,42 @@ static FORCEINLINE void MixLR(SPU_struct* SPU, channel_struct *chan, s32 data) template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channel_struct *chan) { - const int shift = (FORMAT == 0 ? 2 : 1); + // Do nothing if we haven't reached the end + if((chan->sampcnt >> 32) < chan->totlength_shifted) return; - chan->sampcnt += chan->sampinc; - - if (chan->sampcnt > chan->double_totlength_shifted) + // Kill the channel if we don't repeat + if(chan->repeat != 1) { - // Do we loop? Or are we done? - if (chan->repeat == 1) + SPU->KeyOff(chan->num); + SPU->bufpos = SPU->buflength; + return; + } + + // ADPCM needs special handling + if(FORMAT == 2) + { + // Minimum length (the sum of PNT+LEN) is 4 words (16 bytes), + // smaller values (0..3 words) are causing hang-ups + // (busy bit remains set infinite, but no sound output occurs). 
+ // fix: 7th Dragon (JP) - http://sourceforge.net/p/desmume/bugs/1357/ + if (chan->totlength < 4) return; + + // Stash loop sample and index + if(chan->loop_index == K_ADPCM_LOOPING_RECOVERY_INDEX) { - while (chan->sampcnt > chan->double_totlength_shifted) - chan->sampcnt -= chan->double_totlength_shifted - (double)(chan->loopstart << shift); - //chan->sampcnt = (double)(chan->loopstart << shift); + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = (s16)read16(chan->addr); + chan->index = read08(chan->addr+2) & 0x7F; } else { - SPU->KeyOff(chan->num); - SPU->bufpos = SPU->buflength; + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = chan->loop_pcm16b; + chan->index = chan->loop_index; } } -} -static FORCEINLINE void TestForLoop2(SPU_struct *SPU, channel_struct *chan) -{ - // Minimum length (the sum of PNT+LEN) is 4 words (16 bytes), - // smaller values (0..3 words) are causing hang-ups - // (busy bit remains set infinite, but no sound output occurs). - // fix: 7th Dragon (JP) - http://sourceforge.net/p/desmume/bugs/1357/ - if (chan->totlength < 4) return; - - chan->sampcnt += chan->sampinc; - - if (chan->sampcnt > chan->double_totlength_shifted) - { - // Do we loop? Or are we done? 
- if (chan->repeat == 1) - { - double step = (chan->double_totlength_shifted - (double)(chan->loopstart << 3)); - - while (chan->sampcnt > chan->double_totlength_shifted) chan->sampcnt -= step; - - if(chan->loop_index == K_ADPCM_LOOPING_RECOVERY_INDEX) - { - chan->pcm16b = (s16)read16(chan->addr); - chan->index = read08(chan->addr+2) & 0x7F; - chan->lastsampcnt = 7; - } - else - { - chan->pcm16b = chan->loop_pcm16b; - chan->index = chan->loop_index; - chan->lastsampcnt = (chan->loopstart << 3); - } - } - else - { - chan->status = CHANSTAT_STOPPED; - SPU->KeyOff(chan->num); - SPU->bufpos = SPU->buflength; - } - } + // Wrap sampcnt + s64 step = chan->totlength_shifted - (chan->loopstart << format_shift[FORMAT]); + while ((chan->sampcnt >> 32) >= chan->totlength_shifted) chan->sampcnt -= step * (1ll << 32); } template FORCEINLINE static void SPU_Mix(SPU_struct* SPU, channel_struct *chan, s32 data) @@ -1281,25 +1223,36 @@ template { for (; SPU->bufpos < SPU->buflength; SPU->bufpos++) { - if(CHANNELS != -1) + // Advance sampcnt one sample at a time. This is + // needed to keep pcm16b[] filled for interpolation. 
+ // We need to do some janky things here to keep the + // fractional bits in place when we loop :/ + s64 newsampcnt = chan->sampcnt + chan->sampinc; + u32 nSamplesToSkip = (u32)((newsampcnt >> 32) - (chan->sampcnt >> 32)); + while(nSamplesToSkip--) { - s32 data; + s16 data = 0; + s32 pos = chan->sampcnt >> 32; switch(FORMAT) { - case 0: Fetch8BitData(chan, &data); break; - case 1: Fetch16BitData(chan, &data); break; - case 2: FetchADPCMData(chan, &data); break; - case 3: FetchPSGData(chan, &data); break; + case 0: data = Fetch8BitData (chan, pos); break; + case 1: data = Fetch16BitData(chan, pos); break; + case 2: data = FetchADPCMData(chan, pos); break; + case 3: data = FetchPSGData (chan, pos); break; default: break; } - SPU_Mix(SPU, chan, data); - } + chan->pcm16bOffs++; + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = data; - switch(FORMAT) { - case 0: case 1: TestForLoop(SPU, chan); break; - case 2: TestForLoop2(SPU, chan); break; - case 3: chan->sampcnt += chan->sampinc; break; - default: break; + chan->sampcnt += 1ll << 32; + if (FORMAT != 3) TestForLoop(SPU, chan); + } + chan->sampcnt = ((chan->sampcnt >> 32) << 32) | (u32)newsampcnt; + + if(CHANNELS != -1) + { + s32 data = Interpolate(chan->pcm16b, chan->pcm16bOffs, (u32)chan->sampcnt); + SPU_Mix(SPU, chan, data); } } } @@ -1320,12 +1273,14 @@ template template FORCEINLINE static void __SPU_ChanUpdate(const bool actuallyMix, SPU_struct* const SPU, channel_struct* const chan) { + // NOTE: PSG doesn't use interpolation, or it would try to + // interpolate between the raw sample points (very bad) switch(chan->format) { case 0: ___SPU_ChanUpdate<0,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; case 1: ___SPU_ChanUpdate<1,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; case 2: ___SPU_ChanUpdate<2,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; - case 3: ___SPU_ChanUpdate<3,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; + case 3: 
___SPU_ChanUpdate<3,SPUInterpolationMode::SPUInterpolation_None>(actuallyMix, SPU, chan); break; default: assert(false); } } @@ -1334,9 +1289,10 @@ FORCEINLINE static void _SPU_ChanUpdate(const bool actuallyMix, SPU_struct* cons { switch(CommonSettings.spuInterpolationMode) { - case SPUInterpolation_None: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_Linear: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_Cosine: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_None: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_Linear: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_Cosine: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_CatmullRom: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; default: assert(false); } } @@ -1490,9 +1446,9 @@ static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) if (SPU->regs.cap[capchan].runtime.running) { SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; - u32 last = sputrunc(cap.runtime.sampcnt); + u32 last = cap.runtime.sampcnt >> 32; cap.runtime.sampcnt += SPU->channels[1+2*capchan].sampinc; - u32 curr = sputrunc(cap.runtime.sampcnt); + u32 curr = cap.runtime.sampcnt >> 32; for (u32 j = last; j < curr; j++) { //so, this is a little strange. why go through a fifo? 
@@ -1544,7 +1500,7 @@ static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) if (cap.runtime.curdad >= cap.runtime.maxdad) { cap.runtime.curdad = cap.dad; - cap.runtime.sampcnt -= cap.len*multiplier; + cap.runtime.sampcnt -= cap.len*multiplier * (1ull<<32); } } //sampinc loop } //if capchan running @@ -1608,9 +1564,9 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) { for (int samp = 0; samp < length; samp++) { - u32 last = sputrunc(cap.runtime.sampcnt); + u32 last = cap.runtime.sampcnt >> 32; cap.runtime.sampcnt += SPU->channels[1+2*capchan].sampinc; - u32 curr = sputrunc(cap.runtime.sampcnt); + u32 curr = cap.runtime.sampcnt >> 32; for (u32 j = last; j < curr; j++) { if (cap.bits8) @@ -1627,7 +1583,7 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) if (cap.runtime.curdad >= cap.runtime.maxdad) { cap.runtime.curdad = cap.dad; - cap.runtime.sampcnt -= cap.len*(cap.bits8?4:2); + cap.runtime.sampcnt -= cap.len*(cap.bits8?4:2) * (1ull<<32); } } } @@ -1733,7 +1689,7 @@ void SPU_Emulate_user(bool mix) postProcessBufferSize = freeSampleCount * 2 * sizeof(s16); postProcessBuffer = (s16 *)realloc(postProcessBuffer, postProcessBufferSize); } - + if (soundProcessor->PostProcessSamples != NULL) { processedSampleCount = soundProcessor->PostProcessSamples(postProcessBuffer, freeSampleCount, _currentSynchMode, _currentSynchronizer); @@ -1957,7 +1913,7 @@ void WAV_WavSoundUpdate(void* soundData, int numSamples, WAVMode mode) void spu_savestate(EMUFILE &os) { //version - os.write_32LE(6); + os.write_32LE(7); SPU_struct *spu = SPU_core; @@ -1973,18 +1929,16 @@ void spu_savestate(EMUFILE &os) os.write_u8(chan.repeat); os.write_u8(chan.format); os.write_u8(chan.status); + os.write_u8(chan.pcm16bOffs); os.write_32LE(chan.addr); os.write_16LE(chan.timer); os.write_16LE(chan.loopstart); os.write_32LE(chan.length); - os.write_doubleLE(chan.sampcnt); - os.write_doubleLE(chan.sampinc); - 
os.write_32LE(chan.lastsampcnt); - os.write_16LE(chan.pcm16b); - os.write_16LE(chan.pcm16b_last); + os.write_64LE(chan.sampcnt); + os.write_64LE(chan.sampinc); + for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) os.write_16LE(chan.pcm16b[i]); os.write_32LE(chan.index); os.write_16LE(chan.x); - os.write_16LE(chan.psgnoise_last); os.write_u8(chan.keyon); } @@ -2010,7 +1964,7 @@ void spu_savestate(EMUFILE &os) os.write_u8(spu->regs.cap[i].runtime.running); os.write_32LE(spu->regs.cap[i].runtime.curdad); os.write_32LE(spu->regs.cap[i].runtime.maxdad); - os.write_doubleLE(spu->regs.cap[i].runtime.sampcnt); + os.write_64LE(spu->regs.cap[i].runtime.sampcnt); } for (int i = 0; i < 2; i++) @@ -2044,29 +1998,44 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_u8(chan.repeat); is.read_u8(chan.format); is.read_u8(chan.status); + if (version >= 7) is.read_u8(chan.pcm16bOffs); else chan.pcm16bOffs = 0; is.read_32LE(chan.addr); is.read_16LE(chan.timer); is.read_16LE(chan.loopstart); is.read_32LE(chan.length); chan.totlength = chan.length + chan.loopstart; - chan.double_totlength_shifted = (double)(chan.totlength << format_shift[chan.format]); - //printf("%f\n",chan.double_totlength_shifted); - if (version >= 2) + chan.totlength_shifted = chan.totlength << format_shift[chan.format]; + if(version >= 7) { + is.read_64LE(chan.sampcnt); + is.read_64LE(chan.sampinc); + } + else if (version >= 2) { - is.read_doubleLE(chan.sampcnt); - is.read_doubleLE(chan.sampinc); + double temp; + is.read_doubleLE(temp); chan.sampcnt = (s64)(temp * (1ll<<32)); + is.read_doubleLE(temp); chan.sampinc = (s64)(temp * (1ll<<32)); } else { + // FIXME + // What even is supposed to be happening here? + // sampcnt and sampinc were double type before + // I even made any changes, so this is broken. 
is.read_32LE(*(u32 *)&chan.sampcnt); is.read_32LE(*(u32 *)&chan.sampinc); } - is.read_32LE(chan.lastsampcnt); - is.read_16LE(chan.pcm16b); - is.read_16LE(chan.pcm16b_last); + if (version >= 7) { + for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) is.read_16LE(chan.pcm16b[i]); + } + else + { + is.fseek(4, SEEK_CUR); // chan.lastsampcnt (LE32) + is.read_16LE(chan.pcm16b[0]); // chan.pcm16b + is.fseek(2, SEEK_CUR); // chan.pcm16b_last + } is.read_32LE(chan.index); is.read_16LE(chan.x); - is.read_16LE(chan.psgnoise_last); + if (version < 7) is.fseek(2, SEEK_CUR); // chan.psgnoise_last (LE16) if (version >= 4) is.read_u8(chan.keyon); @@ -2105,7 +2074,14 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_u8(spu->regs.cap[i].runtime.running); is.read_32LE(spu->regs.cap[i].runtime.curdad); is.read_32LE(spu->regs.cap[i].runtime.maxdad); - is.read_doubleLE(spu->regs.cap[i].runtime.sampcnt); + if (version >= 7) { + is.read_64LE(spu->regs.cap[i].runtime.sampcnt); + } + else + { + double temp; + is.read_doubleLE(temp); spu->regs.cap[i].runtime.sampcnt = temp * (1ull << 32); + } } } diff --git a/desmume/src/SPU.h b/desmume/src/SPU.h index 6883b02ca..4951654a4 100644 --- a/desmume/src/SPU.h +++ b/desmume/src/SPU.h @@ -36,10 +36,9 @@ class EMUFILE; #define CHANSTAT_STOPPED 0 #define CHANSTAT_PLAY 1 +#define SPUINTERPOLATION_TAPS 4 // Must be at least 4 for Catmull-Rom interpolation //who made these static? theyre used in multiple places. -FORCEINLINE u32 sputrunc(float f) { return u32floor(f); } -FORCEINLINE u32 sputrunc(double d) { return u32floor(d); } FORCEINLINE s32 spumuldiv7(s32 val, u8 multiplier) { assert(multiplier <= 127); return (multiplier == 127) ? 
val : ((val * multiplier) >> 7); @@ -49,7 +48,8 @@ enum SPUInterpolationMode { SPUInterpolation_None = 0, SPUInterpolation_Linear = 1, - SPUInterpolation_Cosine = 2 + SPUInterpolation_Cosine = 2, + SPUInterpolation_CatmullRom = 3, }; struct SoundInterface_struct @@ -84,22 +84,19 @@ struct channel_struct format(0), keyon(0), status(0), + pcm16bOffs(0), addr(0), timer(0), loopstart(0), length(0), totlength(0), - double_totlength_shifted(0.0), - sampcnt(0.0), - sampinc(0.0), - lastsampcnt(0), - pcm16b(0), - pcm16b_last(0), + totlength_shifted(0), + sampcnt(0), + sampinc(0), loop_pcm16b(0), index(0), loop_index(0), - x(0), - psgnoise_last(0) + x(0) {} u32 num; u8 vol; @@ -111,22 +108,22 @@ struct channel_struct u8 format; u8 keyon; u8 status; + u8 pcm16bOffs; u32 addr; u16 timer; u16 loopstart; u32 length; u32 totlength; - double double_totlength_shifted; - double sampcnt; - double sampinc; + s64 totlength_shifted; + s64 sampcnt; // .32fxp + s64 sampinc; // .32fxp + s16 pcm16b[SPUINTERPOLATION_TAPS]; // ADPCM specific - u32 lastsampcnt; - s16 pcm16b, pcm16b_last; s16 loop_pcm16b; s32 index; int loop_index; + // PSG noise u16 x; - s16 psgnoise_last; }; class SPUFifo @@ -196,7 +193,7 @@ public: u8 running; u32 curdad; u32 maxdad; - double sampcnt; + s64 sampcnt; SPUFifo fifo; } runtime; } cap[2]; diff --git a/desmume/src/frontend/windows/main.cpp b/desmume/src/frontend/windows/main.cpp index b1001929b..5a3cb1608 100644 --- a/desmume/src/frontend/windows/main.cpp +++ b/desmume/src/frontend/windows/main.cpp @@ -6532,6 +6532,7 @@ static LRESULT CALLBACK SoundSettingsDlgProc(HWND hDlg, UINT uMsg, WPARAM wParam SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"None (harsh, most accurate to NDS)"); SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"Linear (smooth, most sound detail loss)"); SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"Cosine (balanced, smooth and accurate)"); + 
SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"Catmull-Rom (smooth and bright)"); SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_SETCURSEL, (int)CommonSettings.spuInterpolationMode, 0); // Setup Sound Buffer Size Edit Text diff --git a/desmume/src/frontend/windows/soundView.cpp b/desmume/src/frontend/windows/soundView.cpp index 809fe35f2..085a44707 100644 --- a/desmume/src/frontend/windows/soundView.cpp +++ b/desmume/src/frontend/windows/soundView.cpp @@ -195,7 +195,7 @@ void SoundView_Refresh(bool forceRedraw) sprintf(buf, "$%04X (%.1f Hz)", thischan.timer, (ARM7_CLOCK/2) / (double)(0x10000 - thischan.timer)); SetDlgItemText(hDlg, IDC_SOUND0TMR+chanId, buf); - sprintf(buf, "samp #%d / #%d", sputrunc(thischan.sampcnt), thischan.totlength << format_shift[thischan.format]); + sprintf(buf, "samp #%d / #%d", (s32)(thischan.sampcnt >> 32), thischan.totlength << format_shift[thischan.format]); SetDlgItemText(hDlg, IDC_SOUND0POSLEN+chanId, buf); } else { From b2c4d449ca59e9a551b3721df856489ca6308a37 Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Sun, 12 Jun 2022 19:03:48 +1000 Subject: [PATCH 2/7] fix PSG square wave Was reading the wrong part of GBATek --- desmume/src/SPU.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index 4d2a9f6f3..0e5cc47ae 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -1120,12 +1120,7 @@ static FORCEINLINE s32 FetchPSGData(channel_struct *chan, s32 pos) if(chan->num < 14) { // Doing this avoids using a LUT - // Duty==0 (12.5%): -_______ - // Duty==1 (25.0%): --______ - // Duty==2 (50.0%): ----____ - // Duty==3 (75.0%): ------__ - u32 wavepos = (pos%8u) + (chan->waveduty != 0); - return (wavepos > chan->waveduty*2) ? (-0x7FFF) : (+0x7FFF); + return ((pos%8u) > chan->waveduty) ? 
(-0x7FFF) : (+0x7FFF); } else { From 235d9efea7429820ac3c20f9c47a222ee5272f0b Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Sun, 12 Jun 2022 19:33:09 +1000 Subject: [PATCH 3/7] attempt to fix for macos --- desmume/src/SPU.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index 0e5cc47ae..0ca653ada 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -1275,7 +1275,7 @@ template case 0: ___SPU_ChanUpdate<0,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; case 1: ___SPU_ChanUpdate<1,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; case 2: ___SPU_ChanUpdate<2,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; - case 3: ___SPU_ChanUpdate<3,SPUInterpolationMode::SPUInterpolation_None>(actuallyMix, SPU, chan); break; + case 3: ___SPU_ChanUpdate<3,SPUInterpolation_None>(actuallyMix, SPU, chan); break; default: assert(false); } } From 41edf7be5ecddeae4d7adcb6392d9e45b0007d5c Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Sun, 12 Jun 2022 20:10:26 +1000 Subject: [PATCH 4/7] fix broken adpcm --- desmume/src/SPU.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index 0ca653ada..ef3d93656 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -1096,11 +1096,6 @@ static FORCEINLINE s32 FetchADPCMData(channel_struct *chan, s32 pos) { if(pos < 8) return 0; - const u32 shift = (pos&1) * 4; - const u32 data4bit = ((u32)read08(chan->addr + (pos>>1))) >> shift; - const s32 diff = precalcdifftbl [chan->index][data4bit & 0xF]; - chan->index = precalcindextbl[chan->index][data4bit & 0x7]; - s16 last = chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)]; if(pos == (chan->loopstart<<3)) { @@ -1108,6 +1103,11 @@ static FORCEINLINE s32 FetchADPCMData(channel_struct *chan, s32 pos) chan->loop_pcm16b = last; chan->loop_index = chan->index; } + + const u32 shift = (pos&1) * 4; + const u32 data4bit = ((u32)read08(chan->addr + (pos>>1))) >> shift; + 
const s32 diff = precalcdifftbl [chan->index][data4bit & 0xF]; + chan->index = precalcindextbl[chan->index][data4bit & 0x7]; return MinMax(last + diff, -0x8000, 0x7FFF); } From 589084ec74b1b512e991123a1b3713eb946bc5b0 Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Mon, 13 Jun 2022 14:28:44 +1000 Subject: [PATCH 5/7] Split 64bit counters into 2x32bit This appears to generate slightly saner code --- desmume/src/SPU.cpp | 99 +++++++++++++--------- desmume/src/SPU.h | 17 ++-- desmume/src/frontend/windows/soundView.cpp | 2 +- 3 files changed, 72 insertions(+), 46 deletions(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index ef3d93656..46b92758e 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -126,6 +126,14 @@ static FORCEINLINE T MinMax(T val, T min, T max) return val; } +// T must be unsigned type +template +static FORCEINLINE T AddAndReturnCarry(T *a, T b) { + T c = (*a >= (-b)); + *a += b; + return c; +} + //--------------external spu interface--------------- int SPU_ChangeSoundCore(int coreid, int newBufferSizeBytes) @@ -380,7 +388,8 @@ static FORCEINLINE void adjust_channel_timer(channel_struct *chan) // ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2) / (2^16 - Timer) // = ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2 * (2^16 - Timer)) // ... 
and then round up for good measure - chan->sampinc = ((u32)ARM7_CLOCK*(1ull<<32)-1) / (DESMUME_SAMPLE_RATE*2ull * (0x10000 - chan->timer)) + 1; + u64 sampinc = ((u32)ARM7_CLOCK*(1ull << 32) - 1) / (DESMUME_SAMPLE_RATE * 2ull * (0x10000 - chan->timer)) + 1; + chan->sampincInt = (u32)(sampinc >> 32), chan->sampincFrac = (u32)sampinc; } void SPU_struct::KeyProbe(int chan_num) @@ -431,23 +440,23 @@ void SPU_struct::KeyOn(int channel) case 0: // 8-bit // thischan.loopstart = thischan.loopstart << 2; // thischan.length = (thischan.length << 2) + thischan.loopstart; - thischan.sampcnt = -3 * (1ll<<32); + thischan.sampcntFrac = 0, thischan.sampcntInt = -3; break; case 1: // 16-bit // thischan.loopstart = thischan.loopstart << 1; // thischan.length = (thischan.length << 1) + thischan.loopstart; - thischan.sampcnt = -3 * (1ll<<32); + thischan.sampcntFrac = 0, thischan.sampcntInt = -3; break; case 2: // ADPCM thischan.pcm16b[0] = (s16)read16(thischan.addr); thischan.index = read08(thischan.addr + 2) & 0x7F; - thischan.sampcnt = -3 * (1ll<<32); + thischan.sampcntFrac = 0, thischan.sampcntInt = -3; thischan.loop_index = K_ADPCM_LOOPING_RECOVERY_INDEX; // thischan.loopstart = thischan.loopstart << 3; // thischan.length = (thischan.length << 3) + thischan.loopstart; break; case 3: // PSG - thischan.sampcnt = -1 * (1ll<<32); + thischan.sampcntFrac = 0, thischan.sampcntInt = -1; thischan.x = 0x7FFF; break; default: break; @@ -756,7 +765,7 @@ void SPU_struct::ProbeCapture(int which) u32 len = cap.len; if(len==0) len=1; cap.runtime.maxdad = cap.dad + len*4; - cap.runtime.sampcnt = 0; + cap.runtime.sampcntFrac = cap.runtime.sampcntInt = 0; cap.runtime.fifo.reset(); } @@ -1163,7 +1172,7 @@ static FORCEINLINE void MixLR(SPU_struct* SPU, channel_struct *chan, s32 data) template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channel_struct *chan) { // Do nothing if we haven't reached the end - if((chan->sampcnt >> 32) < chan->totlength_shifted) return; + if(chan->sampcntInt < 
chan->totlength_shifted) return; // Kill the channel if we don't repeat if(chan->repeat != 1) @@ -1196,8 +1205,8 @@ template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channe } // Wrap sampcnt - s64 step = chan->totlength_shifted - (chan->loopstart << format_shift[FORMAT]); - while ((chan->sampcnt >> 32) >= chan->totlength_shifted) chan->sampcnt -= step * (1ll << 32); + u32 step = chan->totlength_shifted - (chan->loopstart << format_shift[FORMAT]); + while (chan->sampcntInt >= chan->totlength_shifted) chan->sampcntInt -= step; } template FORCEINLINE static void SPU_Mix(SPU_struct* SPU, channel_struct *chan, s32 data) @@ -1220,14 +1229,11 @@ template { // Advance sampcnt one sample at a time. This is // needed to keep pcm16b[] filled for interpolation. - // We need to do some janky things here to keep the - // fractional bits in place when we loop :/ - s64 newsampcnt = chan->sampcnt + chan->sampinc; - u32 nSamplesToSkip = (u32)((newsampcnt >> 32) - (chan->sampcnt >> 32)); + u32 nSamplesToSkip = chan->sampincInt + AddAndReturnCarry(&chan->sampcntFrac, chan->sampincFrac); while(nSamplesToSkip--) { s16 data = 0; - s32 pos = chan->sampcnt >> 32; + s32 pos = chan->sampcntInt; switch(FORMAT) { case 0: data = Fetch8BitData (chan, pos); break; @@ -1239,14 +1245,13 @@ template chan->pcm16bOffs++; chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = data; - chan->sampcnt += 1ll << 32; + chan->sampcntInt++; if (FORMAT != 3) TestForLoop(SPU, chan); } - chan->sampcnt = ((chan->sampcnt >> 32) << 32) | (u32)newsampcnt; if(CHANNELS != -1) { - s32 data = Interpolate(chan->pcm16b, chan->pcm16bOffs, (u32)chan->sampcnt); + s32 data = Interpolate(chan->pcm16b, chan->pcm16bOffs, chan->sampcntFrac); SPU_Mix(SPU, chan, data); } } @@ -1438,13 +1443,13 @@ static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) for (int capchan = 0; capchan < 2; capchan++) { + SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; + channel_struct& srcChan = SPU->channels[1 + 2 
* capchan]; if (SPU->regs.cap[capchan].runtime.running) { - SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; - u32 last = cap.runtime.sampcnt >> 32; - cap.runtime.sampcnt += SPU->channels[1+2*capchan].sampinc; - u32 curr = cap.runtime.sampcnt >> 32; - for (u32 j = last; j < curr; j++) + u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&cap.runtime.sampcntFrac, srcChan.sampincFrac); + cap.runtime.sampcntInt += nSamplesToProcess; + while(nSamplesToProcess--) { //so, this is a little strange. why go through a fifo? //it seems that some games will set up a reverb effect by capturing @@ -1495,7 +1500,7 @@ static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) if (cap.runtime.curdad >= cap.runtime.maxdad) { cap.runtime.curdad = cap.dad; - cap.runtime.sampcnt -= cap.len*multiplier * (1ull<<32); + cap.runtime.sampcntInt -= cap.len*multiplier; } } //sampinc loop } //if capchan running @@ -1555,14 +1560,14 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) for (int capchan = 0; capchan < 2; capchan++) { SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; + channel_struct& srcChan = SPU->channels[1 + 2 * capchan]; if (cap.runtime.running) { for (int samp = 0; samp < length; samp++) { - u32 last = cap.runtime.sampcnt >> 32; - cap.runtime.sampcnt += SPU->channels[1+2*capchan].sampinc; - u32 curr = cap.runtime.sampcnt >> 32; - for (u32 j = last; j < curr; j++) + u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&cap.runtime.sampcntFrac, srcChan.sampincFrac); + cap.runtime.sampcntInt += nSamplesToProcess; + while (nSamplesToProcess--) { if (cap.bits8) { @@ -1578,7 +1583,7 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) if (cap.runtime.curdad >= cap.runtime.maxdad) { cap.runtime.curdad = cap.dad; - cap.runtime.sampcnt -= cap.len*(cap.bits8?4:2) * (1ull<<32); + cap.runtime.sampcntInt -= cap.len*(cap.bits8?4:2); } } } @@ -1929,8 +1934,10 @@ void spu_savestate(EMUFILE &os) 
os.write_16LE(chan.timer); os.write_16LE(chan.loopstart); os.write_32LE(chan.length); - os.write_64LE(chan.sampcnt); - os.write_64LE(chan.sampinc); + os.write_32LE(chan.sampcntFrac); + os.write_32LE(chan.sampcntInt); + os.write_32LE(chan.sampincFrac); + os.write_32LE(chan.sampincInt); for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) os.write_16LE(chan.pcm16b[i]); os.write_32LE(chan.index); os.write_16LE(chan.x); @@ -1959,7 +1966,8 @@ void spu_savestate(EMUFILE &os) os.write_u8(spu->regs.cap[i].runtime.running); os.write_32LE(spu->regs.cap[i].runtime.curdad); os.write_32LE(spu->regs.cap[i].runtime.maxdad); - os.write_64LE(spu->regs.cap[i].runtime.sampcnt); + os.write_32LE(spu->regs.cap[i].runtime.sampcntFrac); + os.write_32LE(spu->regs.cap[i].runtime.sampcntInt); } for (int i = 0; i < 2; i++) @@ -2001,14 +2009,21 @@ bool spu_loadstate(EMUFILE &is, int size) chan.totlength = chan.length + chan.loopstart; chan.totlength_shifted = chan.totlength << format_shift[chan.format]; if(version >= 7) { - is.read_64LE(chan.sampcnt); - is.read_64LE(chan.sampinc); + is.read_32LE(chan.sampcntFrac); + is.read_32LE(chan.sampcntInt); + is.read_32LE(chan.sampincFrac); + is.read_32LE(chan.sampincInt); } else if (version >= 2) { double temp; - is.read_doubleLE(temp); chan.sampcnt = (s64)(temp * (1ll<<32)); - is.read_doubleLE(temp); chan.sampinc = (s64)(temp * (1ll<<32)); + s64 temp2; + is.read_doubleLE(temp); temp2 = (s64)(temp * (1ll << 32)); + chan.sampcntFrac = (u32)temp2; + chan.sampcntInt = (s32)(temp2 >> 32); + is.read_doubleLE(temp); temp2 = (u64)(temp * (1ull << 32)); // Intentionally unsigned + chan.sampincFrac = (u32)temp2; + chan.sampincInt = (u32)(temp2 >> 32); } else { @@ -2016,8 +2031,10 @@ bool spu_loadstate(EMUFILE &is, int size) // What even is supposed to be happening here? // sampcnt and sampinc were double type before // I even made any changes, so this is broken. 
- is.read_32LE(*(u32 *)&chan.sampcnt); - is.read_32LE(*(u32 *)&chan.sampinc); + chan.sampcntFrac = 0; + is.read_32LE(chan.sampcntInt); + chan.sampincFrac = 0; + is.read_32LE(chan.sampincInt); } if (version >= 7) { for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) is.read_16LE(chan.pcm16b[i]); @@ -2070,12 +2087,16 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_32LE(spu->regs.cap[i].runtime.curdad); is.read_32LE(spu->regs.cap[i].runtime.maxdad); if (version >= 7) { - is.read_64LE(spu->regs.cap[i].runtime.sampcnt); + is.read_32LE(spu->regs.cap[i].runtime.sampcntFrac); + is.read_32LE(spu->regs.cap[i].runtime.sampcntInt); } else { double temp; - is.read_doubleLE(temp); spu->regs.cap[i].runtime.sampcnt = temp * (1ull << 32); + u64 temp2; + is.read_doubleLE(temp); temp2 = (u64)(temp * (1ull << 32)); + spu->regs.cap[i].runtime.sampcntFrac = (u32)temp2; + spu->regs.cap[i].runtime.sampcntInt = (u32)(temp2 >> 32); } } } diff --git a/desmume/src/SPU.h b/desmume/src/SPU.h index 4951654a4..5ba6d1c96 100644 --- a/desmume/src/SPU.h +++ b/desmume/src/SPU.h @@ -91,8 +91,10 @@ struct channel_struct length(0), totlength(0), totlength_shifted(0), - sampcnt(0), - sampinc(0), + sampcntFrac(0), + sampcntInt(0), + sampincFrac(0), + sampincInt(0), loop_pcm16b(0), index(0), loop_index(0), @@ -114,9 +116,11 @@ struct channel_struct u16 loopstart; u32 length; u32 totlength; - s64 totlength_shifted; - s64 sampcnt; // .32fxp - s64 sampinc; // .32fxp + s32 totlength_shifted; + u32 sampcntFrac; + s32 sampcntInt; + u32 sampincFrac; + u32 sampincInt; s16 pcm16b[SPUINTERPOLATION_TAPS]; // ADPCM specific s16 loop_pcm16b; @@ -193,7 +197,8 @@ public: u8 running; u32 curdad; u32 maxdad; - s64 sampcnt; + u32 sampcntFrac; + u32 sampcntInt; SPUFifo fifo; } runtime; } cap[2]; diff --git a/desmume/src/frontend/windows/soundView.cpp b/desmume/src/frontend/windows/soundView.cpp index 085a44707..786b789a1 100644 --- a/desmume/src/frontend/windows/soundView.cpp +++ b/desmume/src/frontend/windows/soundView.cpp 
@@ -195,7 +195,7 @@ void SoundView_Refresh(bool forceRedraw) sprintf(buf, "$%04X (%.1f Hz)", thischan.timer, (ARM7_CLOCK/2) / (double)(0x10000 - thischan.timer)); SetDlgItemText(hDlg, IDC_SOUND0TMR+chanId, buf); - sprintf(buf, "samp #%d / #%d", (s32)(thischan.sampcnt >> 32), thischan.totlength << format_shift[thischan.format]); + sprintf(buf, "samp #%d / #%d", thischan.sampcntInt, thischan.totlength_shifted); SetDlgItemText(hDlg, IDC_SOUND0POSLEN+chanId, buf); } else { From aa25e1dd54eff8bc1e701dcc7294b7280b51871f Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Mon, 13 Jun 2022 15:14:54 +1000 Subject: [PATCH 6/7] tighter bounds on interpolation accuracy --- desmume/src/SPU.cpp | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index 46b92758e..7bca2ae15 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -105,8 +105,8 @@ static const u16 adpcmtbl[89] = static s32 precalcdifftbl[89][16]; static u8 precalcindextbl[89][8]; -static s16 catmullrom_lut[CATMULLROM_INTERPOLATION_RESOLUTION][4]; -static s16 cos_lut[COSINE_INTERPOLATION_RESOLUTION]; +static u16 catmullrom_lut[CATMULLROM_INTERPOLATION_RESOLUTION][4]; +static u16 cos_lut[COSINE_INTERPOLATION_RESOLUTION]; static const double ARM7_CLOCK = 33513982; @@ -215,13 +215,13 @@ int SPU_Init(int coreid, int newBufferSizeBytes) double b = x*x*(3*x - 5) + 2; double c = x*(x*(-3*x + 4) + 1); double d = x*x*(x - 1); - catmullrom_lut[i][0] = (s16)(32767 * 0.5*a); - catmullrom_lut[i][1] = (s16)(32767 * 0.5*b); - catmullrom_lut[i][2] = (s16)(32767 * 0.5*c); - catmullrom_lut[i][3] = (s16)(32767 * 0.5*d); + catmullrom_lut[i][0] = (u16)(65535 * -0.5*a); + catmullrom_lut[i][1] = (u16)(65535 * 0.5*b); + catmullrom_lut[i][2] = (u16)(65535 * 0.5*c); + catmullrom_lut[i][3] = (u16)(65535 * -0.5*d); } for (size_t i = 0; i < COSINE_INTERPOLATION_RESOLUTION; i++) - cos_lut[i] = (s16)(32767 * ((1.0 - 
cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); + cos_lut[i] = (u16)((1u<<16) * ((1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); SPU_core = new SPU_struct((int)ceil(samples_per_hline)); SPU_Reset(); @@ -1050,13 +1050,13 @@ template static FORCEINLINE s32 Interpola case SPUInterpolation_CatmullRom: { // Catmull-Rom spline - // Delay: 2 samples + // Delay: 2 samples, Maximum gain: 1.25 s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 3)]; s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 2)]; s32 c = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 d = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; - const s16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)]; - return (a*w[0] + b*w[1] + c*w[2] + d*w[3]) >> 15; + const u16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)]; + return (-a*(s32)w[0] + b*(s32)w[1] + c*(s32)w[2] - d*(s32)w[3]) >> 16; } case SPUInterpolation_Cosine: @@ -1064,25 +1064,26 @@ template static FORCEINLINE s32 Interpola // Cosine Interpolation Formula: // ratio2 = (1 - cos(ratio * M_PI)) / 2 // sampleI = sampleA * (1 - ratio2) + sampleB * ratio2 - // Delay: 1 sample + // Delay: 1 sample, Maximum gain: 1.0 s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; - return a + ((b - a)*cos_lut[subPos >> (32 - COSINE_INTERPOLATION_RESOLUTION_BITS)] >> 15); + s32 subPos16 = (s32)cos_lut[subPos >> (32 - COSINE_INTERPOLATION_RESOLUTION_BITS)]; + return a + ((b - a)*subPos16 >> 16); } case SPUInterpolation_Linear: { // Linear Interpolation Formula: // sampleI = sampleA * (1 - ratio) + sampleB * ratio - // Delay: 1 sample + // Delay: 1 sample, Maximum gain: 1.0 s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; - s32 subPos15 = subPos >> (32 - 15); - return a + ((b - a)*subPos15 >> 15); + s32 subPos16 = subPos >> (32 - 16); + return a + ((b - a)*subPos16 >> 
16); } default: - // Delay: 0 samples + // Delay: 0 samples, Maximum gain: 1.0 return pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs)]; } } From 09f7ab13c73b1f7c3aceb5e86a3a154cec0c858b Mon Sep 17 00:00:00 2001 From: Aikku93 Date: Mon, 13 Jun 2022 16:44:24 +1000 Subject: [PATCH 7/7] fix overflow problems Catmull-Rom can give outputs greater than 16 bits, so we must use 15-bit precision. Also, make sure to use floor() to force a round-down regardless of host rounding behaviour. --- desmume/src/SPU.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index 7bca2ae15..4045be3c6 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -215,13 +215,13 @@ int SPU_Init(int coreid, int newBufferSizeBytes) double b = x*x*(3*x - 5) + 2; double c = x*(x*(-3*x + 4) + 1); double d = x*x*(x - 1); - catmullrom_lut[i][0] = (u16)(65535 * -0.5*a); - catmullrom_lut[i][1] = (u16)(65535 * 0.5*b); - catmullrom_lut[i][2] = (u16)(65535 * 0.5*c); - catmullrom_lut[i][3] = (u16)(65535 * -0.5*d); + catmullrom_lut[i][0] = (u16)floor((1u<<15) * -0.5*a); + catmullrom_lut[i][1] = (u16)floor((1u<<15) * 0.5*b); + catmullrom_lut[i][2] = (u16)floor((1u<<15) * 0.5*c); + catmullrom_lut[i][3] = (u16)floor((1u<<15) * -0.5*d); } for (size_t i = 0; i < COSINE_INTERPOLATION_RESOLUTION; i++) - cos_lut[i] = (u16)((1u<<16) * ((1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); + cos_lut[i] = (u16)floor((1u<<16) * ((1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); SPU_core = new SPU_struct((int)ceil(samples_per_hline)); SPU_Reset(); @@ -1056,7 +1056,7 @@ template static FORCEINLINE s32 Interpola s32 c = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; s32 d = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; const u16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)]; - return (-a*(s32)w[0] + b*(s32)w[1] + c*(s32)w[2] - d*(s32)w[3]) >> 16; + return (-a*(s32)w[0] + b*(s32)w[1] + 
c*(s32)w[2] - d*(s32)w[3]) >> 15; } case SPUInterpolation_Cosine: