diff --git a/desmume/src/SPU.cpp b/desmume/src/SPU.cpp index d113e7f97..4045be3c6 100644 --- a/desmume/src/SPU.cpp +++ b/desmume/src/SPU.cpp @@ -49,7 +49,14 @@ static inline u8 read08(u32 addr) { return _MMU_read08 static inline s8 read_s8(u32 addr) { return (s8)_MMU_read08(addr); } #define K_ADPCM_LOOPING_RECOVERY_INDEX 99999 -#define COSINE_INTERPOLATION_RESOLUTION 8192 + +#define CATMULLROM_INTERPOLATION_RESOLUTION_BITS 11 +#define CATMULLROM_INTERPOLATION_RESOLUTION (1< +static FORCEINLINE T AddAndReturnCarry(T *a, T b) { + T c = (*a >= (-b)); + *a += b; + return c; +} + //--------------external spu interface--------------- int SPU_ChangeSoundCore(int coreid, int newBufferSizeBytes) @@ -200,9 +205,23 @@ void SPU_ReInit(bool fakeBoot) int SPU_Init(int coreid, int newBufferSizeBytes) { - // Build the cosine interpolation LUT + // Build the interpolation LUTs + for (size_t i = 0; i < CATMULLROM_INTERPOLATION_RESOLUTION; i++) { + // This is the Catmull-Rom spline, refactored into a FIR filter + // If we wanted to, we could stick entirely to integer maths + // here, but I doubt it's worth the hassle. + double x = i / (double)CATMULLROM_INTERPOLATION_RESOLUTION; + double a = x*(x*(-x + 2) - 1); + double b = x*x*(3*x - 5) + 2; + double c = x*(x*(-3*x + 4) + 1); + double d = x*x*(x - 1); + catmullrom_lut[i][0] = (u16)floor((1u<<15) * -0.5*a); + catmullrom_lut[i][1] = (u16)floor((1u<<15) * 0.5*b); + catmullrom_lut[i][2] = (u16)floor((1u<<15) * 0.5*c); + catmullrom_lut[i][3] = (u16)floor((1u<<15) * -0.5*d); + } for (size_t i = 0; i < COSINE_INTERPOLATION_RESOLUTION; i++) - cos_lut[i] = (1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5; + cos_lut[i] = (u16)floor((1u<<16) * ((1.0 - cos(((double)i/(double)COSINE_INTERPOLATION_RESOLUTION) * M_PI)) * 0.5)); SPU_core = new SPU_struct((int)ceil(samples_per_hline)); SPU_Reset(); @@ -366,7 +385,11 @@ void SPU_struct::ShutUp() static FORCEINLINE void adjust_channel_timer(channel_struct *chan) { - chan->sampinc = (((double)ARM7_CLOCK) / (DESMUME_SAMPLE_RATE * 2)) / (double)(0x10000 - chan->timer); + // ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2) / (2^16 - Timer) + // = ARM7_CLOCK / (DESMUME_SAMPLE_RATE*2 * (2^16 - Timer)) + // ... and then round up for good measure + u64 sampinc = ((u32)ARM7_CLOCK*(1ull << 32) - 1) / (DESMUME_SAMPLE_RATE * 2ull * (0x10000 - chan->timer)) + 1; + chan->sampincInt = (u32)(sampinc >> 32), chan->sampincFrac = (u32)sampinc; } void SPU_struct::KeyProbe(int chan_num) @@ -399,6 +422,12 @@ void SPU_struct::KeyOn(int channel) thischan.totlength = thischan.length + thischan.loopstart; adjust_channel_timer(&thischan); + thischan.pcm16bOffs = 0; + for(int i=0;i> 0; case 0x505: return regs.soundbias >> 8; - + //SNDCAP0CNT/SNDCAP1CNT case 0x508: case 0x509: @@ -742,7 +765,7 @@ void SPU_struct::ProbeCapture(int which) u32 len = cap.len; if(len==0) len=1; cap.runtime.maxdad = cap.dad + len*4; - cap.runtime.sampcnt = 0; + cap.runtime.sampcntFrac = cap.runtime.sampcntInt = 0; cap.runtime.fifo.reset(); } @@ -1018,161 +1041,109 @@ void SPU_struct::WriteLong(u32 addr, u32 val) } //switch on address } -template static FORCEINLINE s32 Interpolate(s32 a, s32 b, double ratio) +////////////////////////////////////////////////////////////////////////////// + +template static FORCEINLINE s32 Interpolate(const s16 *pcm16b, u8 pcm16bOffs, u32 subPos) { - double sampleA = (double)a; - double sampleB = (double)b; - ratio = ratio - sputrunc(ratio); - switch (INTERPOLATE_MODE) { + case SPUInterpolation_CatmullRom: + { + // Catmull-Rom spline + // Delay: 2 samples, Maximum gain: 1.25 + s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 3)]; + s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 2)]; + s32 c = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; + s32 d = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; + const u16 *w = catmullrom_lut[subPos >> (32 - CATMULLROM_INTERPOLATION_RESOLUTION_BITS)]; + return (-a*(s32)w[0] + b*(s32)w[1] + c*(s32)w[2] - d*(s32)w[3]) >> 15; + } + case SPUInterpolation_Cosine: + { // Cosine Interpolation Formula: // ratio2 = (1 - cos(ratio * M_PI)) / 2 // sampleI = sampleA * (1 - ratio2) + sampleB * ratio2 - return s32floor((cos_lut[(unsigned int)(ratio * (double)COSINE_INTERPOLATION_RESOLUTION)] * (sampleB - sampleA)) + sampleA); - break; - + // Delay: 1 sample, Maximum gain: 1.0 + s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; + s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; + s32 subPos16 = (s32)cos_lut[subPos >> (32 - COSINE_INTERPOLATION_RESOLUTION_BITS)]; + return a + ((b - a)*subPos16 >> 16); + } + case SPUInterpolation_Linear: + { // Linear Interpolation Formula: // sampleI = sampleA * (1 - ratio) + sampleB * ratio - return s32floor((ratio * (sampleB - sampleA)) + sampleA); - break; - + // Delay: 1 sample, Maximum gain: 1.0 + s32 a = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 1)]; + s32 b = pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs - 0)]; + s32 subPos16 = subPos >> (32 - 16); + return a + ((b - a)*subPos16 >> 16); + } + default: - break; + // Delay: 0 samples, Maximum gain: 1.0 + return pcm16b[SPUCHAN_PCM16B_AT(pcm16bOffs)]; + } +} + +static FORCEINLINE s32 Fetch8BitData(channel_struct *chan, s32 pos) +{ + if(pos < 0) return 0; + + return read_s8(chan->addr + pos*1) << 8; +} + +static FORCEINLINE s32 Fetch16BitData(channel_struct *chan, s32 pos) +{ + if(pos < 0) return 0; + + return read16(chan->addr + pos*2); +} + +static FORCEINLINE s32 FetchADPCMData(channel_struct *chan, s32 pos) +{ + if(pos < 8) return 0; + + s16 last = chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)]; + + if(pos == (chan->loopstart<<3)) { + //if(chan->loop_index != K_ADPCM_LOOPING_RECOVERY_INDEX) printf("over-snagging\n"); + chan->loop_pcm16b = last; + chan->loop_index = chan->index; } - return a; + const u32 shift = (pos&1) * 4; + const u32 data4bit = ((u32)read08(chan->addr + (pos>>1))) >> shift; + const s32 diff = precalcdifftbl [chan->index][data4bit & 0xF]; + chan->index = precalcindextbl[chan->index][data4bit & 0x7]; + + return MinMax(last + diff, -0x8000, 0x7FFF); } -////////////////////////////////////////////////////////////////////////////// - -template static FORCEINLINE void Fetch8BitData(channel_struct *chan, s32 *data) +static FORCEINLINE s32 FetchPSGData(channel_struct *chan, s32 pos) { - if (chan->sampcnt < 0) - { - *data = 0; - return; - } + if(pos < 0 || chan->num < 8) return 0; - u32 loc = sputrunc(chan->sampcnt); - if(INTERPOLATE_MODE != SPUInterpolation_None) + // Chan 8..13: Square wave, Chan 14..15: Noise + if(chan->num < 14) { - s32 a = (s32)(read_s8(chan->addr + loc) << 8), b = a; - if(loc < (chan->totlength << 2) - 1) - b = (s32)(read_s8(chan->addr + loc + 1) << 8); - else if(chan->repeat == 1) - b = (s32)(read_s8(chan->addr + chan->loopstart*4) << 8); - *data = Interpolate(a, b, chan->sampcnt); + // Doing this avoids using a LUT + return ((pos%8u) > chan->waveduty) ? (-0x7FFF) : (+0x7FFF); } else - *data = (s32)read_s8(chan->addr + loc)<< 8; -} - -template static FORCEINLINE void Fetch16BitData(const channel_struct * const chan, s32 *data) -{ - if (chan->sampcnt < 0) { - *data = 0; - return; - } - - u32 loc = sputrunc(chan->sampcnt); - if(INTERPOLATE_MODE != SPUInterpolation_None) - { - s32 a = (s32)read16(loc*2 + chan->addr), b = a; - if(loc < (chan->totlength << 1) - 1) - b = (s32)read16(chan->addr + loc*2 + 2); - else if(chan->repeat == 1) - b = (s32)read16(chan->addr + chan->loopstart*2); - *data = Interpolate(a, b, chan->sampcnt); - } - else - *data = read16(chan->addr + loc*2); -} - -template static FORCEINLINE void FetchADPCMData(channel_struct * const chan, s32 * const data) -{ - if (chan->sampcnt < 8) - { - *data = 0; - return; - } - - // No sense decoding, just return the last sample - if (chan->lastsampcnt != sputrunc(chan->sampcnt)){ - - const u32 endExclusive = sputrunc(chan->sampcnt+1); - for (u32 i = chan->lastsampcnt+1; i < endExclusive; i++) + if(chan->x & 0x1) { - const u32 shift = (i&1)<<2; - const u32 data4bit = ((u32)read08(chan->addr + (i>>1))) >> shift; - - const s32 diff = precalcdifftbl[chan->index][data4bit & 0xF]; - chan->index = precalcindextbl[chan->index][data4bit & 0x7]; - - chan->pcm16b_last = chan->pcm16b; - chan->pcm16b = MinMax(chan->pcm16b+diff, -0x8000, 0x7FFF); - - if(i == (chan->loopstart<<3)) { - if(chan->loop_index != K_ADPCM_LOOPING_RECOVERY_INDEX) printf("over-snagging\n"); - chan->loop_pcm16b = chan->pcm16b; - chan->loop_index = chan->index; - } + chan->x = (chan->x >> 1) ^ 0x6000; + return -0x7FFF; } - - chan->lastsampcnt = sputrunc(chan->sampcnt); - } - - if(INTERPOLATE_MODE != SPUInterpolation_None) - *data = Interpolate((s32)chan->pcm16b_last,(s32)chan->pcm16b,chan->sampcnt); - else - *data = (s32)chan->pcm16b; -} - -static FORCEINLINE void FetchPSGData(channel_struct *chan, s32 *data) -{ - if (chan->sampcnt < 0) - { - *data = 0; - return; - } - - if(chan->num < 8) - { - *data = 0; - } - else if(chan->num < 14) - { - *data = (s32)wavedutytbl[chan->waveduty][(sputrunc(chan->sampcnt)) & 0x7]; - } - else - { - if(chan->lastsampcnt == sputrunc(chan->sampcnt)) + else { - *data = (s32)chan->psgnoise_last; - return; + chan->x >>= 1; + return +0x7FFF; } - - u32 max = sputrunc(chan->sampcnt); - for(u32 i = chan->lastsampcnt; i < max; i++) - { - if(chan->x & 0x1) - { - chan->x = (chan->x >> 1) ^ 0x6000; - chan->psgnoise_last = -0x7FFF; - } - else - { - chan->x >>= 1; - chan->psgnoise_last = 0x7FFF; - } - } - - chan->lastsampcnt = sputrunc(chan->sampcnt); - - *data = (s32)chan->psgnoise_last; } } @@ -1201,66 +1172,42 @@ static FORCEINLINE void MixLR(SPU_struct* SPU, channel_struct *chan, s32 data) template static FORCEINLINE void TestForLoop(SPU_struct *SPU, channel_struct *chan) { - const int shift = (FORMAT == 0 ? 2 : 1); + // Do nothing if we haven't reached the end + if(chan->sampcntInt < chan->totlength_shifted) return; - chan->sampcnt += chan->sampinc; - - if (chan->sampcnt > chan->double_totlength_shifted) + // Kill the channel if we don't repeat + if(chan->repeat != 1) { - // Do we loop? Or are we done? - if (chan->repeat == 1) + SPU->KeyOff(chan->num); + SPU->bufpos = SPU->buflength; + return; + } + + // ADPCM needs special handling + if(FORMAT == 2) + { + // Minimum length (the sum of PNT+LEN) is 4 words (16 bytes), + // smaller values (0..3 words) are causing hang-ups + // (busy bit remains set infinite, but no sound output occurs). + // fix: 7th Dragon (JP) - http://sourceforge.net/p/desmume/bugs/1357/ + if (chan->totlength < 4) return; + + // Stash loop sample and index + if(chan->loop_index == K_ADPCM_LOOPING_RECOVERY_INDEX) { - while (chan->sampcnt > chan->double_totlength_shifted) - chan->sampcnt -= chan->double_totlength_shifted - (double)(chan->loopstart << shift); - //chan->sampcnt = (double)(chan->loopstart << shift); + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = (s16)read16(chan->addr); + chan->index = read08(chan->addr+2) & 0x7F; } else { - SPU->KeyOff(chan->num); - SPU->bufpos = SPU->buflength; + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = chan->loop_pcm16b; + chan->index = chan->loop_index; } } -} -static FORCEINLINE void TestForLoop2(SPU_struct *SPU, channel_struct *chan) -{ - // Minimum length (the sum of PNT+LEN) is 4 words (16 bytes), - // smaller values (0..3 words) are causing hang-ups - // (busy bit remains set infinite, but no sound output occurs). - // fix: 7th Dragon (JP) - http://sourceforge.net/p/desmume/bugs/1357/ - if (chan->totlength < 4) return; - - chan->sampcnt += chan->sampinc; - - if (chan->sampcnt > chan->double_totlength_shifted) - { - // Do we loop? Or are we done? - if (chan->repeat == 1) - { - double step = (chan->double_totlength_shifted - (double)(chan->loopstart << 3)); - - while (chan->sampcnt > chan->double_totlength_shifted) chan->sampcnt -= step; - - if(chan->loop_index == K_ADPCM_LOOPING_RECOVERY_INDEX) - { - chan->pcm16b = (s16)read16(chan->addr); - chan->index = read08(chan->addr+2) & 0x7F; - chan->lastsampcnt = 7; - } - else - { - chan->pcm16b = chan->loop_pcm16b; - chan->index = chan->loop_index; - chan->lastsampcnt = (chan->loopstart << 3); - } - } - else - { - chan->status = CHANSTAT_STOPPED; - SPU->KeyOff(chan->num); - SPU->bufpos = SPU->buflength; - } - } + // Wrap sampcnt + u32 step = chan->totlength_shifted - (chan->loopstart << format_shift[FORMAT]); + while (chan->sampcntInt >= chan->totlength_shifted) chan->sampcntInt -= step; } template FORCEINLINE static void SPU_Mix(SPU_struct* SPU, channel_struct *chan, s32 data) @@ -1281,25 +1228,32 @@ template { for (; SPU->bufpos < SPU->buflength; SPU->bufpos++) { - if(CHANNELS != -1) + // Advance sampcnt one sample at a time. This is + // needed to keep pcm16b[] filled for interpolation. + u32 nSamplesToSkip = chan->sampincInt + AddAndReturnCarry(&chan->sampcntFrac, chan->sampincFrac); + while(nSamplesToSkip--) { - s32 data; + s16 data = 0; + s32 pos = chan->sampcntInt; switch(FORMAT) { - case 0: Fetch8BitData(chan, &data); break; - case 1: Fetch16BitData(chan, &data); break; - case 2: FetchADPCMData(chan, &data); break; - case 3: FetchPSGData(chan, &data); break; + case 0: data = Fetch8BitData (chan, pos); break; + case 1: data = Fetch16BitData(chan, pos); break; + case 2: data = FetchADPCMData(chan, pos); break; + case 3: data = FetchPSGData (chan, pos); break; default: break; } - SPU_Mix(SPU, chan, data); + chan->pcm16bOffs++; + chan->pcm16b[SPUCHAN_PCM16B_AT(chan->pcm16bOffs)] = data; + + chan->sampcntInt++; + if (FORMAT != 3) TestForLoop(SPU, chan); } - switch(FORMAT) { - case 0: case 1: TestForLoop(SPU, chan); break; - case 2: TestForLoop2(SPU, chan); break; - case 3: chan->sampcnt += chan->sampinc; break; - default: break; + if(CHANNELS != -1) + { + s32 data = Interpolate(chan->pcm16b, chan->pcm16bOffs, chan->sampcntFrac); + SPU_Mix(SPU, chan, data); } } } @@ -1320,12 +1274,14 @@ template template FORCEINLINE static void __SPU_ChanUpdate(const bool actuallyMix, SPU_struct* const SPU, channel_struct* const chan) { + // NOTE: PSG doesn't use interpolation, or it would try to + // interpolate between the raw sample points (very bad) switch(chan->format) { case 0: ___SPU_ChanUpdate<0,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; case 1: ___SPU_ChanUpdate<1,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; case 2: ___SPU_ChanUpdate<2,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; - case 3: ___SPU_ChanUpdate<3,INTERPOLATE_MODE>(actuallyMix, SPU, chan); break; + case 3: ___SPU_ChanUpdate<3,SPUInterpolation_None>(actuallyMix, SPU, chan); break; default: assert(false); } } @@ -1334,9 +1290,10 @@ FORCEINLINE static void _SPU_ChanUpdate(const bool actuallyMix, SPU_struct* cons { switch(CommonSettings.spuInterpolationMode) { - case SPUInterpolation_None: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_Linear: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; - case SPUInterpolation_Cosine: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_None: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_Linear: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_Cosine: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; + case SPUInterpolation_CatmullRom: __SPU_ChanUpdate(actuallyMix, SPU, chan); break; default: assert(false); } } @@ -1487,13 +1444,13 @@ static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) for (int capchan = 0; capchan < 2; capchan++) { + SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; + channel_struct& srcChan = SPU->channels[1 + 2 * capchan]; if (SPU->regs.cap[capchan].runtime.running) { - SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; - u32 last = sputrunc(cap.runtime.sampcnt); - cap.runtime.sampcnt += SPU->channels[1+2*capchan].sampinc; - u32 curr = sputrunc(cap.runtime.sampcnt); - for (u32 j = last; j < curr; j++) + u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&cap.runtime.sampcntFrac, srcChan.sampincFrac); + cap.runtime.sampcntInt += nSamplesToProcess; + while(nSamplesToProcess--) { //so, this is a little strange. why go through a fifo? //it seems that some games will set up a reverb effect by capturing @@ -1544,7 +1501,7 @@ static void SPU_MixAudio_Advanced(bool actuallyMix, SPU_struct *SPU, int length) if (cap.runtime.curdad >= cap.runtime.maxdad) { cap.runtime.curdad = cap.dad; - cap.runtime.sampcnt -= cap.len*multiplier; + cap.runtime.sampcntInt -= cap.len*multiplier; } } //sampinc loop } //if capchan running @@ -1604,14 +1561,14 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) for (int capchan = 0; capchan < 2; capchan++) { SPU_struct::REGS::CAP& cap = SPU->regs.cap[capchan]; + channel_struct& srcChan = SPU->channels[1 + 2 * capchan]; if (cap.runtime.running) { for (int samp = 0; samp < length; samp++) { - u32 last = sputrunc(cap.runtime.sampcnt); - cap.runtime.sampcnt += SPU->channels[1+2*capchan].sampinc; - u32 curr = sputrunc(cap.runtime.sampcnt); - for (u32 j = last; j < curr; j++) + u32 nSamplesToProcess = srcChan.sampincInt + AddAndReturnCarry(&cap.runtime.sampcntFrac, srcChan.sampincFrac); + cap.runtime.sampcntInt += nSamplesToProcess; + while (nSamplesToProcess--) { if (cap.bits8) { @@ -1627,7 +1584,7 @@ static void SPU_MixAudio(bool actuallyMix, SPU_struct *SPU, int length) if (cap.runtime.curdad >= cap.runtime.maxdad) { cap.runtime.curdad = cap.dad; - cap.runtime.sampcnt -= cap.len*(cap.bits8?4:2); + cap.runtime.sampcntInt -= cap.len*(cap.bits8?4:2); } } } @@ -1733,7 +1690,7 @@ void SPU_Emulate_user(bool mix) postProcessBufferSize = freeSampleCount * 2 * sizeof(s16); postProcessBuffer = (s16 *)realloc(postProcessBuffer, postProcessBufferSize); } - + if (soundProcessor->PostProcessSamples != NULL) { processedSampleCount = soundProcessor->PostProcessSamples(postProcessBuffer, freeSampleCount, _currentSynchMode, _currentSynchronizer); @@ -1957,7 +1914,7 @@ void WAV_WavSoundUpdate(void* soundData, int numSamples, WAVMode mode) void spu_savestate(EMUFILE &os) { //version - os.write_32LE(6); + os.write_32LE(7); SPU_struct *spu = SPU_core; @@ -1973,18 +1930,18 @@ void spu_savestate(EMUFILE &os) os.write_u8(chan.repeat); os.write_u8(chan.format); os.write_u8(chan.status); + os.write_u8(chan.pcm16bOffs); os.write_32LE(chan.addr); os.write_16LE(chan.timer); os.write_16LE(chan.loopstart); os.write_32LE(chan.length); - os.write_doubleLE(chan.sampcnt); - os.write_doubleLE(chan.sampinc); - os.write_32LE(chan.lastsampcnt); - os.write_16LE(chan.pcm16b); - os.write_16LE(chan.pcm16b_last); + os.write_32LE(chan.sampcntFrac); + os.write_32LE(chan.sampcntInt); + os.write_32LE(chan.sampincFrac); + os.write_32LE(chan.sampincInt); + for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) os.write_16LE(chan.pcm16b[i]); os.write_32LE(chan.index); os.write_16LE(chan.x); - os.write_16LE(chan.psgnoise_last); os.write_u8(chan.keyon); } @@ -2010,7 +1967,8 @@ void spu_savestate(EMUFILE &os) os.write_u8(spu->regs.cap[i].runtime.running); os.write_32LE(spu->regs.cap[i].runtime.curdad); os.write_32LE(spu->regs.cap[i].runtime.maxdad); - os.write_doubleLE(spu->regs.cap[i].runtime.sampcnt); + os.write_32LE(spu->regs.cap[i].runtime.sampcntFrac); + os.write_32LE(spu->regs.cap[i].runtime.sampcntInt); } for (int i = 0; i < 2; i++) @@ -2044,29 +2002,53 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_u8(chan.repeat); is.read_u8(chan.format); is.read_u8(chan.status); + if (version >= 7) is.read_u8(chan.pcm16bOffs); else chan.pcm16bOffs = 0; is.read_32LE(chan.addr); is.read_16LE(chan.timer); is.read_16LE(chan.loopstart); is.read_32LE(chan.length); chan.totlength = chan.length + chan.loopstart; - chan.double_totlength_shifted = (double)(chan.totlength << format_shift[chan.format]); - //printf("%f\n",chan.double_totlength_shifted); - if (version >= 2) + chan.totlength_shifted = chan.totlength << format_shift[chan.format]; + if(version >= 7) { + is.read_32LE(chan.sampcntFrac); + is.read_32LE(chan.sampcntInt); + is.read_32LE(chan.sampincFrac); + is.read_32LE(chan.sampincInt); + } + else if (version >= 2) { - is.read_doubleLE(chan.sampcnt); - is.read_doubleLE(chan.sampinc); + double temp; + s64 temp2; + is.read_doubleLE(temp); temp2 = (s64)(temp * (1ll << 32)); + chan.sampcntFrac = (u32)temp2; + chan.sampcntInt = (s32)(temp2 >> 32); + is.read_doubleLE(temp); temp2 = (u64)(temp * (1ull << 32)); // Intentionally unsigned + chan.sampincFrac = (u32)temp2; + chan.sampincInt = (u32)(temp2 >> 32); } else { - is.read_32LE(*(u32 *)&chan.sampcnt); - is.read_32LE(*(u32 *)&chan.sampinc); + // FIXME + // What even is supposed to be happening here? + // sampcnt and sampinc were double type before + // I even made any changes, so this is broken. + chan.sampcntFrac = 0; + is.read_32LE(chan.sampcntInt); + chan.sampincFrac = 0; + is.read_32LE(chan.sampincInt); + } + if (version >= 7) { + for (int i = 0; i < SPUINTERPOLATION_TAPS; i++) is.read_16LE(chan.pcm16b[i]); + } + else + { + is.fseek(4, SEEK_CUR); // chan.lastsampcnt (LE32) + is.read_16LE(chan.pcm16b[0]); // chan.pcm16b + is.fseek(2, SEEK_CUR); // chan.pcm16b_last } - is.read_32LE(chan.lastsampcnt); - is.read_16LE(chan.pcm16b); - is.read_16LE(chan.pcm16b_last); is.read_32LE(chan.index); is.read_16LE(chan.x); - is.read_16LE(chan.psgnoise_last); + if (version < 7) is.fseek(2, SEEK_CUR); // chan.psgnoise_last (LE16) if (version >= 4) is.read_u8(chan.keyon); @@ -2105,7 +2087,18 @@ bool spu_loadstate(EMUFILE &is, int size) is.read_u8(spu->regs.cap[i].runtime.running); is.read_32LE(spu->regs.cap[i].runtime.curdad); is.read_32LE(spu->regs.cap[i].runtime.maxdad); - is.read_doubleLE(spu->regs.cap[i].runtime.sampcnt); + if (version >= 7) { + is.read_32LE(spu->regs.cap[i].runtime.sampcntFrac); + is.read_32LE(spu->regs.cap[i].runtime.sampcntInt); + } + else + { + double temp; + u64 temp2; + is.read_doubleLE(temp); temp2 = (u64)(temp * (1ull << 32)); + spu->regs.cap[i].runtime.sampcntFrac = (u32)temp2; + spu->regs.cap[i].runtime.sampcntInt = (u32)(temp2 >> 32); + } } } diff --git a/desmume/src/SPU.h b/desmume/src/SPU.h index 6883b02ca..5ba6d1c96 100644 --- a/desmume/src/SPU.h +++ b/desmume/src/SPU.h @@ -36,10 +36,9 @@ class EMUFILE; #define CHANSTAT_STOPPED 0 #define CHANSTAT_PLAY 1 +#define SPUINTERPOLATION_TAPS 4 // Must be at least 4 for Catmull-Rom interpolation //who made these static? theyre used in multiple places. -FORCEINLINE u32 sputrunc(float f) { return u32floor(f); } -FORCEINLINE u32 sputrunc(double d) { return u32floor(d); } FORCEINLINE s32 spumuldiv7(s32 val, u8 multiplier) { assert(multiplier <= 127); return (multiplier == 127) ? val : ((val * multiplier) >> 7); @@ -49,7 +48,8 @@ enum SPUInterpolationMode { SPUInterpolation_None = 0, SPUInterpolation_Linear = 1, - SPUInterpolation_Cosine = 2 + SPUInterpolation_Cosine = 2, + SPUInterpolation_CatmullRom = 3, }; struct SoundInterface_struct @@ -84,22 +84,21 @@ struct channel_struct format(0), keyon(0), status(0), + pcm16bOffs(0), addr(0), timer(0), loopstart(0), length(0), totlength(0), - double_totlength_shifted(0.0), - sampcnt(0.0), - sampinc(0.0), - lastsampcnt(0), - pcm16b(0), - pcm16b_last(0), + totlength_shifted(0), + sampcntFrac(0), + sampcntInt(0), + sampincFrac(0), + sampincInt(0), loop_pcm16b(0), index(0), loop_index(0), - x(0), - psgnoise_last(0) + x(0) {} u32 num; u8 vol; @@ -111,22 +110,24 @@ struct channel_struct u8 format; u8 keyon; u8 status; + u8 pcm16bOffs; u32 addr; u16 timer; u16 loopstart; u32 length; u32 totlength; - double double_totlength_shifted; - double sampcnt; - double sampinc; + s32 totlength_shifted; + u32 sampcntFrac; + s32 sampcntInt; + u32 sampincFrac; + u32 sampincInt; + s16 pcm16b[SPUINTERPOLATION_TAPS]; // ADPCM specific - u32 lastsampcnt; - s16 pcm16b, pcm16b_last; s16 loop_pcm16b; s32 index; int loop_index; + // PSG noise u16 x; - s16 psgnoise_last; }; class SPUFifo @@ -196,7 +197,8 @@ public: u8 running; u32 curdad; u32 maxdad; - double sampcnt; + u32 sampcntFrac; + u32 sampcntInt; SPUFifo fifo; } runtime; } cap[2]; diff --git a/desmume/src/frontend/windows/main.cpp b/desmume/src/frontend/windows/main.cpp index b1001929b..5a3cb1608 100644 --- a/desmume/src/frontend/windows/main.cpp +++ b/desmume/src/frontend/windows/main.cpp @@ -6532,6 +6532,7 @@ static LRESULT CALLBACK SoundSettingsDlgProc(HWND hDlg, UINT uMsg, WPARAM wParam SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"None (harsh, most accurate to NDS)"); SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"Linear (smooth, most sound detail loss)"); SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"Cosine (balanced, smooth and accurate)"); + SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_ADDSTRING, 0, (LPARAM)"Catmull-Rom (smooth and bright)"); SendDlgItemMessage(hDlg, IDC_SPU_INTERPOLATION_CB, CB_SETCURSEL, (int)CommonSettings.spuInterpolationMode, 0); // Setup Sound Buffer Size Edit Text diff --git a/desmume/src/frontend/windows/soundView.cpp b/desmume/src/frontend/windows/soundView.cpp index 809fe35f2..786b789a1 100644 --- a/desmume/src/frontend/windows/soundView.cpp +++ b/desmume/src/frontend/windows/soundView.cpp @@ -195,7 +195,7 @@ void SoundView_Refresh(bool forceRedraw) sprintf(buf, "$%04X (%.1f Hz)", thischan.timer, (ARM7_CLOCK/2) / (double)(0x10000 - thischan.timer)); SetDlgItemText(hDlg, IDC_SOUND0TMR+chanId, buf); - sprintf(buf, "samp #%d / #%d", sputrunc(thischan.sampcnt), thischan.totlength << format_shift[thischan.format]); + sprintf(buf, "samp #%d / #%d", thischan.sampcntInt, thischan.totlength_shifted); SetDlgItemText(hDlg, IDC_SOUND0POSLEN+chanId, buf); } else {