diff --git a/plugins/spu2-x/src/Debug.cpp b/plugins/spu2-x/src/Debug.cpp index db3127f586..8c88bcdf02 100644 --- a/plugins/spu2-x/src/Debug.cpp +++ b/plugins/spu2-x/src/Debug.cpp @@ -218,15 +218,15 @@ void DoFullDump() fprintf(dump, " - FB_ALPHA: %x\n", Cores[c].Revb.FB_ALPHA); fprintf(dump, " - FB_X: %x\n", Cores[c].Revb.FB_X); - fprintf(dump, " - FB_SRC_A: %x\n", Cores[c].Revb.FB_SRC_A); - fprintf(dump, " - FB_SRC_B: %x\n", Cores[c].Revb.FB_SRC_B); + fprintf(dump, " - FB_SIZE_A: %x\n", Cores[c].Revb.FB_SIZE_A); + fprintf(dump, " - FB_SIZE_B: %x\n", Cores[c].Revb.FB_SIZE_B); fprintf(dump, " - IIR_ALPHA: %x\n", Cores[c].Revb.IIR_ALPHA); fprintf(dump, " - IIR_COEF: %x\n", Cores[c].Revb.IIR_COEF); fprintf(dump, " - IIR_SRC_A0: %x\n", Cores[c].Revb.IIR_SRC_A0); fprintf(dump, " - IIR_SRC_A1: %x\n", Cores[c].Revb.IIR_SRC_A1); - fprintf(dump, " - IIR_SRC_B1: %x\n", Cores[c].Revb.IIR_SRC_B0); - fprintf(dump, " - IIR_SRC_B0: %x\n", Cores[c].Revb.IIR_SRC_B1); + fprintf(dump, " - IIR_SRC_B0: %x\n", Cores[c].Revb.IIR_SRC_B0); + fprintf(dump, " - IIR_SRC_B1: %x\n", Cores[c].Revb.IIR_SRC_B1); fprintf(dump, " - IIR_DEST_A0: %x\n", Cores[c].Revb.IIR_DEST_A0); fprintf(dump, " - IIR_DEST_A1: %x\n", Cores[c].Revb.IIR_DEST_A1); fprintf(dump, " - IIR_DEST_B0: %x\n", Cores[c].Revb.IIR_DEST_B0); diff --git a/plugins/spu2-x/src/Mixer.cpp b/plugins/spu2-x/src/Mixer.cpp index 19e9abc907..2574457494 100644 --- a/plugins/spu2-x/src/Mixer.cpp +++ b/plugins/spu2-x/src/Mixer.cpp @@ -675,6 +675,7 @@ StereoOut32 V_Core::Mix(const VoiceMixSet &inVoices, const StereoOut32 &Input, c // ToDo: // Bad EndA causes memory corruption. Bad for us, unknown on PS2! + // According to no$psx, effects always run but don't always write back, so the FxEnable check may be wrong if (!FxEnable || EffectsEndA >= 0x100000) return TD; @@ -886,194 +887,3 @@ __forceinline } } } - -///////////////////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////////////////// -// // - -/* ------------------------------------------------------------------------------ -PSX reverb hardware notes -by Neill Corlett ------------------------------------------------------------------------------ - -Yadda yadda disclaimer yadda probably not perfect yadda well it's okay anyway -yadda yadda. - ------------------------------------------------------------------------------ - -Basics ------- - -- The reverb buffer is 22khz 16-bit mono PCM. -- It starts at the reverb address given by 1DA2, extends to - the end of sound RAM, and wraps back to the 1DA2 address. - -Setting the address at 1DA2 resets the current reverb work address. - -This work address ALWAYS increments every 1/22050 sec., regardless of -whether reverb is enabled (bit 7 of 1DAA set). - -And the contents of the reverb buffer ALWAYS play, scaled by the -"reverberation depth left/right" volumes (1D84/1D86). -(which, by the way, appear to be scaled so 3FFF=approx. 1.0, 4000=-1.0) - ------------------------------------------------------------------------------ - -Register names --------------- - -These are probably not their real names. -These are probably not even correct names. -We will use them anyway, because we can. - -1DC0: FB_SRC_A (offset) -1DC2: FB_SRC_B (offset) -1DC4: IIR_ALPHA (coef.) -1DC6: ACC_COEF_A (coef.) -1DC8: ACC_COEF_B (coef.) -1DCA: ACC_COEF_C (coef.) -1DCC: ACC_COEF_D (coef.) -1DCE: IIR_COEF (coef.) -1DD0: FB_ALPHA (coef.) -1DD2: FB_X (coef.) -1DD4: IIR_DEST_A0 (offset) -1DD6: IIR_DEST_A1 (offset) -1DD8: ACC_SRC_A0 (offset) -1DDA: ACC_SRC_A1 (offset) -1DDC: ACC_SRC_B0 (offset) -1DDE: ACC_SRC_B1 (offset) -1DE0: IIR_SRC_A0 (offset) -1DE2: IIR_SRC_A1 (offset) -1DE4: IIR_DEST_B0 (offset) -1DE6: IIR_DEST_B1 (offset) -1DE8: ACC_SRC_C0 (offset) -1DEA: ACC_SRC_C1 (offset) -1DEC: ACC_SRC_D0 (offset) -1DEE: ACC_SRC_D1 (offset) -1DF0: IIR_SRC_B1 (offset) -1DF2: IIR_SRC_B0 (offset) -1DF4: MIX_DEST_A0 (offset) -1DF6: MIX_DEST_A1 (offset) -1DF8: MIX_DEST_B0 (offset) -1DFA: MIX_DEST_B1 (offset) -1DFC: IN_COEF_L (coef.) -1DFE: IN_COEF_R (coef.) - -The coefficients are signed fractional values. --32768 would be -1.0 - 32768 would be 1.0 (if it were possible... the highest is of course 32767) - -The offsets are (byte/8) offsets into the reverb buffer. -i.e. you multiply them by 8, you get byte offsets. -You can also think of them as (samples/4) offsets. -They appear to be signed. They can be negative. -None of the documented presets make them negative, though. - -Yes, 1DF0 and 1DF2 appear to be backwards. Not a typo. - ------------------------------------------------------------------------------ - -What it does ------------- - -We take all reverb sources: -- regular channels that have the reverb bit on -- cd and external sources, if their reverb bits are on -and mix them into one stereo 44100hz signal. - -Lowpass/downsample that to 22050hz. The PSX uses a proper bandlimiting -algorithm here, but I haven't figured out the hysterically exact specifics. -I use an 8-tap filter with these coefficients, which are nice but probably -not the real ones: - -0.037828187894 -0.157538631280 -0.321159685278 -0.449322115345 -0.449322115345 -0.321159685278 -0.157538631280 -0.037828187894 - -So we have two input samples (INPUT_SAMPLE_L, INPUT_SAMPLE_R) every 22050hz. - -* IN MY EMULATION, I divide these by 2 to make it clip less. - (and of course the L/R output coefficients are adjusted to compensate) - The real thing appears to not do this. - -At every 22050hz tick: -- If the reverb bit is enabled (bit 7 of 1DAA), execute the reverb - steady-state algorithm described below -- AFTERWARDS, retrieve the "wet out" L and R samples from the reverb buffer - (This part may not be exactly right and I guessed at the coefs. TODO: check later.) - L is: 0.333 * (buffer[MIX_DEST_A0] + buffer[MIX_DEST_B0]) - R is: 0.333 * (buffer[MIX_DEST_A1] + buffer[MIX_DEST_B1]) -- Advance the current buffer position by 1 sample - -The wet out L and R are then upsampled to 44100hz and played at the -"reverberation depth left/right" (1D84/1D86) volume, independent of the main -volume. - ------------------------------------------------------------------------------ - -Reverb steady-state -------------------- - -The reverb steady-state algorithm is fairly clever, and of course by -"clever" I mean "batshit insane". - -buffer[x] is relative to the current buffer position, not the beginning of -the buffer. Note that all buffer offsets must wrap around so they're -contained within the reverb work area. - -Clipping is performed at the end... maybe also sooner, but definitely at -the end. - -IIR_INPUT_A0 = buffer[IIR_SRC_A0] * IIR_COEF + INPUT_SAMPLE_L * IN_COEF_L; -IIR_INPUT_A1 = buffer[IIR_SRC_A1] * IIR_COEF + INPUT_SAMPLE_R * IN_COEF_R; -IIR_INPUT_B0 = buffer[IIR_SRC_B0] * IIR_COEF + INPUT_SAMPLE_L * IN_COEF_L; -IIR_INPUT_B1 = buffer[IIR_SRC_B1] * IIR_COEF + INPUT_SAMPLE_R * IN_COEF_R; - -IIR_A0 = IIR_INPUT_A0 * IIR_ALPHA + buffer[IIR_DEST_A0] * (1.0 - IIR_ALPHA); -IIR_A1 = IIR_INPUT_A1 * IIR_ALPHA + buffer[IIR_DEST_A1] * (1.0 - IIR_ALPHA); -IIR_B0 = IIR_INPUT_B0 * IIR_ALPHA + buffer[IIR_DEST_B0] * (1.0 - IIR_ALPHA); -IIR_B1 = IIR_INPUT_B1 * IIR_ALPHA + buffer[IIR_DEST_B1] * (1.0 - IIR_ALPHA); - -buffer[IIR_DEST_A0 + 1sample] = IIR_A0; -buffer[IIR_DEST_A1 + 1sample] = IIR_A1; -buffer[IIR_DEST_B0 + 1sample] = IIR_B0; -buffer[IIR_DEST_B1 + 1sample] = IIR_B1; - -ACC0 = buffer[ACC_SRC_A0] * ACC_COEF_A + - buffer[ACC_SRC_B0] * ACC_COEF_B + - buffer[ACC_SRC_C0] * ACC_COEF_C + - buffer[ACC_SRC_D0] * ACC_COEF_D; -ACC1 = buffer[ACC_SRC_A1] * ACC_COEF_A + - buffer[ACC_SRC_B1] * ACC_COEF_B + - buffer[ACC_SRC_C1] * ACC_COEF_C + - buffer[ACC_SRC_D1] * ACC_COEF_D; - -FB_A0 = buffer[MIX_DEST_A0 - FB_SRC_A]; -FB_A1 = buffer[MIX_DEST_A1 - FB_SRC_A]; -FB_B0 = buffer[MIX_DEST_B0 - FB_SRC_B]; -FB_B1 = buffer[MIX_DEST_B1 - FB_SRC_B]; - -buffer[MIX_DEST_A0] = ACC0 - FB_A0 * FB_ALPHA; -buffer[MIX_DEST_A1] = ACC1 - FB_A1 * FB_ALPHA; -buffer[MIX_DEST_B0] = (FB_ALPHA * ACC0) - FB_A0 * (FB_ALPHA^0x8000) - FB_B0 * FB_X; -buffer[MIX_DEST_B1] = (FB_ALPHA * ACC1) - FB_A1 * (FB_ALPHA^0x8000) - FB_B1 * FB_X; - -Air notes: - The above is effectivly the same as: - buffer[MIX_DEST_B0] = (ACC0 * FB_ALPHA) + (FB_A0 * (1.0-FB_ALPHA)) - FB_B0 * FB_X; - buffer[MIX_DEST_B1] = (ACC1 * FB_ALPHA) + (FB_A1 * (1.0-FB_ALPHA)) - FB_B1 * FB_X; - - Which reduces to: - buffer[MIX_DEST_B0] = FB_A0 + ((ACC0-FB_A0) * FB_ALPHA) - FB_B0 * FB_X; - buffer[MIX_DEST_B1] = FB_A1 + ((ACC1-FB_A1) * FB_ALPHA) - FB_B1 * FB_X; - - - ------------------------------------------------------------------------------ -*/ diff --git a/plugins/spu2-x/src/RegLog.cpp b/plugins/spu2-x/src/RegLog.cpp index 77354eb209..f2e4a80cde 100644 --- a/plugins/spu2-x/src/RegLog.cpp +++ b/plugins/spu2-x/src/RegLog.cpp @@ -265,8 +265,8 @@ void SPU2writeLog(const char *action, u32 rmem, u16 value) RegLog(2, t "L", mem, core, value); \ break; - LOG_REVB_REG(FB_SRC_A, "FB_SRC_A") - LOG_REVB_REG(FB_SRC_B, "FB_SRC_B") + LOG_REVB_REG(FB_SIZE_A, "FB_SIZE_A") + LOG_REVB_REG(FB_SIZE_B, "FB_SIZE_B") LOG_REVB_REG(IIR_SRC_A0, "IIR_SRC_A0") LOG_REVB_REG(IIR_SRC_A1, "IIR_SRC_A1") LOG_REVB_REG(IIR_SRC_B1, "IIR_SRC_B1") diff --git a/plugins/spu2-x/src/RegTable.cpp b/plugins/spu2-x/src/RegTable.cpp index 1fc075a385..d3a0a5d6a1 100644 --- a/plugins/spu2-x/src/RegTable.cpp +++ b/plugins/spu2-x/src/RegTable.cpp @@ -99,8 +99,8 @@ u16 const *const regtable_original[0x401] = PCORE(0, ExtEffectsStartA) + 1, PCORE(0, ExtEffectsStartA), - PREVB_REG(0, FB_SRC_A), - PREVB_REG(0, FB_SRC_B), + PREVB_REG(0, FB_SIZE_A), + PREVB_REG(0, FB_SIZE_B), PREVB_REG(0, IIR_DEST_A0), PREVB_REG(0, IIR_DEST_A1), PREVB_REG(0, ACC_SRC_A0), @@ -202,8 +202,8 @@ u16 const *const regtable_original[0x401] = PCORE(1, ExtEffectsStartA) + 1, PCORE(1, ExtEffectsStartA), - PREVB_REG(1, FB_SRC_A), - PREVB_REG(1, FB_SRC_B), + PREVB_REG(1, FB_SIZE_A), + PREVB_REG(1, FB_SIZE_B), PREVB_REG(1, IIR_DEST_A0), PREVB_REG(1, IIR_DEST_A1), PREVB_REG(1, ACC_SRC_A0), diff --git a/plugins/spu2-x/src/Reverb.cpp b/plugins/spu2-x/src/Reverb.cpp index bfbc10363f..05ac600dd8 100644 --- a/plugins/spu2-x/src/Reverb.cpp +++ b/plugins/spu2-x/src/Reverb.cpp @@ -16,13 +16,6 @@ */ #include "Global.h" -#include "Lowpass.h" - -// Low pass filters: Change these to 32 for a speedup (benchmarks needed to see if -// the speed gain is worth the quality drop) - -//static LowPassFilter64 lowpass_left( 11000, SampleRate ); -//static LowPassFilter64 lowpass_right( 11000, SampleRate ); __forceinline s32 V_Core::RevbGetIndexer(s32 offset) { @@ -56,215 +49,85 @@ void V_Core::Reverb_AdvanceBuffer() StereoOut32 V_Core::DoReverb(const StereoOut32 &Input) { -#if 0 - static const s32 downcoeffs[8] = - { - 1283, 5344, 10895, 15243, - 15243, 10895, 5344, 1283 - }; -#else - // 2/3 of the above - static const s32 downcoeffs[8] = - { - 855, 3562, 7263, 10163, - 10163, 7263, 3562, 855}; -#endif + if (EffectsBufferSize <= 0) { + return StereoOut32::Empty; + } - downbuf[dbpos] = Input; - dbpos = (dbpos + 1) & 7; + bool R = Cycles & 1; - // Reverb processing occurs at 24khz, so we skip processing every other sample, - // and use the previous calculation for this core instead. + // Calculate the read/write addresses we'll be needing for this session of reverb. - if ((Cycles & 1) == 0) { - // Important: Factor silence into the upsampler here, otherwise the reverb engine - // develops a nasty feedback loop. + const u32 same_src = RevbGetIndexer(R ? RevBuffers.IIR_SRC_A1 : RevBuffers.IIR_SRC_A0); + const u32 same_dst = RevbGetIndexer(R ? RevBuffers.IIR_DEST_A1 : RevBuffers.IIR_DEST_A0); + const u32 same_prv = RevbGetIndexer(R ? RevBuffers.SAME_R_PRV : RevBuffers.SAME_L_PRV); - upbuf[ubpos] = StereoOut32::Empty; - } else { - if (EffectsBufferSize <= 0) { - ubpos = (ubpos + 1) & 7; - return StereoOut32::Empty; - } + const u32 diff_src = RevbGetIndexer(R ? RevBuffers.IIR_SRC_B0 : RevBuffers.IIR_SRC_B1); + const u32 diff_dst = RevbGetIndexer(R ? RevBuffers.IIR_DEST_B1 : RevBuffers.IIR_DEST_B0); + const u32 diff_prv = RevbGetIndexer(R ? RevBuffers.DIFF_R_PRV : RevBuffers.DIFF_L_PRV); - // Advance the current reverb buffer pointer, and cache the read/write addresses we'll be - // needing for this session of reverb. + const u32 comb1_src = RevbGetIndexer(R ? RevBuffers.ACC_SRC_A1 : RevBuffers.ACC_SRC_A0); + const u32 comb2_src = RevbGetIndexer(R ? RevBuffers.ACC_SRC_B1 : RevBuffers.ACC_SRC_B0); + const u32 comb3_src = RevbGetIndexer(R ? RevBuffers.ACC_SRC_C1 : RevBuffers.ACC_SRC_C0); + const u32 comb4_src = RevbGetIndexer(R ? RevBuffers.ACC_SRC_D1 : RevBuffers.ACC_SRC_D0); - const u32 src_a0 = RevbGetIndexer(RevBuffers.IIR_SRC_A0); - const u32 src_a1 = RevbGetIndexer(RevBuffers.IIR_SRC_A1); - const u32 src_b0 = RevbGetIndexer(RevBuffers.IIR_SRC_B0); - const u32 src_b1 = RevbGetIndexer(RevBuffers.IIR_SRC_B1); + const u32 apf1_src = RevbGetIndexer(R ? RevBuffers.APF1_R_SRC : RevBuffers.APF1_L_SRC); + const u32 apf1_dst = RevbGetIndexer(R ? RevBuffers.MIX_DEST_A1 : RevBuffers.MIX_DEST_A0); + const u32 apf2_src = RevbGetIndexer(R ? RevBuffers.APF2_R_SRC : RevBuffers.APF2_L_SRC); + const u32 apf2_dst = RevbGetIndexer(R ? RevBuffers.MIX_DEST_B1 : RevBuffers.MIX_DEST_B0); - const u32 dest_a0 = RevbGetIndexer(RevBuffers.IIR_DEST_A0); - const u32 dest_a1 = RevbGetIndexer(RevBuffers.IIR_DEST_A1); - const u32 dest_b0 = RevbGetIndexer(RevBuffers.IIR_DEST_B0); - const u32 dest_b1 = RevbGetIndexer(RevBuffers.IIR_DEST_B1); + // ----------------------------------------- + // Optimized IRQ Testing ! + // ----------------------------------------- - const u32 dest2_a0 = RevbGetIndexer(RevBuffers.IIR_DEST_A0 + 1); - const u32 dest2_a1 = RevbGetIndexer(RevBuffers.IIR_DEST_A1 + 1); - const u32 dest2_b0 = RevbGetIndexer(RevBuffers.IIR_DEST_B0 + 1); - const u32 dest2_b1 = RevbGetIndexer(RevBuffers.IIR_DEST_B1 + 1); + // This test is enhanced by using the reverb effects area begin/end test as a + // shortcut, since all buffer addresses are within that area. If the IRQA isn't + // within that zone then the "bulk" of the test is skipped, so this should only + // be a slowdown on a few evil games. - const u32 acc_src_a0 = RevbGetIndexer(RevBuffers.ACC_SRC_A0); - const u32 acc_src_b0 = RevbGetIndexer(RevBuffers.ACC_SRC_B0); - const u32 acc_src_c0 = RevbGetIndexer(RevBuffers.ACC_SRC_C0); - const u32 acc_src_d0 = RevbGetIndexer(RevBuffers.ACC_SRC_D0); + for (int i = 0; i < 2; i++) { + if (Cores[i].IRQEnable && ((Cores[i].IRQA >= EffectsStartA) && (Cores[i].IRQA <= EffectsEndA))) { + if ((Cores[i].IRQA == same_src) || (Cores[i].IRQA == diff_src) || + (Cores[i].IRQA == same_dst) || (Cores[i].IRQA == diff_dst) || + (Cores[i].IRQA == same_prv) || (Cores[i].IRQA == diff_prv) || - const u32 acc_src_a1 = RevbGetIndexer(RevBuffers.ACC_SRC_A1); - const u32 acc_src_b1 = RevbGetIndexer(RevBuffers.ACC_SRC_B1); - const u32 acc_src_c1 = RevbGetIndexer(RevBuffers.ACC_SRC_C1); - const u32 acc_src_d1 = RevbGetIndexer(RevBuffers.ACC_SRC_D1); + (Cores[i].IRQA == comb1_src) || (Cores[i].IRQA == comb2_src) || + (Cores[i].IRQA == comb3_src) || (Cores[i].IRQA == comb4_src) || - const u32 fb_src_a0 = RevbGetIndexer(RevBuffers.FB_SRC_A0); - const u32 fb_src_a1 = RevbGetIndexer(RevBuffers.FB_SRC_A1); - const u32 fb_src_b0 = RevbGetIndexer(RevBuffers.FB_SRC_B0); - const u32 fb_src_b1 = RevbGetIndexer(RevBuffers.FB_SRC_B1); - - const u32 mix_dest_a0 = RevbGetIndexer(RevBuffers.MIX_DEST_A0); - const u32 mix_dest_a1 = RevbGetIndexer(RevBuffers.MIX_DEST_A1); - const u32 mix_dest_b0 = RevbGetIndexer(RevBuffers.MIX_DEST_B0); - const u32 mix_dest_b1 = RevbGetIndexer(RevBuffers.MIX_DEST_B1); - - // ----------------------------------------- - // Optimized IRQ Testing ! - // ----------------------------------------- - - // This test is enhanced by using the reverb effects area begin/end test as a - // shortcut, since all buffer addresses are within that area. If the IRQA isn't - // within that zone then the "bulk" of the test is skipped, so this should only - // be a slowdown on a few evil games. - - for (int i = 0; i < 2; i++) { - if (Cores[i].IRQEnable && ((Cores[i].IRQA >= EffectsStartA) && (Cores[i].IRQA <= EffectsEndA))) { - if ((Cores[i].IRQA == src_a0) || (Cores[i].IRQA == src_a1) || - (Cores[i].IRQA == src_b0) || (Cores[i].IRQA == src_b1) || - - (Cores[i].IRQA == dest_a0) || (Cores[i].IRQA == dest_a1) || - (Cores[i].IRQA == dest_b0) || (Cores[i].IRQA == dest_b1) || - - (Cores[i].IRQA == dest2_a0) || (Cores[i].IRQA == dest2_a1) || - (Cores[i].IRQA == dest2_b0) || (Cores[i].IRQA == dest2_b1) || - - (Cores[i].IRQA == acc_src_a0) || (Cores[i].IRQA == acc_src_a1) || - (Cores[i].IRQA == acc_src_b0) || (Cores[i].IRQA == acc_src_b1) || - (Cores[i].IRQA == acc_src_c0) || (Cores[i].IRQA == acc_src_c1) || - (Cores[i].IRQA == acc_src_d0) || (Cores[i].IRQA == acc_src_d1) || - - (Cores[i].IRQA == fb_src_a0) || (Cores[i].IRQA == fb_src_a1) || - (Cores[i].IRQA == fb_src_b0) || (Cores[i].IRQA == fb_src_b1) || - - (Cores[i].IRQA == mix_dest_a0) || (Cores[i].IRQA == mix_dest_a1) || - (Cores[i].IRQA == mix_dest_b0) || (Cores[i].IRQA == mix_dest_b1)) { - //printf("Core %d IRQ Called (Reverb). IRQA = %x\n",i,addr); - SetIrqCall(i); - } + (Cores[i].IRQA == apf1_dst) || (Cores[i].IRQA == apf1_src) || + (Cores[i].IRQA == apf2_dst) || (Cores[i].IRQA == apf2_src)) { + //printf("Core %d IRQ Called (Reverb). IRQA = %x\n",i,addr); + SetIrqCall(i); } } - - // ----------------------------------------- - // Begin Reverb Processing ! - // ----------------------------------------- - - StereoOut32 INPUT_SAMPLE; - - for (int x = 0; x < 8; ++x) { - INPUT_SAMPLE.Left += (downbuf[(dbpos + x) & 7].Left * downcoeffs[x]); - INPUT_SAMPLE.Right += (downbuf[(dbpos + x) & 7].Right * downcoeffs[x]); - } - - INPUT_SAMPLE.Left >>= 16; - INPUT_SAMPLE.Right >>= 16; - - s32 input_L = INPUT_SAMPLE.Left * Revb.IN_COEF_L; - s32 input_R = INPUT_SAMPLE.Right * Revb.IN_COEF_R; - - const s32 IIR_INPUT_A0 = clamp_mix((((s32)_spu2mem[src_a0] * Revb.IIR_COEF) + input_L) >> 15); - const s32 IIR_INPUT_A1 = clamp_mix((((s32)_spu2mem[src_a1] * Revb.IIR_COEF) + input_L) >> 15); - const s32 IIR_INPUT_B0 = clamp_mix((((s32)_spu2mem[src_b0] * Revb.IIR_COEF) + input_R) >> 15); - const s32 IIR_INPUT_B1 = clamp_mix((((s32)_spu2mem[src_b1] * Revb.IIR_COEF) + input_R) >> 15); - - const s32 src_dest_a0 = _spu2mem[dest_a0]; - const s32 src_dest_a1 = _spu2mem[dest_a1]; - const s32 src_dest_b0 = _spu2mem[dest_b0]; - const s32 src_dest_b1 = _spu2mem[dest_b1]; - - // This section differs from Neill's doc as it uses single-mul interpolation instead - // of 0x8000-val inversion. (same result, faster) - const s32 IIR_A0 = src_dest_a0 + (((IIR_INPUT_A0 - src_dest_a0) * Revb.IIR_ALPHA) >> 15); - const s32 IIR_A1 = src_dest_a1 + (((IIR_INPUT_A1 - src_dest_a1) * Revb.IIR_ALPHA) >> 15); - const s32 IIR_B0 = src_dest_b0 + (((IIR_INPUT_B0 - src_dest_b0) * Revb.IIR_ALPHA) >> 15); - const s32 IIR_B1 = src_dest_b1 + (((IIR_INPUT_B1 - src_dest_b1) * Revb.IIR_ALPHA) >> 15); - _spu2mem[dest2_a0] = clamp_mix(IIR_A0); - _spu2mem[dest2_a1] = clamp_mix(IIR_A1); - _spu2mem[dest2_b0] = clamp_mix(IIR_B0); - _spu2mem[dest2_b1] = clamp_mix(IIR_B1); - - const s32 ACC0 = clamp_mix( - ((_spu2mem[acc_src_a0] * Revb.ACC_COEF_A) >> 15) + - ((_spu2mem[acc_src_b0] * Revb.ACC_COEF_B) >> 15) + - ((_spu2mem[acc_src_c0] * Revb.ACC_COEF_C) >> 15) + - ((_spu2mem[acc_src_d0] * Revb.ACC_COEF_D) >> 15)); - - const s32 ACC1 = clamp_mix( - ((_spu2mem[acc_src_a1] * Revb.ACC_COEF_A) >> 15) + - ((_spu2mem[acc_src_b1] * Revb.ACC_COEF_B) >> 15) + - ((_spu2mem[acc_src_c1] * Revb.ACC_COEF_C) >> 15) + - ((_spu2mem[acc_src_d1] * Revb.ACC_COEF_D) >> 15)); - - // The following code differs from Neill's doc as it uses the more natural single-mul - // interpolative, instead of the funky ^0x8000 stuff. (better result, faster) - - const s32 FB_A0 = _spu2mem[fb_src_a0]; - const s32 FB_A1 = _spu2mem[fb_src_a1]; - const s32 FB_B0 = _spu2mem[fb_src_b0]; - const s32 FB_B1 = _spu2mem[fb_src_b1]; - - const s32 mix_a0 = clamp_mix(ACC0 - ((FB_A0 * Revb.FB_ALPHA) >> 15)); - const s32 mix_a1 = clamp_mix(ACC1 - ((FB_A1 * Revb.FB_ALPHA) >> 15)); - const s32 mix_b0 = clamp_mix(FB_A0 + (((ACC0 - FB_A0) * Revb.FB_ALPHA - FB_B0 * Revb.FB_X) >> 15)); - const s32 mix_b1 = clamp_mix(FB_A1 + (((ACC1 - FB_A1) * Revb.FB_ALPHA - FB_B1 * Revb.FB_X) >> 15)); - - _spu2mem[mix_dest_a0] = mix_a0; - _spu2mem[mix_dest_a1] = mix_a1; - _spu2mem[mix_dest_b0] = mix_b0; - _spu2mem[mix_dest_b1] = mix_b1; - - upbuf[ubpos] = clamp_mix(StereoOut32( - mix_a0 + mix_b0, // left - mix_a1 + mix_b1 // right - )); } - StereoOut32 retval; + // Reverb algorithm pretty much directly ripped from http://drhell.web.fc2.com/ps1/ + // minus the 35 step FIR which just seems to break things. - //for( int x=0; x<8; ++x ) - //{ - // retval.Left += (upbuf[(ubpos+x)&7].Left*downcoeffs[x]); - // retval.Right += (upbuf[(ubpos+x)&7].Right*downcoeffs[x]); - //} + s32 in, same, diff, apf1, apf2, out; - if ((Cycles & 1) == 0) { - retval.Left = (upbuf[(ubpos + 5) & 7].Left + upbuf[(ubpos + 7) & 7].Left) >> 1; - retval.Right = (upbuf[(ubpos + 5) & 7].Right + upbuf[(ubpos + 7) & 7].Right) >> 1; - } else { - retval.Left = upbuf[(ubpos + 6) & 7].Left; - retval.Right = upbuf[(ubpos + 6) & 7].Right; +#define MUL(x, y) ((x) * (y) >> 15) + in = MUL(R ? Revb.IN_COEF_R : Revb.IN_COEF_L, R ? Input.Right : Input.Left); + + same = MUL(Revb.IIR_ALPHA, in + MUL(Revb.IIR_COEF, _spu2mem[same_src]) - _spu2mem[same_prv]) + _spu2mem[same_prv]; + diff = MUL(Revb.IIR_ALPHA, in + MUL(Revb.IIR_COEF, _spu2mem[diff_src]) - _spu2mem[diff_prv]) + _spu2mem[diff_prv]; + + out = MUL(Revb.ACC_COEF_A, _spu2mem[comb1_src]) + MUL(Revb.ACC_COEF_B, _spu2mem[comb2_src]) + MUL(Revb.ACC_COEF_C, _spu2mem[comb3_src]) + MUL(Revb.ACC_COEF_D, _spu2mem[comb4_src]); + + apf1 = out - MUL(Revb.FB_ALPHA, _spu2mem[apf1_src]); + out = _spu2mem[apf1_src] + MUL(Revb.FB_ALPHA, apf1); + apf2 = out - MUL(Revb.FB_X, _spu2mem[apf2_src]); + out = _spu2mem[apf2_src] + MUL(Revb.FB_X, apf2); + + // According to no$psx the effects always run but don't always write back, see check in V_Core::Mix + if (FxEnable) { + _spu2mem[same_dst] = clamp_mix(same); + _spu2mem[diff_dst] = clamp_mix(diff); + _spu2mem[apf1_dst] = clamp_mix(apf1); + _spu2mem[apf2_dst] = clamp_mix(apf2); } - // Notes: - // the first -1 is to adjust for the null padding in every other upbuf sample (which - // halves the overall volume). - // The second +1 divides by two, which is part of Neill's suggestion to divide by 3. - // - // According Neill the final result should be divided by 3, but currently the output - // is way too quiet for that to fly. In fact no division at all might be better. - // In any case the problem always seems to be that the reverb isn't resonating enough - // (indicating short buffers or bad coefficient math?), not that it isn't loud enough. + (R ? LastEffect.Right : LastEffect.Left) = -clamp_mix(out); - //retval.Left >>= (16-1 + 1); - //retval.Right >>= (16-1 + 1); - - ubpos = (ubpos + 1) & 7; - - return retval; -} \ No newline at end of file + return LastEffect; +} diff --git a/plugins/spu2-x/src/defs.h b/plugins/spu2-x/src/defs.h index f3b1f85337..037d633bd1 100644 --- a/plugins/spu2-x/src/defs.h +++ b/plugins/spu2-x/src/defs.h @@ -18,6 +18,7 @@ #pragma once #include "Mixer.h" +#include "SndOut.h" // -------------------------------------------------------------------------------------- // SPU2 Memory Indexers @@ -229,8 +230,8 @@ struct V_Reverb s16 IN_COEF_L; s16 IN_COEF_R; - u32 FB_SRC_A; - u32 FB_SRC_B; + u32 FB_SIZE_A; + u32 FB_SIZE_B; s16 FB_ALPHA; s16 FB_X; @@ -269,11 +270,6 @@ struct V_Reverb struct V_ReverbBuffers { - s32 FB_SRC_A0; - s32 FB_SRC_B0; - s32 FB_SRC_A1; - s32 FB_SRC_B1; - s32 IIR_SRC_A0; s32 IIR_SRC_A1; s32 IIR_SRC_B0; @@ -297,6 +293,16 @@ struct V_ReverbBuffers s32 MIX_DEST_B0; s32 MIX_DEST_B1; + s32 SAME_L_PRV; + s32 SAME_R_PRV; + s32 DIFF_L_PRV; + s32 DIFF_R_PRV; + + s32 APF1_L_SRC; + s32 APF1_R_SRC; + s32 APF2_L_SRC; + s32 APF2_R_SRC; + bool NeedsUpdated; }; @@ -419,9 +425,7 @@ struct V_Core V_CoreRegs Regs; // Registers - // Last samples to pass through the effects processor. - // Used because the effects processor works at 24khz and just pulls - // from this for the odd Ts. + // Preserves the channel processed last cycle StereoOut32 LastEffect; u8 CoreEnabled; @@ -444,10 +448,6 @@ struct V_Core u16 psxSoundDataTransferControl; u16 psxSPUSTAT; - StereoOut32 downbuf[8]; - StereoOut32 upbuf[8]; - int dbpos, ubpos; - // HACK -- This is a temp buffer which is (or isn't?) used to circumvent some memory // corruption that originates elsewhere in the plugin. >_< The actual ADMA buffer // is an area mapped to SPU2 main memory. @@ -471,8 +471,6 @@ struct V_Core void AnalyzeReverbPreset(); s32 EffectsBufferIndexer(s32 offset) const; - void UpdateFeedbackBuffersA(); - void UpdateFeedbackBuffersB(); void WriteRegPS1(u32 mem, u16 value); u16 ReadRegPS1(u32 mem); diff --git a/plugins/spu2-x/src/regs.h b/plugins/spu2-x/src/regs.h index f1227e9638..1286796cca 100644 --- a/plugins/spu2-x/src/regs.h +++ b/plugins/spu2-x/src/regs.h @@ -61,9 +61,9 @@ // .. repeated for each voice .. -#define REG_A_ESA 0x02E0 //Address: Top address of working area for effects processing -#define R_FB_SRC_A 0x02E4 // Feedback Source A -#define R_FB_SRC_B 0x02E8 // Feedback Source B +#define REG_A_ESA 0x02E0 //Address: Top address of working area for effects processing +#define R_FB_SIZE_A 0x02E4 // Feedback Source A +#define R_FB_SIZE_B 0x02E8 // Feedback Source B #define R_IIR_DEST_A0 0x02EC #define R_IIR_DEST_A1 0x02F0 #define R_ACC_SRC_A0 0x02F4 diff --git a/plugins/spu2-x/src/spu2freeze.cpp b/plugins/spu2-x/src/spu2freeze.cpp index b1df63d2e3..255940d2d3 100644 --- a/plugins/spu2-x/src/spu2freeze.cpp +++ b/plugins/spu2-x/src/spu2freeze.cpp @@ -25,7 +25,7 @@ static const u32 SAVE_ID = 0x1227521; // versioning for saves. // Increment this when changes to the savestate system are made. -static const u32 SAVE_VERSION = 0x000d; +static const u32 SAVE_VERSION = 0x000e; static void wipe_the_cache() { diff --git a/plugins/spu2-x/src/spu2sys.cpp b/plugins/spu2-x/src/spu2sys.cpp index 25eeafaedf..98e803bf72 100644 --- a/plugins/spu2-x/src/spu2sys.cpp +++ b/plugins/spu2-x/src/spu2sys.cpp @@ -195,7 +195,7 @@ void V_Core::AnalyzeReverbPreset() ConLog("----------------------------------------------------------\n"); ConLog(" IN_COEF_L, IN_COEF_R 0x%08x, 0x%08x\n", Revb.IN_COEF_L, Revb.IN_COEF_R); - ConLog(" FB_SRC_A, FB_SRC_B 0x%08x, 0x%08x\n", Revb.FB_SRC_A, Revb.FB_SRC_B); + ConLog(" FB_SIZE_A, FB_SIZE_B 0x%08x, 0x%08x\n", Revb.FB_SIZE_A, Revb.FB_SIZE_B); ConLog(" FB_ALPHA, FB_X 0x%08x, 0x%08x\n", Revb.FB_ALPHA, Revb.FB_X); ConLog(" ACC_COEF_A 0x%08x\n", Revb.ACC_COEF_A); @@ -242,18 +242,6 @@ s32 V_Core::EffectsBufferIndexer(s32 offset) const return pos; } -void V_Core::UpdateFeedbackBuffersA() -{ - RevBuffers.FB_SRC_A0 = EffectsBufferIndexer(Revb.MIX_DEST_A0 - Revb.FB_SRC_A); - RevBuffers.FB_SRC_A1 = EffectsBufferIndexer(Revb.MIX_DEST_A1 - Revb.FB_SRC_A); -} - -void V_Core::UpdateFeedbackBuffersB() -{ - RevBuffers.FB_SRC_B0 = EffectsBufferIndexer(Revb.MIX_DEST_B0 - Revb.FB_SRC_B); - RevBuffers.FB_SRC_B1 = EffectsBufferIndexer(Revb.MIX_DEST_B1 - Revb.FB_SRC_B); -} - void V_Core::UpdateEffectsBufferSize() { const s32 newbufsize = EffectsEndA - EffectsStartA + 1; @@ -290,9 +278,6 @@ void V_Core::UpdateEffectsBufferSize() RevBuffers.ACC_SRC_D0 = EffectsBufferIndexer(Revb.ACC_SRC_D0); RevBuffers.ACC_SRC_D1 = EffectsBufferIndexer(Revb.ACC_SRC_D1); - UpdateFeedbackBuffersA(); - UpdateFeedbackBuffersB(); - RevBuffers.IIR_DEST_A0 = EffectsBufferIndexer(Revb.IIR_DEST_A0); RevBuffers.IIR_DEST_A1 = EffectsBufferIndexer(Revb.IIR_DEST_A1); RevBuffers.IIR_DEST_B0 = EffectsBufferIndexer(Revb.IIR_DEST_B0); @@ -307,6 +292,16 @@ void V_Core::UpdateEffectsBufferSize() RevBuffers.MIX_DEST_A1 = EffectsBufferIndexer(Revb.MIX_DEST_A1); RevBuffers.MIX_DEST_B0 = EffectsBufferIndexer(Revb.MIX_DEST_B0); RevBuffers.MIX_DEST_B1 = EffectsBufferIndexer(Revb.MIX_DEST_B1); + + RevBuffers.SAME_L_PRV = EffectsBufferIndexer(Revb.IIR_DEST_A0 - 1); + RevBuffers.SAME_R_PRV = EffectsBufferIndexer(Revb.IIR_DEST_A1 - 1); + RevBuffers.DIFF_L_PRV = EffectsBufferIndexer(Revb.IIR_DEST_B0 - 1); + RevBuffers.DIFF_R_PRV = EffectsBufferIndexer(Revb.IIR_DEST_B1 - 1); + + RevBuffers.APF1_L_SRC = EffectsBufferIndexer(Revb.MIX_DEST_A0 - Revb.FB_SIZE_A); + RevBuffers.APF1_R_SRC = EffectsBufferIndexer(Revb.MIX_DEST_A1 - Revb.FB_SIZE_A); + RevBuffers.APF2_L_SRC = EffectsBufferIndexer(Revb.MIX_DEST_B0 - Revb.FB_SIZE_B); + RevBuffers.APF2_R_SRC = EffectsBufferIndexer(Revb.MIX_DEST_B1 - Revb.FB_SIZE_B); } void V_Voice::QueueStart() @@ -719,10 +714,10 @@ void V_Core::WriteRegPS1(u32 mem, u16 value) break; case 0x1DC0: - Revb.FB_SRC_A = value * 4; + Revb.FB_SIZE_A = value * 4; break; case 0x1DC2: - Revb.FB_SRC_B = value * 4; + Revb.FB_SIZE_B = value * 4; break; case 0x1DC4: Revb.IIR_ALPHA = value; @@ -1550,8 +1545,8 @@ static RegWriteHandler *const tbl_reg_writes[0x401] = CoreParamsPair(0, REG_A_ESA), - ReverbPair(0, R_FB_SRC_A), // 0x02E4 // Feedback Source A - ReverbPair(0, R_FB_SRC_B), // 0x02E8 // Feedback Source B + ReverbPair(0, R_FB_SIZE_A), // 0x02E4 // Feedback Source A + ReverbPair(0, R_FB_SIZE_B), // 0x02E8 // Feedback Source B ReverbPair(0, R_IIR_DEST_A0), // 0x02EC ReverbPair(0, R_IIR_DEST_A1), // 0x02F0 ReverbPair(0, R_ACC_SRC_A0), // 0x02F4 @@ -1640,8 +1635,8 @@ static RegWriteHandler *const tbl_reg_writes[0x401] = CoreParamsPair(1, REG_A_ESA), - ReverbPair(1, R_FB_SRC_A), // 0x02E4 // Feedback Source A - ReverbPair(1, R_FB_SRC_B), // 0x02E8 // Feedback Source B + ReverbPair(1, R_FB_SIZE_A), // 0x02E4 // Feedback Source A + ReverbPair(1, R_FB_SIZE_B), // 0x02E8 // Feedback Source B ReverbPair(1, R_IIR_DEST_A0), // 0x02EC ReverbPair(1, R_IIR_DEST_A1), // 0x02F0 ReverbPair(1, R_ACC_SRC_A0), // 0x02F4