From 1cc740a9c0d3d0d213f86693af50965fe4e29eeb Mon Sep 17 00:00:00 2001
From: "Jake.Stine" <Jake.Stine@96395faa-99c1-11dd-bbfe-3dabce05a288>
Date: Tue, 27 Oct 2009 14:08:25 +0000
Subject: [PATCH] SPU2-X: Revert Neill's x4 buffer indexers -- Fixes feedback. 
 Update savestate version to fix prev savestates.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2089 96395faa-99c1-11dd-bbfe-3dabce05a288
---
 plugins/spu2-x/src/Mixer.cpp      | 10 +++++
 plugins/spu2-x/src/Reverb.cpp     | 72 +++++++++++++++----------------
 plugins/spu2-x/src/spu2freeze.cpp |  2 +-
 plugins/spu2-x/src/spu2sys.cpp    |  6 +--
 4 files changed, 49 insertions(+), 41 deletions(-)

diff --git a/plugins/spu2-x/src/Mixer.cpp b/plugins/spu2-x/src/Mixer.cpp
index 0a48570e10..ed675fc527 100644
--- a/plugins/spu2-x/src/Mixer.cpp
+++ b/plugins/spu2-x/src/Mixer.cpp
@@ -889,5 +889,15 @@ buffer[MIX_DEST_A1] = ACC1 - FB_A1 * FB_ALPHA;
 buffer[MIX_DEST_B0] = (FB_ALPHA * ACC0) - FB_A0 * (FB_ALPHA^0x8000) - FB_B0 * FB_X;
 buffer[MIX_DEST_B1] = (FB_ALPHA * ACC1) - FB_A1 * (FB_ALPHA^0x8000) - FB_B1 * FB_X;
 
+Air notes:
+  The above is effectively the same as:
+    buffer[MIX_DEST_B0] = (ACC0 * FB_ALPHA) + (FB_A0 * (1.0-FB_ALPHA)) - FB_B0 * FB_X;
+    buffer[MIX_DEST_B1] = (ACC1 * FB_ALPHA) + (FB_A1 * (1.0-FB_ALPHA)) - FB_B1 * FB_X;
+
+  Which reduces to:
+    buffer[MIX_DEST_B0] = FB_A0 + ((ACC0-FB_A0) * FB_ALPHA) - FB_B0 * FB_X;
+    buffer[MIX_DEST_B1] = FB_A1 + ((ACC1-FB_A1) * FB_ALPHA) - FB_B1 * FB_X;
+
+
 -----------------------------------------------------------------------------
 */
diff --git a/plugins/spu2-x/src/Reverb.cpp b/plugins/spu2-x/src/Reverb.cpp
index 1390682939..eb49a2f1d9 100644
--- a/plugins/spu2-x/src/Reverb.cpp
+++ b/plugins/spu2-x/src/Reverb.cpp
@@ -28,12 +28,11 @@ __forceinline s32 V_Core::RevbGetIndexer( s32 offset )
 {
 	u32 pos = ReverbX + offset; //*4);
 
-	// Need to use modulus here, because games can and will drop the buffer size
-	// without notice, and it leads to offsets several times past the end of the buffer.
+	// Fast and simple single step wrapping, made possible by the preparation of the
+	// effects buffer addresses.
 
 	if( pos > EffectsEndA )
 	{
-		//pos = EffectsStartA + ((ReverbX + offset) % (u32)EffectsBufferSize);
 		pos -= EffectsEndA+1;
 		pos += EffectsStartA;
 	}
@@ -96,10 +95,10 @@ StereoOut32 V_Core::DoReverb( const StereoOut32& Input )
 		const u32 dest_b0 = RevbGetIndexer( RevBuffers.IIR_DEST_B0 );
 		const u32 dest_b1 = RevbGetIndexer( RevBuffers.IIR_DEST_B1 );
 		
-		const u32 dest2_a0 = RevbGetIndexer( RevBuffers.IIR_DEST_A0 + 4 );
-		const u32 dest2_a1 = RevbGetIndexer( RevBuffers.IIR_DEST_A1 + 4 );
-		const u32 dest2_b0 = RevbGetIndexer( RevBuffers.IIR_DEST_B0 + 4 );
-		const u32 dest2_b1 = RevbGetIndexer( RevBuffers.IIR_DEST_B1 + 4 );
+		const u32 dest2_a0 = RevbGetIndexer( RevBuffers.IIR_DEST_A0 + 2 );
+		const u32 dest2_a1 = RevbGetIndexer( RevBuffers.IIR_DEST_A1 + 2 );
+		const u32 dest2_b0 = RevbGetIndexer( RevBuffers.IIR_DEST_B0 + 2 );
+		const u32 dest2_b1 = RevbGetIndexer( RevBuffers.IIR_DEST_B1 + 2 );
 		
 		const u32 acc_src_a0 = RevbGetIndexer( RevBuffers.ACC_SRC_A0 );
 		const u32 acc_src_b0 = RevbGetIndexer( RevBuffers.ACC_SRC_B0 );
@@ -141,15 +140,8 @@ StereoOut32 V_Core::DoReverb( const StereoOut32& Input )
 		const s32 IIR_INPUT_B0 = ((_spu2mem[src_b0] * Revb.IIR_COEF) + (INPUT_SAMPLE.Left * Revb.IN_COEF_L))>>16;
 		const s32 IIR_INPUT_B1 = ((_spu2mem[src_b1] * Revb.IIR_COEF) + (INPUT_SAMPLE.Right * Revb.IN_COEF_R))>>16;
 
-		/*const s32 IIR_A0 = (IIR_INPUT_A0 * Revb.IIR_ALPHA) + (_spu2mem[dest_a0] * (0xffff - Revb.IIR_ALPHA));
-		const s32 IIR_A1 = (IIR_INPUT_A1 * Revb.IIR_ALPHA) + (_spu2mem[dest_a1] * (0xffff - Revb.IIR_ALPHA));
-		const s32 IIR_B0 = (IIR_INPUT_B0 * Revb.IIR_ALPHA) + (_spu2mem[dest_b0] * (0xffff - Revb.IIR_ALPHA));
-		const s32 IIR_B1 = (IIR_INPUT_B1 * Revb.IIR_ALPHA) + (_spu2mem[dest_b1] * (0xffff - Revb.IIR_ALPHA));
-		_spu2mem[dest2_a0] = clamp_mix( IIR_A0 >> 16 );
-		_spu2mem[dest2_a1] = clamp_mix( IIR_A1 >> 16 );
-		_spu2mem[dest2_b0] = clamp_mix( IIR_B0 >> 16 );
-		_spu2mem[dest2_b1] = clamp_mix( IIR_B1 >> 16 );*/
-
+		// This section differs from Neill's doc as it uses single-mul interpolation instead
+		// of 0x8000-val inversion.  (same result, faster)
 		const s32 IIR_A0 = IIR_INPUT_A0 + (((_spu2mem[dest_a0]-IIR_INPUT_A0) * Revb.IIR_ALPHA)>>16);
 		const s32 IIR_A1 = IIR_INPUT_A1 + (((_spu2mem[dest_a1]-IIR_INPUT_A1) * Revb.IIR_ALPHA)>>16);
 		const s32 IIR_B0 = IIR_INPUT_B0 + (((_spu2mem[dest_b0]-IIR_INPUT_B0) * Revb.IIR_ALPHA)>>16);
@@ -164,37 +156,32 @@ StereoOut32 V_Core::DoReverb( const StereoOut32& Input )
 			((_spu2mem[acc_src_b0] * Revb.ACC_COEF_B)) +
 			((_spu2mem[acc_src_c0] * Revb.ACC_COEF_C)) +
 			((_spu2mem[acc_src_d0] * Revb.ACC_COEF_D))
-		) >> 16;
+		); // >> 16;
 
 		const s32 ACC1 = (
 			((_spu2mem[acc_src_a1] * Revb.ACC_COEF_A)) +
 			((_spu2mem[acc_src_b1] * Revb.ACC_COEF_B)) +
 			((_spu2mem[acc_src_c1] * Revb.ACC_COEF_C)) +
 			((_spu2mem[acc_src_d1] * Revb.ACC_COEF_D))
-		) >> 16;
+		); // >> 16;
 
+		// The following code differs from Neill's doc as it uses the more natural single-mul
+		// interpolation, instead of the funky ^0x8000 stuff.  (better result, faster)
 
-		const s32 FB_A0 = (_spu2mem[fb_src_a0] * Revb.FB_ALPHA) >> 16;
-		const s32 FB_A1 = (_spu2mem[fb_src_a1] * Revb.FB_ALPHA) >> 16;
+		const s32 FB_A0 = _spu2mem[fb_src_a0] * Revb.FB_ALPHA;
+		const s32 FB_A1 = _spu2mem[fb_src_a1] * Revb.FB_ALPHA;
 
-		_spu2mem[mix_dest_a0] = clamp_mix( ACC0 - FB_A0 );
-		_spu2mem[mix_dest_a1] = clamp_mix( ACC1 - FB_A1 );
+		_spu2mem[mix_dest_a0] = clamp_mix( (ACC0 - FB_A0) >> 16 );
+		_spu2mem[mix_dest_a1] = clamp_mix( (ACC1 - FB_A1) >> 16 );
 
-		const s32 acc_fb_mix_a = ACC0 + (((_spu2mem[fb_src_a0] - ACC0) * Revb.FB_ALPHA)>>16);
-		const s32 acc_fb_mix_b = ACC1 + (((_spu2mem[fb_src_a1] - ACC1) * Revb.FB_ALPHA)>>16);
-		_spu2mem[mix_dest_b0] = clamp_mix( acc_fb_mix_a - ((_spu2mem[fb_src_b0] * Revb.FB_X) >> 16) );
-		_spu2mem[mix_dest_b1] = clamp_mix( acc_fb_mix_b - ((_spu2mem[fb_src_b1] * Revb.FB_X) >> 16) );
+		const s32 acc_fb_mix_a = ACC0 + ( (_spu2mem[fb_src_a0] - (ACC0>>16)) * Revb.FB_ALPHA );
+		const s32 acc_fb_mix_b = ACC1 + ( (_spu2mem[fb_src_a1] - (ACC1>>16)) * Revb.FB_ALPHA );
+		_spu2mem[mix_dest_b0] = clamp_mix( ( acc_fb_mix_a - (_spu2mem[fb_src_b0] * Revb.FB_X) ) >> 16 );
+		_spu2mem[mix_dest_b1] = clamp_mix( ( acc_fb_mix_b - (_spu2mem[fb_src_b1] * Revb.FB_X) ) >> 16 );
 
-		//const s32 fb_xor_a0 = _spu2mem[fb_src_a0] * ( Revb.FB_ALPHA ^ 0x8000 );
-		//const s32 fb_xor_a1 = _spu2mem[fb_src_a1] * ( Revb.FB_ALPHA ^ 0x8000 );
-		//_spu2mem[mix_dest_b0] = clamp_mix( (MulShr32(Revb.FB_ALPHA<<16, ACC0) - fb_xor_a0 - (_spu2mem[fb_src_b0] * Revb.FB_X)) >> 16 );
-		//_spu2mem[mix_dest_b1] = clamp_mix( (MulShr32(Revb.FB_ALPHA<<16, ACC1) - fb_xor_a1 - (_spu2mem[fb_src_b1] * Revb.FB_X)) >> 16 );
-
-		// Note: According Neill these should be divided by 3, but currently the
-		// output is way too quiet for that to fly.
 		upbuf[ubpos] = clamp_mix( StereoOut32(
-			(_spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0]) / 2,	// left
-			(_spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1]) / 2		// right
+			(_spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0]),	// left
+			(_spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1])		// right
 		) );
 	} 
 
@@ -205,8 +192,19 @@ StereoOut32 V_Core::DoReverb( const StereoOut32& Input )
 		retval.Left  += (upbuf[(ubpos+x)&7].Left*downcoeffs[x]);
 		retval.Right += (upbuf[(ubpos+x)&7].Right*downcoeffs[x]);
 	}
-	retval.Left  >>= (16-1); /* -1 To adjust for the null padding. */
-	retval.Right >>= (16-1);
+
+	// Notes:
+	//  The first -1 is to adjust for the null padding in every other upbuf sample (which
+	//  halves the overall volume).
+	//  The second -1 divides by two, which is part of Neill's suggestion to divide by 3.
+	//
+	// According to Neill, the final result should be divided by 3, but currently the output
+	// is way too quiet for that to fly.  In fact no division at all might be better.
+	// In any case the problem always seems to be that the reverb isn't resonating enough
+	// (indicating short buffers or bad coefficient math?), not that it isn't loud enough.
+
+	retval.Left  >>= (16-1 + 1);
+	retval.Right >>= (16-1 + 1);
 
 	ubpos = (ubpos+1) & 7;
 
diff --git a/plugins/spu2-x/src/spu2freeze.cpp b/plugins/spu2-x/src/spu2freeze.cpp
index e35e3825a9..87e5865638 100644
--- a/plugins/spu2-x/src/spu2freeze.cpp
+++ b/plugins/spu2-x/src/spu2freeze.cpp
@@ -25,7 +25,7 @@ namespace Savestate
 
 	// versioning for saves.
 	// Increment this when changes to the savestate system are made.
-	static const u32 SAVE_VERSION = 0x0005;
+	static const u32 SAVE_VERSION = 0x0006;
 
 	static void wipe_the_cache()
 	{
diff --git a/plugins/spu2-x/src/spu2sys.cpp b/plugins/spu2-x/src/spu2sys.cpp
index 3487fb514e..76e2706d8f 100644
--- a/plugins/spu2-x/src/spu2sys.cpp
+++ b/plugins/spu2-x/src/spu2sys.cpp
@@ -161,18 +161,18 @@ void V_Core::Reset( int index )
 
 s32 V_Core::EffectsBufferIndexer( s32 offset ) const
 {
-	u32 pos = EffectsStartA + (offset*4);
+	u32 pos = EffectsStartA + offset;
 
 	// Need to use modulus here, because games can and will drop the buffer size
 	// without notice, and it leads to offsets several times past the end of the buffer.
 
 	if( pos > EffectsEndA )
 	{
-		pos = EffectsStartA + ((offset*4) % EffectsBufferSize);
+		pos = EffectsStartA + (offset % EffectsBufferSize);
 	}
 	else if( pos < EffectsStartA )
 	{
-		pos = EffectsEndA+1 - ((offset*4) % EffectsBufferSize );
+		pos = EffectsEndA+1 - (offset % EffectsBufferSize );
 	}
 	return pos;
 }