SPU2-X: Major code cleanups across the board, and optimizations to the reverb effects generator (possibly still buggy)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@525 96395faa-99c1-11dd-bbfe-3dabce05a288
2009-02-18 13:36:20 +00:00 · 2009-02-18 13:36:20 +00:00 · 0ca2f4d2be
parent bee40a2948
commit 0ca2f4d2be
24 changed files with 1743 additions and 1257 deletions
--- a/plugins/spu2-x/src/3rdparty/liba52/parse.c
+++ b/plugins/spu2-x/src/3rdparty/liba52/parse.c
@ -39,7 +39,7 @@
 void * memalign (size_t align, size_t size);
 #else
 /* assume malloc alignment is sufficient */
-#define memalign(align,size) malloc (size)
+#define memalign(align,m_size) malloc (m_size)
 #endif
 typedef struct {
--- a/plugins/spu2-x/src/ADSR.cpp
+++ b/plugins/spu2-x/src/ADSR.cpp
@ -216,7 +216,7 @@ bool V_ADSR::Calculate()
 #define VOLFLAG_EXPONENTIAL		(1ul<<2)
 #define VOLFLAG_SLIDE_ENABLE	(1ul<<3)
-void V_Volume::Update()
+void V_VolumeSlide::Update()
 {
 	if( !(Mode & VOLFLAG_SLIDE_ENABLE) ) return;
--- a/plugins/spu2-x/src/BaseTypes.h
+++ b/plugins/spu2-x/src/BaseTypes.h
@ -27,6 +27,8 @@
 using std::string;
 using std::wstring;
 #include "PS2Edefs.h"
 //////////////////////////////////////////////////////////////////////////
 // Override Win32 min/max macros with the STL's type safe and macro
 // free varieties (much safer!)
@ -84,4 +86,40 @@ static const bool IsDebugBuild = false;
 #endif
 struct StereoOut16;
 struct StereoOutFloat;
 // A single stereo sample holding one signed 32 bit value per channel.
 // Convertible from the 16 bit and floating point sample formats, which are only
 // forward-declared here; their conversions are defined elsewhere.
 struct StereoOut32
 {
 	// Shared "no signal" sample, defined elsewhere.
 	// NOTE(review): presumably zero on both channels -- confirm at its definition.
 	static StereoOut32 Empty;
 	s32 Left;
 	s32 Right;
 	// Default construction yields silence (both channels zero).
 	StereoOut32() :
 		Left( 0 ),
 		Right( 0 )
 	{
 	}
 	// Builds a sample from explicit left/right channel values.
 	StereoOut32( s32 left, s32 right ) :
 		Left( left ),
 		Right( right )
 	{
 	}
 	// Implicit conversion from a 16 bit sample (defined elsewhere).
 	StereoOut32( const StereoOut16& src );
 	// Conversion from a floating point sample must be requested explicitly.
 	explicit StereoOut32( const StereoOutFloat& src );
 	// Converts this sample to the 16 bit format (defined elsewhere).
 	StereoOut16 DownSample() const;
 	// Channel-wise sum of two samples.  No clamping is performed here.
 	// Declared const so that const objects and temporaries can be the left operand.
 	StereoOut32 operator+( const StereoOut32& right ) const
 	{
 		return StereoOut32(
 			Left + right.Left,
 			Right + right.Right
 		);
 	}
 };
 #endif
--- a/plugins/spu2-x/src/Debug.cpp
+++ b/plugins/spu2-x/src/Debug.cpp
@ -71,6 +71,27 @@ void ConLog(const char *fmt, ...) {
 #endif
 }
 // Writes one channel's volume slide state to 'dump' as hexadecimal text:
 // a heading line built from 'title' and 'nameLR' that carries Reg_VOL,
 // followed by the Value, Mode and Increment members on separate lines.
 void V_VolumeSlide::DebugDump( FILE* dump, const char* title, const char* nameLR )
 {
 	fprintf( dump,
 		"%s Volume for %s Channel:\t%x\n  - Value:     %x\n  - Mode:      %x\n  - Increment: %x\n",
 		title, nameLR,
 		Reg_VOL, Value, Mode, Increment
 	);
 }
 // Dumps both channels of a stereo volume slide pair to 'dump', left first and
 // then right, by delegating to each channel's own DebugDump with the same 'title'.
 void V_VolumeSlideLR::DebugDump( FILE* dump, const char* title )
 {
 	Left.DebugDump( dump, title, "Left" );
 	Right.DebugDump( dump, title, "Right" );
 }
 // Writes the left and right volume values to 'dump' in hexadecimal, one line
 // per channel, each labelled with 'title' and the channel name.
 void V_VolumeLR::DebugDump( FILE* dump, const char* title )
 {
 	// Both lines share the same layout; only the channel label and value differ.
 	const char* const lineFormat = "Volume for %s (%s Channel):\t%x\n";
 	fprintf( dump, lineFormat, title, "Left", Left );
 	fprintf( dump, lineFormat, title, "Right", Right );
 }
 void DoFullDump()
 {
 #ifdef SPU2_LOG
@ -98,32 +119,18 @@ void DoFullDump()
 	if(!CoresDump()) return;
 	dump = _wfopen( CoresDumpFileName, _T("wt") );
-	if (dump) {
+	if (dump)
 	{
 		for(c=0;c<2;c++)
 		{
 			fprintf(dump,"#### CORE %d DUMP.\n",c);
-			fprintf(dump,"Master Volume for Left Channel: %x\n"
+
-						 "  - Value:     %x\n"
+			Cores[c].MasterVol.DebugDump( dump, "Master" );
-						 "  - Mode:      %x\n"
+
-						 "  - Increment: %x\n",
+			Cores[c].ExtVol.DebugDump( dump, "External Data Input" );
-						 Cores[c].MasterL.Reg_VOL,
+			Cores[c].InpVol.DebugDump( dump, "Voice Data Input [dry]" );
-						 Cores[c].MasterL.Value,
+			Cores[c].FxVol.DebugDump( dump, "Effects/Reverb [wet]" );
-						 Cores[c].MasterL.Mode,
+
 						 Cores[c].MasterL.Increment);
 			fprintf(dump,"Master Volume for Right Channel: %x\n"
 						 "  - Value:     %x\n"
 						 "  - Mode:      %x\n"
 						 "  - Increment: %x\n",
 						 Cores[c].MasterR.Reg_VOL,
 						 Cores[c].MasterR.Value,
 						 Cores[c].MasterR.Mode,
 						 Cores[c].MasterR.Increment);
 			fprintf(dump,"Volume for External Data Input (Left Channel):  %x\n",Cores[c].ExtL);
 			fprintf(dump,"Volume for External Data Input (Right Channel): %x\n",Cores[c].ExtR);
 			fprintf(dump,"Volume for Sound Data Input (Left Channel):     %x\n",Cores[c].InpL);
 			fprintf(dump,"Volume for Sound Data Input (Right Channel):    %x\n",Cores[c].InpR);
 			fprintf(dump,"Volume for Output from Effects (Left Channel):  %x\n",Cores[c].FxL);
 			fprintf(dump,"Volume for Output from Effects (Right Channel): %x\n",Cores[c].FxR);
 			fprintf(dump,"Interrupt Address:          %x\n",Cores[c].IRQA);
 			fprintf(dump,"DMA Transfer Start Address: %x\n",Cores[c].TSA);
 			fprintf(dump,"External Input to Direct Output (Left):    %s\n",Cores[c].ExtDryL?"Yes":"No");
@ -156,24 +163,11 @@ void DoFullDump()
 			fprintf(dump,"  - ENDX:   %x\n",Cores[c].Regs.VMIXER);
 			fprintf(dump,"  - STATX:  %x\n",Cores[c].Regs.VMIXEL);
 			fprintf(dump,"  - ATTR:   %x\n",Cores[c].Regs.VMIXER);
-			for(v=0;v<24;v++) {
+			for(v=0;v<24;v++)
 			{
 				fprintf(dump,"Voice %d:\n",v);
-				fprintf(dump,"  - Volume for Left Channel: %x\n"
+				Cores[c].Voices[v].Volume.DebugDump( dump, "" );
-							 "     - Value:     %x\n"
+				
 							 "     - Mode:      %x\n"
 							 "     - Increment: %x\n",
 							 Cores[c].Voices[v].VolumeL.Reg_VOL,
 							 Cores[c].Voices[v].VolumeL.Value,
 							 Cores[c].Voices[v].VolumeL.Mode,
 							 Cores[c].Voices[v].VolumeL.Increment);
 				fprintf(dump,"  - Volume for Right Channel: %x\n"
 							 "     - Value:     %x\n"
 							 "     - Mode:      %x\n"
 							 "     - Increment: %x\n",
 							 Cores[c].Voices[v].VolumeR.Reg_VOL,
 							 Cores[c].Voices[v].VolumeR.Value,
 							 Cores[c].Voices[v].VolumeR.Mode,
 							 Cores[c].Voices[v].VolumeR.Increment);
 				fprintf(dump,"  - ADSR Envelope: %x & %x\n"
 							 "     - Ar: %x\n"
 							 "     - Am: %x\n"
@ -197,6 +191,7 @@ void DoFullDump()
 							 Cores[c].Voices[v].ADSR.ReleaseMode,
 							 Cores[c].Voices[v].ADSR.Phase,
 							 Cores[c].Voices[v].ADSR.Value);
 				fprintf(dump,"  - Pitch:     %x\n",Cores[c].Voices[v].Pitch);
 				fprintf(dump,"  - Modulated: %s\n",Cores[c].Voices[v].Modulated?"Yes":"No");
 				fprintf(dump,"  - Source:    %s\n",Cores[c].Voices[v].Noise?"Noise":"Wave");
@ -204,12 +199,12 @@ void DoFullDump()
 				fprintf(dump,"  - Direct Output for Right Channel:  %s\n",Cores[c].Voices[v].DryR?"Yes":"No");
 				fprintf(dump,"  - Effects Output for Left Channel:  %s\n",Cores[c].Voices[v].WetL?"Yes":"No");
 				fprintf(dump,"  - Effects Output for Right Channel: %s\n",Cores[c].Voices[v].WetR?"Yes":"No");
-				fprintf(dump,"  - Loop Start Adress:  %x\n",Cores[c].Voices[v].LoopStartA);
+				fprintf(dump,"  - Loop Start Address:  %x\n",Cores[c].Voices[v].LoopStartA);
-				fprintf(dump,"  - Sound Start Adress: %x\n",Cores[c].Voices[v].StartA);
+				fprintf(dump,"  - Sound Start Address: %x\n",Cores[c].Voices[v].StartA);
-				fprintf(dump,"  - Next Data Adress:   %x\n",Cores[c].Voices[v].NextA);
+				fprintf(dump,"  - Next Data Address:   %x\n",Cores[c].Voices[v].NextA);
-				fprintf(dump,"  - Play Start Cycle:   %d\n",Cores[c].Voices[v].PlayCycle);
+				fprintf(dump,"  - Play Start Cycle:    %d\n",Cores[c].Voices[v].PlayCycle);
-				fprintf(dump,"  - Play Status:        %s\n",(Cores[c].Voices[v].ADSR.Phase>0)?"Playing":"Not Playing");
+				fprintf(dump,"  - Play Status:         %s\n",(Cores[c].Voices[v].ADSR.Phase>0)?"Playing":"Not Playing");
-				fprintf(dump,"  - Block Sample:       %d\n",Cores[c].Voices[v].SCurrent);
+				fprintf(dump,"  - Block Sample:        %d\n",Cores[c].Voices[v].SCurrent);
 			}
 			fprintf(dump,"#### END OF DUMP.\n\n");
 		}
--- a/plugins/spu2-x/src/Debug.h
+++ b/plugins/spu2-x/src/Debug.h
@ -52,9 +52,10 @@ namespace WaveDump
 	,	CoreSrc_Count
 	};	
-	void Open();
+	extern void Open();
-	void Close();
+	extern void Close();
-	void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right );
+	extern void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right );
 	extern void WriteCore( uint coreidx, CoreSourceType src, const StereoOut16& sample );
 }
 using WaveDump::CoreSrc_Input;
--- a/plugins/spu2-x/src/Decoder.cpp
+++ b/plugins/spu2-x/src/Decoder.cpp
@ -58,7 +58,6 @@ int state=0;
 FILE *fSpdifDump;
 extern u32 core;
 void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR);
 union spdif_frame { // total size: 32bits
 	struct {
@ -132,22 +131,23 @@ s32 stoi(sample_t n) //input: [-1..1]
 void spdif_update()
 {
-	s32 Data,Zero;
+	StereoOut32 Data;
 	core=0;
 	V_Core& thiscore( Cores[core] );
 	for(int i=0;i<data_rate;i++)
 	{
-		ReadInput(thiscore, Data,Zero);
+		// Right side data should be zero / ignored
 		ReadInput( thiscore, Data );
 		if(fSpdifDump)
 		{
-			fwrite(&Data,4,1,fSpdifDump);
+			fwrite(&Data.Left,4,1,fSpdifDump);
-			fwrite(&Zero,4,1,fSpdifDump);
+			fwrite(&Data.Right,4,1,fSpdifDump);		// zero side.
 		}
 		if(ac3dec)
-			spdif_Write(Data);
+			spdif_Write(Data.Left);
 	}
 	if(!ac3dec) return;
--- a/plugins/spu2-x/src/DllInterface.cpp
+++ b/plugins/spu2-x/src/DllInterface.cpp
@ -120,7 +120,7 @@ EXPORT_C_(void) SPU2about()
 EXPORT_C_(s32) SPU2test()
 {
-	return SndTest();
+	return SndBuffer::Test();
 }
 EXPORT_C_(s32) SPU2init() 
@ -228,22 +228,20 @@ EXPORT_C_(s32) SPU2open(void *pDsp)
 	debugDialogOpen=1;
 	}*/
-	spu2open=true;
+	spu2open = true;
-	if (!SndInit())
+	try
 	{
 		SndBuffer::Init();
 		spdif_init();
 		DspLoadLibrary(dspPlugin,dspPluginModule);
 		WaveDump::Open();
 		return 0;
 	}
-	else 
+	catch( ... )
 	{
 		SPU2close();
 		return -1;
-	};
+	}
 	return 0;
 }
 EXPORT_C_(void) SPU2close() 
@ -253,7 +251,7 @@ EXPORT_C_(void) SPU2close()
 	DspCloseLibrary();
 	spdif_shutdown();
-	SndClose();
+	SndBuffer::Cleanup();
 	spu2open = false;
 }
--- a/plugins/spu2-x/src/Mixer.cpp
+++ b/plugins/spu2-x/src/Mixer.cpp
@ -61,11 +61,17 @@ __forceinline s32 MulShr32( s32 srcval, s32 mulval )
 	// It won't fly on big endian machines though... :)
 }
-__forceinline s32 clamp_mix(s32 x, u8 bitshift)
+__forceinline s32 clamp_mix( s32 x, u8 bitshift )
 {
 	return GetClamped( x, -0x8000<<bitshift, 0x7fff<<bitshift );
 }
 // Saturates both channels of 'sample', in place, to the signed 16 bit range
 // scaled up by 'bitshift' bits: [ -0x8000 * 2^bitshift, 0x7fff * 2^bitshift ].
 __forceinline void clamp_mix( StereoOut32& sample, u8 bitshift )
 {
 	// Scale the limits by multiplication rather than shifting them directly:
 	// left-shifting a negative value (-0x8000<<bitshift) is undefined behavior
 	// prior to C++20.  The products are the same values the shifts were meant to give.
 	const s32 scale   = 1 << bitshift;
 	const s32 lowest  = -0x8000 * scale;
 	const s32 highest = 0x7fff * scale;
 	Clampify( sample.Left, lowest, highest );
 	Clampify( sample.Right, lowest, highest );
 }
 static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2)
 {
 	const s32 header = *block;
@ -171,7 +177,7 @@ int g_counter_cache_ignores = 0;
 #define XAFLAG_LOOP			(1ul<<1)
 #define XAFLAG_LOOP_START	(1ul<<2)
-static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Voice& vc, s32& Data) 
+static s32 __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Voice& vc ) 
 {
 	if (vc.SCurrent<28)
 	{
@ -259,19 +265,19 @@ static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Vo
 	IncrementNextA( thiscore, vc );
 _skipIncrement:
-	Data = vc.SBuffer[vc.SCurrent++];
+	return vc.SBuffer[vc.SCurrent++];
 }
 /////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////
 //                                                                                     //
-static void __forceinline GetNoiseValues(s32& VD) 
+static s32 __forceinline GetNoiseValues()
 {
 	static s32 Seed = 0x41595321;
 	s32 retval = 0x8000;
-	if(Seed&0x100) VD = (s32)((Seed&0xff)<<8);
+	if( Seed&0x100 ) retval = (Seed&0xff) << 8;
-	else if(!(Seed&0xffff)) VD = (s32)0x8000;
+	else if( Seed&0xffff ) retval = 0x7fff;
 	else VD = (s32)0x7fff;
 	__asm {
 		MOV eax,Seed
@ -284,6 +290,7 @@ static void __forceinline GetNoiseValues(s32& VD)
 		ROR eax,3
 		MOV Seed,eax
 	}
 	return retval;
 }
 /////////////////////////////////////////////////////////////////////////////////////////
@ -299,6 +306,22 @@ static __forceinline s32 ApplyVolume(s32 data, s32 volume)
 	return MulShr32( data<<1, volume );
 }
 // Scales each channel of 'data' by the matching channel of a plain (non-sliding)
 // left/right volume pair, using the scalar ApplyVolume overload, and returns the
 // result as a new sample.
 static __forceinline StereoOut32 ApplyVolume( const StereoOut32& data, const V_VolumeLR& volume )
 {
 	const s32 scaledLeft  = ApplyVolume( data.Left, volume.Left );
 	const s32 scaledRight = ApplyVolume( data.Right, volume.Right );
 	return StereoOut32( scaledLeft, scaledRight );
 }
 // Scales each channel of 'data' by the current Value of the matching channel of
 // a sliding left/right volume pair, using the scalar ApplyVolume overload, and
 // returns the result as a new sample.
 static __forceinline StereoOut32 ApplyVolume( const StereoOut32& data, const V_VolumeSlideLR& volume )
 {
 	const s32 scaledLeft  = ApplyVolume( data.Left, volume.Left.Value );
 	const s32 scaledRight = ApplyVolume( data.Right, volume.Right.Value );
 	return StereoOut32( scaledLeft, scaledRight );
 }
 static void __forceinline UpdatePitch( V_Voice& vc )
 {
 	s32 pitch;
@ -339,14 +362,12 @@ static __forceinline void CalculateADSR( V_Core& thiscore, V_Voice& vc )
 }
 // Returns a 16 bit result.
-static void __forceinline GetVoiceValues_Linear(V_Core& thiscore, V_Voice& vc, s32& Value)
+static s32 __forceinline GetVoiceValues_Linear( V_Core& thiscore, V_Voice& vc )
 {
 	while( vc.SP > 0 )
 	{
 		vc.PV2 = vc.PV1;
-
+		vc.PV1 = GetNextDataBuffered( thiscore, vc );
 		GetNextDataBuffered( thiscore, vc, vc.PV1 );
 		vc.SP -= 4096;
 	}
@ -358,28 +379,28 @@ static void __forceinline GetVoiceValues_Linear(V_Core& thiscore, V_Voice& vc, s
 	if(Interpolation==0)
 	{
-		Value = ApplyVolume( vc.PV1, vc.ADSR.Value );
+		return ApplyVolume( vc.PV1, vc.ADSR.Value );
 	} 
 	else //if(Interpolation==1) //must be linear
 	{
 		s32 t0 = vc.PV2 - vc.PV1;
-		Value = MulShr32( (vc.PV1<<1) - ((t0*vc.SP)>>11), vc.ADSR.Value );
+		return MulShr32( (vc.PV1<<1) - ((t0*vc.SP)>>11), vc.ADSR.Value );
 	}
 }
 // Returns a 16 bit result.
-static void __forceinline GetVoiceValues_Cubic(V_Core& thiscore, V_Voice& vc, s32& Value)
+static s32 __forceinline GetVoiceValues_Cubic( V_Core& thiscore, V_Voice& vc )
 {
 	while( vc.SP > 0 )
 	{
-		vc.PV4=vc.PV3;
+		vc.PV4 = vc.PV3;
-		vc.PV3=vc.PV2;
+		vc.PV3 = vc.PV2;
-		vc.PV2=vc.PV1;
+		vc.PV2 = vc.PV1;
-		GetNextDataBuffered( thiscore, vc, vc.PV1 );
+		vc.PV1 = GetNextDataBuffered( thiscore, vc );
-		vc.PV1<<=2;
+		vc.PV1 <<= 2;
 		vc.SPc = vc.SP&4095;	// just the fractional part, please!
-		vc.SP-=4096;
+		vc.SP -= 4096;
 	}
 	CalculateADSR( thiscore, vc );
@ -398,19 +419,21 @@ static void __forceinline GetVoiceValues_Cubic(V_Core& thiscore, V_Voice& vc, s3
 	// Note!  It's very important that ADSR stay as accurate as possible.  By the way
 	// it is used, various sound effects can end prematurely if we truncate more than
 	// one or two bits.
-	Value = MulShr32( val, vc.ADSR.Value>>1 );
+	return MulShr32( val, vc.ADSR.Value>>1 );
 }
 // Noise values need to be mixed without going through interpolation, since it
 // can wreak havoc on the noise (causing muffling or popping).  Not that this noise
 // generator is accurate in its own right.. but eh, ah well :)
-static void __forceinline __fastcall GetNoiseValues(V_Core& thiscore, V_Voice& vc, s32& Data)
+static s32 __forceinline __fastcall GetNoiseValues( V_Core& thiscore, V_Voice& vc )
 {
-	while(vc.SP>=4096) 
+	s32 retval = GetNoiseValues();
 	/*while(vc.SP>=4096)
 	{
-		GetNoiseValues( Data );
+		retval = GetNoiseValues();
 		vc.SP-=4096;
-	}
+	}*/
 	// GetNoiseValues can't set the phase zero on us unexpectedly
 	// like GetVoiceValues can.  Better assert just in case though..
@ -419,14 +442,14 @@ static void __forceinline __fastcall GetNoiseValues(V_Core& thiscore, V_Voice& v
 	CalculateADSR( thiscore, vc );
 	// Yup, ADSR applies even to noise sources...
-	Data = MulShr32( Data, vc.ADSR.Value );
+	return ApplyVolume( retval, vc.ADSR.Value );
 }
 /////////////////////////////////////////////////////////////////////////////////////////
 /////////////////////////////////////////////////////////////////////////////////////////
 //                                                                                     //
-void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) 
+void __fastcall ReadInput( V_Core& thiscore, StereoOut32& PData ) 
 {
 	if((thiscore.AutoDMACtrl&(core+1))==(core+1))
 	{
@ -442,17 +465,17 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
 			// so we just downgrade it to 16 bits for now.
 #ifdef PCM24_S1_INTERLEAVE
-			*PDataL=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1))));
+			*PData.Left=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1))));
-			*PDataR=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1)+2)));
+			*PData.Right=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1)+2)));
 #else
 			s32 *pl=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos]);
 			s32 *pr=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos+0x200]);
-			PDataL=*pl;
+			PData.Left = *pl;
-			PDataR=*pr;
+			PData.Right = *pr;
 #endif
-			PDataL>>=1; //give 31 bit data (SndOut downsamples the rest of the way)
+			PData.Left >>= 2; //give 30 bit data (SndOut downsamples the rest of the way)
-			PDataR>>=1;
+			PData.Right >>= 2;
 			thiscore.InputPos+=2;
 			if((thiscore.InputPos==0x100)||(thiscore.InputPos>=0x200)) {
@ -495,8 +518,8 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
 			s32 *pl=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos]);
 			s32 *pr=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos+0x200]);
-			PDataL=*pl;
+			PData.Left  = *pl;
-			PDataR=*pr;
+			PData.Right = *pr;
 			thiscore.InputPos+=2;
 			if(thiscore.InputPos>=0x200) {
@ -540,16 +563,16 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
 			else
 			{
 				// Using the temporary buffer because this area gets overwritten by some other code.
-				//*PDataL=(s32)*(s16*)(spu2mem+0x2000+(core<<10)+thiscore.InputPos);
+				//*PData.Left  = (s32)*(s16*)(spu2mem+0x2000+(core<<10)+thiscore.InputPos);
-				//*PDataR=(s32)*(s16*)(spu2mem+0x2200+(core<<10)+thiscore.InputPos);
+				//*PData.Right = (s32)*(s16*)(spu2mem+0x2200+(core<<10)+thiscore.InputPos);
-				tl=(s32)thiscore.ADMATempBuffer[thiscore.InputPos];
+				tl = (s32)thiscore.ADMATempBuffer[thiscore.InputPos];
-				tr=(s32)thiscore.ADMATempBuffer[thiscore.InputPos+0x200];
+				tr = (s32)thiscore.ADMATempBuffer[thiscore.InputPos+0x200];
 			}
-			PDataL=tl;
+			PData.Left  = tl;
-			PDataR=tr;
+			PData.Right = tr;
 			thiscore.InputPos++;
 			if((thiscore.InputPos==0x100)||(thiscore.InputPos>=0x200)) {
@ -585,9 +608,10 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
 			}
 		}
 	}
-	else {
+	else
-		PDataL=0;
+	{
-		PDataR=0;
+		PData.Left  = 0;
 		PData.Right = 0;
 	}
 }
@ -595,29 +619,21 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
 /////////////////////////////////////////////////////////////////////////////////////////
 //                                                                                     //
-static void __forceinline __fastcall ReadInputPV(V_Core& thiscore, s32& ValL,s32& ValR) 
+static __forceinline StereoOut32 ReadInputPV( V_Core& thiscore ) 
 {
 	s32 DL=0, DR=0;
 	u32 pitch=AutoDMAPlayRate[core];
 	if(pitch==0) pitch=48000;
-	thiscore.ADMAPV+=pitch;
+	thiscore.ADMAPV += pitch;
 	while(thiscore.ADMAPV>=48000) 
 	{
-		ReadInput(thiscore, DL,DR);
+		ReadInput( thiscore, thiscore.ADMAP );
-		thiscore.ADMAPV-=48000;
+		thiscore.ADMAPV -= 48000;
 		thiscore.ADMAPL=DL;
 		thiscore.ADMAPR=DR;
 	}
 	ValL=thiscore.ADMAPL;
 	ValR=thiscore.ADMAPR;
 	// Apply volumes:
-	ValL = ApplyVolume( ValL, thiscore.InpL );
+	return ApplyVolume( thiscore.ADMAP, thiscore.InpVol );
 	ValR = ApplyVolume( ValR, thiscore.InpR );
 }
 /////////////////////////////////////////////////////////////////////////////////////////
@ -637,108 +653,107 @@ static __forceinline void spu2M_WriteFast( u32 addr, s16 value )
 }
-static __forceinline void MixVoice( V_Core& thiscore, V_Voice& vc, s32& VValL, s32& VValR )
+static __forceinline StereoOut32 MixVoice( V_Core& thiscore, V_Voice& vc )
 {
 	s32 Value=0;
 	VValL = 0;
 	VValR = 0;
 	// Most games don't use much volume slide effects.  So only call the UpdateVolume
 	// methods when needed by checking the flag outside the method here...
-	vc.VolumeL.Update();
+	vc.Volume.Update();
-	vc.VolumeR.Update();
+
 	// SPU2 Note: The spu2 continues to process voices for eternity, always, so we
 	// have to run through all the motions of updating the voice regardless of its
 	// audible status.  Otherwise IRQs might not trigger and emulation might fail.
 	if( vc.ADSR.Phase > 0 )
 	{
 		UpdatePitch( vc );
 		s32 Value;
 		if( vc.Noise )
-			GetNoiseValues( thiscore, vc, Value );
+			Value = GetNoiseValues( thiscore, vc );
 		else
 		{
 			if( Interpolation == 2 )
-				GetVoiceValues_Cubic( thiscore, vc, Value );
+				Value = GetVoiceValues_Cubic( thiscore, vc );
 			else
-				GetVoiceValues_Linear( thiscore, vc, Value );
+				Value = GetVoiceValues_Linear( thiscore, vc );
 		}
-		// Record the output (used for modulation effects)
+		// Note: All values recorded into OutX (may be used for modulation later)
 		vc.OutX = Value;
 		if( IsDevBuild )
-			DebugCores[core].Voices[voice].displayPeak = max(DebugCores[core].Voices[voice].displayPeak,abs(Value));
+			DebugCores[core].Voices[voice].displayPeak = max(DebugCores[core].Voices[voice].displayPeak,abs(vc.OutX));
-		// TODO : Implement this using high-def MulShr32.
+		// Write-back of raw voice data (post ADSR applied)
 		//   vc.VolumeL/R are 15 bits.  Value should be 32 bits (but is currently 16)
-		VValL = ApplyVolume(Value,vc.VolumeL.Value);
+		if (voice==1)      spu2M_WriteFast( 0x400 + (core<<12) + OutPos, vc.OutX );
-		VValR = ApplyVolume(Value,vc.VolumeR.Value);
+		else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, vc.OutX );
 		return ApplyVolume( StereoOut32( Value, Value ), vc.Volume );
 	}
 	else
 	{
 		// Write-back of raw voice data (some zeros since the voice is "dead")
-	// Write-back of raw voice data (post ADSR applied)
+		if (voice==1)      spu2M_WriteFast( 0x400 + (core<<12) + OutPos, 0 );
-
+		else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, 0 );
 	if (voice==1)      spu2M_WriteFast( 0x400 + (core<<12) + OutPos, (s16)Value );
 	else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, (s16)Value );
 		return StereoOut32( 0, 0 );
 	}
 }
-static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
+static StereoOut32 __fastcall MixCore( const StereoOut32& Input, const StereoOut32& Ext )
 {
 	s32 RVL,RVR;
 	s32 SDL=0,SDR=0;
 	s32 SWL=0,SWR=0;
 	V_Core& thiscore( Cores[core] );
 	thiscore.MasterVol.Update();
 	StereoOut32 Dry(0,0), Wet(0,0);
 	for( voice=0; voice<24; ++voice )
 	{
 		s32 VValL,VValR;
 		V_Voice& vc( thiscore.Voices[voice] );
-		MixVoice( thiscore, vc, VValL, VValR );
+		StereoOut32 VVal( MixVoice( thiscore, vc ) );
 		// Note: Results from MixVoice are ranged at 16 bits.
 		// Following muls are toggles only (0 or 1)
-		SDL += VValL & vc.DryL;
+		Dry.Left += VVal.Left & vc.DryL;
-		SDR += VValR & vc.DryR;
+		Dry.Right += VVal.Right & vc.DryR;
-		SWL += VValL & vc.WetL;
+		Wet.Left += VVal.Left & vc.WetL;
-		SWR += VValR & vc.WetR;
+		Wet.Right += VVal.Right & vc.WetR;
 	}
 	// Saturate final result to standard 16 bit range.
-	SDL = clamp_mix( SDL );
+	clamp_mix( Dry );
-	SDR = clamp_mix( SDR );
+	clamp_mix( Wet );
 	SWL = clamp_mix( SWL );
 	SWR = clamp_mix( SWR );
 	// Write Mixed results To Output Area
-	spu2M_WriteFast( 0x1000 + (core<<12) + OutPos, (s16)SDL );
+	spu2M_WriteFast( 0x1000 + (core<<12) + OutPos, Dry.Left );
-	spu2M_WriteFast( 0x1200 + (core<<12) + OutPos, (s16)SDR );
+	spu2M_WriteFast( 0x1200 + (core<<12) + OutPos, Dry.Right );
-	spu2M_WriteFast( 0x1400 + (core<<12) + OutPos, (s16)SWL );
+	spu2M_WriteFast( 0x1400 + (core<<12) + OutPos, Wet.Left );
-	spu2M_WriteFast( 0x1600 + (core<<12) + OutPos, (s16)SWR );
+	spu2M_WriteFast( 0x1600 + (core<<12) + OutPos, Wet.Right );
 	// Write mixed results to logfile (if enabled)
-	WaveDump::WriteCore( core, CoreSrc_DryVoiceMix, SDL, SDR );
+	WaveDump::WriteCore( core, CoreSrc_DryVoiceMix, Dry );
-	WaveDump::WriteCore( core, CoreSrc_WetVoiceMix, SWL, SWR );
+	WaveDump::WriteCore( core, CoreSrc_WetVoiceMix, Wet );
 	s32 TDL,TDR;
 	// Mix in the Input data
-	TDL = OutL & thiscore.InpDryL;
+
-	TDR = OutR & thiscore.InpDryR;
+	StereoOut32 TD(
 		Input.Left & thiscore.InpDryL,
 		Input.Right & thiscore.InpDryR
 	);
 	// Mix in the Voice data
-	TDL += SDL & thiscore.SndDryL;
+	TD.Left += Dry.Left & thiscore.SndDryL;
-	TDR += SDR & thiscore.SndDryR;
+	TD.Right += Dry.Right & thiscore.SndDryR;
 	// Mix in the External (nothing/core0) data
-	TDL += ExtL & thiscore.ExtDryL;
+	TD.Left += Ext.Left & thiscore.ExtDryL;
-	TDR += ExtR & thiscore.ExtDryR;
+	TD.Right += Ext.Right & thiscore.ExtDryR;
 	if( !EffectsDisabled )
 	{
@ -747,138 +762,106 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
 		if( thiscore.FxEnable )
 		{
 			s32 TWL,TWR;
 			// Mix Input, Voice, and External data:
-			TWL = OutL & thiscore.InpWetL;
+			StereoOut32 TW(
-			TWR = OutR & thiscore.InpWetR;
+				Input.Left & thiscore.InpWetL,
-			TWL += SWL & thiscore.SndWetL;
+				Input.Right & thiscore.InpWetR
-			TWR += SWR & thiscore.SndWetR;
+			);
 			TWL += ExtL & thiscore.ExtWetL; 
 			TWR += ExtR & thiscore.ExtWetR;
-			WaveDump::WriteCore( core, CoreSrc_PreReverb, TWL, TWR );
+			TW.Left += Wet.Left & thiscore.SndWetL;
 			TW.Right += Wet.Right & thiscore.SndWetR;
 			TW.Left += Ext.Left & thiscore.ExtWetL; 
 			TW.Right += Ext.Right & thiscore.ExtWetR;
-			DoReverb( thiscore, RVL, RVR, TWL, TWR );
+			WaveDump::WriteCore( core, CoreSrc_PreReverb, TW );
 			StereoOut32 RV( DoReverb( thiscore, TW ) );
 			// Volume boost after effects application.  Boosting volume prior to effects
 			// causes slight overflows in some games, and the volume boost is required.
 			// (like all other volumes on SPU2, reverb coefficients and stuff are signed,
 			// range -50% to 50%, thus *2 is needed)
-			RVL *= 2;
+			RV.Left  *= 2;
-			RVR *= 2;
+			RV.Right *= 2;
-			WaveDump::WriteCore( core, CoreSrc_PostReverb, RVL, RVR );
+			WaveDump::WriteCore( core, CoreSrc_PostReverb, RV );
 			TWL = ApplyVolume(RVL,thiscore.FxL);
 			TWR = ApplyVolume(RVR,thiscore.FxR);
 			// Mix Dry+Wet
-			OutL = TDL + TWL;
+			return StereoOut32( TD + ApplyVolume( RV, thiscore.FxVol ) );
 			OutR = TDR + TWR;
 		}
 		else
 		{
 			WaveDump::WriteCore( core, CoreSrc_PreReverb, 0, 0 );
 			WaveDump::WriteCore( core, CoreSrc_PostReverb, 0, 0 );
 			OutL = TDL;
 			OutR = TDR;
 		}
 	}
-	else
+	return TD;
 	{
 		OutL = TDL;
 		OutR = TDR;
 	}
 	// Apply Master Volume.  The core will need this when the function returns.
 	thiscore.MasterL.Update();
 	thiscore.MasterR.Update();
 }
 // used to throttle the output rate of cache stat reports
 static int p_cachestat_counter=0;
-void Mix() 
+__forceinline void Mix() 
 {
 	s32 ExtL=0, ExtR=0, OutL, OutR;
 	// ****  CORE ZERO  ****
 	core = 0;
-	core=0;
+	// Note: Playmode 4 is SPDIF, which overrides other inputs.
-	if( (PlayMode&4) == 0 )
+	StereoOut32 Ext( (PlayMode&4) ? StereoOut32::Empty : ReadInputPV( Cores[0] ) );
-	{
+	WaveDump::WriteCore( 0, CoreSrc_Input, Ext );
 		// get input data from input buffers
 		ReadInputPV(Cores[0], ExtL, ExtR);
 		WaveDump::WriteCore( 0, CoreSrc_Input, ExtL, ExtR );
 	}
-	MixCore( ExtL, ExtR, 0, 0 );
+	Ext = MixCore( Ext, StereoOut32::Empty );
 	if( (PlayMode & 4) || (Cores[0].Mute!=0) )
-	{
+		Ext = StereoOut32( 0, 0 );
 		ExtL=0;
 		ExtR=0;
 	}
 	else
 	{
-		ExtL = ApplyVolume( ExtL, Cores[0].MasterL.Value );
+		Ext = ApplyVolume( Ext, Cores[0].MasterVol );
-		ExtR = ApplyVolume( ExtR, Cores[0].MasterR.Value );
+		clamp_mix( Ext );
 	}
 	// Commit Core 0 output to ram before mixing Core 1:
-	ExtL = clamp_mix( ExtL );
+	spu2M_WriteFast( 0x800 + OutPos, Ext.Left );
-	ExtR = clamp_mix( ExtR );
+	spu2M_WriteFast( 0xA00 + OutPos, Ext.Right );
-
+	WaveDump::WriteCore( 0, CoreSrc_External, Ext );
 	spu2M_WriteFast( 0x800 + OutPos, ExtL );
 	spu2M_WriteFast( 0xA00 + OutPos, ExtR );
 	WaveDump::WriteCore( 0, CoreSrc_External, ExtL, ExtR );
 	// ****  CORE ONE  ****
 	core = 1;
-	if( (PlayMode&8) != 8 )
+	StereoOut32 Out( (PlayMode&8) ? StereoOut32::Empty : ReadInputPV( Cores[1] ) );
-	{
+	WaveDump::WriteCore( 1, CoreSrc_Input, Out );
 		ReadInputPV(Cores[1], OutL, OutR);	// get input data from input buffers
 		WaveDump::WriteCore( 1, CoreSrc_Input, OutL, OutR );
 	}
-	// Apply volume to the external (Core 0) input data.
+	ApplyVolume( Ext, Cores[1].ExtVol );
-
+	Out = MixCore( Out, Ext );
 	MixCore( OutL, OutR, ApplyVolume( ExtL, Cores[1].ExtL), ApplyVolume( ExtR, Cores[1].ExtR) );
 	if( PlayMode & 8 )
 	{
 		// Experimental CDDA support
 		// The CDDA overrides all other mixer output.  It's a direct feed!
-		ReadInput(Cores[1], OutL, OutR);
+		ReadInput( Cores[1], Out );
 		//WaveLog::WriteCore( 1, "CDDA-32", OutL, OutR );
 	}
 	else
 	{
-		OutL = MulShr32( OutL<<10, Cores[1].MasterL.Value );
+		Out.Left = MulShr32( Out.Left<<SndOutVolumeShift, Cores[1].MasterVol.Left.Value );
-		OutR = MulShr32( OutR<<10, Cores[1].MasterR.Value );
+		Out.Right = MulShr32( Out.Right<<SndOutVolumeShift, Cores[1].MasterVol.Right.Value );
-		// Final Clamp.
+		// Final Clamp!
 		// This could be circumvented by using half the total output volume, although
-		// I suspect clamping at the higher volume is more true to the PS2's real
+		// I suspect this approach (clamping at the higher volume) is more true to the
-		// implementation.
+		// PS2's real implementation.
-		OutL = clamp_mix( OutL, SndOutVolumeShift );
+		clamp_mix( Out, SndOutVolumeShift );
 		OutR = clamp_mix( OutR, SndOutVolumeShift );
 	}
 	// Update spdif (called each sample)
 	if(PlayMode&4)
 		spdif_update();
-	// AddToBuffer
+	SndBuffer::Write( Out );
 	SndWrite(OutL, OutR);
 	// Update AutoDMA output positioning
 	OutPos++;
--- a/plugins/spu2-x/src/RegTable.cpp
+++ b/plugins/spu2-x/src/RegTable.cpp
@ -31,14 +31,14 @@ const u16 zero=0;
 	PCORE(c,Voices[v].##p)
 #define PVC(c,v) \
-	PVCP(c,v,VolumeL.Reg_VOL), \
+	PVCP(c,v,Volume.Left.Reg_VOL), \
-	PVCP(c,v,VolumeR.Reg_VOL), \
+	PVCP(c,v,Volume.Right.Reg_VOL), \
 	PVCP(c,v,Pitch), \
 	PVCP(c,v,ADSR.Reg_ADSR1), \
 	PVCP(c,v,ADSR.Reg_ADSR2), \
 	PVCP(c,v,ADSR.Value)+1, \
-	PVCP(c,v,VolumeL.Value)+1, \
+	PVCP(c,v,Volume.Left.Value)+1, \
-	PVCP(c,v,VolumeR.Value)+1
+	PVCP(c,v,Volume.Right.Value)+1
 #define PVCA(c,v) \
 	PVCP(c,v,StartA)+1, \
@ -247,16 +247,16 @@ u16* regtable[0x800] =
 	PRAW(0x758),PRAW(0x75A),PRAW(0x75C),PRAW(0x75E),
 	//0x760: weird area
-	PCORE(0,MasterL.Reg_VOL),
+	PCORE(0,MasterVol.Left.Reg_VOL),
-	PCORE(0,MasterR.Reg_VOL),
+	PCORE(0,MasterVol.Right.Reg_VOL),
-	PCORE(0,FxL)+1,
+	PCORE(0,FxVol.Left)+1,
-	PCORE(0,FxR)+1,
+	PCORE(0,FxVol.Right)+1,
-	PCORE(0,ExtL)+1,
+	PCORE(0,ExtVol.Left)+1,
-	PCORE(0,ExtR)+1,
+	PCORE(0,ExtVol.Right)+1,
-	PCORE(0,InpL)+1,
+	PCORE(0,InpVol.Left)+1,
-	PCORE(0,InpR)+1,
+	PCORE(0,InpVol.Right)+1,
-	PCORE(0,MasterL.Value)+1,
+	PCORE(0,MasterVol.Left.Value)+1,
-	PCORE(0,MasterR.Value)+1,
+	PCORE(0,MasterVol.Right.Value)+1,
 	PCORE(0,Revb.IIR_ALPHA),
 	PCORE(0,Revb.ACC_COEF_A),
 	PCORE(0,Revb.ACC_COEF_B),
@ -268,16 +268,16 @@ u16* regtable[0x800] =
 	PCORE(0,Revb.IN_COEF_L),
 	PCORE(0,Revb.IN_COEF_R),
-	PCORE(1,MasterL.Reg_VOL),
+	PCORE(1,MasterVol.Left.Reg_VOL),
-	PCORE(1,MasterR.Reg_VOL),
+	PCORE(1,MasterVol.Right.Reg_VOL),
-	PCORE(1,FxL)+1,
+	PCORE(1,FxVol.Left)+1,
-	PCORE(1,FxR)+1,
+	PCORE(1,FxVol.Right)+1,
-	PCORE(1,ExtL)+1,
+	PCORE(1,ExtVol.Left)+1,
-	PCORE(1,ExtR)+1,
+	PCORE(1,ExtVol.Right)+1,
-	PCORE(1,InpL)+1,
+	PCORE(1,InpVol.Left)+1,
-	PCORE(1,InpR)+1,
+	PCORE(1,InpVol.Right)+1,
-	PCORE(1,MasterL.Value)+1,
+	PCORE(1,MasterVol.Left.Value)+1,
-	PCORE(1,MasterR.Value)+1,
+	PCORE(1,MasterVol.Right.Value)+1,
 	PCORE(1,Revb.IIR_ALPHA),
 	PCORE(1,Revb.ACC_COEF_A),
 	PCORE(1,Revb.ACC_COEF_B),
--- a/plugins/spu2-x/src/Reverb.cpp
+++ b/plugins/spu2-x/src/Reverb.cpp
@ -24,20 +24,18 @@
 static LPF_data lowpass_left( 11000, SampleRate );
 static LPF_data lowpass_right( 11000, SampleRate );
-static s32 EffectsBufferIndexer( V_Core& thiscore, s32 offset )
+static __forceinline s32 RevbGetIndexer( V_Core& thiscore, s32 offset )
 {
-	u32 pos = thiscore.EffectsStartA + thiscore.ReverbX + offset;
+	u32 pos = thiscore.ReverbX + offset;
 	// Need to use modulus here, because games can and will drop the buffer size
 	// without notice, and it leads to offsets several times past the end of the buffer.
 	if( pos > thiscore.EffectsEndA )
 	{
-		pos = thiscore.EffectsStartA + ((thiscore.ReverbX + offset) % (u32)thiscore.EffectsBufferSize);
+		//pos = thiscore.EffectsStartA + ((thiscore.ReverbX + offset) % (u32)thiscore.EffectsBufferSize);
-	}
+		pos -= thiscore.EffectsEndA+1;
-	else if( pos < thiscore.EffectsStartA )
+		pos += thiscore.EffectsStartA;
 	{
 		pos = thiscore.EffectsEndA+1 - ((thiscore.ReverbX + offset) % (u32)thiscore.EffectsBufferSize );
 	}
 	return pos;
 } 
@ -52,15 +50,16 @@ void Reverb_AdvanceBuffer( V_Core& thiscore )
 {
 	if( (Cycles & 1) && (thiscore.EffectsBufferSize > 0) )
 	{
-		thiscore.ReverbX += 1;
+		thiscore.ReverbX = RevbGetIndexer( thiscore, 1 );
-		if(thiscore.ReverbX >= (u32)thiscore.EffectsBufferSize )
+		//thiscore.ReverbX += 1;
-			thiscore.ReverbX %= (u32)thiscore.EffectsBufferSize;
+		//if(thiscore.ReverbX >= (u32)thiscore.EffectsBufferSize )
 		//	thiscore.ReverbX %= (u32)thiscore.EffectsBufferSize;
 	}
 }
 /////////////////////////////////////////////////////////////////////////////////////////
-void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR)
+StereoOut32 DoReverb( V_Core& thiscore, const StereoOut32& Input )
 {
 	// Reverb processing occurs at 24khz, so we skip processing every other sample,
 	// and use the previous calculation for this core instead.
@ -68,84 +67,90 @@ void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR)
 	if( thiscore.EffectsBufferSize <= 0 )
 	{
 		// StartA is past EndA, so effects are disabled.
 		OutL = InL;
 		OutR = InR;
 		//ConLog( " * SPU2: Effects disabled due to leapfrogged EffectsStart." );
-		return;
+		return Input;
 	}
-	if((Cycles&1)==0) 
+	if( (Cycles&1)==0 )
 	{
-		OutL = thiscore.LastEffectL;
+		StereoOut32 retval( thiscore.LastEffect );
-		OutR = thiscore.LastEffectR;
+		thiscore.LastEffect = Input;
-		
+		return retval;
 		thiscore.LastEffectL = InL;
 		thiscore.LastEffectR = InR;
 	}
 	else  
 	{
 		if( thiscore.RevBuffers.NeedsUpdated )
 			thiscore.UpdateEffectsBufferSize();
 		// Advance the current reverb buffer pointer, and cache the read/write addresses we'll be
 		// needing for this session of reverb.
-		const u32 src_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_A0 );
+		const u32 src_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_A0 );
-		const u32 src_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_A1 );
+		const u32 src_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_A1 );
-		const u32 src_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_B0 );
+		const u32 src_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_B0 );
-		const u32 src_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_B1 );
+		const u32 src_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_B1 );
-		const u32 dest_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A0 );
+		const u32 dest_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A0 );
-		const u32 dest_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A1 );
+		const u32 dest_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A1 );
-		const u32 dest_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B0 );
+		const u32 dest_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B0 );
-		const u32 dest_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B1 );
+		const u32 dest_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B1 );
-		const u32 dest2_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A0 + 1 );
+		const u32 dest2_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A0 + 1 );
-		const u32 dest2_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A1 + 1 );
+		const u32 dest2_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A1 + 1 );
-		const u32 dest2_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B0 + 1 );
+		const u32 dest2_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B0 + 1 );
-		const u32 dest2_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B1 + 1 );
+		const u32 dest2_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B1 + 1 );
-		const u32 acc_src_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_A0 );
+		const u32 acc_src_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_A0 );
-		const u32 acc_src_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_B0 );
+		const u32 acc_src_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_B0 );
-		const u32 acc_src_c0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_C0 );
+		const u32 acc_src_c0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_C0 );
-		const u32 acc_src_d0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_D0 );
+		const u32 acc_src_d0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_D0 );
-		const u32 acc_src_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_A1 );
+		const u32 acc_src_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_A1 );
-		const u32 acc_src_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_B1 );
+		const u32 acc_src_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_B1 );
-		const u32 acc_src_c1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_C1 );
+		const u32 acc_src_c1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_C1 );
-		const u32 acc_src_d1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_D1 );
+		const u32 acc_src_d1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_D1 );
-		const u32 fb_src_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A0 - thiscore.Revb.FB_SRC_A );
+		const u32 fb_src_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_A0 );
-		const u32 fb_src_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A1 - thiscore.Revb.FB_SRC_A );
+		const u32 fb_src_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_A1 );
-		const u32 fb_src_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B0 - thiscore.Revb.FB_SRC_B );
+		const u32 fb_src_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_B0 );
-		const u32 fb_src_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B1 - thiscore.Revb.FB_SRC_B );
+		const u32 fb_src_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_B1 );
-		const u32 mix_dest_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A0 );
+		const u32 mix_dest_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_A0 );
-		const u32 mix_dest_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A1 );
+		const u32 mix_dest_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_A1 );
-		const u32 mix_dest_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B0 );
+		const u32 mix_dest_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_B0 );
-		const u32 mix_dest_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B1 );
+		const u32 mix_dest_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_B1 );
 		// -----------------------------------------
 		//    End Buffer Pointers, Begin Reverb!
 		// -----------------------------------------
-		const s32 INPUT_SAMPLE_L = (thiscore.LastEffectL+InL);
+		StereoOut32 INPUT_SAMPLE( thiscore.LastEffect + Input );
 		const s32 INPUT_SAMPLE_R = (thiscore.LastEffectR+InR);
-		//const s32 INPUT_SAMPLE_L = (s32)( lowpass_left.sample( (thiscore.LastEffectL+InL)/65536.0 ) * 65536.0 );
+		const s32 IIR_INPUT_A0 = ((_spu2mem[src_a0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Left * thiscore.Revb.IN_COEF_L))>>16;
-		//const s32 INPUT_SAMPLE_R = (s32)( lowpass_right.sample( (thiscore.LastEffectR+InR)/65536.0 ) * 65536.0 );
+		const s32 IIR_INPUT_A1 = ((_spu2mem[src_a1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Right * thiscore.Revb.IN_COEF_R))>>16;
 		const s32 IIR_INPUT_B0 = ((_spu2mem[src_b0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Left * thiscore.Revb.IN_COEF_L))>>16;
 		const s32 IIR_INPUT_B1 = ((_spu2mem[src_b1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Right * thiscore.Revb.IN_COEF_R))>>16;
-		const s32 IIR_INPUT_A0 = ((_spu2mem[src_a0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_L * thiscore.Revb.IN_COEF_L))>>16;
+		//const s32 IIR_A0 = (IIR_INPUT_A0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a0] * (0x7fff - thiscore.Revb.IIR_ALPHA));
-		const s32 IIR_INPUT_A1 = ((_spu2mem[src_a1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_R * thiscore.Revb.IN_COEF_R))>>16;
+		//const s32 IIR_A1 = (IIR_INPUT_A1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a1] * (0x7fff - thiscore.Revb.IIR_ALPHA));
-		const s32 IIR_INPUT_B0 = ((_spu2mem[src_b0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_L * thiscore.Revb.IN_COEF_L))>>16;
+		//const s32 IIR_B0 = (IIR_INPUT_B0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b0] * (0x7fff - thiscore.Revb.IIR_ALPHA));
-		const s32 IIR_INPUT_B1 = ((_spu2mem[src_b1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_R * thiscore.Revb.IN_COEF_R))>>16;
+		//const s32 IIR_B1 = (IIR_INPUT_B1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b1] * (0x7fff - thiscore.Revb.IIR_ALPHA));
-		const s32 IIR_A0 = (IIR_INPUT_A0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a0] * (0x7fff - thiscore.Revb.IIR_ALPHA));
+		//_spu2mem[dest2_a0] = clamp_mix( IIR_A0 >> 16 );
-		const s32 IIR_A1 = (IIR_INPUT_A1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a1] * (0x7fff - thiscore.Revb.IIR_ALPHA));
+		//_spu2mem[dest2_a1] = clamp_mix( IIR_A1 >> 16 );
-		const s32 IIR_B0 = (IIR_INPUT_B0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b0] * (0x7fff - thiscore.Revb.IIR_ALPHA));
+		//_spu2mem[dest2_b0] = clamp_mix( IIR_B0 >> 16 );
-		const s32 IIR_B1 = (IIR_INPUT_B1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b1] * (0x7fff - thiscore.Revb.IIR_ALPHA));
+		//_spu2mem[dest2_b1] = clamp_mix( IIR_B1 >> 16 );
-		_spu2mem[dest2_a0] = clamp_mix( IIR_A0 >> 16 );
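+		// Faster single-mul approach to interpolation:
+		// NOTE(review): '+' binds tighter than '>>' in C++, so each expression below
+		// shifts the whole sum (including the IIR_INPUT_xx term) right by 16, not
+		// only the product -- confirm this is intended.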
-		_spu2mem[dest2_a1] = clamp_mix( IIR_A1 >> 16 );
+		const s32 IIR_A0 = IIR_INPUT_A0 + ((_spu2mem[dest_a0]-IIR_INPUT_A0) * thiscore.Revb.IIR_ALPHA)>>16;
-		_spu2mem[dest2_b0] = clamp_mix( IIR_B0 >> 16 );
+		const s32 IIR_A1 = IIR_INPUT_A1 + ((_spu2mem[dest_a1]-IIR_INPUT_A1) * thiscore.Revb.IIR_ALPHA)>>16;
-		_spu2mem[dest2_b1] = clamp_mix( IIR_B1 >> 16 );
+		const s32 IIR_B0 = IIR_INPUT_B0 + ((_spu2mem[dest_b0]-IIR_INPUT_B0) * thiscore.Revb.IIR_ALPHA)>>16;
 		const s32 IIR_B1 = IIR_INPUT_B1 + ((_spu2mem[dest_b1]-IIR_INPUT_B1) * thiscore.Revb.IIR_ALPHA)>>16;
 		_spu2mem[dest2_a0] = clamp_mix( IIR_A0 );
 		_spu2mem[dest2_a1] = clamp_mix( IIR_A1 );
 		_spu2mem[dest2_b0] = clamp_mix( IIR_B0 );
 		_spu2mem[dest2_b1] = clamp_mix( IIR_B1 );
 		const s32 ACC0 =
 			((_spu2mem[acc_src_a0] * thiscore.Revb.ACC_COEF_A)) +
@ -161,8 +166,6 @@ void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR)
 		const s32 FB_A0 = (_spu2mem[fb_src_a0] * thiscore.Revb.FB_ALPHA);
 		const s32 FB_A1 = (_spu2mem[fb_src_a1] * thiscore.Revb.FB_ALPHA);
 		const s32 FB_B0 = (_spu2mem[fb_src_b0] * (0x7fff - thiscore.Revb.FB_ALPHA)); //>>16;
 		const s32 FB_B1 = (_spu2mem[fb_src_b1] * (0x7fff - thiscore.Revb.FB_ALPHA)); //>>16;
 		const s32 fb_xor_a0 = (_spu2mem[fb_src_a0] * ( thiscore.Revb.FB_ALPHA ^ 0x8000 ))>>2;
 		const s32 fb_xor_a1 = (_spu2mem[fb_src_a1] * ( thiscore.Revb.FB_ALPHA ^ 0x8000 ))>>2;
@ -172,12 +175,13 @@ void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR)
 		_spu2mem[mix_dest_b0] = clamp_mix( (MulShr32(thiscore.Revb.FB_ALPHA<<14, ACC0) - fb_xor_a0 - ((_spu2mem[fb_src_b0] * thiscore.Revb.FB_X)>>2)) >> 14 );
 		_spu2mem[mix_dest_b1] = clamp_mix( (MulShr32(thiscore.Revb.FB_ALPHA<<14, ACC1) - fb_xor_a1 - ((_spu2mem[fb_src_b1] * thiscore.Revb.FB_X)>>2)) >> 14 );
-		thiscore.LastEffectL = clamp_mix(_spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0]);
+		thiscore.LastEffect.Left  = _spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0];
-		thiscore.LastEffectR = clamp_mix(_spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1]);
+		thiscore.LastEffect.Right = _spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1];
 		clamp_mix( thiscore.LastEffect );
-		//OutL = thiscore.LastEffectL;
+		thiscore.LastEffect.Left = (s32)(lowpass_left.sample( thiscore.LastEffect.Left / 32768.0 ) * 32768.0);
-		//OutR = thiscore.LastEffectR;
+		thiscore.LastEffect.Right = (s32)(lowpass_right.sample( thiscore.LastEffect.Right / 32768.0 ) * 32768.0);
-		OutL = (s32)(lowpass_left.sample( thiscore.LastEffectL / 32768.0 ) * 32768.0);
+
-		OutR = (s32)(lowpass_right.sample( thiscore.LastEffectR / 32768.0 ) * 32768.0);
+		return thiscore.LastEffect;
 	} 
 }
--- a/plugins/spu2-x/src/SndOut.cpp
+++ b/plugins/spu2-x/src/SndOut.cpp
@ -19,24 +19,45 @@
 * 
 */
 // [TODO] : The layout of this code file is now a complete hackish mess after
 // numerous timestretch-related additions.  The whole thing should really be
 // rethought and redone at this point.
 #include "spu2.h"
 #include "SoundTouch/SoundTouch.h"
 #include "SoundTouch/WavFile.h"
 #include <new>
-static int ts_stats_stretchblocks = 0;
+StereoOut32 StereoOut32::Empty( 0, 0 );
-static int ts_stats_normalblocks = 0;
+
-static int ts_stats_logcounter = 0;
+StereoOut32::StereoOut32( const StereoOut16& src ) :
 	// Copies both channels of the 16-bit sample directly; no volume shift is
 	// applied by this conversion.
 	Left( src.Left ),
 	Right( src.Right )
 {
 }
 // Builds a 32-bit sample from a floating-point one by scaling each channel
 // by 2147483647.0f and converting the product to s32 (fraction discarded).
 // NOTE(review): presumably the float channels are normalized to [-1.0, 1.0] --
 // confirm against StereoOutFloat.  2147483647 is not exactly representable as
 // a 32-bit float (it rounds to 2147483648), so an input of exactly 1.0f may
 // yield a product outside the s32 range -- verify.
 StereoOut32::StereoOut32( const StereoOutFloat& src ) :
 	Left( (s32)(src.Left * 2147483647.0f) ),
 	Right( (s32)(src.Right * 2147483647.0f) )
 {
 }
 // Returns a StereoOut16 built from this sample with each channel shifted
 // right by SndOutVolumeShift (constant defined outside this view).
 StereoOut16 StereoOut32::DownSample() const
 {
 	return StereoOut16(
 		Left >> SndOutVolumeShift,
 		Right >> SndOutVolumeShift
 	);
 }
 // Returns a StereoOut32 built from this sample with each channel shifted
 // left by SndOutVolumeShift (the opposite shift direction to
 // StereoOut32::DownSample).
 StereoOut32 StereoOut16::UpSample() const
 {
 	return StereoOut32(
 		Left << SndOutVolumeShift,
 		Right << SndOutVolumeShift
 	);
 }
 class NullOutModule: public SndOutModule
 {
 public:
-	s32  Init(SndBuffer *)  { return 0; }
+	s32  Init()  { return 0; }
 	void Close() { }
 	s32  Test() const { return 0; }
 	void Configure(HWND parent)  { }
@ -61,7 +82,6 @@ SndOutModule* mods[]=
 	XAudio2Out,
 	DSoundOut,
 	WaveOut,
 	//ASIOOut,
 	NULL		// signals the end of our list
 };
@ -77,528 +97,173 @@ int FindOutputModuleById( const wchar_t* omodid )
 	return modcnt;
 }
 StereoOut32 *SndBuffer::m_buffer;
 s32 SndBuffer::m_size;
 s32 SndBuffer::m_rpos;
 s32 SndBuffer::m_wpos;
 s32 SndBuffer::m_data;
-__forceinline s16 SndScaleVol( s32 inval )
+bool SndBuffer::m_underrun_freeze;
 StereoOut32* SndBuffer::sndTempBuffer = NULL;
 StereoOut16* SndBuffer::sndTempBuffer16 = NULL;
 int SndBuffer::sndTempProgress = 0;
 int GetAlignedBufferSize( int comp )
 {
-	return inval >> SndOutVolumeShift;
+	return (comp + SndOutPacketSize-1) & ~(SndOutPacketSize-1);
 }
-
+// Returns TRUE if there is data to be output, or false if no data
-// records last buffer status (fill %, range -100 to 100, with 0 being 50% full)
+// is available to be copied.
-float lastPct;
+bool SndBuffer::CheckUnderrunStatus( int& nSamples, int& quietSampleCount )
 float lastEmergencyAdj;
 float cTempo=1;
 float eTempo = 1;
 int freezeTempo = 0;
 soundtouch::SoundTouch* pSoundTouch=NULL;
 //usefull when timestretch isn't available 
 class SndBufferImpl: public SndBuffer
 {
-private:
+	quietSampleCount = 0;
-	s32 *buffer;
+	if( m_underrun_freeze )
 	s32 size;
 	s32 rpos;
 	s32 wpos;
 	s32 data;
 	// data prediction amount, used to "commit" data that hasn't
 	// finished timestretch processing.
 	s32 predictData;
 	bool pw;
 	bool underrun_freeze;
 protected:
 	int GetAlignedBufferSize( int comp )
 	{			
-		return (comp + SndOutPacketSize-1) & ~(SndOutPacketSize-1);
+		int toFill = (int)(m_size * ( timeStretchDisabled ? 0.50f : 0.1f ) );
-	}
+		toFill = GetAlignedBufferSize( toFill );
-public:
+		// toFill is now aligned to a SndOutPacket
 	// Creates an empty buffer: read/write positions and fill count start at
 	// zero, and the storage is sized from latencyMS and SampleRate, then passed
 	// through GetAlignedBufferSize.
 	SndBufferImpl( float latencyMS )
 	{
 		rpos=0;
 		wpos=0;
 		data=0;
 		// NOTE(review): divides by 500 rather than 1000 -- presumably because the
 		// buffer holds two s32 values (left/right) per sample frame; confirm.
 		size=GetAlignedBufferSize( (int)(latencyMS * SampleRate / 500.0f ) );
 		buffer = new s32[size];
 		pw=false;
 		underrun_freeze = false;
 		predictData = 0;
 	}
-	virtual ~SndBufferImpl()
+		if( m_data < toFill )
 	{
 		delete buffer;
 	}
 	virtual void WriteSamples(s32 *bData, int nSamples)
 	{
 		int free = size-data;
 		predictData = 0;
 		jASSUME( data <= size );
 		// Problem:
 		//  If the SPU2 gets out of sync with the SndOut device, the writepos of the
 		//  circular buffer will overtake the readpos, leading to a prolonged period
 		//  of hopscotching read/write accesses (ie, lots of staticy crap sound for
 		//  several seconds).
 		//
 		// Compromise:
 		//  When an overrun occurs, we adapt by discarding a portion of the buffer.
 		//  The older portion of the buffer is discarded rather than incoming data,
 		//  so that the overall audio synchronization is better.
 		if( free < nSamples )
 		{
-			// Buffer overrun!
+			quietSampleCount = nSamples;
-			// Dump samples from the read portion of the buffer instead of dropping
+			return false;
 			// the newly written stuff.
 			s32 comp;
 			if( !timeStretchDisabled )
 			{
 				// If we overran it means the timestretcher failed.  We need to speed
 				// up audio playback.
 				cTempo += cTempo * 0.12f;
 				eTempo += eTempo * 0.40f;
 				if( eTempo > 7.5f ) eTempo = 7.5f;
 				pSoundTouch->setTempo( eTempo );
 				// Throw out just a little bit (two packets worth) to help
 				// give the TS some room to work:
 				comp = SndOutPacketSize*2;
 			}
 			else
 			{
 				// Toss half the buffer plus whatever's being written anew:
 				comp = GetAlignedBufferSize( (size + nSamples ) / 2 );
 				if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize;
 			}
 			data -= comp;
 			rpos = (rpos+comp)%size;
 			if( MsgOverruns() )
 				ConLog(" * SPU2 > Overrun Compensation (%d packets tossed)\n", comp / SndOutPacketSize );
 			lastPct = 0.0;		// normalize the timestretcher
 		}
-		// copy in two phases, since there's a chance the packet
+		m_underrun_freeze = false;
-		// wraps around the buffer (it'd be nice to deal in packets only, but
+		if( MsgOverruns() )
-		// the timestretcher and DSP options require flexibility).
+			ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize );
-
+		lastPct = 0.0;		// normalize timestretcher
 		const int endPos = wpos + nSamples;
 		const int secondCopyLen = endPos - size;
 		s32* wposbuffer = &buffer[wpos];
 		data += nSamples;
 		if( secondCopyLen > 0 )
 		{
 			nSamples -= secondCopyLen;
 			memcpy( buffer, &bData[nSamples], secondCopyLen * sizeof( *bData ) );
 			wpos = secondCopyLen;
 		}
 		else
 			wpos += nSamples;
 		memcpy( wposbuffer, bData, nSamples * sizeof( *bData ) );
 	}
-
+	else if( m_data < nSamples )
 	protected:
 	// Returns TRUE if there is data to be output, or false if no data
 	// is available to be copied.
 	bool CheckUnderrunStatus( int& nSamples, int& quietSampleCount )
 	{
-		quietSampleCount = 0;
+		nSamples = m_data;
-		if( underrun_freeze )
+		quietSampleCount = SndOutPacketSize - m_data;
-		{			
+		m_underrun_freeze = true;
 			int toFill = (int)(size * ( timeStretchDisabled ? 0.50f : 0.1f ) );
 			toFill = GetAlignedBufferSize( toFill );
-			// toFill is now aligned to a SndOutPacket
+		if( !timeStretchDisabled )
 			timeStretchUnderrun();
-			if( data < toFill )
+		return nSamples != 0;
 			{
 				quietSampleCount = nSamples;
 				return false;
 			}
 			underrun_freeze = false;
 			if( MsgOverruns() )
 				ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize );
 			lastPct = 0.0;		// normalize timestretcher
 		}
 		else if( data < nSamples )
 		{
 			nSamples = data;
 			quietSampleCount = SndOutPacketSize - data;
 			underrun_freeze = true;
 			if( !timeStretchDisabled )
 			{
 				// timeStretcher failed it's job.  We need to slow down the audio some.
 				cTempo -= (cTempo * 0.12f);
 				eTempo -= (eTempo * 0.30f);
 				if( eTempo < 0.1f ) eTempo = 0.1f;
 				pSoundTouch->setTempo( eTempo );
 			}
 			return nSamples != 0;
 		}
 		return true;
 	}
-public:
+	return true;
 	// Reads up to one packet (SndOutPacketSize values) from the circular buffer
 	// into bData, converting each s32 value to s16 via SndScaleVol.
 	// CheckUnderrunStatus may shrink nSamples and reports how many values
 	// should be silence (quietSamples).
 	void ReadSamples( s16* bData )
 	{
 		int nSamples = SndOutPacketSize;
 		// Problem:
 		//  If the SPU2 gets even the least bit out of sync with the SndOut device,
 		//  the readpos of the circular buffer will overtake the writepos,
 		//  leading to a prolonged period of hopscotching read/write accesses (ie,
 		//  lots of staticy crap sound for several seconds).
 		//
 		// Fix:
 		//  If the read position overtakes the write position, abort the
 		//  transfer immediately and force the SndOut driver to wait until
 		//  the read buffer has filled up again before proceeding.
 		//  This will cause one brief hiccup that can never exceed the user's
 		//  set buffer length in duration.
 		int quietSamples;
 		if( CheckUnderrunStatus( nSamples, quietSamples ) )
 		{
 			jASSUME( nSamples <= SndOutPacketSize );
 			// [Air] [TODO]: This loop is probably a candidate for SSE2 optimization.
 			// The read may wrap past the end of the buffer: secondCopyLen is the
 			// number of values that must come from the start of the buffer.
 			const int endPos = rpos + nSamples;
 			const int secondCopyLen = endPos - size;
 			// Captured before rpos is updated below; used for the first (tail) copy.
 			const s32* rposbuffer = &buffer[rpos];
 			data -= nSamples;
 			if( secondCopyLen > 0 )
 			{
 				// Wrapped part: nSamples is reduced to the tail length, and the
 				// remainder is read from buffer[0..secondCopyLen).
 				nSamples -= secondCopyLen;
 				for( int i=0; i<secondCopyLen; i++ )
 					bData[nSamples+i] = SndScaleVol( buffer[i] );
 				rpos = secondCopyLen;
 			}
 			else
 				rpos += nSamples;
 			for( int i=0; i<nSamples; i++ )
 				bData[i] = SndScaleVol( rposbuffer[i] );
 		}
 		// If quietSamples != 0 it means we have an underrun...
 		// Let's just dull out some silence, because that's usually the least
 		// painful way of dealing with underruns:
 		// NOTE(review): this zeroes starting at bData[0], the same region the copy
 		// loops above fill.  If a partial read can coincide with quietSamples != 0,
 		// the copied samples would be overwritten -- confirm whether the fill
 		// should start after the copied samples instead.
 		memset( bData, 0, quietSamples * sizeof(*bData) );
 	}
 	// Reads up to one packet (SndOutPacketSize values) from the circular buffer
 	// into bData without conversion (raw memcpy of s32 values).
 	// CheckUnderrunStatus may shrink nSamples and reports how many values
 	// should be silence (quietSamples).
 	void ReadSamples( s32* bData )
 	{
 		int nSamples = SndOutPacketSize;
 		// Problem:
 		//  If the SPU2 gets even the least bit out of sync with the SndOut device,
 		//  the readpos of the circular buffer will overtake the writepos,
 		//  leading to a prolonged period of hopscotching read/write accesses (ie,
 		//  lots of staticy crap sound for several seconds).
 		//
 		// Fix:
 		//  If the read position overtakes the write position, abort the
 		//  transfer immediately and force the SndOut driver to wait until
 		//  the read buffer has filled up again before proceeding.
 		//  This will cause one brief hiccup that can never exceed the user's
 		//  set buffer length in duration.
 		int quietSamples;
 		if( CheckUnderrunStatus( nSamples, quietSamples ) )
 		{
 			// nSamples is guaranteed non-zero if CheckUnderrunStatus
 			// returned true.
 			// The read may wrap past the end of the buffer: secondCopyLen is the
 			// number of values that must come from the start of the buffer.
 			const int endPos = rpos + nSamples;
 			const int secondCopyLen = endPos - size;
 			// Remember the pre-update read position for the first (tail) copy.
 			const int oldrpos = rpos;
 			data -= nSamples;
 			if( secondCopyLen > 0 )
 			{
 				nSamples -= secondCopyLen;
 				memcpy( &bData[nSamples], buffer, secondCopyLen * sizeof( *bData ) );
 				rpos = secondCopyLen;
 			}
 			else
 				rpos += nSamples;
 			memcpy( bData, &buffer[oldrpos], nSamples * sizeof( *bData ) );
 		}
 		// If quietSamples != 0 it means we have an underrun...
 		// Let's just dull out some silence, because that's usually the least
 		// painful way of dealing with underruns:
 		// NOTE(review): this zeroes starting at bData[0], the same region the
 		// memcpy calls above fill.  If a partial read can coincide with
 		// quietSamples != 0, the copied samples would be overwritten -- confirm
 		// whether the fill should start after the copied samples instead.
 		memset( bData, 0, quietSamples * sizeof(*bData) );
 	}
 	// Adds `samples` to predictData, the count of data treated as already
 	// buffered by GetStatusPct; WriteSamples resets this count to zero.
 	void PredictDataWrite( int samples )
 	{
 		predictData += samples;
 	}
 	virtual void PauseOnWrite(bool doPause) { pw = doPause; }
 	// Calculate the buffer status percentage.
 	// Returns range from -1.0 to 1.0
 	//    1.0 = buffer overflow!
 	//    0.0 = buffer nominal (50% full)
 	//   -1.0 = buffer underflow!
 	// Computed as (data + predictData - drvempty - size/2) / (size/2), where
 	// drvempty is the output module's empty sample count.
 	// NOTE(review): nothing here clamps the result, so values outside
 	// [-1.0, 1.0] look possible -- confirm.
 	float GetStatusPct()
 	{
 		// Get the buffer status of the output driver too, so that we can
 		// obtain a more accurate overall buffer status.
 		int drvempty = mods[OutputModule]->GetEmptySampleCount(); // / 2;
 		//ConLog( "Data %d >>> driver: %d   predict: %d\n", data, drvempty, predictData );
 		float result = (float)(data + predictData - drvempty) - (size/2);
 		result /= (size/2);
 		return result;
 	}
 };
 SndBufferImpl *sndBuffer=NULL;
 s32* sndTempBuffer=NULL;
 s32 sndTempProgress=NULL;
 s16* sndTempBuffer16=NULL;
 // Re-evaluates the SoundTouch tempo from the sound buffer's fill status
 // (sndBuffer->GetStatusPct()) and from how far that status moved since the
 // previous call.  Does nothing while the freezeTempo countdown is still
 // positive.  Updates the globals cTempo, eTempo, lastPct and lastEmergencyAdj,
 // and bumps ts_stats_stretchblocks or ts_stats_normalblocks.
 void UpdateTempoChange()
 {
 	if( --freezeTempo > 0 )
 	{
 		return;
 	}
 	float statusPct = sndBuffer->GetStatusPct();
 	float pctChange = statusPct - lastPct;
 	float tempoChange;
 	float emergencyAdj = 0;
 	float newcee = cTempo;		// workspace var. for cTempo
 	// IMPORTANT!
 	// If you plan to tweak these values, make sure you're using a release build
 	// OUTSIDE THE DEBUGGER to test it!  The Visual Studio debugger can really cause
 	// erratic behavior in the audio buffers, and makes the timestretcher seem a
 	// lot more inconsistent than it really is.
 	// We have two factors.
 	//   * Distance from nominal buffer status (50% full)
 	//   * The change from previous update to this update.
 	// Prediction based on the buffer change:
 	// (linear seems to work better here)
 	tempoChange = pctChange * 0.75f;
 	if( statusPct * tempoChange < 0.0f )
 	{
 		// only apply tempo change if it is in sync with the buffer status.
 		// In other words, if the buffer is high (over 0%), and is decreasing,
 		// ignore it.  It'll just muck things up.
 		tempoChange = 0;
 	}
 	// Sudden spikes in framerate can cause the nominal buffer status
 	// to go critical, in which case we have to enact an emergency
 	// stretch. The following cubic formulas do that.  Values near
 	// the extremities give much larger results than those near 0.
 	// And the value is added only this time, and does not accumulate.
 	// (otherwise a large value like this would cause problems down the road)
 	// Constants:
 	// Weight - weights the statusPct's "emergency" consideration.
 	//   higher values here will make the buffer perform more drastic
 	//   compensations at the outer edges of the buffer (at -75 or +75%
 	//   or beyond, for example).
 	// Range - scales the adjustment to the given range (more or less).
 	//   The actual range is dependent on the weight used, so if you increase
 	//   Weight you'll usually want to decrease Range somewhat to compensate.
 	// Prediction based on the buffer fill status:
 	const float statusWeight = 2.99f;
 	const float statusRange = 0.068f;
 	// "non-emergency" deadzone:  In this area stretching will be strongly discouraged.
 	// Note: due to the nature of timestretch latency, it's always a wee bit harder to
 	// cope with low fps (underruns) than it is high fps (overruns).  So to help out a
 	// little, the low-end portions of this check are less forgiving than the high-sides.
 	if( cTempo < 0.965f || cTempo > 1.060f ||
 		pctChange < -0.38f || pctChange > 0.54f ||
 		statusPct < -0.32f || statusPct > 0.39f ||
 		eTempo < 0.89f || eTempo > 1.19f )
 	{
 		emergencyAdj = ( pow( statusPct*statusWeight, 3.0f ) * statusRange);
 	}
 	// Smooth things out by factoring our previous adjustment into this one.
 	// It helps make the system 'feel' a little smarter by giving it at least
 	// one packet worth of history to help work off of:
 	emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f );
 	lastEmergencyAdj = emergencyAdj;
 	lastPct = statusPct;
 	// Accumulate a fraction of the tempo change into the tempo itself.
 	// This helps the system run "smarter" to games that run consistently
 	// fast or slow by altering the base tempo to something closer to the
 	// game's active speed.  In tests most games normalize within 2 seconds
 	// at 100ms latency, which is pretty good (larger buffers normalize even
 	// quicker).
 	newcee += newcee * (tempoChange+emergencyAdj) * 0.03f;
 	// Apply tempoChange as a scale of cTempo.  That way the effect is proportional
 	// to the current tempo.  (otherwise tempos rate of change at the extremes would
 	// be too drastic)
 	float newTempo = newcee + ( emergencyAdj * cTempo );
 	// ... and as a final optimization, only stretch if the new tempo is outside
 	// a nominal threshold.  Keep this threshold check small, because it could
 	// cause some serious side effects otherwise. (enlarging the cTempo check above
 	// is usually better/safer)
 	if( newTempo < 0.970f || newTempo > 1.045f )
 	{
 		cTempo = (float)newcee;
 		// Clamp both the effective tempo (newTempo) and the base tempo (cTempo)
 		// before handing the effective tempo to SoundTouch.
 		if( newTempo < 0.10f ) newTempo = 0.10f;
 		else if( newTempo > 10.0f ) newTempo = 10.0f;
 		if( cTempo < 0.15f ) cTempo = 0.15f;
 		else if( cTempo > 7.5f ) cTempo = 7.5f;
 		pSoundTouch->setTempo( eTempo = (float)newTempo );
 		ts_stats_stretchblocks++;
 		/*ConLog(" * SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n",
 			//(relation < 0.0) ? "Normalize" : "",
 			(int)(tempoChange * 100.0 * 0.03),
 			(int)(emergencyAdj * 100.0),
 			(int)(cTempo * 100.0),
 			(int)(newTempo * 100.0),
 			(int)(statusPct * 100.0)
 		);*/
 	}
 	else
 	{
 		// Nominal operation -- turn off stretching.
 		// note: eTempo 'slides' toward 1.0 for smoother audio and better
 		// protection against spikes.
 		if( cTempo != 1.0f )
 		{
 			cTempo = 1.0f;
 			eTempo = ( 1.0f + eTempo ) * 0.5f;
 			pSoundTouch->setTempo( eTempo );
 		}
 		else
 		{
 			if( eTempo != cTempo )
 				pSoundTouch->setTempo( eTempo=cTempo );
 			ts_stats_normalblocks++;
 		}
 	}
 }
-void soundtouchInit()
+void SndBuffer::_InitFail()
 {
 	pSoundTouch = new soundtouch::SoundTouch();
 	pSoundTouch->setSampleRate(SampleRate);
    pSoundTouch->setChannels(2);
    pSoundTouch->setSetting( SETTING_USE_QUICKSEEK, 0 );
    pSoundTouch->setSetting( SETTING_USE_AA_FILTER, 0 );
 	pSoundTouch->setSetting( SETTING_SEQUENCE_MS, SoundtouchCfg::SequenceLenMS );
 	pSoundTouch->setSetting( SETTING_SEEKWINDOW_MS, SoundtouchCfg::SeekWindowMS );
 	pSoundTouch->setSetting( SETTING_OVERLAP_MS, SoundtouchCfg::OverlapMS );
 	pSoundTouch->setTempo(1);
 	// some timestretch management vars:
 	cTempo = 1.0;
 	eTempo = 1.0;
 	lastPct = 0;
 	lastEmergencyAdj = 0;
 	// just freeze tempo changes for a while at startup.
 	// the driver buffers are bogus anyway.
 	freezeTempo = 8;
 }
 static void _sndInitFail()
 {
 	// If a failure occurs, just initialize the NoSound driver.  This'll allow
 	// the game to emulate properly (hopefully), albeit without sound.
 	OutputModule = FindOutputModuleById( NullOut.GetIdent() );
-	mods[OutputModule]->Init( sndBuffer );
+	mods[OutputModule]->Init();
 }
-s32 SndInit()
+void SndBuffer::_WriteSamples(StereoOut32 *bData, int nSamples)
 {
 	// Appends nSamples entries from bData to the circular buffer m_buffer,
 	// advancing m_wpos and m_data.  If there is not enough free space, older
 	// unread data is discarded first by advancing m_rpos.
 	// NOTE(review): the wrap-around copy below handles a single wrap only, so
 	// this presumably relies on nSamples never exceeding m_size -- confirm
 	// against callers.
 	int free = m_size-m_data;
 	m_predictData = 0;
 	jASSUME( m_data <= m_size );
 	// Problem:
 	//  If the SPU2 gets out of sync with the SndOut device, the writepos of the
 	//  circular buffer will overtake the readpos, leading to a prolonged period
 	//  of hopscotching read/write accesses (ie, lots of staticy crap sound for
 	//  several seconds).
 	//
 	// Compromise:
 	//  When an overrun occurs, we adapt by discarding a portion of the buffer.
 	//  The older portion of the buffer is discarded rather than incoming data,
 	//  so that the overall audio synchronization is better.
 	if( free < nSamples )
 	{
 		// Buffer overrun!
 		// Dump samples from the read portion of the buffer instead of dropping
 		// the newly written stuff.
 		s32 comp;
 		if( !timeStretchDisabled )
 		{
 			// With timestretching enabled, the amount to discard is whatever
 			// timeStretchOverrun() returns.
 			comp = timeStretchOverrun();
 		}
 		else
 		{
 			// Toss half the buffer plus whatever's being written anew:
 			// (capped so that at most m_size - SndOutPacketSize is discarded)
 			comp = GetAlignedBufferSize( (m_size + nSamples ) / 2 );
 			if( comp > (m_size-SndOutPacketSize) ) comp = m_size-SndOutPacketSize;
 		}
 		m_data -= comp;
 		m_rpos = (m_rpos+comp) % m_size;
 		if( MsgOverruns() )
 			ConLog(" * SPU2 > Overrun Compensation (%d packets tossed)\n", comp / SndOutPacketSize );
 		lastPct = 0.0;		// normalize the timestretcher
 	}
 	// copy in two phases, since there's a chance the packet
 	// wraps around the buffer (it'd be nice to deal in packets only, but
 	// the timestretcher and DSP options require flexibility).
 	const int endPos = m_wpos + nSamples;
 	const int secondCopyLen = endPos - m_size;
 	// Captured before m_wpos is updated; destination of the first-phase copy.
 	StereoOut32* wposbuffer = &m_buffer[m_wpos];
 	m_data += nSamples;
 	if( secondCopyLen > 0 )
 	{
 		// Wrapped part: the last secondCopyLen entries of bData go to the start
 		// of the buffer, and nSamples shrinks to the part that fits before the end.
 		nSamples -= secondCopyLen;
 		memcpy( m_buffer, &bData[nSamples], secondCopyLen * sizeof( *bData ) );
 		m_wpos = secondCopyLen;
 	}
 	else
 		m_wpos += nSamples;
 	memcpy( wposbuffer, bData, nSamples * sizeof( *bData ) );
 }
 void SndBuffer::Init()
 {
 	if( mods[OutputModule] == NULL )
 	{
-		_sndInitFail();
+		_InitFail();
-		return 0;
+		return;
 	}
 	// initialize sound buffer
 	// Buffer actually attempts to run ~50%, so allocate near double what
 	// the requested latency is:
 	m_rpos = 0;
 	m_wpos = 0;
 	m_data = 0;
 	try
 	{
-		sndBuffer = new SndBufferImpl( SndOutLatencyMS * (timeStretchDisabled ? 1.5f : 2.0f ) );
+		const float latencyMS = SndOutLatencyMS * (timeStretchDisabled ? 1.5f : 2.0f );
-		sndTempBuffer = new s32[SndOutPacketSize];
+		m_size = GetAlignedBufferSize( (int)(latencyMS * SampleRate / 1000.0f ) );
-		sndTempBuffer16 = new s16[SndOutPacketSize];
+		m_buffer = new StereoOut32[m_size];
 		m_underrun_freeze = false;
 		sndTempBuffer = new StereoOut32[SndOutPacketSize];
 		sndTempBuffer16 = new StereoOut16[SndOutPacketSize];
 	}
 	catch( std::bad_alloc& )
 	{
 		// out of memory exception (most likely)
-		SysMessage( "Out of memory error occured while initializing SPU2." );
+		SysMessage( "Out of memory error occurred while initializing SPU2." );
-		_sndInitFail();
+		_InitFail();
-		return 0;
+		return;
 	}
 	// clear buffers!
 	// Fixes loopy sounds on emu resets.
-	memset( sndTempBuffer, 0, sizeof(s32) * SndOutPacketSize );
+	memset( sndTempBuffer, 0, sizeof(StereoOut32) * SndOutPacketSize );
-	memset( sndTempBuffer16, 0, sizeof(s16) * SndOutPacketSize );
+	memset( sndTempBuffer16, 0, sizeof(StereoOut16) * SndOutPacketSize );
 	sndTempProgress = 0;
@ -608,104 +273,78 @@ s32 SndInit()
 	spdif_set51(mods[OutputModule]->Is51Out());
 	// initialize module
-	if( mods[OutputModule]->Init(sndBuffer) == -1 )
+	if( mods[OutputModule]->Init() == -1 ) _InitFail();
 	{
 		_sndInitFail();
 	}
 	return 0;
 }
-void SndClose()
+void SndBuffer::Cleanup()
 {
 	mods[OutputModule]->Close();
-	SAFE_DELETE_OBJ( sndBuffer );
+	SAFE_DELETE_ARRAY( m_buffer );
 	SAFE_DELETE_ARRAY( sndTempBuffer );
 	SAFE_DELETE_ARRAY( sndTempBuffer16 );
 	SAFE_DELETE_OBJ( pSoundTouch );
 }
-s32 SndWrite(s32 ValL, s32 ValR)
+int SndBuffer::m_dsp_progress = 0;
 int SndBuffer::m_dsp_writepos = 0;
 int SndBuffer::m_timestretch_progress = 0;
 void SndBuffer::Write( const StereoOut32& Sample )
 {
 	// Log final output to wavefile.
-	WaveDump::WriteCore( 1, CoreSrc_External, SndScaleVol(ValL), SndScaleVol(ValR) );
+	WaveDump::WriteCore( 1, CoreSrc_External, Sample.DownSample() );
-	RecordWrite(SndScaleVol(ValL),SndScaleVol(ValR));
+	RecordWrite( Sample.DownSample() );
 	if(mods[OutputModule] == &NullOut) // null output doesn't need buffering or stretching! :p
-		return 0;
+		return;
-	sndTempBuffer[sndTempProgress++] = ValL;
+	sndTempBuffer[sndTempProgress++] = Sample;
 	sndTempBuffer[sndTempProgress++] = ValR;
 	// If we haven't accumulated a full packet yet, do nothing more:
-	if(sndTempProgress < SndOutPacketSize) return 1;
+	if(sndTempProgress < SndOutPacketSize) return;
 	sndTempProgress = 0;
-	if(dspPluginEnabled)
+	if( dspPluginEnabled )
 	{
-		for(int i=0;i<SndOutPacketSize;i++) { sndTempBuffer16[i] = SndScaleVol( sndTempBuffer[i] ); }
+		// Convert in, send to winamp DSP, and convert out.
-		// send to winamp DSP
+		for( int i=0; i<SndOutPacketSize; ++i, ++m_dsp_writepos ) { sndTempBuffer16[m_dsp_writepos] = sndTempBuffer[i].DownSample(); }
-		sndTempProgress = DspProcess(sndTempBuffer16,sndTempProgress>>1)<<1;
+		m_dsp_progress += DspProcess( (s16*)sndTempBuffer16, SndOutPacketSize );
-		for(int i=0;i<sndTempProgress;i++) { sndTempBuffer[i] = sndTempBuffer16[i]<<SndOutVolumeShift; }
+		// Some ugly code to ensure full packet handling:
-	}
+		int ei = 0;
-
+		while( m_dsp_progress >= SndOutPacketSize )
 	static int equalized = 0;
 	if( !timeStretchDisabled )
 	{
 		bool progress = false;
 		// data prediction helps keep the tempo adjustments more accurate.
 		// The timestretcher returns packets in belated "clump" form.
 		// Meaning that most of the time we'll get nothing back, and then
 		// suddenly we'll get several chunks back at once.  Thus we use
 		// data prediction to make the timestretcher more responsive.
 		sndBuffer->PredictDataWrite( (int)( sndTempProgress / eTempo ) );
 		for(int i=0;i<sndTempProgress;i++) { ((float*)sndTempBuffer)[i] = sndTempBuffer[i]/2147483648.0f; }
 		pSoundTouch->putSamples((float*)sndTempBuffer, sndTempProgress>>1);
 		while( ( sndTempProgress = pSoundTouch->receiveSamples((float*)sndTempBuffer, sndTempProgress>>1)<<1 ) != 0 )
 		{
-			// [Air] [TODO] : Implement an SSE downsampler to int.
+			for( int i=0; i<SndOutPacketSize; ++i, ++ei ) { sndTempBuffer[i] = sndTempBuffer16[ei].UpSample(); }
-			for(int i=0;i<sndTempProgress;i++)
+
-			{
+			if( !timeStretchDisabled )
-				sndTempBuffer[i] = (s32)(((float*)sndTempBuffer)[i]*2147483648.0f);
+				timeStretchWrite();
-			}
+			else
-			sndBuffer->WriteSamples(sndTempBuffer, sndTempProgress);
+				_WriteSamples(sndTempBuffer, sndTempProgress);
-			progress = true;
+
 			m_dsp_progress -= SndOutPacketSize;
 		}
-		UpdateTempoChange();
+		// copy any leftovers to the front of the dsp buffer.
-
+		if( m_dsp_progress > 0 )
 		if( MsgOverruns() )
 		{
-			if( progress )
+			memcpy( &sndTempBuffer16[ei], sndTempBuffer16,
-			{
+				sizeof(sndTempBuffer16[0]) * m_dsp_progress
-				if( ++ts_stats_logcounter > 300 )
+			);
 				{
 					ts_stats_logcounter = 0;
 					ConLog( " * SPU2 > Timestretch Stats > %d%% of packets stretched.\n",
 						( ts_stats_stretchblocks * 100 ) / ( ts_stats_normalblocks + ts_stats_stretchblocks ) );
 					ts_stats_normalblocks = 0;
 					ts_stats_stretchblocks = 0;
 				}
 			}
 		}
 	}
 	else
 	{
-		sndBuffer->WriteSamples(sndTempBuffer, sndTempProgress);
+		if( !timeStretchDisabled )
-		sndTempProgress=0;
+			timeStretchWrite();
 		else
 			_WriteSamples(sndTempBuffer, SndOutPacketSize);
 	}
 	return 1;
 }
-s32 SndTest()
+s32 SndBuffer::Test()
 {
 	if( mods[OutputModule] == NULL )
 		return -1;
@ -713,10 +352,11 @@ s32 SndTest()
 	return mods[OutputModule]->Test();
 }
-void SndConfigure(HWND parent, u32 module )
+void SndBuffer::Configure(HWND parent, u32 module )
 {
 	if( mods[module] == NULL )
 		return;
 	mods[module]->Configure(parent);
 }
--- a/plugins/spu2-x/src/SndOut.h
+++ b/plugins/spu2-x/src/SndOut.h
@ -24,40 +24,310 @@
 // Number of stereo samples per SndOut block.
 // All drivers must work in units of this size when communicating with
 // SndOut.
-static const int SndOutPacketSize = 1024;
+static const int SndOutPacketSize = 512;
 // Overall master volume shift.
 // Converts the mixer's 32 bit value into a 16 bit value.
-static const int SndOutVolumeShift = 10;
+static const int SndOutVolumeShift = 13;
 // Samplerate of the SPU2. For accurate playback we need to match this
 // exactly.  Trying to scale samplerates and maintain SPU2's Ts timing accuracy
 // is too problematic. :)
 static const int SampleRate = 48000;
 extern s32  SndInit();
 extern void SndClose();
 extern s32  SndWrite(s32 ValL, s32 ValR);
 extern s32  SndTest();
 extern void SndConfigure(HWND parent, u32 outmodidx );
 extern bool SndGetStats(u32 *written, u32 *played);
 extern s16  SndScaleVol( s32 inval );
 int FindOutputModuleById( const wchar_t* omodid );
 struct StereoOut16
 {
 	s16 Left;
 	s16 Right;
 	StereoOut16() :
 		Left( 0 ),
 		Right( 0 )
 	{
 	}
 	StereoOut16( const StereoOut32& src ) :
 		Left( (s16)src.Left ),
 		Right( (s16)src.Right )
 	{
 	}
 	StereoOut16( s16 left, s16 right ) :
 		Left( left ),
 		Right( right )
 	{
 	}
 	StereoOut32 UpSample() const;
 	void ResampleFrom( const StereoOut32& src )
 	{
 		// Use StereoOut32's built in conversion
 		*this = src.DownSample();
 	}
 };
 // One stereo frame made of two floating point samples.
 struct StereoOutFloat
 {
 	float Left;
 	float Right;
 	// Silent frame: both channels start at zero.
 	StereoOutFloat() : Left( 0.0f ), Right( 0.0f ) {}
 	// Frame built from values that are already floating point; stored as-is.
 	StereoOutFloat( float left, float right ) : Left( left ), Right( right ) {}
 	// Converts a pair of 32 bit integer samples by dividing each by 2147483647.0f.
 	explicit StereoOutFloat( s32 left, s32 right ) :
 		Left( left / 2147483647.0f ), Right( right / 2147483647.0f ) {}
 	// Same conversion as above, taking the channels from a StereoOut32 frame.
 	explicit StereoOutFloat( const StereoOut32& src ) :
 		Left( src.Left / 2147483647.0f ), Right( src.Right / 2147483647.0f ) {}
 };
 struct Stereo21Out16
 {
 	s16 Left;
 	s16 Right;
 	s16 LFE;
 	void ResampleFrom( const StereoOut32& src )
 	{
 		Left = src.Left >> SndOutVolumeShift;
 		Right = src.Right >> SndOutVolumeShift;
 		LFE = (src.Left + src.Right) >> (SndOutVolumeShift + 1);
 	}
 };
 struct StereoQuadOut16
 {
 	s16 Left;
 	s16 Right;
 	s16 LeftBack;
 	s16 RightBack;
 	void ResampleFrom( const StereoOut32& src )
 	{
 		Left = src.Left >> SndOutVolumeShift;
 		Right = src.Right >> SndOutVolumeShift;
 		LeftBack = src.Left >> SndOutVolumeShift;
 		RightBack = src.Right >> SndOutVolumeShift;
 	}
 };
 struct Stereo41Out16
 {
 	s16 Left;
 	s16 Right;
 	s16 LFE;
 	s16 LeftBack;
 	s16 RightBack;
 	void ResampleFrom( const StereoOut32& src )
 	{
 		Left = src.Left >> SndOutVolumeShift;
 		Right = src.Right >> SndOutVolumeShift;
 		LFE = (src.Left + src.Right) >> (SndOutVolumeShift + 1);
 		LeftBack = src.Left >> SndOutVolumeShift;
 		RightBack = src.Right >> SndOutVolumeShift;
 	}
 };
 struct Stereo51Out16
 {
 	s16 Left;
 	s16 Right;
 	s16 Center;
 	s16 LFE;
 	s16 LeftBack;
 	s16 RightBack;
 	// Implementation Note: Center and Subwoofer/LFE -->
 	// This method is simple and sounds nice.  It relies on the speaker/soundcard
 	// systems do to their own low pass / crossover.  Manual lowpass is wasted effort
 	// and can't match solid state results anyway.
 	void ResampleFrom( const StereoOut32& src )
 	{
 		Left = src.Left >> SndOutVolumeShift;
 		Right = src.Right >> SndOutVolumeShift;
 		Center = (src.Left + src.Right) >> (SndOutVolumeShift + 1);
 		LFE = Center;
 		LeftBack = src.Left >> SndOutVolumeShift;
 		RightBack = src.Right >> SndOutVolumeShift;
 	}
 };
 struct Stereo71Out16
 {
 	s16 Left;
 	s16 Right;
 	s16 Center;
 	s16 LFE;
 	s16 LeftBack;
 	s16 RightBack;
 	s16 LeftSide;
 	s16 RightSide;
 	void ResampleFrom( const StereoOut32& src )
 	{
 		Left = src.Left >> SndOutVolumeShift;
 		Right = src.Right >> SndOutVolumeShift;
 		Center = (src.Left + src.Right) >> (SndOutVolumeShift + 1);
 		LFE = Center;
 		LeftBack = src.Left >> SndOutVolumeShift;
 		RightBack = src.Right >> SndOutVolumeShift;
 		LeftSide = src.Left >> (SndOutVolumeShift+1);
 		RightSide = src.Right >> (SndOutVolumeShift+1);
 	}
 };
 // Plain aggregate of three 32 bit samples (Left, Right, LFE).
 // No constructors or conversion helpers are defined on this type.
 struct Stereo21Out32
 {
 	s32 Left;
 	s32 Right;
 	s32 LFE;
 };
 // Plain aggregate of five 32 bit samples (Left, Right, LFE, LeftBack, RightBack).
 // No constructors or conversion helpers are defined on this type.
 struct Stereo41Out32
 {
 	s32 Left;
 	s32 Right;
 	s32 LFE;
 	s32 LeftBack;
 	s32 RightBack;
 };
 // Plain aggregate of six 32 bit samples (Left, Right, Center, LFE, LeftBack,
 // RightBack).  No constructors or conversion helpers are defined on this type.
 struct Stereo51Out32
 {
 	s32 Left;
 	s32 Right;
 	s32 Center;
 	s32 LFE;
 	s32 LeftBack;
 	s32 RightBack;
 };
 // Developer Note: This is a static class only (all static members).
 class SndBuffer
 {
 private:
 	static bool m_underrun_freeze;
 	static s32 m_predictData;
 	static float lastPct;
 	static StereoOut32* sndTempBuffer;
 	static StereoOut16* sndTempBuffer16;
 	static int sndTempProgress;
 	static int m_dsp_progress;
 	static int m_dsp_writepos;
 	static int m_timestretch_progress;
 	static int m_timestretch_writepos;
 	static StereoOut32 *m_buffer;
 	static s32 m_size;
 	static s32 m_rpos;
 	static s32 m_wpos;
 	static s32 m_data;
 	static float lastEmergencyAdj;
 	static float cTempo;
 	static float eTempo;
 	static int freezeTempo;
 	static void _InitFail();
 	static void _WriteSamples(StereoOut32* bData, int nSamples);
 	static bool CheckUnderrunStatus( int& nSamples, int& quietSampleCount );
 	static void soundtouchInit();
 	static void soundtouchCleanup();
 	static void timeStretchWrite();
 	static void timeStretchUnderrun();
 	static s32 timeStretchOverrun();
 	static void PredictDataWrite( int samples );
 	static float GetStatusPct();
 	static void UpdateTempoChange();
 public:
-	virtual ~SndBuffer() {}
+	static void Init();
 	static void Cleanup();
 	static void Write( const StereoOut32& Sample );
 	static s32 Test();
 	static void Configure(HWND parent, u32 module );
-	virtual void WriteSamples(s32 *buffer, int nSamples)=0;
+	// Note: When using with 32 bit output buffers, the user of this function is responsible
-	virtual void PauseOnWrite(bool doPause)=0;
+	// for shifting the values to where they need to be manually.  The fixed point depth of
 	// the sample output is determined by the SndOutVolumeShift, which is the number of bits
 	// to shift right to get a 16 bit result.
 	template< typename T >
 	static void ReadSamples( T* bData )
 	{
 		int nSamples = SndOutPacketSize;
-	virtual void ReadSamples( s16* bData )=0;
+		// Problem:
-	virtual void ReadSamples( s32* bData )=0;
+		//  If the SPU2 gets even the least bit out of sync with the SndOut device,
 		//  the readpos of the circular buffer will overtake the writepos,
 		//  leading to a prolonged period of hopscotching read/write accesses (ie,
 		//  lots of staticy crap sound for several seconds).
 		//
 		// Fix:
 		//  If the read position overtakes the write position, abort the
 		//  transfer immediately and force the SndOut driver to wait until
 		//  the read buffer has filled up again before proceeding.
 		//  This will cause one brief hiccup that can never exceed the user's
 		//  set buffer length in duration.
-	//virtual s32  GetBufferUsage()=0;
+		int quietSamples;
-	//virtual s32  GetBufferSize()=0;
+		if( CheckUnderrunStatus( nSamples, quietSamples ) )
 		{
 			jASSUME( nSamples <= SndOutPacketSize );
 			// [Air] [TODO]: This loop is probably a candidate for SSE2 optimization.
 			const int endPos = m_rpos + nSamples;
 			const int secondCopyLen = endPos - m_size;
 			const StereoOut32* rposbuffer = &m_buffer[m_rpos];
 			m_data -= nSamples;
 			if( secondCopyLen > 0 )
 			{
 				nSamples -= secondCopyLen;
 				for( int i=0; i<secondCopyLen; i++ )
 					bData[nSamples+i].ResampleFrom( m_buffer[i] );
 				m_rpos = secondCopyLen;
 			}
 			else
 				m_rpos += nSamples;
 			for( int i=0; i<nSamples; i++ )
 				bData[i].ResampleFrom( rposbuffer[i] );
 		}
 		// If quietSamples != 0 it means we have an underrun...
 		// Let's just dole out some silence, because that's usually the least
 		// painful way of dealing with underruns:
 		memset( bData, 0, quietSamples * sizeof(T) );
 	}
 };
 class SndOutModule
@ -74,7 +344,7 @@ public:
 	// (for use in configuration screen)
 	virtual const wchar_t* GetLongName() const=0;
-	virtual s32  Init(SndBuffer *buffer)=0;
+	virtual s32  Init()=0;
 	virtual void Close()=0;
 	virtual s32  Test() const=0;
 	virtual void Configure(HWND parent)=0;
@ -87,12 +357,9 @@ public:
 //internal
-extern SndOutModule *WaveOut;
+extern SndOutModule* WaveOut;
-extern SndOutModule *DSoundOut;
+extern SndOutModule* DSoundOut;
-extern SndOutModule *FModOut;
+extern SndOutModule* XAudio2Out;
 extern SndOutModule *ASIOOut;
 extern SndOutModule *XAudio2Out;
 extern SndOutModule *DSound51Out;
 extern SndOutModule* mods[];
--- a/plugins/spu2-x/src/Spu2.cpp
+++ b/plugins/spu2-x/src/Spu2.cpp
@ -133,6 +133,13 @@ __inline void __fastcall spu2M_Write( u32 addr, u16 value )
 	spu2M_Write( addr, (s16)value );
 }
 // Definitions of the shared "Max" volume constants.
 // NOTE(review): presumably 0x7FFFFFFF is the full-scale 32 bit volume value and
 // 0x3FFF the matching 16 bit register value -- confirm against the constructors.
 V_VolumeLR V_VolumeLR::Max( 0x7FFFFFFF );
 V_VolumeSlideLR V_VolumeSlideLR::Max( 0x3FFF, 0x7FFFFFFF );
 // Default constructor; the body is intentionally empty and performs no
 // initialization of its own.
 V_Core::V_Core()
 {
 }
 void V_Core::Reset()
 {
 	memset( this, 0, sizeof(V_Core) );
@ -141,16 +148,12 @@ void V_Core::Reset()
 	Regs.STATX=0;
 	Regs.ATTR=0;
-	ExtL = 0x7FFFFFFF;
+	ExtVol = V_VolumeLR::Max;
-	ExtR = 0x7FFFFFFF;
+	InpVol = V_VolumeLR::Max;
-	InpL = 0x7FFFFFFF;
+	FxVol  = V_VolumeLR::Max;
-	InpR = 0x7FFFFFFF;
+
-	FxL  = 0x7FFFFFFF;
+	MasterVol = V_VolumeSlideLR::Max;
-	FxR  = 0x7FFFFFFF;
+
 	MasterL.Reg_VOL= 0x3FFF;
 	MasterR.Reg_VOL= 0x3FFF;
 	MasterL.Value  = 0x7FFFFFFF;
 	MasterR.Value  = 0x7FFFFFFF;
 	ExtWetR = -1;
 	ExtWetL = -1;
 	ExtDryR = -1;
@ -176,32 +179,94 @@ void V_Core::Reset()
 	for( uint v=0; v<24; ++v )
 	{
-		Voices[v].VolumeL.Reg_VOL = 0x3FFF;
+		Voices[v].Volume = V_VolumeSlideLR::Max;
 		Voices[v].VolumeR.Reg_VOL = 0x3FFF;
-		Voices[v].VolumeL.Value = 0x7FFFFFFF;
+		Voices[v].ADSR.Value = 0;
-		Voices[v].VolumeR.Value = 0x7FFFFFFF;
+		Voices[v].ADSR.Phase = 0;
-		
+		Voices[v].Pitch = 0x3FFF;
 		Voices[v].ADSR.Value=0;
 		Voices[v].ADSR.Phase=0;
 		Voices[v].Pitch=0x3FFF;
 		Voices[v].DryL = -1;
 		Voices[v].DryR = -1;
 		Voices[v].WetL = -1;
 		Voices[v].WetR = -1;
-		Voices[v].NextA=2800;
+		Voices[v].NextA = 2800;
-		Voices[v].StartA=2800;
+		Voices[v].StartA = 2800;
-		Voices[v].LoopStartA=2800;
+		Voices[v].LoopStartA = 2800;
 	}
-	DMAICounter=0;
+	DMAICounter = 0;
-	AdmaInProgress=0;
+	AdmaInProgress = 0;
-	Regs.STATX=0x80;
+	Regs.STATX = 0x80;
- }
+}
 // Converts a reverb offset into an address in the effects area.
 // The candidate address is EffectsStartA + ReverbX + offset; if it falls above
 // EffectsEndA or below EffectsStartA it is recomputed from the offset taken
 // modulo EffectsBufferSize.
 // NOTE(review): both out-of-range branches divide by EffectsBufferSize, so a
 // zero size would fault here -- callers presumably guard against that; confirm.
 // NOTE(review): the math is carried out in u32 while the result is returned as
 // s32; behavior for negative offsets depends on the member types -- verify.
 s32 V_Core::EffectsBufferIndexer( s32 offset ) const
 {
 	u32 pos = EffectsStartA + ReverbX + offset;
 	// Need to use modulus here, because games can and will drop the buffer size
 	// without notice, and it leads to offsets several times past the end of the buffer.
 	if( pos > EffectsEndA )
 	{
 		// Past the end: re-base the wrapped offset on the start address.
 		pos = EffectsStartA + ((ReverbX + offset) % (u32)EffectsBufferSize);
 	}
 	else if( pos < EffectsStartA )
 	{
 		// Before the start: count the wrapped offset back from one past the end.
 		pos = EffectsEndA+1 - ((ReverbX + offset) % (u32)EffectsBufferSize );
 	}
 	return pos;
 } 
 // Recomputes the two cached "A" feedback source addresses.  Each one is the
 // matching MIX_DEST_A register minus FB_SRC_A, passed through
 // EffectsBufferIndexer.
 void V_Core::UpdateFeedbackBuffersA()
 {
 	RevBuffers.FB_SRC_A0 = EffectsBufferIndexer( Revb.MIX_DEST_A0 - Revb.FB_SRC_A );
 	RevBuffers.FB_SRC_A1 = EffectsBufferIndexer( Revb.MIX_DEST_A1 - Revb.FB_SRC_A );
 }
 // Recomputes the two cached "B" feedback source addresses.  Each one is the
 // matching MIX_DEST_B register minus FB_SRC_B, passed through
 // EffectsBufferIndexer.
 void V_Core::UpdateFeedbackBuffersB()
 {
 	RevBuffers.FB_SRC_B0 = EffectsBufferIndexer( Revb.MIX_DEST_B0 - Revb.FB_SRC_B );
 	RevBuffers.FB_SRC_B1 = EffectsBufferIndexer( Revb.MIX_DEST_B1 - Revb.FB_SRC_B );
 }
 void V_Core::UpdateEffectsBufferSize()
 {
-	EffectsBufferSize = EffectsEndA - EffectsStartA + 1;
+	ReverbX = 0;
 	const s32 newbufsize = EffectsEndA - EffectsStartA + 1;
 	if( !RevBuffers.NeedsUpdated && newbufsize ==  EffectsBufferSize ) return;
 	RevBuffers.NeedsUpdated = false;
 	if( EffectsBufferSize == 0 ) return;
 	// Rebuild buffer indexers.
 	RevBuffers.ACC_SRC_A0 = EffectsBufferIndexer( Revb.ACC_SRC_A0 );
 	RevBuffers.ACC_SRC_A1 = EffectsBufferIndexer( Revb.ACC_SRC_A1 );
 	RevBuffers.ACC_SRC_B0 = EffectsBufferIndexer( Revb.ACC_SRC_B0 );
 	RevBuffers.ACC_SRC_B1 = EffectsBufferIndexer( Revb.ACC_SRC_B1 );
 	RevBuffers.ACC_SRC_C0 = EffectsBufferIndexer( Revb.ACC_SRC_C0 );
 	RevBuffers.ACC_SRC_C1 = EffectsBufferIndexer( Revb.ACC_SRC_C1 );
 	RevBuffers.ACC_SRC_D0 = EffectsBufferIndexer( Revb.ACC_SRC_D0 );
 	RevBuffers.ACC_SRC_D1 = EffectsBufferIndexer( Revb.ACC_SRC_D1 );
 	UpdateFeedbackBuffersA();
 	UpdateFeedbackBuffersB();
 	RevBuffers.IIR_DEST_A0 = EffectsBufferIndexer( Revb.IIR_DEST_A0 );
 	RevBuffers.IIR_DEST_A1 = EffectsBufferIndexer( Revb.IIR_DEST_A1 );
 	RevBuffers.IIR_DEST_B0 = EffectsBufferIndexer( Revb.IIR_DEST_B0 );
 	RevBuffers.IIR_DEST_B1 = EffectsBufferIndexer( Revb.IIR_DEST_B1 );
 	RevBuffers.IIR_SRC_A0 = EffectsBufferIndexer( Revb.IIR_SRC_A0 );
 	RevBuffers.IIR_SRC_A1 = EffectsBufferIndexer( Revb.IIR_SRC_A1 );
 	RevBuffers.IIR_SRC_B0 = EffectsBufferIndexer( Revb.IIR_SRC_B0 );
 	RevBuffers.IIR_SRC_B1 = EffectsBufferIndexer( Revb.IIR_SRC_B1 );
 	RevBuffers.MIX_DEST_A0 = EffectsBufferIndexer( Revb.MIX_DEST_A0 );
 	RevBuffers.MIX_DEST_A1 = EffectsBufferIndexer( Revb.MIX_DEST_A1 );
 	RevBuffers.MIX_DEST_B0 = EffectsBufferIndexer( Revb.MIX_DEST_B0 );
 	RevBuffers.MIX_DEST_B1 = EffectsBufferIndexer( Revb.MIX_DEST_B1 );
 }
 void V_Voice::Start()
@ -379,6 +444,11 @@ static s32 GetVol32( u16 src )
 	return (((s32)src) << 16 ) | ((src<<1) & 0xffff);
 }
 // Loads Value from a 16 bit register value, widened to 32 bits by GetVol32.
 void V_VolumeSlide::RegSet( u16 src )
 {
 	const s32 widened = GetVol32( src );
 	Value = widened;
 }
 void SPU_ps1_write(u32 mem, u16 value) 
 {
 	bool show=true;
@ -393,15 +463,15 @@ void SPU_ps1_write(u32 mem, u16 value)
 		switch(vval)
 		{
 			case 0: //VOLL (Volume L)
-				Cores[0].Voices[voice].VolumeL.Mode = 0;
+				Cores[0].Voices[voice].Volume.Left.Mode = 0;
-				Cores[0].Voices[voice].VolumeL.Value = GetVol32( value<<1 );
+				Cores[0].Voices[voice].Volume.Left.RegSet( value << 1 );
-				Cores[0].Voices[voice].VolumeL.Reg_VOL = value;
+				Cores[0].Voices[voice].Volume.Left.Reg_VOL = value;
 			break;
 			case 1: //VOLR (Volume R)
-				Cores[0].Voices[voice].VolumeR.Mode = 0;
+				Cores[0].Voices[voice].Volume.Right.Mode = 0;
-				Cores[0].Voices[voice].VolumeR.Value = GetVol32( value<<1 );
+				Cores[0].Voices[voice].Volume.Right.RegSet( value << 1 );
-				Cores[0].Voices[voice].VolumeR.Reg_VOL = value;
+				Cores[0].Voices[voice].Volume.Right.Reg_VOL = value;
 			break;
 			case 2:	Cores[0].Voices[voice].Pitch = value; break;
@ -437,19 +507,22 @@ void SPU_ps1_write(u32 mem, u16 value)
 	else switch(reg)
 	{
 		case 0x1d80://         Mainvolume left
-			Cores[0].MasterL.Mode = 0;
+			Cores[0].MasterVol.Left.Mode = 0;
-			Cores[0].MasterL.Value = GetVol32( value );
+			Cores[0].MasterVol.Left.RegSet( value );
-			break;
+		break;
 		case 0x1d82://         Mainvolume right
-			Cores[0].MasterL.Mode = 0;
+			Cores[0].MasterVol.Right.Mode = 0;
-			Cores[0].MasterR.Value = GetVol32( value );
+			Cores[0].MasterVol.Right.RegSet( value );
-			break;
+		break;
 		case 0x1d84://         Reverberation depth left
-			Cores[0].FxL = GetVol32( value );
+			Cores[0].FxVol.Left = GetVol32( value );
-			break;
+		break;
 		case 0x1d86://         Reverberation depth right
-			Cores[0].FxR = GetVol32( value );
+			Cores[0].FxVol.Right = GetVol32( value );
-			break;
+		break;
 		case 0x1d88://         Voice ON  (0-15)
 			SPU2_FastWrite(REG_S_KON,value);
@ -463,65 +536,74 @@ void SPU_ps1_write(u32 mem, u16 value)
 			break;
 		case 0x1d8e://         Voice OFF (16-23)
 			SPU2_FastWrite(REG_S_KOFF+2,value);
-			break;
+		break;
 		case 0x1d90://         Channel FM (pitch lfo) mode (0-15)
 			SPU2_FastWrite(REG_S_PMON,value);
-			break;
+		break;
 		case 0x1d92://         Channel FM (pitch lfo) mode (16-23)
 			SPU2_FastWrite(REG_S_PMON+2,value);
-			break;
+		break;
 		case 0x1d94://         Channel Noise mode (0-15)
 			SPU2_FastWrite(REG_S_NON,value);
-			break;
+		break;
 		case 0x1d96://         Channel Noise mode (16-23)
 			SPU2_FastWrite(REG_S_NON+2,value);
-			break;
+		break;
 		case 0x1d98://         Channel Reverb mode (0-15)
 			SPU2_FastWrite(REG_S_VMIXEL,value);
 			SPU2_FastWrite(REG_S_VMIXER,value);
-			break;
+		break;
 		case 0x1d9a://         Channel Reverb mode (16-23)
 			SPU2_FastWrite(REG_S_VMIXEL+2,value);
 			SPU2_FastWrite(REG_S_VMIXER+2,value);
-			break;
+		break;
 		case 0x1d9c://         Channel Reverb mode (0-15)
 			SPU2_FastWrite(REG_S_VMIXL,value);
 			SPU2_FastWrite(REG_S_VMIXR,value);
-			break;
+		break;
 		case 0x1d9e://         Channel Reverb mode (16-23)
 			SPU2_FastWrite(REG_S_VMIXL+2,value);
 			SPU2_FastWrite(REG_S_VMIXR+2,value);
-			break;
+		break;
 		case 0x1da2://         Reverb work area start
-			{
+		{
-				u32 val=(u32)value <<8;
+			u32 val = (u32)value << 8;
 			SPU2_FastWrite(REG_A_ESA,  val&0xFFFF);
 			SPU2_FastWrite(REG_A_ESA+2,val>>16);
 		}
 		break;
 				SPU2_FastWrite(REG_A_ESA,  val&0xFFFF);
 				SPU2_FastWrite(REG_A_ESA+2,val>>16);
 			}
 			break;
 		case 0x1da4:
 			Cores[0].IRQA=(u32)value<<8;
-			break;
+		break;
 		case 0x1da6:
 			Cores[0].TSA=(u32)value<<8;
-			break;
+		break;
 		case 0x1daa:
 			SPU2_FastWrite(REG_C_ATTR,value);
-			break;
+		break;
 		case 0x1dae:
 			SPU2_FastWrite(REG_P_STATX,value);
-			break;
+		break;
 		case 0x1da8:// Spu Write to Memory
 			DmaWrite(0,value);
 			show=false;
-			break;
+		break;
 	}
 	if(show) FileLog("[%10d] (!) SPU write mem %08x value %04x\n",Cycles,mem,value);
@ -546,27 +628,31 @@ u16 SPU_ps1_read(u32 mem)
 			case 0: //VOLL (Volume L)
 				//value=Cores[0].Voices[voice].VolumeL.Mode;
 				//value=Cores[0].Voices[voice].VolumeL.Value;
-				value=Cores[0].Voices[voice].VolumeL.Reg_VOL;	break;
+				value = Cores[0].Voices[voice].Volume.Left.Reg_VOL;
 			break;
 			case 1: //VOLR (Volume R)
 				//value=Cores[0].Voices[voice].VolumeR.Mode;
 				//value=Cores[0].Voices[voice].VolumeR.Value;
-				value=Cores[0].Voices[voice].VolumeR.Reg_VOL;	break;
+				value = Cores[0].Voices[voice].Volume.Right.Reg_VOL;
-			case 2:	value=Cores[0].Voices[voice].Pitch;			break;
+			break;
-			case 3:	value=Cores[0].Voices[voice].StartA;	break;
+			
-			case 4: value=Cores[0].Voices[voice].ADSR.Reg_ADSR1;	break;
+			case 2:	value = Cores[0].Voices[voice].Pitch;		break;
-			case 5: value=Cores[0].Voices[voice].ADSR.Reg_ADSR2;	break;
+			case 3:	value = Cores[0].Voices[voice].StartA;		break;
-			case 6:	value=Cores[0].Voices[voice].ADSR.Value >> 16;	break;
+			case 4: value = Cores[0].Voices[voice].ADSR.Reg_ADSR1;	break;
-			case 7:	value=Cores[0].Voices[voice].LoopStartA;	break;
+			case 5: value = Cores[0].Voices[voice].ADSR.Reg_ADSR2;	break;
 			case 6:	value = Cores[0].Voices[voice].ADSR.Value >> 16;	break;
 			case 7:	value = Cores[0].Voices[voice].LoopStartA;	break;
 			jNO_DEFAULT;
 		}
 	}
 	else switch(reg)
 	{
-		case 0x1d80: value = Cores[0].MasterL.Value>>16; break;
+		case 0x1d80: value = Cores[0].MasterVol.Left.Value >> 16;  break;
-		case 0x1d82: value = Cores[0].MasterR.Value>>16; break;
+		case 0x1d82: value = Cores[0].MasterVol.Right.Value >> 16; break;
-		case 0x1d84: value = Cores[0].FxL>>16;           break;
+		case 0x1d84: value = Cores[0].FxVol.Left >> 16;            break;
-		case 0x1d86: value = Cores[0].FxR>>16;           break;
+		case 0x1d86: value = Cores[0].FxVol.Right >> 16;           break;
 		case 0x1d88: value = 0; break;
 		case 0x1d8a: value = 0; break;
@ -585,8 +671,11 @@ u16 SPU_ps1_read(u32 mem)
 		case 0x1d9e: value = Cores[0].Regs.VMIXL>>16;     break;
 		case 0x1da2:
-			value = Cores[0].EffectsStartA>>3;
+			if( value != Cores[0].EffectsStartA>>3 )
-			Cores[0].UpdateEffectsBufferSize();
+			{
 				value = Cores[0].EffectsStartA>>3;
 				Cores[0].UpdateEffectsBufferSize();
 			}
 		break;
 		case 0x1da4: value = Cores[0].IRQA>>3;            break;
 		case 0x1da6: value = Cores[0].TSA>>3;             break;
@ -607,15 +696,49 @@ u16 SPU_ps1_read(u32 mem)
 	return value;
 }
-static u32 SetLoWord( u32 var, u16 writeval )
+// Ah the joys of endian-specific code! :D
 static __forceinline u32 SetHiWord( u32& src, u16 value )
 {
-	return (var & 0xFFFF0000) | writeval;
+	((u16*)&src)[1] = value;
 	return src;
 }
-
+static __forceinline u32 SetLoWord( u32& src, u16 value )
 static u32 SetHiWord( u32 var, u16 writeval )
 {
-	return (var & 0x0000FFFF) | (writeval<<16);
+	((u16*)&src)[0] = value;
 	return src;
 }
 // Replaces the upper 16 bits of src with value, keeps the lower 16 bits, and
 // returns the updated src.  Uses shifts and masks rather than indexing src
 // through a u16 pointer, so the result does not depend on host byte order and
 // does not violate strict aliasing.  (Assumes s32/u32 are 32 bits wide.)
 static __forceinline s32 SetHiWord( s32& src, u16 value )
 {
 	src = (s32)( ((u32)src & 0x0000FFFFu) | ((u32)value << 16) );
 	return src;
 }
 // Replaces the lower 16 bits of src with value, keeps the upper 16 bits, and
 // returns the updated src.  Uses masks rather than indexing src through a u16
 // pointer, so the result does not depend on host byte order and does not
 // violate strict aliasing.  (Assumes s32/u32 are 32 bits wide.)
 static __forceinline s32 SetLoWord( s32& src, u16 value )
 {
 	src = (s32)( ((u32)src & 0xFFFF0000u) | (u32)value );
 	return src;
 }
 // Returns the upper 16 bits of src.  Extracted with a shift instead of a u16
 // pointer cast, so it is byte-order independent and free of aliasing issues.
 // src is not modified.
 static __forceinline u16 GetHiWord( u32& src )
 {
 	return (u16)( src >> 16 );
 }
 // Returns the lower 16 bits of src.  Extracted with a mask instead of a u16
 // pointer cast, so it is byte-order independent and free of aliasing issues.
 // src is not modified.
 static __forceinline u16 GetLoWord( u32& src )
 {
 	return (u16)( src & 0xFFFFu );
 }
 // Returns the upper 16 bits of src.  The value is converted to unsigned before
 // shifting so no sign bits are smeared in; this replaces a u16 pointer cast
 // that depended on host byte order.  src is not modified.
 static __forceinline u16 GetHiWord( s32& src )
 {
 	return (u16)( (u32)src >> 16 );
 }
 // Returns the lower 16 bits of src.  Extracted with a mask instead of a u16
 // pointer cast, so it is byte-order independent and free of aliasing issues.
 // src is not modified.
 static __forceinline u16 GetLoWord( s32& src )
 {
 	return (u16)( (u32)src & 0xFFFFu );
 }
 __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
@ -637,7 +760,9 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 			case 0: //VOLL (Volume L)
 			case 1: //VOLR (Volume R)
 			{
-				V_Volume& thisvol = (param==0) ? thisvoice.VolumeL : thisvoice.VolumeR;
+				V_VolumeSlide& thisvol = (param==0) ? thisvoice.Volume.Left : thisvoice.Volume.Right;
 				thisvol.Reg_VOL = value;
 				if (value & 0x8000)		// +Lin/-Lin/+Exp/-Exp
 				{
 					thisvol.Mode = (value & 0xF000)>>12;
@ -649,11 +774,10 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 					// Volumes range from 0x3fff to 0x7fff, with 0x4000 serving as
 					// the "sign" bit, so a simple bitwise extension will do the trick:
-					thisvol.Value = GetVol32( value<<1 );
+					thisvol.RegSet( value<<1 );
 					thisvol.Mode = 0;
 					thisvol.Increment = 0;
 				}
 				thisvol.Reg_VOL = value;
 			}
 			break;
@ -677,8 +801,8 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 				ConLog( "* SPU2: Mysterious ADSR Volume Set to 0x%x", value );
 			break;
-			case 6:	thisvoice.VolumeL.Value = GetVol32( value ); break;
+			case 6:	thisvoice.Volume.Left.RegSet( value ); break;
-			case 7:	thisvoice.VolumeR.Value = GetVol32( value ); break;
+			case 7:	thisvoice.Volume.Right.RegSet( value ); break;
 			jNO_DEFAULT;
 		}
@ -727,6 +851,15 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 		*(regtable[mem>>1]) = value;
 		UpdateSpdifMode();
 	}
 	else if( mem >= R_FB_SRC_A && mem < REG_A_EEA )
 	{
 		// Signal to the Reverb code that the effects buffers need to be re-aligned.
 		// This is simple, efficient, and safe, since we only want to re-align
 		// buffers after both hi and lo words have been written.
 		*(regtable[mem>>1]) = value;
 		Cores[core].RevBuffers.NeedsUpdated = true;
 	}
 	else
 	{
 		switch(omem)
@ -783,22 +916,22 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 			case REG_S_PMON:
 				vx=2; for (vc=1;vc<16;vc++) { Cores[core].Voices[vc].Modulated=(s8)((value & vx)/vx); vx<<=1; }
-				Cores[core].Regs.PMON = SetLoWord( Cores[core].Regs.PMON, value );
+				SetLoWord( Cores[core].Regs.PMON, value );
 			break;
 			case (REG_S_PMON + 2):
 				vx=1; for (vc=16;vc<24;vc++) { Cores[core].Voices[vc].Modulated=(s8)((value & vx)/vx); vx<<=1; }
-				Cores[core].Regs.PMON = SetHiWord( Cores[core].Regs.PMON, value );
+				SetHiWord( Cores[core].Regs.PMON, value );
 			break;
 			case REG_S_NON:
 				vx=1; for (vc=0;vc<16;vc++) { Cores[core].Voices[vc].Noise=(s8)((value & vx)/vx); vx<<=1; }
-				Cores[core].Regs.NON = SetLoWord( Cores[core].Regs.NON, value );
+				SetLoWord( Cores[core].Regs.NON, value );
 			break;
 			case (REG_S_NON + 2):
 				vx=1; for (vc=16;vc<24;vc++) { Cores[core].Voices[vc].Noise=(s8)((value & vx)/vx); vx<<=1; }
-				Cores[core].Regs.NON = SetHiWord( Cores[core].Regs.NON, value );
+				SetHiWord( Cores[core].Regs.NON, value );
 			break;
 // Games like to repeatedly write these regs over and over with the same value, hence
@ -895,26 +1028,23 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 			// Reverb Start and End Address Writes!
 			//  * Yes, these are backwards from all the volumes -- the hiword comes FIRST (wtf!)
-			//  * End position is a hiword only!  Lowword is always ffff.
+			//  * End position is a hiword only!  Loword is always ffff.
 			//  * The Reverb buffer position resets on writes to StartA.  It probably resets
 			//    on writes to End too.  Docs don't say, but they're for PSX, which couldn't
 			//    change the end address anyway.
 			case REG_A_ESA:
-				Cores[core].EffectsStartA = (Cores[core].EffectsStartA & 0x0000FFFF) | (value<<16);
+				SetHiWord( Cores[core].EffectsStartA, value );
 				Cores[core].ReverbX = 0;
 				Cores[core].UpdateEffectsBufferSize();
 			break;
 			case (REG_A_ESA + 2):
-				Cores[core].EffectsStartA = (Cores[core].EffectsStartA & 0xFFFF0000) | value;
+				SetLoWord( Cores[core].EffectsStartA, value );
 				Cores[core].ReverbX = 0;
 				Cores[core].UpdateEffectsBufferSize();
 			break;
 			case REG_A_EEA:
 				Cores[core].EffectsEndA = ((u32)value<<16) | 0xFFFF;
 				Cores[core].ReverbX = 0;
 				Cores[core].UpdateEffectsBufferSize();
 			break;
@ -923,7 +1053,7 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 			case REG_P_MVOLL:
 			case REG_P_MVOLR:
 			{
-				V_Volume& thisvol = (omem==REG_P_MVOLL) ? Cores[core].MasterL : Cores[core].MasterR;
+				V_VolumeSlide& thisvol = (omem==REG_P_MVOLL) ? Cores[core].MasterVol.Left : Cores[core].MasterVol.Right;
 				if( value & 0x8000 )	// +Lin/-Lin/+Exp/-Exp
 				{ 
@ -945,27 +1075,27 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value )
 			break;
 			case REG_P_EVOLL:
-				Cores[core].FxL = GetVol32( value );
+				Cores[core].FxVol.Left = GetVol32( value );
 			break;
 			case REG_P_EVOLR:
-				Cores[core].FxR = GetVol32( value );
+				Cores[core].FxVol.Right = GetVol32( value );
 			break;
 			case REG_P_AVOLL:
-				Cores[core].ExtL = GetVol32( value );
+				Cores[core].ExtVol.Left = GetVol32( value );
 			break;
 			case REG_P_AVOLR:
-				Cores[core].ExtR = GetVol32( value );
+				Cores[core].ExtVol.Right = GetVol32( value );
 			break;
 			case REG_P_BVOLL:
-				Cores[core].InpL = GetVol32( value );
+				Cores[core].InpVol.Left = GetVol32( value );
 			break;
 			case REG_P_BVOLR:
-				Cores[core].InpR = GetVol32( value );
+				Cores[core].InpVol.Right = GetVol32( value );
 			break;
 			case REG_S_ADMAS:
@ -1012,7 +1142,7 @@ void StartVoices(int core, u32 value)
 					(thisvc.WetL)?"+":"-",(thisvc.WetR)?"+":"-",
 					*(u8*)GetMemPtr(thisvc.StartA),*(u8 *)GetMemPtr((thisvc.StartA)+1),
 					thisvc.Pitch,
-					thisvc.VolumeL.Value,thisvc.VolumeR.Value,
+					thisvc.Volume.Left.Value,thisvc.Volume.Right.Value,
 					thisvc.ADSR.Reg_ADSR1,thisvc.ADSR.Reg_ADSR2);
 			}
 		}
--- a/plugins/spu2-x/src/Spu2.h
+++ b/plugins/spu2-x/src/Spu2.h
@ -182,21 +182,25 @@ extern void DspUpdate(); // to let the Dsp process window messages
 extern void RecordStart();
 extern void RecordStop();
-extern void RecordWrite(s16 left, s16 right);
+extern void RecordWrite( const StereoOut16& sample );
 extern void UpdateSpdifMode();
 extern void LowPassFilterInit();
 extern void InitADSR();
 extern void CalculateADSR( V_Voice& vc );
 extern void __fastcall ReadInput( V_Core& thiscore, StereoOut32& PData );
 //////////////////////////////
 //    The Mixer Section     //
 //////////////////////////////
 extern void Mix();
-extern s32 clamp_mix(s32 x, u8 bitshift=0);
+extern s32 clamp_mix( s32 x, u8 bitshift=0 );
 extern void clamp_mix( StereoOut32& sample, u8 bitshift=0 );
 extern void Reverb_AdvanceBuffer( V_Core& thiscore );
-extern void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR);
+extern StereoOut32 DoReverb( V_Core& thiscore, const StereoOut32& Input );
 extern s32 MulShr32( s32 srcval, s32 mulval );
 //#define PCM24_S1_INTERLEAVE
--- a/plugins/spu2-x/src/Timestretcher.cpp
+++ b/plugins/spu2-x/src/Timestretcher.cpp
@ -0,0 +1,333 @@
 /* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
 * Developed and maintained by the Pcsx2 Development Team.
 * 
 * Original portions from SPU2ghz are (c) 2008 by David Quintana [gigaherz]
 *
 * This library is free software; you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the Free 
 * Software Foundation; either version 2.1 of the License, or (at your
 * option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT 
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along
 * with this library; if not, write to the Free Software Foundation, Inc., 59
 * Temple Place, Suite 330, Boston, MA  02111-1307  USA
 * 
 */
 #include "spu2.h"
 #include "SoundTouch/SoundTouch.h"
 #include "SoundTouch/WavFile.h"
 // The SoundTouch processor instance.  Allocated by SndBuffer::soundtouchInit()
 // and released by SndBuffer::soundtouchCleanup().
 static soundtouch::SoundTouch* pSoundTouch = NULL;
 // Statistics counters: UpdateTempoChange() bumps the stretch/normal block
 // counts, and timeStretchWrite() reports and resets them periodically.
 static int ts_stats_stretchblocks = 0;
 static int ts_stats_normalblocks = 0;
 // Counts timeStretchWrite() calls that made progress since the last stats log.
 static int ts_stats_logcounter = 0;
 // data prediction amount, used to "commit" data that hasn't
 // finished timestretch processing.
 s32 SndBuffer::m_predictData;
 // records last buffer status as returned by GetStatusPct()
 // (range -1.0 to 1.0, with 0.0 being 50% full)
 float SndBuffer::lastPct;
 // The emergency adjustment computed by the previous UpdateTempoChange() pass;
 // it is blended into the next pass for smoothing.
 float SndBuffer::lastEmergencyAdj;
 // cTempo is the accumulated "base" tempo; eTempo is the effective tempo most
 // recently handed to pSoundTouch->setTempo().
 float SndBuffer::cTempo = 1;
 float SndBuffer::eTempo = 1;
 // Countdown decremented on every UpdateTempoChange() call; while the decremented
 // value is still above zero the tempo is left untouched.
 int SndBuffer::freezeTempo = 0;
 // Adds 'samples' to the running prediction of data that has been handed to
 // the timestretcher but has not come back out of it yet.
 void SndBuffer::PredictDataWrite( int samples )
 {
 	m_predictData = m_predictData + samples;
 }
 // Reports how full the sound buffer is, relative to its half-way point.
 // Result range is -1.0 to 1.0:
 //    1.0 = buffer overflow!
 //    0.0 = buffer nominal (50% full)
 //   -1.0 = buffer underflow!
 float SndBuffer::GetStatusPct()
 {
 	// The output driver's free space is factored in as well, which gives a
 	// more accurate picture of the overall buffer status.
 	const int driverFree = mods[OutputModule]->GetEmptySampleCount();
 	//ConLog( "Data %d >>> driver: %d   predict: %d\n", m_data, driverFree, m_predictData );
 	const float queued = (float)(m_data + m_predictData - driverFree);
 	// Re-center around the half-full mark, then scale by half the buffer size.
 	float pct = queued - (m_size/2);
 	pct = pct / (m_size/2);
 	return pct;
 }
 // Recomputes the timestretcher tempo from the current buffer status.
 //
 // Reads GetStatusPct(), compares it with the previous reading (lastPct), and
 // derives a new base tempo (cTempo) and effective tempo (eTempo).  The
 // effective tempo is handed to pSoundTouch->setTempo() whenever it changes.
 // Also updates lastPct / lastEmergencyAdj and the ts_stats_* block counters.
 // Does nothing (other than decrementing freezeTempo) while the freezeTempo
 // countdown is still positive.
 void SndBuffer::UpdateTempoChange()
 {
 	// Tempo changes are frozen for the first few calls after init.
 	if( --freezeTempo > 0 )
 	{
 		return;
 	}
 	float statusPct = GetStatusPct();
 	float pctChange = statusPct - lastPct;
 	float tempoChange;
 	float emergencyAdj = 0;
 	float newcee = cTempo;		// workspace var. for cTempo
 	// IMPORTANT!
 	// If you plan to tweak these values, make sure you're using a release build
 	// OUTSIDE THE DEBUGGER to test it!  The Visual Studio debugger can really cause
 	// erratic behavior in the audio buffers, and makes the timestretcher seem a
 	// lot more inconsistent than it really is.
 	// We have two factors.
 	//   * Distance from nominal buffer status (50% full)
 	//   * The change from previous update to this update.
 	// Prediction based on the buffer change:
 	// (linear seems to work better here)
 	tempoChange = pctChange * 0.75f;
 	if( statusPct * tempoChange < 0.0f )
 	{
 		// only apply tempo change if it is in synch with the buffer status.
 		// In other words, if the buffer is high (over 0%), and is decreasing,
 		// ignore it.  It'll just muck things up.
 		tempoChange = 0;
 	}
 	// Sudden spikes in framerate can cause the nominal buffer status
 	// to go critical, in which case we have to enact an emergency
 	// stretch. The following cubic formulas do that.  Values near
 	// the extremities give much larger results than those near 0.
 	// And the value is added only this time, and does not accumulate.
 	// (otherwise a large value like this would cause problems down the road)
 	// Constants:
 	// Weight - weights the statusPct's "emergency" consideration.
 	//   higher values here will make the buffer perform more drastic
 	//   compensations at the outer edges of the buffer (at -75 or +75%
 	//   or beyond, for example).
 	// Range - scales the adjustment to the given range (more or less).
 	//   The actual range is dependent on the weight used, so if you increase
 	//   Weight you'll usually want to decrease Range somewhat to compensate.
 	// Prediction based on the buffer fill status:
 	const float statusWeight = 2.99f;
 	const float statusRange = 0.068f;
 	// "non-emergency" deadzone:  In this area stretching will be strongly discouraged.
 	// Note: due to the nature of timestretch latency, it's always a wee bit harder to
 	// cope with low fps (underruns) than it is high fps (overruns).  So to help out a
 	// little, the low-end portions of this check are less forgiving than the high-sides.
 	if( cTempo < 0.965f || cTempo > 1.060f ||
 		pctChange < -0.38f || pctChange > 0.54f ||
 		statusPct < -0.32f || statusPct > 0.39f ||
 		eTempo < 0.89f || eTempo > 1.19f )
 	{
 		// Cubic response: the sign of statusPct is preserved by the odd power.
 		emergencyAdj = ( pow( statusPct*statusWeight, 3.0f ) * statusRange);
 	}
 	// Smooth things out by factoring our previous adjustment into this one.
 	// It helps make the system 'feel' a little smarter by giving it at least
 	// one packet worth of history to help work off of:
 	emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f );
 	lastEmergencyAdj = emergencyAdj;
 	lastPct = statusPct;
 	// Accumulate a fraction of the tempo change into the tempo itself.
 	// This helps the system run "smarter" to games that run consistently
 	// fast or slow by altering the base tempo to something closer to the
 	// game's active speed.  In tests most games normalize within 2 seconds
 	// at 100ms latency, which is pretty good (larger buffers normalize even
 	// quicker).
 	newcee += newcee * (tempoChange+emergencyAdj) * 0.03f;
 	// Apply tempoChange as a scale of cTempo.  That way the effect is proportional
 	// to the current tempo.  (otherwise tempos rate of change at the extremes would
 	// be too drastic)
 	float newTempo = newcee + ( emergencyAdj * cTempo );
 	// ... and as a final optimization, only stretch if the new tempo is outside
 	// a nominal threshold.  Keep this threshold check small, because it could
 	// cause some serious side effects otherwise. (enlarging the cTempo check above
 	// is usually better/safer)
 	if( newTempo < 0.970f || newTempo > 1.045f )
 	{
 		// Commit the new base tempo, then clamp both tempos to sane limits
 		// before handing the effective tempo to SoundTouch.
 		cTempo = (float)newcee;
 		if( newTempo < 0.10f ) newTempo = 0.10f;
 		else if( newTempo > 10.0f ) newTempo = 10.0f;
 		if( cTempo < 0.15f ) cTempo = 0.15f;
 		else if( cTempo > 7.5f ) cTempo = 7.5f;
 		pSoundTouch->setTempo( eTempo = (float)newTempo );
 		ts_stats_stretchblocks++;
 		/*ConLog(" * SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n",
 			//(relation < 0.0) ? "Normalize" : "",
 			(int)(tempoChange * 100.0 * 0.03),
 			(int)(emergencyAdj * 100.0),
 			(int)(cTempo * 100.0),
 			(int)(newTempo * 100.0),
 			(int)(statusPct * 100.0)
 		);*/
 	}
 	else
 	{
 		// Nominal operation -- turn off stretching.
 		// note: eTempo 'slides' toward 1.0 for smoother audio and better
 		// protection against spikes.
 		if( cTempo != 1.0f )
 		{
 			// First nominal pass after stretching: reset the base tempo and move
 			// the effective tempo half-way back to 1.0.  Neither stats counter is
 			// incremented on this path.
 			cTempo = 1.0f;
 			eTempo = ( 1.0f + eTempo ) * 0.5f;
 			pSoundTouch->setTempo( eTempo );
 		}
 		else
 		{
 			// Base tempo is already 1.0: snap the effective tempo to it if needed.
 			if( eTempo != cTempo )
 				pSoundTouch->setTempo( eTempo=cTempo );
 			ts_stats_normalblocks++;
 		}
 	}
 }
 // Reacts to a buffer underrun while timestretching: lowers both the base and
 // the effective tempo, then applies the effective tempo to SoundTouch.
 void SndBuffer::timeStretchUnderrun()
 {
 	// The timestretcher failed its job, so the audio has to be slowed down some.
 	cTempo = cTempo - ( cTempo * 0.12f );
 	eTempo = eTempo - ( eTempo * 0.30f );
 	// Never let the effective tempo drop below 0.1.
 	eTempo = ( eTempo < 0.1f ) ? 0.1f : eTempo;
 	pSoundTouch->setTempo( eTempo );
 }
 // Reacts to a buffer overrun while timestretching: raises both the base and
 // the effective tempo, then applies the effective tempo to SoundTouch.
 // Returns the number of samples the caller should discard.
 s32 SndBuffer::timeStretchOverrun()
 {
 	// An overrun means the timestretcher failed, so playback must speed up.
 	cTempo = cTempo + ( cTempo * 0.12f );
 	eTempo = eTempo + ( eTempo * 0.40f );
 	// Cap the effective tempo at 7.5.
 	eTempo = ( eTempo > 7.5f ) ? 7.5f : eTempo;
 	pSoundTouch->setTempo( eTempo );
 	// Drop just a little bit (two packets worth) so that the
 	// timestretcher gets some room to work:
 	const s32 discardCount = SndOutPacketSize*2;
 	return discardCount;
 }
 // Converts one packet (SndOutPacketSize samples) from StereoOut32 to
 // StereoOutFloat in place; the same memory serves as input and output.
 static void CvtPacketToFloat( StereoOut32* srcdest )
 {
 	const StereoOut32* const input = (StereoOut32*)srcdest;
 	StereoOutFloat* const output = (StereoOutFloat*)srcdest;
 	// Walk forward so each input sample is read before its slot is overwritten.
 	for( uint idx=0; idx<SndOutPacketSize; ++idx )
 	{
 		output[idx] = (StereoOutFloat)input[idx];
 	}
 }
 // Converts 'size' samples from StereoOutFloat back to StereoOut32 in place.
 // Parameter note: size is expected to be a multiple of 128 (asserted below).
 static void CvtPacketToInt( StereoOut32* srcdest, uint size )
 {
 	jASSUME( (size & 127) == 0 );
 	const StereoOutFloat* const input = (StereoOutFloat*)srcdest;
 	StereoOut32* const output = srcdest;
 	// Walk forward so each input sample is read before its slot is overwritten.
 	for( uint idx=0; idx<size; ++idx )
 	{
 		output[idx] = (StereoOut32)input[idx];
 	}
 }
 // Pushes the packet currently held in sndTempBuffer through the timestretcher.
 //
 // The packet is converted to float, fed to SoundTouch, and whatever SoundTouch
 // hands back is converted to integer samples again and written out with
 // _WriteSamples().  Afterwards the tempo is re-evaluated via UpdateTempoChange(),
 // and (when MsgOverruns() is enabled) a stretch statistics line is logged once
 // every 300+ calls that produced output.
 void SndBuffer::timeStretchWrite()
 {
 	bool progress = false;
 	// data prediction helps keep the tempo adjustments more accurate.
 	// The timestretcher returns packets in belated "clump" form.
 	// Meaning that most of the time we'll get nothing back, and then
 	// suddenly we'll get several chunks back at once.  Thus we use
 	// data prediction to make the timestretcher more responsive.
 	PredictDataWrite( (int)( SndOutPacketSize / eTempo ) );
 	CvtPacketToFloat( sndTempBuffer );
 	pSoundTouch->putSamples( (float*)sndTempBuffer, SndOutPacketSize );
 	// Drain everything SoundTouch has ready, one packet-sized chunk at a time.
 	int tempProgress;
 	while( tempProgress = pSoundTouch->receiveSamples( (float*)sndTempBuffer, SndOutPacketSize),
 		tempProgress != 0 )
 	{
 		// Hint: It's assumed that pSoundTouch will return chunks of 128 bytes (it always does as
 		// long as the SSE optimizations are enabled), which means we can do our own SSE opts here.
 		CvtPacketToInt( sndTempBuffer, tempProgress );
 		_WriteSamples( sndTempBuffer, tempProgress );
 		progress = true;
 	}
 	UpdateTempoChange();
 	if( MsgOverruns() )
 	{
 		if( progress )
 		{
 			if( ++ts_stats_logcounter > 300 )
 			{
 				ts_stats_logcounter = 0;
 				// UpdateTempoChange() has paths that bump neither counter (tempo freeze,
 				// and the first nominal pass after stretching), so the total can be zero.
 				// Guard the division instead of dividing by zero.
 				const int totalblocks = ts_stats_normalblocks + ts_stats_stretchblocks;
 				const int stretchedPct = ( totalblocks != 0 )
 					? ( ts_stats_stretchblocks * 100 ) / totalblocks
 					: 0;
 				ConLog( " * SPU2 > Timestretch Stats > %d%% of packets stretched.\n",
 					stretchedPct );
 				ts_stats_normalblocks = 0;
 				ts_stats_stretchblocks = 0;
 			}
 		}
 	}
 }
 void SndBuffer::soundtouchInit()
 {
 	pSoundTouch = new soundtouch::SoundTouch();
 	pSoundTouch->setSampleRate(SampleRate);
 	pSoundTouch->setChannels(2);
 	pSoundTouch->setSetting( SETTING_USE_QUICKSEEK, 0 );
 	pSoundTouch->setSetting( SETTING_USE_AA_FILTER, 0 );
 	pSoundTouch->setSetting( SETTING_SEQUENCE_MS, SoundtouchCfg::SequenceLenMS );
 	pSoundTouch->setSetting( SETTING_SEEKWINDOW_MS, SoundtouchCfg::SeekWindowMS );
 	pSoundTouch->setSetting( SETTING_OVERLAP_MS, SoundtouchCfg::OverlapMS );
 	pSoundTouch->setTempo(1);
 	// some timestretch management vars:
 	cTempo = 1.0;
 	eTempo = 1.0;
 	lastPct = 0;
 	lastEmergencyAdj = 0;
 	// just freeze tempo changes for a while at startup.
 	// the driver buffers are bogus anyway.
 	freezeTempo = 8;
 	m_predictData = 0;
 }
 // Releases the SoundTouch instance created by soundtouchInit().
 void SndBuffer::soundtouchCleanup()
 {
 	// NOTE(review): SAFE_DELETE_OBJ is defined elsewhere; presumably it deletes
 	// the object and resets the pointer -- confirm against the macro definition.
 	SAFE_DELETE_OBJ( pSoundTouch );
 }
--- a/plugins/spu2-x/src/Wavedump_wav.cpp
+++ b/plugins/spu2-x/src/Wavedump_wav.cpp
@ -83,14 +83,16 @@ namespace WaveDump
 		}
 	}
-	void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right )
+	void WriteCore( uint coreidx, CoreSourceType src, const StereoOut16& sample )
 	{
 		if( !IsDevBuild ) return;
 		if( m_CoreWav[coreidx][src] != NULL )
-		{
+			m_CoreWav[coreidx][src]->write( (s16*)&sample, 2 );
-			s16 buffer[2] = { left, right };
+	}
-			m_CoreWav[coreidx][src]->write( buffer, 2 );
+
-		}
+	void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right )
 	{
 		WriteCore( coreidx, src, StereoOut16( left, right ) );
 	}
 }
@ -116,10 +118,8 @@ void RecordStop()
 	SAFE_DELETE_OBJ( m_wavrecord );
 }
-void RecordWrite(s16 left, s16 right)
+void RecordWrite( const StereoOut16& sample )
 {
 	if( m_wavrecord == NULL ) return;
-
+	m_wavrecord->write( (s16*)&sample, 2 );
 	s16 buffer[2] = { left, right };
 	m_wavrecord->write( buffer, 2 );
 }
--- a/plugins/spu2-x/src/Win32/Config.cpp
+++ b/plugins/spu2-x/src/Win32/Config.cpp
@ -33,30 +33,32 @@ static const int LATENCY_MIN = 40;
 int AutoDMAPlayRate[2] = {0,0};
 // MIXING
-int Interpolation=1;
+int Interpolation = 1;
 /* values:
 		0: no interpolation (use nearest)
 		1. linear interpolation
 		2. cubic interpolation
 */
-bool EffectsDisabled=false;
+bool EffectsDisabled = false;
 // OUTPUT
-int SndOutLatencyMS=160;
+int SndOutLatencyMS = 160;
-bool timeStretchDisabled=false;
+bool timeStretchDisabled = false;
-u32 OutputModule=0; //OUTPUT_DSOUND;
+u32 OutputModule = 0;
 CONFIG_DSOUNDOUT Config_DSoundOut;
 CONFIG_WAVEOUT Config_WaveOut;
 CONFIG_XAUDIO2 Config_XAudio2;
 // DSP
-bool dspPluginEnabled=false;
+bool dspPluginEnabled = false;
-int  dspPluginModule=0;
+int  dspPluginModule = 0;
 wchar_t dspPlugin[256];
 bool StereoExpansionDisabled = true;
 /*****************************************************************************/
 void ReadSettings()
@ -69,7 +71,8 @@ void ReadSettings()
 	timeStretchDisabled = CfgReadBool( _T("OUTPUT"), _T("Disable_Timestretch"), false );
 	EffectsDisabled = CfgReadBool( _T("MIXING"), _T("Disable_Effects"), false );
-	SndOutLatencyMS=CfgReadInt(_T("OUTPUT"),_T("Latency"), 160);
+	StereoExpansionDisabled = CfgReadBool( _T("OUTPUT"), _T("Disable_StereoExpansion"), false );
 	SndOutLatencyMS = CfgReadInt(_T("OUTPUT"),_T("Latency"), 160);
 	wchar_t omodid[128];
 	CfgReadStr( _T("OUTPUT"), _T("Output_Module"), omodid, 127, XAudio2Out->GetIdent() );
@ -118,9 +121,10 @@ void WriteSettings()
 	CfgWriteBool(_T("MIXING"),_T("Disable_Effects"),EffectsDisabled);
-	CfgWriteStr(_T("OUTPUT"),_T("Output_Module"),mods[OutputModule]->GetIdent() );
+	CfgWriteStr(_T("OUTPUT"),_T("Output_Module"), mods[OutputModule]->GetIdent() );
-	CfgWriteInt(_T("OUTPUT"),_T("Latency"),SndOutLatencyMS);
+	CfgWriteInt(_T("OUTPUT"),_T("Latency"), SndOutLatencyMS);
-	CfgWriteBool(_T("OUTPUT"),_T("Disable_Timestretch"),timeStretchDisabled);
+	CfgWriteBool(_T("OUTPUT"),_T("Disable_Timestretch"), timeStretchDisabled);
 	CfgWriteBool(_T("OUTPUT"),_T("Disable_StereoExpansion"), StereoExpansionDisabled);
 	if( Config_DSoundOut.Device.empty() ) Config_DSoundOut.Device = _T("default");
 	if( Config_WaveOut.Device.empty() ) Config_WaveOut.Device = _T("default");
@ -181,6 +185,7 @@ BOOL CALLBACK ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam)
 			EnableWindow( GetDlgItem( hWnd, IDC_OPEN_CONFIG_DEBUG ), DebugEnabled );
 			SET_CHECK(IDC_EFFECTS_DISABLE,	EffectsDisabled);
 			SET_CHECK(IDC_EXPANSION_DISABLE,StereoExpansionDisabled);
 			SET_CHECK(IDC_TS_DISABLE,		timeStretchDisabled);
 			SET_CHECK(IDC_DEBUG_ENABLE,		DebugEnabled);
 			SET_CHECK(IDC_DSP_ENABLE,		dspPluginEnabled);
@ -212,7 +217,7 @@ BOOL CALLBACK ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam)
 				break;
 				case IDC_OUTCONF:
-					SndConfigure( hWnd,
+					SndBuffer::Configure( hWnd,
 						(int)SendMessage(GetDlgItem(hWnd,IDC_OUTPUT),CB_GETCURSEL,0,0)
 					);
 				break;
@ -234,6 +239,7 @@ BOOL CALLBACK ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam)
 				HANDLE_CHECK(IDC_EFFECTS_DISABLE,EffectsDisabled);
 				HANDLE_CHECK(IDC_DSP_ENABLE,dspPluginEnabled);
 				HANDLE_CHECK(IDC_EXPANSION_DISABLE,StereoExpansionDisabled);
 				HANDLE_CHECKNB(IDC_TS_DISABLE,timeStretchDisabled);
 					EnableWindow( GetDlgItem( hWnd, IDC_OPEN_CONFIG_SOUNDTOUCH ), !timeStretchDisabled );
 				break;
--- a/plugins/spu2-x/src/Win32/Config.h
+++ b/plugins/spu2-x/src/Win32/Config.h
@ -82,6 +82,7 @@ extern int  dspPluginModule;
 extern bool	dspPluginEnabled;
 extern bool timeStretchDisabled;
 extern bool StereoExpansionDisabled;
 class SoundtouchCfg
 {
@ -120,12 +121,9 @@ struct CONFIG_XAUDIO2
 	std::wstring Device;
 	s8 NumBuffers;
 	bool ExpandTo51;
 	CONFIG_XAUDIO2() :
 		Device(),
-		NumBuffers( 2 ),
+		NumBuffers( 2 )
 		ExpandTo51( true )
 	{
 	}
 };
--- a/plugins/spu2-x/src/Win32/RealtimeDebugger.cpp
+++ b/plugins/spu2-x/src/Win32/RealtimeDebugger.cpp
@ -144,8 +144,8 @@ void UpdateDebugDialog()
 				SetDCBrushColor  (hdc,RGB(  0,255,  0));
-				int vl = abs(((vc.VolumeL.Value >> 16) * 24) >> 15);
+				int vl = abs(((vc.Volume.Left.Value >> 16) * 24) >> 15);
-				int vr = abs(((vc.VolumeR.Value >> 16) * 24) >> 15);
+				int vr = abs(((vc.Volume.Right.Value >> 16) * 24) >> 15);
 				FillRectangle(hdc,IX+38,IY+26 - vl, 4, vl);
 				FillRectangle(hdc,IX+42,IY+26 - vr, 4, vr);
--- a/plugins/spu2-x/src/Win32/SndOut_DSound.cpp
+++ b/plugins/spu2-x/src/Win32/SndOut_DSound.cpp
@ -23,6 +23,7 @@
 #include "spu2.h"
 #include "dialogs.h"
 #define DIRECTSOUND_VERSION 0x1000
 #include <dsound.h>
 static ds_device_data devices[32];
@ -37,7 +38,6 @@ private:
 	static const int PacketsPerBuffer = 1;
 	static const int BufferSize = SndOutPacketSize * PacketsPerBuffer;
 	static const int BufferSizeBytes = BufferSize << 1;
 	u32 numBuffers;		// cached copy of our configuration setting.
@ -57,25 +57,26 @@ private:
 	HANDLE waitEvent;
-	SndBuffer *buff;
+	template< typename T >
-
+	static DWORD CALLBACK RThread( DSound* obj )
 	static DWORD CALLBACK RThread(DSound*obj)
 	{
-		return obj->Thread();
+		return obj->Thread<T>();
 	}
 	template< typename T >
 	DWORD CALLBACK Thread()
 	{
 		static const int BufferSizeBytes = BufferSize * sizeof( T );
 		while( dsound_running )
 		{
 			u32 rv = WaitForMultipleObjects(numBuffers,buffer_events,FALSE,200);
-			s16* p1, *oldp1;
+			T* p1, *oldp1;
 			LPVOID p2;
 			DWORD s1,s2;
-			u32 poffset=BufferSizeBytes * rv;
+			u32 poffset = BufferSizeBytes * rv;
 			if( FAILED(buffer->Lock(poffset,BufferSizeBytes,(LPVOID*)&p1,&s1,&p2,&s2,0) ) )
 			{
@ -86,9 +87,9 @@ private:
 			oldp1 = p1;
 			for(int p=0; p<PacketsPerBuffer; p++, p1+=SndOutPacketSize )
-				buff->ReadSamples( p1 );
+				SndBuffer::ReadSamples( p1 );
-			buffer->Unlock(oldp1,s1,p2,s2);
+			buffer->Unlock( oldp1, s1, p2, s2 );
 			// Set the write pointer to the beginning of the next block.
 			myLastWrite = (poffset + BufferSizeBytes) & ~BufferSizeBytes;
@ -97,9 +98,8 @@ private:
 	}
 public:
-	s32 Init(SndBuffer *sb)
+	s32 Init()
 	{
 		buff = sb;
 		numBuffers = Config_DSoundOut.NumBuffers;
 		//
@ -130,19 +130,27 @@ public:
 		if( FAILED(dsound->SetCooperativeLevel(GetDesktopWindow(),DSSCL_PRIORITY)) )
 			throw std::runtime_error( "DirectSound Error: Cooperative level could not be set." );
 		// Determine the user's speaker configuration, and select an expansion option as needed.
 		// FAIL : Directsound doesn't appear to support audio expansion >_<
 		DWORD speakerConfig = 2;
 		//dsound->GetSpeakerConfig( &speakerConfig );
 		IDirectSoundBuffer* buffer_;
 		DSBUFFERDESC desc; 
 		// Set up WAV format structure. 
 		memset(&wfx, 0, sizeof(WAVEFORMATEX)); 
-		wfx.wFormatTag = WAVE_FORMAT_PCM;
+		wfx.wFormatTag		= WAVE_FORMAT_PCM;
-		wfx.nSamplesPerSec = SampleRate;
+		wfx.nSamplesPerSec	= SampleRate;
-		wfx.nChannels=2;
+		wfx.nChannels		= speakerConfig;
-		wfx.wBitsPerSample = 16;
+		wfx.wBitsPerSample	= 16;
-		wfx.nBlockAlign = 2*2;
+		wfx.nBlockAlign		= 2*speakerConfig;
-		wfx.nAvgBytesPerSec = SampleRate * wfx.nBlockAlign;
+		wfx.nAvgBytesPerSec	= SampleRate * wfx.nBlockAlign;
-		wfx.cbSize=0;
+		wfx.cbSize			= 0;
 		uint BufferSizeBytes = BufferSize * wfx.nBlockAlign;
 		// Set up DSBUFFERDESC structure. 
@ -155,12 +163,13 @@ public:
 		desc.dwFlags |= DSBCAPS_LOCSOFTWARE;
 		desc.dwFlags |= DSBCAPS_GLOBALFOCUS;
-		if( FAILED(dsound->CreateSoundBuffer(&desc,&buffer_,0) ) ||
+		if( FAILED(dsound->CreateSoundBuffer(&desc,&buffer_,0) ) )
-			FAILED(buffer_->QueryInterface(IID_IDirectSoundBuffer8,(void**)&buffer)) )
+			throw std::runtime_error( "DirectSound Error: Interface could not be queried." );
 		if(	FAILED(buffer_->QueryInterface(IID_IDirectSoundBuffer8,(void**)&buffer)) )
 			throw std::runtime_error( "DirectSound Error: Interface could not be queried." );
 		buffer_->Release();
 		verifyc( buffer->QueryInterface(IID_IDirectSoundNotify8,(void**)&buffer_notify) );
 		DSBPOSITIONNOTIFY not[MAX_BUFFER_COUNT];
@ -171,9 +180,9 @@ public:
 			// it was needed for some quirky driver?  Theoretically we want the notification as soon
 			// as possible after the buffer has finished playing.
-			buffer_events[i]=CreateEvent(NULL,FALSE,FALSE,NULL);
+			buffer_events[i] = CreateEvent(NULL,FALSE,FALSE,NULL);
-			not[i].dwOffset=(wfx.nBlockAlign*2 + BufferSizeBytes*(i+1))%desc.dwBufferBytes;
+			not[i].dwOffset = (wfx.nBlockAlign + BufferSizeBytes*(i+1)) % desc.dwBufferBytes;
-			not[i].hEventNotify=buffer_events[i];
+			not[i].hEventNotify = buffer_events[i];
 		}
 		buffer_notify->SetNotificationPositions(numBuffers,not);
@ -191,9 +200,9 @@ public:
 		// Start Thread
 		myLastWrite = 0;
-		dsound_running=true;
+		dsound_running = true;
-		thread=CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread,this,0,&tid);
+		thread = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread<StereoOut16>,this,0,&tid);
-		SetThreadPriority(thread,THREAD_PRIORITY_TIME_CRITICAL);
+		SetThreadPriority(thread,THREAD_PRIORITY_ABOVE_NORMAL);
 		return 0;
 	}
--- a/plugins/spu2-x/src/Win32/SndOut_XAudio2.cpp
+++ b/plugins/spu2-x/src/Win32/SndOut_XAudio2.cpp
@ -38,7 +38,6 @@ private:
 	class BaseStreamingVoice : public IXAudio2VoiceCallback
 	{
 	protected:
 		SndBuffer* m_sndout;
 		IXAudio2SourceVoice* pSourceVoice;
 		s16* qbuffer;
@ -69,11 +68,10 @@ private:
 			DeleteCriticalSection( &cs );
 		}
-		BaseStreamingVoice( SndBuffer* sb, uint numChannels ) :
+		BaseStreamingVoice( uint numChannels ) :
 			m_sndout( sb ),
 			m_nBuffers( Config_XAudio2.NumBuffers ),
 			m_nChannels( numChannels ),
-			m_BufferSize( SndOutPacketSize/2 * m_nChannels * PacketsPerBuffer ),
+			m_BufferSize( SndOutPacketSize * m_nChannels * PacketsPerBuffer ),
 			m_BufferSizeBytes( m_BufferSize * sizeof(s16) )
 		{
 		}
@ -133,18 +131,25 @@ private:
 			LeaveCriticalSection( &cs );
 		}
 		STDMETHOD_(void, OnVoiceProcessingPassStart) () {}
 		STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) { };
 		STDMETHOD_(void, OnVoiceProcessingPassEnd) () {}
 		STDMETHOD_(void, OnStreamEnd) () {}
 		STDMETHOD_(void, OnBufferStart) ( void* ) {}
 		STDMETHOD_(void, OnLoopEnd) ( void* ) {}   
 		STDMETHOD_(void, OnVoiceError) (THIS_ void* pBufferContext, HRESULT Error) { };
 	};
-	
+	template< typename T >
-	class StreamingVoice_Stereo : public BaseStreamingVoice
+	class StreamingVoice : public BaseStreamingVoice
 	{
 	public:
-		StreamingVoice_Stereo( SndBuffer* sb, IXAudio2* pXAudio2 ) :
+		StreamingVoice( IXAudio2* pXAudio2 ) :
-			BaseStreamingVoice( sb, 2 )
+			BaseStreamingVoice( sizeof(T) / sizeof( s16 ) )
 		{
 		}
-		virtual ~StreamingVoice_Stereo() {}
+		virtual ~StreamingVoice() {}
 		void Init( IXAudio2* pXAudio2 )
 		{
@ -152,11 +157,6 @@ private:
 		}
 	protected:
 		STDMETHOD_(void, OnVoiceProcessingPassStart) () {}
 		STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) { };
 		STDMETHOD_(void, OnVoiceProcessingPassEnd) () {}
 		STDMETHOD_(void, OnStreamEnd) () {}
 		STDMETHOD_(void, OnBufferStart) ( void* ) {}
 		STDMETHOD_(void, OnBufferEnd) ( void* context )
 		{
 			EnterCriticalSection( &cs );
@ -164,10 +164,10 @@ private:
 			// All of these checks are necessary because XAudio2 is wonky shizat.
 			if( pSourceVoice == NULL || context == NULL ) return;
-			s16* qb = (s16*)context;
+			T* qb = (T*)context;
 			for(int p=0; p<PacketsPerBuffer; p++, qb+=SndOutPacketSize )
-				m_sndout->ReadSamples( qb );
+				SndBuffer::ReadSamples( qb );
 			XAUDIO2_BUFFER buf = {0};
 			buf.AudioBytes	= m_BufferSizeBytes;
@ -177,83 +177,6 @@ private:
 			pSourceVoice->SubmitSourceBuffer( &buf );
 			LeaveCriticalSection( &cs );
 		}
 		STDMETHOD_(void, OnLoopEnd) ( void* ) {}   
 		STDMETHOD_(void, OnVoiceError) (THIS_ void* pBufferContext, HRESULT Error) { };
 	};
 	class StreamingVoice_Surround51 : public BaseStreamingVoice
 	{
 	public:
 		//LPF_data m_lpf_left;
 		//LPF_data m_lpf_right;
 		s32 buffer[2 * SndOutPacketSize * PacketsPerBuffer];
 		StreamingVoice_Surround51( SndBuffer* sb, IXAudio2* pXAudio2 ) :
 			BaseStreamingVoice( sb, 6 )
 			//m_lpf_left( Config_XAudio2.LowpassLFE, SampleRate ),
 			//m_lpf_right( Config_XAudio2.LowpassLFE, SampleRate )
 		{
 		}
 		virtual ~StreamingVoice_Surround51() {}
 		void Init( IXAudio2* pXAudio2 )
 		{
 			_init( pXAudio2, SPEAKER_5POINT1 );
 		}
 	protected:
 		STDMETHOD_(void, OnVoiceProcessingPassStart) () {}
 		STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) { };
 		STDMETHOD_(void, OnVoiceProcessingPassEnd) () {}
 		STDMETHOD_(void, OnStreamEnd) () {}
 		STDMETHOD_(void, OnBufferStart) ( void* ) {}
 		STDMETHOD_(void, OnBufferEnd) ( void* context )
 		{
 			EnterCriticalSection( &cs );
 			// All of these checks are necessary because XAudio2 is wonky shizat.
 			if( pSourceVoice == NULL || context == NULL ) return;
 			s16* qb = (s16*)context;
 			for(int p=0; p<PacketsPerBuffer; p++ )
 			{
 				m_sndout->ReadSamples( buffer );
 				const s32* src = buffer;
 				for( int i=0; i<SndOutPacketSize/2; i++, qb+=6, src+=2 )
 				{
 					// Left and right Front!
 					qb[0] = SndScaleVol( src[0] );
 					qb[1] = SndScaleVol( src[1] );
 					// Center and Subwoofer/LFE -->
 					// This method is simple and sounds nice.  It relies on the speaker/soundcard
 					// systems do to their own low pass / crossover.  Manual lowpass is wasted effort
 					// and can't match solid state results anyway.
 					qb[2] = qb[3] = (src[0] + src[1]) >> (SndOutVolumeShift+1);
 					// Left and right rear!
 					qb[4] = SndScaleVol( src[0] );
 					qb[5] = SndScaleVol( src[1] );
 				}
 			}
 			XAUDIO2_BUFFER buf = { 0 };
 			buf.AudioBytes = m_BufferSizeBytes;
 			buf.pAudioData = (BYTE*)context;
 			buf.pContext = context;
 			pSourceVoice->SubmitSourceBuffer( &buf );
 			LeaveCriticalSection( &cs );
 		}
 		STDMETHOD_(void, OnLoopEnd) ( void* ) {}   
 		STDMETHOD_(void, OnVoiceError) (THIS_ void* pBufferContext, HRESULT Error) { };
 	};
@ -263,7 +186,7 @@ private:
 public:
-	s32 Init( SndBuffer *sb )
+	s32 Init()
 	{
 		HRESULT hr;
@ -273,9 +196,8 @@ public:
 		CoInitializeEx( NULL, COINIT_MULTITHREADED );
 		UINT32 flags = 0;
-#ifdef _DEBUG
+		if( IsDebugBuild )
-		flags |= XAUDIO2_DEBUG_ENGINE;
+			flags |= XAUDIO2_DEBUG_ENGINE;
 #endif
 		if ( FAILED(hr = XAudio2Create( &pXAudio2, flags ) ) )
 		{
@ -298,18 +220,47 @@ public:
 			return -1;
 		}
-		if( Config_XAudio2.ExpandTo51 && deviceDetails.OutputFormat.Format.nChannels >= 6 )
+		if( StereoExpansionDisabled )
 			deviceDetails.OutputFormat.Format.nChannels	= 2;
 		// Any windows driver should support stereo at the software level, I should think!
 		jASSUME( deviceDetails.OutputFormat.Format.nChannels > 1 );
 		switch( deviceDetails.OutputFormat.Format.nChannels )
 		{
-			ConLog( "* SPU2 > 5.1 speaker expansion enabled." );
+			case 2:
-			voiceContext = new StreamingVoice_Surround51( sb, pXAudio2 );
+				ConLog( "* SPU2 > Using normal 2 speaker stereo output." );
-		}
+				voiceContext = new StreamingVoice<StereoOut16>( pXAudio2 );
-		else
+			break;
-		{
+
-			voiceContext = new StreamingVoice_Stereo( sb, pXAudio2 );
+			case 3:
 				ConLog( "* SPU2 > 2.1 speaker expansion enabled." );
 				voiceContext = new StreamingVoice<Stereo21Out16>( pXAudio2 );
 			break;
 			case 4:
 				ConLog( "* SPU2 > 4 speaker expansion enabled [quadraphenia]" );
 				voiceContext = new StreamingVoice<StereoQuadOut16>( pXAudio2 );
 			break;
 			case 5:
 				ConLog( "* SPU2 > 4.1 speaker expansion enabled." );
 				voiceContext = new StreamingVoice<Stereo41Out16>( pXAudio2 );
 			break;
 			case 6:
 			case 7:
 				ConLog( "* SPU2 > 5.1 speaker expansion enabled." );
 				voiceContext = new StreamingVoice<Stereo51Out16>( pXAudio2 );
 			break;
 			default:	// anything 8 or more gets the 7.1 treatment!
 				ConLog( "* SPU2 > 7.1 speaker expansion enabled." );
 				voiceContext = new StreamingVoice<Stereo51Out16>( pXAudio2 );
 			break;
 		}
 		voiceContext->Init( pXAudio2 );
 		return 0;
 	}
--- a/plugins/spu2-x/src/Win32/SndOut_waveOut.cpp
+++ b/plugins/spu2-x/src/Win32/SndOut_waveOut.cpp
@ -31,14 +31,13 @@ private:
 	static const int PacketsPerBuffer = (1024 / SndOutPacketSize);
 	static const int BufferSize = SndOutPacketSize*PacketsPerBuffer;
 	static const int BufferSizeBytes = BufferSize << 1;
 	u32 numBuffers;
 	HWAVEOUT hwodevice;
 	WAVEFORMATEX wformat;
 	WAVEHDR whbuffer[MAX_BUFFER_COUNT];
-	s16* qbuffer;
+	StereoOut16* qbuffer;
 	#define QBUFFER(x) (qbuffer + BufferSize * (x))
@ -46,17 +45,13 @@ private:
 	HANDLE thread;
 	DWORD tid;
 	SndBuffer *buff;
 	wchar_t ErrText[256];
-	static DWORD CALLBACK RThread(WaveOutModule*obj)
+	template< typename T >
 	{
 		return obj->Thread();
 	}
 	DWORD CALLBACK Thread()
 	{
 		static const int BufferSizeBytes = BufferSize * sizeof( T );
 		while( waveout_running )
 		{
 			bool didsomething = false;
@ -64,16 +59,16 @@ private:
 			{
 				if(!(whbuffer[i].dwFlags & WHDR_DONE) ) continue;
-				WAVEHDR *buf=whbuffer+i;
+				WAVEHDR *buf = whbuffer+i;
 				buf->dwBytesRecorded = buf->dwBufferLength;
-				s16 *t = (s16*)buf->lpData;
+				T* t = (T*)buf->lpData;
 				for(int p=0; p<PacketsPerBuffer; p++, t+=SndOutPacketSize )
-					buff->ReadSamples( t );
+					SndBuffer::ReadSamples( t );
-				whbuffer[i].dwFlags&=~WHDR_DONE;
+				whbuffer[i].dwFlags &= ~WHDR_DONE;
-				waveOutWrite(hwodevice,buf,sizeof(WAVEHDR));
+				waveOutWrite( hwodevice, buf, sizeof(WAVEHDR) );
 				didsomething = true;
 			}
@ -85,25 +80,71 @@ private:
 		return 0;
 	}
-public:
+	template< typename T >
-	s32 Init(SndBuffer *sb)
+	static DWORD CALLBACK RThread(WaveOutModule*obj)
 	{
 		return obj->Thread<T>();
 	}
 public:
 	s32 Init()
 	{
 		buff = sb;
 		numBuffers = Config_WaveOut.NumBuffers;
 		MMRESULT woores;
 		if (Test()) return -1;
-		wformat.wFormatTag=WAVE_FORMAT_PCM;
+		// TODO : Use dsound to determine the speaker configuration, and expand audio from there.
 		wformat.nSamplesPerSec=SampleRate;
 		wformat.wBitsPerSample=16;
 		wformat.nChannels=2;
 		wformat.nBlockAlign=((wformat.wBitsPerSample * wformat.nChannels) / 8);
 		wformat.nAvgBytesPerSec=(wformat.nSamplesPerSec * wformat.nBlockAlign);
 		wformat.cbSize=0;
-		qbuffer=new s16[BufferSize*numBuffers];
+		#if 0
 		int speakerConfig;
 		if( StereoExpansionDisabled )
 			speakerConfig = 2;
 		// Any windows driver should support stereo at the software level, I should think!
 		jASSUME( speakerConfig > 1 );
 		LPTHREAD_START_ROUTINE threadproc;
 		switch( speakerConfig )
 		{
 		case 2:
 			ConLog( "* SPU2 > Using normal 2 speaker stereo output." );
 			threadproc = (LPTHREAD_START_ROUTINE)&RThread<StereoOut16>;
 			speakerConfig = 2;
 		break;
 		case 4:
 			ConLog( "* SPU2 > 4 speaker expansion enabled [quadraphenia]" );
 			threadproc = (LPTHREAD_START_ROUTINE)&RThread<StereoQuadOut16>;
 			speakerConfig = 4;
 		break;
 		case 6:
 		case 7:
 			ConLog( "* SPU2 > 5.1 speaker expansion enabled." );
 			threadproc = (LPTHREAD_START_ROUTINE)&RThread<Stereo51Out16>;
 			speakerConfig = 6;
 		break;
 		default:
 			ConLog( "* SPU2 > 7.1 speaker expansion enabled." );
 			threadproc = (LPTHREAD_START_ROUTINE)&RThread<Stereo51Out16>;
 			speakerConfig = 8;
 		break;
 		}
 		#endif
 		wformat.wFormatTag		= WAVE_FORMAT_PCM;
 		wformat.nSamplesPerSec	= SampleRate;
 		wformat.wBitsPerSample	= 16;
 		wformat.nChannels		= 2;
 		wformat.nBlockAlign		= ((wformat.wBitsPerSample * wformat.nChannels) / 8);
 		wformat.nAvgBytesPerSec	= (wformat.nSamplesPerSec * wformat.nBlockAlign);
 		wformat.cbSize			= 0;
 		qbuffer = new StereoOut16[BufferSize*numBuffers];
 		woores = waveOutOpen(&hwodevice,WAVE_MAPPER,&wformat,0,0,0);
 		if (woores != MMSYSERR_NOERROR)
@ -113,6 +154,8 @@ public:
 			return -1;
 		}
 		const int BufferSizeBytes = wformat.nBlockAlign * BufferSize;
 		for(u32 i=0;i<numBuffers;i++)
 		{
 			whbuffer[i].dwBufferLength=BufferSizeBytes;
@ -133,7 +176,7 @@ public:
 		// love it needs and won't suck resources idling pointlessly.  Just don't try to
 		// run it in uber-low-latency mode.
 		waveout_running = true;
-		thread = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread,this,0,&tid);
+		thread = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread<StereoOut16>,this,0,&tid);
 		return 0;
 	}
@ -276,4 +319,4 @@ public:
 } WO;
-SndOutModule *WaveOut=&WO;
+SndOutModule *WaveOut = &WO;
--- a/plugins/spu2-x/src/Win32/Spu2-X_vs2008.vcproj
+++ b/plugins/spu2-x/src/Win32/Spu2-X_vs2008.vcproj
@ -53,6 +53,7 @@
 				FavorSizeOrSpeed="1"
 				OmitFramePointers="true"
 				EnableFiberSafeOptimizations="true"
 				AdditionalIncludeDirectories=""
 				PreprocessorDefinitions="SPU2X_DEVBUILD;FLOAT_SAMPLES;NDEBUG;_USRDLL"
 				StringPooling="true"
 				RuntimeLibrary="0"
@ -608,6 +609,10 @@
 					RelativePath=".\SndOut_XAudio2.cpp"
 					>
 				</File>
 				<File
 					RelativePath="..\Timestretcher.cpp"
 					>
 				</File>
 			</Filter>
 			<Filter
 				Name="decoder"
--- a/plugins/spu2-x/src/defs.h
+++ b/plugins/spu2-x/src/defs.h
@ -22,7 +22,24 @@
 #ifndef DEFS_H_INCLUDED
 #define DEFS_H_INCLUDED
-struct V_Volume
+struct V_VolumeLR
 {
 	// Plain left/right volume pair (no slide state).
 	// Static instance declared here; its definition and value live outside this header.
 	// NOTE(review): presumably the "full volume" preset, judging by the name -- confirm.
 	static V_VolumeLR Max;
 	s32 Left;
 	s32 Right;
 	// Default constructor: does not initialize Left or Right.
 	V_VolumeLR() {}
 	// Sets both channels to the same value.  Not marked explicit, so an s32
 	// converts implicitly to a V_VolumeLR.
 	V_VolumeLR( s32 both ) :
 		Left( both ),
 		Right( both )
 	{
 	}
 	// Declaration only; the implementation is not in this header.
 	// NOTE(review): presumably writes this pair to 'dump' labelled with 'title' -- confirm.
 	void DebugDump( FILE* dump, const char* title );
 };
 struct V_VolumeSlide
 {
 	// Holds the "original" value of the volume for this voice, prior to slides.
 	// (ie, the volume as written to the register)
@ -33,9 +50,47 @@ struct V_Volume
 	// Slide flag bits; Update() tests VOLFLAG_SLIDE_ENABLE against this field.
 	s8 Mode;
 public:
 	// Default constructor: initializes none of the members.
 	V_VolumeSlide() {}
 	// Stores 'regval' in Reg_VOL and 'fullvol' in Value; Increment and Mode start at 0.
 	V_VolumeSlide( s16 regval, s32 fullvol ) :
 		Reg_VOL( regval ),
 		Value( fullvol ),
 		Increment( 0 ),
 		Mode( 0 )
 	{
 	}
 	// Implemented in ADSR.cpp; returns immediately unless VOLFLAG_SLIDE_ENABLE is set in Mode.
 	void Update();
 	// NOTE(review): the parameter is declared u16 although the trailing comment
 	// describes the register source as signed -- confirm the intended conversion.
 	void RegSet( u16 src );		// used to set the volume from a register source (16 bit signed)
 	// Declaration only; the implementation is not in this header.
 	void DebugDump( FILE* dump, const char* title, const char* nameLR );
 };
 struct V_VolumeSlideLR
 {
 	// Pairs two V_VolumeSlide objects, one per stereo channel.
 	// Static instance declared here; its definition and value live outside this header.
 	static V_VolumeSlideLR Max;
 	V_VolumeSlide Left;
 	V_VolumeSlide Right;
 public:
 	// Default constructor: default-constructs both channels.
 	V_VolumeSlideLR() {}
 	// Constructs both channels from the same (regval, bothval) pair.
 	V_VolumeSlideLR( s16 regval, s32 bothval ) :
 		Left( regval, bothval ),
 		Right( regval, bothval )
 	{
 	}
 	// Forwards to V_VolumeSlide::Update() on the left channel, then the right.
 	void Update()
 	{
 		Left.Update();
 		Right.Update();
 	}
 	// Declaration only; the implementation is not in this header.
 	void DebugDump( FILE* dump, const char* title );
 };
 struct V_ADSR
 {
 	u16 Reg_ADSR1;
@ -61,12 +116,10 @@ public:
 struct V_Voice
 {
-// SPU2 cycle where the Playing started
+	u32 PlayCycle;		// SPU2 cycle where the Playing started
-	u32 PlayCycle;
+
-// Left Volume
+	V_VolumeSlideLR Volume;
-	V_Volume VolumeL;
+
 // Right Volume
 	V_Volume VolumeR;
 // Envelope
 	V_ADSR ADSR;
 // Pitch (also Reg_PITCH)
@ -198,6 +251,39 @@ struct V_Reverb
 	u32 MIX_DEST_B1;
 };
 struct V_ReverbBuffers
 {
 	// Signed 32-bit slots, one per reverb buffer name.  V_Core holds an instance
 	// (RevBuffers) described there as "buffer pointers for reverb, pre-calculated
 	// and pre-clipped".
 	// NOTE(review): the names appear to mirror the u32 fields of V_Reverb -- confirm
 	// that each slot is derived from the same-named V_Reverb field.
 	s32 FB_SRC_A0;
 	s32 FB_SRC_B0;
 	s32 FB_SRC_A1;
 	s32 FB_SRC_B1;
 	// NOTE(review): B1 is declared before B0 in this group, unlike the other groups;
 	// only matters if some code depends on member order.
 	s32 IIR_SRC_A0;
 	s32 IIR_SRC_A1;
 	s32 IIR_SRC_B1;
 	s32 IIR_SRC_B0;
 	s32 IIR_DEST_A0;
 	s32 IIR_DEST_A1;
 	s32 IIR_DEST_B0;
 	s32 IIR_DEST_B1;
 	s32 ACC_SRC_A0;
 	s32 ACC_SRC_A1;
 	s32 ACC_SRC_B0;
 	s32 ACC_SRC_B1;
 	s32 ACC_SRC_C0;
 	s32 ACC_SRC_C1;
 	s32 ACC_SRC_D0;
 	s32 ACC_SRC_D1;
 	s32 MIX_DEST_A0;
 	s32 MIX_DEST_A1;
 	s32 MIX_DEST_B0;
 	s32 MIX_DEST_B1;
 	// NOTE(review): presumably a dirty flag meaning the slots above must be
 	// recomputed before use -- confirm where it is set and cleared.
 	bool NeedsUpdated;
 };
 struct V_SPDIF
 {
 	u16 Out;
@ -228,22 +314,14 @@ struct V_Core
 {
 // Core Voices
 	V_Voice Voices[24];
-// Master Volume for Left Channel
+
-	V_Volume MasterL;
+
-// Master Volume for Right Channel
+	V_VolumeSlideLR MasterVol;// Master Volume
-	V_Volume MasterR;
+	
-// Volume for External Data Input (Left Channel)
+	V_VolumeLR ExtVol;		// Volume for External Data Input
-	s32 ExtL;
+	V_VolumeLR InpVol;		// Volume for Sound Data Input
-// Volume for External Data Input (Right Channel)
+	V_VolumeLR FxVol;		// Volume for Output from Effects 
-	s32 ExtR;
+	
 // Volume for Sound Data Input (Left Channel)
 	s32 InpL;
 // Volume for Sound Data Input (Right Channel)
 	s32 InpR;
 // Volume for Output from Effects (Left Channel)
 	s32 FxL;
 // Volume for Output from Effects (Right Channel)
 	s32 FxR;
 // Interrupt Address
 	u32 IRQA;
 // DMA Transfer Start Address
@ -296,6 +374,7 @@ struct V_Core
 // Reverb
 	V_Reverb Revb;
 	V_ReverbBuffers RevBuffers;		// buffer pointers for reverb, pre-calculated and pre-clipped.
 	u32 EffectsStartA;
 	u32 EffectsEndA;
 	u32 ReverbX;
@ -311,8 +390,7 @@ struct V_Core
 	// Last samples to pass through the effects processor.
 	// Used because the effects processor works at 24khz and just pulls
 	// from this for the odd Ts.
-	s16 LastEffectL;
+	StereoOut32 LastEffect;
 	s16 LastEffectR;
 	u8 InitDelay;
@ -329,12 +407,15 @@ struct V_Core
 	s16 ADMATempBuffer[0x1000];
 	u32 ADMAPV;
-	u32 ADMAPL;
+	StereoOut32 ADMAP;
 	u32 ADMAPR;
 	void Reset();
 	void UpdateEffectsBufferSize();
 	V_Core();		// our badass constructor
 	s32 EffectsBufferIndexer( s32 offset ) const;
 	void UpdateFeedbackBuffersA();
 	void UpdateFeedbackBuffersB();
 };
 extern V_Core Cores[2];