diff --git a/plugins/spu2-x/src/3rdparty/liba52/parse.c b/plugins/spu2-x/src/3rdparty/liba52/parse.c index 3626944f65..7245a315c9 100644 --- a/plugins/spu2-x/src/3rdparty/liba52/parse.c +++ b/plugins/spu2-x/src/3rdparty/liba52/parse.c @@ -39,7 +39,7 @@ void * memalign (size_t align, size_t size); #else /* assume malloc alignment is sufficient */ -#define memalign(align,size) malloc (size) +#define memalign(align,m_size) malloc (m_size) #endif typedef struct { diff --git a/plugins/spu2-x/src/ADSR.cpp b/plugins/spu2-x/src/ADSR.cpp index e99484cf3c..fe4f3ae536 100644 --- a/plugins/spu2-x/src/ADSR.cpp +++ b/plugins/spu2-x/src/ADSR.cpp @@ -216,7 +216,7 @@ bool V_ADSR::Calculate() #define VOLFLAG_EXPONENTIAL (1ul<<2) #define VOLFLAG_SLIDE_ENABLE (1ul<<3) -void V_Volume::Update() +void V_VolumeSlide::Update() { if( !(Mode & VOLFLAG_SLIDE_ENABLE) ) return; diff --git a/plugins/spu2-x/src/BaseTypes.h b/plugins/spu2-x/src/BaseTypes.h index 78570cc09d..25f3e40dd2 100644 --- a/plugins/spu2-x/src/BaseTypes.h +++ b/plugins/spu2-x/src/BaseTypes.h @@ -27,6 +27,8 @@ using std::string; using std::wstring; +#include "PS2Edefs.h" + ////////////////////////////////////////////////////////////////////////// // Override Win32 min/max macros with the STL's type safe and macro // free varieties (much safer!) 
@@ -84,4 +86,40 @@ static const bool IsDebugBuild = false; #endif +struct StereoOut16; +struct StereoOutFloat; + +struct StereoOut32 +{ + static StereoOut32 Empty; + + s32 Left; + s32 Right; + + StereoOut32() : + Left( 0 ), + Right( 0 ) + { + } + + StereoOut32( s32 left, s32 right ) : + Left( left ), + Right( right ) + { + } + + StereoOut32( const StereoOut16& src ); + explicit StereoOut32( const StereoOutFloat& src ); + + StereoOut16 DownSample() const; + + StereoOut32 operator+( const StereoOut32& right ) + { + return StereoOut32( + Left + right.Left, + Right + right.Right + ); + } +}; + #endif diff --git a/plugins/spu2-x/src/Debug.cpp b/plugins/spu2-x/src/Debug.cpp index 79aedee3bc..3c001cfb70 100644 --- a/plugins/spu2-x/src/Debug.cpp +++ b/plugins/spu2-x/src/Debug.cpp @@ -71,6 +71,27 @@ void ConLog(const char *fmt, ...) { #endif } +void V_VolumeSlide::DebugDump( FILE* dump, const char* title, const char* nameLR ) +{ + fprintf( dump, "%s Volume for %s Channel:\t%x\n" + " - Value: %x\n" + " - Mode: %x\n" + " - Increment: %x\n", + title, nameLR, Reg_VOL, Value, Mode, Increment); +} + +void V_VolumeSlideLR::DebugDump( FILE* dump, const char* title ) +{ + Left.DebugDump( dump, title, "Left" ); + Right.DebugDump( dump, title, "Right" ); +} + +void V_VolumeLR::DebugDump( FILE* dump, const char* title ) +{ + fprintf( dump, "Volume for %s (%s Channel):\t%x\n", title, "Left", Left ); + fprintf( dump, "Volume for %s (%s Channel):\t%x\n", title, "Right", Right ); +} + void DoFullDump() { #ifdef SPU2_LOG @@ -98,32 +119,18 @@ void DoFullDump() if(!CoresDump()) return; dump = _wfopen( CoresDumpFileName, _T("wt") ); - if (dump) { + if (dump) + { for(c=0;c<2;c++) { fprintf(dump,"#### CORE %d DUMP.\n",c); - fprintf(dump,"Master Volume for Left Channel: %x\n" - " - Value: %x\n" - " - Mode: %x\n" - " - Increment: %x\n", - Cores[c].MasterL.Reg_VOL, - Cores[c].MasterL.Value, - Cores[c].MasterL.Mode, - Cores[c].MasterL.Increment); - fprintf(dump,"Master Volume for Right Channel: 
%x\n" - " - Value: %x\n" - " - Mode: %x\n" - " - Increment: %x\n", - Cores[c].MasterR.Reg_VOL, - Cores[c].MasterR.Value, - Cores[c].MasterR.Mode, - Cores[c].MasterR.Increment); - fprintf(dump,"Volume for External Data Input (Left Channel): %x\n",Cores[c].ExtL); - fprintf(dump,"Volume for External Data Input (Right Channel): %x\n",Cores[c].ExtR); - fprintf(dump,"Volume for Sound Data Input (Left Channel): %x\n",Cores[c].InpL); - fprintf(dump,"Volume for Sound Data Input (Right Channel): %x\n",Cores[c].InpR); - fprintf(dump,"Volume for Output from Effects (Left Channel): %x\n",Cores[c].FxL); - fprintf(dump,"Volume for Output from Effects (Right Channel): %x\n",Cores[c].FxR); + + Cores[c].MasterVol.DebugDump( dump, "Master" ); + + Cores[c].ExtVol.DebugDump( dump, "External Data Input" ); + Cores[c].InpVol.DebugDump( dump, "Voice Data Input [dry]" ); + Cores[c].FxVol.DebugDump( dump, "Effects/Reverb [wet]" ); + fprintf(dump,"Interrupt Address: %x\n",Cores[c].IRQA); fprintf(dump,"DMA Transfer Start Address: %x\n",Cores[c].TSA); fprintf(dump,"External Input to Direct Output (Left): %s\n",Cores[c].ExtDryL?"Yes":"No"); @@ -156,24 +163,11 @@ void DoFullDump() fprintf(dump," - ENDX: %x\n",Cores[c].Regs.VMIXER); fprintf(dump," - STATX: %x\n",Cores[c].Regs.VMIXEL); fprintf(dump," - ATTR: %x\n",Cores[c].Regs.VMIXER); - for(v=0;v<24;v++) { + for(v=0;v<24;v++) + { fprintf(dump,"Voice %d:\n",v); - fprintf(dump," - Volume for Left Channel: %x\n" - " - Value: %x\n" - " - Mode: %x\n" - " - Increment: %x\n", - Cores[c].Voices[v].VolumeL.Reg_VOL, - Cores[c].Voices[v].VolumeL.Value, - Cores[c].Voices[v].VolumeL.Mode, - Cores[c].Voices[v].VolumeL.Increment); - fprintf(dump," - Volume for Right Channel: %x\n" - " - Value: %x\n" - " - Mode: %x\n" - " - Increment: %x\n", - Cores[c].Voices[v].VolumeR.Reg_VOL, - Cores[c].Voices[v].VolumeR.Value, - Cores[c].Voices[v].VolumeR.Mode, - Cores[c].Voices[v].VolumeR.Increment); + Cores[c].Voices[v].Volume.DebugDump( dump, "" ); + fprintf(dump," - 
ADSR Envelope: %x & %x\n" " - Ar: %x\n" " - Am: %x\n" @@ -197,6 +191,7 @@ void DoFullDump() Cores[c].Voices[v].ADSR.ReleaseMode, Cores[c].Voices[v].ADSR.Phase, Cores[c].Voices[v].ADSR.Value); + fprintf(dump," - Pitch: %x\n",Cores[c].Voices[v].Pitch); fprintf(dump," - Modulated: %s\n",Cores[c].Voices[v].Modulated?"Yes":"No"); fprintf(dump," - Source: %s\n",Cores[c].Voices[v].Noise?"Noise":"Wave"); @@ -204,12 +199,12 @@ void DoFullDump() fprintf(dump," - Direct Output for Right Channel: %s\n",Cores[c].Voices[v].DryR?"Yes":"No"); fprintf(dump," - Effects Output for Left Channel: %s\n",Cores[c].Voices[v].WetL?"Yes":"No"); fprintf(dump," - Effects Output for Right Channel: %s\n",Cores[c].Voices[v].WetR?"Yes":"No"); - fprintf(dump," - Loop Start Adress: %x\n",Cores[c].Voices[v].LoopStartA); - fprintf(dump," - Sound Start Adress: %x\n",Cores[c].Voices[v].StartA); - fprintf(dump," - Next Data Adress: %x\n",Cores[c].Voices[v].NextA); - fprintf(dump," - Play Start Cycle: %d\n",Cores[c].Voices[v].PlayCycle); - fprintf(dump," - Play Status: %s\n",(Cores[c].Voices[v].ADSR.Phase>0)?"Playing":"Not Playing"); - fprintf(dump," - Block Sample: %d\n",Cores[c].Voices[v].SCurrent); + fprintf(dump," - Loop Start Address: %x\n",Cores[c].Voices[v].LoopStartA); + fprintf(dump," - Sound Start Address: %x\n",Cores[c].Voices[v].StartA); + fprintf(dump," - Next Data Address: %x\n",Cores[c].Voices[v].NextA); + fprintf(dump," - Play Start Cycle: %d\n",Cores[c].Voices[v].PlayCycle); + fprintf(dump," - Play Status: %s\n",(Cores[c].Voices[v].ADSR.Phase>0)?"Playing":"Not Playing"); + fprintf(dump," - Block Sample: %d\n",Cores[c].Voices[v].SCurrent); } fprintf(dump,"#### END OF DUMP.\n\n"); } diff --git a/plugins/spu2-x/src/Debug.h b/plugins/spu2-x/src/Debug.h index b6d29e837a..ce79684b90 100644 --- a/plugins/spu2-x/src/Debug.h +++ b/plugins/spu2-x/src/Debug.h @@ -52,9 +52,10 @@ namespace WaveDump , CoreSrc_Count }; - void Open(); - void Close(); - void WriteCore( uint coreidx, CoreSourceType src, 
s16 left, s16 right ); + extern void Open(); + extern void Close(); + extern void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right ); + extern void WriteCore( uint coreidx, CoreSourceType src, const StereoOut16& sample ); } using WaveDump::CoreSrc_Input; diff --git a/plugins/spu2-x/src/Decoder.cpp b/plugins/spu2-x/src/Decoder.cpp index ac7efcf57f..b1d0142cb7 100644 --- a/plugins/spu2-x/src/Decoder.cpp +++ b/plugins/spu2-x/src/Decoder.cpp @@ -58,7 +58,6 @@ int state=0; FILE *fSpdifDump; extern u32 core; -void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR); union spdif_frame { // total size: 32bits struct { @@ -132,22 +131,23 @@ s32 stoi(sample_t n) //input: [-1..1] void spdif_update() { - s32 Data,Zero; + StereoOut32 Data; core=0; V_Core& thiscore( Cores[core] ); for(int i=0;i 0 ) { vc.PV2 = vc.PV1; - - GetNextDataBuffered( thiscore, vc, vc.PV1 ); - + vc.PV1 = GetNextDataBuffered( thiscore, vc ); vc.SP -= 4096; } @@ -358,28 +379,28 @@ static void __forceinline GetVoiceValues_Linear(V_Core& thiscore, V_Voice& vc, s if(Interpolation==0) { - Value = ApplyVolume( vc.PV1, vc.ADSR.Value ); + return ApplyVolume( vc.PV1, vc.ADSR.Value ); } else //if(Interpolation==1) //must be linear { s32 t0 = vc.PV2 - vc.PV1; - Value = MulShr32( (vc.PV1<<1) - ((t0*vc.SP)>>11), vc.ADSR.Value ); + return MulShr32( (vc.PV1<<1) - ((t0*vc.SP)>>11), vc.ADSR.Value ); } } // Returns a 16 bit result in Value. -static void __forceinline GetVoiceValues_Cubic(V_Core& thiscore, V_Voice& vc, s32& Value) +static s32 __forceinline GetVoiceValues_Cubic( V_Core& thiscore, V_Voice& vc ) { while( vc.SP > 0 ) { - vc.PV4=vc.PV3; - vc.PV3=vc.PV2; - vc.PV2=vc.PV1; + vc.PV4 = vc.PV3; + vc.PV3 = vc.PV2; + vc.PV2 = vc.PV1; - GetNextDataBuffered( thiscore, vc, vc.PV1 ); - vc.PV1<<=2; + vc.PV1 = GetNextDataBuffered( thiscore, vc ); + vc.PV1 <<= 2; vc.SPc = vc.SP&4095; // just the fractional part, please! 
- vc.SP-=4096; + vc.SP -= 4096; } CalculateADSR( thiscore, vc ); @@ -398,35 +419,37 @@ static void __forceinline GetVoiceValues_Cubic(V_Core& thiscore, V_Voice& vc, s3 // Note! It's very important that ADSR stay as accurate as possible. By the way // it is used, various sound effects can end prematurely if we truncate more than // one or two bits. - Value = MulShr32( val, vc.ADSR.Value>>1 ); + return MulShr32( val, vc.ADSR.Value>>1 ); } // Noise values need to be mixed without going through interpolation, since it // can wreak havoc on the noise (causing muffling or popping). Not that this noise // generator is accurate in its own right.. but eh, ah well :) -static void __forceinline __fastcall GetNoiseValues(V_Core& thiscore, V_Voice& vc, s32& Data) +static s32 __forceinline __fastcall GetNoiseValues( V_Core& thiscore, V_Voice& vc ) { - while(vc.SP>=4096) + s32 retval = GetNoiseValues(); + + /*while(vc.SP>=4096) { - GetNoiseValues( Data ); + retval = GetNoiseValues(); vc.SP-=4096; - } + }*/ // GetNoiseValues can't set the phase zero on us unexpectedly // like GetVoiceValues can. Better assert just in case though.. - jASSUME( vc.ADSR.Phase != 0 ); + jASSUME( vc.ADSR.Phase != 0 ); CalculateADSR( thiscore, vc ); // Yup, ADSR applies even to noise sources... - Data = MulShr32( Data, vc.ADSR.Value ); + return ApplyVolume( retval, vc.ADSR.Value ); } ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // // -void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) +void __fastcall ReadInput( V_Core& thiscore, StereoOut32& PData ) { if((thiscore.AutoDMACtrl&(core+1))==(core+1)) { @@ -442,17 +465,17 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) // so we just downgrade it to 16 bits for now. 
#ifdef PCM24_S1_INTERLEAVE - *PDataL=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1)))); - *PDataR=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1)+2))); + *PData.Left=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1)))); + *PData.Right=*(((s32*)(thiscore.ADMATempBuffer+(thiscore.InputPos<<1)+2))); #else s32 *pl=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos]); s32 *pr=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos+0x200]); - PDataL=*pl; - PDataR=*pr; + PData.Left = *pl; + PData.Right = *pr; #endif - PDataL>>=1; //give 31 bit data (SndOut downsamples the rest of the way) - PDataR>>=1; + PData.Left >>= 2; //give 30 bit data (SndOut downsamples the rest of the way) + PData.Right >>= 2; thiscore.InputPos+=2; if((thiscore.InputPos==0x100)||(thiscore.InputPos>=0x200)) { @@ -495,8 +518,8 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) s32 *pl=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos]); s32 *pr=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos+0x200]); - PDataL=*pl; - PDataR=*pr; + PData.Left = *pl; + PData.Right = *pr; thiscore.InputPos+=2; if(thiscore.InputPos>=0x200) { @@ -540,16 +563,16 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) else { // Using the temporary buffer because this area gets overwritten by some other code. 
- //*PDataL=(s32)*(s16*)(spu2mem+0x2000+(core<<10)+thiscore.InputPos); - //*PDataR=(s32)*(s16*)(spu2mem+0x2200+(core<<10)+thiscore.InputPos); + //*PData.Left = (s32)*(s16*)(spu2mem+0x2000+(core<<10)+thiscore.InputPos); + //*PData.Right = (s32)*(s16*)(spu2mem+0x2200+(core<<10)+thiscore.InputPos); - tl=(s32)thiscore.ADMATempBuffer[thiscore.InputPos]; - tr=(s32)thiscore.ADMATempBuffer[thiscore.InputPos+0x200]; + tl = (s32)thiscore.ADMATempBuffer[thiscore.InputPos]; + tr = (s32)thiscore.ADMATempBuffer[thiscore.InputPos+0x200]; } - PDataL=tl; - PDataR=tr; + PData.Left = tl; + PData.Right = tr; thiscore.InputPos++; if((thiscore.InputPos==0x100)||(thiscore.InputPos>=0x200)) { @@ -585,9 +608,10 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) } } } - else { - PDataL=0; - PDataR=0; + else + { + PData.Left = 0; + PData.Right = 0; } } @@ -595,29 +619,21 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR) ///////////////////////////////////////////////////////////////////////////////////////// // // -static void __forceinline __fastcall ReadInputPV(V_Core& thiscore, s32& ValL,s32& ValR) +static __forceinline StereoOut32 ReadInputPV( V_Core& thiscore ) { - s32 DL=0, DR=0; - u32 pitch=AutoDMAPlayRate[core]; if(pitch==0) pitch=48000; - thiscore.ADMAPV+=pitch; + thiscore.ADMAPV += pitch; while(thiscore.ADMAPV>=48000) { - ReadInput(thiscore, DL,DR); - thiscore.ADMAPV-=48000; - thiscore.ADMAPL=DL; - thiscore.ADMAPR=DR; + ReadInput( thiscore, thiscore.ADMAP ); + thiscore.ADMAPV -= 48000; } - ValL=thiscore.ADMAPL; - ValR=thiscore.ADMAPR; - // Apply volumes: - ValL = ApplyVolume( ValL, thiscore.InpL ); - ValR = ApplyVolume( ValR, thiscore.InpR ); + return ApplyVolume( thiscore.ADMAP, thiscore.InpVol ); } ///////////////////////////////////////////////////////////////////////////////////////// @@ -637,108 +653,107 @@ static __forceinline void spu2M_WriteFast( u32 addr, s16 value ) } -static __forceinline void MixVoice( V_Core& thiscore, 
V_Voice& vc, s32& VValL, s32& VValR ) +static __forceinline StereoOut32 MixVoice( V_Core& thiscore, V_Voice& vc ) { - s32 Value=0; - - VValL = 0; - VValR = 0; - // Most games don't use much volume slide effects. So only call the UpdateVolume // methods when needed by checking the flag outside the method here... - vc.VolumeL.Update(); - vc.VolumeR.Update(); + vc.Volume.Update(); + // SPU2 Note: The spu2 continues to process voices for eternity, always, so we + // have to run through all the motions of updating the voice regardless of it's + // audible status. Otherwise IRQs might not trigger and emulation might fail. + if( vc.ADSR.Phase > 0 ) { UpdatePitch( vc ); + s32 Value; + if( vc.Noise ) - GetNoiseValues( thiscore, vc, Value ); + Value = GetNoiseValues( thiscore, vc ); else { if( Interpolation == 2 ) - GetVoiceValues_Cubic( thiscore, vc, Value ); + Value = GetVoiceValues_Cubic( thiscore, vc ); else - GetVoiceValues_Linear( thiscore, vc, Value ); + Value = GetVoiceValues_Linear( thiscore, vc ); } - // Record the output (used for modulation effects) + // Note: All values recorded into OutX (may be used for modulation later) vc.OutX = Value; if( IsDevBuild ) - DebugCores[core].Voices[voice].displayPeak = max(DebugCores[core].Voices[voice].displayPeak,abs(Value)); + DebugCores[core].Voices[voice].displayPeak = max(DebugCores[core].Voices[voice].displayPeak,abs(vc.OutX)); - // TODO : Implement this using high-def MulShr32. - // vc.VolumeL/R are 15 bits. 
Value should be 32 bits (but is currently 16) + // Write-back of raw voice data (post ADSR applied) - VValL = ApplyVolume(Value,vc.VolumeL.Value); - VValR = ApplyVolume(Value,vc.VolumeR.Value); + if (voice==1) spu2M_WriteFast( 0x400 + (core<<12) + OutPos, vc.OutX ); + else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, vc.OutX ); + + return ApplyVolume( StereoOut32( Value, Value ), vc.Volume ); } + else + { + // Write-back of raw voice data (some zeros since the voice is "dead") - // Write-back of raw voice data (post ADSR applied) - - if (voice==1) spu2M_WriteFast( 0x400 + (core<<12) + OutPos, (s16)Value ); - else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, (s16)Value ); - + if (voice==1) spu2M_WriteFast( 0x400 + (core<<12) + OutPos, 0 ); + else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, 0 ); + + return StereoOut32( 0, 0 ); + } } -static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) +static StereoOut32 __fastcall MixCore( const StereoOut32& Input, const StereoOut32& Ext ) { - s32 RVL,RVR; - s32 SDL=0,SDR=0; - s32 SWL=0,SWR=0; - V_Core& thiscore( Cores[core] ); + thiscore.MasterVol.Update(); + + StereoOut32 Dry(0,0), Wet(0,0); for( voice=0; voice<24; ++voice ) { - s32 VValL,VValR; - V_Voice& vc( thiscore.Voices[voice] ); - MixVoice( thiscore, vc, VValL, VValR ); + StereoOut32 VVal( MixVoice( thiscore, vc ) ); // Note: Results from MixVoice are ranged at 16 bits. - // Following muls are toggles only (0 or 1) - SDL += VValL & vc.DryL; - SDR += VValR & vc.DryR; - SWL += VValL & vc.WetL; - SWR += VValR & vc.WetR; + Dry.Left += VVal.Left & vc.DryL; + Dry.Right += VVal.Right & vc.DryR; + Wet.Left += VVal.Left & vc.WetL; + Wet.Right += VVal.Right & vc.WetR; } // Saturate final result to standard 16 bit range. 
- SDL = clamp_mix( SDL ); - SDR = clamp_mix( SDR ); - SWL = clamp_mix( SWL ); - SWR = clamp_mix( SWR ); + clamp_mix( Dry ); + clamp_mix( Wet ); // Write Mixed results To Output Area - spu2M_WriteFast( 0x1000 + (core<<12) + OutPos, (s16)SDL ); - spu2M_WriteFast( 0x1200 + (core<<12) + OutPos, (s16)SDR ); - spu2M_WriteFast( 0x1400 + (core<<12) + OutPos, (s16)SWL ); - spu2M_WriteFast( 0x1600 + (core<<12) + OutPos, (s16)SWR ); + spu2M_WriteFast( 0x1000 + (core<<12) + OutPos, Dry.Left ); + spu2M_WriteFast( 0x1200 + (core<<12) + OutPos, Dry.Right ); + spu2M_WriteFast( 0x1400 + (core<<12) + OutPos, Wet.Left ); + spu2M_WriteFast( 0x1600 + (core<<12) + OutPos, Wet.Right ); // Write mixed results to logfile (if enabled) - WaveDump::WriteCore( core, CoreSrc_DryVoiceMix, SDL, SDR ); - WaveDump::WriteCore( core, CoreSrc_WetVoiceMix, SWL, SWR ); - - s32 TDL,TDR; + WaveDump::WriteCore( core, CoreSrc_DryVoiceMix, Dry ); + WaveDump::WriteCore( core, CoreSrc_WetVoiceMix, Wet ); // Mix in the Input data - TDL = OutL & thiscore.InpDryL; - TDR = OutR & thiscore.InpDryR; + StereoOut32 TD( + Input.Left & thiscore.InpDryL, + Input.Right & thiscore.InpDryR + ); + // Mix in the Voice data - TDL += SDL & thiscore.SndDryL; - TDR += SDR & thiscore.SndDryR; + TD.Left += Dry.Left & thiscore.SndDryL; + TD.Right += Dry.Right & thiscore.SndDryR; // Mix in the External (nothing/core0) data - TDL += ExtL & thiscore.ExtDryL; - TDR += ExtR & thiscore.ExtDryR; + TD.Left += Ext.Left & thiscore.ExtDryL; + TD.Right += Ext.Right & thiscore.ExtDryR; if( !EffectsDisabled ) { @@ -747,138 +762,106 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) if( thiscore.FxEnable ) { - s32 TWL,TWR; - // Mix Input, Voice, and External data: - TWL = OutL & thiscore.InpWetL; - TWR = OutR & thiscore.InpWetR; - TWL += SWL & thiscore.SndWetL; - TWR += SWR & thiscore.SndWetR; - TWL += ExtL & thiscore.ExtWetL; - TWR += ExtR & thiscore.ExtWetR; + StereoOut32 TW( + Input.Left & thiscore.InpWetL, + 
Input.Right & thiscore.InpWetR + ); + + TW.Left += Wet.Left & thiscore.SndWetL; + TW.Right += Wet.Right & thiscore.SndWetR; + TW.Left += Ext.Left & thiscore.ExtWetL; + TW.Right += Ext.Right & thiscore.ExtWetR; - WaveDump::WriteCore( core, CoreSrc_PreReverb, TWL, TWR ); + WaveDump::WriteCore( core, CoreSrc_PreReverb, TW ); - DoReverb( thiscore, RVL, RVR, TWL, TWR ); + StereoOut32 RV( DoReverb( thiscore, TW ) ); // Volume boost after effects application. Boosting volume prior to effects // causes slight overflows in some games, and the volume boost is required. // (like all over volumes on SPU2, reverb coefficients and stuff are signed, // range -50% to 50%, thus *2 is needed) - RVL *= 2; - RVR *= 2; + RV.Left *= 2; + RV.Right *= 2; - WaveDump::WriteCore( core, CoreSrc_PostReverb, RVL, RVR ); - - TWL = ApplyVolume(RVL,thiscore.FxL); - TWR = ApplyVolume(RVR,thiscore.FxR); + WaveDump::WriteCore( core, CoreSrc_PostReverb, RV ); // Mix Dry+Wet - OutL = TDL + TWL; - OutR = TDR + TWR; + return StereoOut32( TD + ApplyVolume( RV, thiscore.FxVol ) ); } else { WaveDump::WriteCore( core, CoreSrc_PreReverb, 0, 0 ); WaveDump::WriteCore( core, CoreSrc_PostReverb, 0, 0 ); - OutL = TDL; - OutR = TDR; } } - else - { - OutL = TDL; - OutR = TDR; - } - - // Apply Master Volume. The core will need this when the function returns. - - thiscore.MasterL.Update(); - thiscore.MasterR.Update(); + return TD; } // used to throttle the output rate of cache stat reports static int p_cachestat_counter=0; -void Mix() +__forceinline void Mix() { - s32 ExtL=0, ExtR=0, OutL, OutR; - // **** CORE ZERO **** + core = 0; - core=0; - if( (PlayMode&4) == 0 ) - { - // get input data from input buffers - ReadInputPV(Cores[0], ExtL, ExtR); - WaveDump::WriteCore( 0, CoreSrc_Input, ExtL, ExtR ); - } + // Note: Playmode 4 is SPDIF, which overrides other inputs. + StereoOut32 Ext( (PlayMode&4) ? 
StereoOut32::Empty : ReadInputPV( Cores[0] ) ); + WaveDump::WriteCore( 0, CoreSrc_Input, Ext ); - MixCore( ExtL, ExtR, 0, 0 ); + Ext = MixCore( Ext, StereoOut32::Empty ); if( (PlayMode & 4) || (Cores[0].Mute!=0) ) - { - ExtL=0; - ExtR=0; - } + Ext = StereoOut32( 0, 0 ); else { - ExtL = ApplyVolume( ExtL, Cores[0].MasterL.Value ); - ExtR = ApplyVolume( ExtR, Cores[0].MasterR.Value ); + Ext = ApplyVolume( Ext, Cores[0].MasterVol ); + clamp_mix( Ext ); } - + // Commit Core 0 output to ram before mixing Core 1: - - ExtL = clamp_mix( ExtL ); - ExtR = clamp_mix( ExtR ); - spu2M_WriteFast( 0x800 + OutPos, ExtL ); - spu2M_WriteFast( 0xA00 + OutPos, ExtR ); - - WaveDump::WriteCore( 0, CoreSrc_External, ExtL, ExtR ); + spu2M_WriteFast( 0x800 + OutPos, Ext.Left ); + spu2M_WriteFast( 0xA00 + OutPos, Ext.Right ); + WaveDump::WriteCore( 0, CoreSrc_External, Ext ); // **** CORE ONE **** core = 1; - if( (PlayMode&8) != 8 ) - { - ReadInputPV(Cores[1], OutL, OutR); // get input data from input buffers - WaveDump::WriteCore( 1, CoreSrc_Input, OutL, OutR ); - } + StereoOut32 Out( (PlayMode&8) ? StereoOut32::Empty : ReadInputPV( Cores[1] ) ); + WaveDump::WriteCore( 1, CoreSrc_Input, Out ); - // Apply volume to the external (Core 0) input data. - - MixCore( OutL, OutR, ApplyVolume( ExtL, Cores[1].ExtL), ApplyVolume( ExtR, Cores[1].ExtR) ); + ApplyVolume( Ext, Cores[1].ExtVol ); + Out = MixCore( Out, Ext ); if( PlayMode & 8 ) { // Experimental CDDA support // The CDDA overrides all other mixer output. It's a direct feed! 
- ReadInput(Cores[1], OutL, OutR); + ReadInput( Cores[1], Out ); //WaveLog::WriteCore( 1, "CDDA-32", OutL, OutR ); } else { - OutL = MulShr32( OutL<<10, Cores[1].MasterL.Value ); - OutR = MulShr32( OutR<<10, Cores[1].MasterR.Value ); + Out.Left = MulShr32( Out.Left< thiscore.EffectsEndA ) { - pos = thiscore.EffectsStartA + ((thiscore.ReverbX + offset) % (u32)thiscore.EffectsBufferSize); - } - else if( pos < thiscore.EffectsStartA ) - { - pos = thiscore.EffectsEndA+1 - ((thiscore.ReverbX + offset) % (u32)thiscore.EffectsBufferSize ); + //pos = thiscore.EffectsStartA + ((thiscore.ReverbX + offset) % (u32)thiscore.EffectsBufferSize); + pos -= thiscore.EffectsEndA+1; + pos += thiscore.EffectsStartA; } return pos; } @@ -52,15 +50,16 @@ void Reverb_AdvanceBuffer( V_Core& thiscore ) { if( (Cycles & 1) && (thiscore.EffectsBufferSize > 0) ) { - thiscore.ReverbX += 1; - if(thiscore.ReverbX >= (u32)thiscore.EffectsBufferSize ) - thiscore.ReverbX %= (u32)thiscore.EffectsBufferSize; + thiscore.ReverbX = RevbGetIndexer( thiscore, 1 ); + //thiscore.ReverbX += 1; + //if(thiscore.ReverbX >= (u32)thiscore.EffectsBufferSize ) + // thiscore.ReverbX %= (u32)thiscore.EffectsBufferSize; } } ///////////////////////////////////////////////////////////////////////////////////////// -void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR) +StereoOut32 DoReverb( V_Core& thiscore, const StereoOut32& Input ) { // Reverb processing occurs at 24khz, so we skip processing every other sample, // and use the previous calculation for this core instead. @@ -68,84 +67,90 @@ void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR) if( thiscore.EffectsBufferSize <= 0 ) { // StartA is past EndA, so effects are disabled. - OutL = InL; - OutR = InR; //ConLog( " * SPU2: Effects disabled due to leapfrogged EffectsStart." 
); - return; + return Input; } - if((Cycles&1)==0) + if( (Cycles&1)==0 ) { - OutL = thiscore.LastEffectL; - OutR = thiscore.LastEffectR; - - thiscore.LastEffectL = InL; - thiscore.LastEffectR = InR; + StereoOut32 retval( thiscore.LastEffect ); + thiscore.LastEffect = Input; + return retval; } else { + if( thiscore.RevBuffers.NeedsUpdated ) + thiscore.UpdateEffectsBufferSize(); + // Advance the current reverb buffer pointer, and cache the read/write addresses we'll be // needing for this session of reverb. - const u32 src_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_A0 ); - const u32 src_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_A1 ); - const u32 src_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_B0 ); - const u32 src_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_SRC_B1 ); + const u32 src_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_A0 ); + const u32 src_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_A1 ); + const u32 src_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_B0 ); + const u32 src_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_SRC_B1 ); - const u32 dest_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A0 ); - const u32 dest_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A1 ); - const u32 dest_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B0 ); - const u32 dest_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B1 ); + const u32 dest_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A0 ); + const u32 dest_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A1 ); + const u32 dest_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B0 ); + const u32 dest_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B1 ); - const u32 dest2_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A0 + 1 ); - const u32 dest2_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_A1 + 1 ); 
- const u32 dest2_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B0 + 1 ); - const u32 dest2_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.IIR_DEST_B1 + 1 ); + const u32 dest2_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A0 + 1 ); + const u32 dest2_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_A1 + 1 ); + const u32 dest2_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B0 + 1 ); + const u32 dest2_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.IIR_DEST_B1 + 1 ); - const u32 acc_src_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_A0 ); - const u32 acc_src_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_B0 ); - const u32 acc_src_c0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_C0 ); - const u32 acc_src_d0 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_D0 ); + const u32 acc_src_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_A0 ); + const u32 acc_src_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_B0 ); + const u32 acc_src_c0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_C0 ); + const u32 acc_src_d0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_D0 ); - const u32 acc_src_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_A1 ); - const u32 acc_src_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_B1 ); - const u32 acc_src_c1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_C1 ); - const u32 acc_src_d1 = EffectsBufferIndexer( thiscore, thiscore.Revb.ACC_SRC_D1 ); + const u32 acc_src_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_A1 ); + const u32 acc_src_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_B1 ); + const u32 acc_src_c1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_C1 ); + const u32 acc_src_d1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.ACC_SRC_D1 ); - const u32 fb_src_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A0 - thiscore.Revb.FB_SRC_A ); - 
const u32 fb_src_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A1 - thiscore.Revb.FB_SRC_A ); - const u32 fb_src_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B0 - thiscore.Revb.FB_SRC_B ); - const u32 fb_src_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B1 - thiscore.Revb.FB_SRC_B ); + const u32 fb_src_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_A0 ); + const u32 fb_src_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_A1 ); + const u32 fb_src_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_B0 ); + const u32 fb_src_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.FB_SRC_B1 ); - const u32 mix_dest_a0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A0 ); - const u32 mix_dest_a1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_A1 ); - const u32 mix_dest_b0 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B0 ); - const u32 mix_dest_b1 = EffectsBufferIndexer( thiscore, thiscore.Revb.MIX_DEST_B1 ); + const u32 mix_dest_a0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_A0 ); + const u32 mix_dest_a1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_A1 ); + const u32 mix_dest_b0 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_B0 ); + const u32 mix_dest_b1 = RevbGetIndexer( thiscore, thiscore.RevBuffers.MIX_DEST_B1 ); // ----------------------------------------- // End Buffer Pointers, Begin Reverb! 
// ----------------------------------------- - const s32 INPUT_SAMPLE_L = (thiscore.LastEffectL+InL); - const s32 INPUT_SAMPLE_R = (thiscore.LastEffectR+InR); + StereoOut32 INPUT_SAMPLE( thiscore.LastEffect + Input ); - //const s32 INPUT_SAMPLE_L = (s32)( lowpass_left.sample( (thiscore.LastEffectL+InL)/65536.0 ) * 65536.0 ); - //const s32 INPUT_SAMPLE_R = (s32)( lowpass_right.sample( (thiscore.LastEffectR+InR)/65536.0 ) * 65536.0 ); + const s32 IIR_INPUT_A0 = ((_spu2mem[src_a0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Left * thiscore.Revb.IN_COEF_L))>>16; + const s32 IIR_INPUT_A1 = ((_spu2mem[src_a1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Right * thiscore.Revb.IN_COEF_R))>>16; + const s32 IIR_INPUT_B0 = ((_spu2mem[src_b0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Left * thiscore.Revb.IN_COEF_L))>>16; + const s32 IIR_INPUT_B1 = ((_spu2mem[src_b1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE.Right * thiscore.Revb.IN_COEF_R))>>16; - const s32 IIR_INPUT_A0 = ((_spu2mem[src_a0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_L * thiscore.Revb.IN_COEF_L))>>16; - const s32 IIR_INPUT_A1 = ((_spu2mem[src_a1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_R * thiscore.Revb.IN_COEF_R))>>16; - const s32 IIR_INPUT_B0 = ((_spu2mem[src_b0] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_L * thiscore.Revb.IN_COEF_L))>>16; - const s32 IIR_INPUT_B1 = ((_spu2mem[src_b1] * thiscore.Revb.IIR_COEF) + (INPUT_SAMPLE_R * thiscore.Revb.IN_COEF_R))>>16; + //const s32 IIR_A0 = (IIR_INPUT_A0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a0] * (0x7fff - thiscore.Revb.IIR_ALPHA)); + //const s32 IIR_A1 = (IIR_INPUT_A1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a1] * (0x7fff - thiscore.Revb.IIR_ALPHA)); + //const s32 IIR_B0 = (IIR_INPUT_B0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b0] * (0x7fff - thiscore.Revb.IIR_ALPHA)); + //const s32 IIR_B1 = (IIR_INPUT_B1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b1] * (0x7fff - thiscore.Revb.IIR_ALPHA)); - const s32 IIR_A0 = (IIR_INPUT_A0 * thiscore.Revb.IIR_ALPHA) + 
(_spu2mem[dest_a0] * (0x7fff - thiscore.Revb.IIR_ALPHA)); - const s32 IIR_A1 = (IIR_INPUT_A1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_a1] * (0x7fff - thiscore.Revb.IIR_ALPHA)); - const s32 IIR_B0 = (IIR_INPUT_B0 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b0] * (0x7fff - thiscore.Revb.IIR_ALPHA)); - const s32 IIR_B1 = (IIR_INPUT_B1 * thiscore.Revb.IIR_ALPHA) + (_spu2mem[dest_b1] * (0x7fff - thiscore.Revb.IIR_ALPHA)); + //_spu2mem[dest2_a0] = clamp_mix( IIR_A0 >> 16 ); + //_spu2mem[dest2_a1] = clamp_mix( IIR_A1 >> 16 ); + //_spu2mem[dest2_b0] = clamp_mix( IIR_B0 >> 16 ); + //_spu2mem[dest2_b1] = clamp_mix( IIR_B1 >> 16 ); - _spu2mem[dest2_a0] = clamp_mix( IIR_A0 >> 16 ); - _spu2mem[dest2_a1] = clamp_mix( IIR_A1 >> 16 ); - _spu2mem[dest2_b0] = clamp_mix( IIR_B0 >> 16 ); - _spu2mem[dest2_b1] = clamp_mix( IIR_B1 >> 16 ); + // Faster single-mul approach to interpolation: + const s32 IIR_A0 = IIR_INPUT_A0 + ((_spu2mem[dest_a0]-IIR_INPUT_A0) * thiscore.Revb.IIR_ALPHA)>>16; + const s32 IIR_A1 = IIR_INPUT_A1 + ((_spu2mem[dest_a1]-IIR_INPUT_A1) * thiscore.Revb.IIR_ALPHA)>>16; + const s32 IIR_B0 = IIR_INPUT_B0 + ((_spu2mem[dest_b0]-IIR_INPUT_B0) * thiscore.Revb.IIR_ALPHA)>>16; + const s32 IIR_B1 = IIR_INPUT_B1 + ((_spu2mem[dest_b1]-IIR_INPUT_B1) * thiscore.Revb.IIR_ALPHA)>>16; + + _spu2mem[dest2_a0] = clamp_mix( IIR_A0 ); + _spu2mem[dest2_a1] = clamp_mix( IIR_A1 ); + _spu2mem[dest2_b0] = clamp_mix( IIR_B0 ); + _spu2mem[dest2_b1] = clamp_mix( IIR_B1 ); const s32 ACC0 = ((_spu2mem[acc_src_a0] * thiscore.Revb.ACC_COEF_A)) + @@ -161,8 +166,6 @@ void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR) const s32 FB_A0 = (_spu2mem[fb_src_a0] * thiscore.Revb.FB_ALPHA); const s32 FB_A1 = (_spu2mem[fb_src_a1] * thiscore.Revb.FB_ALPHA); - const s32 FB_B0 = (_spu2mem[fb_src_b0] * (0x7fff - thiscore.Revb.FB_ALPHA)); //>>16; - const s32 FB_B1 = (_spu2mem[fb_src_b1] * (0x7fff - thiscore.Revb.FB_ALPHA)); //>>16; const s32 fb_xor_a0 = (_spu2mem[fb_src_a0] * ( 
thiscore.Revb.FB_ALPHA ^ 0x8000 ))>>2; const s32 fb_xor_a1 = (_spu2mem[fb_src_a1] * ( thiscore.Revb.FB_ALPHA ^ 0x8000 ))>>2; @@ -172,12 +175,13 @@ void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR) _spu2mem[mix_dest_b0] = clamp_mix( (MulShr32(thiscore.Revb.FB_ALPHA<<14, ACC0) - fb_xor_a0 - ((_spu2mem[fb_src_b0] * thiscore.Revb.FB_X)>>2)) >> 14 ); _spu2mem[mix_dest_b1] = clamp_mix( (MulShr32(thiscore.Revb.FB_ALPHA<<14, ACC1) - fb_xor_a1 - ((_spu2mem[fb_src_b1] * thiscore.Revb.FB_X)>>2)) >> 14 ); - thiscore.LastEffectL = clamp_mix(_spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0]); - thiscore.LastEffectR = clamp_mix(_spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1]); + thiscore.LastEffect.Left = _spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0]; + thiscore.LastEffect.Right = _spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1]; + clamp_mix( thiscore.LastEffect ); - //OutL = thiscore.LastEffectL; - //OutR = thiscore.LastEffectR; - OutL = (s32)(lowpass_left.sample( thiscore.LastEffectL / 32768.0 ) * 32768.0); - OutR = (s32)(lowpass_right.sample( thiscore.LastEffectR / 32768.0 ) * 32768.0); + thiscore.LastEffect.Left = (s32)(lowpass_left.sample( thiscore.LastEffect.Left / 32768.0 ) * 32768.0); + thiscore.LastEffect.Right = (s32)(lowpass_right.sample( thiscore.LastEffect.Right / 32768.0 ) * 32768.0); + + return thiscore.LastEffect; } } diff --git a/plugins/spu2-x/src/SndOut.cpp b/plugins/spu2-x/src/SndOut.cpp index 8621c0a4a8..7a29816fe5 100644 --- a/plugins/spu2-x/src/SndOut.cpp +++ b/plugins/spu2-x/src/SndOut.cpp @@ -19,24 +19,45 @@ * */ -// [TODO] : The layout of this code file is now a complete hackish mess after -// numerous timestretch-related additions. The whole thing should really be -// rethought and redone at this point. 
- #include "spu2.h" -#include "SoundTouch/SoundTouch.h" -#include "SoundTouch/WavFile.h" -#include -static int ts_stats_stretchblocks = 0; -static int ts_stats_normalblocks = 0; -static int ts_stats_logcounter = 0; +StereoOut32 StereoOut32::Empty( 0, 0 ); + +StereoOut32::StereoOut32( const StereoOut16& src ) : + Left( src.Left ), + Right( src.Right ) +{ +} + +StereoOut32::StereoOut32( const StereoOutFloat& src ) : + Left( (s32)(src.Left * 2147483647.0f) ), + Right( (s32)(src.Right * 2147483647.0f) ) +{ +} + +StereoOut16 StereoOut32::DownSample() const +{ + return StereoOut16( + Left >> SndOutVolumeShift, + Right >> SndOutVolumeShift + ); +} + +StereoOut32 StereoOut16::UpSample() const +{ + return StereoOut32( + Left << SndOutVolumeShift, + Right << SndOutVolumeShift + ); + +} + class NullOutModule: public SndOutModule { public: - s32 Init(SndBuffer *) { return 0; } + s32 Init() { return 0; } void Close() { } s32 Test() const { return 0; } void Configure(HWND parent) { } @@ -61,7 +82,6 @@ SndOutModule* mods[]= XAudio2Out, DSoundOut, WaveOut, - //ASIOOut, NULL // signals the end of our list }; @@ -77,528 +97,173 @@ int FindOutputModuleById( const wchar_t* omodid ) return modcnt; } +StereoOut32 *SndBuffer::m_buffer; +s32 SndBuffer::m_size; +s32 SndBuffer::m_rpos; +s32 SndBuffer::m_wpos; +s32 SndBuffer::m_data; -__forceinline s16 SndScaleVol( s32 inval ) +bool SndBuffer::m_underrun_freeze; +StereoOut32* SndBuffer::sndTempBuffer = NULL; +StereoOut16* SndBuffer::sndTempBuffer16 = NULL; +int SndBuffer::sndTempProgress = 0; + +int GetAlignedBufferSize( int comp ) { - return inval >> SndOutVolumeShift; + return (comp + SndOutPacketSize-1) & ~(SndOutPacketSize-1); } - -// records last buffer status (fill %, range -100 to 100, with 0 being 50% full) -float lastPct; -float lastEmergencyAdj; - -float cTempo=1; -float eTempo = 1; -int freezeTempo = 0; - -soundtouch::SoundTouch* pSoundTouch=NULL; - - -//usefull when timestretch isn't available - -class SndBufferImpl: public 
SndBuffer +// Returns TRUE if there is data to be output, or false if no data +// is available to be copied. +bool SndBuffer::CheckUnderrunStatus( int& nSamples, int& quietSampleCount ) { -private: - s32 *buffer; - s32 size; - s32 rpos; - s32 wpos; - s32 data; + quietSampleCount = 0; + if( m_underrun_freeze ) + { + int toFill = (int)(m_size * ( timeStretchDisabled ? 0.50f : 0.1f ) ); + toFill = GetAlignedBufferSize( toFill ); - // data prediction amount, used to "commit" data that hasn't - // finished timestretch processing. - s32 predictData; + // toFill is now aligned to a SndOutPacket - bool pw; - bool underrun_freeze; - -protected: - int GetAlignedBufferSize( int comp ) - { - return (comp + SndOutPacketSize-1) & ~(SndOutPacketSize-1); - } - -public: - SndBufferImpl( float latencyMS ) - { - rpos=0; - wpos=0; - data=0; - size=GetAlignedBufferSize( (int)(latencyMS * SampleRate / 500.0f ) ); - buffer = new s32[size]; - pw=false; - underrun_freeze = false; - predictData = 0; - } - - virtual ~SndBufferImpl() - { - delete buffer; - } - - virtual void WriteSamples(s32 *bData, int nSamples) - { - int free = size-data; - predictData = 0; - - jASSUME( data <= size ); - - // Problem: - // If the SPU2 gets out of sync with the SndOut device, the writepos of the - // circular buffer will overtake the readpos, leading to a prolonged period - // of hopscotching read/write accesses (ie, lots of staticy crap sound for - // several seconds). - // - // Compromise: - // When an overrun occurs, we adapt by discarding a portion of the buffer. - // The older portion of the buffer is discarded rather than incoming data, - // so that the overall audio synchronization is better. - - if( free < nSamples ) + if( m_data < toFill ) { - // Buffer overrun! - // Dump samples from the read portion of the buffer instead of dropping - // the newly written stuff. - - s32 comp; - - if( !timeStretchDisabled ) - { - // If we overran it means the timestretcher failed. 
We need to speed - // up audio playback. - cTempo += cTempo * 0.12f; - eTempo += eTempo * 0.40f; - if( eTempo > 7.5f ) eTempo = 7.5f; - pSoundTouch->setTempo( eTempo ); - - // Throw out just a little bit (two packets worth) to help - // give the TS some room to work: - - comp = SndOutPacketSize*2; - } - else - { - // Toss half the buffer plus whatever's being written anew: - comp = GetAlignedBufferSize( (size + nSamples ) / 2 ); - if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize; - } - - data -= comp; - rpos = (rpos+comp)%size; - if( MsgOverruns() ) - ConLog(" * SPU2 > Overrun Compensation (%d packets tossed)\n", comp / SndOutPacketSize ); - lastPct = 0.0; // normalize the timestretcher + quietSampleCount = nSamples; + return false; } - // copy in two phases, since there's a chance the packet - // wraps around the buffer (it'd be nice to deal in packets only, but - // the timestretcher and DSP options require flexibility). - - const int endPos = wpos + nSamples; - const int secondCopyLen = endPos - size; - s32* wposbuffer = &buffer[wpos]; - - data += nSamples; - if( secondCopyLen > 0 ) - { - nSamples -= secondCopyLen; - memcpy( buffer, &bData[nSamples], secondCopyLen * sizeof( *bData ) ); - wpos = secondCopyLen; - } - else - wpos += nSamples; - - memcpy( wposbuffer, bData, nSamples * sizeof( *bData ) ); + m_underrun_freeze = false; + if( MsgOverruns() ) + ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize ); + lastPct = 0.0; // normalize timestretcher } - - protected: - // Returns TRUE if there is data to be output, or false if no data - // is available to be copied. - bool CheckUnderrunStatus( int& nSamples, int& quietSampleCount ) + else if( m_data < nSamples ) { - quietSampleCount = 0; - if( underrun_freeze ) - { - int toFill = (int)(size * ( timeStretchDisabled ? 
0.50f : 0.1f ) ); - toFill = GetAlignedBufferSize( toFill ); + nSamples = m_data; + quietSampleCount = SndOutPacketSize - m_data; + m_underrun_freeze = true; - // toFill is now aligned to a SndOutPacket + if( !timeStretchDisabled ) + timeStretchUnderrun(); - if( data < toFill ) - { - quietSampleCount = nSamples; - return false; - } - - underrun_freeze = false; - if( MsgOverruns() ) - ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize ); - lastPct = 0.0; // normalize timestretcher - } - else if( data < nSamples ) - { - nSamples = data; - quietSampleCount = SndOutPacketSize - data; - underrun_freeze = true; - - if( !timeStretchDisabled ) - { - // timeStretcher failed it's job. We need to slow down the audio some. - - cTempo -= (cTempo * 0.12f); - eTempo -= (eTempo * 0.30f); - if( eTempo < 0.1f ) eTempo = 0.1f; - pSoundTouch->setTempo( eTempo ); - } - - return nSamples != 0; - } - - return true; + return nSamples != 0; } -public: - void ReadSamples( s16* bData ) - { - int nSamples = SndOutPacketSize; - - // Problem: - // If the SPU2 gets even the least bit out of sync with the SndOut device, - // the readpos of the circular buffer will overtake the writepos, - // leading to a prolonged period of hopscotching read/write accesses (ie, - // lots of staticy crap sound for several seconds). - // - // Fix: - // If the read position overtakes the write position, abort the - // transfer immediately and force the SndOut driver to wait until - // the read buffer has filled up again before proceeding. - // This will cause one brief hiccup that can never exceed the user's - // set buffer length in duration. - - int quietSamples; - if( CheckUnderrunStatus( nSamples, quietSamples ) ) - { - jASSUME( nSamples <= SndOutPacketSize ); - - // [Air] [TODO]: This loop is probably a candidiate for SSE2 optimization. 
- - const int endPos = rpos + nSamples; - const int secondCopyLen = endPos - size; - const s32* rposbuffer = &buffer[rpos]; - - data -= nSamples; - - if( secondCopyLen > 0 ) - { - nSamples -= secondCopyLen; - for( int i=0; i 0 ) - { - nSamples -= secondCopyLen; - memcpy( &bData[nSamples], buffer, secondCopyLen * sizeof( *bData ) ); - rpos = secondCopyLen; - } - else - rpos += nSamples; - - memcpy( bData, &buffer[oldrpos], nSamples * sizeof( *bData ) ); - } - - // If quietSamples != 0 it means we have an underrun... - // Let's just dull out some silence, because that's usually the least - // painful way of dealing with underruns: - memset( bData, 0, quietSamples * sizeof(*bData) ); - } - - void PredictDataWrite( int samples ) - { - predictData += samples; - } - - virtual void PauseOnWrite(bool doPause) { pw = doPause; } - - // Calculate the buffer status percentage. - // Returns range from -1.0 to 1.0 - // 1.0 = buffer overflow! - // 0.0 = buffer nominal (50% full) - // -1.0 = buffer underflow! - float GetStatusPct() - { - // Get the buffer status of the output driver too, so that we can - // obtain a more accurate overall buffer status. - - int drvempty = mods[OutputModule]->GetEmptySampleCount(); // / 2; - - //ConLog( "Data %d >>> driver: %d predict: %d\n", data, drvempty, predictData ); - - float result = (float)(data + predictData - drvempty) - (size/2); - result /= (size/2); - return result; - } - -}; - -SndBufferImpl *sndBuffer=NULL; - -s32* sndTempBuffer=NULL; -s32 sndTempProgress=NULL; -s16* sndTempBuffer16=NULL; - -void UpdateTempoChange() -{ - if( --freezeTempo > 0 ) - { - return; - } - - float statusPct = sndBuffer->GetStatusPct(); - float pctChange = statusPct - lastPct; - - float tempoChange; - float emergencyAdj = 0; - float newcee = cTempo; // workspace var. for cTempo - - // IMPORTANT! - // If you plan to tweak these values, make sure you're using a release build - // OUTSIDE THE DEBUGGER to test it! 
The Visual Studio debugger can really cause - // erratic behavior in the audio buffers, and makes the timestretcher seem a - // lot more inconsistent than it really is. - - // We have two factors. - // * Distance from nominal buffer status (50% full) - // * The change from previous update to this update. - - // Prediction based on the buffer change: - // (linear seems to work better here) - - tempoChange = pctChange * 0.75f; - - if( statusPct * tempoChange < 0.0f ) - { - // only apply tempo change if it is in synch with the buffer status. - // In other words, if the buffer is high (over 0%), and is decreasing, - // ignore it. It'll just muck things up. - - tempoChange = 0; - } - - // Sudden spikes in framerate can cause the nominal buffer status - // to go critical, in which case we have to enact an emergency - // stretch. The following cubic formulas do that. Values near - // the extremeites give much larger results than those near 0. - // And the value is added only this time, and does not accumulate. - // (otherwise a large value like this would cause problems down the road) - - // Constants: - // Weight - weights the statusPct's "emergency" consideration. - // higher values here will make the buffer perform more drastic - // compensations at the outer edges of the buffer (at -75 or +75% - // or beyond, for example). - - // Range - scales the adjustment to the given range (more or less). - // The actual range is dependent on the weight used, so if you increase - // Weight you'll usually want to decrease Range somewhat to compensate. - - // Prediction based on the buffer fill status: - - const float statusWeight = 2.99f; - const float statusRange = 0.068f; - - // "non-emergency" deadzone: In this area stretching will be strongly discouraged. - // Note: due tot he nature of timestretch latency, it's always a wee bit harder to - // cope with low fps (underruns) tha it is high fps (overruns). 
So to help out a - // little, the low-end portions of this check are less forgiving than the high-sides. - - if( cTempo < 0.965f || cTempo > 1.060f || - pctChange < -0.38f || pctChange > 0.54f || - statusPct < -0.32f || statusPct > 0.39f || - eTempo < 0.89f || eTempo > 1.19f ) - { - emergencyAdj = ( pow( statusPct*statusWeight, 3.0f ) * statusRange); - } - - // Smooth things out by factoring our previous adjustment into this one. - // It helps make the system 'feel' a little smarter by giving it at least - // one packet worth of history to help work off of: - - emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f ); - - lastEmergencyAdj = emergencyAdj; - lastPct = statusPct; - - // Accumulate a fraction of the tempo change into the tempo itself. - // This helps the system run "smarter" to games that run consistently - // fast or slow by altering the base tempo to something closer to the - // game's active speed. In tests most games normalize within 2 seconds - // at 100ms latency, which is pretty good (larger buffers normalize even - // quicker). - - newcee += newcee * (tempoChange+emergencyAdj) * 0.03f; - - // Apply tempoChange as a scale of cTempo. That way the effect is proportional - // to the current tempo. (otherwise tempos rate of change at the extremes would - // be too drastic) - - float newTempo = newcee + ( emergencyAdj * cTempo ); - - // ... and as a final optimization, only stretch if the new tempo is outside - // a nominal threshold. Keep this threshold check small, because it could - // cause some serious side effects otherwise. 
(enlarging the cTempo check above - // is usually better/safer) - if( newTempo < 0.970f || newTempo > 1.045f ) - { - cTempo = (float)newcee; - - if( newTempo < 0.10f ) newTempo = 0.10f; - else if( newTempo > 10.0f ) newTempo = 10.0f; - - if( cTempo < 0.15f ) cTempo = 0.15f; - else if( cTempo > 7.5f ) cTempo = 7.5f; - - pSoundTouch->setTempo( eTempo = (float)newTempo ); - ts_stats_stretchblocks++; - - /*ConLog(" * SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n", - //(relation < 0.0) ? "Normalize" : "", - (int)(tempoChange * 100.0 * 0.03), - (int)(emergencyAdj * 100.0), - (int)(cTempo * 100.0), - (int)(newTempo * 100.0), - (int)(statusPct * 100.0) - );*/ - } - else - { - // Nominal operation -- turn off stretching. - // note: eTempo 'slides' toward 1.0 for smoother audio and better - // protection against spikes. - if( cTempo != 1.0f ) - { - cTempo = 1.0f; - eTempo = ( 1.0f + eTempo ) * 0.5f; - pSoundTouch->setTempo( eTempo ); - } - else - { - if( eTempo != cTempo ) - pSoundTouch->setTempo( eTempo=cTempo ); - ts_stats_normalblocks++; - } - } + return true; } -void soundtouchInit() -{ - pSoundTouch = new soundtouch::SoundTouch(); - pSoundTouch->setSampleRate(SampleRate); - pSoundTouch->setChannels(2); - - pSoundTouch->setSetting( SETTING_USE_QUICKSEEK, 0 ); - pSoundTouch->setSetting( SETTING_USE_AA_FILTER, 0 ); - - pSoundTouch->setSetting( SETTING_SEQUENCE_MS, SoundtouchCfg::SequenceLenMS ); - pSoundTouch->setSetting( SETTING_SEEKWINDOW_MS, SoundtouchCfg::SeekWindowMS ); - pSoundTouch->setSetting( SETTING_OVERLAP_MS, SoundtouchCfg::OverlapMS ); - - pSoundTouch->setTempo(1); - - // some timestretch management vars: - - cTempo = 1.0; - eTempo = 1.0; - lastPct = 0; - lastEmergencyAdj = 0; - - // just freeze tempo changes for a while at startup. - // the driver buffers are bogus anyway. - freezeTempo = 8; -} - -static void _sndInitFail() +void SndBuffer::_InitFail() { // If a failure occurs, just initialize the NoSound driver. 
This'll allow // the game to emulate properly (hopefully), albeit without sound. OutputModule = FindOutputModuleById( NullOut.GetIdent() ); - mods[OutputModule]->Init( sndBuffer ); + mods[OutputModule]->Init(); } -s32 SndInit() +void SndBuffer::_WriteSamples(StereoOut32 *bData, int nSamples) +{ + int free = m_size-m_data; + m_predictData = 0; + + jASSUME( m_data <= m_size ); + + // Problem: + // If the SPU2 gets out of sync with the SndOut device, the writepos of the + // circular buffer will overtake the readpos, leading to a prolonged period + // of hopscotching read/write accesses (ie, lots of staticy crap sound for + // several seconds). + // + // Compromise: + // When an overrun occurs, we adapt by discarding a portion of the buffer. + // The older portion of the buffer is discarded rather than incoming data, + // so that the overall audio synchronization is better. + + if( free < nSamples ) + { + // Buffer overrun! + // Dump samples from the read portion of the buffer instead of dropping + // the newly written stuff. + + s32 comp; + + if( !timeStretchDisabled ) + { + comp = timeStretchOverrun(); + } + else + { + // Toss half the buffer plus whatever's being written anew: + comp = GetAlignedBufferSize( (m_size + nSamples ) / 2 ); + if( comp > (m_size-SndOutPacketSize) ) comp = m_size-SndOutPacketSize; + } + + m_data -= comp; + m_rpos = (m_rpos+comp) % m_size; + if( MsgOverruns() ) + ConLog(" * SPU2 > Overrun Compensation (%d packets tossed)\n", comp / SndOutPacketSize ); + lastPct = 0.0; // normalize the timestretcher + } + + // copy in two phases, since there's a chance the packet + // wraps around the buffer (it'd be nice to deal in packets only, but + // the timestretcher and DSP options require flexibility). 
+ + const int endPos = m_wpos + nSamples; + const int secondCopyLen = endPos - m_size; + StereoOut32* wposbuffer = &m_buffer[m_wpos]; + + m_data += nSamples; + if( secondCopyLen > 0 ) + { + nSamples -= secondCopyLen; + memcpy( m_buffer, &bData[nSamples], secondCopyLen * sizeof( *bData ) ); + m_wpos = secondCopyLen; + } + else + m_wpos += nSamples; + + memcpy( wposbuffer, bData, nSamples * sizeof( *bData ) ); +} + +void SndBuffer::Init() { if( mods[OutputModule] == NULL ) { - _sndInitFail(); - return 0; + _InitFail(); + return; } // initialize sound buffer // Buffer actually attempts to run ~50%, so allocate near double what // the requested latency is: + + m_rpos = 0; + m_wpos = 0; + m_data = 0; + try { - sndBuffer = new SndBufferImpl( SndOutLatencyMS * (timeStretchDisabled ? 1.5f : 2.0f ) ); - sndTempBuffer = new s32[SndOutPacketSize]; - sndTempBuffer16 = new s16[SndOutPacketSize]; + const float latencyMS = SndOutLatencyMS * (timeStretchDisabled ? 1.5f : 2.0f ); + m_size = GetAlignedBufferSize( (int)(latencyMS * SampleRate / 1000.0f ) ); + m_buffer = new StereoOut32[m_size]; + m_underrun_freeze = false; + + sndTempBuffer = new StereoOut32[SndOutPacketSize]; + sndTempBuffer16 = new StereoOut16[SndOutPacketSize]; } catch( std::bad_alloc& ) { // out of memory exception (most likely) - SysMessage( "Out of memory error occured while initializing SPU2." ); - _sndInitFail(); - return 0; + SysMessage( "Out of memory error occurred while initializing SPU2." ); + _InitFail(); + return; } // clear buffers! // Fixes loopy sounds on emu resets. 
- memset( sndTempBuffer, 0, sizeof(s32) * SndOutPacketSize ); - memset( sndTempBuffer16, 0, sizeof(s16) * SndOutPacketSize ); + memset( sndTempBuffer, 0, sizeof(StereoOut32) * SndOutPacketSize ); + memset( sndTempBuffer16, 0, sizeof(StereoOut16) * SndOutPacketSize ); sndTempProgress = 0; @@ -608,104 +273,78 @@ s32 SndInit() spdif_set51(mods[OutputModule]->Is51Out()); // initialize module - if( mods[OutputModule]->Init(sndBuffer) == -1 ) - { - _sndInitFail(); - } - - return 0; + if( mods[OutputModule]->Init() == -1 ) _InitFail(); } -void SndClose() +void SndBuffer::Cleanup() { mods[OutputModule]->Close(); - SAFE_DELETE_OBJ( sndBuffer ); + SAFE_DELETE_ARRAY( m_buffer ); SAFE_DELETE_ARRAY( sndTempBuffer ); SAFE_DELETE_ARRAY( sndTempBuffer16 ); - SAFE_DELETE_OBJ( pSoundTouch ); } -s32 SndWrite(s32 ValL, s32 ValR) +int SndBuffer::m_dsp_progress = 0; +int SndBuffer::m_dsp_writepos = 0; + +int SndBuffer::m_timestretch_progress = 0; + +void SndBuffer::Write( const StereoOut32& Sample ) { // Log final output to wavefile. - WaveDump::WriteCore( 1, CoreSrc_External, SndScaleVol(ValL), SndScaleVol(ValR) ); + WaveDump::WriteCore( 1, CoreSrc_External, Sample.DownSample() ); + + RecordWrite( Sample.DownSample() ); - RecordWrite(SndScaleVol(ValL),SndScaleVol(ValR)); - if(mods[OutputModule] == &NullOut) // null output doesn't need buffering or stretching! 
:p - return 0; - - sndTempBuffer[sndTempProgress++] = ValL; - sndTempBuffer[sndTempProgress++] = ValR; - + return; + + sndTempBuffer[sndTempProgress++] = Sample; + // If we haven't accumulated a full packet yet, do nothing more: - if(sndTempProgress < SndOutPacketSize) return 1; + if(sndTempProgress < SndOutPacketSize) return; + sndTempProgress = 0; - if(dspPluginEnabled) + if( dspPluginEnabled ) { - for(int i=0;i>1)<<1; + for( int i=0; iPredictDataWrite( (int)( sndTempProgress / eTempo ) ); - for(int i=0;iputSamples((float*)sndTempBuffer, sndTempProgress>>1); - - while( ( sndTempProgress = pSoundTouch->receiveSamples((float*)sndTempBuffer, sndTempProgress>>1)<<1 ) != 0 ) + // Some ugly code to ensure full packet handling: + int ei = 0; + while( m_dsp_progress >= SndOutPacketSize ) { - // [Air] [TODO] : Implement an SSE downsampler to int. - for(int i=0;iWriteSamples(sndTempBuffer, sndTempProgress); - progress = true; + for( int i=0; i 0 ) { - if( progress ) - { - if( ++ts_stats_logcounter > 300 ) - { - ts_stats_logcounter = 0; - ConLog( " * SPU2 > Timestretch Stats > %d%% of packets stretched.\n", - ( ts_stats_stretchblocks * 100 ) / ( ts_stats_normalblocks + ts_stats_stretchblocks ) ); - ts_stats_normalblocks = 0; - ts_stats_stretchblocks = 0; - } - } + memcpy( &sndTempBuffer16[ei], sndTempBuffer16, + sizeof(sndTempBuffer16[0]) * m_dsp_progress + ); } } else { - sndBuffer->WriteSamples(sndTempBuffer, sndTempProgress); - sndTempProgress=0; + if( !timeStretchDisabled ) + timeStretchWrite(); + else + _WriteSamples(sndTempBuffer, SndOutPacketSize); } - - return 1; } -s32 SndTest() +s32 SndBuffer::Test() { if( mods[OutputModule] == NULL ) return -1; @@ -713,10 +352,11 @@ s32 SndTest() return mods[OutputModule]->Test(); } -void SndConfigure(HWND parent, u32 module ) +void SndBuffer::Configure(HWND parent, u32 module ) { if( mods[module] == NULL ) return; mods[module]->Configure(parent); } + diff --git a/plugins/spu2-x/src/SndOut.h b/plugins/spu2-x/src/SndOut.h index 
ccb5da1355..6314725256 100644 --- a/plugins/spu2-x/src/SndOut.h +++ b/plugins/spu2-x/src/SndOut.h @@ -24,40 +24,310 @@ // Number of stereo samples per SndOut block. // All drivers must work in units of this size when communicating with // SndOut. -static const int SndOutPacketSize = 1024; +static const int SndOutPacketSize = 512; // Overall master volume shift. // Converts the mixer's 32 bit value into a 16 bit value. -static const int SndOutVolumeShift = 10; +static const int SndOutVolumeShift = 13; // Samplerate of the SPU2. For accurate playback we need to match this // exactly. Trying to scale samplerates and maintain SPU2's Ts timing accuracy // is too problematic. :) static const int SampleRate = 48000; -extern s32 SndInit(); -extern void SndClose(); -extern s32 SndWrite(s32 ValL, s32 ValR); -extern s32 SndTest(); -extern void SndConfigure(HWND parent, u32 outmodidx ); -extern bool SndGetStats(u32 *written, u32 *played); -extern s16 SndScaleVol( s32 inval ); - int FindOutputModuleById( const wchar_t* omodid ); +struct StereoOut16 +{ + s16 Left; + s16 Right; + + StereoOut16() : + Left( 0 ), + Right( 0 ) + { + } + + StereoOut16( const StereoOut32& src ) : + Left( (s16)src.Left ), + Right( (s16)src.Right ) + { + } + + StereoOut16( s16 left, s16 right ) : + Left( left ), + Right( right ) + { + } + + StereoOut32 UpSample() const; + + void ResampleFrom( const StereoOut32& src ) + { + // Use StereoOut32's built in conversion + *this = src.DownSample(); + } +}; + +struct StereoOutFloat +{ + float Left; + float Right; + + StereoOutFloat() : + Left( 0 ), + Right( 0 ) + { + } + + explicit StereoOutFloat( const StereoOut32& src ) : + Left( src.Left / 2147483647.0f ), + Right( src.Right / 2147483647.0f ) + { + } + + explicit StereoOutFloat( s32 left, s32 right ) : + Left( left / 2147483647.0f ), + Right( right / 2147483647.0f ) + { + } + + StereoOutFloat( float left, float right ) : + Left( left ), + Right( right ) + { + } +}; + +struct Stereo21Out16 +{ + s16 Left; + s16 
Right; + s16 LFE; + + void ResampleFrom( const StereoOut32& src ) + { + Left = src.Left >> SndOutVolumeShift; + Right = src.Right >> SndOutVolumeShift; + LFE = (src.Left + src.Right) >> (SndOutVolumeShift + 1); + } +}; + +struct StereoQuadOut16 +{ + s16 Left; + s16 Right; + s16 LeftBack; + s16 RightBack; + + void ResampleFrom( const StereoOut32& src ) + { + Left = src.Left >> SndOutVolumeShift; + Right = src.Right >> SndOutVolumeShift; + LeftBack = src.Left >> SndOutVolumeShift; + RightBack = src.Right >> SndOutVolumeShift; + } +}; + +struct Stereo41Out16 +{ + s16 Left; + s16 Right; + s16 LFE; + s16 LeftBack; + s16 RightBack; + + void ResampleFrom( const StereoOut32& src ) + { + Left = src.Left >> SndOutVolumeShift; + Right = src.Right >> SndOutVolumeShift; + LFE = (src.Left + src.Right) >> (SndOutVolumeShift + 1); + LeftBack = src.Left >> SndOutVolumeShift; + RightBack = src.Right >> SndOutVolumeShift; + } +}; + +struct Stereo51Out16 +{ + s16 Left; + s16 Right; + s16 Center; + s16 LFE; + s16 LeftBack; + s16 RightBack; + + // Implementation Note: Center and Subwoofer/LFE --> + // This method is simple and sounds nice. It relies on the speaker/soundcard + // systems to do their own low pass / crossover. Manual lowpass is wasted effort + // and can't match solid state results anyway. 
+ + void ResampleFrom( const StereoOut32& src ) + { + Left = src.Left >> SndOutVolumeShift; + Right = src.Right >> SndOutVolumeShift; + Center = (src.Left + src.Right) >> (SndOutVolumeShift + 1); + LFE = Center; + LeftBack = src.Left >> SndOutVolumeShift; + RightBack = src.Right >> SndOutVolumeShift; + } +}; + +struct Stereo71Out16 +{ + s16 Left; + s16 Right; + s16 Center; + s16 LFE; + s16 LeftBack; + s16 RightBack; + s16 LeftSide; + s16 RightSide; + + void ResampleFrom( const StereoOut32& src ) + { + Left = src.Left >> SndOutVolumeShift; + Right = src.Right >> SndOutVolumeShift; + Center = (src.Left + src.Right) >> (SndOutVolumeShift + 1); + LFE = Center; + LeftBack = src.Left >> SndOutVolumeShift; + RightBack = src.Right >> SndOutVolumeShift; + + LeftSide = src.Left >> (SndOutVolumeShift+1); + RightSide = src.Right >> (SndOutVolumeShift+1); + } +}; + +struct Stereo21Out32 +{ + s32 Left; + s32 Right; + s32 LFE; +}; + +struct Stereo41Out32 +{ + s32 Left; + s32 Right; + s32 LFE; + s32 LeftBack; + s32 RightBack; +}; + +struct Stereo51Out32 +{ + s32 Left; + s32 Right; + s32 Center; + s32 LFE; + s32 LeftBack; + s32 RightBack; +}; + +// Developer Note: This is a static class only (all static members). 
class SndBuffer { +private: + static bool m_underrun_freeze; + static s32 m_predictData; + static float lastPct; + + static StereoOut32* sndTempBuffer; + static StereoOut16* sndTempBuffer16; + + static int sndTempProgress; + static int m_dsp_progress; + static int m_dsp_writepos; + + static int m_timestretch_progress; + static int m_timestretch_writepos; + + static StereoOut32 *m_buffer; + static s32 m_size; + static s32 m_rpos; + static s32 m_wpos; + static s32 m_data; + + static float lastEmergencyAdj; + static float cTempo; + static float eTempo; + static int freezeTempo; + + + static void _InitFail(); + static void _WriteSamples(StereoOut32* bData, int nSamples); + static bool CheckUnderrunStatus( int& nSamples, int& quietSampleCount ); + + static void soundtouchInit(); + static void soundtouchCleanup(); + static void timeStretchWrite(); + static void timeStretchUnderrun(); + static s32 timeStretchOverrun(); + + static void PredictDataWrite( int samples ); + static float GetStatusPct(); + static void UpdateTempoChange(); + public: - virtual ~SndBuffer() {} + static void Init(); + static void Cleanup(); + static void Write( const StereoOut32& Sample ); + static s32 Test(); + static void Configure(HWND parent, u32 module ); + + // Note: When using with 32 bit output buffers, the user of this function is responsible + // for shifting the values to where they need to be manually. The fixed point depth of + // the sample output is determined by the SndOutVolumeShift, which is the number of bits + // to shift right to get a 16 bit result. 
+ template< typename T > + static void ReadSamples( T* bData ) + { + int nSamples = SndOutPacketSize; - virtual void WriteSamples(s32 *buffer, int nSamples)=0; - virtual void PauseOnWrite(bool doPause)=0; + // Problem: + // If the SPU2 gets even the least bit out of sync with the SndOut device, + // the readpos of the circular buffer will overtake the writepos, + // leading to a prolonged period of hopscotching read/write accesses (ie, + // lots of staticy crap sound for several seconds). + // + // Fix: + // If the read position overtakes the write position, abort the + // transfer immediately and force the SndOut driver to wait until + // the read buffer has filled up again before proceeding. + // This will cause one brief hiccup that can never exceed the user's + // set buffer length in duration. - virtual void ReadSamples( s16* bData )=0; - virtual void ReadSamples( s32* bData )=0; + int quietSamples; + if( CheckUnderrunStatus( nSamples, quietSamples ) ) + { + jASSUME( nSamples <= SndOutPacketSize ); - //virtual s32 GetBufferUsage()=0; - //virtual s32 GetBufferSize()=0; + // [Air] [TODO]: This loop is probably a candidate for SSE2 optimization. 
+ + const int endPos = m_rpos + nSamples; + const int secondCopyLen = endPos - m_size; + const StereoOut32* rposbuffer = &m_buffer[m_rpos]; + + m_data -= nSamples; + + if( secondCopyLen > 0 ) + { + nSamples -= secondCopyLen; + for( int i=0; i EffectsEndA ) + { + pos = EffectsStartA + ((ReverbX + offset) % (u32)EffectsBufferSize); + } + else if( pos < EffectsStartA ) + { + pos = EffectsEndA+1 - ((ReverbX + offset) % (u32)EffectsBufferSize ); + } + return pos; +} + +void V_Core::UpdateFeedbackBuffersA() +{ + RevBuffers.FB_SRC_A0 = EffectsBufferIndexer( Revb.MIX_DEST_A0 - Revb.FB_SRC_A ); + RevBuffers.FB_SRC_A1 = EffectsBufferIndexer( Revb.MIX_DEST_A1 - Revb.FB_SRC_A ); +} + +void V_Core::UpdateFeedbackBuffersB() +{ + RevBuffers.FB_SRC_B0 = EffectsBufferIndexer( Revb.MIX_DEST_B0 - Revb.FB_SRC_B ); + RevBuffers.FB_SRC_B1 = EffectsBufferIndexer( Revb.MIX_DEST_B1 - Revb.FB_SRC_B ); +} void V_Core::UpdateEffectsBufferSize() { - EffectsBufferSize = EffectsEndA - EffectsStartA + 1; + ReverbX = 0; + + const s32 newbufsize = EffectsEndA - EffectsStartA + 1; + if( !RevBuffers.NeedsUpdated && newbufsize == EffectsBufferSize ) return; + + RevBuffers.NeedsUpdated = false; + + if( EffectsBufferSize == 0 ) return; + + // Rebuild buffer indexers. 
+ + RevBuffers.ACC_SRC_A0 = EffectsBufferIndexer( Revb.ACC_SRC_A0 ); + RevBuffers.ACC_SRC_A1 = EffectsBufferIndexer( Revb.ACC_SRC_A1 ); + RevBuffers.ACC_SRC_B0 = EffectsBufferIndexer( Revb.ACC_SRC_B0 ); + RevBuffers.ACC_SRC_B1 = EffectsBufferIndexer( Revb.ACC_SRC_B1 ); + RevBuffers.ACC_SRC_C0 = EffectsBufferIndexer( Revb.ACC_SRC_C0 ); + RevBuffers.ACC_SRC_C1 = EffectsBufferIndexer( Revb.ACC_SRC_C1 ); + RevBuffers.ACC_SRC_D0 = EffectsBufferIndexer( Revb.ACC_SRC_D0 ); + RevBuffers.ACC_SRC_D1 = EffectsBufferIndexer( Revb.ACC_SRC_D1 ); + + UpdateFeedbackBuffersA(); + UpdateFeedbackBuffersB(); + + RevBuffers.IIR_DEST_A0 = EffectsBufferIndexer( Revb.IIR_DEST_A0 ); + RevBuffers.IIR_DEST_A1 = EffectsBufferIndexer( Revb.IIR_DEST_A1 ); + RevBuffers.IIR_DEST_B0 = EffectsBufferIndexer( Revb.IIR_DEST_B0 ); + RevBuffers.IIR_DEST_B1 = EffectsBufferIndexer( Revb.IIR_DEST_B1 ); + + RevBuffers.IIR_SRC_A0 = EffectsBufferIndexer( Revb.IIR_SRC_A0 ); + RevBuffers.IIR_SRC_A1 = EffectsBufferIndexer( Revb.IIR_SRC_A1 ); + RevBuffers.IIR_SRC_B0 = EffectsBufferIndexer( Revb.IIR_SRC_B0 ); + RevBuffers.IIR_SRC_B1 = EffectsBufferIndexer( Revb.IIR_SRC_B1 ); + + RevBuffers.MIX_DEST_A0 = EffectsBufferIndexer( Revb.MIX_DEST_A0 ); + RevBuffers.MIX_DEST_A1 = EffectsBufferIndexer( Revb.MIX_DEST_A1 ); + RevBuffers.MIX_DEST_B0 = EffectsBufferIndexer( Revb.MIX_DEST_B0 ); + RevBuffers.MIX_DEST_B1 = EffectsBufferIndexer( Revb.MIX_DEST_B1 ); } void V_Voice::Start() @@ -379,6 +444,11 @@ static s32 GetVol32( u16 src ) return (((s32)src) << 16 ) | ((src<<1) & 0xffff); } +void V_VolumeSlide::RegSet( u16 src ) +{ + Value = GetVol32( src ); +} + void SPU_ps1_write(u32 mem, u16 value) { bool show=true; @@ -393,15 +463,15 @@ void SPU_ps1_write(u32 mem, u16 value) switch(vval) { case 0: //VOLL (Volume L) - Cores[0].Voices[voice].VolumeL.Mode = 0; - Cores[0].Voices[voice].VolumeL.Value = GetVol32( value<<1 ); - Cores[0].Voices[voice].VolumeL.Reg_VOL = value; + Cores[0].Voices[voice].Volume.Left.Mode = 0; + 
Cores[0].Voices[voice].Volume.Left.RegSet( value << 1 ); + Cores[0].Voices[voice].Volume.Left.Reg_VOL = value; break; case 1: //VOLR (Volume R) - Cores[0].Voices[voice].VolumeR.Mode = 0; - Cores[0].Voices[voice].VolumeR.Value = GetVol32( value<<1 ); - Cores[0].Voices[voice].VolumeR.Reg_VOL = value; + Cores[0].Voices[voice].Volume.Right.Mode = 0; + Cores[0].Voices[voice].Volume.Right.RegSet( value << 1 ); + Cores[0].Voices[voice].Volume.Right.Reg_VOL = value; break; case 2: Cores[0].Voices[voice].Pitch = value; break; @@ -437,19 +507,22 @@ void SPU_ps1_write(u32 mem, u16 value) else switch(reg) { case 0x1d80:// Mainvolume left - Cores[0].MasterL.Mode = 0; - Cores[0].MasterL.Value = GetVol32( value ); - break; + Cores[0].MasterVol.Left.Mode = 0; + Cores[0].MasterVol.Left.RegSet( value ); + break; + case 0x1d82:// Mainvolume right - Cores[0].MasterL.Mode = 0; - Cores[0].MasterR.Value = GetVol32( value ); - break; + Cores[0].MasterVol.Right.Mode = 0; + Cores[0].MasterVol.Right.RegSet( value ); + break; + case 0x1d84:// Reverberation depth left - Cores[0].FxL = GetVol32( value ); - break; + Cores[0].FxVol.Left = GetVol32( value ); + break; + case 0x1d86:// Reverberation depth right - Cores[0].FxR = GetVol32( value ); - break; + Cores[0].FxVol.Right = GetVol32( value ); + break; case 0x1d88:// Voice ON (0-15) SPU2_FastWrite(REG_S_KON,value); @@ -463,65 +536,74 @@ void SPU_ps1_write(u32 mem, u16 value) break; case 0x1d8e:// Voice OFF (16-23) SPU2_FastWrite(REG_S_KOFF+2,value); - break; + break; case 0x1d90:// Channel FM (pitch lfo) mode (0-15) SPU2_FastWrite(REG_S_PMON,value); - break; + break; + case 0x1d92:// Channel FM (pitch lfo) mode (16-23) SPU2_FastWrite(REG_S_PMON+2,value); - break; + break; case 0x1d94:// Channel Noise mode (0-15) SPU2_FastWrite(REG_S_NON,value); - break; + break; + case 0x1d96:// Channel Noise mode (16-23) SPU2_FastWrite(REG_S_NON+2,value); - break; + break; case 0x1d98:// Channel Reverb mode (0-15) SPU2_FastWrite(REG_S_VMIXEL,value); 
SPU2_FastWrite(REG_S_VMIXER,value); - break; + break; + case 0x1d9a:// Channel Reverb mode (16-23) SPU2_FastWrite(REG_S_VMIXEL+2,value); SPU2_FastWrite(REG_S_VMIXER+2,value); - break; + break; + case 0x1d9c:// Channel Reverb mode (0-15) SPU2_FastWrite(REG_S_VMIXL,value); SPU2_FastWrite(REG_S_VMIXR,value); - break; + break; + case 0x1d9e:// Channel Reverb mode (16-23) SPU2_FastWrite(REG_S_VMIXL+2,value); SPU2_FastWrite(REG_S_VMIXR+2,value); - break; + break; case 0x1da2:// Reverb work area start - { - u32 val=(u32)value <<8; + { + u32 val = (u32)value << 8; - SPU2_FastWrite(REG_A_ESA, val&0xFFFF); - SPU2_FastWrite(REG_A_ESA+2,val>>16); - } - break; + SPU2_FastWrite(REG_A_ESA, val&0xFFFF); + SPU2_FastWrite(REG_A_ESA+2,val>>16); + } + break; + case 0x1da4: Cores[0].IRQA=(u32)value<<8; - break; + break; + case 0x1da6: Cores[0].TSA=(u32)value<<8; - break; + break; case 0x1daa: SPU2_FastWrite(REG_C_ATTR,value); - break; + break; + case 0x1dae: SPU2_FastWrite(REG_P_STATX,value); - break; + break; + case 0x1da8:// Spu Write to Memory DmaWrite(0,value); show=false; - break; + break; } if(show) FileLog("[%10d] (!) 
SPU write mem %08x value %04x\n",Cycles,mem,value); @@ -546,27 +628,31 @@ u16 SPU_ps1_read(u32 mem) case 0: //VOLL (Volume L) //value=Cores[0].Voices[voice].VolumeL.Mode; //value=Cores[0].Voices[voice].VolumeL.Value; - value=Cores[0].Voices[voice].VolumeL.Reg_VOL; break; + value = Cores[0].Voices[voice].Volume.Left.Reg_VOL; + break; + case 1: //VOLR (Volume R) //value=Cores[0].Voices[voice].VolumeR.Mode; //value=Cores[0].Voices[voice].VolumeR.Value; - value=Cores[0].Voices[voice].VolumeR.Reg_VOL; break; - case 2: value=Cores[0].Voices[voice].Pitch; break; - case 3: value=Cores[0].Voices[voice].StartA; break; - case 4: value=Cores[0].Voices[voice].ADSR.Reg_ADSR1; break; - case 5: value=Cores[0].Voices[voice].ADSR.Reg_ADSR2; break; - case 6: value=Cores[0].Voices[voice].ADSR.Value >> 16; break; - case 7: value=Cores[0].Voices[voice].LoopStartA; break; + value = Cores[0].Voices[voice].Volume.Right.Reg_VOL; + break; + + case 2: value = Cores[0].Voices[voice].Pitch; break; + case 3: value = Cores[0].Voices[voice].StartA; break; + case 4: value = Cores[0].Voices[voice].ADSR.Reg_ADSR1; break; + case 5: value = Cores[0].Voices[voice].ADSR.Reg_ADSR2; break; + case 6: value = Cores[0].Voices[voice].ADSR.Value >> 16; break; + case 7: value = Cores[0].Voices[voice].LoopStartA; break; jNO_DEFAULT; } } else switch(reg) { - case 0x1d80: value = Cores[0].MasterL.Value>>16; break; - case 0x1d82: value = Cores[0].MasterR.Value>>16; break; - case 0x1d84: value = Cores[0].FxL>>16; break; - case 0x1d86: value = Cores[0].FxR>>16; break; + case 0x1d80: value = Cores[0].MasterVol.Left.Value >> 16; break; + case 0x1d82: value = Cores[0].MasterVol.Right.Value >> 16; break; + case 0x1d84: value = Cores[0].FxVol.Left >> 16; break; + case 0x1d86: value = Cores[0].FxVol.Right >> 16; break; case 0x1d88: value = 0; break; case 0x1d8a: value = 0; break; @@ -585,8 +671,11 @@ u16 SPU_ps1_read(u32 mem) case 0x1d9e: value = Cores[0].Regs.VMIXL>>16; break; case 0x1da2: - value = 
Cores[0].EffectsStartA>>3; - Cores[0].UpdateEffectsBufferSize(); + if( value != Cores[0].EffectsStartA>>3 ) + { + value = Cores[0].EffectsStartA>>3; + Cores[0].UpdateEffectsBufferSize(); + } break; case 0x1da4: value = Cores[0].IRQA>>3; break; case 0x1da6: value = Cores[0].TSA>>3; break; @@ -607,15 +696,49 @@ u16 SPU_ps1_read(u32 mem) return value; } -static u32 SetLoWord( u32 var, u16 writeval ) +// Ah the joys of endian-specific code! :D +static __forceinline u32 SetHiWord( u32& src, u16 value ) { - return (var & 0xFFFF0000) | writeval; + ((u16*)&src)[1] = value; + return src; } - -static u32 SetHiWord( u32 var, u16 writeval ) +static __forceinline u32 SetLoWord( u32& src, u16 value ) { - return (var & 0x0000FFFF) | (writeval<<16); + ((u16*)&src)[0] = value; + return src; +} + +static __forceinline s32 SetHiWord( s32& src, u16 value ) +{ + ((u16*)&src)[1] = value; + return src; +} + +static __forceinline s32 SetLoWord( s32& src, u16 value ) +{ + ((u16*)&src)[0] = value; + return src; +} + +static __forceinline u16 GetHiWord( u32& src ) +{ + return ((u16*)&src)[1]; +} + +static __forceinline u16 GetLoWord( u32& src ) +{ + return ((u16*)&src)[0]; +} + +static __forceinline u16 GetHiWord( s32& src ) +{ + return ((u16*)&src)[1]; +} + +static __forceinline u16 GetLoWord( s32& src ) +{ + return ((u16*)&src)[0]; } __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) @@ -637,7 +760,9 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) case 0: //VOLL (Volume L) case 1: //VOLR (Volume R) { - V_Volume& thisvol = (param==0) ? thisvoice.VolumeL : thisvoice.VolumeR; + V_VolumeSlide& thisvol = (param==0) ? 
thisvoice.Volume.Left : thisvoice.Volume.Right; + thisvol.Reg_VOL = value; + if (value & 0x8000) // +Lin/-Lin/+Exp/-Exp { thisvol.Mode = (value & 0xF000)>>12; @@ -649,11 +774,10 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) // Volumes range from 0x3fff to 0x7fff, with 0x4000 serving as // the "sign" bit, so a simple bitwise extension will do the trick: - thisvol.Value = GetVol32( value<<1 ); + thisvol.RegSet( value<<1 ); thisvol.Mode = 0; thisvol.Increment = 0; } - thisvol.Reg_VOL = value; } break; @@ -677,8 +801,8 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) ConLog( "* SPU2: Mysterious ADSR Volume Set to 0x%x", value ); break; - case 6: thisvoice.VolumeL.Value = GetVol32( value ); break; - case 7: thisvoice.VolumeR.Value = GetVol32( value ); break; + case 6: thisvoice.Volume.Left.RegSet( value ); break; + case 7: thisvoice.Volume.Right.RegSet( value ); break; jNO_DEFAULT; } @@ -727,6 +851,15 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) *(regtable[mem>>1]) = value; UpdateSpdifMode(); } + else if( mem >= R_FB_SRC_A && mem < REG_A_EEA ) + { + // Signal to the Reverb code that the effects buffers need to be re-aligned. + // This is both simple, efficient, and safe, since we only want to re-align + // buffers after both hi and lo words have been written. 
+ + *(regtable[mem>>1]) = value; + Cores[core].RevBuffers.NeedsUpdated = true; + } else { switch(omem) @@ -783,22 +916,22 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) case REG_S_PMON: vx=2; for (vc=1;vc<16;vc++) { Cores[core].Voices[vc].Modulated=(s8)((value & vx)/vx); vx<<=1; } - Cores[core].Regs.PMON = SetLoWord( Cores[core].Regs.PMON, value ); + SetLoWord( Cores[core].Regs.PMON, value ); break; case (REG_S_PMON + 2): vx=1; for (vc=16;vc<24;vc++) { Cores[core].Voices[vc].Modulated=(s8)((value & vx)/vx); vx<<=1; } - Cores[core].Regs.PMON = SetHiWord( Cores[core].Regs.PMON, value ); + SetHiWord( Cores[core].Regs.PMON, value ); break; case REG_S_NON: vx=1; for (vc=0;vc<16;vc++) { Cores[core].Voices[vc].Noise=(s8)((value & vx)/vx); vx<<=1; } - Cores[core].Regs.NON = SetLoWord( Cores[core].Regs.NON, value ); + SetLoWord( Cores[core].Regs.NON, value ); break; case (REG_S_NON + 2): vx=1; for (vc=16;vc<24;vc++) { Cores[core].Voices[vc].Noise=(s8)((value & vx)/vx); vx<<=1; } - Cores[core].Regs.NON = SetHiWord( Cores[core].Regs.NON, value ); + SetHiWord( Cores[core].Regs.NON, value ); break; // Games like to repeatedly write these regs over and over with the same value, hence @@ -895,26 +1028,23 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) // Reverb Start and End Address Writes! // * Yes, these are backwards from all the volumes -- the hiword comes FIRST (wtf!) - // * End position is a hiword only! Lowword is always ffff. + // * End position is a hiword only! Loword is always ffff. // * The Reverb buffer position resets on writes to StartA. It probably resets // on writes to End too. Docs don't say, but they're for PSX, which couldn't // change the end address anyway. 
case REG_A_ESA: - Cores[core].EffectsStartA = (Cores[core].EffectsStartA & 0x0000FFFF) | (value<<16); - Cores[core].ReverbX = 0; + SetHiWord( Cores[core].EffectsStartA, value ); Cores[core].UpdateEffectsBufferSize(); break; case (REG_A_ESA + 2): - Cores[core].EffectsStartA = (Cores[core].EffectsStartA & 0xFFFF0000) | value; - Cores[core].ReverbX = 0; + SetLoWord( Cores[core].EffectsStartA, value ); Cores[core].UpdateEffectsBufferSize(); break; case REG_A_EEA: Cores[core].EffectsEndA = ((u32)value<<16) | 0xFFFF; - Cores[core].ReverbX = 0; Cores[core].UpdateEffectsBufferSize(); break; @@ -923,7 +1053,7 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) case REG_P_MVOLL: case REG_P_MVOLR: { - V_Volume& thisvol = (omem==REG_P_MVOLL) ? Cores[core].MasterL : Cores[core].MasterR; + V_VolumeSlide& thisvol = (omem==REG_P_MVOLL) ? Cores[core].MasterVol.Left : Cores[core].MasterVol.Right; if( value & 0x8000 ) // +Lin/-Lin/+Exp/-Exp { @@ -945,27 +1075,27 @@ __forceinline void SPU2_FastWrite( u32 rmem, u16 value ) break; case REG_P_EVOLL: - Cores[core].FxL = GetVol32( value ); + Cores[core].FxVol.Left = GetVol32( value ); break; case REG_P_EVOLR: - Cores[core].FxR = GetVol32( value ); + Cores[core].FxVol.Right = GetVol32( value ); break; case REG_P_AVOLL: - Cores[core].ExtL = GetVol32( value ); + Cores[core].ExtVol.Left = GetVol32( value ); break; case REG_P_AVOLR: - Cores[core].ExtR = GetVol32( value ); + Cores[core].ExtVol.Right = GetVol32( value ); break; case REG_P_BVOLL: - Cores[core].InpL = GetVol32( value ); + Cores[core].InpVol.Left = GetVol32( value ); break; case REG_P_BVOLR: - Cores[core].InpR = GetVol32( value ); + Cores[core].InpVol.Right = GetVol32( value ); break; case REG_S_ADMAS: @@ -1012,7 +1142,7 @@ void StartVoices(int core, u32 value) (thisvc.WetL)?"+":"-",(thisvc.WetR)?"+":"-", *(u8*)GetMemPtr(thisvc.StartA),*(u8 *)GetMemPtr((thisvc.StartA)+1), thisvc.Pitch, - thisvc.VolumeL.Value,thisvc.VolumeR.Value, + 
thisvc.Volume.Left.Value,thisvc.Volume.Right.Value, thisvc.ADSR.Reg_ADSR1,thisvc.ADSR.Reg_ADSR2); } } diff --git a/plugins/spu2-x/src/Spu2.h b/plugins/spu2-x/src/Spu2.h index 992a8c1297..8b21f6a9ef 100644 --- a/plugins/spu2-x/src/Spu2.h +++ b/plugins/spu2-x/src/Spu2.h @@ -182,21 +182,25 @@ extern void DspUpdate(); // to let the Dsp process window messages extern void RecordStart(); extern void RecordStop(); -extern void RecordWrite(s16 left, s16 right); +extern void RecordWrite( const StereoOut16& sample ); extern void UpdateSpdifMode(); extern void LowPassFilterInit(); extern void InitADSR(); extern void CalculateADSR( V_Voice& vc ); +extern void __fastcall ReadInput( V_Core& thiscore, StereoOut32& PData ); + + ////////////////////////////// // The Mixer Section // ////////////////////////////// extern void Mix(); -extern s32 clamp_mix(s32 x, u8 bitshift=0); +extern s32 clamp_mix( s32 x, u8 bitshift=0 ); +extern void clamp_mix( StereoOut32& sample, u8 bitshift=0 ); extern void Reverb_AdvanceBuffer( V_Core& thiscore ); -extern void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR); +extern StereoOut32 DoReverb( V_Core& thiscore, const StereoOut32& Input ); extern s32 MulShr32( s32 srcval, s32 mulval ); //#define PCM24_S1_INTERLEAVE diff --git a/plugins/spu2-x/src/Timestretcher.cpp b/plugins/spu2-x/src/Timestretcher.cpp new file mode 100644 index 0000000000..e82d46165a --- /dev/null +++ b/plugins/spu2-x/src/Timestretcher.cpp @@ -0,0 +1,333 @@ +/* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2 +* Developed and maintained by the Pcsx2 Development Team. +* +* Original portions from SPU2ghz are (c) 2008 by David Quintana [gigaherz] +* +* This library is free software; you can redistribute it and/or modify it under +* the terms of the GNU Lesser General Public License as published by the Free +* Software Foundation; either version 2.1 of the the License, or (at your +* option) any later version. 
+* +* This library is distributed in the hope that it will be useful, but WITHOUT +* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +* FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +* for more details. +* +* You should have received a copy of the GNU Lesser General Public License along +* with this library; if not, write to the Free Software Foundation, Inc., 59 +* Temple Place, Suite 330, Boston, MA 02111-1307 USA +* +*/ + +#include "spu2.h" +#include "SoundTouch/SoundTouch.h" +#include "SoundTouch/WavFile.h" + + +static soundtouch::SoundTouch* pSoundTouch = NULL; +static int ts_stats_stretchblocks = 0; +static int ts_stats_normalblocks = 0; +static int ts_stats_logcounter = 0; + + +// data prediction amount, used to "commit" data that hasn't +// finished timestretch processing. +s32 SndBuffer::m_predictData; + +// records last buffer status (fill %, range -100 to 100, with 0 being 50% full) +float SndBuffer::lastPct; +float SndBuffer::lastEmergencyAdj; + +float SndBuffer::cTempo = 1; +float SndBuffer::eTempo = 1; +int SndBuffer::freezeTempo = 0; + +void SndBuffer::PredictDataWrite( int samples ) +{ + m_predictData += samples; +} + +// Calculate the buffer status percentage. +// Returns range from -1.0 to 1.0 +// 1.0 = buffer overflow! +// 0.0 = buffer nominal (50% full) +// -1.0 = buffer underflow! +float SndBuffer::GetStatusPct() +{ + // Get the buffer status of the output driver too, so that we can + // obtain a more accurate overall buffer status. 
+ + int drvempty = mods[OutputModule]->GetEmptySampleCount(); // / 2; + + //ConLog( "Data %d >>> driver: %d predict: %d\n", data, drvempty, predictData ); + + float result = (float)(m_data + m_predictData - drvempty) - (m_size/2); + result /= (m_size/2); + return result; +} + +void SndBuffer::UpdateTempoChange() +{ + if( --freezeTempo > 0 ) + { + return; + } + + float statusPct = GetStatusPct(); + float pctChange = statusPct - lastPct; + + float tempoChange; + float emergencyAdj = 0; + float newcee = cTempo; // workspace var. for cTempo + + // IMPORTANT! + // If you plan to tweak these values, make sure you're using a release build + // OUTSIDE THE DEBUGGER to test it! The Visual Studio debugger can really cause + // erratic behavior in the audio buffers, and makes the timestretcher seem a + // lot more inconsistent than it really is. + + // We have two factors. + // * Distance from nominal buffer status (50% full) + // * The change from previous update to this update. + + // Prediction based on the buffer change: + // (linear seems to work better here) + + tempoChange = pctChange * 0.75f; + + if( statusPct * tempoChange < 0.0f ) + { + // only apply tempo change if it is in synch with the buffer status. + // In other words, if the buffer is high (over 0%), and is decreasing, + // ignore it. It'll just muck things up. + + tempoChange = 0; + } + + // Sudden spikes in framerate can cause the nominal buffer status + // to go critical, in which case we have to enact an emergency + // stretch. The following cubic formulas do that. Values near + // the extremities give much larger results than those near 0. + // And the value is added only this time, and does not accumulate. + // (otherwise a large value like this would cause problems down the road) + + // Constants: + // Weight - weights the statusPct's "emergency" consideration. 
+ // higher values here will make the buffer perform more drastic + // compensations at the outer edges of the buffer (at -75 or +75% + // or beyond, for example). + + // Range - scales the adjustment to the given range (more or less). + // The actual range is dependent on the weight used, so if you increase + // Weight you'll usually want to decrease Range somewhat to compensate. + + // Prediction based on the buffer fill status: + + const float statusWeight = 2.99f; + const float statusRange = 0.068f; + + // "non-emergency" deadzone: In this area stretching will be strongly discouraged. + // Note: due to the nature of timestretch latency, it's always a wee bit harder to + // cope with low fps (underruns) than it is high fps (overruns). So to help out a + // little, the low-end portions of this check are less forgiving than the high-sides. + + if( cTempo < 0.965f || cTempo > 1.060f || + pctChange < -0.38f || pctChange > 0.54f || + statusPct < -0.32f || statusPct > 0.39f || + eTempo < 0.89f || eTempo > 1.19f ) + { + emergencyAdj = ( pow( statusPct*statusWeight, 3.0f ) * statusRange); + } + + // Smooth things out by factoring our previous adjustment into this one. + // It helps make the system 'feel' a little smarter by giving it at least + // one packet worth of history to help work off of: + + emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f ); + + lastEmergencyAdj = emergencyAdj; + lastPct = statusPct; + + // Accumulate a fraction of the tempo change into the tempo itself. + // This helps the system run "smarter" to games that run consistently + // fast or slow by altering the base tempo to something closer to the + // game's active speed. In tests most games normalize within 2 seconds + // at 100ms latency, which is pretty good (larger buffers normalize even + // quicker). + + newcee += newcee * (tempoChange+emergencyAdj) * 0.03f; + + // Apply tempoChange as a scale of cTempo. That way the effect is proportional + // to the current tempo. 
(otherwise tempos rate of change at the extremes would + // be too drastic) + + float newTempo = newcee + ( emergencyAdj * cTempo ); + + // ... and as a final optimization, only stretch if the new tempo is outside + // a nominal threshold. Keep this threshold check small, because it could + // cause some serious side effects otherwise. (enlarging the cTempo check above + // is usually better/safer) + if( newTempo < 0.970f || newTempo > 1.045f ) + { + cTempo = (float)newcee; + + if( newTempo < 0.10f ) newTempo = 0.10f; + else if( newTempo > 10.0f ) newTempo = 10.0f; + + if( cTempo < 0.15f ) cTempo = 0.15f; + else if( cTempo > 7.5f ) cTempo = 7.5f; + + pSoundTouch->setTempo( eTempo = (float)newTempo ); + ts_stats_stretchblocks++; + + /*ConLog(" * SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n", + //(relation < 0.0) ? "Normalize" : "", + (int)(tempoChange * 100.0 * 0.03), + (int)(emergencyAdj * 100.0), + (int)(cTempo * 100.0), + (int)(newTempo * 100.0), + (int)(statusPct * 100.0) + );*/ + } + else + { + // Nominal operation -- turn off stretching. + // note: eTempo 'slides' toward 1.0 for smoother audio and better + // protection against spikes. + if( cTempo != 1.0f ) + { + cTempo = 1.0f; + eTempo = ( 1.0f + eTempo ) * 0.5f; + pSoundTouch->setTempo( eTempo ); + } + else + { + if( eTempo != cTempo ) + pSoundTouch->setTempo( eTempo=cTempo ); + ts_stats_normalblocks++; + } + } +} + +void SndBuffer::timeStretchUnderrun() +{ + // timeStretcher failed it's job. We need to slow down the audio some. + + cTempo -= (cTempo * 0.12f); + eTempo -= (eTempo * 0.30f); + if( eTempo < 0.1f ) eTempo = 0.1f; + pSoundTouch->setTempo( eTempo ); +} + +s32 SndBuffer::timeStretchOverrun() +{ + // If we overran it means the timestretcher failed. We need to speed + // up audio playback. 
+ cTempo += cTempo * 0.12f; + eTempo += eTempo * 0.40f; + if( eTempo > 7.5f ) eTempo = 7.5f; + pSoundTouch->setTempo( eTempo ); + + // Throw out just a little bit (two packets worth) to help + // give the TS some room to work: + + return SndOutPacketSize*2; +} + +static void CvtPacketToFloat( StereoOut32* srcdest ) +{ + StereoOutFloat* dest = (StereoOutFloat*)srcdest; + const StereoOut32* src = (StereoOut32*)srcdest; + for( uint i=0; iputSamples( (float*)sndTempBuffer, SndOutPacketSize ); + + int tempProgress; + while( tempProgress = pSoundTouch->receiveSamples( (float*)sndTempBuffer, SndOutPacketSize), + tempProgress != 0 ) + { + // Hint: It's assumed that pSoundTouch will return chunks of 128 bytes (it always does as + // long as the SSE optimizations are enabled), which means we can do our own SSE opts here. + + CvtPacketToInt( sndTempBuffer, tempProgress ); + _WriteSamples( sndTempBuffer, tempProgress ); + progress = true; + } + + UpdateTempoChange(); + + if( MsgOverruns() ) + { + if( progress ) + { + if( ++ts_stats_logcounter > 300 ) + { + ts_stats_logcounter = 0; + ConLog( " * SPU2 > Timestretch Stats > %d%% of packets stretched.\n", + ( ts_stats_stretchblocks * 100 ) / ( ts_stats_normalblocks + ts_stats_stretchblocks ) ); + ts_stats_normalblocks = 0; + ts_stats_stretchblocks = 0; + } + } + } +} + +void SndBuffer::soundtouchInit() +{ + pSoundTouch = new soundtouch::SoundTouch(); + pSoundTouch->setSampleRate(SampleRate); + pSoundTouch->setChannels(2); + + pSoundTouch->setSetting( SETTING_USE_QUICKSEEK, 0 ); + pSoundTouch->setSetting( SETTING_USE_AA_FILTER, 0 ); + + pSoundTouch->setSetting( SETTING_SEQUENCE_MS, SoundtouchCfg::SequenceLenMS ); + pSoundTouch->setSetting( SETTING_SEEKWINDOW_MS, SoundtouchCfg::SeekWindowMS ); + pSoundTouch->setSetting( SETTING_OVERLAP_MS, SoundtouchCfg::OverlapMS ); + + pSoundTouch->setTempo(1); + + // some timestretch management vars: + + cTempo = 1.0; + eTempo = 1.0; + lastPct = 0; + lastEmergencyAdj = 0; + + // just freeze tempo 
changes for a while at startup. + // the driver buffers are bogus anyway. + freezeTempo = 8; + m_predictData = 0; +} + +void SndBuffer::soundtouchCleanup() +{ + SAFE_DELETE_OBJ( pSoundTouch ); +} diff --git a/plugins/spu2-x/src/Wavedump_wav.cpp b/plugins/spu2-x/src/Wavedump_wav.cpp index b3f8d225dd..c157328942 100644 --- a/plugins/spu2-x/src/Wavedump_wav.cpp +++ b/plugins/spu2-x/src/Wavedump_wav.cpp @@ -83,14 +83,16 @@ namespace WaveDump } } - void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right ) + void WriteCore( uint coreidx, CoreSourceType src, const StereoOut16& sample ) { if( !IsDevBuild ) return; if( m_CoreWav[coreidx][src] != NULL ) - { - s16 buffer[2] = { left, right }; - m_CoreWav[coreidx][src]->write( buffer, 2 ); - } + m_CoreWav[coreidx][src]->write( (s16*)&sample, 2 ); + } + + void WriteCore( uint coreidx, CoreSourceType src, s16 left, s16 right ) + { + WriteCore( coreidx, src, StereoOut16( left, right ) ); } } @@ -116,10 +118,8 @@ void RecordStop() SAFE_DELETE_OBJ( m_wavrecord ); } -void RecordWrite(s16 left, s16 right) +void RecordWrite( const StereoOut16& sample ) { if( m_wavrecord == NULL ) return; - - s16 buffer[2] = { left, right }; - m_wavrecord->write( buffer, 2 ); + m_wavrecord->write( (s16*)&sample, 2 ); } diff --git a/plugins/spu2-x/src/Win32/Config.cpp b/plugins/spu2-x/src/Win32/Config.cpp index 8b92f151e5..5e36fe52d8 100644 --- a/plugins/spu2-x/src/Win32/Config.cpp +++ b/plugins/spu2-x/src/Win32/Config.cpp @@ -33,30 +33,32 @@ static const int LATENCY_MIN = 40; int AutoDMAPlayRate[2] = {0,0}; // MIXING -int Interpolation=1; +int Interpolation = 1; /* values: 0: no interpolation (use nearest) 1. linear interpolation 2. 
cubic interpolation */ -bool EffectsDisabled=false; +bool EffectsDisabled = false; // OUTPUT -int SndOutLatencyMS=160; -bool timeStretchDisabled=false; +int SndOutLatencyMS = 160; +bool timeStretchDisabled = false; -u32 OutputModule=0; //OUTPUT_DSOUND; +u32 OutputModule = 0; CONFIG_DSOUNDOUT Config_DSoundOut; CONFIG_WAVEOUT Config_WaveOut; CONFIG_XAUDIO2 Config_XAudio2; // DSP -bool dspPluginEnabled=false; -int dspPluginModule=0; +bool dspPluginEnabled = false; +int dspPluginModule = 0; wchar_t dspPlugin[256]; +bool StereoExpansionDisabled = true; + /*****************************************************************************/ void ReadSettings() @@ -69,7 +71,8 @@ void ReadSettings() timeStretchDisabled = CfgReadBool( _T("OUTPUT"), _T("Disable_Timestretch"), false ); EffectsDisabled = CfgReadBool( _T("MIXING"), _T("Disable_Effects"), false ); - SndOutLatencyMS=CfgReadInt(_T("OUTPUT"),_T("Latency"), 160); + StereoExpansionDisabled = CfgReadBool( _T("OUTPUT"), _T("Disable_StereoExpansion"), false ); + SndOutLatencyMS = CfgReadInt(_T("OUTPUT"),_T("Latency"), 160); wchar_t omodid[128]; CfgReadStr( _T("OUTPUT"), _T("Output_Module"), omodid, 127, XAudio2Out->GetIdent() ); @@ -118,9 +121,10 @@ void WriteSettings() CfgWriteBool(_T("MIXING"),_T("Disable_Effects"),EffectsDisabled); - CfgWriteStr(_T("OUTPUT"),_T("Output_Module"),mods[OutputModule]->GetIdent() ); - CfgWriteInt(_T("OUTPUT"),_T("Latency"),SndOutLatencyMS); - CfgWriteBool(_T("OUTPUT"),_T("Disable_Timestretch"),timeStretchDisabled); + CfgWriteStr(_T("OUTPUT"),_T("Output_Module"), mods[OutputModule]->GetIdent() ); + CfgWriteInt(_T("OUTPUT"),_T("Latency"), SndOutLatencyMS); + CfgWriteBool(_T("OUTPUT"),_T("Disable_Timestretch"), timeStretchDisabled); + CfgWriteBool(_T("OUTPUT"),_T("Disable_StereoExpansion"), StereoExpansionDisabled); if( Config_DSoundOut.Device.empty() ) Config_DSoundOut.Device = _T("default"); if( Config_WaveOut.Device.empty() ) Config_WaveOut.Device = _T("default"); @@ -181,6 +185,7 @@ BOOL 
CALLBACK ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam) EnableWindow( GetDlgItem( hWnd, IDC_OPEN_CONFIG_DEBUG ), DebugEnabled ); SET_CHECK(IDC_EFFECTS_DISABLE, EffectsDisabled); + SET_CHECK(IDC_EXPANSION_DISABLE,StereoExpansionDisabled); SET_CHECK(IDC_TS_DISABLE, timeStretchDisabled); SET_CHECK(IDC_DEBUG_ENABLE, DebugEnabled); SET_CHECK(IDC_DSP_ENABLE, dspPluginEnabled); @@ -212,7 +217,7 @@ BOOL CALLBACK ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam) break; case IDC_OUTCONF: - SndConfigure( hWnd, + SndBuffer::Configure( hWnd, (int)SendMessage(GetDlgItem(hWnd,IDC_OUTPUT),CB_GETCURSEL,0,0) ); break; @@ -234,6 +239,7 @@ BOOL CALLBACK ConfigProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam) HANDLE_CHECK(IDC_EFFECTS_DISABLE,EffectsDisabled); HANDLE_CHECK(IDC_DSP_ENABLE,dspPluginEnabled); + HANDLE_CHECK(IDC_EXPANSION_DISABLE,StereoExpansionDisabled); HANDLE_CHECKNB(IDC_TS_DISABLE,timeStretchDisabled); EnableWindow( GetDlgItem( hWnd, IDC_OPEN_CONFIG_SOUNDTOUCH ), !timeStretchDisabled ); break; diff --git a/plugins/spu2-x/src/Win32/Config.h b/plugins/spu2-x/src/Win32/Config.h index a662bc1880..57c6ad734e 100644 --- a/plugins/spu2-x/src/Win32/Config.h +++ b/plugins/spu2-x/src/Win32/Config.h @@ -82,6 +82,7 @@ extern int dspPluginModule; extern bool dspPluginEnabled; extern bool timeStretchDisabled; +extern bool StereoExpansionDisabled; class SoundtouchCfg { @@ -120,12 +121,9 @@ struct CONFIG_XAUDIO2 std::wstring Device; s8 NumBuffers; - bool ExpandTo51; - CONFIG_XAUDIO2() : Device(), - NumBuffers( 2 ), - ExpandTo51( true ) + NumBuffers( 2 ) { } }; diff --git a/plugins/spu2-x/src/Win32/RealtimeDebugger.cpp b/plugins/spu2-x/src/Win32/RealtimeDebugger.cpp index c2fdda1a47..d3243bd714 100644 --- a/plugins/spu2-x/src/Win32/RealtimeDebugger.cpp +++ b/plugins/spu2-x/src/Win32/RealtimeDebugger.cpp @@ -144,8 +144,8 @@ void UpdateDebugDialog() SetDCBrushColor (hdc,RGB( 0,255, 0)); - int vl = abs(((vc.VolumeL.Value >> 16) * 24) >> 15); - int vr = 
abs(((vc.VolumeR.Value >> 16) * 24) >> 15); + int vl = abs(((vc.Volume.Left.Value >> 16) * 24) >> 15); + int vr = abs(((vc.Volume.Right.Value >> 16) * 24) >> 15); FillRectangle(hdc,IX+38,IY+26 - vl, 4, vl); FillRectangle(hdc,IX+42,IY+26 - vr, 4, vr); diff --git a/plugins/spu2-x/src/Win32/SndOut_DSound.cpp b/plugins/spu2-x/src/Win32/SndOut_DSound.cpp index 4c55c053d2..86c72f2320 100644 --- a/plugins/spu2-x/src/Win32/SndOut_DSound.cpp +++ b/plugins/spu2-x/src/Win32/SndOut_DSound.cpp @@ -23,6 +23,7 @@ #include "spu2.h" #include "dialogs.h" +#define DIRECTSOUND_VERSION 0x1000 #include static ds_device_data devices[32]; @@ -37,7 +38,6 @@ private: static const int PacketsPerBuffer = 1; static const int BufferSize = SndOutPacketSize * PacketsPerBuffer; - static const int BufferSizeBytes = BufferSize << 1; u32 numBuffers; // cached copy of our configuration setting. @@ -57,25 +57,26 @@ private: HANDLE waitEvent; - SndBuffer *buff; - - static DWORD CALLBACK RThread(DSound*obj) + template< typename T > + static DWORD CALLBACK RThread( DSound* obj ) { - return obj->Thread(); + return obj->Thread(); } + template< typename T > DWORD CALLBACK Thread() { + static const int BufferSizeBytes = BufferSize * sizeof( T ); while( dsound_running ) { u32 rv = WaitForMultipleObjects(numBuffers,buffer_events,FALSE,200); - s16* p1, *oldp1; + T* p1, *oldp1; LPVOID p2; DWORD s1,s2; - u32 poffset=BufferSizeBytes * rv; + u32 poffset = BufferSizeBytes * rv; if( FAILED(buffer->Lock(poffset,BufferSizeBytes,(LPVOID*)&p1,&s1,&p2,&s2,0) ) ) { @@ -86,9 +87,9 @@ private: oldp1 = p1; for(int p=0; pReadSamples( p1 ); + SndBuffer::ReadSamples( p1 ); - buffer->Unlock(oldp1,s1,p2,s2); + buffer->Unlock( oldp1, s1, p2, s2 ); // Set the write pointer to the beginning of the next block. 
myLastWrite = (poffset + BufferSizeBytes) & ~BufferSizeBytes; @@ -97,9 +98,8 @@ private: } public: - s32 Init(SndBuffer *sb) + s32 Init() { - buff = sb; numBuffers = Config_DSoundOut.NumBuffers; // @@ -130,37 +130,46 @@ public: if( FAILED(dsound->SetCooperativeLevel(GetDesktopWindow(),DSSCL_PRIORITY)) ) throw std::runtime_error( "DirectSound Error: Cooperative level could not be set." ); + // Determine the user's speaker configuration, and select an expansion option as needed. + // FAIL : Directsound doesn't appear to support audio expansion >_< + + DWORD speakerConfig = 2; + //dsound->GetSpeakerConfig( &speakerConfig ); + IDirectSoundBuffer* buffer_; DSBUFFERDESC desc; // Set up WAV format structure. memset(&wfx, 0, sizeof(WAVEFORMATEX)); - wfx.wFormatTag = WAVE_FORMAT_PCM; - wfx.nSamplesPerSec = SampleRate; - wfx.nChannels=2; - wfx.wBitsPerSample = 16; - wfx.nBlockAlign = 2*2; - wfx.nAvgBytesPerSec = SampleRate * wfx.nBlockAlign; - wfx.cbSize=0; + wfx.wFormatTag = WAVE_FORMAT_PCM; + wfx.nSamplesPerSec = SampleRate; + wfx.nChannels = speakerConfig; + wfx.wBitsPerSample = 16; + wfx.nBlockAlign = 2*speakerConfig; + wfx.nAvgBytesPerSec = SampleRate * wfx.nBlockAlign; + wfx.cbSize = 0; + + uint BufferSizeBytes = BufferSize * wfx.nBlockAlign; // Set up DSBUFFERDESC structure. 
memset(&desc, 0, sizeof(DSBUFFERDESC)); desc.dwSize = sizeof(DSBUFFERDESC); desc.dwFlags = DSBCAPS_GETCURRENTPOSITION2 | DSBCAPS_CTRLPOSITIONNOTIFY;// _CTRLPAN | DSBCAPS_CTRLVOLUME | DSBCAPS_CTRLFREQUENCY; - desc.dwBufferBytes = BufferSizeBytes * numBuffers; - desc.lpwfxFormat = &wfx; + desc.dwBufferBytes = BufferSizeBytes * numBuffers; + desc.lpwfxFormat = &wfx; desc.dwFlags |= DSBCAPS_LOCSOFTWARE; desc.dwFlags |= DSBCAPS_GLOBALFOCUS; - if( FAILED(dsound->CreateSoundBuffer(&desc,&buffer_,0) ) || - FAILED(buffer_->QueryInterface(IID_IDirectSoundBuffer8,(void**)&buffer)) ) + if( FAILED(dsound->CreateSoundBuffer(&desc,&buffer_,0) ) ) + throw std::runtime_error( "DirectSound Error: Interface could not be queried." ); + + if( FAILED(buffer_->QueryInterface(IID_IDirectSoundBuffer8,(void**)&buffer)) ) throw std::runtime_error( "DirectSound Error: Interface could not be queried." ); buffer_->Release(); - verifyc( buffer->QueryInterface(IID_IDirectSoundNotify8,(void**)&buffer_notify) ); DSBPOSITIONNOTIFY not[MAX_BUFFER_COUNT]; @@ -171,9 +180,9 @@ public: // it was needed for some quirky driver? Theoretically we want the notification as soon // as possible after the buffer has finished playing. 
- buffer_events[i]=CreateEvent(NULL,FALSE,FALSE,NULL); - not[i].dwOffset=(wfx.nBlockAlign*2 + BufferSizeBytes*(i+1))%desc.dwBufferBytes; - not[i].hEventNotify=buffer_events[i]; + buffer_events[i] = CreateEvent(NULL,FALSE,FALSE,NULL); + not[i].dwOffset = (wfx.nBlockAlign + BufferSizeBytes*(i+1)) % desc.dwBufferBytes; + not[i].hEventNotify = buffer_events[i]; } buffer_notify->SetNotificationPositions(numBuffers,not); @@ -191,9 +200,9 @@ public: // Start Thread myLastWrite = 0; - dsound_running=true; - thread=CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread,this,0,&tid); - SetThreadPriority(thread,THREAD_PRIORITY_TIME_CRITICAL); + dsound_running = true; + thread = CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread,this,0,&tid); + SetThreadPriority(thread,THREAD_PRIORITY_ABOVE_NORMAL); return 0; } diff --git a/plugins/spu2-x/src/Win32/SndOut_XAudio2.cpp b/plugins/spu2-x/src/Win32/SndOut_XAudio2.cpp index 8f2f2526cf..bb3759be32 100644 --- a/plugins/spu2-x/src/Win32/SndOut_XAudio2.cpp +++ b/plugins/spu2-x/src/Win32/SndOut_XAudio2.cpp @@ -38,7 +38,6 @@ private: class BaseStreamingVoice : public IXAudio2VoiceCallback { protected: - SndBuffer* m_sndout; IXAudio2SourceVoice* pSourceVoice; s16* qbuffer; @@ -69,11 +68,10 @@ private: DeleteCriticalSection( &cs ); } - BaseStreamingVoice( SndBuffer* sb, uint numChannels ) : - m_sndout( sb ), + BaseStreamingVoice( uint numChannels ) : m_nBuffers( Config_XAudio2.NumBuffers ), m_nChannels( numChannels ), - m_BufferSize( SndOutPacketSize/2 * m_nChannels * PacketsPerBuffer ), + m_BufferSize( SndOutPacketSize * m_nChannels * PacketsPerBuffer ), m_BufferSizeBytes( m_BufferSize * sizeof(s16) ) { } @@ -133,18 +131,25 @@ private: LeaveCriticalSection( &cs ); } + STDMETHOD_(void, OnVoiceProcessingPassStart) () {} + STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) { }; + STDMETHOD_(void, OnVoiceProcessingPassEnd) () {} + STDMETHOD_(void, OnStreamEnd) () {} + STDMETHOD_(void, OnBufferStart) ( void* ) {} + STDMETHOD_(void, OnLoopEnd) 
( void* ) {} + STDMETHOD_(void, OnVoiceError) (THIS_ void* pBufferContext, HRESULT Error) { }; }; - - class StreamingVoice_Stereo : public BaseStreamingVoice + template< typename T > + class StreamingVoice : public BaseStreamingVoice { public: - StreamingVoice_Stereo( SndBuffer* sb, IXAudio2* pXAudio2 ) : - BaseStreamingVoice( sb, 2 ) + StreamingVoice( IXAudio2* pXAudio2 ) : + BaseStreamingVoice( sizeof(T) / sizeof( s16 ) ) { } - virtual ~StreamingVoice_Stereo() {} + virtual ~StreamingVoice() {} void Init( IXAudio2* pXAudio2 ) { @@ -152,11 +157,6 @@ private: } protected: - STDMETHOD_(void, OnVoiceProcessingPassStart) () {} - STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) { }; - STDMETHOD_(void, OnVoiceProcessingPassEnd) () {} - STDMETHOD_(void, OnStreamEnd) () {} - STDMETHOD_(void, OnBufferStart) ( void* ) {} STDMETHOD_(void, OnBufferEnd) ( void* context ) { EnterCriticalSection( &cs ); @@ -164,10 +164,10 @@ private: // All of these checks are necessary because XAudio2 is wonky shizat. 
if( pSourceVoice == NULL || context == NULL ) return; - s16* qb = (s16*)context; + T* qb = (T*)context; for(int p=0; pReadSamples( qb ); + SndBuffer::ReadSamples( qb ); XAUDIO2_BUFFER buf = {0}; buf.AudioBytes = m_BufferSizeBytes; @@ -177,83 +177,6 @@ private: pSourceVoice->SubmitSourceBuffer( &buf ); LeaveCriticalSection( &cs ); } - STDMETHOD_(void, OnLoopEnd) ( void* ) {} - STDMETHOD_(void, OnVoiceError) (THIS_ void* pBufferContext, HRESULT Error) { }; - - }; - - class StreamingVoice_Surround51 : public BaseStreamingVoice - { - public: - //LPF_data m_lpf_left; - //LPF_data m_lpf_right; - - s32 buffer[2 * SndOutPacketSize * PacketsPerBuffer]; - - StreamingVoice_Surround51( SndBuffer* sb, IXAudio2* pXAudio2 ) : - BaseStreamingVoice( sb, 6 ) - //m_lpf_left( Config_XAudio2.LowpassLFE, SampleRate ), - //m_lpf_right( Config_XAudio2.LowpassLFE, SampleRate ) - { - } - - virtual ~StreamingVoice_Surround51() {} - - void Init( IXAudio2* pXAudio2 ) - { - _init( pXAudio2, SPEAKER_5POINT1 ); - } - - protected: - STDMETHOD_(void, OnVoiceProcessingPassStart) () {} - STDMETHOD_(void, OnVoiceProcessingPassStart) (UINT32) { }; - STDMETHOD_(void, OnVoiceProcessingPassEnd) () {} - STDMETHOD_(void, OnStreamEnd) () {} - STDMETHOD_(void, OnBufferStart) ( void* ) {} - STDMETHOD_(void, OnBufferEnd) ( void* context ) - { - EnterCriticalSection( &cs ); - - // All of these checks are necessary because XAudio2 is wonky shizat. - if( pSourceVoice == NULL || context == NULL ) return; - - s16* qb = (s16*)context; - - for(int p=0; pReadSamples( buffer ); - const s32* src = buffer; - - for( int i=0; i - // This method is simple and sounds nice. It relies on the speaker/soundcard - // systems do to their own low pass / crossover. Manual lowpass is wasted effort - // and can't match solid state results anyway. - - qb[2] = qb[3] = (src[0] + src[1]) >> (SndOutVolumeShift+1); - - // Left and right rear! 
- qb[4] = SndScaleVol( src[0] ); - qb[5] = SndScaleVol( src[1] ); - } - - } - - XAUDIO2_BUFFER buf = { 0 }; - buf.AudioBytes = m_BufferSizeBytes; - buf.pAudioData = (BYTE*)context; - buf.pContext = context; - - pSourceVoice->SubmitSourceBuffer( &buf ); - LeaveCriticalSection( &cs ); - } - STDMETHOD_(void, OnLoopEnd) ( void* ) {} - STDMETHOD_(void, OnVoiceError) (THIS_ void* pBufferContext, HRESULT Error) { }; }; @@ -263,7 +186,7 @@ private: public: - s32 Init( SndBuffer *sb ) + s32 Init() { HRESULT hr; @@ -273,9 +196,8 @@ public: CoInitializeEx( NULL, COINIT_MULTITHREADED ); UINT32 flags = 0; -#ifdef _DEBUG - flags |= XAUDIO2_DEBUG_ENGINE; -#endif + if( IsDebugBuild ) + flags |= XAUDIO2_DEBUG_ENGINE; if ( FAILED(hr = XAudio2Create( &pXAudio2, flags ) ) ) { @@ -298,18 +220,47 @@ public: return -1; } - if( Config_XAudio2.ExpandTo51 && deviceDetails.OutputFormat.Format.nChannels >= 6 ) - { - ConLog( "* SPU2 > 5.1 speaker expansion enabled." ); - voiceContext = new StreamingVoice_Surround51( sb, pXAudio2 ); - } - else - { - voiceContext = new StreamingVoice_Stereo( sb, pXAudio2 ); - } + if( StereoExpansionDisabled ) + deviceDetails.OutputFormat.Format.nChannels = 2; + // Any windows driver should support stereo at the software level, I should think! + jASSUME( deviceDetails.OutputFormat.Format.nChannels > 1 ); + + switch( deviceDetails.OutputFormat.Format.nChannels ) + { + case 2: + ConLog( "* SPU2 > Using normal 2 speaker stereo output." ); + voiceContext = new StreamingVoice( pXAudio2 ); + break; + + case 3: + ConLog( "* SPU2 > 2.1 speaker expansion enabled." ); + voiceContext = new StreamingVoice( pXAudio2 ); + break; + + case 4: + ConLog( "* SPU2 > 4 speaker expansion enabled [quadraphenia]" ); + voiceContext = new StreamingVoice( pXAudio2 ); + break; + + case 5: + ConLog( "* SPU2 > 4.1 speaker expansion enabled." ); + voiceContext = new StreamingVoice( pXAudio2 ); + break; + + case 6: + case 7: + ConLog( "* SPU2 > 5.1 speaker expansion enabled." 
); + voiceContext = new StreamingVoice( pXAudio2 ); + break; + + default: // anything 8 or more gets the 7.1 treatment! + ConLog( "* SPU2 > 7.1 speaker expansion enabled." ); + voiceContext = new StreamingVoice( pXAudio2 ); + break; + } + voiceContext->Init( pXAudio2 ); - return 0; } diff --git a/plugins/spu2-x/src/Win32/SndOut_waveOut.cpp b/plugins/spu2-x/src/Win32/SndOut_waveOut.cpp index 9787a4c9ce..42e9c03c61 100644 --- a/plugins/spu2-x/src/Win32/SndOut_waveOut.cpp +++ b/plugins/spu2-x/src/Win32/SndOut_waveOut.cpp @@ -31,14 +31,13 @@ private: static const int PacketsPerBuffer = (1024 / SndOutPacketSize); static const int BufferSize = SndOutPacketSize*PacketsPerBuffer; - static const int BufferSizeBytes = BufferSize << 1; u32 numBuffers; HWAVEOUT hwodevice; WAVEFORMATEX wformat; WAVEHDR whbuffer[MAX_BUFFER_COUNT]; - s16* qbuffer; + StereoOut16* qbuffer; #define QBUFFER(x) (qbuffer + BufferSize * (x)) @@ -46,17 +45,13 @@ private: HANDLE thread; DWORD tid; - SndBuffer *buff; - wchar_t ErrText[256]; - static DWORD CALLBACK RThread(WaveOutModule*obj) - { - return obj->Thread(); - } - + template< typename T > DWORD CALLBACK Thread() { + static const int BufferSizeBytes = BufferSize * sizeof( T ); + while( waveout_running ) { bool didsomething = false; @@ -64,16 +59,16 @@ private: { if(!(whbuffer[i].dwFlags & WHDR_DONE) ) continue; - WAVEHDR *buf=whbuffer+i; + WAVEHDR *buf = whbuffer+i; buf->dwBytesRecorded = buf->dwBufferLength; - s16 *t = (s16*)buf->lpData; + T* t = (T*)buf->lpData; for(int p=0; pReadSamples( t ); + SndBuffer::ReadSamples( t ); - whbuffer[i].dwFlags&=~WHDR_DONE; - waveOutWrite(hwodevice,buf,sizeof(WAVEHDR)); + whbuffer[i].dwFlags &= ~WHDR_DONE; + waveOutWrite( hwodevice, buf, sizeof(WAVEHDR) ); didsomething = true; } @@ -85,25 +80,71 @@ private: return 0; } -public: - s32 Init(SndBuffer *sb) + template< typename T > + static DWORD CALLBACK RThread(WaveOutModule*obj) + { + return obj->Thread(); + } + +public: + s32 Init() { - buff = sb; numBuffers = 
Config_WaveOut.NumBuffers; MMRESULT woores; if (Test()) return -1; - wformat.wFormatTag=WAVE_FORMAT_PCM; - wformat.nSamplesPerSec=SampleRate; - wformat.wBitsPerSample=16; - wformat.nChannels=2; - wformat.nBlockAlign=((wformat.wBitsPerSample * wformat.nChannels) / 8); - wformat.nAvgBytesPerSec=(wformat.nSamplesPerSec * wformat.nBlockAlign); - wformat.cbSize=0; + // TODO : Use dsound to determine the speaker configuration, and expand audio from there. + + #if 0 + int speakerConfig; + + if( StereoExpansionDisabled ) + speakerConfig = 2; + + // Any windows driver should support stereo at the software level, I should think! + jASSUME( speakerConfig > 1 ); + LPTHREAD_START_ROUTINE threadproc; + + switch( speakerConfig ) + { + case 2: + ConLog( "* SPU2 > Using normal 2 speaker stereo output." ); + threadproc = (LPTHREAD_START_ROUTINE)&RThread; + speakerConfig = 2; + break; + + case 4: + ConLog( "* SPU2 > 4 speaker expansion enabled [quadraphenia]" ); + threadproc = (LPTHREAD_START_ROUTINE)&RThread; + speakerConfig = 4; + break; + + case 6: + case 7: + ConLog( "* SPU2 > 5.1 speaker expansion enabled." ); + threadproc = (LPTHREAD_START_ROUTINE)&RThread; + speakerConfig = 6; + break; + + default: + ConLog( "* SPU2 > 7.1 speaker expansion enabled." 
); + threadproc = (LPTHREAD_START_ROUTINE)&RThread; + speakerConfig = 8; + break; + } + #endif + + wformat.wFormatTag = WAVE_FORMAT_PCM; + wformat.nSamplesPerSec = SampleRate; + wformat.wBitsPerSample = 16; + wformat.nChannels = 2; + wformat.nBlockAlign = ((wformat.wBitsPerSample * wformat.nChannels) / 8); + wformat.nAvgBytesPerSec = (wformat.nSamplesPerSec * wformat.nBlockAlign); + wformat.cbSize = 0; - qbuffer=new s16[BufferSize*numBuffers]; + qbuffer = new StereoOut16[BufferSize*numBuffers]; woores = waveOutOpen(&hwodevice,WAVE_MAPPER,&wformat,0,0,0); if (woores != MMSYSERR_NOERROR) @@ -113,6 +154,8 @@ public: return -1; } + const int BufferSizeBytes = wformat.nBlockAlign * BufferSize; + for(u32 i=0;i,this,0,&tid); return 0; } @@ -276,4 +319,4 @@ public: } WO; -SndOutModule *WaveOut=&WO; +SndOutModule *WaveOut = &WO; diff --git a/plugins/spu2-x/src/Win32/Spu2-X_vs2008.vcproj b/plugins/spu2-x/src/Win32/Spu2-X_vs2008.vcproj index 9837055fe9..6cbd31dbf1 100644 --- a/plugins/spu2-x/src/Win32/Spu2-X_vs2008.vcproj +++ b/plugins/spu2-x/src/Win32/Spu2-X_vs2008.vcproj @@ -53,6 +53,7 @@ FavorSizeOrSpeed="1" OmitFramePointers="true" EnableFiberSafeOptimizations="true" + AdditionalIncludeDirectories="" PreprocessorDefinitions="SPU2X_DEVBUILD;FLOAT_SAMPLES;NDEBUG;_USRDLL" StringPooling="true" RuntimeLibrary="0" @@ -608,6 +609,10 @@ RelativePath=".\SndOut_XAudio2.cpp" > + +