diff --git a/plugins/spu2ghz/decoder.cpp b/plugins/spu2ghz/decoder.cpp index 259421c519..ac7efcf57f 100644 --- a/plugins/spu2ghz/decoder.cpp +++ b/plugins/spu2ghz/decoder.cpp @@ -58,7 +58,7 @@ int state=0; FILE *fSpdifDump; extern u32 core; -void __fastcall ReadInput(s32& PDataL,s32& PDataR); +void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR); union spdif_frame { // total size: 32bits struct { @@ -134,10 +134,11 @@ void spdif_update() { s32 Data,Zero; + core=0; + V_Core& thiscore( Cores[core] ); for(int i=0;iGetCurrentPosition( &play, &write ); + //ConLog( " * SPU2 > Play: %d Write: %d poffset: %d\n", play, write, poffset ); + + buff->ReadSamples(tbuffer,BufferSize); + + verifyc(buffer->Lock(poffset,BufferSizeBytes,&p1,&s1,&p2,&s2,0)); + { - if (rv==WAIT_OBJECT_0+i) + s16 *t = (s16*)p1; + s32 *s = (s32*)tbuffer; + for(int j=0;jReadSamples(tbuffer,BufferSize); - - verifyc(buffer->Lock(poffset,BufferSizeBytes,&p1,&s1,&p2,&s2,0)); - s16 *t = (s16*)p1; - s32 *s = (s32*)tbuffer; - for(int j=0;j>8); - } - verifyc(buffer->Unlock(p1,s1,p2,s2)); - + *(t++) = (s16)((*(s++))>>8); } } + + /*if( p2 != NULL ) + { + ConLog( " * SPU2 > DSound Driver Loop-Around Occured. Length: %d", s2 ); + }*/ + + verifyc(buffer->Unlock(p1,s1,p2,s2)); } return 0; } @@ -379,4 +386,4 @@ public: } } DS; -SndOutModule *DSoundOut=&DS; \ No newline at end of file +SndOutModule *DSoundOut=&DS; diff --git a/plugins/spu2ghz/mixer.cpp b/plugins/spu2ghz/mixer.cpp index 3a08222559..9d906074f5 100644 --- a/plugins/spu2ghz/mixer.cpp +++ b/plugins/spu2ghz/mixer.cpp @@ -15,6 +15,14 @@ //License along with this library; if not, write to the Free Software //Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // + +// [Air] Notes -----> +// Adding 'static' to the __forceinline methods hints to the linker that it need not +// actually include procedural versions of the methods in the DLL. Under normal circumstances +// the compiler will still generate the procedures even though they are never used (the inline +// code is used instead). Using static reduced the size of my generated .DLL by a few KB. +// (doesn't really make anything faster, but eh... whatever :) +// #include "spu2.h" #include @@ -42,6 +50,7 @@ double srate_pv=1.0; extern u32 PsxRates[160]; + void InitADSR() // INIT ADSR { for (int i=0; i<(32+128); i++) @@ -71,18 +80,18 @@ const s32 f[5][2] ={{ 0, 0 }, { 98, -55 }, { 122, -60 }}; -s32 __forceinline XA_decode(s32 pred1, s32 pred2, s32 shift, s32& prev1, s32& prev2, s32 data) +static s16 __forceinline XA_decode(s32 pred1, s32 pred2, s32 shift, s32& prev1, s32& prev2, s32 data) { - s32 pcm =data>>shift; + s32 pcm = data>>shift; pcm+=((pred1*prev1)+(pred2*prev2))>>6; if(pcm> 32767) pcm= 32767; if(pcm<-32768) pcm=-32768; prev2=prev1; prev1=pcm; - return pcm; + return (s16)pcm; } -s32 __forceinline XA_decode_block(s32* buffer, s16* block, s32& prev1, s32& prev2) +static s16 __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2) { s32 data=*block; s32 Shift = ((data>> 0)&0xF)+16; @@ -102,11 +111,84 @@ s32 __forceinline XA_decode_block(s32* buffer, s16* block, s32& prev1, s32& prev return data; } -void __forceinline IncrementNextA() +static s16 __forceinline XA_decode_block_fast(s16* buffer, const s16* block, s32& prev1, s32& prev2) { - V_Voice& vc(Cores[core].Voices[voice]); + s32 header = *block; + s32 shift = ((header>> 0)&0xF)+16; + s32 pred1 = f[(header>> 4)&0xF][0]; + s32 pred2 = f[(header>> 4)&0xF][1]; - if((vc.NextA==Cores[core].IRQA)&&(Cores[core].IRQEnable)) { + const s8* blockbytes = (s8*)&block[1]; + + for(int i=0; i<14; i++, blockbytes++) + { + s32 pcm, pcm2; + { + s32 data = ((*blockbytes)<<28) & 0xF0000000; + pcm = data>>shift; + pcm+=((pred1*prev1)+(pred2*prev2))>>6; + if(pcm> 32767) pcm= 32767; + if(pcm<-32768) pcm=-32768; + *(buffer++) = pcm; + } + + //prev2=prev1; + //prev1=pcm; + + { + s32 data = ((*blockbytes)<<24) & 0xF0000000; + pcm2 = data>>shift; + pcm2+=((pred1*pcm)+(pred2*prev1))>>6; + if(pcm2> 32767) pcm2= 32767; + if(pcm2<-32768) pcm2=-32768; + *(buffer++) = pcm2; + } + + prev2=pcm; + prev1=pcm2; + } + + return header; +} + +static s16 __forceinline XA_decode_block_unsaturated(s16* buffer, const s16* block, s32& prev1, s32& prev2) +{ + s32 header = *block; + s32 shift = ((header>> 0)&0xF)+16; + s32 pred1 = f[(header>> 4)&0xF][0]; + s32 pred2 = f[(header>> 4)&0xF][1]; + + const s8* blockbytes = (s8*)&block[1]; + + for(int i=0; i<14; i++, blockbytes++) + { + s32 pcm, pcm2; + { + s32 data = ((*blockbytes)<<28) & 0xF0000000; + pcm = data>>shift; + pcm+=((pred1*prev1)+(pred2*prev2))>>6; + // [Air] : Fast method, no saturation is performed. + *(buffer++) = pcm; + } + + { + s32 data = ((*blockbytes)<<24) & 0xF0000000; + pcm2 = data>>shift; + pcm2+=((pred1*pcm)+(pred2*prev1))>>6; + // [Air] : Fast method, no saturation is performed. + *(buffer++) = pcm2; + } + + prev2=pcm; + prev1=pcm2; + } + + return header; +} + +static void __forceinline IncrementNextA( const V_Core& thiscore, V_Voice& vc ) +{ + if((vc.NextA==thiscore.IRQA)&&(thiscore.IRQEnable)) { ConLog(" * SPU2: IRQ Called (IRQ passed).\n"); Spdif.Info=4<=28) { @@ -135,31 +216,48 @@ void __fastcall GetNextDataBuffered(s32& Data) { if(MsgVoiceOff) ConLog(" * SPU2: Voice Off by EndPoint: %d \n", voice); VoiceStop(core,voice); - Cores[core].Regs.ENDX|=1<> 8)&1; vc.Loop = (data>> 9)&1; vc.LoopStart= (data>>10)&1; - vc.FirstBlock=0; vc.SCurrent = 0; + vc.FirstBlock = 0; - if (vc.LoopStart&&!vc.LoopMode) + if( vc.LoopStart && !vc.LoopMode ) { vc.LoopStartA=vc.NextA; } - IncrementNextA(); + IncrementNextA( thiscore, vc ); } Data=vc.SBuffer[vc.SCurrent]; if((vc.SCurrent&3)==3) { - IncrementNextA(); + IncrementNextA( thiscore, vc ); } vc.SCurrent++; } @@ -170,9 +268,9 @@ void __fastcall GetNextDataBuffered(s32& Data) const int InvExpOffsets[] = { 0,4,6,8,9,10,11,12 }; -void __forceinline CalculateADSR() +static void __forceinline CalculateADSR( V_Voice& vc ) { - V_ADSR& env(Cores[core].Voices[voice].ADSR); + V_ADSR& env(vc.ADSR); u32 SLevel=((u32)env.Sl)<<27; u32 off=InvExpOffsets[(env.Value>>28)&7]; @@ -293,6 +391,8 @@ void __forceinline CalculateADSR() case 6: // release end env.Value=0; break; + + //jNO_DEFAULT } if (env.Phase==6) { @@ -300,17 +400,17 @@ void __forceinline CalculateADSR() VoiceStop(core,voice); Cores[core].Regs.ENDX|=(1<>15; + + vc.SP+=pitch; + } - if((voice==0)||(vc.Modulated==0)) - pitch=vc.Pitch; - else - pitch=(vc.Pitch*(32768 + Cores[core].Voices[voice-1].OutX))>>15; - - vc.SP+=pitch; while(vc.SP>=4096) { - DT=0; - - if(vc.Noise) - GetNoiseValues(DT); - else - GetNextDataBuffered(DT); + GetNextDataBuffered( thiscore, vc, DT ); vc.PV4=vc.PV3; vc.PV3=vc.PV2; @@ -378,8 +482,7 @@ void GetVoiceValues(s32& Value) { vc.SP-=4096; } - CalculateADSR(); -// CalculateADSR(); + CalculateADSR( vc ); if(vc.ADSR.Phase==0) { @@ -388,26 +491,33 @@ void GetVoiceValues(s32& Value) { } else { - if(Interpolation==0) { + // [Air]: if SP is zero then we landed perfectly on a sample source, no + // interpolation necessary (besides being a little faster this is important + // too, since the interpolator will pick the wrong sample to mix otherwise). + + if(Interpolation==0 || vc.SP == 0) + { Data = vc.PV1; } else if(Interpolation==1) //linear { - s64 t0 = vc.PV1 - vc.PV2; + // [Air]: Inverted the interpolation delta. The old way was generating + // inverted waveforms. + s64 t0 = vc.PV2 - vc.PV1; s64 t1 = vc.PV1; Data = (((t0*vc.SP)>>12) + t1); } - else if(Interpolation==2) //cubic + else // if(Interpolation==2) //must be cubic { s64 a0 = vc.PV1 - vc.PV2 - vc.PV4 + vc.PV3; s64 a1 = vc.PV4 - vc.PV3 - a0; s64 a2 = vc.PV1 - vc.PV4; s64 a3 = vc.PV2; - s64 mu = vc.SP; + s64 mu = 4096-vc.SP; - s64 t0 = ((a0 )*mu)>>12; - s64 t1 = ((t0+a1)*mu)>>12; - s64 t2 = ((t1+a2)*mu)>>12; + s64 t0 = ((a0 )*mu)>>18; + s64 t1 = ((t0+a1)*mu)>>18; + s64 t2 = ((t1+a2)*mu)>>18; s64 t3 = ((t2+a3)); Data = t3; @@ -415,32 +525,73 @@ void GetVoiceValues(s32& Value) { Value=(s32)((Data*vc.ADSR.Value)>>48); //32bit ADSR + convert to 16bit - vc.OutX=abs(Value); + // [Air]: Moved abs() to the modulation code above, so that the abs conditionals are + // only run in select cases where modulation is active. + vc.OutX=Value; + } +} + +// [Air]: Noise values need to be mixed without going through interpolation, since it +// can wreak havoc on the noise (causing muffling or popping) +static void __fastcall GetNoiseValues(V_Core& thiscore, V_Voice& vc, s32& Value) +{ + s64 Data=0; + s32 DT=0; + + { + s32 pitch; + + if( (vc.Modulated==0) || (voice==0) ) + pitch=vc.Pitch; + else + pitch=(vc.Pitch*(32768 + abs(thiscore.Voices[voice-1].OutX)))>>15; + + vc.SP+=pitch; + } + + while(vc.SP>=4096) + { + GetNoiseValues(DT); + vc.SP-=4096; + } + + Data = DT<<16; //32bit processing + + CalculateADSR( vc ); + + if(vc.ADSR.Phase==0) + { + Value=0; + vc.OutX=0; + } + else + { + Value=(s32)((Data*vc.ADSR.Value)>>48); //32bit ADSR + convert to 16bit + vc.OutX=Value; } - if(vc.PeakX>=4; //give 16.8 data PDataR>>=4; - Cores[core].InputPos+=2; - if((Cores[core].InputPos==0x100)||(Cores[core].InputPos>=0x200)) { - Cores[core].AdmaInProgress=0; - if(Cores[core].InputDataLeft>=0x200) + thiscore.InputPos+=2; + if((thiscore.InputPos==0x100)||(thiscore.InputPos>=0x200)) { + thiscore.AdmaInProgress=0; + if(thiscore.InputDataLeft>=0x200) { - u8 k=Cores[core].InputDataLeft>=Cores[core].InputDataProgress; + u8 k=thiscore.InputDataLeft>=thiscore.InputDataProgress; #ifdef PCM24_S1_INTERLEAVE AutoDMAReadBuffer(core,1); #else AutoDMAReadBuffer(core,0); #endif - Cores[core].AdmaInProgress=1; + thiscore.AdmaInProgress=1; - Cores[core].TSA=(core<<10)+Cores[core].InputPos; + thiscore.TSA=(core<<10)+thiscore.InputPos; - if (Cores[core].InputDataLeft<0x200) + if (thiscore.InputDataLeft<0x200) { FileLog("[%10d] AutoDMA%c block end.\n",Cycles, (core==0)?'4':'7'); - if(Cores[core].InputDataLeft>0) + if(thiscore.InputDataLeft>0) { if(MsgAutoDMA) ConLog("WARNING: adma buffer didn't finish with a whole block!!\n"); } - Cores[core].InputDataLeft=0; - Cores[core].DMAICounter=1; + thiscore.InputDataLeft=0; + thiscore.DMAICounter=1; } } - Cores[core].InputPos&=0x1ff; + thiscore.InputPos&=0x1ff; } } else if((core==0)&&((PlayMode&4)==4)) { - Cores[core].InputPos&=~1; + thiscore.InputPos&=~1; - s32 *pl=(s32*)&(Cores[core].ADMATempBuffer[Cores[core].InputPos]); - s32 *pr=(s32*)&(Cores[core].ADMATempBuffer[Cores[core].InputPos+0x200]); + s32 *pl=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos]); + s32 *pr=(s32*)&(thiscore.ADMATempBuffer[thiscore.InputPos+0x200]); PDataL=*pl; PDataR=*pr; - Cores[core].InputPos+=2; - if(Cores[core].InputPos>=0x200) { - Cores[core].AdmaInProgress=0; - if(Cores[core].InputDataLeft>=0x200) + thiscore.InputPos+=2; + if(thiscore.InputPos>=0x200) { + thiscore.AdmaInProgress=0; + if(thiscore.InputDataLeft>=0x200) { - u8 k=Cores[core].InputDataLeft>=Cores[core].InputDataProgress; + u8 k=thiscore.InputDataLeft>=thiscore.InputDataProgress; AutoDMAReadBuffer(core,0); - Cores[core].AdmaInProgress=1; + thiscore.AdmaInProgress=1; - Cores[core].TSA=(core<<10)+Cores[core].InputPos; + thiscore.TSA=(core<<10)+thiscore.InputPos; - if (Cores[core].InputDataLeft<0x200) + if (thiscore.InputDataLeft<0x200) { FileLog("[%10d] Spdif AutoDMA%c block end.\n",Cycles, (core==0)?'4':'7'); - if(Cores[core].InputDataLeft>0) + if(thiscore.InputDataLeft>0) { if(MsgAutoDMA) ConLog("WARNING: adma buffer didn't finish with a whole block!!\n"); } - Cores[core].InputDataLeft=0; - Cores[core].DMAICounter=1; + thiscore.InputDataLeft=0; + thiscore.DMAICounter=1; } } - Cores[core].InputPos&=0x1ff; + thiscore.InputPos&=0x1ff; } } @@ -528,45 +679,45 @@ void __fastcall ReadInput(s32& PDataL,s32& PDataR) else { // Using the temporary buffer because this area gets overwritten by some other code. - //*PDataL=(s32)*(s16*)(spu2mem+0x2000+(core<<10)+Cores[core].InputPos); - //*PDataR=(s32)*(s16*)(spu2mem+0x2200+(core<<10)+Cores[core].InputPos); + //*PDataL=(s32)*(s16*)(spu2mem+0x2000+(core<<10)+thiscore.InputPos); + //*PDataR=(s32)*(s16*)(spu2mem+0x2200+(core<<10)+thiscore.InputPos); - tl=(s32)Cores[core].ADMATempBuffer[Cores[core].InputPos]; - tr=(s32)Cores[core].ADMATempBuffer[Cores[core].InputPos+0x200]; + tl=(s32)thiscore.ADMATempBuffer[thiscore.InputPos]; + tr=(s32)thiscore.ADMATempBuffer[thiscore.InputPos+0x200]; } PDataL=tl; PDataR=tr; - Cores[core].InputPos++; - if((Cores[core].InputPos==0x100)||(Cores[core].InputPos>=0x200)) { - Cores[core].AdmaInProgress=0; - if(Cores[core].InputDataLeft>=0x200) + thiscore.InputPos++; + if((thiscore.InputPos==0x100)||(thiscore.InputPos>=0x200)) { + thiscore.AdmaInProgress=0; + if(thiscore.InputDataLeft>=0x200) { - u8 k=Cores[core].InputDataLeft>=Cores[core].InputDataProgress; + u8 k=thiscore.InputDataLeft>=thiscore.InputDataProgress; AutoDMAReadBuffer(core,0); - Cores[core].AdmaInProgress=1; + thiscore.AdmaInProgress=1; - Cores[core].TSA=(core<<10)+Cores[core].InputPos; + thiscore.TSA=(core<<10)+thiscore.InputPos; - if (Cores[core].InputDataLeft<0x200) + if (thiscore.InputDataLeft<0x200) { FileLog("[%10d] AutoDMA%c block end.\n",Cycles, (core==0)?'4':'7'); - Cores[core].AutoDMACtrl |=~3; + thiscore.AutoDMACtrl |=~3; - if(Cores[core].InputDataLeft>0) + if(thiscore.InputDataLeft>0) { if(MsgAutoDMA) ConLog("WARNING: adma buffer didn't finish with a whole block!!\n"); } - Cores[core].InputDataLeft=0; - Cores[core].DMAICounter=1; + thiscore.InputDataLeft=0; + thiscore.DMAICounter=1; } } - Cores[core].InputPos&=0x1ff; + thiscore.InputPos&=0x1ff; } } } @@ -580,7 +731,7 @@ void __fastcall ReadInput(s32& PDataL,s32& PDataR) ///////////////////////////////////////////////////////////////////////////////////////// // // -void ReadInputPV(s32& ValL,s32& ValR) +void __fastcall ReadInputPV(V_Core& thiscore, s32& ValL,s32& ValR) { s32 DL=0, DR=0; @@ -588,24 +739,25 @@ void ReadInputPV(s32& ValL,s32& ValR) if(pitch==0) pitch=48000; - Cores[core].ADMAPV+=pitch; - while(Cores[core].ADMAPV>=48000) + thiscore.ADMAPV+=pitch; + while(thiscore.ADMAPV>=48000) { - ReadInput(DL,DR); - Cores[core].ADMAPV-=48000; - Cores[core].ADMAPL=DL; - Cores[core].ADMAPR=DR; + ReadInput(thiscore, DL,DR); + thiscore.ADMAPV-=48000; + thiscore.ADMAPL=DL; + thiscore.ADMAPR=DR; } - ValL=Cores[core].ADMAPL; - ValR=Cores[core].ADMAPR; + ValL=thiscore.ADMAPL; + ValR=thiscore.ADMAPR; } ///////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////// // // -void __forceinline UpdateVolume(V_Volume& Vol) { +static void __forceinline UpdateVolume(V_Volume& Vol) +{ s32 NVal; // TIMINGS ARE FAKE!!! Need to investigate. @@ -664,7 +816,7 @@ void __forceinline UpdateVolume(V_Volume& Vol) { ///////////////////////////////////////////////////////////////////////////////////////// // // -s32 __forceinline clamp(s32 x) +static s32 __forceinline clamp(s32 x) { if (x>0x00ffffff) return 0x00ffffff; if (x<0xff000000) return 0xff000000; @@ -675,12 +827,12 @@ s32 __forceinline clamp(s32 x) ///////////////////////////////////////////////////////////////////////////////////////// // // -void DoReverb(s32& OutL, s32& OutR, s32 InL, s32 InR) +static void DoReverb( V_Core& thiscore, s32& OutL, s32& OutR, s32 InL, s32 InR) { static s32 INPUT_SAMPLE_L,INPUT_SAMPLE_R; static s32 OUTPUT_SAMPLE_L,OUTPUT_SAMPLE_R; - if(!(Cores[core].FxEnable&&EffectsEnabled)) + if(!(thiscore.FxEnable&&EffectsEnabled)) { OUTPUT_SAMPLE_L=0; OUTPUT_SAMPLE_R=0; @@ -698,14 +850,14 @@ void DoReverb(s32& OutL, s32& OutR, s32 InL, s32 InR) s32 IIR_INPUT_A0,IIR_INPUT_A1,IIR_INPUT_B0,IIR_INPUT_B1; s32 ACC0,ACC1; s32 FB_A0,FB_A1,FB_B0,FB_B1; - s32 buffsize=Cores[core].EffectsEndA-Cores[core].EffectsStartA+1; + s32 buffsize=thiscore.EffectsEndA-thiscore.EffectsStartA+1; if(buffsize<0) { - buffsize = Cores[core].EffectsEndA; - Cores[core].EffectsEndA=Cores[core].EffectsStartA; - Cores[core].EffectsStartA=buffsize; - buffsize=Cores[core].EffectsEndA-Cores[core].EffectsStartA+1; + buffsize = thiscore.EffectsEndA; + thiscore.EffectsEndA=thiscore.EffectsStartA; + thiscore.EffectsStartA=buffsize; + buffsize=thiscore.EffectsEndA-thiscore.EffectsStartA+1; } //filter the 2 samples (prev then current) @@ -715,42 +867,42 @@ void DoReverb(s32& OutL, s32& OutR, s32 InL, s32 InR) INPUT_SAMPLE_L=(INPUT_SAMPLE_L+InL)>>9; INPUT_SAMPLE_R=(INPUT_SAMPLE_R+InR)>>9; -#define BUFFER(x) ((s32)(*GetMemPtr(Cores[core].EffectsStartA + ((Cores[core].ReverbX + buffsize-((x)<<2))%buffsize)))) -#define SBUFFER(x) (*GetMemPtr(Cores[core].EffectsStartA + ((Cores[core].ReverbX + buffsize-((x)<<2))%buffsize))) +#define BUFFER(x) ((s32)(*GetMemPtr(thiscore.EffectsStartA + ((thiscore.ReverbX + buffsize-((x)<<2))%buffsize)))) +#define SBUFFER(x) (*GetMemPtr(thiscore.EffectsStartA + ((thiscore.ReverbX + buffsize-((x)<<2))%buffsize))) - Cores[core].ReverbX=((Cores[core].ReverbX + 4)%buffsize); + thiscore.ReverbX=((thiscore.ReverbX + 4)%buffsize); - IIR_INPUT_A0 = (BUFFER(Cores[core].Revb.IIR_SRC_A0) * Cores[core].Revb.IIR_COEF + INPUT_SAMPLE_L * Cores[core].Revb.IN_COEF_L)>>16; - IIR_INPUT_A1 = (BUFFER(Cores[core].Revb.IIR_SRC_A1) * Cores[core].Revb.IIR_COEF + INPUT_SAMPLE_R * Cores[core].Revb.IN_COEF_R)>>16; - IIR_INPUT_B0 = (BUFFER(Cores[core].Revb.IIR_SRC_B0) * Cores[core].Revb.IIR_COEF + INPUT_SAMPLE_L * Cores[core].Revb.IN_COEF_L)>>16; - IIR_INPUT_B1 = (BUFFER(Cores[core].Revb.IIR_SRC_B1) * Cores[core].Revb.IIR_COEF + INPUT_SAMPLE_R * Cores[core].Revb.IN_COEF_R)>>16; + IIR_INPUT_A0 = (BUFFER(thiscore.Revb.IIR_SRC_A0) * thiscore.Revb.IIR_COEF + INPUT_SAMPLE_L * thiscore.Revb.IN_COEF_L)>>16; + IIR_INPUT_A1 = (BUFFER(thiscore.Revb.IIR_SRC_A1) * thiscore.Revb.IIR_COEF + INPUT_SAMPLE_R * thiscore.Revb.IN_COEF_R)>>16; + IIR_INPUT_B0 = (BUFFER(thiscore.Revb.IIR_SRC_B0) * thiscore.Revb.IIR_COEF + INPUT_SAMPLE_L * thiscore.Revb.IN_COEF_L)>>16; + IIR_INPUT_B1 = (BUFFER(thiscore.Revb.IIR_SRC_B1) * thiscore.Revb.IIR_COEF + INPUT_SAMPLE_R * thiscore.Revb.IN_COEF_R)>>16; - SBUFFER(Cores[core].Revb.IIR_DEST_A0 + 4) = clamp((IIR_INPUT_A0 * Cores[core].Revb.IIR_ALPHA + BUFFER(Cores[core].Revb.IIR_DEST_A0) * (65535 - Cores[core].Revb.IIR_ALPHA))>>16); - SBUFFER(Cores[core].Revb.IIR_DEST_A1 + 4) = clamp((IIR_INPUT_A1 * Cores[core].Revb.IIR_ALPHA + BUFFER(Cores[core].Revb.IIR_DEST_A1) * (65535 - Cores[core].Revb.IIR_ALPHA))>>16); - SBUFFER(Cores[core].Revb.IIR_DEST_B0 + 4) = clamp((IIR_INPUT_B0 * Cores[core].Revb.IIR_ALPHA + BUFFER(Cores[core].Revb.IIR_DEST_B0) * (65535 - Cores[core].Revb.IIR_ALPHA))>>16); - SBUFFER(Cores[core].Revb.IIR_DEST_B1 + 4) = clamp((IIR_INPUT_B1 * Cores[core].Revb.IIR_ALPHA + BUFFER(Cores[core].Revb.IIR_DEST_B1) * (65535 - Cores[core].Revb.IIR_ALPHA))>>16); + SBUFFER(thiscore.Revb.IIR_DEST_A0 + 4) = clamp((IIR_INPUT_A0 * thiscore.Revb.IIR_ALPHA + BUFFER(thiscore.Revb.IIR_DEST_A0) * (65535 - thiscore.Revb.IIR_ALPHA))>>16); + SBUFFER(thiscore.Revb.IIR_DEST_A1 + 4) = clamp((IIR_INPUT_A1 * thiscore.Revb.IIR_ALPHA + BUFFER(thiscore.Revb.IIR_DEST_A1) * (65535 - thiscore.Revb.IIR_ALPHA))>>16); + SBUFFER(thiscore.Revb.IIR_DEST_B0 + 4) = clamp((IIR_INPUT_B0 * thiscore.Revb.IIR_ALPHA + BUFFER(thiscore.Revb.IIR_DEST_B0) * (65535 - thiscore.Revb.IIR_ALPHA))>>16); + SBUFFER(thiscore.Revb.IIR_DEST_B1 + 4) = clamp((IIR_INPUT_B1 * thiscore.Revb.IIR_ALPHA + BUFFER(thiscore.Revb.IIR_DEST_B1) * (65535 - thiscore.Revb.IIR_ALPHA))>>16); - ACC0 = (s32)(BUFFER(Cores[core].Revb.ACC_SRC_A0) * Cores[core].Revb.ACC_COEF_A + - BUFFER(Cores[core].Revb.ACC_SRC_B0) * Cores[core].Revb.ACC_COEF_B + - BUFFER(Cores[core].Revb.ACC_SRC_C0) * Cores[core].Revb.ACC_COEF_C + - BUFFER(Cores[core].Revb.ACC_SRC_D0) * Cores[core].Revb.ACC_COEF_D)>>16; - ACC1 = (s32)(BUFFER(Cores[core].Revb.ACC_SRC_A1) * Cores[core].Revb.ACC_COEF_A + - BUFFER(Cores[core].Revb.ACC_SRC_B1) * Cores[core].Revb.ACC_COEF_B + - BUFFER(Cores[core].Revb.ACC_SRC_C1) * Cores[core].Revb.ACC_COEF_C + - BUFFER(Cores[core].Revb.ACC_SRC_D1) * Cores[core].Revb.ACC_COEF_D)>>16; + ACC0 = (s32)(BUFFER(thiscore.Revb.ACC_SRC_A0) * thiscore.Revb.ACC_COEF_A + + BUFFER(thiscore.Revb.ACC_SRC_B0) * thiscore.Revb.ACC_COEF_B + + BUFFER(thiscore.Revb.ACC_SRC_C0) * thiscore.Revb.ACC_COEF_C + + BUFFER(thiscore.Revb.ACC_SRC_D0) * thiscore.Revb.ACC_COEF_D)>>16; + ACC1 = (s32)(BUFFER(thiscore.Revb.ACC_SRC_A1) * thiscore.Revb.ACC_COEF_A + + BUFFER(thiscore.Revb.ACC_SRC_B1) * thiscore.Revb.ACC_COEF_B + + BUFFER(thiscore.Revb.ACC_SRC_C1) * thiscore.Revb.ACC_COEF_C + + BUFFER(thiscore.Revb.ACC_SRC_D1) * thiscore.Revb.ACC_COEF_D)>>16; - FB_A0 = BUFFER(Cores[core].Revb.MIX_DEST_A0 - Cores[core].Revb.FB_SRC_A); - FB_A1 = BUFFER(Cores[core].Revb.MIX_DEST_A1 - Cores[core].Revb.FB_SRC_A); - FB_B0 = BUFFER(Cores[core].Revb.MIX_DEST_B0 - Cores[core].Revb.FB_SRC_B); - FB_B1 = BUFFER(Cores[core].Revb.MIX_DEST_B1 - Cores[core].Revb.FB_SRC_B); + FB_A0 = BUFFER(thiscore.Revb.MIX_DEST_A0 - thiscore.Revb.FB_SRC_A); + FB_A1 = BUFFER(thiscore.Revb.MIX_DEST_A1 - thiscore.Revb.FB_SRC_A); + FB_B0 = BUFFER(thiscore.Revb.MIX_DEST_B0 - thiscore.Revb.FB_SRC_B); + FB_B1 = BUFFER(thiscore.Revb.MIX_DEST_B1 - thiscore.Revb.FB_SRC_B); - SBUFFER(Cores[core].Revb.MIX_DEST_A0) = clamp((ACC0 - FB_A0 * Cores[core].Revb.FB_ALPHA)>>16); - SBUFFER(Cores[core].Revb.MIX_DEST_A1) = clamp((ACC1 - FB_A1 * Cores[core].Revb.FB_ALPHA)>>16); - SBUFFER(Cores[core].Revb.MIX_DEST_B0) = clamp(((Cores[core].Revb.FB_ALPHA * ACC0) - FB_A0 * (65535 - Cores[core].Revb.FB_ALPHA) - FB_B0 * Cores[core].Revb.FB_X)>>16); - SBUFFER(Cores[core].Revb.MIX_DEST_B1) = clamp(((Cores[core].Revb.FB_ALPHA * ACC1) - FB_A1 * (65535 - Cores[core].Revb.FB_ALPHA) - FB_B1 * Cores[core].Revb.FB_X)>>16); + SBUFFER(thiscore.Revb.MIX_DEST_A0) = clamp((ACC0 - FB_A0 * thiscore.Revb.FB_ALPHA)>>16); + SBUFFER(thiscore.Revb.MIX_DEST_A1) = clamp((ACC1 - FB_A1 * thiscore.Revb.FB_ALPHA)>>16); + SBUFFER(thiscore.Revb.MIX_DEST_B0) = clamp(((thiscore.Revb.FB_ALPHA * ACC0) - FB_A0 * (65535 - thiscore.Revb.FB_ALPHA) - FB_B0 * thiscore.Revb.FB_X)>>16); + SBUFFER(thiscore.Revb.MIX_DEST_B1) = clamp(((thiscore.Revb.FB_ALPHA * ACC1) - FB_A1 * (65535 - thiscore.Revb.FB_ALPHA) - FB_B1 * thiscore.Revb.FB_X)>>16); - OUTPUT_SAMPLE_L=clamp((BUFFER(Cores[core].Revb.MIX_DEST_A0)+BUFFER(Cores[core].Revb.MIX_DEST_B0))>>2); - OUTPUT_SAMPLE_R=clamp((BUFFER(Cores[core].Revb.MIX_DEST_B1)+BUFFER(Cores[core].Revb.MIX_DEST_B1))>>2); + OUTPUT_SAMPLE_L=clamp((BUFFER(thiscore.Revb.MIX_DEST_A0)+BUFFER(thiscore.Revb.MIX_DEST_B0))>>2); + OUTPUT_SAMPLE_R=clamp((BUFFER(thiscore.Revb.MIX_DEST_B1)+BUFFER(thiscore.Revb.MIX_DEST_B1))>>2); } OutL=OUTPUT_SAMPLE_L; OutR=OUTPUT_SAMPLE_R; @@ -766,15 +918,13 @@ double rfactor=1; double cfactor=1; double diff=0; -s32 __forceinline ApplyVolume(s32 data, s32 volume) +static s32 __forceinline ApplyVolume(s32 data, s32 volume) { return (volume * data); } -void __forceinline MixVoice(s32& VValL, s32& VValR) +static void __forceinline MixVoice(V_Voice& vc, s32& VValL, s32& VValR) { - V_Voice& vc(Cores[core].Voices[voice]); - s32 Value=0; VValL=0; @@ -783,11 +933,16 @@ void __forceinline MixVoice(s32& VValL, s32& VValR) UpdateVolume(vc.VolumeL); UpdateVolume(vc.VolumeR); - if (Cores[core].Voices[voice].ADSR.Phase>0) + if (vc.ADSR.Phase>0) { - GetVoiceValues(Value); + if( vc.Noise ) + GetNoiseValues( Cores[core], vc, Value ); + else + GetVoiceValues( Cores[core], vc, Value ); + #ifdef _DEBUG vc.displayPeak = max(vc.displayPeak,abs(Value)); + #endif VValL=ApplyVolume(Value,(vc.VolumeL.Value)); VValR=ApplyVolume(Value,(vc.VolumeR.Value)); @@ -798,7 +953,7 @@ void __forceinline MixVoice(s32& VValL, s32& VValR) } -__forceinline void MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) +static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) { s32 InpL=0, InpR=0; @@ -813,23 +968,25 @@ __forceinline void MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) spu2Ms16(0xA00 + OutPos)=(s16)(ExtR>>16); } + V_Core& thiscore( Cores[core] ); + if((core==0)&&((PlayMode&4)!=4)) { - ReadInputPV(InpL,InpR); // get input data from input buffers + ReadInputPV(thiscore, InpL,InpR); // get input data from input buffers } if((core==1)&&((PlayMode&8)!=8)) { - ReadInputPV(InpL,InpR); // get input data from input buffers + ReadInputPV(thiscore, InpL,InpR); // get input data from input buffers } s32 InputPeak = max(abs(InpL),abs(InpR)); - if(Cores[core].AutoDMAPeak>16); // Mix in the Voice data - TDL += SDL * Cores[core].SndDryL; - TDR += SDR * Cores[core].SndDryR; - TWL += SWL * Cores[core].SndWetL; - TWR += SWR * Cores[core].SndWetR; + TDL += SDL * thiscore.SndDryL; + TDR += SDR * thiscore.SndDryR; + TWL += SWL * thiscore.SndWetL; + TWR += SWR * thiscore.SndWetR; // Mix in the Input data - TDL += InpL * Cores[core].InpDryL; - TDR += InpR * Cores[core].InpDryR; - TWL += InpL * Cores[core].InpWetL; - TWR += InpR * Cores[core].InpWetR; + TDL += InpL * thiscore.InpDryL; + TDR += InpR * thiscore.InpDryR; + TWL += InpL * thiscore.InpWetL; + TWR += InpR * thiscore.InpWetR; // Mix in the External (nothing/core0) data - TDL += ExtL * Cores[core].ExtDryL; - TDR += ExtR * Cores[core].ExtDryR; - TWL += ExtL * Cores[core].ExtWetL; - TWR += ExtR * Cores[core].ExtWetR; + TDL += ExtL * thiscore.ExtDryL; + TDR += ExtR * thiscore.ExtDryR; + TWL += ExtL * thiscore.ExtWetL; + TWR += ExtR * thiscore.ExtWetR; if(EffectsEnabled) { //Apply Effects - DoReverb(RVL,RVR,TWL>>16,TWR>>16); + DoReverb( thiscore, RVL,RVR,TWL>>16,TWR>>16); - TWL=ApplyVolume(RVL,VOL(Cores[core].FxL)); - TWR=ApplyVolume(RVR,VOL(Cores[core].FxR)); + TWL=ApplyVolume(RVL,VOL(thiscore.FxL)); + TWR=ApplyVolume(RVR,VOL(thiscore.FxR)); } else { @@ -888,12 +1046,12 @@ __forceinline void MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) OutR=(TDR + TWR); //Apply Master Volume - UpdateVolume(Cores[core].MasterL); - UpdateVolume(Cores[core].MasterR); + UpdateVolume(thiscore.MasterL); + UpdateVolume(thiscore.MasterR); - if (Cores[core].Mute==0) { - OutL=MulDiv(OutL,Cores[core].MasterL.Value,1<<16); - OutR=MulDiv(OutR,Cores[core].MasterR.Value,1<<16); + if (thiscore.Mute==0) { + OutL=MulDiv(OutL,thiscore.MasterL.Value,1<<16); + OutR=MulDiv(OutR,thiscore.MasterR.Value,1<<16); } else { @@ -903,7 +1061,7 @@ __forceinline void MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR) if((core==1)&&(PlayMode&8)) { - ReadInput(OutL,OutR); + ReadInput(thiscore, OutL,OutR); } if((core==0)&&(PlayMode&4)) @@ -923,12 +1081,13 @@ void __fastcall Mix() core=0; MixCore(ExtL,ExtR,0,0); - Peak0 = max(Peak0,max(ExtL,ExtR)); - core=1; MixCore(OutL,OutR,ExtL,ExtR); +#ifdef _DEBUG + Peak0 = max(Peak0,max(ExtL,ExtR)); Peak1 = max(Peak1,max(OutL,OutR)); +#endif ExtL=MulDiv(OutL,VolumeMultiplier,VolumeDivisor<<6); ExtR=MulDiv(OutR,VolumeMultiplier,VolumeDivisor<<6); @@ -1153,4 +1312,4 @@ buffer[MIX_DEST_B0] = (FB_ALPHA * ACC0) - FB_A0 * (FB_ALPHA^0x8000) - FB_B0 * FB buffer[MIX_DEST_B1] = (FB_ALPHA * ACC1) - FB_A1 * (FB_ALPHA^0x8000) - FB_B1 * FB_X; ----------------------------------------------------------------------------- -*/ \ No newline at end of file +*/ diff --git a/plugins/spu2ghz/mixer.h b/plugins/spu2ghz/mixer.h index d1f98a0446..ce809b95d3 100644 --- a/plugins/spu2ghz/mixer.h +++ b/plugins/spu2ghz/mixer.h @@ -23,5 +23,4 @@ void __fastcall Mix(); void __fastcall LogVolInit(); void __fastcall LogVolClose(); - -#endif // MIXER_H_INCLUDED // \ No newline at end of file +#endif // MIXER_H_INCLUDED // diff --git a/plugins/spu2ghz/sndout.cpp b/plugins/spu2ghz/sndout.cpp index 6996e3c1a7..c371bfdb96 100644 --- a/plugins/spu2ghz/sndout.cpp +++ b/plugins/spu2ghz/sndout.cpp @@ -126,16 +126,18 @@ public: #endif while((free=TickInterval) + //Update Mixing Progress + while(dClocks>=TickInterval) { - //Update Mixing Progress - while(dClocks>=TickInterval) + + //UpdateDebugDialog(); + + if(has_to_call_irq) { - - //UpdateDebugDialog(); - - if(has_to_call_irq) - { - ConLog(" * SPU2: Irq Called (%04x).\n",Spdif.Info); - has_to_call_irq=false; - if(_irqcallback) _irqcallback(); - } - - if(Cores[0].InitDelay>0) - { - Cores[0].InitDelay--; - if(Cores[0].InitDelay==0) - { - CoreReset(0); - } - } - - if(Cores[1].InitDelay>0) - { - Cores[1].InitDelay--; - if(Cores[1].InitDelay==0) - { - CoreReset(1); - } - } - - //Update DMA4 interrupt delay counter - if(Cores[0].DMAICounter>0) - { - Cores[0].DMAICounter-=TickInterval; - if(Cores[0].DMAICounter<=0) - { - Cores[0].MADR=Cores[0].TADR; - Cores[0].DMAICounter=0; - if(dma4callback) dma4callback(); - } - else { - Cores[0].MADR+=TickInterval<<1; - } - } - - //Update DMA7 interrupt delay counter - if(Cores[1].DMAICounter>0) - { - Cores[1].DMAICounter-=TickInterval; - if(Cores[1].DMAICounter<=0) - { - Cores[1].MADR=Cores[1].TADR; - Cores[1].DMAICounter=0; - if(dma7callback) dma7callback(); - } - else { - Cores[1].MADR+=TickInterval<<1; - } - } - - dClocks-=TickInterval; - lClocks+=TickInterval; - Cycles++; - - Mix(); + ConLog(" * SPU2: Irq Called (%04x).\n",Spdif.Info); + has_to_call_irq=false; + if(_irqcallback) _irqcallback(); } + + if(Cores[0].InitDelay>0) + { + Cores[0].InitDelay--; + if(Cores[0].InitDelay==0) + { + CoreReset(0); + } + } + + if(Cores[1].InitDelay>0) + { + Cores[1].InitDelay--; + if(Cores[1].InitDelay==0) + { + CoreReset(1); + } + } + + //Update DMA4 interrupt delay counter + if(Cores[0].DMAICounter>0) + { + Cores[0].DMAICounter-=TickInterval; + if(Cores[0].DMAICounter<=0) + { + Cores[0].MADR=Cores[0].TADR; + Cores[0].DMAICounter=0; + if(dma4callback) dma4callback(); + } + else { + Cores[0].MADR+=TickInterval<<1; + } + } + + //Update DMA7 interrupt delay counter + if(Cores[1].DMAICounter>0) + { + Cores[1].DMAICounter-=TickInterval; + if(Cores[1].DMAICounter<=0) + { + Cores[1].MADR=Cores[1].TADR; + Cores[1].DMAICounter=0; + if(dma7callback) dma7callback(); + } + else { + Cores[1].MADR+=TickInterval<<1; + } + } + + dClocks-=TickInterval; + lClocks+=TickInterval; + Cycles++; + + Mix(); } } @@ -1713,8 +1710,12 @@ void VoiceStart(int core,int vc) Cores[core].Voices[vc].Prev1=0; Cores[core].Voices[vc].Prev2=0; - Cores[core].Voices[vc].PV1=Cores[core].Voices[vc].PV2=0; - Cores[core].Voices[vc].PV3=Cores[core].Voices[vc].PV4=0; + // [Air]: Don't wipe interpolation values on VoiceStart. + // There'll be less popping/clicking if we just interpolate from the + // old sample and the new sample. + + //Cores[core].Voices[vc].PV1=Cores[core].Voices[vc].PV2=0; + //Cores[core].Voices[vc].PV3=Cores[core].Voices[vc].PV4=0; Cores[core].Regs.ENDX&=~(1<dwBytesRecorded = buf->dwBufferLength; + + buff->ReadSamples(tbuffer,BufferSize); + s16 *t = (s16*)buf->lpData; + s32 *s = (s32*)tbuffer; + + for(int bleh=0;bleh>8); } - if(free) - break; - else - Sleep(1); - } while(free==0); - WAVEHDR *buf=whbuffer+first; - - buf->dwBytesRecorded= buf->dwBufferLength; - - buff->ReadSamples(tbuffer,BufferSize); - s16 *t = (s16*)buf->lpData; - s32 *s = (s32*)tbuffer; - for(int i=0;i>8); + whbuffer[i].dwFlags&=~WHDR_DONE; + waveOutWrite(hwodevice,buf,sizeof(WAVEHDR)); + didsomething = true; } - waveOutWrite(hwodevice,buf,sizeof(WAVEHDR)); + if( didsomething ) + Sleep(1); + else + Sleep(0); } return 0; } @@ -137,9 +130,12 @@ public: } // Start Thread + // [Air]: The waveout code does not use wait objects, so setting a time critical + // priority level is a bad idea. Standard priority will do fine. The buffer will get the + // love it needs and won't suck resources idling pointlessly. waveout_running=true; - thread=CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread,this,0,&tid); - SetThreadPriority(thread,THREAD_PRIORITY_TIME_CRITICAL); + thread=CreateThread(NULL,0,(LPTHREAD_START_ROUTINE)RThread,this,0,&tid); + //SetThreadPriority( thread, THREAD_PRIORITY_TIME_CRITICAL ); return 0; }