From d8bd81f00ddf57050da77bc6bb70ecd11bc26c2a Mon Sep 17 00:00:00 2001 From: "Jake.Stine" Date: Sat, 1 Nov 2008 07:34:34 +0000 Subject: [PATCH] SPU2ghz: Re-optimized the DMA write code so that DMA writes don't incur as much cache miss penalty anymore. Also made a couple minor tweaks to the timestretcher's overrun handler (Which only affects people who like their games to run at 120 fps. ;) git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@264 a6443dda-0b58-4228-96e9-037be469359c --- plugins/spu2ghz/dma.cpp | 58 ++++++++++++++++++++++++++++++-------- plugins/spu2ghz/dma.h | 6 ++-- plugins/spu2ghz/mixer.cpp | 6 ++-- plugins/spu2ghz/sndout.cpp | 25 ++++++++++++---- plugins/spu2ghz/spu2.cpp | 15 +++++----- 5 files changed, 80 insertions(+), 30 deletions(-) diff --git a/plugins/spu2ghz/dma.cpp b/plugins/spu2ghz/dma.cpp index 2bf9acfe64..abf5fc380c 100644 --- a/plugins/spu2ghz/dma.cpp +++ b/plugins/spu2ghz/dma.cpp @@ -90,14 +90,16 @@ void DMALogClose() { } -u16 DmaRead(u32 core) { +__forceinline u16 DmaRead(u32 core) +{ + Cores[core].TDA&=0xfffff; const u16 ret = (u16)spu2M_Read(Cores[core].TDA); Cores[core].TDA++; - Cores[core].TDA&=0xfffff; return ret; } -void DmaWrite(u32 core, u16 value) { +__forceinline void DmaWrite(u32 core, u16 value) +{ spu2M_Write( Cores[core].TSA, value ); Cores[core].TSA++; Cores[core].TSA&=0xfffff; @@ -189,14 +191,21 @@ void DoDMAWrite(int core,u16 *pMem,u32 size) { u32 i; - u32 pa = ((u32)pMem)&7; - u32 pm = Cores[core].TSA&0x7; - - if(pa || pm) { - printf("* Missaligned addr in DMA write!\n"); + // Perform an alignment check. + // Not really important. Everything should work regardless, + // but it could be indicative of an emulation faux pas elsewhere.
+ + uptr pa = ((uptr)pMem)&7; + uptr pm = Cores[core].TSA&0x7; + + if(pa || pm) + { + fprintf(stderr, "* SPU2 : Missaligned addr in DMA write!\n"); + } } + if(core==0) DMA4LogWrite(pMem,size<<1); else @@ -204,9 +213,36 @@ void DoDMAWrite(int core,u16 *pMem,u32 size) if(MsgDMA()) ConLog(" * SPU2: DMA%c Transfer of %d bytes to %x (%02x %x %04x).\n",(core==0)?'4':'7',size<<1,Cores[core].TSA,Cores[core].DMABits,Cores[core].AutoDMACtrl,(~Cores[core].Regs.ATTR)&0x7fff); - Cores[core].TDA=Cores[core].TSA; - for (i=0;i> 3; + u32 flagbitmask = 1ul << ( nexta & 31 ); + nexta >>= 5; + + // Traverse from start to finish in 8 word blocks, + // and clear the pcm cache flag for each block. + u32 stmp = ( size + 7 ) >> 3; // round up + for( i=0; i (size-SndOutPacketSize) ) comp = size-SndOutPacketSize; + s32 comp; if( timeStretchEnabled ) { @@ -199,8 +197,18 @@ public: eTempo += eTempo * 0.25f; if( eTempo > 7.5f ) eTempo = 5.0f; pSoundTouch->setTempo( eTempo ); - freezeTempo = (comp / SndOutPacketSize) - 1; - if( freezeTempo < 1 ) freezeTempo = 1; + freezeTempo = 0; // disabled tempo freeze for now. May not be needed anymore. + + // Throw out just a little bit (one packet worth) to help + // give the TS some room to work: + + comp = SndOutPacketSize; + } + else + { + // Toss half the buffer plus whatever's being written anew: + s32 comp = GetAlignedBufferSize( (size + nSamples ) / 2 ); + if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize; } data-=comp; @@ -241,7 +249,7 @@ public: quietSampleCount = 0; if( underrun_freeze ) { - int toFill = (int)(size * ( timeStretchEnabled ? 0.1 : 0.70 ) ); + int toFill = (int)(size * ( timeStretchEnabled ? 0.1 : 0.50 ) ); toFill = GetAlignedBufferSize( toFill ); // toFill is now aligned to a SndOutPacket @@ -599,6 +607,11 @@ s32 SndInit() sndTempBuffer = new s32[SndOutPacketSize]; sndTempBuffer16 = new s16[SndOutPacketSize]; + // clear buffers! + // Fixes loopy sounds on emu resets. 
+ memset( sndTempBuffer, 0, sizeof(s32) * SndOutPacketSize ); + memset( sndTempBuffer16, 0, sizeof(s16) * SndOutPacketSize ); + cTempo = 1.0; eTempo = 1.0; diff --git a/plugins/spu2ghz/spu2.cpp b/plugins/spu2ghz/spu2.cpp index 573523759c..c43b9342fa 100644 --- a/plugins/spu2ghz/spu2.cpp +++ b/plugins/spu2ghz/spu2.cpp @@ -153,13 +153,15 @@ void SysMessage(char *fmt, ...) MessageBox(0, tmp, "SPU2ghz Msg", 0); } -s16 __forceinline * __fastcall GetMemPtr(u32 addr) +__forceinline s16 * __fastcall GetMemPtr(u32 addr) { + // In case you're wondering, this assert is the reason spu2ghz + // runs so incredibly slow in Debug mode. :P assert(addr<0x100000); return (_spu2mem+addr); } -s16 __forceinline __fastcall spu2M_Read( u32 addr ) +__forceinline s16 __fastcall spu2M_Read( u32 addr ) { return *GetMemPtr( addr & 0xfffff ); } @@ -168,7 +170,7 @@ s16 __forceinline __fastcall spu2M_Read( u32 addr ) // Invalidates the ADPCM cache in the process. // Optimization note: don't use __forceinline because the footprint of this // function is a little too heavy now. Better to let the compiler decide. -void __inline __fastcall spu2M_Write( u32 addr, s16 value ) +__inline void __fastcall spu2M_Write( u32 addr, s16 value ) { // Make sure the cache is invalidated: // (note to self : addr address WORDs, not bytes) @@ -182,7 +184,7 @@ void __inline __fastcall spu2M_Write( u32 addr, s16 value ) } // writes an unsigned value to the SPU2 ram -void __inline __fastcall spu2M_Write( u32 addr, u16 value ) +__inline void __fastcall spu2M_Write( u32 addr, u16 value ) { spu2M_Write( addr, (s16)value ); } @@ -620,7 +622,6 @@ void UpdateDebugDialog() } #endif -//SHOULD be 768, but 751/752 seems to get better results #define TickInterval 768 u32 TicksCore=0; @@ -655,10 +656,10 @@ void __fastcall TimeUpdate(u32 cClocks, u32 syncType) // If for some reason our clock value seems way off base, just mix // out a little bit, skip the rest, and hope the ship "rights" itself later on.
- if( dClocks > TickInterval*48 ) + if( dClocks > TickInterval*72 ) { ConLog( " * SPU2 > TimeUpdate Sanity Check (Tick Delta: %d) (PS2 Ticks: %d)\n", dClocks/TickInterval, cClocks/TickInterval ); - dClocks = TickInterval*48; + dClocks = TickInterval*72; lClocks = cClocks-dClocks; }