SPU2ghz: Re-optimized the DMA write code so that DMA writes don't incur as much cache miss penalty anymore. Also made a couple minor tweaks to the timestretcher's overrun handler (Which only affects people who like their games to run at 120 fps. ;)

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@264 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2008-11-01 07:34:34 +00:00 committed by Gregory Hainaut
parent 2b93aa0709
commit d8bd81f00d
5 changed files with 80 additions and 30 deletions

View File

@ -90,14 +90,16 @@ void DMALogClose() {
}
u16 DmaRead(u32 core) {
__forceinline u16 DmaRead(u32 core)
{
Cores[core].TDA&=0xfffff;
const u16 ret = (u16)spu2M_Read(Cores[core].TDA);
Cores[core].TDA++;
Cores[core].TDA&=0xfffff;
return ret;
}
void DmaWrite(u32 core, u16 value) {
__forceinline void DmaWrite(u32 core, u16 value)
{
spu2M_Write( Cores[core].TSA, value );
Cores[core].TSA++;
Cores[core].TSA&=0xfffff;
@ -189,14 +191,21 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
{
u32 i;
u32 pa = ((u32)pMem)&7;
u32 pm = Cores[core].TSA&0x7;
if(pa || pm)
{
printf("* Missaligned addr in DMA write!\n");
// Perform an alignment check.
// Not really important. Everything should work regardless,
// but it could be indicative of an emulation faux pas elsewhere.
uptr pa = ((uptr)pMem)&7;
uptr pm = Cores[core].TSA&0x7;
if(pa || pm)
{
fprintf(stderr, "* SPU2 : Missaligned addr in DMA write!\n");
}
}
if(core==0)
DMA4LogWrite(pMem,size<<1);
else
@ -204,9 +213,36 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
if(MsgDMA()) ConLog(" * SPU2: DMA%c Transfer of %d bytes to %x (%02x %x %04x).\n",(core==0)?'4':'7',size<<1,Cores[core].TSA,Cores[core].DMABits,Cores[core].AutoDMACtrl,(~Cores[core].Regs.ATTR)&0x7fff);
Cores[core].TDA=Cores[core].TSA;
for (i=0;i<size;i++) {
spu2M_Write( Cores[core].TDA, pMem[i] );
// Optimized!
// Instead of checking the adpcm cache for every word, we check for every block.
// That way we can use the optimized fast write instruction to commit the memory.
Cores[core].TDA = Cores[core].TSA & 0xfffff;
{
	// Invalidate the ADPCM cache for every 8-word block the transfer touches.
	// pcm_cache_flags holds one flag bit per block, 32 blocks per entry.
	//
	// The number of blocks is derived from the start offset within the first
	// block plus the transfer size, so a start address that is not 8-word
	// aligned still gets its trailing partial block invalidated.  Block
	// indices wrap at the 20-bit address limit, mirroring the TDA wrap in the
	// copy loop that follows, so the flag index never advances past the entry
	// that covers address 0xfffff.
	const u32 totalBlocks = ( 0xfffff >> 3 ) + 1;
	const u32 startAddr   = Cores[core].TDA & 0xfffff;

	if( size != 0 )
	{
		u32 blocks;
		if( size >= 0x100000 )
			blocks = totalBlocks;		// transfer covers all of memory
		else
			blocks = ( ( startAddr & 7 ) + size + 7 ) >> 3;	// round up

		if( blocks > totalBlocks ) blocks = totalBlocks;

		u32 block = startAddr >> 3;
		for( u32 n=0; n<blocks; n++ )
		{
			pcm_cache_flags[block >> 5] &= ~( (u32)1 << ( block & 31 ) );
			block = ( block + 1 ) & ( totalBlocks - 1 );
		}
	}
}
for(i=0;i<size;i++)
{
*GetMemPtr( Cores[core].TDA ) = pMem[i];
//spu2M_Write( Cores[core].TDA, pMem[i] );
Cores[core].TDA++;
Cores[core].TDA&=0xfffff;
}

View File

@ -23,9 +23,9 @@ void DMA4LogWrite(void *lpData, u32 ulSize);
void DMA7LogWrite(void *lpData, u32 ulSize);
void DMALogClose();
void DmaWrite(u32 core, u16 data);
u16 DmaRead(u32 core);
extern void DmaWrite(u32 core, u16 data);
extern u16 DmaRead(u32 core);
void AutoDMAReadBuffer(int core, int mode);
extern void AutoDMAReadBuffer(int core, int mode);
#endif // DMA_H_INCLUDED //

View File

@ -998,7 +998,7 @@ double rfactor=1;
double cfactor=1;
double diff=0;
static s32 __forceinline ApplyVolume(s32 data, s32 volume)
static __forceinline s32 ApplyVolume(s32 data, s32 volume)
{
return (volume * data);
}
@ -1006,7 +1006,7 @@ static s32 __forceinline ApplyVolume(s32 data, s32 volume)
// writes a signed value to the SPU2 ram
// Performs no cache invalidation -- use only for dynamic memory ranges
// of the SPU2 (between 0x0000 and SPU2_DYN_MEMLINE)
static void __forceinline spu2M_WriteFast( u32 addr, s16 value )
static __forceinline void spu2M_WriteFast( u32 addr, s16 value )
{
// throw an assertion if the memory range is invalid:
jASSUME( addr < SPU2_DYN_MEMLINE );
@ -1014,7 +1014,7 @@ static void __forceinline spu2M_WriteFast( u32 addr, s16 value )
}
static void __forceinline MixVoice( V_Core& thiscore, V_Voice& vc, s32& VValL, s32& VValR )
static __forceinline void MixVoice( V_Core& thiscore, V_Voice& vc, s32& VValL, s32& VValR )
{
s32 Value=0;

View File

@ -187,9 +187,7 @@ public:
// Dump samples from the read portion of the buffer instead of dropping
// the newly written stuff.
// Toss half the buffer plus whatever's being written anew:
s32 comp = GetAlignedBufferSize( (size + nSamples ) / 2 );
if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize;
s32 comp;
if( timeStretchEnabled )
{
@ -199,8 +197,18 @@ public:
eTempo += eTempo * 0.25f;
if( eTempo > 7.5f ) eTempo = 5.0f;
pSoundTouch->setTempo( eTempo );
freezeTempo = (comp / SndOutPacketSize) - 1;
if( freezeTempo < 1 ) freezeTempo = 1;
freezeTempo = 0; // disabled tempo freeze for now. May not be needed anymore.
// Throw out just a little bit (one packet worth) to help
// give the TS some room to work:
comp = SndOutPacketSize;
}
else
{
	// Toss half the buffer plus whatever's being written anew.
	// Assign to the enclosing 'comp' rather than declaring a new one here:
	// a local declaration would shadow it and leave the outer variable
	// uninitialized for the 'data-=comp' adjustment that follows.
	comp = GetAlignedBufferSize( (size + nSamples ) / 2 );
	if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize;
}
data-=comp;
@ -241,7 +249,7 @@ public:
quietSampleCount = 0;
if( underrun_freeze )
{
int toFill = (int)(size * ( timeStretchEnabled ? 0.1 : 0.70 ) );
int toFill = (int)(size * ( timeStretchEnabled ? 0.1 : 0.50 ) );
toFill = GetAlignedBufferSize( toFill );
// toFill is now aligned to a SndOutPacket
@ -599,6 +607,11 @@ s32 SndInit()
sndTempBuffer = new s32[SndOutPacketSize];
sndTempBuffer16 = new s16[SndOutPacketSize];
// clear buffers!
// Fixes loopy sounds on emu resets.
memset( sndTempBuffer, 0, sizeof(s32) * SndOutPacketSize );
memset( sndTempBuffer16, 0, sizeof(s16) * SndOutPacketSize );
cTempo = 1.0;
eTempo = 1.0;

View File

@ -153,13 +153,15 @@ void SysMessage(char *fmt, ...)
MessageBox(0, tmp, "SPU2ghz Msg", 0);
}
s16 __forceinline * __fastcall GetMemPtr(u32 addr)
__forceinline s16 * __fastcall GetMemPtr(u32 addr)
{
// In case you're wondering, this assert is the reason spu2ghz
// runs so incredibly slow in Debug mode. :P
assert(addr<0x100000);
return (_spu2mem+addr);
}
s16 __forceinline __fastcall spu2M_Read( u32 addr )
__forceinline s16 __fastcall spu2M_Read( u32 addr )
{
return *GetMemPtr( addr & 0xfffff );
}
@ -168,7 +170,7 @@ s16 __forceinline __fastcall spu2M_Read( u32 addr )
// Invalidates the ADPCM cache in the process.
// Optimization note: don't use __forceinline because the footprint of this
// function is a little too heavy now. Better to let the compiler decide.
void __inline __fastcall spu2M_Write( u32 addr, s16 value )
__inline void __fastcall spu2M_Write( u32 addr, s16 value )
{
// Make sure the cache is invalidated:
// (note to self : addr addresses WORDs, not bytes)
@ -182,7 +184,7 @@ void __inline __fastcall spu2M_Write( u32 addr, s16 value )
}
// writes an unsigned value to the SPU2 ram
void __inline __fastcall spu2M_Write( u32 addr, u16 value )
__inline void __fastcall spu2M_Write( u32 addr, u16 value )
{
spu2M_Write( addr, (s16)value );
}
@ -620,7 +622,6 @@ void UpdateDebugDialog()
}
#endif
//SHOULD be 768, but 751/752 seems to get better results
#define TickInterval 768
u32 TicksCore=0;
@ -655,10 +656,10 @@ void __fastcall TimeUpdate(u32 cClocks, u32 syncType)
// If for some reason our clock value seems way off base, just mix
// out a little bit, skip the rest, and hope the ship "rights" itself later on.
if( dClocks > TickInterval*48 )
if( dClocks > TickInterval*72 )
{
ConLog( " * SPU2 > TimeUpdate Sanity Check (Tick Delta: %d) (PS2 Ticks: %d)\n", dClocks/TickInterval, cClocks/TickInterval );
dClocks = TickInterval*48;
dClocks = TickInterval*72;
lClocks = cClocks-dClocks;
}