mirror of https://github.com/PCSX2/pcsx2.git
SPU2ghz: Re-optimized the DMA write code so that DMA writes no longer incur as much of a cache-miss penalty. Also made a couple of minor tweaks to the timestretcher's overrun handler (which only affects people who like their games to run at 120 fps). ;)
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@264 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
2b93aa0709
commit
d8bd81f00d
|
@ -90,14 +90,16 @@ void DMALogClose() {
|
|||
}
|
||||
|
||||
|
||||
u16 DmaRead(u32 core) {
|
||||
__forceinline u16 DmaRead(u32 core)
|
||||
{
|
||||
Cores[core].TDA&=0xfffff;
|
||||
const u16 ret = (u16)spu2M_Read(Cores[core].TDA);
|
||||
Cores[core].TDA++;
|
||||
Cores[core].TDA&=0xfffff;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void DmaWrite(u32 core, u16 value) {
|
||||
__forceinline void DmaWrite(u32 core, u16 value)
|
||||
{
|
||||
spu2M_Write( Cores[core].TSA, value );
|
||||
Cores[core].TSA++;
|
||||
Cores[core].TSA&=0xfffff;
|
||||
|
@ -189,14 +191,21 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
|
|||
{
|
||||
u32 i;
|
||||
|
||||
u32 pa = ((u32)pMem)&7;
|
||||
u32 pm = Cores[core].TSA&0x7;
|
||||
|
||||
if(pa || pm)
|
||||
{
|
||||
printf("* Missaligned addr in DMA write!\n");
|
||||
// Perform an alignment check.
|
||||
// Not really important. Everything should work regardless,
|
||||
// but it could be indicative of an emulation foopah elsewhere.
|
||||
|
||||
uptr pa = ((uptr)pMem)&7;
|
||||
uptr pm = Cores[core].TSA&0x7;
|
||||
|
||||
if(pa || pm)
|
||||
{
|
||||
fprintf(stderr, "* SPU2 : Missaligned addr in DMA write!\n");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if(core==0)
|
||||
DMA4LogWrite(pMem,size<<1);
|
||||
else
|
||||
|
@ -204,9 +213,36 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
|
|||
|
||||
if(MsgDMA()) ConLog(" * SPU2: DMA%c Transfer of %d bytes to %x (%02x %x %04x).\n",(core==0)?'4':'7',size<<1,Cores[core].TSA,Cores[core].DMABits,Cores[core].AutoDMACtrl,(~Cores[core].Regs.ATTR)&0x7fff);
|
||||
|
||||
Cores[core].TDA=Cores[core].TSA;
|
||||
for (i=0;i<size;i++) {
|
||||
spu2M_Write( Cores[core].TDA, pMem[i] );
|
||||
// Optimized!
|
||||
// Instead of checking the adpcm cache for every word, we check for every block.
|
||||
// That way we can use the optimized fast write instruction to commit the memory.
|
||||
|
||||
Cores[core].TDA = Cores[core].TSA & 0xfffff;
|
||||
|
||||
{
|
||||
u32 nexta = Cores[core].TDA >> 3;
|
||||
u32 flagbitmask = 1ul << ( nexta & 31 );
|
||||
nexta >>= 5;
|
||||
|
||||
// Traverse from start to finish in 8 word blocks,
|
||||
// and clear the pcm cache flag for each block.
|
||||
u32 stmp = ( size + 7 ) >> 3; // round up
|
||||
for( i=0; i<stmp; i++ )
|
||||
{
|
||||
pcm_cache_flags[nexta] &= ~flagbitmask;
|
||||
flagbitmask <<= 1;
|
||||
if( flagbitmask == 0 )
|
||||
{
|
||||
nexta++;
|
||||
flagbitmask = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(i=0;i<size;i++)
|
||||
{
|
||||
*GetMemPtr( Cores[core].TDA ) = pMem[i];
|
||||
//spu2M_Write( Cores[core].TDA, pMem[i] );
|
||||
Cores[core].TDA++;
|
||||
Cores[core].TDA&=0xfffff;
|
||||
}
|
||||
|
|
|
@ -23,9 +23,9 @@ void DMA4LogWrite(void *lpData, u32 ulSize);
|
|||
void DMA7LogWrite(void *lpData, u32 ulSize);
|
||||
void DMALogClose();
|
||||
|
||||
void DmaWrite(u32 core, u16 data);
|
||||
u16 DmaRead(u32 core);
|
||||
extern void DmaWrite(u32 core, u16 data);
|
||||
extern u16 DmaRead(u32 core);
|
||||
|
||||
void AutoDMAReadBuffer(int core, int mode);
|
||||
extern void AutoDMAReadBuffer(int core, int mode);
|
||||
|
||||
#endif // DMA_H_INCLUDED //
|
|
@ -998,7 +998,7 @@ double rfactor=1;
|
|||
double cfactor=1;
|
||||
double diff=0;
|
||||
|
||||
static s32 __forceinline ApplyVolume(s32 data, s32 volume)
|
||||
static __forceinline s32 ApplyVolume(s32 data, s32 volume)
|
||||
{
|
||||
return (volume * data);
|
||||
}
|
||||
|
@ -1006,7 +1006,7 @@ static s32 __forceinline ApplyVolume(s32 data, s32 volume)
|
|||
// writes a signed value to the SPU2 ram
|
||||
// Performs no cache invalidation -- use only for dynamic memory ranges
|
||||
// of the SPU2 (between 0x0000 and SPU2_DYN_MEMLINE)
|
||||
static void __forceinline spu2M_WriteFast( u32 addr, s16 value )
|
||||
static __forceinline void spu2M_WriteFast( u32 addr, s16 value )
|
||||
{
|
||||
// throw an assertion if the memory range is invalid:
|
||||
jASSUME( addr < SPU2_DYN_MEMLINE );
|
||||
|
@ -1014,7 +1014,7 @@ static void __forceinline spu2M_WriteFast( u32 addr, s16 value )
|
|||
}
|
||||
|
||||
|
||||
static void __forceinline MixVoice( V_Core& thiscore, V_Voice& vc, s32& VValL, s32& VValR )
|
||||
static __forceinline void MixVoice( V_Core& thiscore, V_Voice& vc, s32& VValL, s32& VValR )
|
||||
{
|
||||
s32 Value=0;
|
||||
|
||||
|
|
|
@ -187,9 +187,7 @@ public:
|
|||
// Dump samples from the read portion of the buffer instead of dropping
|
||||
// the newly written stuff.
|
||||
|
||||
// Toss half the buffer plus whatever's being written anew:
|
||||
s32 comp = GetAlignedBufferSize( (size + nSamples ) / 2 );
|
||||
if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize;
|
||||
s32 comp;
|
||||
|
||||
if( timeStretchEnabled )
|
||||
{
|
||||
|
@ -199,8 +197,18 @@ public:
|
|||
eTempo += eTempo * 0.25f;
|
||||
if( eTempo > 7.5f ) eTempo = 5.0f;
|
||||
pSoundTouch->setTempo( eTempo );
|
||||
freezeTempo = (comp / SndOutPacketSize) - 1;
|
||||
if( freezeTempo < 1 ) freezeTempo = 1;
|
||||
freezeTempo = 0; // disabled tempo freeze for now. May not be needed anymore.
|
||||
|
||||
// Throw out just a little bit (one packet worth) to help
|
||||
// give the TS some room to work:
|
||||
|
||||
comp = SndOutPacketSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Toss half the buffer plus whatever's being written anew:
|
||||
s32 comp = GetAlignedBufferSize( (size + nSamples ) / 2 );
|
||||
if( comp > (size-SndOutPacketSize) ) comp = size-SndOutPacketSize;
|
||||
}
|
||||
|
||||
data-=comp;
|
||||
|
@ -241,7 +249,7 @@ public:
|
|||
quietSampleCount = 0;
|
||||
if( underrun_freeze )
|
||||
{
|
||||
int toFill = (int)(size * ( timeStretchEnabled ? 0.1 : 0.70 ) );
|
||||
int toFill = (int)(size * ( timeStretchEnabled ? 0.1 : 0.50 ) );
|
||||
toFill = GetAlignedBufferSize( toFill );
|
||||
|
||||
// toFill is now aligned to a SndOutPacket
|
||||
|
@ -599,6 +607,11 @@ s32 SndInit()
|
|||
sndTempBuffer = new s32[SndOutPacketSize];
|
||||
sndTempBuffer16 = new s16[SndOutPacketSize];
|
||||
|
||||
// clear buffers!
|
||||
// Fixes loopy sounds on emu resets.
|
||||
memset( sndTempBuffer, 0, sizeof(s32) * SndOutPacketSize );
|
||||
memset( sndTempBuffer16, 0, sizeof(s16) * SndOutPacketSize );
|
||||
|
||||
cTempo = 1.0;
|
||||
eTempo = 1.0;
|
||||
|
||||
|
|
|
@ -153,13 +153,15 @@ void SysMessage(char *fmt, ...)
|
|||
MessageBox(0, tmp, "SPU2ghz Msg", 0);
|
||||
}
|
||||
|
||||
s16 __forceinline * __fastcall GetMemPtr(u32 addr)
|
||||
__forceinline s16 * __fastcall GetMemPtr(u32 addr)
|
||||
{
|
||||
// In case you're wondering, this assert is the reason spu2ghz
|
||||
// runs so incredibly slow in Debug mode. :P
|
||||
assert(addr<0x100000);
|
||||
return (_spu2mem+addr);
|
||||
}
|
||||
|
||||
s16 __forceinline __fastcall spu2M_Read( u32 addr )
|
||||
__forceinline s16 __fastcall spu2M_Read( u32 addr )
|
||||
{
|
||||
return *GetMemPtr( addr & 0xfffff );
|
||||
}
|
||||
|
@ -168,7 +170,7 @@ s16 __forceinline __fastcall spu2M_Read( u32 addr )
|
|||
// Invalidates the ADPCM cache in the process.
|
||||
// Optimization note: don't use __forceinline because the footprint of this
|
||||
// function is a little too heavy now. Better to let the compiler decide.
|
||||
void __inline __fastcall spu2M_Write( u32 addr, s16 value )
|
||||
__inline void __fastcall spu2M_Write( u32 addr, s16 value )
|
||||
{
|
||||
// Make sure the cache is invalidated:
|
||||
// (note to self : addr address WORDs, not bytes)
|
||||
|
@ -182,7 +184,7 @@ void __inline __fastcall spu2M_Write( u32 addr, s16 value )
|
|||
}
|
||||
|
||||
// writes an unsigned value to the SPU2 ram
|
||||
void __inline __fastcall spu2M_Write( u32 addr, u16 value )
|
||||
__inline void __fastcall spu2M_Write( u32 addr, u16 value )
|
||||
{
|
||||
spu2M_Write( addr, (s16)value );
|
||||
}
|
||||
|
@ -620,7 +622,6 @@ void UpdateDebugDialog()
|
|||
}
|
||||
#endif
|
||||
|
||||
//SHOULD be 768, but 751/752 seems to get better results
|
||||
#define TickInterval 768
|
||||
|
||||
u32 TicksCore=0;
|
||||
|
@ -655,10 +656,10 @@ void __fastcall TimeUpdate(u32 cClocks, u32 syncType)
|
|||
// If for some reason our clock value seems way off base, just mix
|
||||
// out a little bit, skip the rest, and hope the ship "rights" itself later on.
|
||||
|
||||
if( dClocks > TickInterval*48 )
|
||||
if( dClocks > TickInterval*72 )
|
||||
{
|
||||
ConLog( " * SPU2 > TimeUpdate Sanity Check (Tick Delta: %d) (PS2 Ticks: %d)\n", dClocks/TickInterval, cClocks/TickInterval );
|
||||
dClocks = TickInterval*48;
|
||||
dClocks = TickInterval*72;
|
||||
lClocks = cClocks-dClocks;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue