SPU2ghz: Fixed some problems with the PCM cache that caused crackling sound in Star Ocean 3.

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@678 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
Jake.Stine 2009-02-01 20:43:12 +00:00 committed by Gregory Hainaut
parent 60c3fe80f5
commit 6ec4518d35
5 changed files with 94 additions and 83 deletions

View File

@ -1,25 +1,22 @@

Microsoft Visual Studio Solution File, Format Version 10.00
# Visual Studio 2008
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU2ghz_vs2008", "spu2ghz\SPU2ghz_vs2008.vcproj", "{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "SPU2ghz-Pg", "spu2ghz\SPU2ghz_vs2008.vcproj", "{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug SSE2|Win32 = Debug SSE2|Win32
Debug|Win32 = Debug|Win32
DebugFast|Win32 = DebugFast|Win32
Public Release|Win32 = Public Release|Win32
DebugStrict|Win32 = DebugStrict|Win32
Devel|Win32 = Devel|Win32
Release|Win32 = Release|Win32
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug|Win32.ActiveCfg = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Debug|Win32.Build.0 = Debug|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.DebugFast|Win32.ActiveCfg = DebugFast|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.DebugFast|Win32.Build.0 = DebugFast|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Public Release|Win32.ActiveCfg = Public Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Public Release|Win32.Build.0 = Public Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.DebugStrict|Win32.ActiveCfg = DebugStrict|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.DebugStrict|Win32.Build.0 = DebugStrict|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Devel|Win32.ActiveCfg = Devel|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Devel|Win32.Build.0 = Devel|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release|Win32.ActiveCfg = Release|Win32
{5307BBB7-EBB9-4AA4-8CB6-A94EC473C8C4}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection

View File

@ -224,10 +224,10 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
u32 buff1end = Cores[core].TSA + size;
u32 buff2end=0;
if( buff1end > 0xfffff )
if( buff1end > 0x100000 )
{
buff2end = buff1end - 0xfffff;
buff1end = 0xfffff;
buff2end = buff1end - 0x100000;
buff1end = 0x100000;
}
// Ideally we would only mask bits actually written to, but it's a complex algorithm
@ -247,9 +247,12 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
// just rounding the count upward could cause problems if both start and end
// points are mis-aligned.
const u32 roundUp = (1<<(3+3))-1;
const u32 flagTSA = Cores[core].TSA >> (3+3);
const u32 flagTDA = (buff1end + roundUp) >> (3 + 3); // endpoint, rounded up
// indexer scalar - 8 addresses per block, and 8 bits per byte:
const u32 indexer_scalar = 8*8;
const u32 roundUp = indexer_scalar-1;
const u32 flagTSA = Cores[core].TSA / indexer_scalar;
const u32 flagTDA = (buff1end + roundUp) / indexer_scalar; // endpoint, rounded up
u8* cache = (u8*)pcm_cache_flags;
memset( &cache[flagTSA], 0, flagTDA - flagTSA );
@ -259,9 +262,13 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
// second branch needs cleared:
// It starts at the beginning of memory and moves forward to buff2end
const u32 endpt2 = (buff2end + roundUp) >> (3+3);
// endpoint cache should be irrelevant, since it's almost certainly dynamic
// memory (registers and such)
//const u32 endpt2 = (buff2end + roundUp) / indexer_scalar;
//memset( pcm_cache_flags, 0, endpt2 );
memcpy( GetMemPtr( 0 ), &pMem[buff1size], buff2end*2 );
memset( pcm_cache_flags, 0, endpt2 );
Cores[core].TDA = buff2end;
@ -315,8 +322,8 @@ void SPU2readDMA(int core, u16* pMem, u32 size)
for (i=0;i<size;i++)
pMem[i]=DmaRead(core);
i=Cores[core].TSA;
Cores[core].TDA=Cores[core].TSA+size+0x1f;
Cores[core].TSA=Cores[core].TDA&0xFFFFF;
Cores[core].TDA=Cores[core].TSA+size+0x20;
Cores[core].TSA=(Cores[core].TSA+size)&0xFFFFF;
if((Cores[core].TDA>0xFFFFF)||((Cores[core].TSA<=Cores[core].IRQA)&&(i>=Cores[core].IRQA))) {
if(Cores[core].IRQEnable)
{

View File

@ -51,7 +51,6 @@ double srate_pv=1.0;
extern u32 PsxRates[160];
static const s32 ADSR_MAX_VOL = 0x7fffffff;
static const s32 SPU2_DYN_MEMLINE = 0x3600;
// Performs a 64-bit multiplication between two values and returns the
// high 32 bits as a result (discarding the fractional 32 bits).
@ -185,7 +184,8 @@ static void __forceinline XA_decode_block_unsaturated(s16* buffer, const s16* bl
static void __forceinline IncrementNextA( const V_Core& thiscore, V_Voice& vc )
{
if((vc.NextA==thiscore.IRQA)&&(thiscore.IRQEnable)) {
if((vc.NextA==thiscore.IRQA)&&(thiscore.IRQEnable))
{
#ifndef PUBLIC
ConLog(" * SPU2: IRQ Called (IRQ passed).\n");
#endif
@ -198,8 +198,8 @@ static void __forceinline IncrementNextA( const V_Core& thiscore, V_Voice& vc )
}
u32 *pcm_cache_flags=NULL;
s16 *pcm_cache_data=NULL;
u32 *pcm_cache_flags = NULL;
s16 *pcm_cache_data = NULL;
#ifndef PUBLIC
int g_counter_cache_hits=0;
@ -283,16 +283,11 @@ static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Vo
//XA_decode_block_unsaturated( vc.SBuffer, memptr, vc.Prev1, vc.Prev2 );
//vc.LoopEnd = (data>> 8)&1;
//vc.Loop = (data>> 9)&1;
//vc.LoopStart= (data>>10)&1;
}
vc.SCurrent = 0;
if( (vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode )
{
vc.LoopStartA=vc.NextA;
}
vc.LoopStartA = vc.NextA;
// [Air] : Increment will get called below (change made to avoid needless code cache clutter)
//IncrementNextA( thiscore, vc );

View File

@ -114,7 +114,7 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL,DWORD dwReason,LPVOID lpvReserved)
return TRUE;
}
void SysMessage(char *fmt, ...)
void SysMessage(const char *fmt, ...)
{
va_list list;
char tmp[512];
@ -226,7 +226,7 @@ __inline void __fastcall spu2M_Write( u32 addr, s16 value )
addr &= 0xfffff;
const u32 nexta = addr >> 3; // 8 words per encoded block.
const u32 flagbitmask = 1ul<<(nexta & 31); // 32 flags per array entry
pcm_cache_flags[nexta>>5] &= ~flagbitmask;
pcm_cache_flags[nexta/32] &= ~flagbitmask;
*GetMemPtr( addr ) = value;
}
@ -323,6 +323,11 @@ void CoreReset(int c)
extern void LowPassFilterInit();
// number of cachable ADPCM blocks (any blocks above the SPU2_DYN_MEMLINE)
static const int pcm_BlockCount = 0x100000 / 8; // (0x100000-SPU2_DYN_MEMLINE) / 8;
static const int pcm_DecodedSamplesPerBlock = 28;
EXPORT_C_(s32) SPU2init()
{
#define MAKESURE(a,b) \
@ -365,8 +370,8 @@ EXPORT_C_(s32) SPU2init()
// Expanded: 16 bytes expands to 56 bytes [3.5:1 ratio]
// Resulting in 2MB * 3.5.
pcm_cache_flags = (u32*)calloc( 0x200000 / (16*32), 4 );
pcm_cache_data = (s16*)calloc( (0x200000 / 16) * 28, 2 );
pcm_cache_flags = (u32*)calloc( pcm_BlockCount / 32, sizeof(u32) );
pcm_cache_data = (s16*)calloc( pcm_BlockCount * pcm_DecodedSamplesPerBlock, sizeof(s16) );
if( (spu2regs == NULL) || (_spu2mem == NULL) ||
(pcm_cache_data == NULL) || (pcm_cache_flags == NULL) )
@ -531,11 +536,11 @@ EXPORT_C_(void) SPU2shutdown()
spu2init = false;
free(spu2regs);
free(_spu2mem);
SAFE_FREE(spu2regs);
SAFE_FREE(_spu2mem);
free( pcm_cache_flags );
free( pcm_cache_data );
SAFE_FREE( pcm_cache_flags );
SAFE_FREE( pcm_cache_data );
spu2regs = NULL;
_spu2mem = NULL;
@ -1696,11 +1701,9 @@ EXPORT_C_(u16) SPU2read(u32 rmem)
return ret;
}
#define PCM_CACHE_BLOCK_COUNT ( 0x200000 / 16 )
struct cacheFreezeData
{
u32 flags[PCM_CACHE_BLOCK_COUNT/32];
u32 flags[pcm_BlockCount/32];
s16 startData;
};
@ -1750,18 +1753,22 @@ static int getFreezeSize()
// calculate the amount of memory consumed by our cache:
for( int bidx=0; bidx<PCM_CACHE_BLOCK_COUNT; bidx++ )
for( int bidx=0; bidx<pcm_BlockCount; bidx++ )
{
const u32 flagmask = 1ul << (bidx & 31);
if( pcm_cache_flags[bidx>>5] & flagmask )
{
size += 28*2;
}
if( pcm_cache_flags[bidx/32] & flagmask )
size += pcm_DecodedSamplesPerBlock*sizeof(s16);
}
return size;
}
// Resets the ADPCM decode cache to its empty state: every bit in the
// pcm_cache_flags bitfield is cleared and the whole pcm_cache_data sample
// buffer is zeroed. Byte counts are derived from pcm_BlockCount (32 flag bits
// per u32 entry) and pcm_DecodedSamplesPerBlock (s16 samples per block).
static void wipe_the_cache()
{
	const size_t flagBytes   = pcm_BlockCount/32 * sizeof(u32);
	const size_t sampleBytes = pcm_BlockCount * pcm_DecodedSamplesPerBlock * sizeof(s16);

	memset( pcm_cache_flags, 0, flagBytes );
	memset( pcm_cache_data, 0, sampleBytes );
}
EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
{
if (mode == FREEZE_LOAD)
@ -1790,8 +1797,7 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
// adpcm cache : Clear all the cache flags and buffers.
memset( pcm_cache_flags, 0, (0x200000 / (16*32)) * 4 );
memset( pcm_cache_data, 0, (0x200000 / 16) * 28 * 2 );
wipe_the_cache();
}
else
{
@ -1803,34 +1809,35 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
memcpy(Cores, spud->Cores, sizeof(Cores));
memcpy(&Spdif, &spud->Spdif, sizeof(Spdif));
OutPos=spud->OutPos;
InputPos=spud->InputPos;
InpBuff=spud->InpBuff;
Cycles=spud->Cycles;
uTicks=spud->uTicks;
srate_pv=spud->srate_pv;
opitch=spud->opitch;
osps=spud->osps;
PlayMode=spud->PlayMode;
lClocks = spud->lClocks;
OutPos = spud->OutPos;
InputPos = spud->InputPos;
InpBuff = spud->InpBuff;
Cycles = spud->Cycles;
uTicks = spud->uTicks;
srate_pv = spud->srate_pv;
opitch = spud->opitch;
osps = spud->osps;
PlayMode = spud->PlayMode;
lClocks = spud->lClocks;
// Load the ADPCM cache:
const cacheFreezeData &cfd = spud->cacheData;
const s16* pcmSrc = &cfd.startData;
memcpy( pcm_cache_flags, cfd.flags, PCM_CACHE_BLOCK_COUNT / 8 );
memcpy( pcm_cache_flags, cfd.flags, (pcm_BlockCount/32) * sizeof(u32) );
int blksLoaded=0;
for( int bidx=0; bidx<PCM_CACHE_BLOCK_COUNT; bidx++ )
for( int bidx=0; bidx<pcm_BlockCount; bidx++ )
{
const u32 flagmask = 1ul << (bidx & 31);
if( cfd.flags[bidx>>5] & flagmask )
if( cfd.flags[bidx/32] & flagmask )
{
// load a cache block!
memcpy( &pcm_cache_data[bidx*28], pcmSrc, 28*2 );
pcmSrc += 28;
memcpy( &pcm_cache_data[bidx*pcm_DecodedSamplesPerBlock],
pcmSrc, pcm_DecodedSamplesPerBlock*sizeof(s16) );
pcmSrc += pcm_DecodedSamplesPerBlock;
blksLoaded++;
}
}
@ -1842,7 +1849,7 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
{
for( int v=0; v<24; v++ )
{
Cores[c].Voices[v].SBuffer = (s16*) ((u64)spud->Cores[c].Voices[v].SBuffer + (u64)pcm_cache_data );
Cores[c].Voices[v].SBuffer = (s16*) ((uptr)spud->Cores[c].Voices[v].SBuffer + (uptr)pcm_cache_data );
}
}
@ -1873,16 +1880,16 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
memcpy(spud->mem, _spu2mem, 0x200000);
memcpy(spud->Cores, Cores, sizeof(Cores));
memcpy(&spud->Spdif, &Spdif, sizeof(Spdif));
spud->OutPos=OutPos;
spud->InputPos=InputPos;
spud->InpBuff=InpBuff;
spud->Cycles=Cycles;
spud->uTicks=uTicks;
spud->srate_pv=srate_pv;
spud->opitch=opitch;
spud->osps=osps;
spud->PlayMode=PlayMode;
spud->lClocks = lClocks;
spud->OutPos = OutPos;
spud->InputPos = InputPos;
spud->InpBuff = InpBuff;
spud->Cycles = Cycles;
spud->uTicks = uTicks;
spud->srate_pv = srate_pv;
spud->opitch = opitch;
spud->osps = osps;
spud->PlayMode = PlayMode;
spud->lClocks = lClocks;
// Save our cache:
// We could just force the user to rebuild the cache when loading
@ -1900,14 +1907,15 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
memcpy( cfd.flags, pcm_cache_flags, sizeof(cfd.flags) );
int blksSaved=0;
for( int bidx=0; bidx<PCM_CACHE_BLOCK_COUNT; bidx++ )
for( int bidx=0; bidx<pcm_BlockCount; bidx++ )
{
const u32 flagmask = 1ul << (bidx & 31);
if( cfd.flags[bidx>>5] & flagmask )
if( cfd.flags[bidx/32] & flagmask )
{
// save a cache block!
memcpy( pcmDst, &pcm_cache_data[bidx*28], 28*2 );
pcmDst += 28;
memcpy( pcmDst, &pcm_cache_data[bidx*pcm_DecodedSamplesPerBlock],
pcm_DecodedSamplesPerBlock*sizeof(s16) );
pcmDst += pcm_DecodedSamplesPerBlock;
blksSaved++;
}
}
@ -1920,7 +1928,7 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
for( int v=0; v<24; v++ )
{
spud->Cores[c].Voices[v].SBuffer =
(s16*) ((u64)spud->Cores[c].Voices[v].SBuffer - (u64)pcm_cache_data );
(s16*) ((uptr)spud->Cores[c].Voices[v].SBuffer - (uptr)pcm_cache_data );
}
}
//printf( " * SPU2 > FreezeSave > Saved %d cache blocks.\n", blksSaved++ );

View File

@ -125,6 +125,9 @@ default: \
//--------------------------------------------------------------------------------------
// Helper macros
//--------------------------------------------------------------------------------------
#ifndef SAFE_FREE
# define SAFE_FREE(p) { if(p) { free(p); (p)=NULL; } }
#endif
#ifndef SAFE_DELETE_ARRAY
# define SAFE_DELETE_ARRAY(p) { if(p) { delete[] (p); (p)=NULL; } }
#endif
@ -141,6 +144,10 @@ extern u32 spdif_init();
extern void spdif_shutdown();
extern void spdif_get_samples(s32 *samples); // fills the buffer with [l,r,c,lfe,sl,sr] if using 5.1 output, or [l,r] if using stereo
// The SPU2 has a dynamic memory range which is used for several internal operations, such as
// registers, CORE 1/2 mixing, AutoDMAs, and some other fancy stuff. We exclude this range
// from the cache here:
static const s32 SPU2_DYN_MEMLINE = 0x6000;
extern short *spu2regs;
extern short *_spu2mem;
@ -156,10 +163,7 @@ extern void __inline __fastcall spu2M_Write( u32 addr, u16 value );
#define spu2Rs16(mmem) (*(s16 *)((s8 *)spu2regs + ((mmem) & 0x1fff)))
#define spu2Ru16(mmem) (*(u16 *)((s8 *)spu2regs + ((mmem) & 0x1fff)))
//#define spu2Ms16(mmem) (*GetMemPtr((mmem) & 0xfffff))
//#define spu2Mu16(mmem) (*(u16*)GetMemPtr((mmem) & 0xfffff))
void SysMessage(char *fmt, ...);
void SysMessage(const char *fmt, ...);
extern void VoiceStart(int core,int vc);
extern void VoiceStop(int core,int vc);