mirror of https://github.com/PCSX2/pcsx2.git
SPU2ghz: Third try's the charm? This is an even better yet fix for the crackling sound in SO3. Note: This revision upgrades spu2ghz savestates to 0x101. Old states should still load fine for the most part.
git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@682 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
parent
ef489b45bd
commit
a92fc9900f
|
@ -220,27 +220,6 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
|
|||
Cores[core].TSA &= 0xfffff;
|
||||
|
||||
u32 buff1end = Cores[core].TSA + size;
|
||||
|
||||
// Pcm Cache Invalidation!
|
||||
// Ideally we would only mask bits actually written to, but it's a complex algorithm
|
||||
// that is way more work than it's worth. Masking out bytes would in theory work a
|
||||
// little more efficiently, but was buggy in practice for some reason. So a dumb and
|
||||
// dirty 32-bit mask will suffice.
|
||||
|
||||
// Note: When clearing cache flags, the *endpoint* needs to be rounded upward.
|
||||
// just rounding the count upward could cause problems if both start and end
|
||||
// points are mis-aligned.
|
||||
|
||||
// indexer scalar - 8 addresses per block, and 32 bits per dword:
|
||||
const u32 indexer_scalar = 8*32;
|
||||
|
||||
const int roundUp = indexer_scalar-1;
|
||||
const int flagTSA = Cores[core].TSA / indexer_scalar;
|
||||
int flagTDA = (buff1end + roundUp) / indexer_scalar; // endpoint, rounded up
|
||||
u8* cache = (u8*)pcm_cache_flags;
|
||||
|
||||
memset( &pcm_cache_flags[flagTSA], 0, (flagTDA - flagTSA) * 4 );
|
||||
|
||||
u32 buff2end=0;
|
||||
if( buff1end > 0x100000 )
|
||||
{
|
||||
|
@ -248,6 +227,60 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
|
|||
buff1end = 0x100000;
|
||||
}
|
||||
|
||||
const int cacheIdxStart = Cores[core].TSA / pcm_WordsPerBlock;
|
||||
const int cacheIdxEnd = (buff1end+pcm_WordsPerBlock-1) / pcm_WordsPerBlock;
|
||||
PcmCacheEntry* cacheLine = &pcm_cache_data[cacheIdxStart];
|
||||
PcmCacheEntry& cacheEnd = pcm_cache_data[cacheIdxEnd];
|
||||
|
||||
do
|
||||
{
|
||||
cacheLine->Validated = false;
|
||||
cacheLine++;
|
||||
} while ( cacheLine != &cacheEnd );
|
||||
|
||||
#if 0
|
||||
// Pcm Cache Invalidation!
|
||||
// It's a requirement that we mask bits for the blocks that are written to *only*,
|
||||
// because doing anything else can cause the cache to fail, thanks to the progressive
|
||||
// nature of the SPU2's ADPCM encoding. (the same thing that makes it impossible
|
||||
// to use SSE optimizations on it).
|
||||
|
||||
u8* cache = (u8*)pcm_cache_flags;
|
||||
|
||||
// Step 1: Clear bits in the front remainder.
|
||||
|
||||
const int pcmTSA = Cores[core].TSA / pcm_WordsPerBlock;
|
||||
const int pcmTDA = buff1end / pcm_WordsPerBlock;
|
||||
const int remFront = pcmTSA & 31;
|
||||
const int remBack = ((buff1end+pcm_WordsPerBlock-1)/pcm_WordsPerBlock) & 31; // round up to get the end remainder
|
||||
|
||||
int flagTSA = pcmTSA / 32;
|
||||
|
||||
if( remFront )
|
||||
{
|
||||
// need to clear some upper bits of this u32
|
||||
uint mask = (1ul<<remFront)-1;
|
||||
cache[flagTSA++] &= mask;
|
||||
}
|
||||
|
||||
// Step 2: Clear the middle run
|
||||
const int flagClearLen = pcmTDA-pcmTSA;
|
||||
memset( &cache[flagTSA], 0, flagClearLen );
|
||||
|
||||
// Step 3: Clear bits in the end remainder.
|
||||
|
||||
if( remBack )
|
||||
{
|
||||
// need to clear some lower bits in this u32
|
||||
uint mask = ~(1ul<<remBack)-1;
|
||||
cache[flagTSA + flagClearLen] &= mask;
|
||||
}
|
||||
#endif
|
||||
|
||||
//ConLog( " * SPU2 : Cache Clear Range! TSA=0x%x, TDA=0x%x (low8=0x%x, high8=0x%x, len=0x%x)\n",
|
||||
// Cores[core].TSA, buff1end, flagTSA, flagTDA, clearLen );
|
||||
|
||||
|
||||
// First branch needs to be cleared:
|
||||
// It starts at TSA and goes to buff1end.
|
||||
|
||||
|
|
|
@ -128,7 +128,7 @@ static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& pr
|
|||
pcm = data>>shift;
|
||||
pcm+=((pred1*prev1)+(pred2*prev2))>>6;
|
||||
if(pcm> 32767) pcm= 32767;
|
||||
if(pcm<-32768) pcm=-32768;
|
||||
else if(pcm<-32768) pcm=-32768;
|
||||
*(buffer++) = pcm;
|
||||
}
|
||||
|
||||
|
@ -140,7 +140,7 @@ static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& pr
|
|||
pcm2 = data>>shift;
|
||||
pcm2+=((pred1*pcm)+(pred2*prev1))>>6;
|
||||
if(pcm2> 32767) pcm2= 32767;
|
||||
if(pcm2<-32768) pcm2=-32768;
|
||||
else if(pcm2<-32768) pcm2=-32768;
|
||||
*(buffer++) = pcm2;
|
||||
}
|
||||
|
||||
|
@ -203,9 +203,10 @@ static void __forceinline IncrementNextA( const V_Core& thiscore, V_Voice& vc )
|
|||
vc.NextA&=0xFFFFF;
|
||||
}
|
||||
|
||||
|
||||
u32 *pcm_cache_flags = NULL;
|
||||
s16 *pcm_cache_data = NULL;
|
||||
// decoded pcm data, used to cache the decoded data so that it needn't be decoded
|
||||
// multiple times. Cache chunks are decoded when the mixer requests the blocks, and
|
||||
// invalidated when DMA transfers and memory writes are performed.
|
||||
PcmCacheEntry *pcm_cache_data = NULL;
|
||||
|
||||
#ifndef PUBLIC
|
||||
int g_counter_cache_hits=0;
|
||||
|
@ -249,16 +250,20 @@ static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Vo
|
|||
|
||||
s16* memptr = GetMemPtr(vc.NextA&0xFFFFF);
|
||||
vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
|
||||
int nexta = vc.NextA / 8; // 8 words per encoded block.
|
||||
|
||||
vc.SBuffer = &pcm_cache_data[nexta * 28];
|
||||
const int cacheIdx = vc.NextA / pcm_WordsPerBlock;
|
||||
PcmCacheEntry& cacheLine = pcm_cache_data[cacheIdx];
|
||||
vc.SBuffer = cacheLine.Sampledata;
|
||||
|
||||
const u32 flagbitmask = 1ul<<(nexta & 31); // 32 flags per array entry
|
||||
nexta /= 32;
|
||||
|
||||
if( pcm_cache_flags[nexta] & flagbitmask )
|
||||
if( cacheLine.Validated )
|
||||
{
|
||||
// Cached block! Read from the cache directly (ie, do nothing)
|
||||
// Cached block! Read from the cache directly.
|
||||
// Make sure to propagate the prev1/prev2 ADPCM:
|
||||
|
||||
vc.Prev1 = vc.SBuffer[27];
|
||||
vc.Prev2 = vc.SBuffer[26];
|
||||
|
||||
//ConLog( " * SPU2 : Cache Hit! NextA=0x%x, cacheIdx=0x%x\n", vc.NextA, cacheIdx );
|
||||
|
||||
#ifndef PUBLIC
|
||||
g_counter_cache_hits++;
|
||||
|
@ -267,19 +272,20 @@ static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Vo
|
|||
else
|
||||
{
|
||||
// Only flag the cache if it's a non-dynamic memory range.
|
||||
if( nexta >= (SPU2_DYN_MEMLINE / (8*32)) )
|
||||
pcm_cache_flags[nexta] |= flagbitmask;
|
||||
if( vc.NextA >= SPU2_DYN_MEMLINE )
|
||||
cacheLine.Validated = true;
|
||||
|
||||
#ifndef PUBLIC
|
||||
if( nexta < (SPU2_DYN_MEMLINE / (8*32)) )
|
||||
if( vc.NextA < SPU2_DYN_MEMLINE )
|
||||
g_counter_cache_ignores++;
|
||||
else
|
||||
g_counter_cache_misses++;
|
||||
#endif
|
||||
|
||||
// saturated decoder
|
||||
s16* sbuffer = cacheLine.Sampledata;
|
||||
|
||||
XA_decode_block(vc.SBuffer, memptr, vc.Prev1, vc.Prev2);
|
||||
// saturated decoder
|
||||
XA_decode_block( sbuffer, memptr, vc.Prev1, vc.Prev2 );
|
||||
|
||||
// [Air]: Testing use of a new unsaturated decoder. (benchmark needed)
|
||||
// Chances are the saturation isn't needed, but for a very few exception games.
|
||||
|
@ -288,7 +294,6 @@ static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Vo
|
|||
// heavy use of the SPU2 via music or sfx will mostly use the cache anyway.
|
||||
|
||||
//XA_decode_block_unsaturated( vc.SBuffer, memptr, vc.Prev1, vc.Prev2 );
|
||||
|
||||
}
|
||||
|
||||
vc.SCurrent = 0;
|
||||
|
@ -1173,8 +1178,8 @@ void __fastcall Mix()
|
|||
}
|
||||
|
||||
// Commit Core 0 output to ram before mixing Core 1:
|
||||
ExtL>>=13;
|
||||
ExtR>>=13;
|
||||
ExtL>>=14;
|
||||
ExtR>>=14;
|
||||
spu2M_WriteFast( 0x800 + OutPos, ExtL>>3 );
|
||||
spu2M_WriteFast( 0xA00 + OutPos, ExtR>>3 );
|
||||
|
||||
|
|
|
@ -226,11 +226,10 @@ __inline void __fastcall spu2M_Write( u32 addr, s16 value )
|
|||
addr &= 0xfffff;
|
||||
if( addr >= SPU2_DYN_MEMLINE )
|
||||
{
|
||||
const u32 nexta = addr >> 3; // 8 words per encoded block.
|
||||
const u32 flagbitmask = 1ul<<(nexta & 31); // 31 flags per array entry
|
||||
pcm_cache_flags[nexta/32] &= ~flagbitmask;
|
||||
const int cacheIdx = addr / pcm_WordsPerBlock;
|
||||
pcm_cache_data[cacheIdx].Validated = false;
|
||||
|
||||
ConLog( " * SPU2 : PcmCache Block Clear at 0x%x (idx=0x%x, bit=%d)\n", addr, nexta, nexta & 31);
|
||||
ConLog( " * SPU2 : PcmCache Block Clear at 0x%x (cacheIdx=0x%x)\n", addr, cacheIdx);
|
||||
}
|
||||
*GetMemPtr( addr ) = value;
|
||||
}
|
||||
|
@ -327,11 +326,6 @@ void CoreReset(int c)
|
|||
|
||||
extern void LowPassFilterInit();
|
||||
|
||||
// number of cachable ADPCM blocks (any blocks above the SPU2_DYN_MEMLINE)
|
||||
static const int pcm_BlockCount = 0x100000 / 8; // (0x100000-SPU2_DYN_MEMLINE) / 8;
|
||||
|
||||
static const int pcm_DecodedSamplesPerBlock = 28;
|
||||
|
||||
EXPORT_C_(s32) SPU2init()
|
||||
{
|
||||
#define MAKESURE(a,b) \
|
||||
|
@ -374,11 +368,10 @@ EXPORT_C_(s32) SPU2init()
|
|||
// Expanded: 16 bytes expands to 56 bytes [3.5:1 ratio]
|
||||
// Resulting in 2MB * 3.5.
|
||||
|
||||
pcm_cache_flags = (u32*)calloc( pcm_BlockCount / 32, sizeof(u32) );
|
||||
pcm_cache_data = (s16*)calloc( pcm_BlockCount * pcm_DecodedSamplesPerBlock, sizeof(s16) );
|
||||
pcm_cache_data = (PcmCacheEntry*)calloc( pcm_BlockCount, sizeof(PcmCacheEntry) );
|
||||
|
||||
if( (spu2regs == NULL) || (_spu2mem == NULL) ||
|
||||
(pcm_cache_data == NULL) || (pcm_cache_flags == NULL) )
|
||||
(pcm_cache_data == NULL) )
|
||||
{
|
||||
SysMessage("SPU2: Error allocating Memory\n"); return -1;
|
||||
}
|
||||
|
@ -543,12 +536,10 @@ EXPORT_C_(void) SPU2shutdown()
|
|||
SAFE_FREE(spu2regs);
|
||||
SAFE_FREE(_spu2mem);
|
||||
|
||||
SAFE_FREE( pcm_cache_flags );
|
||||
SAFE_FREE( pcm_cache_data );
|
||||
|
||||
spu2regs = NULL;
|
||||
_spu2mem = NULL;
|
||||
pcm_cache_flags = NULL;
|
||||
pcm_cache_data = NULL;
|
||||
|
||||
#ifdef SPU2_LOG
|
||||
|
@ -1705,12 +1696,6 @@ EXPORT_C_(u16) SPU2read(u32 rmem)
|
|||
return ret;
|
||||
}
|
||||
|
||||
struct cacheFreezeData
|
||||
{
|
||||
u32 flags[pcm_BlockCount/32];
|
||||
s16 startData;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// compatibility with zerospu2 removed...
|
||||
|
@ -1734,7 +1719,7 @@ typedef struct
|
|||
|
||||
int lClocks;
|
||||
|
||||
cacheFreezeData cacheData;
|
||||
PcmCacheEntry cacheData;
|
||||
|
||||
} SPU2freezeData;
|
||||
|
||||
|
@ -1747,7 +1732,7 @@ typedef struct
|
|||
// Increment this if changes to V_Core or V_Voice structs are made.
|
||||
// Chances are we'll never explicitly support older save versions,
|
||||
// but might as well version them anyway. Could come in handy someday!
|
||||
#define SAVE_VERSION 0x0100
|
||||
#define SAVE_VERSION 0x0101
|
||||
|
||||
static int getFreezeSize()
|
||||
{
|
||||
|
@ -1759,9 +1744,8 @@ static int getFreezeSize()
|
|||
|
||||
for( int bidx=0; bidx<pcm_BlockCount; bidx++ )
|
||||
{
|
||||
const u32 flagmask = 1ul << (bidx & 31);
|
||||
if( pcm_cache_flags[bidx/32] & flagmask )
|
||||
size += pcm_DecodedSamplesPerBlock*sizeof(s16);
|
||||
if( pcm_cache_data[bidx].Validated )
|
||||
size += pcm_DecodedSamplesPerBlock*sizeof(PcmCacheEntry);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
@ -1769,17 +1753,19 @@ static int getFreezeSize()
|
|||
|
||||
static void wipe_the_cache()
|
||||
{
|
||||
memset( pcm_cache_flags, 0, pcm_BlockCount/32 * sizeof(u32) );
|
||||
memset( pcm_cache_data, 0, pcm_BlockCount * pcm_DecodedSamplesPerBlock * sizeof(s16) );
|
||||
memset( pcm_cache_data, 0, pcm_BlockCount * sizeof(PcmCacheEntry) );
|
||||
}
|
||||
|
||||
|
||||
static s16 old_state_sBuffer[pcm_DecodedSamplesPerBlock] = {0};
|
||||
|
||||
EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
|
||||
{
|
||||
if (mode == FREEZE_LOAD)
|
||||
{
|
||||
const SPU2freezeData *spud = (SPU2freezeData*)data->data;
|
||||
|
||||
if( spud->id != SAVE_ID || spud->version != SAVE_VERSION )
|
||||
if( spud->id != SAVE_ID || spud->version < 0x100 )
|
||||
{
|
||||
printf("\n*** SPU2Ghz Warning:\n");
|
||||
printf(" The savestate you are trying to load was not made with this plugin.\n");
|
||||
|
@ -1826,36 +1812,51 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
|
|||
|
||||
// Load the ADPCM cache:
|
||||
|
||||
const cacheFreezeData &cfd = spud->cacheData;
|
||||
const s16* pcmSrc = &cfd.startData;
|
||||
|
||||
memcpy( pcm_cache_flags, cfd.flags, (pcm_BlockCount/32) * sizeof(u32) );
|
||||
wipe_the_cache();
|
||||
if( spud->version == 0x100 ) // don't support 0x100 cache anymore.
|
||||
{
|
||||
printf("\n*** SPU2Ghz Warning:\n");
|
||||
printf("\tSavestate version is from an older version of this plugin.\n");
|
||||
printf("\tAudio may not recover correctly.");
|
||||
|
||||
const PcmCacheEntry* pcmSrc = &spud->cacheData;
|
||||
int blksLoaded=0;
|
||||
|
||||
for( int bidx=0; bidx<pcm_BlockCount; bidx++ )
|
||||
{
|
||||
const u32 flagmask = 1ul << (bidx & 31);
|
||||
if( cfd.flags[bidx/32] & flagmask )
|
||||
if( pcm_cache_data[bidx].Validated )
|
||||
{
|
||||
// load a cache block!
|
||||
memcpy( &pcm_cache_data[bidx*pcm_DecodedSamplesPerBlock],
|
||||
pcmSrc, pcm_DecodedSamplesPerBlock*sizeof(s16) );
|
||||
pcmSrc += pcm_DecodedSamplesPerBlock;
|
||||
memcpy( &pcm_cache_data[bidx], pcmSrc, sizeof(PcmCacheEntry) );
|
||||
pcmSrc++;
|
||||
blksLoaded++;
|
||||
}
|
||||
}
|
||||
|
||||
// Go through the V_Voice structs and replace the SBuffer pointer
|
||||
// with an absolute address into our cache buffer this session.
|
||||
// Go through the V_Voice structs and recalculate SBuffer pointer from
|
||||
// the NextA setting.
|
||||
|
||||
for( int c=0; c<2; c++ )
|
||||
{
|
||||
for( int v=0; v<24; v++ )
|
||||
{
|
||||
Cores[c].Voices[v].SBuffer = (s16*) ((uptr)spud->Cores[c].Voices[v].SBuffer + (uptr)pcm_cache_data );
|
||||
const int cacheIdx = Cores[c].Voices[v].NextA / pcm_WordsPerBlock;
|
||||
Cores[c].Voices[v].SBuffer = pcm_cache_data[cacheIdx].Sampledata;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// We don't support the cache, so make sure the SBuffer pointers
|
||||
// are safe (don't want any GPFs reading bad data)
|
||||
|
||||
for( int c=0; c<2; c++ )
|
||||
{
|
||||
for( int v=0; v<24; v++ )
|
||||
Cores[c].Voices[v].SBuffer = old_state_sBuffer;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//printf( " * SPU2 > FreezeLoad > Loaded %d cache blocks.\n", blksLoaded++ );
|
||||
}
|
||||
|
@ -1905,36 +1906,20 @@ EXPORT_C_(s32) SPU2freeze(int mode, freezeData *data)
|
|||
// decoded blocks currently in use by active voices. This allows
|
||||
// voices to resume seamlessly on load.
|
||||
|
||||
cacheFreezeData &cfd = spud->cacheData;
|
||||
s16* pcmDst = &cfd.startData;
|
||||
|
||||
memcpy( cfd.flags, pcm_cache_flags, sizeof(cfd.flags) );
|
||||
|
||||
PcmCacheEntry* pcmDst = &spud->cacheData;
|
||||
int blksSaved=0;
|
||||
|
||||
for( int bidx=0; bidx<pcm_BlockCount; bidx++ )
|
||||
{
|
||||
const u32 flagmask = 1ul << (bidx & 31);
|
||||
if( cfd.flags[bidx/32] & flagmask )
|
||||
if( pcm_cache_data[bidx].Validated )
|
||||
{
|
||||
// save a cache block!
|
||||
memcpy( pcmDst, &pcm_cache_data[bidx*pcm_DecodedSamplesPerBlock],
|
||||
pcm_DecodedSamplesPerBlock*sizeof(s16) );
|
||||
pcmDst += pcm_DecodedSamplesPerBlock;
|
||||
memcpy( pcmDst, &pcm_cache_data[bidx], sizeof(PcmCacheEntry) );
|
||||
pcmDst++;
|
||||
blksSaved++;
|
||||
}
|
||||
}
|
||||
|
||||
// Time to go through the V_Voice structs and replace the SBuffer pointer
|
||||
// with a relative address that can be applied later on when the state is loaded.
|
||||
|
||||
for( int c=0; c<2; c++ )
|
||||
{
|
||||
for( int v=0; v<24; v++ )
|
||||
{
|
||||
spud->Cores[c].Voices[v].SBuffer =
|
||||
(s16*) ((uptr)spud->Cores[c].Voices[v].SBuffer - (uptr)pcm_cache_data );
|
||||
}
|
||||
}
|
||||
//printf( " * SPU2 > FreezeSave > Saved %d cache blocks.\n", blksSaved++ );
|
||||
|
||||
}
|
||||
|
|
|
@ -138,22 +138,35 @@ default: \
|
|||
# define SAFE_RELEASE(p) { if(p) { (p)->Release(); (p)=NULL; } }
|
||||
#endif
|
||||
|
||||
// The SPU2 has a dynamic memory range which is used for several internal operations, such as
|
||||
// registers, CORE 1/2 mixing, AutoDMAs, and some other fancy stuff. We exclude this range
|
||||
// from the cache here:
|
||||
static const s32 SPU2_DYN_MEMLINE = 0x2800;
|
||||
|
||||
// 8 short words per encoded PCM block. (as stored in SPU2 ram)
|
||||
static const int pcm_WordsPerBlock = 8;
|
||||
|
||||
// number of cachable ADPCM blocks (any blocks above the SPU2_DYN_MEMLINE)
|
||||
static const int pcm_BlockCount = 0x100000 / pcm_WordsPerBlock;
|
||||
|
||||
// 28 samples per decoded PCM block (as stored in our cache)
|
||||
static const int pcm_DecodedSamplesPerBlock = 28;
|
||||
|
||||
struct PcmCacheEntry
|
||||
{
|
||||
bool Validated;
|
||||
s16 Sampledata[pcm_DecodedSamplesPerBlock];
|
||||
};
|
||||
|
||||
extern void spdif_set51(u32 is_5_1_out);
|
||||
extern u32 spdif_init();
|
||||
extern void spdif_shutdown();
|
||||
extern void spdif_get_samples(s32 *samples); // fills the buffer with [l,r,c,lfe,sl,sr] if using 5.1 output, or [l,r] if using stereo
|
||||
|
||||
// The SPU2 has a dynamic memory range which is used for several internal operations, such as
|
||||
// registers, CORE 1/2 mixing, AutoDMAs, and some other fancy stuff. We exclude this range
|
||||
// from the cache here:
|
||||
static const s32 SPU2_DYN_MEMLINE = 0x2800;
|
||||
|
||||
extern short *spu2regs;
|
||||
extern short *_spu2mem;
|
||||
|
||||
extern u32 *pcm_cache_flags;
|
||||
extern s16 *pcm_cache_data;
|
||||
extern PcmCacheEntry* pcm_cache_data;
|
||||
|
||||
extern s16 __forceinline * __fastcall GetMemPtr(u32 addr);
|
||||
extern s16 __forceinline __fastcall spu2M_Read( u32 addr );
|
||||
|
|
Loading…
Reference in New Issue