SPU2Ghz: Updated to include Jake.Stine's latest work. Some issues still exist, but will be fixed soon.

Also made the speedlimiter switch toggle timestretch for easier debugging and convenience :)

git-svn-id: http://pcsx2-playground.googlecode.com/svn/trunk@227 a6443dda-0b58-4228-96e9-037be469359c
This commit is contained in:
ramapcsx2 2008-10-22 16:39:31 +00:00 committed by Gregory Hainaut
parent e4b3c6e649
commit ee9dcd4130
8 changed files with 894 additions and 391 deletions

View File

@ -21,7 +21,6 @@
extern FILE *spu2Log;
extern int Log;
void FileLog(const char *fmt, ...);
void ConLog(const char *fmt, ...);
@ -34,4 +33,4 @@ void wavedump_close();
void wavedump_write(s16 left,s16 right);
#endif // DEBUG_H_INCLUDED //
#endif // DEBUG_H_INCLUDED //

View File

@ -90,39 +90,68 @@ typedef struct {
s32 Prev2;
s8 LoopMode;
s8 LoopStart;
s8 Loop;
s8 LoopEnd;
s8 LoopFlags;
// [Air] : Replaced loop flags read from the ADPCM header with
// a single LoopFlags value (above) -- more cache-friendly.
//s8 LoopStart;
//s8 Loop;
//s8 LoopEnd;
// Sample pointer (19:12 bit fixed point)
s32 SP;
s32 PV1;
s32 PV2;
s32 PV3;
s32 PV4;
// Sample pointer for Cubic Interpolation
// Cubic interpolation mixes a sample behind Linear, so that it
// can have sample data to either side of the end points from which
// to extrapolate. This SP represents that late sample position.
s32 SPc;
// Previous sample values - used for interpolation
// [Air] : Inverted order of these members to match the access order in the
// code (might improve cache hits).
s32 PV4;
s32 PV3;
s32 PV2;
s32 PV1;
// Last outputted audio value, used for voice modulation.
s32 OutX;
s8 FirstBlock;
// SBuffer now points directly to an ADPCM cache entry.
s16 *SBuffer;
s32 PeakX;
s32 SampleData;
// [Air]: Changed SBuffer from 32-bit to 16-bit. (this breaks old savestates)
// Everything stored in SBuffer is 16-bit values, and on modern CPUs the benefit
// of reduced data cache clutter out-weighs the benefit of using 'cpu native' 32-bit
// values. (doesn't apply to SIMD of course, but no SIMD here anyway)
// Because this breaks savestates it might not be worth the bother though.
s16 SBuffer[32];
// sample position within the current decoded packet.
s32 SCurrent;
s32 displayPeak;
s32 lastSetStartA;
s32 lastStopReason;
} V_Voice;
#ifndef PUBLIC
// ** Begin Debug-only variables section **
// Separated from the V_Voice struct to improve cache performance of
// the Public Release build.
// Per-voice diagnostic state, kept outside V_Voice so the mixer's hot data
// stays compact. Only exists in non-PUBLIC builds.
struct V_VoiceDebug
{
// NOTE(review): no writer of this field is visible in this excerpt -- confirm usage.
s8 FirstBlock;
// NOTE(review): no writer of this field is visible in this excerpt -- confirm usage.
s32 SampleData;
// NOTE(review): no writer of this field is visible in this excerpt -- confirm usage.
s32 PeakX;
// Running maximum of abs(Value) produced for this voice (updated in MixVoice).
s32 displayPeak;
// NOTE(review): presumably the last start address written for the voice -- confirm against the register-write code.
s32 lastSetStartA;
// Why the voice last stopped: 1 = reached a loop-end block without the
// loop flag, 2 = ADSR envelope reached phase 6.
s32 lastStopReason;
};
// Per-core diagnostic state (non-PUBLIC builds only): one V_VoiceDebug per
// voice plus core-wide counters that used to live in V_Core.
struct V_CoreDebug
{
// Debug info for each of the core's 24 voices, indexed by voice number.
V_VoiceDebug Voices[24];
// Running maximum of max(abs(InpL),abs(InpR)) seen by MixCore.
s32 AutoDMAPeak;
// Last Transfer Size (the 'size' argument of the most recent SPU2writeDMA
// call that got as far as recording it).
u32 lastsize;
};
// Debug tracking information - 24 voices and 2 cores.
extern V_CoreDebug DebugCores[2];
#endif
typedef struct {
u16 IN_COEF_L;
u16 IN_COEF_R;
@ -256,8 +285,6 @@ typedef struct {
u32 EffectsStartA;
u32 EffectsEndA;
u32 ReverbX;
// Last Transfer Size
u32 lastsize;
// Registers
V_CoreRegs Regs;
@ -279,7 +306,6 @@ typedef struct {
u32 ADMAPL;
u32 ADMAPR;
s32 AutoDMAPeak;
} V_Core;
extern V_Core Cores[2];

View File

@ -91,15 +91,14 @@ void DMALogClose() {
u16 DmaRead(u32 core) {
u16 ret;
ret=spu2Mu16(Cores[core].TDA);
const u16 ret = (u16)spu2M_Read(Cores[core].TDA);
Cores[core].TDA++;
Cores[core].TDA&=0xfffff;
return ret;
}
void DmaWrite(u32 core, u16 value) {
spu2Mu16(Cores[core].TSA)=value;
spu2M_Write( Cores[core].TSA, value );
Cores[core].TSA++;
Cores[core].TSA&=0xfffff;
}
@ -207,7 +206,7 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
Cores[core].TDA=Cores[core].TSA;
for (i=0;i<size;i++) {
spu2Mu16(Cores[core].TDA)=pMem[i];
spu2M_Write( Cores[core].TDA, pMem[i] );
Cores[core].TDA++;
Cores[core].TDA&=0xfffff;
}
@ -228,6 +227,8 @@ void DoDMAWrite(int core,u16 *pMem,u32 size)
void SPU2readDMA(int core, u16* pMem, u32 size)
{
if( disableEverything ) return;
if(hasPtr) TimeUpdate(*cPtr,1);
u32 i;
@ -254,6 +255,8 @@ void SPU2readDMA(int core, u16* pMem, u32 size)
void SPU2writeDMA(int core, u16* pMem, u32 size)
{
if( disableEverything ) return;
if(hasPtr) TimeUpdate(*cPtr,1);
Cores[core].DMAPtr=pMem;
@ -267,7 +270,9 @@ void SPU2writeDMA(int core, u16* pMem, u32 size)
return;
}
Cores[core].lastsize=size;
#ifndef PUBLIC
DebugCores[core].lastsize=size;
#endif
Cores[core].TSA&=~7;
bool adma_enable = ((Cores[core].AutoDMACtrl&(core+1))==(core+1));

View File

@ -51,12 +51,44 @@ double srate_pv=1.0;
extern u32 PsxRates[160];
#define SPU2_DYN_MEMLINE 0x3600
// Performs a 64-bit multiplication between two values and returns the
// high 32 bits as a result (discarding the fractional 32 bits).
// The combined fractional bits of both inputs must be 32 bits for this
// to work properly.
//
// This is meant to be a drop-in replacement for times when the 'div' part
// of a MulDiv is a constant. (example: 1<<8, or 4096, etc)
//
// [Air] Performance breakdown: This is over 10 times faster than MulDiv in
// a *worst case* scenario. It's also more accurate since it forces the
// caller to extend the inputs so that they make use of all 32 bits of
// precision.
//
// Multiplies two signed 32-bit values at 64-bit precision and returns the
// high 32 bits of the product (i.e. the product shifted right by 32).
//   srcval - signed multiplicand
//   mulval - signed multiplier; together with srcval it should carry 32
//            fractional bits so that the discarded low word is pure fraction
static s32 __forceinline MulShr32( s32 srcval, s32 mulval )
{
	// Widen first so the multiplication itself is done in 64 bits.
	const s64 product = (s64)srcval * mulval;

	// Extract the high word with a shift rather than by reinterpreting the
	// s64 as an array of s32: the pointer cast only picks the high word on
	// little-endian hosts and reads an s64 object through an s32 lvalue,
	// which the language's aliasing rules do not permit. On little-endian
	// machines the result is bit-for-bit the same as the old code.
	return (s32)(product >> 32);
}
// Signed-by-unsigned variant of the 64-bit multiply/high-word helper:
// multiplies a signed 32-bit value by an unsigned 32-bit value and returns
// the high 32 bits of the 64-bit product.
//   srcval - signed multiplicand
//   mulval - unsigned multiplier (zero-extended to 64 bits before the multiply)
static s32 __forceinline MulShr32su( s32 srcval, u32 mulval )
{
	// srcval is widened to s64; mulval is then converted to s64 as well,
	// which preserves its full unsigned 32-bit range.
	const s64 product = (s64)srcval * mulval;

	// Shift instead of indexing the s64 through an s32 pointer: the cast
	// depends on little-endian layout and breaks the aliasing rules, while
	// the shift yields the identical high word on every host.
	return (s32)(product >> 32);
}
void InitADSR() // INIT ADSR
{
for (int i=0; i<(32+128); i++)
{
int shift=(i-32)>>2;
__int64 rate=(i&3)+4;
s64 rate=(i&3)+4;
if (shift<0)
{
rate>>=-shift;
@ -80,40 +112,9 @@ const s32 f[5][2] ={{ 0, 0 },
{ 98, -55 },
{ 122, -60 }};
static s16 __forceinline XA_decode(s32 pred1, s32 pred2, s32 shift, s32& prev1, s32& prev2, s32 data)
static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2)
{
s32 pcm = data>>shift;
pcm+=((pred1*prev1)+(pred2*prev2))>>6;
if(pcm> 32767) pcm= 32767;
if(pcm<-32768) pcm=-32768;
prev2=prev1;
prev1=pcm;
return (s16)pcm;
}
static s16 __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2)
{
s32 data=*block;
s32 Shift = ((data>> 0)&0xF)+16;
s32 Predict1 = f[(data>> 4)&0xF][0];
s32 Predict2 = f[(data>> 4)&0xF][1];
for(int i=0;i<7;i++)
{
s32 SampleData=block[i+1];
*(buffer++) = XA_decode(Predict1, Predict2, Shift, prev1, prev2, (SampleData<<28)&0xF0000000);
*(buffer++) = XA_decode(Predict1, Predict2, Shift, prev1, prev2, (SampleData<<24)&0xF0000000);
*(buffer++) = XA_decode(Predict1, Predict2, Shift, prev1, prev2, (SampleData<<20)&0xF0000000);
*(buffer++) = XA_decode(Predict1, Predict2, Shift, prev1, prev2, (SampleData<<16)&0xF0000000);
}
return data;
}
static s16 __forceinline XA_decode_block_fast(s16* buffer, const s16* block, s32& prev1, s32& prev2)
{
s32 header = *block;
const s32 header = *block;
s32 shift = ((header>> 0)&0xF)+16;
s32 pred1 = f[(header>> 4)&0xF][0];
s32 pred2 = f[(header>> 4)&0xF][1];
@ -147,13 +148,11 @@ static s16 __forceinline XA_decode_block_fast(s16* buffer, const s16* block, s32
prev2=pcm;
prev1=pcm2;
}
return header;
}
static s16 __forceinline XA_decode_block_unsaturated(s16* buffer, const s16* block, s32& prev1, s32& prev2)
static void __forceinline XA_decode_block_unsaturated(s16* buffer, const s16* block, s32& prev1, s32& prev2)
{
s32 header = *block;
const s32 header = *block;
s32 shift = ((header>> 0)&0xF)+16;
s32 pred1 = f[(header>> 4)&0xF][0];
s32 pred2 = f[(header>> 4)&0xF][1];
@ -182,14 +181,14 @@ static s16 __forceinline XA_decode_block_unsaturated(s16* buffer, const s16* blo
prev2=pcm;
prev1=pcm2;
}
return header;
}
static void __forceinline IncrementNextA( const V_Core& thiscore, V_Voice& vc )
{
if((vc.NextA==thiscore.IRQA)&&(thiscore.IRQEnable)) {
#ifndef PUBLIC
ConLog(" * SPU2: IRQ Called (IRQ passed).\n");
#endif
Spdif.Info=4<<core;
SetIrqCall();
}
@ -199,67 +198,110 @@ static void __forceinline IncrementNextA( const V_Core& thiscore, V_Voice& vc )
}
static void __fastcall GetNextDataBuffered( V_Core& thiscore, V_Voice& vc, s32& Data)
{
//static s32 pcm=0;
s16 data=0;
u32 *pcm_cache_flags=NULL;
s16 *pcm_cache_data=NULL;
if (vc.SCurrent>=28)
#ifndef PUBLIC
int g_counter_cache_hits=0;
int g_counter_cache_misses=0;
int g_counter_cache_ignores=0;
#endif
#define XAFLAG_LOOP_END (1ul<<0)
#define XAFLAG_LOOP (1ul<<1)
#define XAFLAG_LOOP_START (1ul<<2)
static void __forceinline __fastcall GetNextDataBuffered( V_Core& thiscore, V_Voice& vc, s32& Data)
{
if (vc.SCurrent<28)
{
if(vc.LoopEnd)
// [Air] : skip the increment?
// (witness one of the rare ideal uses of a goto statement!)
if( (vc.SCurrent&3) != 3 ) goto _skipIncrement;
}
else
{
if(vc.LoopFlags & XAFLAG_LOOP_END)
{
if(vc.Loop)
if(vc.LoopFlags & XAFLAG_LOOP)
{
vc.NextA=vc.LoopStartA;
}
else
{
if(MsgVoiceOff) ConLog(" * SPU2: Voice Off by EndPoint: %d \n", voice);
VoiceStop(core,voice);
thiscore.Regs.ENDX|=1<<voice;
vc.lastStopReason = 1;
#ifndef PUBLIC
if(MsgVoiceOff) ConLog(" * SPU2: Voice Off by EndPoint: %d \n", voice);
DebugCores[core].Voices[voice].lastStopReason = 1;
#endif
}
}
// [Air]: Original ADPCM decoder.
//data = XA_decode_block(vc.SBuffer,GetMemPtr(vc.NextA&0xFFFFF), vc.Prev1, vc.Prev2);
// We'll need the loop flags and buffer pointers regardless of cache status:
// Note to Self : NextA addresses WORDS (not bytes).
// [Air]: Testing of a new saturated decoder. (benchmark needed)
// My gut tells me that this should be faster, but you never can tell with these types
// of things. Benchmark it against the original and see what you think.
s16* memptr = GetMemPtr(vc.NextA&0xFFFFF);
vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
int nexta = vc.NextA >> 3; // 8 words per encoded block.
vc.SBuffer = &pcm_cache_data[nexta * 28];
//data = XA_decode_block_fast(vc.SBuffer,GetMemPtr(vc.NextA&0xFFFFF), vc.Prev1, vc.Prev2);
const u32 flagbitmask = 1ul<<(nexta & 31); // 32 flags per array entry
nexta >>= 5;
// [Air]: Testing use of a new unsaturated decoder. (benchmark needed)
// Chances are the saturation isn't needed, but for a very few exception games.
// This is definitely faster than either of the above versions, but the question is by how
// much (biggest impact will be on games like Xenosaga2, which use lots of SPU2 voices).
// If the speed boost is worth it then maybe it should be added as a speedhack option
// in the spu2ghz config.
data = XA_decode_block_unsaturated(vc.SBuffer,GetMemPtr(vc.NextA&0xFFFFF), vc.Prev1, vc.Prev2);
vc.LoopEnd = (data>> 8)&1;
vc.Loop = (data>> 9)&1;
vc.LoopStart= (data>>10)&1;
vc.SCurrent = 0;
vc.FirstBlock = 0;
if( vc.LoopStart && !vc.LoopMode )
if( pcm_cache_flags[nexta] & flagbitmask )
{
vc.LoopStartA=vc.NextA;
// Cached block! Read from the cache directly (ie, do nothing)
#ifndef PUBLIC
g_counter_cache_hits++;
#endif
}
else
{
// Only flag the cache if it's a non-dynamic memory range.
if( nexta >= (SPU2_DYN_MEMLINE / (8*32)) )
pcm_cache_flags[nexta] |= flagbitmask;
#ifndef PUBLIC
if( nexta < (SPU2_DYN_MEMLINE / (8*32)) )
g_counter_cache_ignores++;
else
g_counter_cache_misses++;
#endif
// saturated decoder
XA_decode_block(vc.SBuffer, memptr, vc.Prev1, vc.Prev2);
// [Air]: Testing use of a new unsaturated decoder. (benchmark needed)
// Chances are the saturation isn't needed, but for a very few exception games.
// This is definitely faster than the above version, but is it by enough to
// merit possible lower compatibility? Especially now that games that make
// heavy use of the SPU2 via music or sfx will mostly use the cache anyway.
//XA_decode_block_unsaturated( vc.SBuffer, memptr, vc.Prev1, vc.Prev2 );
//vc.LoopEnd = (data>> 8)&1;
//vc.Loop = (data>> 9)&1;
//vc.LoopStart= (data>>10)&1;
}
IncrementNextA( thiscore, vc );
vc.SCurrent = 0;
if( (vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode )
{
vc.LoopStartA=vc.NextA;
}
// [Air] : Increment will get called below (change made to avoid needless code cache clutter)
//IncrementNextA( thiscore, vc );
}
Data=vc.SBuffer[vc.SCurrent];
IncrementNextA( thiscore, vc );
if((vc.SCurrent&3)==3)
{
IncrementNextA( thiscore, vc );
}
vc.SCurrent++;
_skipIncrement:
Data = vc.SBuffer[vc.SCurrent++];
}
/////////////////////////////////////////////////////////////////////////////////////////
@ -271,12 +313,17 @@ const int InvExpOffsets[] = { 0,4,6,8,9,10,11,12 };
static void __forceinline CalculateADSR( V_Voice& vc )
{
V_ADSR& env(vc.ADSR);
if( env.Phase == 0 ) return;
u32 SLevel=((u32)env.Sl)<<27;
u32 off=InvExpOffsets[(env.Value>>28)&7];
if(env.Releasing)
{
if((env.Phase>0)&&(env.Phase<5))
// [Air] : Simplified conditional, as phase cannot be zero here.
// (zeros get trapped above)
if(/*(env.Phase>0)&&*/(env.Phase<5))
{
env.Phase=5;
}
@ -392,15 +439,17 @@ static void __forceinline CalculateADSR( V_Voice& vc )
env.Value=0;
break;
//jNO_DEFAULT
jNO_DEFAULT
}
if (env.Phase==6) {
#ifndef PUBLIC
if(MsgVoiceOff) ConLog(" * SPU2: Voice Off by ADSR: %d \n", voice);
DebugCores[core].Voices[voice].lastStopReason = 2;
#endif
VoiceStop(core,voice);
Cores[core].Regs.ENDX|=(1<<voice);
env.Phase=0;
vc.lastStopReason = 2;
}
}
@ -448,36 +497,29 @@ void LowPass(s32& VL, s32& VR)
/////////////////////////////////////////////////////////////////////////////////////////
// //
static void __fastcall GetVoiceValues(V_Core& thiscore, V_Voice& vc, s32& Value)
static void __forceinline UpdatePitch( V_Voice& vc )
{
s64 Data=0;
s32 DT=0;
s32 pitch;
// [Air] : Put a scope on the pitch variable, which should help it get optimized to a
// register.
// [Air] : re-ordered comparisons: Modulated is much more likely to be zero than voice,
// and so the way it was before it's have to check both voice and modulated values
// most of the time. Now it'll just check Modulated and short-circuit past the voice
// check (not that it amounts to much, but eh every little bit helps).
if( (vc.Modulated==0) || (voice==0) )
pitch=vc.Pitch;
else
pitch=(vc.Pitch*(32768 + abs(Cores[core].Voices[voice-1].OutX)))>>15;
vc.SP+=pitch;
}
static void __forceinline GetVoiceValues_Linear(V_Core& thiscore, V_Voice& vc, s32& Value)
{
while( vc.SP > 0 )
{
s32 pitch;
// [Air] : re-ordered comparisons: Modulated is much more likely to be zero than voice,
// and so the way it was before it's have to check both voice and modulated values
// most of the time. Now it'll just check Modulated and short-circut past the voice
// check (not that it amounts to much, but eh every little bit helps).
if( (vc.Modulated==0) || (voice==0) )
pitch=vc.Pitch;
else
pitch=(vc.Pitch*(32768 + abs(thiscore.Voices[voice-1].OutX)))>>15;
vc.SP+=pitch;
}
while(vc.SP>=4096)
{
GetNextDataBuffered( thiscore, vc, DT );
vc.PV4=vc.PV3;
vc.PV3=vc.PV2;
vc.PV2=vc.PV1;
vc.PV1=DT<<16; //32bit processing
GetNextDataBuffered( thiscore, vc, vc.PV1 );
vc.SP-=4096;
}
@ -486,89 +528,91 @@ static void __fastcall GetVoiceValues(V_Core& thiscore, V_Voice& vc, s32& Value)
if(vc.ADSR.Phase==0)
{
Value=0;
vc.OutX=0;
Value = 0;
}
else
{
// [Air]: if SP is zero then we landed perfectly on a sample source, no
// interpolation necessary (besides being a little faster this is important
// too, since the interpolator will pick the wrong sample to mix otherwise).
if(Interpolation==0 || vc.SP == 0)
if(Interpolation==0) // || vc.SP == 0)
{
Data = vc.PV1;
Value = vc.PV1;
}
else if(Interpolation==1) //linear
else //if(Interpolation==1) //must be linear
{
// [Air]: Inverted the interpolation delta. The old way was generating
// inverted waveforms.
s64 t0 = vc.PV2 - vc.PV1;
s64 t1 = vc.PV1;
Data = (((t0*vc.SP)>>12) + t1);
s32 t0 = vc.PV2 - vc.PV1;
s32 t1 = vc.PV1<<12;
Value = t1 - (t0*vc.SP);
}
else // if(Interpolation==2) //must be cubic
{
s64 a0 = vc.PV1 - vc.PV2 - vc.PV4 + vc.PV3;
s64 a1 = vc.PV4 - vc.PV3 - a0;
s64 a2 = vc.PV1 - vc.PV4;
s64 a3 = vc.PV2;
s64 mu = 4096-vc.SP;
s64 t0 = ((a0 )*mu)>>18;
s64 t1 = ((t0+a1)*mu)>>18;
s64 t2 = ((t1+a2)*mu)>>18;
s64 t3 = ((t2+a3));
Data = t3;
}
Value=(s32)((Data*vc.ADSR.Value)>>48); //32bit ADSR + convert to 16bit
// [Air]: Moved abs() to the modulation code above, so that the abs conditionals are
// only run in select cases where modulation is active.
vc.OutX=Value;
Value = MulShr32su( Value, vc.ADSR.Value>>12 );
}
}
// [Air]: Noise values need to be mixed without going through interpolation, since it
// can wreak havoc on the noise (causing muffling or popping)
static void __fastcall GetNoiseValues(V_Core& thiscore, V_Voice& vc, s32& Value)
static void __forceinline GetVoiceValues_Cubic(V_Core& thiscore, V_Voice& vc, s32& Value)
{
s64 Data=0;
s32 DT=0;
while( vc.SP > 0 )
{
s32 pitch;
vc.PV4=vc.PV3;
vc.PV3=vc.PV2;
vc.PV2=vc.PV1;
if( (vc.Modulated==0) || (voice==0) )
pitch=vc.Pitch;
else
pitch=(vc.Pitch*(32768 + abs(thiscore.Voices[voice-1].OutX)))>>15;
vc.SP+=pitch;
}
while(vc.SP>=4096)
{
GetNoiseValues(DT);
GetNextDataBuffered( thiscore, vc, vc.PV1 );
vc.PV1<<=3;
vc.SPc = vc.SP&4095; // just the fractional part, please!
vc.SP-=4096;
}
Data = DT<<16; //32bit processing
CalculateADSR( vc );
if(vc.ADSR.Phase==0)
{
Value=0;
vc.OutX=0;
Value = 0;
}
else
{
Value=(s32)((Data*vc.ADSR.Value)>>48); //32bit ADSR + convert to 16bit
vc.OutX=Value;
s32 z0 = vc.PV3 - vc.PV4 + vc.PV1 - vc.PV2;
s32 z1 = (vc.PV4 - vc.PV3 - z0);
s32 z2 = (vc.PV2 - vc.PV4);
s32 mu = vc.SPc;
s32 val = (z0 * mu) >> 12;
val = ((val + z1) * mu) >> 12;
val = ((val + z2) * mu) >> 12;
val += vc.PV2;
/*
s64 a0 = vc.PV1 - vc.PV2 - vc.PV4 + vc.PV3;
s64 a1 = vc.PV4 - vc.PV3 - a0;
s64 a2 = vc.PV1 - vc.PV4;
s64 a3 = vc.PV2;
s64 mu = 4096+vc.SP;
s64 t0 = ((a0 )*mu)>>12;
s64 t1 = ((t0-a1)*mu)>>12;
s64 t2 = ((t1-a2)*mu)>>12;
s64 t3 = ((t2-a3));*/
Value = MulShr32su( val, vc.ADSR.Value>>3 );
}
//Value=(s32)((Data*vc.ADSR.Value)>>40); //32bit ADSR + convert to 16bit
}
// [Air]: Noise values need to be mixed without going through interpolation, since it
// can wreak havoc on the noise (causing muffling or popping).
//
// Advances the voice's sample pointer for a noise voice: for every whole
// sample step (4096 units of vc.SP) it pulls a fresh value into Data via the
// single-argument GetNoiseValues overload, then runs the ADSR envelope update.
//   thiscore - owning core (not referenced in this body)
//   vc       - voice whose SP and ADSR state are updated
//   Data     - receives the most recent noise value; left untouched when
//              vc.SP is below 4096 on entry
// NOTE(review): unlike GetVoiceValues_Linear/_Cubic, Data is not scaled by
// vc.ADSR.Value here -- confirm whether the envelope is meant to be applied
// to noise output elsewhere.
static void __forceinline __fastcall GetNoiseValues(V_Core& thiscore, V_Voice& vc, s32& Data)
{
// Consume one noise sample per whole step accumulated in the sample pointer.
while(vc.SP>=4096)
{
GetNoiseValues( Data );
vc.SP-=4096;
}
// GetNoiseValues can't set the phase zero on us unexpectedly
// like GetVoiceValues can.
jASSUME( vc.ADSR.Phase != 0 );
CalculateADSR( vc );
}
/////////////////////////////////////////////////////////////////////////////////////////
@ -619,10 +663,12 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
{
FileLog("[%10d] AutoDMA%c block end.\n",Cycles, (core==0)?'4':'7');
#ifndef PUBLIC
if(thiscore.InputDataLeft>0)
{
if(MsgAutoDMA) ConLog("WARNING: adma buffer didn't finish with a whole block!!\n");
}
#endif
thiscore.InputDataLeft=0;
thiscore.DMAICounter=1;
}
@ -657,10 +703,12 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
{
FileLog("[%10d] Spdif AutoDMA%c block end.\n",Cycles, (core==0)?'4':'7');
#ifndef PUBLIC
if(thiscore.InputDataLeft>0)
{
if(MsgAutoDMA) ConLog("WARNING: adma buffer didn't finish with a whole block!!\n");
}
#endif
thiscore.InputDataLeft=0;
thiscore.DMAICounter=1;
}
@ -709,10 +757,12 @@ void __fastcall ReadInput(V_Core& thiscore, s32& PDataL,s32& PDataR)
thiscore.AutoDMACtrl |=~3;
#ifndef PUBLIC
if(thiscore.InputDataLeft>0)
{
if(MsgAutoDMA) ConLog("WARNING: adma buffer didn't finish with a whole block!!\n");
}
#endif
thiscore.InputDataLeft=0;
thiscore.DMAICounter=1;
}
@ -756,60 +806,79 @@ void __fastcall ReadInputPV(V_Core& thiscore, s32& ValL,s32& ValR)
/////////////////////////////////////////////////////////////////////////////////////////
// //
static void __forceinline UpdateVolume(V_Volume& Vol)
{
s32 NVal;
#define VOLFLAG_REVERSE_PHASE (1ul<<0)
#define VOLFLAG_DECREMENT (1ul<<1)
#define VOLFLAG_EXPONENTIAL (1ul<<2)
#define VOLFLAG_SLIDE_ENABLE (1ul<<3)
static void __fastcall UpdateVolume(V_Volume& Vol)
{
// TIMINGS ARE FAKE!!! Need to investigate.
int reverse_phase = Vol.Mode&1;
int exponential = Vol.Mode&4;
int decrement = Vol.Mode&2;
int slide_enable = Vol.Mode&8;
if (!slide_enable) return;
// [Air]: Cleaned up this code... may have broken it. Can't really
// test it here since none of my games seem to use it. If anything's
// not sounding right, we should revert the code in this method first.
NVal=Vol.Value;
if(reverse_phase) NVal = -NVal;
// [Air] Reverse phasing?
// Invert our value so that exponential mathematics are applied
// as if the volume were sliding the other direction. This makes
// a lot more sense than the old method's likeliness to chop off
// sound volumes to zero abruptly.
if (decrement) { // Decrement
if(exponential)
{
NVal=NVal * Vol.Increment >> 7;
}
else
{
NVal-=Vol.Increment;
}
NVal-=((32768*5)>>(Vol.Increment));
if (NVal<0) {
Vol.Value=0;
Vol.Mode=0;
}
else Vol.Value=NVal & 0xffff;
}
else { // Increment
if(exponential)
{
int T = Vol.Increment>>(NVal>>12);
NVal+=T;
}
else
{
NVal+=Vol.Increment;
}
}
if((NVal<0)||(NVal>0x7fff))
if(Vol.Mode & VOLFLAG_REVERSE_PHASE)
{
NVal=decrement?0:0x7fff;
Vol.Mode=0; // disable slide
ConLog( " *** SPU2 > Reverse Phase in progress!\n" );
Vol.Value = 0x7fff - Vol.Value;
}
if(reverse_phase) NVal = -NVal;
if (Vol.Mode & VOLFLAG_DECREMENT)
{
// Decrement
Vol.Value=NVal;
if(Vol.Mode & VOLFLAG_EXPONENTIAL)
{
//ConLog( " *** SPU2 > Exponential Volume Slide Down!\n" );
Vol.Value *= Vol.Increment >> 7;
Vol.Value-=((32768*5)>>(Vol.Increment));
}
else
{
Vol.Value-=Vol.Increment;
}
if (Vol.Value<0)
{
Vol.Value = 0;
Vol.Mode=0; // disable slide
}
}
else
{
//ConLog( " *** SPU2 > Volflag > Increment!\n" );
// Increment
if(Vol.Mode & VOLFLAG_EXPONENTIAL)
{
//ConLog( " *** SPU2 > Exponential Volume Slide Up!\n" );
int T = Vol.Increment>>(Vol.Value>>12);
Vol.Value+=T;
}
else
{
Vol.Value+=Vol.Increment;
}
if( Vol.Value > 0x7fff )
{
Vol.Value = 0x7fff;
Vol.Mode=0; // disable slide
}
}
// Reverse phasing
// Invert the value back into output form:
if(Vol.Mode & VOLFLAG_REVERSE_PHASE) Vol.Value = 0x7fff-Vol.Value;
//Vol.Value=NVal;
}
/////////////////////////////////////////////////////////////////////////////////////////
@ -923,36 +992,62 @@ static s32 __forceinline ApplyVolume(s32 data, s32 volume)
return (volume * data);
}
static void __forceinline MixVoice(V_Voice& vc, s32& VValL, s32& VValR)
// Stores a signed 16-bit value into SPU2 RAM at the given address.
// No cache invalidation is performed, so this is only valid for the
// dynamic memory range of the SPU2 (addresses 0x0000 up to, but not
// including, SPU2_DYN_MEMLINE).
static void __forceinline spu2M_WriteFast( u32 addr, s16 value )
{
	// The caller guarantees the address lies inside the dynamic range.
	jASSUME( addr < SPU2_DYN_MEMLINE );

	s16* const dest = GetMemPtr( addr );
	*dest = value;
}
static void __forceinline MixVoice( V_Core& thiscore, V_Voice& vc, s32& VValL, s32& VValR )
{
s32 Value=0;
VValL=0;
VValR=0;
UpdateVolume(vc.VolumeL);
UpdateVolume(vc.VolumeR);
// [Air] : Most games don't use much volume slide effects. So only
// call the UpdateVolume methods when needed by checking the flag
// outside the method here...
if( vc.VolumeL.Mode & VOLFLAG_SLIDE_ENABLE ) UpdateVolume( vc.VolumeL );
if( vc.VolumeR.Mode & VOLFLAG_SLIDE_ENABLE ) UpdateVolume( vc.VolumeR );
if (vc.ADSR.Phase>0)
{
if( vc.Noise )
GetNoiseValues( Cores[core], vc, Value );
else
GetVoiceValues( Cores[core], vc, Value );
UpdatePitch( vc );
#ifdef _DEBUG
vc.displayPeak = max(vc.displayPeak,abs(Value));
if( vc.Noise )
GetNoiseValues( thiscore, vc, Value );
else
{
if( Interpolation == 2 )
GetVoiceValues_Cubic( thiscore, vc, Value );
else
GetVoiceValues_Linear( thiscore, vc, Value );
}
// Record the output (used for modulation effects)
vc.OutX = Value;
#ifndef PUBLIC
DebugCores[core].Voices[voice].displayPeak = max(DebugCores[core].Voices[voice].displayPeak,abs(Value));
#endif
VValL=ApplyVolume(Value,(vc.VolumeL.Value));
VValR=ApplyVolume(Value,(vc.VolumeR.Value));
}
if (voice==1) spu2Ms16(0x400 + (core<<12) + OutPos)=(s16)((Value));
else if (voice==3) spu2Ms16(0x600 + (core<<12) + OutPos)=(s16)((Value));
if (voice==1) spu2M_WriteFast( 0x400 + (core<<12) + OutPos, (s16)Value );
else if (voice==3) spu2M_WriteFast( 0x600 + (core<<12) + OutPos, (s16)Value );
}
static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
{
s32 InpL=0, InpR=0;
@ -964,8 +1059,8 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
TDL=TDR=TWL=TWR=(s32)0;
if (core == 1) { //Core 0 doesn't have External input
spu2Ms16(0x800 + OutPos)=(s16)(ExtL>>16);
spu2Ms16(0xA00 + OutPos)=(s16)(ExtR>>16);
spu2M_WriteFast( 0x800 + OutPos, (s16)(ExtL>>16) );
spu2M_WriteFast( 0xA00 + OutPos, (s16)(ExtR>>16) );
}
V_Core& thiscore( Cores[core] );
@ -979,14 +1074,30 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
ReadInputPV(thiscore, InpL,InpR); // get input data from input buffers
}
#ifndef PUBLIC
s32 InputPeak = max(abs(InpL),abs(InpR));
if(thiscore.AutoDMAPeak<InputPeak) thiscore.AutoDMAPeak=InputPeak;
if(DebugCores[core].AutoDMAPeak<InputPeak) DebugCores[core].AutoDMAPeak=InputPeak;
#endif
InpL = MulDiv(InpL,(thiscore.InpL),1<<1);
InpR = MulDiv(InpR,(thiscore.InpR),1<<1);
//MulShr32( InpL, thiscore.InpL, 1 );
//MulShr32( InpR, thiscore.InpR, 1 );
ExtL = MulDiv(ExtL,(thiscore.ExtL),1<<12);
ExtR = MulDiv(ExtR,(thiscore.ExtR),1<<12);
// [Air] : InpL and InpR don't need 64 bit muls.
InpL *= thiscore.InpL;
InpR *= thiscore.InpR;
InpL >>= 1;
InpR >>= 1;
// shift inputs by 20 collectively, so that the result is
// effectively downshifted by 12:
ExtL = MulShr32su( ExtL<<3, ((int)thiscore.ExtL)<<16);
ExtR = MulShr32su( ExtR<<3, ((int)thiscore.ExtR)<<16);
//InpL = MulDiv(InpL,(thiscore.InpL),1<<1);
//InpR = MulDiv(InpR,(thiscore.InpR),1<<1);
//ExtL = MulDiv(ExtL,(thiscore.ExtL),1<<12);
//ExtR = MulDiv(ExtR,(thiscore.ExtR),1<<12);
SDL=SDR=SWL=SWR=(s32)0;
@ -995,7 +1106,7 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
s32 VValL,VValR;
V_Voice& vc( thiscore.Voices[voice] );
MixVoice( vc,VValL,VValR );
MixVoice( thiscore, vc, VValL, VValR );
SDL += VValL * vc.DryL;
SDR += VValR * vc.DryR;
@ -1004,10 +1115,10 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
}
//Write To Output Area
spu2Ms16(0x1000 + (core<<12) + OutPos)=(s16)(SDL>>16);
spu2Ms16(0x1200 + (core<<12) + OutPos)=(s16)(SDR>>16);
spu2Ms16(0x1400 + (core<<12) + OutPos)=(s16)(SWL>>16);
spu2Ms16(0x1600 + (core<<12) + OutPos)=(s16)(SWR>>16);
spu2M_WriteFast( 0x1000 + (core<<12) + OutPos, (s16)(SDL>>16) );
spu2M_WriteFast( 0x1200 + (core<<12) + OutPos, (s16)(SDR>>16) );
spu2M_WriteFast( 0x1400 + (core<<12) + OutPos, (s16)(SWL>>16) );
spu2M_WriteFast( 0x1600 + (core<<12) + OutPos, (s16)(SWR>>16) );
// Mix in the Voice data
TDL += SDL * thiscore.SndDryL;
@ -1046,12 +1157,13 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
OutR=(TDR + TWR);
//Apply Master Volume
UpdateVolume(thiscore.MasterL);
UpdateVolume(thiscore.MasterR);
if( thiscore.MasterL.Mode & VOLFLAG_SLIDE_ENABLE ) UpdateVolume(thiscore.MasterL);
if( thiscore.MasterR.Mode & VOLFLAG_SLIDE_ENABLE ) UpdateVolume(thiscore.MasterR);
if (thiscore.Mute==0) {
OutL=MulDiv(OutL,thiscore.MasterL.Value,1<<16);
OutR=MulDiv(OutR,thiscore.MasterR.Value,1<<16);
if (thiscore.Mute==0)
{
OutL = MulShr32( OutL, ((s32)thiscore.MasterL.Value)<<16 );
OutR = MulShr32( OutR, ((s32)thiscore.MasterR.Value)<<16 );
}
else
{
@ -1071,24 +1183,32 @@ static void __fastcall MixCore(s32& OutL, s32& OutR, s32 ExtL, s32 ExtR)
}
}
// used to throttle the output rate of cache stat reports
static int p_cachestat_counter=0;
void __fastcall Mix()
{
s32 ExtL=0, ExtR=0, OutL, OutR;
static s32 Peak0,Peak1;
static s32 PCount;
core=0;
MixCore(ExtL,ExtR,0,0);
core=1;
MixCore(OutL,OutR,ExtL,ExtR);
#ifdef _DEBUG
#ifndef PUBLIC
static s32 Peak0,Peak1;
static s32 PCount;
Peak0 = max(Peak0,max(ExtL,ExtR));
Peak1 = max(Peak1,max(OutL,OutR));
#endif
// [Air] [TODO] : Replace this with MulShr32.
// I haven't done it yet because it would require making the
// VolumeDivisor a constant .. which it should be anyway. The presence
// of VolumeMultiplier more or less negates the need for a variable divisor.
ExtL=MulDiv(OutL,VolumeMultiplier,VolumeDivisor<<6);
ExtR=MulDiv(OutR,VolumeMultiplier,VolumeDivisor<<6);
@ -1102,6 +1222,23 @@ void __fastcall Mix()
SndWrite(ExtL,ExtR);
OutPos++;
if (OutPos>=0x200) OutPos=0;
#ifndef PUBLIC
// [TODO]: Create an INI option to enable/disable this particular log.
p_cachestat_counter++;
if(p_cachestat_counter > (48000*6) )
{
p_cachestat_counter = 0;
ConLog( " * SPU2 > CacheStatistics > Hits: %d Misses: %d Ignores: %d\n",
g_counter_cache_hits,
g_counter_cache_misses,
g_counter_cache_ignores );
g_counter_cache_hits =
g_counter_cache_misses =
g_counter_cache_ignores = 0;
}
#endif
}
/////////////////////////////////////////////////////////////////////////////////////////

View File

@ -142,6 +142,32 @@ public:
}
// either pw=false or free>nSamples
// Problem:
// If the SPU2 gets out of sync with the SndOut device, the writepos of the
// circular buffer will overtake the readpos, leading to a prolonged period
// of hopscotching read/write accesses (ie, lots of staticy crap sound for
// several seconds).
//
// Compromise:
// Same as with underruns below, an overrun can be handled by aborting
// the write operation before the writepos goes past the readpos, and then
// ignoring the rest of the incoming data. The resultant sound will have
// a single hiccup when an overflow occurs, instead of getting crapped out
// for several seconds (or in many cases, until the SPU sndout driver was
// manually reset.. grr!).
#ifndef DYNAMIC_BUFFER_LIMITING
while(data<size && nSamples>0)
{
buffer[wpos] = *(bData++);
wpos=(wpos+1)%size;
data++;
nSamples--;
}
#elif defined( DYNAMIC_BUFFER_LIMITING )
while(nSamples>0)
{
buffer[wpos] = *(bData++);
@ -156,10 +182,9 @@ public:
data-=size;
}
while(data>size);
#ifdef DYNAMIC_BUFFER_LIMITING
overflows++;
#endif
}
#endif
LeaveCriticalSection(&cs);
@ -167,9 +192,70 @@ public:
virtual void ReadSamples (s32 *bData, s32 nSamples)
{
static bool underrun_freeze = false;
EnterCriticalSection(&cs);
dataread+=nSamples;
// Problem:
// If the SPU2 gets even the least bit out of sync with the SndOut device,
// the readpos of the circular buffer will overtake the writepos,
// leading to a prolonged period of hopscotching read/write accesses (ie,
// lots of staticy crap sound for several seconds).
//
// Fix:
// If the read position overtakes the write position, abort the
// transfer immediately and force the SndOut driver to wait until
// the read buffer has filled up again before proceeding.
// This will cause one brief hiccup that can never exceed the user's
// set buffer length in duration.
#ifndef DYNAMIC_BUFFER_LIMITING
if( underrun_freeze )
{
if( data < (int)(size * 0.85) )
{
while( nSamples>0 )
{
*(bData++) = 0;
nSamples--;
}
LeaveCriticalSection(&cs);
return;
}
underrun_freeze = false;
//ConLog( " * SPU2 > Underrun Freeze Finished!\n" );
}
while(data>0 && nSamples>0)
{
*(bData++) = buffer[rpos];
rpos=(rpos+1)%size;
data--;
nSamples--;
}
while( nSamples>0 )
{
// buffer underrun code:
// the contents of this loop only get run if data reached zero
// before nSamples.
// Let's just dull out some silence, because that's usually the least
// painful way of dealing with underruns.
*(bData++) = 0;
nSamples--;
}
if( data == 0 && !pw )
{
ConLog( " * SPU2 > Underrun compensation\n" );
underrun_freeze = true;
}
#elif defined( DYNAMIC_BUFFER_LIMITING )
while(nSamples>0)
{
*(bData++) = buffer[rpos];
@ -178,7 +264,6 @@ public:
nSamples--;
}
#ifdef DYNAMIC_BUFFER_LIMITING
if(data<0)
{
do
@ -195,16 +280,9 @@ public:
data+=size;
uflow = true;
}
//if( uflow )
//ConLog( " * SPU2 : Data Underflow detected!\n" );
#endif
//if(isWaiting)
{
PulseEvent(hSyncEvent);
}
PulseEvent(hSyncEvent);
#ifdef DYNAMIC_BUFFER_LIMITING
@ -318,7 +396,6 @@ void UpdateTempoChange()
s32 bufferUsage = sndBuffer->GetBufferUsage();
s32 bufferSize = sndBuffer->GetBufferSize();
//Emergency stretch to compensate for FPS fluctuations and keep the buffers happy
bool a=(bufferUsage < CurBufferSize * 4);
bool b=(bufferUsage >= (bufferSize - CurBufferSize * 4));
@ -438,12 +515,18 @@ void SndClose()
void SndUpdateLimitMode()
{
sndBuffer->PauseOnWrite(LimitMode!=0);
//sndBuffer->PauseOnWrite(LimitMode!=0);
if(LimitMode!=0)
printf(" * SPU2 limiter is now ON.\n");
else
printf(" * SPU2 limiter is now OFF.\n");
if(LimitMode!=0) {
timeStretchEnabled = true;
//printf(" * SPU2 limiter is now ON.\n");
printf(" * SPU2 timestretch is now ON.\n");
}
else {
//printf(" * SPU2 limiter is now OFF.\n");
printf(" * SPU2 timestretch is now OFF.\n");
timeStretchEnabled = false;
}
}

View File

@ -34,10 +34,11 @@ const unsigned char build = 9; // increase that with each version
static char *libraryName = "GiGaHeRz's SPU2 ("
#ifdef _DEBUG
"Playground Debug "
#endif
#ifdef PUBLIC
"Playground Debug"
#elif defined( PUBLIC )
"Playground Mod"
#else
"Playground Dev"
#endif
")";
@ -50,9 +51,18 @@ const char *AddressNames[6]={"SSAH","SSAL","LSAH","LSAL","NAXH","NAXL"};
double opitch;
int osps;
int Log = 1;
// [Air]: Adding the spu2init boolean wasn't necessary except to help me in
// debugging the spu2 suspend/resume behavior (when user hits escape).
static bool spu2open=false; // has spu2open plugin interface been called?
static bool spu2init=false; // has spu2init plugin interface been called?
s8 spu2open=0;
// [Air]: fixed the hacky part of UpdateTimer with this:
static bool resetClock = true;
// Used to make spu2 more robust at loading incompatible saves.
// You won't get any sound but it won't cause mass instability either.
// (should allow players to get to their next save point more easily)
bool disableEverything=false;
void (* _irqcallback)();
void (* dma4callback)();
@ -69,6 +79,9 @@ u32 ThreadFuncID;
char fname[]="01234567890123456789012345";
#ifndef PUBLIC
V_CoreDebug DebugCores[2];
#endif
V_Core Cores[2];
V_SPDIF Spdif;
@ -139,12 +152,40 @@ void SysMessage(char *fmt, ...)
MessageBox(0, tmp, "SPU2ghz Msg", 0);
}
s16 __forceinline *GetMemPtr(u32 addr)
// Returns a pointer to the 16-bit word at word address `addr` in SPU2 RAM.
// The address is not wrapped here; it is asserted to be below 0x100000.
s16 __forceinline * __fastcall GetMemPtr(u32 addr)
{
    assert(addr<0x100000);
    return &_spu2mem[addr];
}
// Reads one signed 16-bit word from SPU2 RAM.
// The word address is wrapped to 20 bits before the lookup.
s16 __forceinline __fastcall spu2M_Read( u32 addr )
{
    const u32 wrapped = addr & 0xfffff;
    return *GetMemPtr( wrapped );
}
// Writes a signed value to the SPU2 ram and invalidates the matching ADPCM
// cache entry in the process.
//
//   addr  - word (not byte) address; wrapped to 20 bits before use.
//   value - 16-bit word to store.
//
// Optimization note: don't use __forceinline because the footprint of this
// function is a little too heavy now. Better to let the compiler decide.
void __inline __fastcall spu2M_Write( u32 addr, s16 value )
{
    // Wrap the address *before* deriving the cache index, so that the flag
    // being cleared always belongs to the word actually written and an
    // out-of-range address can never index past the end of pcm_cache_flags.
    addr &= 0xfffff;

    // Make sure the cache is invalidated:
    // (note to self : addr addresses WORDs, not bytes)
    const u32 nexta = addr >> 3;                // 8 words per encoded block.
    const u32 flagbitmask = 1ul<<(nexta & 31);  // 32 flags per array entry
    pcm_cache_flags[nexta>>5] &= ~flagbitmask;

    *GetMemPtr( addr ) = value;
}
// Unsigned overload: reinterprets the 16 bits as signed and forwards to the
// signed spu2M_Write overload.
void __inline __fastcall spu2M_Write( u32 addr, u16 value )
{
    const s16 asSigned = (s16)value;
    spu2M_Write( addr, asSigned );
}
void CoreReset(int c)
{
int v=0;
@ -203,7 +244,10 @@ void CoreReset(int c)
Cores[c].Voices[v].NextA=2800;
Cores[c].Voices[v].StartA=2800;
Cores[c].Voices[v].LoopStartA=2800;
Cores[c].Voices[v].lastSetStartA=2800;
Cores[c].Voices[v].SBuffer=pcm_cache_data;
#ifndef PUBLIC
DebugCores[c].Voices[v].lastSetStartA=2800;
#endif
}
Cores[c].DMAICounter=0;
Cores[c].AdmaInProgress=0;
@ -226,6 +270,7 @@ s32 CALLBACK SPU2init()
s32 c=0,v=0;
ReadSettings();
acumCycles=0;
#ifdef SPU2_LOG
if(AccessLog)
{
@ -235,12 +280,34 @@ s32 CALLBACK SPU2init()
}
#endif
srand((unsigned)time(NULL));
if (spu2open) return 0;
disableEverything=false;
if (spu2init)
{
ConLog( " * SPU2: Already initialized - Ignoring SPU2init signal." );
return 0;
}
spu2init=true;
spu2regs = (short*)malloc(0x010000);
_spu2mem = (short*)malloc(0x200000);
if ((spu2regs == NULL) || (_spu2mem == NULL))
// adpcm decoder cache:
// the cache data size is determined by taking the number of adpcm blocks
// (2MB / 16) and multiplying it by the decoded block size (28 samples).
// Thus: pcm_cache_data = 7,340,032 bytes (ouch!)
// Expanded: 16 bytes expands to 56 bytes [3.5:1 ratio]
// Resulting in 2MB * 3.5.
pcm_cache_flags = (u32*)calloc( 0x200000 / (16*32), 4 );
pcm_cache_data = (s16*)calloc( (0x200000 / 16) * 28, 2 );
if( (spu2regs == NULL) || (_spu2mem == NULL) ||
(pcm_cache_data == NULL) || (pcm_cache_flags == NULL) )
{
SysMessage("Error allocating Memory\n"); return -1;
SysMessage("SPU2: Error allocating Memory\n"); return -1;
}
for(int mem=0;mem<0x800;mem++)
@ -273,7 +340,6 @@ s32 CALLBACK SPU2init()
}
LowPassFilterInit();
InitADSR();
#ifdef STREAM_DUMP
@ -329,7 +395,10 @@ BOOL CALLBACK DebugProc(HWND hWnd,UINT uMsg,WPARAM wParam,LPARAM lParam)
return TRUE;
}
s32 CALLBACK SPU2open(void *pDsp) {
s32 CALLBACK SPU2open(void *pDsp)
{
if( spu2open ) return 0;
FileLog("[%10d] SPU2 Open\n",Cycles);
/*if(debugDialogOpen==0)
@ -359,19 +428,23 @@ s32 CALLBACK SPU2open(void *pDsp) {
void CALLBACK SPU2close()
{
if( !spu2open ) return;
FileLog("[%10d] SPU2 Close\n",Cycles);
spu2open=0;
DspCloseLibrary();
spdif_shutdown();
SndClose();
spu2open = false;
}
void CALLBACK SPU2shutdown()
{
if(spu2open) SPU2close();
if(!spu2init) return;
ConLog( " * SPU2: Shutting down.\n" );
SPU2close();
#ifdef S2R_ENABLE
if(!replay_mode)
@ -390,8 +463,20 @@ void CALLBACK SPU2shutdown()
if(WaveLog && wavedump_ok) wavedump_close();
DMALogClose();
spu2init = false;
free(spu2regs);
free(_spu2mem);
free( pcm_cache_flags );
free( pcm_cache_data );
spu2regs = NULL;
_spu2mem = NULL;
pcm_cache_flags = NULL;
pcm_cache_data = NULL;
#ifdef SPU2_LOG
if(!AccessLog) return;
FileLog("[%10d] SPU2shutdown\n",Cycles);
@ -427,8 +512,8 @@ BOOL DrawRectangle(HDC dc, int left, int top, int width, int height)
return Polyline(dc, p, 5);
}
#ifndef PUBLIC
HFONT hf = NULL;
int lCount=0;
void UpdateDebugDialog()
{
@ -455,6 +540,7 @@ void UpdateDebugDialog()
int IX = 8+256*c;
int IY = 8+ 32*v;
V_Voice& vc(Cores[c].Voices[v]);
V_VoiceDebug& vcd( DebugCores[c].Voices[v] );
SetDCBrushColor(hdc,RGB( 0, 0, 0));
if((vc.ADSR.Phase>0)&&(vc.ADSR.Phase<6))
@ -463,11 +549,11 @@ void UpdateDebugDialog()
}
else
{
if(vc.lastStopReason==1)
if(vcd.lastStopReason==1)
{
SetDCBrushColor(hdc,RGB(128, 0, 0));
}
if(vc.lastStopReason==2)
if(vcd.lastStopReason==2)
{
SetDCBrushColor(hdc,RGB( 0,128, 0));
}
@ -491,7 +577,7 @@ void UpdateDebugDialog()
FillRectangle(hdc,IX+48,IY+26 - adsr, 4, adsr);
int peak = vc.displayPeak * 24 / 32768;
int peak = vcd.displayPeak * 24 / 32768;
FillRectangle(hdc,IX+56,IY+26 - peak, 4, peak);
@ -509,13 +595,13 @@ void UpdateDebugDialog()
sprintf(t,"%06x",vc.LoopStartA);
TextOut(hdc,IX+4,IY+21,t,6);
vc.displayPeak = 0;
vcd.displayPeak = 0;
if(vc.lastSetStartA != vc.StartA)
if(vcd.lastSetStartA != vc.StartA)
{
printf(" *** Warning! Core %d Voice %d: StartA should be %06x, and is %06x.\n",
c,v,vc.lastSetStartA,vc.StartA);
vc.lastSetStartA = vc.lastSetStartA;
c,v,vcd.lastSetStartA,vc.StartA);
// Resync the tracked value with the live StartA so the warning is reported
// once per change (the previous self-assignment was a no-op).
vcd.lastSetStartA = vc.StartA;
}
}
}
@ -530,6 +616,7 @@ void UpdateDebugDialog()
DispatchMessage(&msg);
}
}
#endif
//SHOULD be 768, but 751/752 seems to get better results
#define TickInterval 768
@ -558,12 +645,20 @@ DWORD CALLBACK TimeThread(PVOID /* unused param */)
return 0;
}
void CALLBACK TimeUpdate(u32 cClocks, u32 syncType)
void __fastcall TimeUpdate(u32 cClocks, u32 syncType)
{
u32 dClocks = cClocks-lClocks;
// HACKY but should work anyway.
if(lClocks==0) lClocks = cClocks;
// [Air]: Sanity Check
// If for some reason our clock value seems way off base, just mix
// out a little bit, skip the rest, and hope the ship "rights" itself later on.
if( dClocks > TickInterval*32 )
{
ConLog( " * SPU2 > TimeUpdate > Sanity Check Failed: %d (cc: %d)\n", dClocks/TickInterval, cClocks/TickInterval );
dClocks = TickInterval*32;
lClocks = cClocks-dClocks;
}
//Update Mixing Progress
while(dClocks>=TickInterval)
@ -636,12 +731,15 @@ void CALLBACK TimeUpdate(u32 cClocks, u32 syncType)
bool numpad_minus_old=false;
bool numpad_minus = false;
u32 timer=0,time1=0,time2=0;
void CALLBACK SPU2async(u32 cycles)
{
u32 oldClocks = lClocks;
timer++;
if( disableEverything ) return;
#ifndef PUBLIC
u32 oldClocks = lClocks;
static u32 timer=0,time1=0,time2=0;
timer++;
if (timer == 1){
time1=timeGetTime();
}
@ -649,6 +747,8 @@ void CALLBACK SPU2async(u32 cycles)
time2 = timeGetTime()-time1 ;
timer=0;
}
#endif
DspUpdate();
if(LimiterToggleEnabled)
@ -765,6 +865,8 @@ void CALLBACK SPU_ps1_write(u32 mem, u16 value)
Cores[0].Voices[voice].ADSR.Reg_ADSR2 = value; break;
case 6: Cores[0].Voices[voice].ADSR.Value=value; break;
case 7: Cores[0].Voices[voice].LoopStartA=(u32)value <<8; break;
jNO_DEFAULT;
}
}
else switch(reg)
@ -890,6 +992,8 @@ u16 CALLBACK SPU_ps1_read(u32 mem)
case 5: value=Cores[0].Voices[voice].ADSR.Reg_ADSR2; break;
case 6: value=Cores[0].Voices[voice].ADSR.Value; break;
case 7: value=Cores[0].Voices[voice].LoopStartA; break;
jNO_DEFAULT;
}
}
else switch(reg)
@ -1147,6 +1251,8 @@ void CALLBACK SPU2writeLog(u32 rmem, u16 value)
void CALLBACK SPU2write(u32 rmem, u16 value)
{
if( disableEverything ) return;
#ifdef S2R_ENABLE
if(!replay_mode)
s2r_writereg(Cycles,rmem,value);
@ -1160,7 +1266,7 @@ void CALLBACK SPU2write(u32 rmem, u16 value)
Spdif.Info=4;
SetIrqCall();
}
spu2Mu16(Cores[0].TSA++)=value;
spu2M_Write( Cores[0].TSA++, value );
Cores[0].TSA&=0xfffff;
return;
@ -1173,7 +1279,7 @@ void CALLBACK SPU2write(u32 rmem, u16 value)
Spdif.Info=4;
SetIrqCall();
}
spu2Mu16(Cores[1].TSA++)=value;
spu2M_Write( Cores[1].TSA++, value );
Cores[1].TSA&=0xfffff;
return;
@ -1240,6 +1346,8 @@ void CALLBACK SPU2write(u32 rmem, u16 value)
case 5: Cores[core].Voices[voice].ADSR.Value=value; break;
case 6: Cores[core].Voices[voice].VolumeL.Value=value; break;
case 7: Cores[core].Voices[voice].VolumeR.Value=value; break;
jNO_DEFAULT;
}
}
else if ((omem >= 0x01C0) && (omem < 0x02DE)) {
@ -1249,10 +1357,14 @@ void CALLBACK SPU2write(u32 rmem, u16 value)
switch (address) {
case 0: Cores[core].Voices[voice].StartA=((value & 0x0F) << 16) | (Cores[core].Voices[voice].StartA & 0xFFF8);
Cores[core].Voices[voice].lastSetStartA = Cores[core].Voices[voice].StartA;
#ifndef PUBLIC
DebugCores[core].Voices[voice].lastSetStartA = Cores[core].Voices[voice].StartA;
#endif
break;
case 1: Cores[core].Voices[voice].StartA=(Cores[core].Voices[voice].StartA & 0x0F0000) | (value & 0xFFF8);
Cores[core].Voices[voice].lastSetStartA = Cores[core].Voices[voice].StartA;
#ifndef PUBLIC
DebugCores[core].Voices[voice].lastSetStartA = Cores[core].Voices[voice].StartA;
#endif
//if(core==1) printf(" *** StartA for C%dV%02d set to 0x%05x\n",core,voice,Cores[core].Voices[voice].StartA);
break;
case 2: Cores[core].Voices[voice].LoopStartA=((value & 0x0F) << 16) | (Cores[core].Voices[voice].LoopStartA & 0xFFF8);
@ -1471,6 +1583,7 @@ void CALLBACK SPU2write(u32 rmem, u16 value)
u16 CALLBACK SPU2read(u32 rmem)
{
if( disableEverything ) return 0;
// if(!replay_mode)
// s2r_readreg(Cycles,rmem);
@ -1516,35 +1629,22 @@ s32 CALLBACK SPU2test() {
return SndTest();
}
#define PCM_CACHE_BLOCK_COUNT ( 0x200000 / 16 )
// Savestate image of the ADPCM decode cache.
struct cacheFreezeData
{
// One bit per cache block; copied to/from pcm_cache_flags by SPU2freeze.
u32 flags[PCM_CACHE_BLOCK_COUNT/32];
// First s16 of the saved block data. SPU2freeze takes the address of this
// member and copies 28 samples per flagged block consecutively from there,
// so the saved data extends past the end of this struct.
s16 startData;
};
typedef struct
{
// compatibility with zerospu2
u32 version;
// compatibility with zerospu2 removed...
u32 version;
u8 unkregs[0x10000];
u8 mem[0x200000];
u16 interrupt;
int nSpuIrq[2];
u32 dwNewChannel2[2], dwEndChannel2[2];
u32 dwNoiseVal;
int iFMod[48];
u32 MemAddr[2];
struct ADMA
{
unsigned short * MemAddr;
int Index;
int AmountLeft;
int Enabled;
} adma[2];
u32 Adma4MemAddr, Adma7MemAddr;
int SPUCycles, SPUWorkerCycles;
int SPUStartCycle[2];
int SPUTargetCycle[2];
int voicesize;
// compatibility with zerospu2
u32 id;
V_Core Cores[2];
V_SPDIF Spdif;
@ -1560,34 +1660,89 @@ typedef struct
int lClocks;
cacheFreezeData cacheData;
} SPU2freezeData;
#define ZEROSPU_VERSION 0x70000001
// No more ZeroSPU compatibility...
//#define ZEROSPU_VERSION 0x70000001
#define SAVE_ID 0x73326701
s32 CALLBACK SPU2freeze(int mode, freezeData *data){
// versioning for saves.
// Increment this if changes to V_Core or V_Voice structs are made.
// Chances are we'll never explicitly support older save versions,
// but might as well version them anyway. Could come in handy someday!
#define SAVE_VERSION 0x0100
SPU2freezeData *spud;
static int getFreezeSize()
{
if( disableEverything ) return 7; // length of the string id "invalid"
if (mode == FREEZE_LOAD) {
int size = sizeof(SPU2freezeData);
spud = (SPU2freezeData*)data->data;
// calculate the amount of memory consumed by our cache:
if(spud->id!=SAVE_ID)
//size += PCM_CACHE_BLOCK_COUNT / 8;
for( int bidx=0; bidx<PCM_CACHE_BLOCK_COUNT; bidx++ )
{
const u32 flagmask = 1ul << (bidx & 31);
if( pcm_cache_flags[bidx>>5] & flagmask )
{
printf("SPU2Ghz Warning:\n");
size += 28*2;
}
}
return size;
}
s32 CALLBACK SPU2freeze(int mode, freezeData *data)
{
if (mode == FREEZE_LOAD)
{
const SPU2freezeData *spud = (SPU2freezeData*)data->data;
if( spud->id != SAVE_ID || spud->version != SAVE_VERSION )
{
// [Air]: Running the SPU2 from an "empty" state this way is pretty unreliable.
// It usually didn't crash at least, but it never output sound anyway and would
// confuse the new cache system.
//
// To fix it I introduced a new global flag that disables the SPU2 logic completely.
// This is the safest way to recover from an unsupported SPU2 save, since it pretty
// well guarantees the user will have a stable enough environment to reach a save spot.
printf("\n*** SPU2Ghz Warning:\n");
printf("The savestate you are trying to load was not made with this plugin.\n");
printf("Let it try to load a while, it could take up to one minute\n");
printf("If it loads ok try to reach the next memorycard savespot, save your game and continue from there.\n");
printf("Sound will be disabled until the emulator is reset.\n");
printf("Find a memorycard savespot to save your game, reset, and then continue from there.\n\n");
// Clear stuff, not that it matters:
disableEverything=true;
lClocks = 0;
resetClock = true;
// Reset the cores.
CoreReset( 0 );
CoreReset( 1 );
// adpcm cache : Just clear all the cache flags, which forces the mixer
// to re-decode everything.
memset( pcm_cache_flags, 0, (0x200000 / (16*32)) * 4 );
memset( pcm_cache_data, 0, (0x200000 / 16) * 28 * 2 );
}
else
{
disableEverything=false;
// base stuff
memcpy(spu2regs, spud->unkregs, 0x010000);
memcpy(_spu2mem, spud->mem, 0x200000);
memcpy(Cores, spud->Cores, sizeof(Cores));
memcpy(&Spdif, &spud->Spdif, sizeof(Spdif));
OutPos=spud->OutPos;
@ -1599,21 +1754,62 @@ s32 CALLBACK SPU2freeze(int mode, freezeData *data){
opitch=spud->opitch;
osps=spud->osps;
PlayMode=spud->PlayMode;
lClocks = spud->lClocks;
lClocks = spud->lClocks;
// Load the ADPCM cache:
const cacheFreezeData &cfd = spud->cacheData;
const s16* pcmSrc = &cfd.startData;
memcpy( pcm_cache_flags, cfd.flags, PCM_CACHE_BLOCK_COUNT / 8 );
int blksLoaded=0;
for( int bidx=0; bidx<PCM_CACHE_BLOCK_COUNT; bidx++ )
{
const u32 flagmask = 1ul << (bidx & 31);
if( cfd.flags[bidx>>5] & flagmask )
{
// load a cache block!
memcpy( &pcm_cache_data[bidx*28], pcmSrc, 28*2 );
pcmSrc += 28;
blksLoaded++;
}
}
// Go through the V_Voice structs and replace the SBuffer pointer
// with an absolute address into our cache buffer this session.
for( int c=0; c<2; c++ )
{
for( int v=0; v<24; v++ )
{
Cores[c].Voices[v].SBuffer = (s16*) ((u64)spud->Cores[c].Voices[v].SBuffer + (u64)pcm_cache_data );
}
}
//printf( " * SPU2 > FreezeLoad > Loaded %d cache blocks.\n", blksLoaded++ );
}
} else if (mode == FREEZE_SAVE) {
data->size = sizeof(SPU2freezeData);
data->data = (s8*)malloc(data->size);
} else if (mode == FREEZE_SAVE)
{
if (data->data == NULL) return -1;
spud = (SPU2freezeData*)data->data;
if( disableEverything )
{
    // No point in making a save state since the SPU2
    // state is completely bogus anyway... Let's just
    // give this some random ID that no one will recognize.
    //
    // Copy exactly the 7 id characters: getFreezeSize() reports a 7-byte
    // state in this mode, so strcpy's NUL terminator would land one byte
    // past the size advertised to the caller.
    memcpy( data->data, "invalid", 7 );
    return 0;
}
SPU2freezeData *spud = (SPU2freezeData*)data->data;
spud->id=SAVE_ID;
spud->version=SAVE_ID;//ZEROSPU_VERSION; //Zero compat working bad, better not save that
spud->version=SAVE_VERSION;//ZEROSPU_VERSION; //Zero compat working bad, better not save that
memcpy(spud->unkregs, spu2regs, 0x010000);
memcpy(spud->mem, _spu2mem, 0x200000);
@ -1630,10 +1826,52 @@ s32 CALLBACK SPU2freeze(int mode, freezeData *data){
spud->PlayMode=PlayMode;
spud->lClocks = lClocks;
} else if (mode == FREEZE_SIZE) {
data->size = sizeof(SPU2freezeData);
}
// Save our cache:
// We could just force the user to rebuild the cache when loading
// from savestates, but for most games the cache is pretty
// small and compresses well.
//
// Potential Alternative:
// If the cache is not saved then it is necessary to save the
// decoded blocks currently in use by active voices. This allows
// voices to resume seamlessly on load.
cacheFreezeData &cfd = spud->cacheData;
s16* pcmDst = &cfd.startData;
memcpy( cfd.flags, pcm_cache_flags, sizeof(cfd.flags) );
int blksSaved=0;
for( int bidx=0; bidx<PCM_CACHE_BLOCK_COUNT; bidx++ )
{
const u32 flagmask = 1ul << (bidx & 31);
if( cfd.flags[bidx>>5] & flagmask )
{
// save a cache block!
memcpy( pcmDst, &pcm_cache_data[bidx*28], 28*2 );
pcmDst += 28;
blksSaved++;
}
}
// Time to go through the V_Voice structs and replace the SBuffer pointer
// with a relative address that can be applied later on when the state is loaded.
for( int c=0; c<2; c++ )
{
for( int v=0; v<24; v++ )
{
spud->Cores[c].Voices[v].SBuffer =
(s16*) ((u64)spud->Cores[c].Voices[v].SBuffer - (u64)pcm_cache_data );
}
}
//printf( " * SPU2 > FreezeSave > Saved %d cache blocks.\n", blksSaved++ );
}
else if (mode == FREEZE_SIZE)
{
data->size = getFreezeSize();
}
return 0;
}
@ -1654,25 +1892,23 @@ void VoiceStart(int core,int vc)
Cores[core].Voices[vc].PlayCycle=Cycles;
Cores[core].Voices[vc].SCurrent=28;
Cores[core].Voices[vc].LoopMode=0;
Cores[core].Voices[vc].Loop=0;
Cores[core].Voices[vc].LoopStart=0;
Cores[core].Voices[vc].LoopEnd=0;
Cores[core].Voices[vc].LoopFlags=0;
Cores[core].Voices[vc].LoopStartA=Cores[core].Voices[vc].StartA;
Cores[core].Voices[vc].NextA=Cores[core].Voices[vc].StartA;
Cores[core].Voices[vc].FirstBlock=1;
Cores[core].Voices[vc].Prev1=0;
Cores[core].Voices[vc].Prev2=0;
// [Air]: Don't wipe interpolation values on VoiceStart.
// There'll be less popping/clicking if we just interpolate from the
// old sample and the new sample.
// There should be less popping/clicking if we just interpolate from the
// old sample into the new sample.
//Cores[core].Voices[vc].PV1=Cores[core].Voices[vc].PV2=0;
//Cores[core].Voices[vc].PV3=Cores[core].Voices[vc].PV4=0;
Cores[core].Voices[vc].PV1=Cores[core].Voices[vc].PV2=0;
Cores[core].Voices[vc].PV3=Cores[core].Voices[vc].PV4=0;
Cores[core].Regs.ENDX&=~(1<<vc);
#ifndef PUBLIC
DebugCores[core].Voices[vc].FirstBlock=1;
if(core==1)
{
if(MsgKeyOnOff) ConLog(" * SPU2: KeyOn: C%dV%02d: SSA: %8x; M: %s%s%s%s; H: %02x%02x; P: %04x V: %04x/%04x; ADSR: %04x%04x\n",
@ -1684,6 +1920,7 @@ void VoiceStart(int core,int vc)
Cores[core].Voices[vc].VolumeL.Value,Cores[core].Voices[vc].VolumeR.Value,
Cores[core].Voices[vc].ADSR.Reg_ADSR1,Cores[core].Voices[vc].ADSR.Reg_ADSR2);
}
#endif
}
else
{
@ -1699,8 +1936,8 @@ void VoiceStop(int core,int vc)
// [Air]: Wipe the interpolation values here, since stopped voices
// are essentially silence (and any new voices should thus interpolate up from
// such silence)
Cores[core].Voices[vc].PV1=Cores[core].Voices[vc].PV2=0;
Cores[core].Voices[vc].PV3=Cores[core].Voices[vc].PV4=0;
//Cores[core].Voices[vc].PV1=Cores[core].Voices[vc].PV2=0;
//Cores[core].Voices[vc].PV3=Cores[core].Voices[vc].PV4=0;
//Cores[core].Regs.ENDX|=(1<<vc);
}
@ -1734,6 +1971,8 @@ void StopVoices(int core, u32 value)
// for now, pData is not used
int CALLBACK SPU2setupRecording(int start, void* pData)
{
if( disableEverything ) return 0;
if(start==0)
{
//stop recording

View File

@ -93,13 +93,19 @@ extern void spdif_get_samples(s32 *samples); // fills the buffer with [l,r,c,lfe
extern short *spu2regs;
extern short *_spu2mem;
extern s16 __forceinline *GetMemPtr(u32 addr);
extern u32 *pcm_cache_flags;
extern s16 *pcm_cache_data;
extern s16 __forceinline * __fastcall GetMemPtr(u32 addr);
extern s16 __forceinline __fastcall spu2M_Read( u32 addr );
extern void __inline __fastcall spu2M_Write( u32 addr, s16 value );
extern void __inline __fastcall spu2M_Write( u32 addr, u16 value );
#define spu2Rs16(mmem) (*(s16 *)((s8 *)spu2regs + ((mmem) & 0x1fff)))
#define spu2Ru16(mmem) (*(u16 *)((s8 *)spu2regs + ((mmem) & 0x1fff)))
#define spu2Ms16(mmem) (*GetMemPtr((mmem) & 0xfffff))
#define spu2Mu16(mmem) (*(u16*)GetMemPtr((mmem) & 0xfffff))
//#define spu2Ms16(mmem) (*GetMemPtr((mmem) & 0xfffff))
//#define spu2Mu16(mmem) (*(u16*)GetMemPtr((mmem) & 0xfffff))
void SysMessage(char *fmt, ...);
@ -135,7 +141,9 @@ extern u32 lClocks;
extern u32* cPtr;
extern bool hasPtr;
void CALLBACK TimeUpdate(u32 cClocks, u32 syncType);
extern bool disableEverything;
void __fastcall TimeUpdate(u32 cClocks, u32 syncType);
void TimestretchUpdate(int bufferusage,int buffersize);

View File

@ -104,6 +104,9 @@ public:
wformat.nAvgBytesPerSec=(wformat.nSamplesPerSec * wformat.nBlockAlign);
wformat.cbSize=0;
qbuffer=new s16[BufferSize*MAX_BUFFER_COUNT];
tbuffer=new s32[BufferSize];
woores = waveOutOpen(&hwodevice,WAVE_MAPPER,&wformat,0,0,0);
if (woores != MMSYSERR_NOERROR)
{
@ -112,9 +115,6 @@ public:
return -1;
}
qbuffer=new s16[BufferSize*MAX_BUFFER_COUNT];
tbuffer=new s32[BufferSize];
for(int i=0;i<MAX_BUFFER_COUNT;i++)
{
whbuffer[i].dwBufferLength=BufferSizeBytes;
@ -127,6 +127,12 @@ public:
whbuffer[i].reserved=0;
waveOutPrepareHeader(hwodevice,whbuffer+i,sizeof(WAVEHDR));
whbuffer[i].dwFlags|=WHDR_DONE; //avoid deadlock
// Feed blocks into the device.
// It'll all be empty samples, but it helps reduce some of the pop-on-init.
//whbuffer[i].dwFlags&=~WHDR_DONE;
//waveOutWrite(hwodevice,&whbuffer[i],sizeof(WAVEHDR));
}
// Start Thread