SPU2: Use 16-bit samples for output

It's clamped to 16-bit in the output anyway.

Volume application is moved to the audio thread, so the stretcher has
higher precision if the user chooses a low volume.

Also vectorizes the int->float conversion (since it happens on the EE
thread, it's beneficial for performance).
This commit is contained in:
Stenzek 2023-01-18 19:40:05 +10:00 committed by refractionpcsx2
parent d94e861a78
commit 76fa37019e
5 changed files with 347 additions and 514 deletions

View File

@ -42,19 +42,19 @@ static float LMax = 0, RMax = 0;
static float AccL = 0; static float AccL = 0;
static float AccR = 0; static float AccR = 0;
const float Scale = 4294967296.0f; // tweak this value to change the overall output volume constexpr float Scale = 4294967296.0f; // tweak this value to change the overall output volume
const float GainL = 0.80f * Scale; constexpr float GainL = 0.80f * Scale;
const float GainR = 0.80f * Scale; constexpr float GainR = 0.80f * Scale;
const float GainC = 0.75f * Scale; constexpr float GainC = 0.75f * Scale;
const float GainSL = 0.90f * Scale; constexpr float GainSL = 0.90f * Scale;
const float GainSR = 0.90f * Scale; constexpr float GainSR = 0.90f * Scale;
const float GainLFE = 0.90f * Scale; constexpr float GainLFE = 0.90f * Scale;
const float AddCLR = 0.20f * Scale; // Stereo expansion constexpr float AddCLR = 0.20f * Scale; // Stereo expansion
extern void ResetDplIIDecoder() extern void ResetDplIIDecoder()
{ {
@ -66,10 +66,10 @@ extern void ResetDplIIDecoder()
AccR = 0; AccR = 0;
} }
void ProcessDplIISample32(const StereoOut32& src, Stereo51Out32DplII* s) void ProcessDplIISample32(const StereoOut16& src, Stereo51Out32DplII* s)
{ {
float IL = src.Left / (float)(1 << (SndOutVolumeShift + 16)); float IL = src.Left / (float)(1 << 16);
float IR = src.Right / (float)(1 << (SndOutVolumeShift + 16)); float IR = src.Right / (float)(1 << 16);
// Calculate center channel and LFE // Calculate center channel and LFE
float C = (IL + IR) * 0.5f; float C = (IL + IR) * 0.5f;
@ -121,7 +121,7 @@ void ProcessDplIISample32(const StereoOut32& src, Stereo51Out32DplII* s)
s->RightBack = (s32)(SR * GainSR); s->RightBack = (s32)(SR * GainSR);
} }
void ProcessDplIISample16(const StereoOut32& src, Stereo51Out16DplII* s) void ProcessDplIISample16(const StereoOut16& src, Stereo51Out16DplII* s)
{ {
Stereo51Out32DplII ss; Stereo51Out32DplII ss;
ProcessDplIISample32(src, &ss); ProcessDplIISample32(src, &ss);
@ -134,10 +134,10 @@ void ProcessDplIISample16(const StereoOut32& src, Stereo51Out16DplII* s)
s->RightBack = ss.RightBack >> 16; s->RightBack = ss.RightBack >> 16;
} }
void ProcessDplSample32(const StereoOut32& src, Stereo51Out32Dpl* s) void ProcessDplSample32(const StereoOut16& src, Stereo51Out32Dpl* s)
{ {
float ValL = src.Left / (float)(1 << (SndOutVolumeShift + 16)); float ValL = src.Left / (float)(1 << 16);
float ValR = src.Right / (float)(1 << (SndOutVolumeShift + 16)); float ValR = src.Right / (float)(1 << 16);
float C = (ValL + ValR) * 0.5f; //+15.8 float C = (ValL + ValR) * 0.5f; //+15.8
float S = (ValL - ValR) * 0.5f; float S = (ValL - ValR) * 0.5f;
@ -157,7 +157,7 @@ void ProcessDplSample32(const StereoOut32& src, Stereo51Out32Dpl* s)
s->RightBack = (s32)(S * GainSR); s->RightBack = (s32)(S * GainSR);
} }
void ProcessDplSample16(const StereoOut32& src, Stereo51Out16Dpl* s) void ProcessDplSample16(const StereoOut16& src, Stereo51Out16Dpl* s)
{ {
Stereo51Out32Dpl ss; Stereo51Out32Dpl ss;
ProcessDplSample32(src, &ss); ProcessDplSample32(src, &ss);

View File

@ -28,8 +28,6 @@ static const s32 tbl_XA_Factor[16][2] =
{98, -55}, {98, -55},
{122, -60}}; {122, -60}};
float SPU2::FinalVolume = 1.0f;
// Performs a 64-bit multiplication between two values and returns the // Performs a 64-bit multiplication between two values and returns the
// high 32 bits as a result (discarding the fractional 32 bits). // high 32 bits as a result (discarding the fractional 32 bits).
// The combined fractional bits of both inputs must be 32 bits for this // The combined fractional bits of both inputs must be 32 bits for this
@ -692,17 +690,10 @@ __forceinline
} }
else else
{ {
Out.Left = MulShr32(Out.Left << SndOutVolumeShift, Cores[1].MasterVol.Left.Value); Out.Left = MulShr32(Out.Left, Cores[1].MasterVol.Left.Value);
Out.Right = MulShr32(Out.Right << SndOutVolumeShift, Cores[1].MasterVol.Right.Value); Out.Right = MulShr32(Out.Right, Cores[1].MasterVol.Right.Value);
} }
// Configurable output volume
Out.Left *= SPU2::FinalVolume;
Out.Right *= SPU2::FinalVolume;
// Final Clamp! // Final Clamp!
// Like any good audio system, the PS2 pumps the volume and incurs some distortion in its // Like any good audio system, the PS2 pumps the volume and incurs some distortion in its
// output, giving us a nice thumpy sound at times. So we add 1 above (2x volume pump) and // output, giving us a nice thumpy sound at times. So we add 1 above (2x volume pump) and
@ -711,12 +702,9 @@ __forceinline
// Edit: I'm sorry Jake, but I know of no good audio system that arbitrarily distorts and clips // Edit: I'm sorry Jake, but I know of no good audio system that arbitrarily distorts and clips
// output by design. // output by design.
// Good thing though that this code gets the volume exactly right, as per tests :) // Good thing though that this code gets the volume exactly right, as per tests :)
Out = clamp_mix(Out, SndOutVolumeShift); Out = clamp_mix(Out);
SndBuffer::Write(Out); SndBuffer::Write(StereoOut16(Out));
if (SampleRate == 96000) // Double up samples for 96khz (Port Audio Non-Exclusive)
SndBuffer::Write(Out);
// Update AutoDMA output positioning // Update AutoDMA output positioning
OutPos++; OutPos++;
@ -743,13 +731,3 @@ __forceinline
} }
} }
} }
s32 SPU2::GetOutputVolume()
{
return static_cast<s32>(std::round(FinalVolume * 100.0f));
}
void SPU2::SetOutputVolume(s32 volume)
{
FinalVolume = static_cast<float>(std::clamp<s32>(volume, 0, Pcsx2Config::SPU2Options::MAX_VOLUME)) / 100.0f;
}

View File

@ -15,14 +15,9 @@
#pragma once #pragma once
namespace SPU2
{
extern float FinalVolume;
}
struct StereoOut32 struct StereoOut32
{ {
static StereoOut32 Empty; static const StereoOut32 Empty;
s32 Left; s32 Left;
s32 Right; s32 Right;
@ -39,11 +34,6 @@ struct StereoOut32
{ {
} }
StereoOut32(const StereoOut16& src);
explicit StereoOut32(const StereoOutFloat& src);
StereoOut16 DownSample() const;
StereoOut32 operator*(const int& factor) const StereoOut32 operator*(const int& factor) const
{ {
return StereoOut32( return StereoOut32(
@ -69,12 +59,6 @@ struct StereoOut32
{ {
return StereoOut32(Left / src, Right / src); return StereoOut32(Left / src, Right / src);
} }
void ResampleFrom(const StereoOut32& src)
{
this->Left = src.Left << 2;
this->Right = src.Right << 2;
}
}; };
extern void Mix(); extern void Mix();

View File

@ -23,38 +23,13 @@
#include "SoundTouch.h" #include "SoundTouch.h"
StereoOut32 StereoOut32::Empty(0, 0); const StereoOut32 StereoOut32::Empty(0, 0);
StereoOut32::StereoOut32(const StereoOut16& src) namespace
: Left(src.Left)
, Right(src.Right)
{ {
} class NullOutModule final : public SndOutModule
{
StereoOut32::StereoOut32(const StereoOutFloat& src) public:
: Left(static_cast<s32>(src.Left * 2147483647.0f))
, Right(static_cast<s32>(src.Right * 2147483647.0f))
{
}
StereoOut16 StereoOut32::DownSample() const
{
return StereoOut16(
Left >> SndOutVolumeShift,
Right >> SndOutVolumeShift);
}
StereoOut32 StereoOut16::UpSample() const
{
return StereoOut32(
Left << SndOutVolumeShift,
Right << SndOutVolumeShift);
}
namespace {
class NullOutModule final : public SndOutModule
{
public:
bool Init() override { return true; } bool Init() override { return true; }
void Close() override {} void Close() override {}
void SetPaused(bool paused) override {} void SetPaused(bool paused) override {}
@ -79,8 +54,8 @@ public:
{ {
return {}; return {};
} }
}; };
} } // namespace
static NullOutModule s_NullOut; static NullOutModule s_NullOut;
static SndOutModule* NullOut = &s_NullOut; static SndOutModule* NullOut = &s_NullOut;
@ -118,7 +93,9 @@ const char* const* GetOutputModuleBackends(const char* omodid)
} }
SndOutDeviceInfo::SndOutDeviceInfo(std::string name_, std::string display_name_, u32 minimum_latency_) SndOutDeviceInfo::SndOutDeviceInfo(std::string name_, std::string display_name_, u32 minimum_latency_)
: name(std::move(name_)), display_name(std::move(display_name_)), minimum_latency_frames(minimum_latency_) : name(std::move(name_))
, display_name(std::move(display_name_))
, minimum_latency_frames(minimum_latency_)
{ {
} }
@ -132,21 +109,77 @@ std::vector<SndOutDeviceInfo> GetOutputDeviceList(const char* omodid, const char
return ret; return ret;
} }
StereoOut32* SndBuffer::m_buffer; namespace SndBuffer
s32 SndBuffer::m_size; {
alignas(4) volatile s32 SndBuffer::m_rpos; static float s_final_volume = 1.0f;
alignas(4) volatile s32 SndBuffer::m_wpos;
bool SndBuffer::m_underrun_freeze; static bool s_underrun_freeze = 0;
StereoOut32* SndBuffer::sndTempBuffer = nullptr;
StereoOut16* SndBuffer::sndTempBuffer16 = nullptr;
int SndBuffer::sndTempProgress = 0;
int GetAlignedBufferSize(int comp) // data prediction amount, used to "commit" data that hasn't
// finished timestretch processing.
static s32 s_predict_data = 0;
// records last buffer status (fill %, range -100 to 100, with 0 being 50% full)
static float s_last_pct = 0;
static float s_last_emergency_adj = 0.0f;
static float s_cTempo = 1.0f;
static float s_eTempo = 1.0f;
static int s_ss_freeze = 0;
static std::unique_ptr<StereoOut16[]> s_staging_buffer;
static std::unique_ptr<float[]> s_float_buffer;
static int s_staging_progress = 0;
static std::unique_ptr<StereoOut16[]> s_output_buffer;
static s32 s_output_buffer_size = 0;
// TODO: Replace these with proper atomics.
alignas(4) static volatile s32 m_rpos = 0;
alignas(4) static volatile s32 m_wpos = 0;
static bool CheckUnderrunStatus(int& nSamples, int& quietSampleCount);
static void soundtouchInit();
static void soundtouchClearContents();
static void soundtouchCleanup();
static void timeStretchWrite();
static void timeStretchUnderrun();
static s32 timeStretchOverrun();
static void PredictDataWrite(int samples);
static float GetStatusPct();
static void UpdateTempoChangeSoundTouch();
static void UpdateTempoChangeSoundTouch2();
static void _WriteSamples(StereoOut16* bData, int nSamples);
static void _WriteSamples_Safe(StereoOut16* bData, int nSamples);
static void _ReadSamples_Safe(StereoOut16* bData, int nSamples);
static void _WriteSamples_Internal(StereoOut16* bData, int nSamples);
static void _DropSamples_Internal(int nSamples);
static void _ReadSamples_Internal(StereoOut16* bData, int nSamples);
static int _GetApproximateDataInBuffer();
} // namespace SndBuffer
static int GetAlignedBufferSize(int comp)
{ {
return (comp + SndOutPacketSize - 1) & ~(SndOutPacketSize - 1); return (comp + SndOutPacketSize - 1) & ~(SndOutPacketSize - 1);
} }
s32 SPU2::GetOutputVolume()
{
return static_cast<s32>(std::round(SndBuffer::s_final_volume * 100.0f));
}
void SPU2::SetOutputVolume(s32 volume)
{
SndBuffer::s_final_volume = static_cast<float>(std::clamp<s32>(volume, 0, Pcsx2Config::SPU2Options::MAX_VOLUME)) / 100.0f;
}
// Returns TRUE if there is data to be output, or false if no data // Returns TRUE if there is data to be output, or false if no data
// is available to be copied. // is available to be copied.
bool SndBuffer::CheckUnderrunStatus(int& nSamples, int& quietSampleCount) bool SndBuffer::CheckUnderrunStatus(int& nSamples, int& quietSampleCount)
@ -154,9 +187,9 @@ bool SndBuffer::CheckUnderrunStatus(int& nSamples, int& quietSampleCount)
quietSampleCount = 0; quietSampleCount = 0;
int data = _GetApproximateDataInBuffer(); int data = _GetApproximateDataInBuffer();
if (m_underrun_freeze) if (s_underrun_freeze)
{ {
int toFill = m_size / ((EmuConfig.SPU2.SynchMode == Pcsx2Config::SPU2Options::SynchronizationMode::NoSync) ? 32 : 400); // TimeStretch and Async off? int toFill = s_output_buffer_size / ((EmuConfig.SPU2.SynchMode == Pcsx2Config::SPU2Options::SynchronizationMode::NoSync) ? 32 : 400); // TimeStretch and Async off?
toFill = GetAlignedBufferSize(toFill); toFill = GetAlignedBufferSize(toFill);
// toFill is now aligned to a SndOutPacket // toFill is now aligned to a SndOutPacket
@ -168,16 +201,16 @@ bool SndBuffer::CheckUnderrunStatus(int& nSamples, int& quietSampleCount)
return false; return false;
} }
m_underrun_freeze = false; s_underrun_freeze = false;
if (SPU2::MsgOverruns()) if (SPU2::MsgOverruns())
SPU2::ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize); SPU2::ConLog(" * SPU2 > Underrun compensation (%d packets buffered)\n", toFill / SndOutPacketSize);
lastPct = 0.0; // normalize timestretcher s_last_pct = 0.0; // normalize timestretcher
} }
else if (data < nSamples) else if (data < nSamples)
{ {
quietSampleCount = nSamples - data; quietSampleCount = nSamples - data;
nSamples = data; nSamples = data;
m_underrun_freeze = true; s_underrun_freeze = true;
if (EmuConfig.SPU2.SynchMode == Pcsx2Config::SPU2Options::SynchronizationMode::TimeStretch) // TimeStrech on if (EmuConfig.SPU2.SynchMode == Pcsx2Config::SPU2Options::SynchronizationMode::TimeStretch) // TimeStrech on
timeStretchUnderrun(); timeStretchUnderrun();
@ -191,37 +224,37 @@ bool SndBuffer::CheckUnderrunStatus(int& nSamples, int& quietSampleCount)
int SndBuffer::_GetApproximateDataInBuffer() int SndBuffer::_GetApproximateDataInBuffer()
{ {
// WARNING: not necessarily 100% up to date by the time it's used, but it will have to do. // WARNING: not necessarily 100% up to date by the time it's used, but it will have to do.
return (m_wpos + m_size - m_rpos) % m_size; return (m_wpos + s_output_buffer_size - m_rpos) % s_output_buffer_size;
} }
void SndBuffer::_WriteSamples_Internal(StereoOut32* bData, int nSamples) void SndBuffer::_WriteSamples_Internal(StereoOut16* bData, int nSamples)
{ {
// WARNING: This assumes the write will NOT wrap around, // WARNING: This assumes the write will NOT wrap around,
// and also assumes there's enough free space in the buffer. // and also assumes there's enough free space in the buffer.
memcpy(m_buffer + m_wpos, bData, nSamples * sizeof(StereoOut32)); std::memcpy(s_output_buffer.get() + m_wpos, bData, nSamples * sizeof(StereoOut16));
m_wpos = (m_wpos + nSamples) % m_size; m_wpos = (m_wpos + nSamples) % s_output_buffer_size;
} }
void SndBuffer::_DropSamples_Internal(int nSamples) void SndBuffer::_DropSamples_Internal(int nSamples)
{ {
m_rpos = (m_rpos + nSamples) % m_size; m_rpos = (m_rpos + nSamples) % s_output_buffer_size;
} }
void SndBuffer::_ReadSamples_Internal(StereoOut32* bData, int nSamples) void SndBuffer::_ReadSamples_Internal(StereoOut16* bData, int nSamples)
{ {
// WARNING: This assumes the read will NOT wrap around, // WARNING: This assumes the read will NOT wrap around,
// and also assumes there's enough data in the buffer. // and also assumes there's enough data in the buffer.
memcpy(bData, m_buffer + m_rpos, nSamples * sizeof(StereoOut32)); std::memcpy(bData, s_output_buffer.get() + m_rpos, nSamples * sizeof(StereoOut16));
_DropSamples_Internal(nSamples); _DropSamples_Internal(nSamples);
} }
void SndBuffer::_WriteSamples_Safe(StereoOut32* bData, int nSamples) void SndBuffer::_WriteSamples_Safe(StereoOut16* bData, int nSamples)
{ {
// WARNING: This code assumes there's only ONE writing process. // WARNING: This code assumes there's only ONE writing process.
if ((m_size - m_wpos) < nSamples) if ((s_output_buffer_size - m_wpos) < nSamples)
{ {
const int b1 = m_size - m_wpos; const int b1 = s_output_buffer_size - m_wpos;
const int b2 = nSamples - b1; const int b2 = nSamples - b1;
_WriteSamples_Internal(bData, b1); _WriteSamples_Internal(bData, b1);
@ -233,12 +266,12 @@ void SndBuffer::_WriteSamples_Safe(StereoOut32* bData, int nSamples)
} }
} }
void SndBuffer::_ReadSamples_Safe(StereoOut32* bData, int nSamples) void SndBuffer::_ReadSamples_Safe(StereoOut16* bData, int nSamples)
{ {
// WARNING: This code assumes there's only ONE reading process. // WARNING: This code assumes there's only ONE reading process.
if ((m_size - m_rpos) < nSamples) if ((s_output_buffer_size - m_rpos) < nSamples)
{ {
const int b1 = m_size - m_rpos; const int b1 = s_output_buffer_size - m_rpos;
const int b2 = nSamples - b1; const int b2 = nSamples - b1;
_ReadSamples_Internal(bData, b1); _ReadSamples_Internal(bData, b1);
@ -250,6 +283,15 @@ void SndBuffer::_ReadSamples_Safe(StereoOut32* bData, int nSamples)
} }
} }
static __fi StereoOut16 ApplyVolume(StereoOut16 frame, float volume)
{
// TODO: This could be done with SSE/NEON, but we'd only be processing half our vector width.
// It happens on the audio thread anyway, so no biggie, but someone might want to do it at some point.
return StereoOut16(
static_cast<s16>(std::clamp(static_cast<float>(frame.Left) * volume, -32768.0f, 32767.0f)),
static_cast<s16>(std::clamp(static_cast<float>(frame.Right) * volume, -32768.0f, 32767.0f)));
}
// Note: When using with 32 bit output buffers, the user of this function is responsible // Note: When using with 32 bit output buffers, the user of this function is responsible
// for shifting the values to where they need to be manually. The fixed point depth of // for shifting the values to where they need to be manually. The fixed point depth of
// the sample output is determined by the SndOutVolumeShift, which is the number of bits // the sample output is determined by the SndOutVolumeShift, which is the number of bits
@ -276,19 +318,33 @@ void SndBuffer::ReadSamples(T* bData, int nSamples)
pxAssume(nSamples <= SndOutPacketSize); pxAssume(nSamples <= SndOutPacketSize);
// WARNING: This code assumes there's only ONE reading process. // WARNING: This code assumes there's only ONE reading process.
int b1 = m_size - m_rpos; int b1 = s_output_buffer_size - m_rpos;
if (b1 > nSamples) if (b1 > nSamples)
b1 = nSamples; b1 = nSamples;
const int b2 = nSamples - b1;
if (std::is_same_v<T, StereoOut16> && s_final_volume == 1.0f)
{
// First part // First part
for (int i = 0; i < b1; i++) if (b1 > 0)
bData[i].ResampleFrom(m_buffer[i + m_rpos]); std::memcpy(bData, &s_output_buffer[m_rpos], sizeof(StereoOut16) * b1);
// Second part
if (b2 > 0)
std::memcpy(bData + b1, s_output_buffer.get(), sizeof(StereoOut16) * b2);
}
else
{
// First part
for (int i = 0; i < b1; i++)
bData[i].SetFrom(ApplyVolume(s_output_buffer[i + m_rpos], s_final_volume));
// Second part // Second part
const int b2 = nSamples - b1;
for (int i = 0; i < b2; i++) for (int i = 0; i < b2; i++)
bData[i + b1].ResampleFrom(m_buffer[i]); bData[i + b1].SetFrom(ApplyVolume(s_output_buffer[i], s_final_volume));
}
_DropSamples_Internal(nSamples); _DropSamples_Internal(nSamples);
} }
@ -301,9 +357,6 @@ void SndBuffer::ReadSamples(T* bData, int nSamples)
} }
template void SndBuffer::ReadSamples(StereoOut16*, int); template void SndBuffer::ReadSamples(StereoOut16*, int);
template void SndBuffer::ReadSamples(StereoOut32*, int);
//template void SndBuffer::ReadSamples(StereoOutFloat*);
template void SndBuffer::ReadSamples(Stereo21Out16*, int); template void SndBuffer::ReadSamples(Stereo21Out16*, int);
template void SndBuffer::ReadSamples(Stereo40Out16*, int); template void SndBuffer::ReadSamples(Stereo40Out16*, int);
template void SndBuffer::ReadSamples(Stereo41Out16*, int); template void SndBuffer::ReadSamples(Stereo41Out16*, int);
@ -312,18 +365,9 @@ template void SndBuffer::ReadSamples(Stereo51Out16Dpl*, int);
template void SndBuffer::ReadSamples(Stereo51Out16DplII*, int); template void SndBuffer::ReadSamples(Stereo51Out16DplII*, int);
template void SndBuffer::ReadSamples(Stereo71Out16*, int); template void SndBuffer::ReadSamples(Stereo71Out16*, int);
template void SndBuffer::ReadSamples(Stereo20Out32*, int); void SndBuffer::_WriteSamples(StereoOut16* bData, int nSamples)
template void SndBuffer::ReadSamples(Stereo21Out32*, int);
template void SndBuffer::ReadSamples(Stereo40Out32*, int);
template void SndBuffer::ReadSamples(Stereo41Out32*, int);
template void SndBuffer::ReadSamples(Stereo51Out32*, int);
template void SndBuffer::ReadSamples(Stereo51Out32Dpl*, int);
template void SndBuffer::ReadSamples(Stereo51Out32DplII*, int);
template void SndBuffer::ReadSamples(Stereo71Out32*, int);
void SndBuffer::_WriteSamples(StereoOut32* bData, int nSamples)
{ {
m_predictData = 0; s_predict_data = 0;
// Problem: // Problem:
// If the SPU2 gets out of sync with the SndOut device, the writepos of the // If the SPU2 gets out of sync with the SndOut device, the writepos of the
@ -336,7 +380,7 @@ void SndBuffer::_WriteSamples(StereoOut32* bData, int nSamples)
// The older portion of the buffer is discarded rather than incoming data, // The older portion of the buffer is discarded rather than incoming data,
// so that the overall audio synchronization is better. // so that the overall audio synchronization is better.
const int free = m_size - _GetApproximateDataInBuffer(); // -1, but the <= handles that const int free = s_output_buffer_size - _GetApproximateDataInBuffer(); // -1, but the <= handles that
if (free <= nSamples) if (free <= nSamples)
{ {
// Disabled since the lock-free queue can't handle changing the read end from the write thread // Disabled since the lock-free queue can't handle changing the read end from the write thread
@ -367,7 +411,7 @@ void SndBuffer::_WriteSamples(StereoOut32* bData, int nSamples)
#else #else
if (SPU2::MsgOverruns()) if (SPU2::MsgOverruns())
SPU2::ConLog(" * SPU2 > Overrun! 1 packet tossed)\n"); SPU2::ConLog(" * SPU2 > Overrun! 1 packet tossed)\n");
lastPct = 0.0; // normalize the timestretcher s_last_pct = 0.0; // normalize the timestretcher
// Toss the packet because we overran the buffer. // Toss the packet because we overran the buffer.
return; return;
@ -391,13 +435,13 @@ bool SndBuffer::Init(const char* modname)
m_wpos = 0; m_wpos = 0;
const float latencyMS = EmuConfig.SPU2.Latency * 16; const float latencyMS = EmuConfig.SPU2.Latency * 16;
m_size = GetAlignedBufferSize((int)(latencyMS * SampleRate / 1000.0f)); s_output_buffer_size = GetAlignedBufferSize((int)(latencyMS * SampleRate / 1000.0f));
m_buffer = new StereoOut32[m_size]; s_output_buffer = std::make_unique<StereoOut16[]>(s_output_buffer_size);
m_underrun_freeze = false; s_underrun_freeze = false;
sndTempBuffer = new StereoOut32[SndOutPacketSize]; s_staging_buffer = std::make_unique<StereoOut16[]>(SndOutPacketSize);
sndTempBuffer16 = new StereoOut16[SndOutPacketSize * 2]; // in case of leftovers. s_float_buffer = std::make_unique<float[]>(SndOutPacketSize * 2);
sndTempProgress = 0; s_staging_progress = 0;
soundtouchInit(); // initializes the timestretching soundtouchInit(); // initializes the timestretching
@ -421,20 +465,14 @@ void SndBuffer::Cleanup()
soundtouchCleanup(); soundtouchCleanup();
safe_delete_array(m_buffer); s_output_buffer.reset();
safe_delete_array(sndTempBuffer); s_staging_buffer.reset();
safe_delete_array(sndTempBuffer16);
} }
int SndBuffer::m_dsp_progress = 0;
int SndBuffer::m_timestretch_progress = 0;
int SndBuffer::ssFreeze = 0;
void SndBuffer::ClearContents() void SndBuffer::ClearContents()
{ {
SndBuffer::soundtouchClearContents(); soundtouchClearContents();
SndBuffer::ssFreeze = 256; //Delays sound output for about 1 second. s_ss_freeze = 256; //Delays sound output for about 1 second.
} }
void SndBuffer::ResetBuffers() void SndBuffer::ResetBuffers()
@ -448,36 +486,35 @@ void SPU2::SetOutputPaused(bool paused)
s_output_module->SetPaused(paused); s_output_module->SetPaused(paused);
} }
void SndBuffer::Write(const StereoOut32& Sample) void SndBuffer::Write(StereoOut16 Sample)
{ {
#ifdef PCSX2_DEVBUILD #ifdef PCSX2_DEVBUILD
// Log final output to wavefile. // Log final output to wavefile.
WaveDump::WriteCore(1, CoreSrc_External, Sample.DownSample()); WaveDump::WriteCore(1, CoreSrc_External, Sample);
#endif #endif
if (WavRecordEnabled) if (WavRecordEnabled)
RecordWrite(Sample.DownSample()); RecordWrite(Sample);
sndTempBuffer[sndTempProgress++] = Sample; s_staging_buffer[s_staging_progress++] = Sample;
// If we haven't accumulated a full packet yet, do nothing more: // If we haven't accumulated a full packet yet, do nothing more:
if (sndTempProgress < SndOutPacketSize) if (s_staging_progress < SndOutPacketSize)
return; return;
sndTempProgress = 0; s_staging_progress = 0;
//Don't play anything directly after loading a savestate, avoids static killing your speakers. //Don't play anything directly after loading a savestate, avoids static killing your speakers.
if (ssFreeze > 0) if (s_ss_freeze > 0)
{ {
ssFreeze--; s_ss_freeze--;
// Play silence std::memset(s_staging_buffer.get(), 0, sizeof(StereoOut16) * SndOutPacketSize);
std::fill_n(sndTempBuffer, SndOutPacketSize, StereoOut32{});
} }
else else
{ {
if (EmuConfig.SPU2.SynchMode == Pcsx2Config::SPU2Options::SynchronizationMode::TimeStretch) // TimeStrech on if (EmuConfig.SPU2.SynchMode == Pcsx2Config::SPU2Options::SynchronizationMode::TimeStretch)
timeStretchWrite(); timeStretchWrite();
else else
_WriteSamples(sndTempBuffer, SndOutPacketSize); _WriteSamples(s_staging_buffer.get(), SndOutPacketSize);
} }
} }
@ -490,20 +527,9 @@ void SndBuffer::Write(const StereoOut32& Sample)
static std::unique_ptr<soundtouch::SoundTouch> pSoundTouch = nullptr; static std::unique_ptr<soundtouch::SoundTouch> pSoundTouch = nullptr;
// data prediction amount, used to "commit" data that hasn't
// finished timestretch processing.
s32 SndBuffer::m_predictData;
// records last buffer status (fill %, range -100 to 100, with 0 being 50% full)
float SndBuffer::lastPct;
float SndBuffer::lastEmergencyAdj;
float SndBuffer::cTempo = 1;
float SndBuffer::eTempo = 1;
void SndBuffer::PredictDataWrite(int samples) void SndBuffer::PredictDataWrite(int samples)
{ {
m_predictData += samples; s_predict_data += samples;
} }
// Calculate the buffer status percentage. // Calculate the buffer status percentage.
@ -521,8 +547,8 @@ float SndBuffer::GetStatusPct()
//ConLog( "Data %d >>> driver: %d predict: %d\n", m_data, drvempty, m_predictData ); //ConLog( "Data %d >>> driver: %d predict: %d\n", m_data, drvempty, m_predictData );
const int data = _GetApproximateDataInBuffer(); const int data = _GetApproximateDataInBuffer();
float result = static_cast<float>(data + m_predictData - drvempty) - (m_size / 16); float result = static_cast<float>(data + s_predict_data - drvempty) - (s_output_buffer_size / 16);
result /= (m_size / 16); result /= (s_output_buffer_size / 16);
return result; return result;
} }
@ -704,11 +730,11 @@ void SndBuffer::UpdateTempoChangeSoundTouch2()
void SndBuffer::UpdateTempoChangeSoundTouch() void SndBuffer::UpdateTempoChangeSoundTouch()
{ {
const float statusPct = GetStatusPct(); const float statusPct = GetStatusPct();
const float pctChange = statusPct - lastPct; const float pctChange = statusPct - s_last_pct;
float tempoChange; float tempoChange;
float emergencyAdj = 0; float emergencyAdj = 0;
float newcee = cTempo; // workspace var. for cTempo float newcee = s_cTempo; // workspace var. for cTempo
// IMPORTANT! // IMPORTANT!
// If you plan to tweak these values, make sure you're using a release build // If you plan to tweak these values, make sure you're using a release build
@ -761,10 +787,10 @@ void SndBuffer::UpdateTempoChangeSoundTouch()
// cope with low fps (underruns) than it is high fps (overruns). So to help out a // cope with low fps (underruns) than it is high fps (overruns). So to help out a
// little, the low-end portions of this check are less forgiving than the high-sides. // little, the low-end portions of this check are less forgiving than the high-sides.
if (cTempo < 0.965f || cTempo > 1.060f || if (s_cTempo < 0.965f || s_cTempo > 1.060f ||
pctChange < -0.38f || pctChange > 0.54f || pctChange < -0.38f || pctChange > 0.54f ||
statusPct < -0.42f || statusPct > 0.70f || statusPct < -0.42f || statusPct > 0.70f ||
eTempo < 0.89f || eTempo > 1.19f) s_eTempo < 0.89f || s_eTempo > 1.19f)
{ {
//printf("Emergency stretch: cTempo = %f eTempo = %f pctChange = %f statusPct = %f\n",cTempo,eTempo,pctChange,statusPct); //printf("Emergency stretch: cTempo = %f eTempo = %f pctChange = %f statusPct = %f\n",cTempo,eTempo,pctChange,statusPct);
emergencyAdj = (pow(statusPct * statusWeight, 3.0f) * statusRange); emergencyAdj = (pow(statusPct * statusWeight, 3.0f) * statusRange);
@ -774,10 +800,10 @@ void SndBuffer::UpdateTempoChangeSoundTouch()
// It helps make the system 'feel' a little smarter by giving it at least // It helps make the system 'feel' a little smarter by giving it at least
// one packet worth of history to help work off of: // one packet worth of history to help work off of:
emergencyAdj = (emergencyAdj * 0.75f) + (lastEmergencyAdj * 0.25f); emergencyAdj = (emergencyAdj * 0.75f) + (s_last_emergency_adj * 0.25f);
lastEmergencyAdj = emergencyAdj; s_last_emergency_adj = emergencyAdj;
lastPct = statusPct; s_last_pct = statusPct;
// Accumulate a fraction of the tempo change into the tempo itself. // Accumulate a fraction of the tempo change into the tempo itself.
// This helps the system run "smarter" to games that run consistently // This helps the system run "smarter" to games that run consistently
@ -792,7 +818,7 @@ void SndBuffer::UpdateTempoChangeSoundTouch()
// to the current tempo. (otherwise tempos rate of change at the extremes would // to the current tempo. (otherwise tempos rate of change at the extremes would
// be too drastic) // be too drastic)
float newTempo = newcee + (emergencyAdj * cTempo); float newTempo = newcee + (emergencyAdj * s_cTempo);
// ... and as a final optimization, only stretch if the new tempo is outside // ... and as a final optimization, only stretch if the new tempo is outside
// a nominal threshold. Keep this threshold check small, because it could // a nominal threshold. Keep this threshold check small, because it could
@ -800,19 +826,19 @@ void SndBuffer::UpdateTempoChangeSoundTouch()
// is usually better/safer) // is usually better/safer)
if (newTempo < 0.970f || newTempo > 1.045f) if (newTempo < 0.970f || newTempo > 1.045f)
{ {
cTempo = static_cast<float>(newcee); s_cTempo = static_cast<float>(newcee);
if (newTempo < 0.10f) if (newTempo < 0.10f)
newTempo = 0.10f; newTempo = 0.10f;
else if (newTempo > 10.0f) else if (newTempo > 10.0f)
newTempo = 10.0f; newTempo = 10.0f;
if (cTempo < 0.15f) if (s_cTempo < 0.15f)
cTempo = 0.15f; s_cTempo = 0.15f;
else if (cTempo > 7.5f) else if (s_cTempo > 7.5f)
cTempo = 7.5f; s_cTempo = 7.5f;
pSoundTouch->setTempo(eTempo = static_cast<float>(newTempo)); pSoundTouch->setTempo(s_eTempo = static_cast<float>(newTempo));
/*ConLog("* SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n", /*ConLog("* SPU2: [Nominal %d%%] [Emergency: %d%%] (baseTempo: %d%% ) (newTempo: %d%%) (buffer: %d%%)\n",
//(relation < 0.0) ? "Normalize" : "", //(relation < 0.0) ? "Normalize" : "",
@ -828,16 +854,16 @@ void SndBuffer::UpdateTempoChangeSoundTouch()
// Nominal operation -- turn off stretching. // Nominal operation -- turn off stretching.
// note: eTempo 'slides' toward 1.0 for smoother audio and better // note: eTempo 'slides' toward 1.0 for smoother audio and better
// protection against spikes. // protection against spikes.
if (cTempo != 1.0f) if (s_cTempo != 1.0f)
{ {
cTempo = 1.0f; s_cTempo = 1.0f;
eTempo = (1.0f + eTempo) * 0.5f; s_eTempo = (1.0f + s_eTempo) * 0.5f;
pSoundTouch->setTempo(eTempo); pSoundTouch->setTempo(s_eTempo);
} }
else else
{ {
if (eTempo != cTempo) if (s_eTempo != s_cTempo)
pSoundTouch->setTempo(eTempo = cTempo); pSoundTouch->setTempo(s_eTempo = s_cTempo);
} }
} }
} }
@ -847,7 +873,7 @@ void SndBuffer::UpdateTempoChangeAsyncMixing()
{ {
const float statusPct = GetStatusPct(); const float statusPct = GetStatusPct();
lastPct = statusPct; s_last_pct = statusPct;
if (statusPct < -0.1f) if (statusPct < -0.1f)
{ {
TickInterval -= 4; TickInterval -= 4;
@ -873,10 +899,10 @@ void SndBuffer::timeStretchUnderrun()
gRequestStretcherReset++; gRequestStretcherReset++;
// timeStretcher failed its job. We need to slow down the audio some.
cTempo -= (cTempo * 0.12f); s_cTempo -= (s_cTempo * 0.12f);
eTempo -= (eTempo * 0.30f); s_eTempo -= (s_eTempo * 0.30f);
if (eTempo < 0.1f) if (s_eTempo < 0.1f)
eTempo = 0.1f; s_eTempo = 0.1f;
// pSoundTouch->setTempo( eTempo ); // pSoundTouch->setTempo( eTempo );
//pSoundTouch->setTempoChange(-30); // temporary (until stretcher is called) slow down //pSoundTouch->setTempoChange(-30); // temporary (until stretcher is called) slow down
} }
@ -885,10 +911,10 @@ s32 SndBuffer::timeStretchOverrun()
{ {
// If we overran it means the timestretcher failed. We need to speed // If we overran it means the timestretcher failed. We need to speed
// up audio playback. // up audio playback.
cTempo += cTempo * 0.12f; s_cTempo += s_cTempo * 0.12f;
eTempo += eTempo * 0.40f; s_eTempo += s_eTempo * 0.40f;
if (eTempo > 7.5f) if (s_eTempo > 7.5f)
eTempo = 7.5f; s_eTempo = 7.5f;
//pSoundTouch->setTempo( eTempo ); //pSoundTouch->setTempo( eTempo );
//pSoundTouch->setTempoChange(30);// temporary (until stretcher is called) speed up //pSoundTouch->setTempoChange(30);// temporary (until stretcher is called) speed up
@ -898,24 +924,58 @@ s32 SndBuffer::timeStretchOverrun()
return SndOutPacketSize * 2; return SndOutPacketSize * 2;
} }
static void CvtPacketToFloat(StereoOut32* srcdest) static constexpr float S16_TO_FLOAT = 1.0f / 32767.0f;
static constexpr float FLOAT_TO_S16 = 32767.0f;
static void ConvertPacketToFloat(const StereoOut16* src, float* dst)
{ {
StereoOutFloat* dest = (StereoOutFloat*)srcdest; static_assert((SndOutPacketSize % 4) == 0);
const StereoOut32* src = (StereoOut32*)srcdest; constexpr u32 iterations = SndOutPacketSize / 4;
for (uint i = 0; i < SndOutPacketSize; ++i, ++dest, ++src)
*dest = (StereoOutFloat)*src; const __m128 S16_TO_FLOAT_V = _mm_set1_ps(S16_TO_FLOAT);
for (u32 i = 0; i < iterations; i++)
{
const __m128i sv = _mm_load_si128(reinterpret_cast<const __m128i*>(src));
src += 4;
__m128i iv1 = _mm_unpacklo_epi16(sv, sv); // [0, 0, 1, 1, 2, 2, 3, 3]
__m128i iv2 = _mm_unpackhi_epi16(sv, sv); // [4, 4, 5, 5, 6, 6, 7, 7]
iv1 = _mm_srai_epi32(iv1, 16); // [0, 1, 2, 3]
iv2 = _mm_srai_epi32(iv2, 16); // [4, 5, 6, 7]
__m128 fv1 = _mm_cvtepi32_ps(iv1); // [f0, f1, f2, f3]
__m128 fv2 = _mm_cvtepi32_ps(iv2); // [f4, f5, f6, f7]
fv1 = _mm_mul_ps(fv1, S16_TO_FLOAT_V);
fv2 = _mm_mul_ps(fv2, S16_TO_FLOAT_V);
_mm_store_ps(dst + 0, fv1);
_mm_store_ps(dst + 4, fv2);
dst += 8;
}
} }
// Parameter note: Size should always be a multiple of 128, thanks! static void ConvertPacketToInt(StereoOut16* dst, const float* src, uint size)
static void CvtPacketToInt(StereoOut32* srcdest, uint size)
{ {
//pxAssume( (size & 127) == 0 ); static_assert((SndOutPacketSize % 4) == 0);
constexpr u32 iterations = SndOutPacketSize / 4;
const StereoOutFloat* src = (StereoOutFloat*)srcdest; const __m128 FLOAT_TO_S16_V = _mm_set1_ps(FLOAT_TO_S16);
StereoOut32* dest = srcdest;
for (uint i = 0; i < size; ++i, ++dest, ++src) for (u32 i = 0; i < iterations; i++)
*dest = (StereoOut32)*src; {
__m128 fv1 = _mm_load_ps(src + 0);
__m128 fv2 = _mm_load_ps(src + 4);
src += 8;
fv1 = _mm_mul_ps(fv1, FLOAT_TO_S16_V);
fv2 = _mm_mul_ps(fv2, FLOAT_TO_S16_V);
__m128i iv1 = _mm_cvtps_epi32(fv1);
__m128i iv2 = _mm_cvtps_epi32(fv2);
__m128i iv = _mm_packs_epi32(iv1, iv2);
_mm_store_si128(reinterpret_cast<__m128i*>(dst), iv);
dst += 4;
}
} }
void SndBuffer::timeStretchWrite() void SndBuffer::timeStretchWrite()
@ -926,20 +986,20 @@ void SndBuffer::timeStretchWrite()
// suddenly we'll get several chunks back at once. Thus we use // suddenly we'll get several chunks back at once. Thus we use
// data prediction to make the timestretcher more responsive. // data prediction to make the timestretcher more responsive.
PredictDataWrite((int)(SndOutPacketSize / eTempo)); PredictDataWrite((int)(SndOutPacketSize / s_eTempo));
CvtPacketToFloat(sndTempBuffer); ConvertPacketToFloat(s_staging_buffer.get(), s_float_buffer.get());
pSoundTouch->putSamples((float*)sndTempBuffer, SndOutPacketSize); pSoundTouch->putSamples(s_float_buffer.get(), SndOutPacketSize);
int tempProgress; int tempProgress;
while (tempProgress = pSoundTouch->receiveSamples((float*)sndTempBuffer, SndOutPacketSize), while (tempProgress = pSoundTouch->receiveSamples(s_float_buffer.get(), SndOutPacketSize),
tempProgress != 0) tempProgress != 0)
{ {
// Hint: It's assumed that pSoundTouch will return chunks of 128 bytes (it always does as // Hint: It's assumed that pSoundTouch will return chunks of 128 bytes (it always does as
// long as the SSE optimizations are enabled), which means we can do our own SSE opts here. // long as the SSE optimizations are enabled), which means we can do our own SSE opts here.
CvtPacketToInt(sndTempBuffer, tempProgress); ConvertPacketToInt(s_staging_buffer.get(), s_float_buffer.get(), tempProgress);
_WriteSamples(sndTempBuffer, tempProgress); _WriteSamples(s_staging_buffer.get(), tempProgress);
} }
#ifdef SPU2X_USE_OLD_STRETCHER #ifdef SPU2X_USE_OLD_STRETCHER
@ -966,12 +1026,12 @@ void SndBuffer::soundtouchInit()
// some timestretch management vars: // some timestretch management vars:
cTempo = 1.0; s_cTempo = 1.0;
eTempo = 1.0; s_eTempo = 1.0;
lastPct = 0; s_last_pct = 0;
lastEmergencyAdj = 0; s_last_emergency_adj = 0;
m_predictData = 0; s_predict_data = 0;
} }
// reset timestretch management vars, and delay updates a bit: // reset timestretch management vars, and delay updates a bit:
@ -983,12 +1043,12 @@ void SndBuffer::soundtouchClearContents()
pSoundTouch->clear(); pSoundTouch->clear();
pSoundTouch->setTempo(1); pSoundTouch->setTempo(1);
cTempo = 1.0; s_cTempo = 1.0;
eTempo = 1.0; s_eTempo = 1.0;
lastPct = 0; s_last_pct = 0;
lastEmergencyAdj = 0; s_last_emergency_adj = 0;
m_predictData = 0; s_predict_data = 0;
} }
void SndBuffer::soundtouchCleanup() void SndBuffer::soundtouchCleanup()

View File

@ -20,14 +20,7 @@
// Number of stereo samples per SndOut block. // Number of stereo samples per SndOut block.
// All drivers must work in units of this size when communicating with // All drivers must work in units of this size when communicating with
// SndOut. // SndOut.
static const int SndOutPacketSize = 64; static constexpr int SndOutPacketSize = 64;
// Overall master volume shift; this is meant to be a precision value and does not affect
// actual output volumes. It converts SPU2 16 bit volumes to 32-bit volumes, and likewise
// downsamples 32 bit samples to 16 bit sound driver output (this way timestretching and
// DSP effects get better precision results)
static const int SndOutVolumeShift = 12;
static const int SndOutVolumeShift32 = 16 - SndOutVolumeShift; // shift up, not down
// Samplerate of the SPU2. For accurate playback we need to match this // Samplerate of the SPU2. For accurate playback we need to match this
// exactly. Trying to scale samplerates and maintain SPU2's Ts timing accuracy // exactly. Trying to scale samplerates and maintain SPU2's Ts timing accuracy
@ -57,70 +50,46 @@ struct Stereo51Out16Dpl; // similar to DplII but without rear balancing
struct Stereo51Out32Dpl; struct Stereo51Out32Dpl;
extern void ResetDplIIDecoder(); extern void ResetDplIIDecoder();
extern void ProcessDplIISample16(const StereoOut32& src, Stereo51Out16DplII* s); extern void ProcessDplIISample16(const StereoOut16& src, Stereo51Out16DplII* s);
extern void ProcessDplIISample32(const StereoOut32& src, Stereo51Out32DplII* s); extern void ProcessDplIISample32(const StereoOut16& src, Stereo51Out32DplII* s);
extern void ProcessDplSample16(const StereoOut32& src, Stereo51Out16Dpl* s); extern void ProcessDplSample16(const StereoOut16& src, Stereo51Out16Dpl* s);
extern void ProcessDplSample32(const StereoOut32& src, Stereo51Out32Dpl* s); extern void ProcessDplSample32(const StereoOut16& src, Stereo51Out32Dpl* s);
// One stereo frame of signed 16-bit samples.
struct StereoOut16
{
	s16 Left;
	s16 Right;

	// Silence.
	__fi StereoOut16()
		: Left(0)
		, Right(0)
	{
	}

	// Narrows a 32-bit frame with a plain cast; no saturation is applied.
	__fi StereoOut16(const StereoOut32& src)
		: Left(static_cast<s16>(src.Left))
		, Right(static_cast<s16>(src.Right))
	{
	}

	__fi StereoOut16(s16 left, s16 right)
		: Left(left)
		, Right(right)
	{
	}

	// Returns a copy of this frame with both channels multiplied by `volume`.
	// The product is clamped to the s16 range before the cast, so gains above
	// 1.0 saturate instead of wrapping. Does not modify this frame, hence const.
	__fi StereoOut16 ApplyVolume(float volume) const
	{
		return StereoOut16(
			static_cast<s16>(std::clamp(static_cast<float>(Left) * volume, -32768.0f, 32767.0f)),
			static_cast<s16>(std::clamp(static_cast<float>(Right) * volume, -32768.0f, 32767.0f)));
	}

	// Plain copy; has the same name and signature as SetFrom on the
	// multi-channel output frame types.
	__fi void SetFrom(const StereoOut16& src)
	{
		Left = src.Left;
		Right = src.Right;
	}
};
@ -130,11 +99,11 @@ struct Stereo21Out16
s16 Right; s16 Right;
s16 LFE; s16 LFE;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
Left = src.Left >> SndOutVolumeShift; Left = src.Left;
Right = src.Right >> SndOutVolumeShift; Right = src.Right;
LFE = (src.Left + src.Right) >> (SndOutVolumeShift + 1); LFE = (src.Left + src.Right) >> 1;
} }
}; };
@ -145,28 +114,12 @@ struct Stereo40Out16
s16 LeftBack; s16 LeftBack;
s16 RightBack; s16 RightBack;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
Left = src.Left >> SndOutVolumeShift; Left = src.Left;
Right = src.Right >> SndOutVolumeShift; Right = src.Right;
LeftBack = src.Left >> SndOutVolumeShift; LeftBack = src.Left;
RightBack = src.Right >> SndOutVolumeShift; RightBack = src.Right;
}
};
struct Stereo40Out32
{
s32 Left;
s32 Right;
s32 LeftBack;
s32 RightBack;
void ResampleFrom(const StereoOut32& src)
{
Left = src.Left << SndOutVolumeShift32;
Right = src.Right << SndOutVolumeShift32;
LeftBack = src.Left << SndOutVolumeShift32;
RightBack = src.Right << SndOutVolumeShift32;
} }
}; };
@ -178,13 +131,13 @@ struct Stereo41Out16
s16 LeftBack; s16 LeftBack;
s16 RightBack; s16 RightBack;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
Left = src.Left >> SndOutVolumeShift; Left = src.Left;
Right = src.Right >> SndOutVolumeShift; Right = src.Right;
LFE = (src.Left + src.Right) >> (SndOutVolumeShift + 1); LFE = (src.Left + src.Right) >> 1;
LeftBack = src.Left >> SndOutVolumeShift; LeftBack = src.Left;
RightBack = src.Right >> SndOutVolumeShift; RightBack = src.Right;
} }
}; };
@ -202,14 +155,14 @@ struct Stereo51Out16
// systems do to their own low pass / crossover. Manual lowpass is wasted effort // systems do to their own low pass / crossover. Manual lowpass is wasted effort
// and can't match solid state results anyway. // and can't match solid state results anyway.
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
Left = src.Left >> SndOutVolumeShift; Left = src.Left;
Right = src.Right >> SndOutVolumeShift; Right = src.Right;
Center = (src.Left + src.Right) >> (SndOutVolumeShift + 1); Center = (src.Left + src.Right) >> 1;
LFE = Center; LFE = Center;
LeftBack = src.Left >> SndOutVolumeShift; LeftBack = src.Left >> 1;
RightBack = src.Right >> SndOutVolumeShift; RightBack = src.Right >> 1;
} }
}; };
@ -222,7 +175,7 @@ struct Stereo51Out16DplII
s16 LeftBack; s16 LeftBack;
s16 RightBack; s16 RightBack;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
ProcessDplIISample16(src, this); ProcessDplIISample16(src, this);
} }
@ -237,7 +190,7 @@ struct Stereo51Out32DplII
s32 LeftBack; s32 LeftBack;
s32 RightBack; s32 RightBack;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut32& src)
{ {
ProcessDplIISample32(src, this); ProcessDplIISample32(src, this);
} }
@ -252,7 +205,7 @@ struct Stereo51Out16Dpl
s16 LeftBack; s16 LeftBack;
s16 RightBack; s16 RightBack;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
ProcessDplSample16(src, this); ProcessDplSample16(src, this);
} }
@ -267,7 +220,7 @@ struct Stereo51Out32Dpl
s32 LeftBack; s32 LeftBack;
s32 RightBack; s32 RightBack;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut32& src)
{ {
ProcessDplSample32(src, this); ProcessDplSample32(src, this);
} }
@ -284,178 +237,36 @@ struct Stereo71Out16
s16 LeftSide; s16 LeftSide;
s16 RightSide; s16 RightSide;
void ResampleFrom(const StereoOut32& src) __fi void SetFrom(const StereoOut16& src)
{ {
Left = src.Left >> SndOutVolumeShift; Left = src.Left;
Right = src.Right >> SndOutVolumeShift; Right = src.Right;
Center = (src.Left + src.Right) >> (SndOutVolumeShift + 1); Center = (src.Left + src.Right) >> 1;
LFE = Center; LFE = Center;
LeftBack = src.Left >> SndOutVolumeShift; LeftBack = src.Left;
RightBack = src.Right >> SndOutVolumeShift; RightBack = src.Right;
LeftSide = src.Left >> (SndOutVolumeShift + 1); LeftSide = src.Left >> 1;
RightSide = src.Right >> (SndOutVolumeShift + 1); RightSide = src.Right >> 1;
} }
}; };
struct Stereo71Out32 namespace SndBuffer
{ {
s32 Left; void UpdateTempoChangeAsyncMixing();
s32 Right; bool Init(const char* modname);
s32 Center; void Cleanup();
s32 LFE; void Write(StereoOut16 Sample);
s32 LeftBack; void ClearContents();
s32 RightBack; void ResetBuffers();
s32 LeftSide;
s32 RightSide;
void ResampleFrom(const StereoOut32& src)
{
Left = src.Left << SndOutVolumeShift32;
Right = src.Right << SndOutVolumeShift32;
Center = (src.Left + src.Right) << (SndOutVolumeShift32 - 1);
LFE = Center;
LeftBack = src.Left << SndOutVolumeShift32;
RightBack = src.Right << SndOutVolumeShift32;
LeftSide = src.Left << (SndOutVolumeShift32 - 1);
RightSide = src.Right << (SndOutVolumeShift32 - 1);
}
};
struct Stereo20Out32
{
s32 Left;
s32 Right;
void ResampleFrom(const StereoOut32& src)
{
Left = src.Left << SndOutVolumeShift32;
Right = src.Right << SndOutVolumeShift32;
}
};
struct Stereo21Out32
{
s32 Left;
s32 Right;
s32 LFE;
void ResampleFrom(const StereoOut32& src)
{
Left = src.Left << SndOutVolumeShift32;
Right = src.Right << SndOutVolumeShift32;
LFE = (src.Left + src.Right) << (SndOutVolumeShift32 - 1);
}
};
struct Stereo41Out32
{
s32 Left;
s32 Right;
s32 LFE;
s32 LeftBack;
s32 RightBack;
void ResampleFrom(const StereoOut32& src)
{
Left = src.Left << SndOutVolumeShift32;
Right = src.Right << SndOutVolumeShift32;
LFE = (src.Left + src.Right) << (SndOutVolumeShift32 - 1);
LeftBack = src.Left << SndOutVolumeShift32;
RightBack = src.Right << SndOutVolumeShift32;
}
};
struct Stereo51Out32
{
s32 Left;
s32 Right;
s32 Center;
s32 LFE;
s32 LeftBack;
s32 RightBack;
void ResampleFrom(const StereoOut32& src)
{
Left = src.Left << SndOutVolumeShift32;
Right = src.Right << SndOutVolumeShift32;
Center = (src.Left + src.Right) << (SndOutVolumeShift32 - 1);
LFE = Center;
LeftBack = src.Left << SndOutVolumeShift32;
RightBack = src.Right << SndOutVolumeShift32;
}
};
// Developer Note: This is a static class only (all static members).
class SndBuffer
{
private:
static bool m_underrun_freeze;
static s32 m_predictData;
static float lastPct;
static StereoOut32* sndTempBuffer;
static StereoOut16* sndTempBuffer16;
static int sndTempProgress;
static int m_dsp_progress;
static int m_timestretch_progress;
static int m_timestretch_writepos;
static StereoOut32* m_buffer;
static s32 m_size;
alignas(4) static volatile s32 m_rpos;
alignas(4) static volatile s32 m_wpos;
static float lastEmergencyAdj;
static float cTempo;
static float eTempo;
static int ssFreeze;
static bool CheckUnderrunStatus(int& nSamples, int& quietSampleCount);
static void soundtouchInit();
static void soundtouchClearContents();
static void soundtouchCleanup();
static void timeStretchWrite();
static void timeStretchUnderrun();
static s32 timeStretchOverrun();
static void PredictDataWrite(int samples);
static float GetStatusPct();
static void UpdateTempoChangeSoundTouch();
static void UpdateTempoChangeSoundTouch2();
static void _WriteSamples(StereoOut32* bData, int nSamples);
static void _WriteSamples_Safe(StereoOut32* bData, int nSamples);
static void _ReadSamples_Safe(StereoOut32* bData, int nSamples);
static void _WriteSamples_Internal(StereoOut32* bData, int nSamples);
static void _DropSamples_Internal(int nSamples);
static void _ReadSamples_Internal(StereoOut32* bData, int nSamples);
static int _GetApproximateDataInBuffer();
public:
static void UpdateTempoChangeAsyncMixing();
static bool Init(const char* modname);
static void Cleanup();
static void Write(const StereoOut32& Sample);
static void ClearContents();
static void ResetBuffers();
// Note: Samples are buffered as 16-bit stereo frames (StereoOut16), so no
// fixed-point shift is needed to obtain a 16-bit result. When reading into an
// output type wider than 16 bits, the user of this function is responsible for
// scaling the values to where they need to be.
template <typename T> template <typename T>
static void ReadSamples(T* bData, int nSamples = SndOutPacketSize); void ReadSamples(T* bData, int nSamples = SndOutPacketSize);
}; }
class SndOutModule class SndOutModule
{ {