DSPHLE/AX: fix low-pass/biquad clipping

The low-pass and biquad filters run in set40 mode where accessing ac#.m
returns the value of ac#.hm clamped to 16 bits.

This fixes the crackling in "Need for Speed: Nitro" (issue 13610).

Also make the lower bound match hardware (-0x8000 instead of -0x7FFF).
This commit is contained in:
Tillmann Karras 2024-09-04 22:21:32 +01:00
parent 20f2320fcf
commit eb54721475
3 changed files with 23 additions and 18 deletions

View File

@ -578,8 +578,8 @@ void AXUCode::OutputSamples(u32 lr_addr, u32 surround_addr)
// Output samples clamped to 16 bits and interlaced RLRLRLRLRL... // Output samples clamped to 16 bits and interlaced RLRLRLRLRL...
for (u32 i = 0; i < 5 * 32; ++i) for (u32 i = 0; i < 5 * 32; ++i)
{ {
int left = std::clamp(m_samples_main_left[i], -32767, 32767); s16 left = ClampS16(m_samples_main_left[i]);
int right = std::clamp(m_samples_main_right[i], -32767, 32767); s16 right = ClampS16(m_samples_main_right[i]);
buffer[2 * i + 0] = Common::swap16(right); buffer[2 * i + 0] = Common::swap16(right);
buffer[2 * i + 1] = Common::swap16(left); buffer[2 * i + 1] = Common::swap16(left);

View File

@ -372,6 +372,11 @@ void GetInputSamples(HLEAccelerator* accelerator, PB_TYPE& pb, s16* samples, u16
pb.adpcm.pred_scale = accelerator->GetPredScale(); pb.adpcm.pred_scale = accelerator->GetPredScale();
} }
// Saturate a wide sample to the signed 16-bit range [-0x8000, 0x7FFF].
// Values already inside the range are passed through unchanged.
s16 ClampS16(s64 sample)
{
  constexpr s64 lowest = -0x8000;
  constexpr s64 highest = 0x7FFF;

  if (sample < lowest)
    return static_cast<s16>(lowest);
  if (sample > highest)
    return static_cast<s16>(highest);
  return static_cast<s16>(sample);
}
// Add samples to an output buffer, with optional volume ramping. // Add samples to an output buffer, with optional volume ramping.
void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bool ramp) void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bool ramp)
{ {
@ -389,21 +394,20 @@ void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bo
s64 sample = input[i]; s64 sample = input[i];
sample *= volume; sample *= volume;
sample >>= 15; sample >>= 15;
sample = std::clamp((s32)sample, -32767, 32767); // -32768 ? s16 sample16 = ClampS16((s32)sample);
out[i] += (s16)sample; out[i] += sample16;
volume += volume_delta; volume += volume_delta;
*dpop = (s16)sample; *dpop = sample16;
} }
} }
// Execute a low pass filter on the samples in place using one history value.
// Each output is (f.a0 * input + f.b0 * f.yn1) >> 15; it overwrites samples[i]
// and becomes the new f.yn1. Nothing is returned: the history is carried in f.
// NOTE(review): each line below shows the pre- and post-change text side by
// side; only the right-hand copy saturates the result through ClampS16.
static void LowPassFilter(s16* samples, u32 count, PBLowPassFilter& f) static void LowPassFilter(s16* samples, u32 count, PBLowPassFilter& f)
{ {
for (u32 i = 0; i < count; ++i) for (u32 i = 0; i < count; ++i)
f.yn1 = samples[i] = (f.a0 * (s32)samples[i] + f.b0 * (s32)f.yn1) >> 15; f.yn1 = samples[i] = ClampS16((f.a0 * (s32)samples[i] + f.b0 * (s32)f.yn1) >> 15);
} }
#ifdef AX_WII #ifdef AX_WII
@ -425,7 +429,7 @@ static void BiquadFilter(s16* samples, u32 count, PBBiquadFilter& f)
else else
tmp += 0x7FFF; tmp += 0x7FFF;
tmp >>= 16; tmp >>= 16;
s16 yn0 = s16(tmp); s16 yn0 = ClampS16(tmp);
f.xn2 = f.xn1; f.xn2 = f.xn1;
f.yn2 = f.yn1; f.yn2 = f.yn1;
f.xn1 = xn0; f.xn1 = xn0;
@ -459,7 +463,7 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf
const s32 volume = (u16)pb.vol_env.cur_volume; const s32 volume = (u16)pb.vol_env.cur_volume;
#endif #endif
const s32 sample = ((s32)samples[i] * volume) >> 15; const s32 sample = ((s32)samples[i] * volume) >> 15;
samples[i] = std::clamp(sample, -32767, 32767); // -32768 ? samples[i] = ClampS16(sample);
pb.vol_env.cur_volume += pb.vol_env.cur_volume_delta; pb.vol_env.cur_volume += pb.vol_env.cur_volume_delta;
} }

View File

@ -602,15 +602,16 @@ void AXWiiUCode::OutputSamples(u32 lr_addr, u32 surround_addr, u16 volume, bool
// Clamp internal buffers to 16 bits. // Clamp internal buffers to 16 bits.
for (size_t i = 0; i < volume_ramp.size(); ++i) for (size_t i = 0; i < volume_ramp.size(); ++i)
{ {
int left = m_samples_main_left[i]; // Cast to s64 to avoid overflow.
int right = m_samples_main_right[i]; s64 left = m_samples_main_left[i];
s64 right = m_samples_main_right[i];
// Apply global volume. Cast to s64 to avoid overflow. // Apply global volume.
left = ((s64)left * volume_ramp[i]) >> 15; left = (left * volume_ramp[i]) >> 15;
right = ((s64)right * volume_ramp[i]) >> 15; right = (right * volume_ramp[i]) >> 15;
m_samples_main_left[i] = std::clamp(left, -32767, 32767); m_samples_main_left[i] = ClampS16(left);
m_samples_main_right[i] = std::clamp(right, -32767, 32767); m_samples_main_right[i] = ClampS16(right);
} }
std::array<s16, 3 * 32 * 2> buffer; std::array<s16, 3 * 32 * 2> buffer;
@ -635,7 +636,7 @@ void AXWiiUCode::OutputWMSamples(u32* addresses)
u16* out = (u16*)HLEMemory_Get_Pointer(memory, addresses[i]); u16* out = (u16*)HLEMemory_Get_Pointer(memory, addresses[i]);
for (u32 j = 0; j < 3 * 6; ++j) for (u32 j = 0; j < 3 * 6; ++j)
{ {
int sample = std::clamp(in[j], -32767, 32767); s16 sample = ClampS16(in[j]);
out[j] = Common::swap16((u16)sample); out[j] = Common::swap16((u16)sample);
} }
} }