/* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
 * Developed and maintained by the Pcsx2 Development Team.
 *
 * Original portions from SPU2ghz are (c) 2008 by David Quintana [gigaherz]
 *
 * SPU2-X is free software: you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Found-
 * ation, either version 3 of the License, or (at your option) any later version.
 *
 * SPU2-X is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE.  See the GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with SPU2-X.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "Global.h"
#include "Lowpass.h"

// Low pass filters: Change these to 32 for a speedup (benchmarks needed to see if
// the speed gain is worth the quality drop)

//static LowPassFilter64 lowpass_left( 11000, SampleRate );
//static LowPassFilter64 lowpass_right( 11000, SampleRate );

__forceinline s32 V_Core::RevbGetIndexer( s32 offset )
{
	u32 pos = ReverbX + offset;

	// Fast and simple single step wrapping, made possible by the preparation of the
	// effects buffer addresses.

	if( pos > EffectsEndA )
	{
		pos -= EffectsEndA+1;
		pos += EffectsStartA;
	}

	return pos;
}

u32 WrapAround(V_Core& thiscore, u32 offset)
{
	return (thiscore.ReverbX + offset) % thiscore.EffectsBufferSize;
}

void V_Core::Reverb_AdvanceBuffer()
{
	if( RevBuffers.NeedsUpdated )
			UpdateEffectsBufferSize();

	if( (Cycles & 1) && (EffectsBufferSize > 0) )
	{
		ReverbX += 1;
		if( ReverbX >= (u32)EffectsBufferSize ) ReverbX = 0;
	}
}

/////////////////////////////////////////////////////////////////////////////////////////

StereoOut32 V_Core::DoReverb( const StereoOut32& Input )
{
	static const s32 downcoeffs[8] =
	{
		1283,  5344,  10895, 15243,
		15243, 10895,  5344,  1283
	};

	downbuf[dbpos] = Input;
	dbpos = (dbpos+1) & 7;

	// Reverb processing occurs at 24khz, so we skip processing every other sample,
	// and use the previous calculation for this core instead.

	if( (Cycles&1) == 0 )
	{
		// Important: Factor silence into the upsampler here, otherwise the reverb engine
		// develops a nasty feedback loop.

		upbuf[ubpos] = StereoOut32::Empty;
	}
	else
	{
		if( EffectsBufferSize <= 0 )
		{
			ubpos = (ubpos+1) & 7;
			return StereoOut32::Empty;
		}

		// Advance the current reverb buffer pointer, and cache the read/write addresses we'll be
		// needing for this session of reverb.

		const u32 src_a0 = RevbGetIndexer( RevBuffers.IIR_SRC_A0 );
		const u32 src_a1 = RevbGetIndexer( RevBuffers.IIR_SRC_A1 );
		const u32 src_b0 = RevbGetIndexer( RevBuffers.IIR_SRC_B0 );
		const u32 src_b1 = RevbGetIndexer( RevBuffers.IIR_SRC_B1 );

		const u32 dest_a0 = RevbGetIndexer( RevBuffers.IIR_DEST_A0 );
		const u32 dest_a1 = RevbGetIndexer( RevBuffers.IIR_DEST_A1 );
		const u32 dest_b0 = RevbGetIndexer( RevBuffers.IIR_DEST_B0 );
		const u32 dest_b1 = RevbGetIndexer( RevBuffers.IIR_DEST_B1 );

		const u32 dest2_a0 = RevbGetIndexer( RevBuffers.IIR_DEST_A0 + 1 );
		const u32 dest2_a1 = RevbGetIndexer( RevBuffers.IIR_DEST_A1 + 1 );
		const u32 dest2_b0 = RevbGetIndexer( RevBuffers.IIR_DEST_B0 + 1 );
		const u32 dest2_b1 = RevbGetIndexer( RevBuffers.IIR_DEST_B1 + 1 );

		const u32 acc_src_a0 = RevbGetIndexer( RevBuffers.ACC_SRC_A0 );
		const u32 acc_src_b0 = RevbGetIndexer( RevBuffers.ACC_SRC_B0 );
		const u32 acc_src_c0 = RevbGetIndexer( RevBuffers.ACC_SRC_C0 );
		const u32 acc_src_d0 = RevbGetIndexer( RevBuffers.ACC_SRC_D0 );

		const u32 acc_src_a1 = RevbGetIndexer( RevBuffers.ACC_SRC_A1 );
		const u32 acc_src_b1 = RevbGetIndexer( RevBuffers.ACC_SRC_B1 );
		const u32 acc_src_c1 = RevbGetIndexer( RevBuffers.ACC_SRC_C1 );
		const u32 acc_src_d1 = RevbGetIndexer( RevBuffers.ACC_SRC_D1 );

		const u32 fb_src_a0 = RevbGetIndexer( RevBuffers.FB_SRC_A0 );
		const u32 fb_src_a1 = RevbGetIndexer( RevBuffers.FB_SRC_A1 );
		const u32 fb_src_b0 = RevbGetIndexer( RevBuffers.FB_SRC_B0 );
		const u32 fb_src_b1 = RevbGetIndexer( RevBuffers.FB_SRC_B1 );

		const u32 mix_dest_a0 = RevbGetIndexer( RevBuffers.MIX_DEST_A0 );
		const u32 mix_dest_a1 = RevbGetIndexer( RevBuffers.MIX_DEST_A1 );
		const u32 mix_dest_b0 = RevbGetIndexer( RevBuffers.MIX_DEST_B0 );
		const u32 mix_dest_b1 = RevbGetIndexer( RevBuffers.MIX_DEST_B1 );

		// -----------------------------------------
		//          Optimized IRQ Testing !
		// -----------------------------------------

		// This test is enhanced by using the reverb effects area begin/end test as a
		// shortcut, since all buffer addresses are within that area.  If the IRQA isn't
		// within that zone then the "bulk" of the test is skipped, so this should only
		// be a slowdown on a few evil games.

		for( uint i=0; i<2; i++ )
		{
			if( Cores[i].IRQEnable && ((Cores[i].IRQA >= EffectsStartA) && (Cores[i].IRQA <= EffectsEndA)) )
			{
				if(	(Cores[i].IRQA == src_a0)		||	(Cores[i].IRQA == src_a1)		||
					(Cores[i].IRQA == src_b0)		||	(Cores[i].IRQA == src_b1)		||

					(Cores[i].IRQA == dest_a0)		||	(Cores[i].IRQA == dest_a1)		||
					(Cores[i].IRQA == dest_b0)		||	(Cores[i].IRQA == dest_b1)		||

					(Cores[i].IRQA == dest2_a0)		||	(Cores[i].IRQA == dest2_a1)		||
					(Cores[i].IRQA == dest2_b0)		||	(Cores[i].IRQA == dest2_b1)		||

					(Cores[i].IRQA == acc_src_a0)	||	(Cores[i].IRQA == acc_src_a1)	||
					(Cores[i].IRQA == acc_src_b0)	||	(Cores[i].IRQA == acc_src_b1)	||
					(Cores[i].IRQA == acc_src_c0)	||	(Cores[i].IRQA == acc_src_c1)	||
					(Cores[i].IRQA == acc_src_d0)	||	(Cores[i].IRQA == acc_src_d1)	||

					(Cores[i].IRQA == fb_src_a0)	||	(Cores[i].IRQA == fb_src_a1)	||
					(Cores[i].IRQA == fb_src_b0)	||	(Cores[i].IRQA == fb_src_b1)	||

					(Cores[i].IRQA == mix_dest_a0)	||	(Cores[i].IRQA == mix_dest_a1)	||
					(Cores[i].IRQA == mix_dest_b0)	||	(Cores[i].IRQA == mix_dest_b1) )
				{
					//printf("Core %d IRQ Called (Reverb). IRQA = %x\n",i,addr);
					SetIrqCall(i);
				}
			}
		}

		// -----------------------------------------
		//         Begin Reverb Processing !
		// -----------------------------------------

		StereoOut32 INPUT_SAMPLE;

		for( int x=0; x<8; ++x )
		{
			INPUT_SAMPLE.Left += (downbuf[(dbpos+x)&7].Left * downcoeffs[x]);
			INPUT_SAMPLE.Right += (downbuf[(dbpos+x)&7].Right * downcoeffs[x]);
		}

		INPUT_SAMPLE.Left  >>= 16;
		INPUT_SAMPLE.Right >>= 16;

		s32 input_L = INPUT_SAMPLE.Left * Revb.IN_COEF_L - 1;
		s32 input_R = INPUT_SAMPLE.Right * Revb.IN_COEF_R - 1;

		const s32 IIR_INPUT_A0 = ((_spu2mem[src_a0] * Revb.IIR_COEF - 1) + input_L)>>15;
		const s32 IIR_INPUT_A1 = ((_spu2mem[src_a1] * Revb.IIR_COEF - 1) + input_L)>>15;
		const s32 IIR_INPUT_B0 = ((_spu2mem[src_b0] * Revb.IIR_COEF - 1) + input_R)>>15;
		const s32 IIR_INPUT_B1 = ((_spu2mem[src_b1] * Revb.IIR_COEF - 1) + input_R)>>15;

		const s32 src_dest_a0 = _spu2mem[dest_a0];
		const s32 src_dest_a1 = _spu2mem[dest_a1];
		const s32 src_dest_b0 = _spu2mem[dest_b0];
		const s32 src_dest_b1 = _spu2mem[dest_b1];

		// This section differs from Neill's doc as it uses single-mul interpolation instead
		// of 0x8000-val inversion.  (same result, faster)
		const s32 IIR_A0 = src_dest_a0 + (((IIR_INPUT_A0 - src_dest_a0) * Revb.IIR_ALPHA - 1)>>15);
		const s32 IIR_A1 = src_dest_a1 + (((IIR_INPUT_A1 - src_dest_a1) * Revb.IIR_ALPHA - 1)>>15);
		const s32 IIR_B0 = src_dest_b0 + (((IIR_INPUT_B0 - src_dest_b0) * Revb.IIR_ALPHA - 1)>>15);
		const s32 IIR_B1 = src_dest_b1 + (((IIR_INPUT_B1 - src_dest_b1) * Revb.IIR_ALPHA - 1)>>15);
		_spu2mem[dest2_a0] = clamp_mix( IIR_A0 );
		_spu2mem[dest2_a1] = clamp_mix( IIR_A1 );
		_spu2mem[dest2_b0] = clamp_mix( IIR_B0 );
		_spu2mem[dest2_b1] = clamp_mix( IIR_B1 );

		const s32 ACC0 = (
			((_spu2mem[acc_src_a0] * Revb.ACC_COEF_A - 1) >> 15) +
			((_spu2mem[acc_src_b0] * Revb.ACC_COEF_B - 1) >> 15) +
			((_spu2mem[acc_src_c0] * Revb.ACC_COEF_C - 1) >> 15) +
			((_spu2mem[acc_src_d0] * Revb.ACC_COEF_D - 1) >> 15)
		);

		const s32 ACC1 = (
			((_spu2mem[acc_src_a1] * Revb.ACC_COEF_A - 1) >> 15) +
			((_spu2mem[acc_src_b1] * Revb.ACC_COEF_B - 1) >> 15) +
			((_spu2mem[acc_src_c1] * Revb.ACC_COEF_C - 1) >> 15) +
			((_spu2mem[acc_src_d1] * Revb.ACC_COEF_D - 1) >> 15)
		);

		// The following code differs from Neill's doc as it uses the more natural single-mul
		// interpolative, instead of the funky ^0x8000 stuff.  (better result, faster)

		const s32 FB_A0 = _spu2mem[fb_src_a0];
		const s32 FB_A1 = _spu2mem[fb_src_a1];

		_spu2mem[mix_dest_a0] = clamp_mix( ACC0 - ((FB_A0 * Revb.FB_ALPHA - 1) >> 15) );
		_spu2mem[mix_dest_a1] = clamp_mix( ACC1 - ((FB_A1 * Revb.FB_ALPHA - 1) >> 15) );

		const s32 acc_fb_mix_a = FB_A0 + (((ACC0 - FB_A0) * Revb.FB_ALPHA - 1) >> 15);
		const s32 acc_fb_mix_b = FB_A1 + (((ACC1 - FB_A1) * Revb.FB_ALPHA - 1) >> 15);

		_spu2mem[mix_dest_b0] = clamp_mix( ( acc_fb_mix_a - (_spu2mem[fb_src_b0] * Revb.FB_X - 1) ) >> 15 );
		_spu2mem[mix_dest_b1] = clamp_mix( ( acc_fb_mix_b - (_spu2mem[fb_src_b1] * Revb.FB_X - 1) ) >> 15 );

		upbuf[ubpos] = clamp_mix( StereoOut32(
			(_spu2mem[mix_dest_a0] + _spu2mem[mix_dest_b0]),	// left
			(_spu2mem[mix_dest_a1] + _spu2mem[mix_dest_b1])		// right
		) );
	}

	StereoOut32 retval;

	//for( int x=0; x<8; ++x )
	//{
	//	retval.Left  += (upbuf[(ubpos+x)&7].Left*downcoeffs[x]);
	//	retval.Right += (upbuf[(ubpos+x)&7].Right*downcoeffs[x]);
	//}

	if( (Cycles&1) == 0 )
	{
		retval.Left = (upbuf[(ubpos+5)&7].Left + upbuf[(ubpos+7)&7].Left)>>1;
		retval.Right = (upbuf[(ubpos+5)&7].Right + upbuf[(ubpos+7)&7].Right)>>1;
	}
	else
	{
		retval.Left = upbuf[(ubpos+6)&7].Left;
		retval.Right = upbuf[(ubpos+6)&7].Right;
	}

	// Notes:
	//  the first -1 is to adjust for the null padding in every other upbuf sample (which
	//  halves the overall volume).
	//  The second +1 divides by two, which is part of Neill's suggestion to divide by 3.
	//
	// According Neill the final result should be divided by 3, but currently the output
	// is way too quiet for that to fly.  In fact no division at all might be better.
	// In any case the problem always seems to be that the reverb isn't resonating enough
	// (indicating short buffers or bad coefficient math?), not that it isn't loud enough.

	//retval.Left  >>= (16-1 + 1);
	//retval.Right >>= (16-1 + 1);

	ubpos = (ubpos+1) & 7;

	return retval;
}

StereoOut32 V_Core::DoReverb_Fake( const StereoOut32& Input )
{
	if(!FakeReverbActive /*|| (Cycles&1) == 0*/)
		return StereoOut32::Empty;

	V_Core& thiscore(Cores[Index]);

	s16* Base = GetMemPtr(thiscore.EffectsStartA);

	s32 accL = 0;
	s32 accR = 0;

	///////////////////////////////////////////////////////////
	// part 0: Parameters

	// Input volumes
	const s32 InputL = -0x3fff;
	const s32 InputR = -0x3fff;

	// Echo 1: Positive, short delay
	const u32 Echo1L = 0x3700;
	const u32 Echo1R = 0x2704;
	const s32 Echo1A = 0x5000 / 8;

	// Echo 2: Negative, slightly longer delay, quiet
	const u32 Echo2L = 0x2f10;
	const u32 Echo2R = 0x1f04;
	const s32 Echo2A = 0x4c00 / 8;

	// Echo 3: Negative, longer delay, full feedback
	const u32 Echo3L = 0x2800;
	const u32 Echo3R = 0x1b34;
	const s32 Echo3A = 0xb800 / 8;

	// Echo 4: Negative, longer delay, full feedback
	const u32 Echo4L = 0x2708;
	const u32 Echo4R = 0x1704;
	const s32 Echo4A = 0xbc00 / 8;

	// Output control:
	const u32 Mix1L = thiscore.Revb.MIX_DEST_A0;
	const u32 Mix1R = thiscore.Revb.MIX_DEST_A1;
	const u32 Mix2L = thiscore.Revb.MIX_DEST_B0;
	const u32 Mix2R = thiscore.Revb.MIX_DEST_B1;

	const u32 CrossChannelL = 0x4694;
	const u32 CrossChannelR = 0x52e4;
	const u32 CrossChannelA = thiscore.Revb.FB_ALPHA / 8;

	///////////////////////////////////////////////////////////
	// part 1: input

	const s32 inL = Input.Left  * InputL;
	const s32 inR = Input.Right * InputR;

	accL += inL;
	accR += inR;

	///////////////////////////////////////////////////////////
	// part 2: straight echos

	s32 e1L = Base[WrapAround(thiscore,Echo1L  )] * Echo1A;
	s32 e1R = Base[WrapAround(thiscore,Echo1R+1)] * Echo1A;

	accL += e1L;
	accR += e1R;

	s32 e2L = Base[WrapAround(thiscore,Echo2L  )] * Echo2A;
	s32 e2R = Base[WrapAround(thiscore,Echo2R+1)] * Echo2A;

	accL += e2L;
	accR += e2R;

	s32 e3L = Base[WrapAround(thiscore,Echo3L  )] * Echo3A;
	s32 e3R = Base[WrapAround(thiscore,Echo3R+1)] * Echo3A;


	accL += e3L;
	accR += e3R;

	s32 e4L = Base[WrapAround(thiscore,Echo4L  )] * Echo4A;
	s32 e4R = Base[WrapAround(thiscore,Echo4R+1)] * Echo4A;

	accL += e4L;
	accR += e4R;

	///////////////////////////////////////////////////////////
	// part 4: cross-channel feedback

	s32 ccL = Base[WrapAround(thiscore,CrossChannelL+1)] * CrossChannelA;
	s32 ccR = Base[WrapAround(thiscore,CrossChannelR  )] * CrossChannelA;
	accL += ccL;
	accR += ccR;

	///////////////////////////////////////////////////////////
	// part N-1: normalize output

	accL >>= 15;
	accR >>= 15;

	///////////////////////////////////////////////////////////
	// part N: write output

	s32 tmpL = accL>>5; //  reduce the volume
	s32 tmpR = accR>>5;


	Base[WrapAround(thiscore,Mix1L)] = clamp_mix(accL-tmpL);
	Base[WrapAround(thiscore,Mix1R)] = clamp_mix(accR-tmpR);
	Base[WrapAround(thiscore,Mix2L)] = clamp_mix(accL-tmpL);
	Base[WrapAround(thiscore,Mix2R)] = clamp_mix(accR-tmpR);

	s32 returnL = Base[WrapAround(thiscore,Mix1L)] + Base[WrapAround(thiscore,Mix2L)];
	s32 returnR = Base[WrapAround(thiscore,Mix1R)] + Base[WrapAround(thiscore,Mix2R)];

	return StereoOut32(returnL,returnR);
}