/*  PCSX2 - PS2 Emulator for PCs
 *  Copyright (C) 2002-2010  PCSX2 Dev Team
 *
 *  PCSX2 is free software: you can redistribute it and/or modify it under the terms
 *  of the GNU Lesser General Public License as published by the Free Software Found-
 *  ation, either version 3 of the License, or (at your option) any later version.
 *
 *  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 *  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 *  PURPOSE.  See the GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along with PCSX2.
 *  If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#include <xmmintrin.h>

// Fills a block of memory with the byte value `data` using 128-bit SSE stores.
//
//   data - (template parameter) byte value replicated across the destination.
//   dest - destination buffer. It is written with _mm_store_ps, which is an aligned
//          store, so the address must be 16-byte aligned.
//   size - number of bytes to fill. Must be a multiple of 16 (asserted below); any
//          remainder smaller than 16 bytes is not written.
template <u8 data>
__noinline void memset_sse_a(void* dest, const size_t size)
{
	// Number of 16-byte quadwords to write. Kept as size_t so the byte count passed in
	// by the caller is never narrowed.
	const size_t MZFqwc = size / 16;

	pxAssert((size & 0xf) == 0);

	__m128 srcreg;

	if (data != 0)
	{
		// The pattern must span a full 16 bytes: _mm_load_ps reads 128 bits, so a shorter
		// array would be read past its end.
		alignas(16) static const u8 loadval[16] = {
			data, data, data, data, data, data, data, data,
			data, data, data, data, data, data, data, data};
		srcreg = _mm_load_ps((const float*)loadval);
	}
	else
		srcreg = _mm_setzero_ps();

	// View the destination as an array of 16-byte rows (4 floats each).
	float(*destxmm)[4] = (float(*)[4])dest;

	// Write the leading (MZFqwc % 8) quadwords first, so that the main loop below only
	// ever deals with whole groups of eight.
	switch (MZFqwc & 0x07)
	{
		case 0x07:
			_mm_store_ps(&destxmm[0x07 - 1][0], srcreg);
			// Fall through
		case 0x06:
			_mm_store_ps(&destxmm[0x06 - 1][0], srcreg);
			// Fall through
		case 0x05:
			_mm_store_ps(&destxmm[0x05 - 1][0], srcreg);
			// Fall through
		case 0x04:
			_mm_store_ps(&destxmm[0x04 - 1][0], srcreg);
			// Fall through
		case 0x03:
			_mm_store_ps(&destxmm[0x03 - 1][0], srcreg);
			// Fall through
		case 0x02:
			_mm_store_ps(&destxmm[0x02 - 1][0], srcreg);
			// Fall through
		case 0x01:
			_mm_store_ps(&destxmm[0x01 - 1][0], srcreg);
			break;
		default:
			// Quadword count is already a multiple of eight; nothing to do here.
			break;
	}

	// Skip past the quadwords handled above, then fill the rest eight at a time.
	destxmm += (MZFqwc & 0x07);
	for (size_t i = 0; i < MZFqwc / 8; ++i, destxmm += 8)
	{
		_mm_store_ps(&destxmm[0][0], srcreg);
		_mm_store_ps(&destxmm[1][0], srcreg);
		_mm_store_ps(&destxmm[2][0], srcreg);
		_mm_store_ps(&destxmm[3][0], srcreg);
		_mm_store_ps(&destxmm[4][0], srcreg);
		_mm_store_ps(&destxmm[5][0], srcreg);
		_mm_store_ps(&destxmm[6][0], srcreg);
		_mm_store_ps(&destxmm[7][0], srcreg);
	}
}

// Zero-fills `size` bytes starting at `dest`.
// This is simply the SSE fill routine instantiated with a fill byte of zero, so the
// same requirements on `dest` and `size` apply as for memset_sse_a.
static __fi void memzero_sse_a(void* dest, const size_t size)
{
	memset_sse_a<0x00>(dest, size);
}

// Fills the whole object `dest` with the byte value `data`.
// The object's size is checked at compile time to be a multiple of 16 bytes, then the
// pointer/size overload of memset_sse_a does the actual work.
template <u8 data, typename T>
__noinline void memset_sse_a(T& dest)
{
	static_assert((sizeof(T) % 16) == 0, "Bad size for SSE memset");

	void* const target = &dest;
	memset_sse_a<data>(target, sizeof(T));
}

// Zero-fills the whole object `dest`.
// The object's size is checked at compile time to be a multiple of 16 bytes, then the
// pointer/size overload of memset_sse_a is invoked with a fill byte of zero.
template <typename T>
void memzero_sse_a(T& dest)
{
	static_assert((sizeof(T) % 16) == 0, "Bad size for SSE memset");

	void* const target = &dest;
	memset_sse_a<0x00>(target, sizeof(T));
}