mirror of https://github.com/PCSX2/pcsx2.git
272 lines
6.8 KiB
C++
272 lines
6.8 KiB
C++
/* ZeroGS KOSMOS
|
|
* Copyright (C) 2005-2006 zerofrog@gmail.com
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 2 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
*/
|
|
|
|
#include "GS.h"
|
|
#include "Mem.h"
|
|
#include "Mem_Swizzle.h"
|
|
|
|
// special swizzle macros - which I converted to functions.
|
|
#ifdef ZEROGS_SSE2
|
|
|
|
__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock32_sse2(dst, src, pitch, WriteMask);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock16_sse2(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock8_sse2(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock4_sse2(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock32u_sse2(dst, src, pitch, WriteMask);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock16u_sse2(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock8u_sse2(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock4u_sse2(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
#else
|
|
|
|
__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock32_c(dst, src, pitch, WriteMask);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock32_c(dst, src, pitch, WriteMask);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/);
|
|
}
|
|
|
|
__forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
|
|
{
|
|
u32* d = &g_columnTable32[0][0];
|
|
|
|
if (WriteMask == 0xffffffff)
|
|
{
|
|
for (int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
|
for (int i = 0; i < 8; i++)
|
|
((u32*)dst)[d[i]] = ((u32*)src)[i];
|
|
}
|
|
else
|
|
{
|
|
for (int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
|
for (int i = 0; i < 8; i++)
|
|
((u32*)dst)[d[i]] = (((u32*)dst)[d[i]] & ~WriteMask) | (((u32*)src)[i] & WriteMask);
|
|
}
|
|
}
|
|
|
|
|
|
__forceinline void __fastcall SwizzleBlock24_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
|
|
{
|
|
u32* d = &g_columnTable32[0][0];
|
|
|
|
if (WriteMask == 0x00ffffff)
|
|
{
|
|
for (int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
|
for (int i = 0; i < 8; i++)
|
|
((u32*)dst)[d[i]] = ((u32*)src)[i];
|
|
}
|
|
else
|
|
{
|
|
for (int j = 0; j < 8; j++, d += 8, src += srcpitch)
|
|
for (int i = 0; i < 8; i++)
|
|
((u32*)dst)[d[i]] = (((u32*)dst)[d[i]] & ~WriteMask) | (((u32*)src)[i] & WriteMask);
|
|
}
|
|
}
|
|
|
|
__forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
|
|
{
|
|
u32* d = &g_columnTable16[0][0];
|
|
|
|
for (int j = 0; j < 8; j++, d += 16, src += srcpitch)
|
|
for (int i = 0; i < 16; i++)
|
|
((u16*)dst)[d[i]] = ((u16*)src)[i];
|
|
}
|
|
|
|
__forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
|
|
{
|
|
u32* d = &g_columnTable8[0][0];
|
|
|
|
for (int j = 0; j < 16; j++, d += 16, src += srcpitch)
|
|
for (int i = 0; i < 16; i++)
|
|
dst[d[i]] = src[i];
|
|
}
|
|
|
|
__forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask)
|
|
{
|
|
u32* d = &g_columnTable4[0][0];
|
|
|
|
for (int j = 0; j < 16; j++, d += 32, src += srcpitch)
|
|
{
|
|
for (int i = 0; i < 32; i++)
|
|
{
|
|
u32 addr = d[i];
|
|
u8 c = (src[i>>1] >> ((i & 1) << 2)) & 0x0f;
|
|
u32 shift = (addr & 1) << 2;
|
|
dst[addr >> 1] = (dst[addr >> 1] & (0xf0 >> shift)) | (c << shift);
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
__forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
u8* pnewsrc = src;
|
|
u32* pblock = tempblock;
|
|
|
|
for (int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch - 24)
|
|
{
|
|
for (int bx = 0; bx < 8; ++bx, pnewsrc += 3)
|
|
{
|
|
pblock[bx] = *(u32*)pnewsrc;
|
|
}
|
|
}
|
|
|
|
for (int bx = 0; bx < 7; ++bx, pnewsrc += 3)
|
|
{
|
|
/* might be 1 byte out of bounds of GS memory */
|
|
pblock[bx] = *(u32*)pnewsrc;
|
|
}
|
|
|
|
/* do 3 bytes for the last copy */
|
|
*((u8*)pblock + 28) = pnewsrc[0];
|
|
|
|
*((u8*)pblock + 29) = pnewsrc[1];
|
|
|
|
*((u8*)pblock + 30) = pnewsrc[2];
|
|
|
|
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
u8* pnewsrc = src;
|
|
u32* pblock = tempblock;
|
|
|
|
for (int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch)
|
|
{
|
|
u32 u = *(u32*)pnewsrc;
|
|
pblock[0] = u << 24;
|
|
pblock[1] = u << 16;
|
|
pblock[2] = u << 8;
|
|
pblock[3] = u;
|
|
u = *(u32*)(pnewsrc + 4);
|
|
pblock[4] = u << 24;
|
|
pblock[5] = u << 16;
|
|
pblock[6] = u << 8;
|
|
pblock[7] = u;
|
|
}
|
|
|
|
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
u8* pnewsrc = src;
|
|
u32* pblock = tempblock;
|
|
|
|
for (int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch)
|
|
{
|
|
u32 u = *(u32*)pnewsrc;
|
|
pblock[0] = u << 28;
|
|
pblock[1] = u << 24;
|
|
pblock[2] = u << 20;
|
|
pblock[3] = u << 16;
|
|
pblock[4] = u << 12;
|
|
pblock[5] = u << 8;
|
|
pblock[6] = u << 4;
|
|
pblock[7] = u;
|
|
}
|
|
|
|
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000);
|
|
}
|
|
|
|
__forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask)
|
|
{
|
|
u8* pnewsrc = src;
|
|
u32* pblock = tempblock;
|
|
|
|
for (int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch)
|
|
{
|
|
u32 u = *(u32*)pnewsrc;
|
|
pblock[0] = u << 24;
|
|
pblock[1] = u << 20;
|
|
pblock[2] = u << 16;
|
|
pblock[3] = u << 12;
|
|
pblock[4] = u << 8;
|
|
pblock[5] = u << 4;
|
|
pblock[6] = u;
|
|
pblock[7] = u >> 4;
|
|
}
|
|
|
|
SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000);
|
|
}
|