diff --git a/plugins/zzogl-pg/opengl/Mem_Swizzle.cpp b/plugins/zzogl-pg/opengl/Mem_Swizzle.cpp new file mode 100644 index 0000000000..9932160c65 --- /dev/null +++ b/plugins/zzogl-pg/opengl/Mem_Swizzle.cpp @@ -0,0 +1,251 @@ +/* ZeroGS KOSMOS + * Copyright (C) 2005-2006 zerofrog@gmail.com + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include "GS.h" +#include "Mem.h" +#include "Mem_Swizzle.h" + +// special swizzle macros - which I converted to functions. +#ifdef ZEROGS_SSE2 + +__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock32_sse2(dst, src, pitch, WriteMask); +} + +__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock16_sse2(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock8_sse2(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock4_sse2(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock32u_sse2(dst, src, pitch, WriteMask); +} +__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock16u_sse2(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock8u_sse2(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock4u_sse2(dst, src, pitch/*, WriteMask*/); +} +#else + +__forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock32_c(dst, src, pitch, WriteMask); +} + +__forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock32_c(dst, src, pitch, WriteMask); +} +__forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/); +} +__forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/); +} + +__forceinline void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) +{ + u32* d = &g_columnTable32[0][0]; + + if(WriteMask == 0xffffffff) + { + for(int j = 0; j < 8; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((u32*)dst)[d[i]] = ((u32*)src)[i]; + } + else + { + for(int j = 0; j < 8; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((u32*)dst)[d[i]] = (((u32*)dst)[d[i]] & ~WriteMask) | (((u32*)src)[i] & WriteMask); + } +} + + +__forceinline void __fastcall SwizzleBlock24_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) +{ + u32* d = &g_columnTable32[0][0]; + + if(WriteMask == 0x00ffffff) + { + for(int j = 0; j < 8; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((u32*)dst)[d[i]] = ((u32*)src)[i]; + } + else + { + for(int j = 0; j < 8; j++, d += 8, src += srcpitch) + for(int i = 0; i < 8; i++) + ((u32*)dst)[d[i]] = (((u32*)dst)[d[i]] & ~WriteMask) | (((u32*)src)[i] & WriteMask); + } +} + +__forceinline void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) +{ + u32* d = &g_columnTable16[0][0]; + + for(int j = 0; j < 8; j++, d += 16, src += srcpitch) + for(int i = 0; i < 16; i++) + ((u16*)dst)[d[i]] = ((u16*)src)[i]; +} + +__forceinline void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) +{ + u32* d = &g_columnTable8[0][0]; + + for(int j = 0; j < 16; j++, d += 16, src += srcpitch) + for(int i = 0; i < 16; i++) + dst[d[i]] = src[i]; +} + +__forceinline void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask) +{ + u32* d = &g_columnTable4[0][0]; + + for(int j = 0; j < 16; j++, d += 32, src += srcpitch) + { + for(int i = 0; i < 32; i++) + { + u32 addr = d[i]; + u8 c = (src[i>>1] >> ((i&1) << 2)) & 0x0f; + u32 shift = (addr&1) << 2; + dst[addr >> 1] = (dst[addr >> 1] & (0xf0 >> shift)) | (c << shift); + } + } +} +#endif +__forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 7; ++by, pblock += 8, pnewsrc += pitch-24) + { + for(int bx = 0; bx < 8; ++bx, pnewsrc += 3) + { + pblock[bx] = *(u32*)pnewsrc; + } + } + + for(int bx = 0; bx < 7; ++bx, pnewsrc += 3) + { + /* might be 1 byte out of bounds of GS memory */ + pblock[bx] = *(u32*)pnewsrc; + } + + /* do 3 bytes for the last copy */ + *((u8*)pblock+28) = pnewsrc[0]; + *((u8*)pblock+29) = pnewsrc[1]; + *((u8*)pblock+30) = pnewsrc[2]; + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x00ffffff); +} + +__forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) + { + u32 u = *(u32*)pnewsrc; + pblock[0] = u<<24; + pblock[1] = u<<16; + pblock[2] = u<<8; + pblock[3] = u; + u = *(u32*)(pnewsrc+4); + pblock[4] = u<<24; + pblock[5] = u<<16; + pblock[6] = u<<8; + pblock[7] = u; + } + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xff000000); +} + +__forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) + { + u32 u = *(u32*)pnewsrc; + pblock[0] = u<<28; + pblock[1] = u<<24; + pblock[2] = u<<20; + pblock[3] = u<<16; + pblock[4] = u<<12; + pblock[5] = u<<8; + pblock[6] = u<<4; + pblock[7] = u; + } + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0xf0000000); +} + +__forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask) +{ + u8* pnewsrc = src; + u32* pblock = tempblock; + + for(int by = 0; by < 8; ++by, pblock += 8, pnewsrc += pitch) + { + u32 u = *(u32*)pnewsrc; + pblock[0] = u<<24; + pblock[1] = u<<20; + pblock[2] = u<<16; + pblock[3] = u<<12; + pblock[4] = u<<8; + pblock[5] = u<<4; + pblock[6] = u; + pblock[7] = u>>4; + } + SwizzleBlock32((u8*)dst, (u8*)tempblock, 32, 0x0f000000); +}