diff --git a/plugins/zzogl-pg/opengl/Mem.h b/plugins/zzogl-pg/opengl/Mem.h index 9d6d628227..bad6da0c45 100644 --- a/plugins/zzogl-pg/opengl/Mem.h +++ b/plugins/zzogl-pg/opengl/Mem.h @@ -42,7 +42,7 @@ typedef u32 (*_readPixel)(const void* pmem, int x, int y, u32 bp, u32 bw); typedef u32 (*_readPixel_0)(const void* pmem, int x, int y, u32 bw); typedef int (*_TransferHostLocal)(const void* pbyMem, u32 nQWordSize); typedef void (*_TransferLocalHost)(void* pbyMem, u32 nQWordSize); -typedef void (__fastcall *_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask); +typedef void (*_SwizzleBlock)(u8 *dst, u8 *src, int pitch, u32 WriteMask); // Both of the following structs should probably be local class variables or in a namespace, // but this works for the moment. diff --git a/plugins/zzogl-pg/opengl/Mem_Swizzle.h b/plugins/zzogl-pg/opengl/Mem_Swizzle.h index 588d1cafa5..3c22e4c960 100644 --- a/plugins/zzogl-pg/opengl/Mem_Swizzle.h +++ b/plugins/zzogl-pg/opengl/Mem_Swizzle.h @@ -23,8 +23,79 @@ #include "Mem.h" // special swizzle macros - which I converted to functions. - -static __forceinline void __fastcall SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +#ifdef ZEROGS_SSE2 + +static __forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock32_sse2(dst, src, pitch, WriteMask); +} + +static __forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock16_sse2(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock8_sse2(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock4_sse2(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock32u_sse2(dst, src, pitch, WriteMask); +} +static __forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock16u_sse2(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock8u_sse2(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock4u_sse2(dst, src, pitch/*, WriteMask*/); +} +#else + +static __forceinline void SwizzleBlock32(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock32_c(dst, src, pitch, WriteMask); +} + +static __forceinline void SwizzleBlock16(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock8(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock4(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock32u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock32_c(dst, src, pitch, WriteMask); +} +static __forceinline void SwizzleBlock16u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock16_c(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock8u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock8_c(dst, src, pitch/*, WriteMask*/); +} +static __forceinline void SwizzleBlock4u(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +{ + SwizzleBlock4_c(dst, src, pitch/*, WriteMask*/); +} + +#endif +static __forceinline void SwizzleBlock24(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; @@ -52,7 +123,7 @@ static __forceinline void __fastcall SwizzleBlock24(u8 *dst, u8 *src, int pitch, #define SwizzleBlock24u SwizzleBlock24 -static __forceinline void __fastcall SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline void SwizzleBlock8H(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; @@ -75,7 +146,7 @@ static __forceinline void __fastcall SwizzleBlock8H(u8 *dst, u8 *src, int pitch, #define SwizzleBlock8Hu SwizzleBlock8H -static __forceinline void __fastcall SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline void SwizzleBlock4HH(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; @@ -97,7 +168,7 @@ static __forceinline void __fastcall SwizzleBlock4HH(u8 *dst, u8 *src, int pitch #define SwizzleBlock4HHu SwizzleBlock4HH -static __forceinline void __fastcall SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) +static __forceinline void SwizzleBlock4HL(u8 *dst, u8 *src, int pitch, u32 WriteMask = 0xffffffff) { u8* pnewsrc = src; u32* pblock = tempblock; diff --git a/plugins/zzogl-pg/opengl/x86-32.asm b/plugins/zzogl-pg/opengl/x86-32.asm index 5a006cc262..1507d2192d 100644 --- a/plugins/zzogl-pg/opengl/x86-32.asm +++ b/plugins/zzogl-pg/opengl/x86-32.asm @@ -193,7 +193,7 @@ SwizzleBlock32_sse2@WM: ; SwizzleBlock16 ; -@SwizzleBlock16_sse2@16 proc public +@SwizzleBlock16_sse2@12 proc public push ebx @@ -225,13 +225,13 @@ SwizzleBlock32_sse2@WM: ret 4 -@SwizzleBlock16_sse2@16 endp +@SwizzleBlock16_sse2@12 endp ; ; SwizzleBlock8 ; -@SwizzleBlock8_sse2@16 proc public +@SwizzleBlock8_sse2@12 proc public push ebx @@ -287,13 +287,13 @@ SwizzleBlock32_sse2@WM: ret 4 -@SwizzleBlock8_sse2@16 endp +@SwizzleBlock8_sse2@12 endp ; ; SwizzleBlock4 ; -@SwizzleBlock4_sse2@16 proc public +@SwizzleBlock4_sse2@12 proc public push ebx @@ -365,7 +365,7 @@ SwizzleBlock32_sse2@WM: ret 4 -@SwizzleBlock4_sse2@16 endp +@SwizzleBlock4_sse2@12 endp ; ; swizzling with unaligned reads @@ -471,7 +471,7 @@ SwizzleBlock32u_sse2@WM: ; SwizzleBlock16u ; -@SwizzleBlock16u_sse2@16 proc public +@SwizzleBlock16u_sse2@12 proc public push ebx @@ -503,13 +503,13 @@ SwizzleBlock32u_sse2@WM: ret 4 -@SwizzleBlock16u_sse2@16 endp +@SwizzleBlock16u_sse2@12 endp ; ; SwizzleBlock8u ; -@SwizzleBlock8u_sse2@16 proc public +@SwizzleBlock8u_sse2@12 proc public push ebx @@ -569,13 +569,13 @@ SwizzleBlock32u_sse2@WM: ret 4 -@SwizzleBlock8u_sse2@16 endp +@SwizzleBlock8u_sse2@12 endp ; ; SwizzleBlock4u ; -@SwizzleBlock4u_sse2@16 proc public +@SwizzleBlock4u_sse2@12 proc public push ebx @@ -647,6 +647,6 @@ SwizzleBlock32u_sse2@WM: ret 4 -@SwizzleBlock4u_sse2@16 endp +@SwizzleBlock4u_sse2@12 endp end \ No newline at end of file diff --git a/plugins/zzogl-pg/opengl/x86.h b/plugins/zzogl-pg/opengl/x86.h index 075eb491a5..ce26a79aeb 100644 --- a/plugins/zzogl-pg/opengl/x86.h +++ b/plugins/zzogl-pg/opengl/x86.h @@ -1,124 +1,115 @@ -/* ZeroGS KOSMOS - * Copyright (C) 2005-2006 Gabest/zerofrog@gmail.com - * http://www.gabest.org - * - * This Program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2, or (at your option) - * any later version. - * - * This Program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with GNU Make; see the file COPYING. If not, write to - * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - * http://www.gnu.org/copyleft/gpl.html - * - */ - -#ifndef ZEROGS_X86 -#define ZEROGS_X86 - -#include "GS.h" - -extern "C" void __fastcall SwizzleBlock32_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock16_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock8_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock4_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock32u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock16u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock8u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern "C" void __fastcall SwizzleBlock4u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); - -// frame swizzling - -// no AA -extern "C" void __fastcall FrameSwizzleBlock32_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall FrameSwizzleBlock16_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock32_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock32Z_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock16_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock16Z_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); - -// AA 2x -extern "C" void __fastcall FrameSwizzleBlock32A2_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall FrameSwizzleBlock16A2_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock32A2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock32ZA2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock16A2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock16ZA2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); - -// AA 4x -extern "C" void __fastcall FrameSwizzleBlock32A4_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall FrameSwizzleBlock16A4_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock32A4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock32ZA4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock16A4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern "C" void __fastcall Frame16SwizzleBlock16ZA4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); - -extern void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); - -// no AA -extern void __fastcall FrameSwizzleBlock32_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall FrameSwizzleBlock24_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall FrameSwizzleBlock16_c(u16* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock32_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock32Z_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock16_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock16Z_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); - -// AA 2x -extern void __fastcall FrameSwizzleBlock32A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall FrameSwizzleBlock24A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall FrameSwizzleBlock16A2_c(u16* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock32A2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock32ZA2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock16A2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock16ZA2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); - -// AA 4x -extern void __fastcall FrameSwizzleBlock32A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall FrameSwizzleBlock24A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall FrameSwizzleBlock16A4_c(u16* dst, u32* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock32A4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock32ZA4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock16A4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); -extern void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); - -extern void __fastcall SwizzleColumn32_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); -extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); - -extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut); -extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut); -extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut); -extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut); -extern void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* vm, u32* clut); -extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut); - -extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut); -extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut); - -extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters ); +/* ZeroGS KOSMOS + * Copyright (C) 2005-2006 Gabest/zerofrog@gmail.com + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#ifndef ZEROGS_X86 +#define ZEROGS_X86 + +#include "GS.h" + +extern "C" void __fastcall SwizzleBlock32_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock16_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock8_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock4_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock32u_sse2(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern "C" void __fastcall SwizzleBlock16u_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock8u_sse2(u8* dst, u8* src, int srcpitch); +extern "C" void __fastcall SwizzleBlock4u_sse2(u8* dst, u8* src, int srcpitch); + +// frame swizzling + +// no AA +extern "C" void __fastcall FrameSwizzleBlock32_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall FrameSwizzleBlock16_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock32_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock32Z_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock16_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock16Z_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); + +// AA 2x +extern "C" void __fastcall FrameSwizzleBlock32A2_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall FrameSwizzleBlock16A2_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock32A2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock32ZA2_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock16A2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock16ZA2_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); + +// AA 4x +extern "C" void __fastcall FrameSwizzleBlock32A4_sse2(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall FrameSwizzleBlock16A4_sse2(u16* dst, u32* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock32A4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock32ZA4_sse2(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock16A4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern "C" void __fastcall Frame16SwizzleBlock16ZA4_sse2(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); + +extern void __fastcall SwizzleBlock32_c(u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern void __fastcall SwizzleBlock16_c(u8* dst, u8* src, int srcpitch); +extern void __fastcall SwizzleBlock8_c(u8* dst, u8* src, int srcpitch); +extern void __fastcall SwizzleBlock4_c(u8* dst, u8* src, int srcpitch); + +// no AA +extern void __fastcall FrameSwizzleBlock32_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall FrameSwizzleBlock24_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall FrameSwizzleBlock16_c(u16* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock32_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock32Z_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock16_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock16Z_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); + +// AA 2x +extern void __fastcall FrameSwizzleBlock32A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall FrameSwizzleBlock24A2_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall FrameSwizzleBlock16A2_c(u16* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock32A2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock32ZA2_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock16A2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock16ZA2_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); + +// AA 4x +extern void __fastcall FrameSwizzleBlock32A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall FrameSwizzleBlock24A4_c(u32* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall FrameSwizzleBlock16A4_c(u16* dst, u32* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock32A4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock32ZA4_c(u32* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock16A4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); +extern void __fastcall Frame16SwizzleBlock16ZA4_c(u16* dst, Vector_16F* src, int srcpitch, u32 WriteMask); + +extern void __fastcall SwizzleColumn32_c(int y, u8* dst, u8* src, int srcpitch, u32 WriteMask = 0xffffffff); +extern void __fastcall SwizzleColumn16_c(int y, u8* dst, u8* src, int srcpitch); +extern void __fastcall SwizzleColumn8_c(int y, u8* dst, u8* src, int srcpitch); +extern void __fastcall SwizzleColumn4_c(int y, u8* dst, u8* src, int srcpitch); + +extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(u32* vm, u32* clut); +extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(u32* vm, u32* clut); +extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(u32* vm, u32* clut); +extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(u32* vm, u32* clut); +extern void __fastcall WriteCLUT_T16_I8_CSM1_c(u32* vm, u32* clut); +extern void __fastcall WriteCLUT_T32_I8_CSM1_c(u32* vm, u32* clut); + +extern void __fastcall WriteCLUT_T16_I4_CSM1_c(u32* vm, u32* clut); +extern void __fastcall WriteCLUT_T32_I4_CSM1_c(u32* vm, u32* clut); + +extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters ); #ifdef ZEROGS_SSE2 -#define SwizzleBlock32 SwizzleBlock32_sse2 -#define SwizzleBlock16 SwizzleBlock16_sse2 -#define SwizzleBlock8 SwizzleBlock8_sse2 -#define SwizzleBlock4 SwizzleBlock4_sse2 -#define SwizzleBlock32u SwizzleBlock32u_sse2 -#define SwizzleBlock16u SwizzleBlock16u_sse2 -#define SwizzleBlock8u SwizzleBlock8u_sse2 -#define SwizzleBlock4u SwizzleBlock4u_sse2 - #define FrameSwizzleBlock32 FrameSwizzleBlock32_c #define FrameSwizzleBlock24 FrameSwizzleBlock24_c #define FrameSwizzleBlock16 FrameSwizzleBlock16_c @@ -150,15 +141,6 @@ extern void SSE2_UnswizzleZ16Target( u16* dst, u16* src, int iters ); #else -#define SwizzleBlock32 SwizzleBlock32_c -#define SwizzleBlock16 SwizzleBlock16_c -#define SwizzleBlock8 SwizzleBlock8_c -#define SwizzleBlock4 SwizzleBlock4_c -#define SwizzleBlock32u SwizzleBlock32_c -#define SwizzleBlock16u SwizzleBlock16_c -#define SwizzleBlock8u SwizzleBlock8_c -#define SwizzleBlock4u SwizzleBlock4_c - #define FrameSwizzleBlock32 FrameSwizzleBlock32_c #define FrameSwizzleBlock16 FrameSwizzleBlock16_c #define Frame16SwizzleBlock32 Frame16SwizzleBlock32_c