diff --git a/pcsx2/Vif0Dma.cpp b/pcsx2/Vif0Dma.cpp
index 2e0d738176..6f3d882412 100644
--- a/pcsx2/Vif0Dma.cpp
+++ b/pcsx2/Vif0Dma.cpp
@@ -16,8 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
-
-#include "VifDma_internal.h"
+#include "VifDma.h"
#include "VUmicro.h"
#include "newVif.h"
diff --git a/pcsx2/Vif1Dma.cpp b/pcsx2/Vif1Dma.cpp
index 5cfa00195e..bca9ebd154 100644
--- a/pcsx2/Vif1Dma.cpp
+++ b/pcsx2/Vif1Dma.cpp
@@ -16,9 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
-
-#include "VifDma_internal.h"
-
+#include "VifDma.h"
#include "GS.h"
#include "Gif.h"
#include "VUmicro.h"
diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp
index a542b213a6..e20c720f93 100644
--- a/pcsx2/VifDma.cpp
+++ b/pcsx2/VifDma.cpp
@@ -16,7 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
-#include "VifDma_internal.h"
+#include "VifDma.h"
#include "VUmicro.h"
int g_vifCycles = 0;
diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h
index 0b6c58737a..d82d6d7c86 100644
--- a/pcsx2/VifDma.h
+++ b/pcsx2/VifDma.h
@@ -12,8 +12,9 @@
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __VIFDMA_H__
-#define __VIFDMA_H__
+
+#pragma once
+#include "Vif_Unpack.h"
struct vifCode {
u32 addr;
@@ -43,9 +44,10 @@ struct vifStruct {
u8 dmamode;
};
-extern vifStruct vif0, vif1;
-extern u8 schedulepath3msk;
-static const int VifCycleVoodoo = 4;
+extern vifStruct* vif;
+extern vifStruct vif0, vif1;
+extern u8 schedulepath3msk;
+static const int VifCycleVoodoo = 4;
extern void vif0Init();
extern void vif0Interrupt();
@@ -63,4 +65,20 @@ __forceinline static int _limit(int a, int max)
return ((a > max) ? max : a);
}
-#endif
+enum VifModes // NOTE(review): consumers of these values are outside this view; meanings can only be inferred from the names — confirm before relying on them
+{
+ VIF_NORMAL_TO_MEM_MODE = 0,
+ VIF_NORMAL_FROM_MEM_MODE = 1,
+ VIF_CHAIN_MODE = 2
+};
+
+// Generic constants, one per VIF unit (NOTE(review): "intc" presumably names an interrupt-controller line — confirm)
+static const unsigned int VIF0intc = 4;
+static const unsigned int VIF1intc = 5;
+
+extern int g_vifCycles; // defined in VifDma.cpp
+
+template void vuExecMicro(u32 addr); // NOTE(review): with no parameter list after 'template' this reads as an explicit instantiation, not a template declaration — confirm the list was not lost
+extern void vif0FLUSH();
+extern void vif1FLUSH();
+
diff --git a/pcsx2/VifDma_internal.h b/pcsx2/VifDma_internal.h
index 991d924780..4e7157cb19 100644
--- a/pcsx2/VifDma_internal.h
+++ b/pcsx2/VifDma_internal.h
@@ -13,68 +13,8 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
-#ifndef __VIFDMA_INTERNAL_H__
-#define __VIFDMA_INTERNAL_H__
+#pragma once
#include "VifDma.h"
-enum VifModes
-{
- VIF_NORMAL_TO_MEM_MODE = 0,
- VIF_NORMAL_FROM_MEM_MODE = 1,
- VIF_CHAIN_MODE = 2
-};
-// Generic constants
-static const unsigned int VIF0intc = 4;
-static const unsigned int VIF1intc = 5;
-
-typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data);
-typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size);
-typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size);
-
-#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, u##bits *data);
-#define create_unpack_odd_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, u##bits *data, int size);
-#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, s##bits *data);
-#define create_unpack_odd_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, s##bits *data, int size);
-
-#define create_some_unpacks(bits) \
- create_unpack_u_type(bits); \
- create_unpack_odd_u_type(bits); \
- create_unpack_s_type(bits); \
- create_unpack_odd_s_type(bits);
-
-create_some_unpacks(32);
-create_some_unpacks(16);
-create_some_unpacks(8);
-
-struct VIFUnpackFuncTable
-{
- UNPACKFUNCTYPE funcU;
- UNPACKFUNCTYPE funcS;
-
- UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed.
- UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed.
-
- u8 bsize; // currently unused
- u8 dsize; // byte size of one channel
- u8 gsize; // size of data in bytes used for each write cycle
- u8 qsize; // used for unpack parts, num of vectors that
- // will be decompressed from data for 1 cycle
-};
-
-extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
-
-extern int g_vifCycles;
-extern vifStruct *vif;
-
-template void VIFunpack(u32 *data, vifCode *v, u32 size);
-template void vuExecMicro(u32 addr);
-extern void vif0FLUSH();
-extern void vif1FLUSH();
-
-extern int nVifUnpack (int idx, u8 *data);
-extern void initNewVif (int idx);
-extern void resetNewVif(int idx);
-
-#endif
diff --git a/pcsx2/VIFunpack.cpp b/pcsx2/Vif_Unpack.cpp
similarity index 96%
rename from pcsx2/VIFunpack.cpp
rename to pcsx2/Vif_Unpack.cpp
index 3ffbc838de..f66a545aca 100644
--- a/pcsx2/VIFunpack.cpp
+++ b/pcsx2/Vif_Unpack.cpp
@@ -1,296 +1,295 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-
-#include "PrecompiledHeader.h"
-#include "Common.h"
-#include "Vif.h"
-#include "VifDma_internal.h"
-
-enum UnpackOffset {
- OFFSET_X = 0,
- OFFSET_Y = 1,
- OFFSET_Z = 2,
- OFFSET_W = 3
-};
-
-static __forceinline u32 setVifRowRegs(u32 reg, u32 data) {
- switch (reg) {
- case 0: vifRegs->r0 = data; break;
- case 1: vifRegs->r1 = data; break;
- case 2: vifRegs->r2 = data; break;
- case 3: vifRegs->r3 = data; break;
- jNO_DEFAULT;
- }
- return data;
-}
-
-static __forceinline u32 getVifRowRegs(u32 reg) {
- switch (reg) {
- case 0: return vifRegs->r0; break;
- case 1: return vifRegs->r1; break;
- case 2: return vifRegs->r2; break;
- case 3: return vifRegs->r3; break;
- jNO_DEFAULT;
- }
- return 0; // unreachable...
-}
-
-static __forceinline u32 getVifColRegs(u32 reg) {
- switch (reg) {
- case 0: return vifRegs->c0; break;
- case 1: return vifRegs->c1; break;
- case 2: return vifRegs->c2; break;
- default: return vifRegs->c3; break;
- }
- return 0; // unreachable...
-}
-
-template< bool doMask >
-static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) {
- u32 vifRowReg = getVifRowRegs(offnum);
- int n = 0;
-
- if (doMask) {
- switch (vif->cl) {
- case 0: n = (vifRegs->mask >> (offnum * 2)) & 0x3; break;
- case 1: n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break;
- case 2: n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break;
- default: n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break;
- }
- }
-
- switch (n) {
- case 0:
- if ((vif->cmd & 0x6F) != 0x6f) {
- switch (vifRegs->mode) {
- case 1: dest = data + vifRowReg; break;
- case 2: dest = setVifRowRegs(offnum, vifRowReg + data); break;
- default: dest = data; break;
- }
- }
- else dest = data; // v4-5 Unpack Mode
- break;
- case 1: dest = vifRowReg; break;
- case 2: dest = getVifColRegs(vif->cl); break;
- case 3: break;
- }
-}
-
-template < bool doMask, class T >
-static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size)
-{
- //S-# will always be a complete packet, no matter what. So we can skip the offset bits
- writeXYZW(OFFSET_X, *dest++, *data);
- writeXYZW(OFFSET_Y, *dest++, *data);
- writeXYZW(OFFSET_Z, *dest++, *data);
- writeXYZW(OFFSET_W, *dest , *data);
-}
-
-template
-static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
-{
- if (vifRegs->offset == OFFSET_X)
- {
- if (size > 0)
- {
- writeXYZW(vifRegs->offset, *dest++, *data++);
- vifRegs->offset = OFFSET_Y;
- size--;
- }
- }
-
- if (vifRegs->offset == OFFSET_Y)
- {
- if (size > 0)
- {
- writeXYZW(vifRegs->offset, *dest++, *data);
- vifRegs->offset = OFFSET_Z;
- size--;
- }
- }
-
- if (vifRegs->offset == OFFSET_Z)
- {
- writeXYZW(vifRegs->offset, *dest++, *dest-2);
- vifRegs->offset = OFFSET_W;
- }
-
- if (vifRegs->offset == OFFSET_W)
- {
- writeXYZW(vifRegs->offset, *dest, *data);
- vifRegs->offset = OFFSET_X;
- }
-}
-
-template
-static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
-{
- if(vifRegs->offset == OFFSET_X)
- {
- if (size > 0)
- {
- writeXYZW(vifRegs->offset, *dest++, *data++);
- vifRegs->offset = OFFSET_Y;
- size--;
- }
- }
-
- if(vifRegs->offset == OFFSET_Y)
- {
- if (size > 0)
- {
- writeXYZW(vifRegs->offset, *dest++, *data++);
- vifRegs->offset = OFFSET_Z;
- size--;
- }
- }
-
- if(vifRegs->offset == OFFSET_Z)
- {
- if (size > 0)
- {
- writeXYZW(vifRegs->offset, *dest++, *data++);
- vifRegs->offset = OFFSET_W;
- size--;
- }
- }
-
- if(vifRegs->offset == OFFSET_W)
- {
- // V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!)
- // Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate
- writeXYZW(vifRegs->offset, *dest, *data);
- vifRegs->offset = OFFSET_X;
- }
-}
-
-template
-static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
-{
- while (size > 0)
- {
- writeXYZW(vifRegs->offset, *dest++, *data++);
- vifRegs->offset++;
- size--;
- }
-
- if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
-}
-
-template< bool doMask >
-static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
-{
- //As with S-#, this will always be a complete packet
- writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3));
- writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2));
- writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7));
- writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8));
-}
-
-// =====================================================================================================
-
-template < bool doMask, int size, class T >
-static void __fastcall fUNPACK_S(u32 *dest, T *data)
-{
- UNPACK_S( dest, data, size );
-}
-
-template
-static void __fastcall fUNPACK_V2(u32 *dest, T *data)
-{
- UNPACK_V2( dest, data, size );
-}
-
-template
-static void __fastcall fUNPACK_V3(u32 *dest, T *data)
-{
- UNPACK_V3( dest, data, size );
-}
-
-template
-static void __fastcall fUNPACK_V4(u32 *dest, T *data)
-{
- UNPACK_V4( dest, data, size );
-}
-
-template< bool doMask >
-static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data)
-{
- UNPACK_V4_5(dest, data, 0); // size is ignored.
-}
-
-// --------------------------------------------------------------------------------------
-// Main table for function unpacking.
-// --------------------------------------------------------------------------------------
-// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and
-// disabled versions. This is probably simpler and more efficient than bothering
-// to generate separate tables.
-//
-// The double-cast function pointer nonsense is to appease GCC, which gives some rather
-// cryptic error about being unable to deduce the type parameters (I think it's a bug
-// relating to __fastcall, which I recall having some other places as well). It's fixed
-// by explicitly casting the function to itself prior to casting it to what we need it
-// to be cast as. --air
-//
-
-#define _upk (UNPACKFUNCTYPE)
-#define _odd (UNPACKFUNCTYPE_ODD)
-#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits)
-#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits)
-#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits)
-#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits)
-
-// 32-bits versions are unsigned-only!!
-#define UnpackFuncPair32( sizefac, vt, doMask ) \
- (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \
- (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \
- (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, \
- (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt,
-
-#define UnpackFuncPair( sizefac, vt, bits, doMask ) \
- (UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt, \
- (UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt, \
- (UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt, \
- (UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt,
-
-#define UnpackFuncSet( doMask ) \
- { UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \
- { UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \
- { UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \
- { NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \
- { UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \
- { UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \
- { UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \
- { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \
- { UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \
- { UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \
- { UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \
- { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \
- { UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \
- { UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \
- { UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \
- { /* 0xF - V4-5 */ \
- (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \
- (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \
- (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \
- (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \
- 6, 2, 2, 4 },
-
-const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] =
-{
- UnpackFuncSet( false )
- UnpackFuncSet( true )
-};
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "PrecompiledHeader.h"
+#include "Common.h"
+#include "Vif.h"
+#include "VifDma.h"
+// Component index within a four-word destination vector; passed as writeXYZW's offnum and stored in vifRegs->offset.
+enum UnpackOffset {
+ OFFSET_X = 0,
+ OFFSET_Y = 1,
+ OFFSET_Z = 2,
+ OFFSET_W = 3
+};
+// Stores data into row register r0..r3 of the current vifRegs (selected by reg = 0..3) and returns data unchanged.
+static __forceinline u32 setVifRowRegs(u32 reg, u32 data) {
+ switch (reg) {
+ case 0: vifRegs->r0 = data; break;
+ case 1: vifRegs->r1 = data; break;
+ case 2: vifRegs->r2 = data; break;
+ case 3: vifRegs->r3 = data; break;
+ jNO_DEFAULT; // project macro defined elsewhere; NOTE(review): presumably marks any other reg value as unreachable — confirm
+ }
+ return data;
+}
+// Returns row register r0..r3 of the current vifRegs for reg = 0..3.
+static __forceinline u32 getVifRowRegs(u32 reg) {
+ switch (reg) {
+ case 0: return vifRegs->r0; break;
+ case 1: return vifRegs->r1; break;
+ case 2: return vifRegs->r2; break;
+ case 3: return vifRegs->r3; break;
+ jNO_DEFAULT; // project macro defined elsewhere; governs what happens for reg outside 0..3
+ }
+ return 0; // not reached for reg 0..3, since each case above returns
+}
+// Returns column register c0, c1 or c2 of the current vifRegs for reg = 0, 1 or 2, and c3 for every other value.
+static __forceinline u32 getVifColRegs(u32 reg) {
+ switch (reg) {
+ case 0: return vifRegs->c0; break;
+ case 1: return vifRegs->c1; break;
+ case 2: return vifRegs->c2; break;
+ default: return vifRegs->c3; break;
+ }
+ return 0; // unreachable: every switch path above returns
+}
+// Writes one component (offnum = 0..3) of an unpacked vector to dest, applying the write mask (only when doMask) and vifRegs->mode.
+template< bool doMask >
+static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) {
+ u32 vifRowReg = getVifRowRegs(offnum);
+ int n = 0; // 2-bit mask code for this component; stays 0 (plain write) when doMask is false
+
+ if (doMask) {
+ switch (vif->cl) { // vif->cl picks the mask byte (every cl >= 3 uses the top byte); offnum picks the 2-bit field inside it
+ case 0: n = (vifRegs->mask >> (offnum * 2)) & 0x3; break;
+ case 1: n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break;
+ case 2: n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break;
+ default: n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break;
+ }
+ }
+
+ switch (n) {
+ case 0: // write the input value, optionally combined with the row register
+ if ((vif->cmd & 0x6F) != 0x6f) {
+ switch (vifRegs->mode) {
+ case 1: dest = data + vifRowReg; break; // add the row register; the register itself is left unchanged
+ case 2: dest = setVifRowRegs(offnum, vifRowReg + data); break; // add, and store the sum back into the row register
+ default: dest = data; break;
+ }
+ }
+ else dest = data; // v4-5 Unpack Mode: vifRegs->mode is ignored
+ break;
+ case 1: dest = vifRowReg; break; // write the row register instead of the input
+ case 2: dest = getVifColRegs(vif->cl); break; // write the column register selected by vif->cl
+ case 3: break; // leave dest untouched
+ }
+}
+// S-type unpack: writes the single input element *data to all four components (X, Y, Z, W) starting at dest; size is not used.
+template < bool doMask, class T >
+static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size)
+{
+ //S-# will always be a complete packet, no matter what. So we can skip the offset bits
+ writeXYZW(OFFSET_X, *dest++, *data); // NOTE(review): these calls show no explicit <doMask>, and it cannot be deduced from the arguments — confirm it was not lost
+ writeXYZW(OFFSET_Y, *dest++, *data);
+ writeXYZW(OFFSET_Z, *dest++, *data);
+ writeXYZW(OFFSET_W, *dest , *data);
+}
+// V2-type unpack: steps vifRegs->offset through X, Y, Z, W and back to X; only the X and Y steps are guarded by size and consume it.
+template // NOTE(review): bare 'template' although T is used below — confirm the parameter list (cf. UNPACK_S) was not lost
+static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
+{
+ if (vifRegs->offset == OFFSET_X)
+ {
+ if (size > 0)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *data++);
+ vifRegs->offset = OFFSET_Y;
+ size--;
+ }
+ }
+
+ if (vifRegs->offset == OFFSET_Y)
+ {
+ if (size > 0)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *data); // data is not advanced, so a following W step reads this same element
+ vifRegs->offset = OFFSET_Z;
+ size--;
+ }
+ }
+
+ if (vifRegs->offset == OFFSET_Z)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *dest-2); // NOTE(review): '*dest-2' parses as (*dest) - 2, and dest is incremented and read in the same argument list (order-dependent) — confirm intent
+ vifRegs->offset = OFFSET_W;
+ }
+
+ if (vifRegs->offset == OFFSET_W)
+ {
+ writeXYZW(vifRegs->offset, *dest, *data);
+ vifRegs->offset = OFFSET_X; // vector complete: start the next one at X
+ }
+}
+// V3-type unpack: steps vifRegs->offset through X, Y, Z (each consuming one input element while size > 0), then writes W and returns to X.
+template // NOTE(review): bare 'template' although T is used below — confirm the parameter list (cf. UNPACK_S) was not lost
+static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
+{
+ if(vifRegs->offset == OFFSET_X)
+ {
+ if (size > 0)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *data++);
+ vifRegs->offset = OFFSET_Y;
+ size--;
+ }
+ }
+
+ if(vifRegs->offset == OFFSET_Y)
+ {
+ if (size > 0)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *data++);
+ vifRegs->offset = OFFSET_Z;
+ size--;
+ }
+ }
+
+ if(vifRegs->offset == OFFSET_Z)
+ {
+ if (size > 0)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *data++);
+ vifRegs->offset = OFFSET_W;
+ size--;
+ }
+ }
+
+ if(vifRegs->offset == OFFSET_W)
+ {
+ // V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!)
+ // Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate
+ writeXYZW(vifRegs->offset, *dest, *data); // no size check here: writes whatever data points at after the steps above
+ vifRegs->offset = OFFSET_X;
+ }
+}
+// V4-type unpack: writes size consecutive input elements, advancing dest, data and vifRegs->offset by one per element.
+template // NOTE(review): bare 'template' although T is used below — confirm the parameter list (cf. UNPACK_S) was not lost
+static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
+{
+ while (size > 0)
+ {
+ writeXYZW(vifRegs->offset, *dest++, *data++);
+ vifRegs->offset++; // NOTE(review): not wrapped inside the loop; callers presumably keep starting offset + size <= 4 — confirm
+ size--;
+ }
+
+ if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; // wrap back to X once past W
+}
+// V4-5 unpack: splits the low 16 bits of *data into 5/5/5/1-bit fields and writes them to X/Y/Z/W; size is not used.
+template< bool doMask >
+static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
+{
+ //As with S-#, this will always be a complete packet
+ writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); // bits 0..4 moved to bits 3..7
+ writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); // bits 5..9 moved to bits 3..7
+ writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); // bits 10..14 moved to bits 3..7
+ writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); // bit 15 moved to bit 7
+}
+
+// =====================================================================================================
+// Two-argument adapter for UNPACK_S: forwards dest/data and passes the template constant 'size' as the size argument.
+template < bool doMask, int size, class T >
+static void __fastcall fUNPACK_S(u32 *dest, T *data)
+{
+ UNPACK_S( dest, data, size );
+}
+// Two-argument adapter for UNPACK_V2: forwards dest/data plus 'size', which is not a function parameter here.
+template // NOTE(review): bare 'template' although T and size are used below — confirm the parameter list (cf. fUNPACK_S) was not lost
+static void __fastcall fUNPACK_V2(u32 *dest, T *data)
+{
+ UNPACK_V2( dest, data, size );
+}
+// Two-argument adapter for UNPACK_V3: forwards dest/data plus 'size', which is not a function parameter here.
+template // NOTE(review): bare 'template' although T and size are used below — confirm the parameter list (cf. fUNPACK_S) was not lost
+static void __fastcall fUNPACK_V3(u32 *dest, T *data)
+{
+ UNPACK_V3( dest, data, size );
+}
+// Two-argument adapter for UNPACK_V4: forwards dest/data plus 'size', which is not a function parameter here.
+template // NOTE(review): bare 'template' although T and size are used below — confirm the parameter list (cf. fUNPACK_S) was not lost
+static void __fastcall fUNPACK_V4(u32 *dest, T *data)
+{
+ UNPACK_V4( dest, data, size );
+}
+// Two-argument adapter for UNPACK_V4_5; there is no size template parameter, so a literal 0 is passed.
+template< bool doMask >
+static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data)
+{
+ UNPACK_V4_5(dest, data, 0); // size is ignored.
+}
+
+// --------------------------------------------------------------------------------------
+// Main table for function unpacking.
+// --------------------------------------------------------------------------------------
+// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and
+// disabled versions. This is probably simpler and more efficient than bothering
+// to generate separate tables.
+//
+// The double-cast function pointer nonsense is to appease GCC, which gives some rather
+// cryptic error about being unable to deduce the type parameters (I think it's a bug
+// relating to __fastcall, which I recall having some other places as well). It's fixed
+// by explicitly casting the function to itself prior to casting it to what we need it
+// to be cast as. --air
+// Layout: VIFfuncTable has 32 rows — UnpackFuncSet(false) supplies rows 0-15 and UnpackFuncSet(true) rows 16-31.
+// Each row is { funcU, funcS, oddU, oddS, bsize, dsize, gsize, qsize }, matching the VIFUnpackFuncTable members.
+#define _upk (UNPACKFUNCTYPE) // not referenced by the table macros below, which spell the cast out
+#define _odd (UNPACKFUNCTYPE_ODD) // not referenced by the table macros below, which spell the cast out
+#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits)
+#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits)
+#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits)
+#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits)
+// NOTE(review): as shown, the macro bodies never use sizefac/doMask and name fUNPACK_##vt / UNPACK_##vt without template arguments — confirm these were not lost
+// 32-bits versions are unsigned-only!! (both the U and the S slot receive the u32 variant)
+#define UnpackFuncPair32( sizefac, vt, doMask ) \
+ (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \
+ (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \
+ (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, \
+ (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt,
+
+#define UnpackFuncPair( sizefac, vt, bits, doMask ) \
+ (UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt, \
+ (UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt, \
+ (UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt, \
+ (UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt,
+
+#define UnpackFuncSet( doMask ) \
+ { UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \
+ { UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \
+ { UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \
+ { NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \
+ { UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \
+ { UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \
+ { UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \
+ { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \
+ { UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \
+ { UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \
+ { UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \
+ { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \
+ { UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \
+ { UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \
+ { UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \
+ { /* 0xF - V4-5 */ \
+ (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \
+ (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \
+ (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \
+ (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \
+ 6, 2, 2, 4 },
+
+const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] =
+{
+ UnpackFuncSet( false ) // rows 0x00-0x0F
+ UnpackFuncSet( true ) // rows 0x10-0x1F
+};
diff --git a/pcsx2/Vif_Unpack.h b/pcsx2/Vif_Unpack.h
new file mode 100644
index 0000000000..efb723b85c
--- /dev/null
+++ b/pcsx2/Vif_Unpack.h
@@ -0,0 +1,56 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+// Function-pointer types for the unpack routines; create_some_unpacks(bits) declares the U/S and ODD_U/ODD_S variants taking u<bits>/s<bits> input.
+typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data); // two-argument form stored in funcU/funcS
+typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size); // three-argument form stored in oddU/oddS
+typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size); // no user of this type is visible in this diff
+
+#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, u##bits *data);
+#define create_unpack_odd_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, u##bits *data, int size);
+#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, s##bits *data);
+#define create_unpack_odd_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, s##bits *data, int size);
+
+#define create_some_unpacks(bits) \
+ create_unpack_u_type(bits); \
+ create_unpack_odd_u_type(bits); \
+ create_unpack_s_type(bits); \
+ create_unpack_odd_s_type(bits);
+
+create_some_unpacks(32); // each expansion already ends in ';', so the ';' written after a use is an extra empty declaration
+create_some_unpacks(16);
+create_some_unpacks(8);
+// One row of VIFfuncTable: the unpack function variants for one unpack format plus that format's size parameters.
+struct VIFUnpackFuncTable
+{
+ UNPACKFUNCTYPE funcU; // VIFunpack calls this one when vif->usn is non-zero
+ UNPACKFUNCTYPE funcS; // VIFunpack calls this one when vif->usn is zero
+
+ UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed.
+ UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed.
+
+ u8 bsize; // currently unused
+ u8 dsize; // byte size of one channel
+ u8 gsize; // size of data in bytes used for each write cycle
+ u8 qsize; // used for unpack parts, num of vectors that
+ // will be decompressed from data for 1 cycle
+};
+// Table defined in Vif_Unpack.cpp; VIFunpack indexes it with (cmd & 0x1f).
+extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
+// newVif entry points; NOTE(review): their definitions are not visible in this diff — idx presumably selects VIF0/VIF1, confirm
+extern int nVifUnpack (int idx, u8 *data);
+extern void initNewVif (int idx);
+extern void resetNewVif(int idx);
diff --git a/pcsx2/x86/newVif_OldUnpack.inl b/pcsx2/Vif_Unpack.inl
similarity index 95%
rename from pcsx2/x86/newVif_OldUnpack.inl
rename to pcsx2/Vif_Unpack.inl
index 19ddcbd081..97698a777b 100644
--- a/pcsx2/x86/newVif_OldUnpack.inl
+++ b/pcsx2/Vif_Unpack.inl
@@ -1,161 +1,159 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-// Old Vif Unpack Code
-// Only here for testing/reference
-// If newVif is defined and newVif1 isn't, vif1 will use this code
-// same goes for vif0...
-template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
-template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
-template void VIFunpack(u32 *data, vifCode *v, u32 size) {
- //if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
- VURegs * VU;
- u8 *cdata = (u8*)data;
- u32 tempsize = 0;
- const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000;
-
- if (VIFdmanum == 0) {
- VU = &VU0;
- vifRegs = vif0Regs;
- vif = &vif0;
- }
- else {
- VU = &VU1;
- vifRegs = vif1Regs;
- vif = &vif1;
- }
-
- u32 *dest = (u32*)(VU->Mem + v->addr);
-
- const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
- UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
-
- size <<= 2;
-
- if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
- if (v->addr >= memlimit) {
- DevCon.Warning("Overflown at the start");
- v->addr &= (memlimit - 1);
- dest = (u32*)(VU->Mem + v->addr);
- }
-
- size = std::min(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
-
- tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
- (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
-
- //Sanity Check (memory overflow)
- if (tempsize > memlimit) {
- if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
- ((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
- //It's a red herring, so ignore it! SSE unpacks will be much quicker.
- DevCon.WriteLn("what!!!!!!!!!");
- //tempsize = 0;
- tempsize = size;
- size = 0;
- }
- else {
- DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
- tempsize = size;
- size = 0;
- }
- }
- else {
- tempsize = size;
- size = 0;
- }
- if (tempsize) {
- int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
- size = 0;
- int addrstart = v->addr;
- //if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
-
- VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
-
- while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) {
- if(v->addr >= memlimit) {
- DevCon.Warning("Mem limit overflow");
- v->addr &= (memlimit - 1);
- dest = (u32*)(VU->Mem + v->addr);
- }
-
- func(dest, (u32*)cdata);
- cdata += ft.gsize;
- tempsize -= ft.gsize;
-
- vifRegs->num--;
- vif->cl++;
-
- if (vif->cl == vifRegs->cycle.wl) {
- dest += incdest;
- v->addr +=(incdest * 4);
- vif->cl = 0;
- }
- else {
- dest += 4;
- v->addr += 16;
- }
- }
- if (v->addr >= memlimit) {
- v->addr &=(memlimit - 1);
- dest = (u32*)(VU->Mem + v->addr);
- }
- v->addr = addrstart;
- if(tempsize > 0) size = tempsize;
- }
-
- if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have
- DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
- VIF_LOG("warning, end with size = %d", size);
- // unpack one qword
- //v->addr += (size / ft.dsize) * 4;
- (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
- size = 0;
- VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
- }
- }
- else { // filling write
- if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
- if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
- DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
-
- DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr);
- while (vifRegs->num > 0) {
- if (vif->cl == vifRegs->cycle.wl) {
- vif->cl = 0;
- }
- // unpack one qword
- if (vif->cl < vifRegs->cycle.cl) {
- if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
- func(dest, (u32*)cdata);
- cdata += ft.gsize;
- size -= ft.gsize;
- vif->cl++;
- vifRegs->num--;
- if (vif->cl == vifRegs->cycle.wl) {
- vif->cl = 0;
- }
- }
- else {
- func(dest, (u32*)cdata);
- v->addr += 16;
- vifRegs->num--;
- vif->cl++;
- }
- dest += 4;
- if (vifRegs->num == 0) break;
- }
- }
-}
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+// Old Vif Unpack Code: unpacks the input at 'data' into VU0/VU1 memory at v->addr ('size' is shifted left by 2 below, i.e. apparently given in 32-bit words).
+// Only here for testing/reference. Side effect: points the globals vifRegs and vif at the unit selected by VIFdmanum.
+template void VIFunpack(u32 *data, vifCode *v, u32 size) { // NOTE(review): bare 'template' although VIFdmanum is used below — confirm the parameter list was not lost
+ //if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
+ VURegs * VU;
+ u8 *cdata = (u8*)data; // byte cursor over the input
+ u32 tempsize = 0;
+ const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000; // v->addr is wrapped with (memlimit - 1) below
+
+ if (VIFdmanum == 0) {
+ VU = &VU0;
+ vifRegs = vif0Regs;
+ vif = &vif0;
+ }
+ else {
+ VU = &VU1;
+ vifRegs = vif1Regs;
+ vif = &vif1;
+ }
+
+ u32 *dest = (u32*)(VU->Mem + v->addr);
+
+ const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] ); // low 5 bits of cmd pick the table row
+ UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
+
+ size <<= 2; // from here on size is compared against byte counts (ft.gsize / ft.dsize)
+
+ if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
+ if (v->addr >= memlimit) {
+ DevCon.Warning("Overflown at the start");
+ v->addr &= (memlimit - 1);
+ dest = (u32*)(VU->Mem + v->addr);
+ }
+
+ size = std::min(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
+ // Estimated end address: start + gaps skipped between write cycles + 16 bytes per remaining vector.
+ tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
+ (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
+
+ //Sanity Check (memory overflow); all three branches below make the same assignments and differ only in what they log
+ if (tempsize > memlimit) {
+ if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
+ ((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
+ //It's a red herring, so ignore it! SSE unpacks will be much quicker.
+ DevCon.WriteLn("what!!!!!!!!!");
+ //tempsize = 0;
+ tempsize = size;
+ size = 0;
+ }
+ else {
+ DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
+ tempsize = size;
+ size = 0;
+ }
+ }
+ else {
+ tempsize = size;
+ size = 0;
+ }
+ if (tempsize) { // tempsize now holds the number of input bytes left to unpack
+ int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; // in u32 units: one vector (4) plus the (cl - wl) skipped vectors
+ size = 0;
+ int addrstart = v->addr;
+ //if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
+
+ VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
+
+ while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) {
+ if(v->addr >= memlimit) {
+ DevCon.Warning("Mem limit overflow");
+ v->addr &= (memlimit - 1);
+ dest = (u32*)(VU->Mem + v->addr);
+ }
+
+ func(dest, (u32*)cdata); // unpack one vector
+ cdata += ft.gsize;
+ tempsize -= ft.gsize;
+
+ vifRegs->num--;
+ vif->cl++;
+
+ if (vif->cl == vifRegs->cycle.wl) { // wl vectors written: jump over the skipped part of the cycle
+ dest += incdest;
+ v->addr +=(incdest * 4);
+ vif->cl = 0;
+ }
+ else {
+ dest += 4;
+ v->addr += 16;
+ }
+ }
+ if (v->addr >= memlimit) {
+ v->addr &=(memlimit - 1);
+ dest = (u32*)(VU->Mem + v->addr);
+ }
+ v->addr = addrstart; // v->addr goes back to its value from before the loop; only dest keeps the advanced position
+ if(tempsize > 0) size = tempsize; // bytes too few for a whole vector are handled below
+ }
+
+ if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have
+ DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
+ VIF_LOG("warning, end with size = %d", size);
+ // unpack one qword
+ //v->addr += (size / ft.dsize) * 4;
+ (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize); // partial vector: size / ft.dsize elements
+ size = 0;
+ VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr); // size was zeroed on the previous line, so this always logs 0
+ }
+ }
+ else { // filling write
+ if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
+ if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
+ DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
+ // The warning below is outside the two ifs above, so it is emitted for every filling write.
+ DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr);
+ while (vifRegs->num > 0) {
+ if (vif->cl == vifRegs->cycle.wl) {
+ vif->cl = 0;
+ }
+ // unpack one qword
+ if (vif->cl < vifRegs->cycle.cl) { // still within the first cl vectors of the cycle: consume input
+ if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
+ func(dest, (u32*)cdata);
+ cdata += ft.gsize;
+ size -= ft.gsize;
+ vif->cl++;
+ vifRegs->num--;
+ if (vif->cl == vifRegs->cycle.wl) {
+ vif->cl = 0;
+ }
+ }
+ else { // vif->cl >= cycle.cl: no input is consumed (cdata and size are left unchanged)
+ func(dest, (u32*)cdata);
+ v->addr += 16;
+ vifRegs->num--;
+ vif->cl++;
+ }
+ dest += 4;
+ if (vifRegs->num == 0) break; // same test as the loop condition
+ }
+ }
+}
diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
index 7f7eec15eb..d505395ccb 100644
--- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
+++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj
@@ -820,52 +820,56 @@
RelativePath="..\..\VifDma.h"
>
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h
index 06635804f1..5b7bd1f5a6 100644
--- a/pcsx2/x86/newVif.h
+++ b/pcsx2/x86/newVif.h
@@ -101,4 +101,3 @@ extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
static const bool useOldUnpack = 0; // Use code in newVif_OldUnpack.inl
static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl
-
diff --git a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp
similarity index 96%
rename from pcsx2/x86/VifUnpackSSE_Dynarec.cpp
rename to pcsx2/x86/newVif_Dynarec.cpp
index d27b153413..7ff3628588 100644
--- a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp
+++ b/pcsx2/x86/newVif_Dynarec.cpp
@@ -1,267 +1,267 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
-// authors: cottonvibes(@gmail.com)
-// Jake.Stine (@gmail.com)
-
-#include "PrecompiledHeader.h"
-#include "VifUnpackSSE.h"
-
-static __aligned16 nVifBlock _vBlock = {0};
-static __pagealigned u8 nVifMemCmp[__pagesize];
-
-void dVifInit(int idx) {
- nVif[idx].numBlocks = 0;
- nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
- nVif[idx].vifBlocks = new HashBucket<_tParams>();
- nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
- nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
-}
-
-void dVifClose(int idx) {
- nVif[idx].numBlocks = 0;
- safe_delete(nVif[idx].vifCache);
- safe_delete(nVif[idx].vifBlocks);
-}
-
-VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
- : v(vif_)
- , vB(vifBlock_)
-{
- isFill = (vB.cl < vB.wl);
- usn = (vB.upkType>>5) & 1;
- doMask = (vB.upkType>>4) & 1;
- doMode = vB.mode & 3;
-}
-
-#define makeMergeMask(x) { \
- x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
-}
-
-_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
- u32 m0 = vB.mask;
- u32 m1 = m0 & 0xaaaaaaaa;
- u32 m2 =(~m1>>1) & m0;
- u32 m3 = (m1>>1) & ~m0;
- u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
- u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
- if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); }
- if (m3&&doMask) {
- xMOVAPS(xmmCol0, ptr32[col]);
- if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
- if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
- if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
- if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
- }
- //if (doMask||doMode) loadRowCol((nVifStruct&)v);
-}
-
-void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
- pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
- int t = regX.Id ? 0 : 1; // Get Temp Reg
- int cc = aMin(vCL, 3);
- u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
- u32 m1 = m0 & 0xaa;
- u32 m2 =(~m1>>1) & m0;
- u32 m3 = (m1>>1) & ~m0;
- u32 m4 = (m1>>1) & m0;
- makeMergeMask(m2);
- makeMergeMask(m3);
- makeMergeMask(m4);
- if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
- if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row
- if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col
- if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect
- if (doMode) {
- u32 m5 = (~m1>>1) & ~m0;
- if (!doMask) m5 = 0xf;
- else makeMergeMask(m5);
- if (m5 < 0xf) {
- xPXOR(xmmTemp, xmmTemp);
- mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5);
- xPADD.D(regX, xmmTemp);
- if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5);
- }
- else if (m5 == 0xf) {
- xPADD.D(regX, xmmRow);
- if (doMode==2) xMOVAPS(xmmRow, regX);
- }
- }
- xMOVAPS(ptr32[dstIndirect], regX);
-}
-
-void VifUnpackSSE_Dynarec::writeBackRow() const {
- u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
- xMOVAPS(ptr32[row], xmmRow);
- DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
- // ToDo: Do we need to write back to vifregs.rX too!? :/
-}
-
-static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
-{
- // Shifts the displacement factor of a given indirect address, so that the address
- // remains in the optimal 0xf0 range (which allows for byte-form displacements when
- // generating instructions).
-
- int addImm = 0;
- while( addr.Displacement >= 0x80 )
- {
- addImm += 0xf0;
- addr -= 0xf0;
- }
- if(addImm) xADD(modReg, addImm);
-}
-static bool UsesTwoRegs[] =
-{
- true, true, true, true,
- false, false, false, false,
- false, false, false, false,
- false, false, false, true,
-
-};
-
-void VifUnpackSSE_Dynarec::CompileRoutine() {
- const int upkNum = v.vif->cmd & 0xf;
- const u8& vift = nVifT[upkNum];
- const int cycleSize = isFill ? vB.cl : vB.wl;
- const int blockSize = isFill ? vB.wl : vB.cl;
- const int skipSize = blockSize - cycleSize;
-
- int vNum = v.vifRegs->num;
- vCL = v.vif->cl;
- doMode = upkNum == 0xf ? 0 : doMode;
-
- SetMasks(cycleSize);
-
- while (vNum) {
-
- ShiftDisplacementWindow( srcIndirect, edx );
- ShiftDisplacementWindow( dstIndirect, ecx );
-
- if (vCL < cycleSize) {
- xUnpack(upkNum);
- xMovDest();
-
- dstIndirect += 16;
- srcIndirect += vift;
-
- if( IsUnmaskedOp() ) {
- ++destReg;
- ++workReg;
- }
-
- vNum--;
- if (++vCL == blockSize) vCL = 0;
- }
- else if (isFill) {
- DevCon.WriteLn("filling mode!");
- VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) );
- fill.xUnpack(upkNum);
- fill.xMovDest();
-
- dstIndirect += 16;
- vNum--;
- if (++vCL == blockSize) vCL = 0;
- }
- else {
- dstIndirect += (16 * skipSize);
- vCL = 0;
- }
- }
-
- if (doMode==2) writeBackRow();
- xMOV(ptr32[&v.vif->cl], vCL);
- xMOV(ptr32[&v.vifRegs->num], vNum);
- xRET();
-}
-
-static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
- u8* endPtr; // Check if we need to wrap around VU memory
- u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit));
- if (!isFill) { // Account for skip-cycles
- int skipSize = cl - wl;
- int blocks = _vBlock.num / wl;
- int skips = (blocks * skipSize + _vBlock.num) * 16;
- endPtr = ptr + skips;
- }
- else endPtr = ptr + (_vBlock.num * 16);
- if ( endPtr > v.vuMemEnd ) {
- DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
- ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
- }
- return ptr;
-}
-
-static _f void dVifRecLimit(int idx) {
- if (nVif[idx].recPtr > nVif[idx].recEnd) {
- DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd);
- nVif[idx].vifBlocks->clear();
- nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
- }
-}
-
-_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
-
- const nVifStruct& v = nVif[idx];
- const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5);
- const int doMask = v.vif->cmd & 0x10;
- const int cycle_cl = v.vifRegs->cycle.cl;
- const int cycle_wl = v.vifRegs->cycle.wl;
- const int cycleSize = isFill ? cycle_cl : cycle_wl;
- const int blockSize = isFill ? cycle_wl : cycle_cl;
-
- if (v.vif->cl >= blockSize) v.vif->cl = 0;
-
- _vBlock.upkType = upkType;
- _vBlock.num = (u8&)v.vifRegs->num;
- _vBlock.mode = (u8&)v.vifRegs->mode;
- _vBlock.scl = v.vif->cl;
- _vBlock.cl = cycle_cl;
- _vBlock.wl = cycle_wl;
-
- // Zero out the mask parameter if it's unused -- games leave random junk
- // values here which cause false recblock cache misses.
- _vBlock.mask = doMask ? v.vifRegs->mask : 0;
-
- if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
- if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
- //DevCon.WriteLn("Running Recompiled Block!");
- ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
- }
- else {
- //DevCon.WriteLn("Running Interpreter Block");
- _nVifUnpack(idx, data, size, isFill);
- }
- return;
- }
- DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++);
- //DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]",
- // _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode,
- // doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
- //);
-
- xSetPtr(v.recPtr);
- _vBlock.startPtr = (uptr)xGetAlignedCallTarget();
- v.vifBlocks->add(_vBlock);
- VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
- nVif[idx].recPtr = xGetPtr();
-
- dVifRecLimit(idx);
-
- // Run the block we just compiled. Various conditions may force us to still use
- // the interpreter unpacker though, so a recursive call is the safest way here...
- dVifUnpack(idx, data, size, isFill);
-}
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
+// authors: cottonvibes(@gmail.com)
+// Jake.Stine (@gmail.com)
+
+#include "PrecompiledHeader.h"
+#include "newVif_UnpackSSE.h"
+
+static __aligned16 nVifBlock _vBlock = {0}; // scratch lookup key: dVifUnpack fills it in, then searches/inserts the block cache with it
+static __pagealigned u8 nVifMemCmp[__pagesize]; // NOTE(review): not referenced anywhere in this file -- confirm it is still needed
+
+void dVifInit(int idx) { // allocates and resets the recompiler state of VIF unit 'idx'
+ nVif[idx].numBlocks = 0; // count of blocks compiled so far (only used for logging in dVifUnpack)
+ nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
+ nVif[idx].vifBlocks = new HashBucket<_tParams>(); // lookup table of compiled blocks, searched by dVifUnpack
+ nVif[idx].recPtr = nVif[idx].vifCache->getBlock(); // next emit position; getBlock() presumably returns the start of the cache
+ nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
+}
+
+void dVifClose(int idx) { // releases the objects dVifInit allocated for VIF unit 'idx'
+ nVif[idx].numBlocks = 0;
+ safe_delete(nVif[idx].vifCache); // the code cache buffer
+ safe_delete(nVif[idx].vifBlocks); // the compiled-block lookup table
+}
+
+VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
+ : v(vif_)
+ , vB(vifBlock_)
+{
+ doMode = vB.mode & 3; // low two bits of the block's mode value
+ doMask = ((vB.upkType >> 4) & 1) != 0; // bit 4 of upkType: masked write requested
+ usn = ((vB.upkType >> 5) & 1) != 0; // bit 5 of upkType: unsigned unpack
+ isFill = (vB.wl > vB.cl); // filling write when WL is larger than CL
+}
+
+#define makeMergeMask(x) { \
+ x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
+} // packs bits 6,4,2,0 of x into bits 0,1,2,3 (order reversed); every other bit of x is dropped
+
+_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const { // emits the row/column register loads this block needs; cS is the cycle size passed by CompileRoutine
+ u32 m0 = vB.mask; // sixteen 2-bit fields
+ u32 m1 = m0 & 0xaaaaaaaa; // high bit of every field
+ u32 m2 =(~m1>>1) & m0; // even bits: field == 1 (row). NOTE(review): odd bits still carry the fields' high bits
+ u32 m3 = (m1>>1) & ~m0; // even bits: field == 2 (column); odd bits are always clear
+ u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
+ u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
+ if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); } // because of the leftover bits in m2 this loads the row for nearly any non-zero mask
+ if (m3&&doMask) {
+ xMOVAPS(xmmCol0, ptr32[col]);
+ if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); // mask byte 1 uses column data
+ if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); // mask byte 2 uses column data
+ if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); // mask byte 3 uses column data
+ if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0); // done last: xmmCol0 is the shuffle source for the three above
+ }
+ //if (doMask||doMode) loadRowCol((nVifStruct&)v);
+}
+
+void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { // emits the masked and/or mode-adjusted store of regX to the destination
+ pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
+ int t = regX.Id ? 0 : 1; // Get Temp Reg
+ int cc = aMin(vCL, 3); // current cycle clamped to 3: picks the mask byte and the column register
+ u32 m0 = (vB.mask >> (cc * 8)) & 0xff; // four 2-bit fields for this cycle
+ u32 m1 = m0 & 0xaa; // high bit of each field
+ u32 m2 =(~m1>>1) & m0; // even bits: field == 1 (row)
+ u32 m3 = (m1>>1) & ~m0; // even bits: field == 2 (column)
+ u32 m4 = (m1>>1) & m0; // even bits: field == 3 (write protect)
+ makeMergeMask(m2); // each becomes a 4-bit lane selector; stray odd bits are discarded here
+ makeMergeMask(m3);
+ makeMergeMask(m4);
+ if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
+ if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row
+ if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col
+ if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect
+ if (doMode) {
+ u32 m5 = (~m1>>1) & ~m0; // even bits: field == 0, i.e. lanes that keep the unpacked data
+ if (!doMask) m5 = 0xf; // no masking: the mode applies to all four lanes
+ else makeMergeMask(m5);
+ if (m5 < 0xf) { // only some lanes: add the row through a zeroed temp so the other lanes are unchanged
+ xPXOR(xmmTemp, xmmTemp);
+ mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5);
+ xPADD.D(regX, xmmTemp);
+ if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5); // mode 2 also stores the sums back into the row register
+ }
+ else if (m5 == 0xf) { // all lanes: add the whole row register
+ xPADD.D(regX, xmmRow);
+ if (doMode==2) xMOVAPS(xmmRow, regX); // mode 2 also stores the sums back into the row register
+ }
+ }
+ xMOVAPS(ptr32[dstIndirect], regX);
+}
+
+void VifUnpackSSE_Dynarec::writeBackRow() const { // emits a store of xmmRow back to this unit's g_vifmask row
+ u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
+ xMOVAPS(ptr32[row], xmmRow);
+ DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]"); // logged while the block is being compiled, not each time it runs
+ // ToDo: Do we need to write back to vifregs.rX too!? :/
+}
+
+static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
+{
+ // Keeps the displacement of 'addr' below 0x80 so the emitter can keep using the
+ // byte-form displacement encoding. The displacement is lowered in steps of 0xf0
+ // and the total amount taken off is added to 'modReg' by a single emitted ADD
+ // (callers in this file pass the base register of 'addr' as 'modReg').
+
+ int removed = 0;
+ for( ; addr.Displacement >= 0x80; addr -= 0xf0 )
+  removed += 0xf0;
+
+ if( removed == 0 ) return;
+ xADD(modReg, removed);
+}
+static bool UsesTwoRegs[] = // NOTE(review): 16 flags, presumably one per unpack type; not referenced anywhere in this file
+{
+ true, true, true, true,
+ false, false, false, false,
+ false, false, false, false,
+ false, false, false, true,
+
+};
+
+void VifUnpackSSE_Dynarec::CompileRoutine() { // emits one fully unrolled routine for the current unpack: one unpack+store per vector, then RET
+ const int upkNum = v.vif->cmd & 0xf; // unpack type, low 4 bits of the command
+ const u8& vift = nVifT[upkNum]; // amount the source offset advances per unpacked vector
+ const int cycleSize = isFill ? vB.cl : vB.wl; // positions per block that consume source data
+ const int blockSize = isFill ? vB.wl : vB.cl; // position count at which vCL wraps to 0
+ const int skipSize = blockSize - cycleSize; // remaining positions: filled (isFill) or skipped
+
+ int vNum = v.vifRegs->num; // vectors left to write
+ vCL = v.vif->cl; // position inside the current block
+ doMode = upkNum == 0xf ? 0 : doMode; // type 0xf (V4-5) never applies the mode
+
+ SetMasks(cycleSize);
+
+ while (vNum) {
+
+ ShiftDisplacementWindow( srcIndirect, edx ); // keep both displacements in byte range
+ ShiftDisplacementWindow( dstIndirect, ecx );
+
+ if (vCL < cycleSize) { // normal vector: unpack from source and store
+ xUnpack(upkNum);
+ xMovDest();
+
+ dstIndirect += 16;
+ srcIndirect += vift;
+
+ if( IsUnmaskedOp() ) { // unmasked path: move on to the next xmm registers
+ ++destReg;
+ ++workReg;
+ }
+
+ vNum--;
+ if (++vCL == blockSize) vCL = 0;
+ }
+ else if (isFill) { // filling write: store through the FillingWrite variant; the source offset is not advanced
+ DevCon.WriteLn("filling mode!");
+ VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) );
+ fill.xUnpack(upkNum);
+ fill.xMovDest();
+
+ dstIndirect += 16;
+ vNum--;
+ if (++vCL == blockSize) vCL = 0;
+ }
+ else { // skipping write: step over the rest of the block without consuming vNum
+ dstIndirect += (16 * skipSize);
+ vCL = 0;
+ }
+ }
+
+ if (doMode==2) writeBackRow();
+ xMOV(ptr32[&v.vif->cl], vCL); // the routine stores the final cl value, known at compile time
+ xMOV(ptr32[&v.vifRegs->num], vNum); // vNum is 0 here, so the routine clears num
+ xRET();
+}
+
+static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
+ // Returns the address in VU memory that the current unpack starts writing at, or
+ // NULL when the transfer would run past v.vuMemEnd. The caller then uses the
+ // interpreter, which has wrap-around logic.
+ u8* const start = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit));
+
+ // Number of 16-byte vectors spanned: the ones written, plus (skipping write only)
+ // the cl-wl vectors stepped over after every wl written.
+ int span = _vBlock.num;
+ if (!isFill) span += (_vBlock.num / wl) * (cl - wl);
+
+ if ( (start + (span * 16)) <= v.vuMemEnd ) return start;
+
+ DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
+ return NULL;
+}
+
+static _f void dVifRecLimit(int idx) {
+ nVifStruct& unit = nVif[idx]; // once the emit pointer passes recEnd, drop every block and restart the cache
+ if (unit.recPtr <= unit.recEnd) return;
+ DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", unit.recPtr, unit.recEnd);
+ unit.vifBlocks->clear();
+ unit.recPtr = unit.vifCache->getBlock();
+}
+
+_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) { // runs the unpack for VIF unit 'idx' through a cached recompiled block, compiling it first if needed
+
+ const nVifStruct& v = nVif[idx];
+ const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5); // parses as (cmd & 0x1f) | (usn << 5): & binds tighter than |
+ const int doMask = v.vif->cmd & 0x10;
+ const int cycle_cl = v.vifRegs->cycle.cl;
+ const int cycle_wl = v.vifRegs->cycle.wl;
+ const int cycleSize = isFill ? cycle_cl : cycle_wl; // NOTE(review): not used in this function
+ const int blockSize = isFill ? cycle_wl : cycle_cl;
+
+ if (v.vif->cl >= blockSize) v.vif->cl = 0; // restart the block position when it is out of range
+
+ _vBlock.upkType = upkType; // fill in the lookup key describing this unpack
+ _vBlock.num = (u8&)v.vifRegs->num;
+ _vBlock.mode = (u8&)v.vifRegs->mode;
+ _vBlock.scl = v.vif->cl;
+ _vBlock.cl = cycle_cl;
+ _vBlock.wl = cycle_wl;
+
+ // Zero out the mask parameter if it's unused -- games leave random junk
+ // values here which cause false recblock cache misses.
+ _vBlock.mask = doMask ? v.vifRegs->mask : 0;
+
+ if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { // cache hit
+ if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
+ //DevCon.WriteLn("Running Recompiled Block!");
+ ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
+ }
+ else { // destination would run past VU memory: use the interpreter
+ //DevCon.WriteLn("Running Interpreter Block");
+ _nVifUnpack(idx, data, size, isFill);
+ }
+ return;
+ }
+ DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++);
+ //DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]",
+ // _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode,
+ // doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
+ //);
+
+ xSetPtr(v.recPtr); // cache miss: emit a new block at the current cache position
+ _vBlock.startPtr = (uptr)xGetAlignedCallTarget();
+ v.vifBlocks->add(_vBlock); // registered before compiling
+ VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
+ nVif[idx].recPtr = xGetPtr();
+
+ dVifRecLimit(idx); // may clear the whole cache, including the block just added; the call below then compiles it again
+
+ // Run the block we just compiled. Various conditions may force us to still use
+ // the interpreter unpacker though, so a recursive call is the safest way here...
+ dVifUnpack(idx, data, size, isFill);
+}
diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp
index f4e4d09143..64e81fbd10 100644
--- a/pcsx2/x86/newVif_Unpack.cpp
+++ b/pcsx2/x86/newVif_Unpack.cpp
@@ -19,9 +19,9 @@
#include "PrecompiledHeader.h"
#include "Common.h"
-#include "VifDma_internal.h"
+#include "VifDma.h"
#include "newVif.h"
-#include "newVif_OldUnpack.inl"
+#include "Vif_Unpack.inl"
__aligned16 nVifStruct nVif[2];
__aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
diff --git a/pcsx2/x86/VifUnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp
similarity index 96%
rename from pcsx2/x86/VifUnpackSSE.cpp
rename to pcsx2/x86/newVif_UnpackSSE.cpp
index 075a477868..9ddd912e37 100644
--- a/pcsx2/x86/VifUnpackSSE.cpp
+++ b/pcsx2/x86/newVif_UnpackSSE.cpp
@@ -1,310 +1,310 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see .
- */
-
-#include "PrecompiledHeader.h"
-#include "VifUnpackSSE.h"
-
-#define xMOV8(regX, loc) xMOVSSZX(regX, loc)
-#define xMOV16(regX, loc) xMOVSSZX(regX, loc)
-#define xMOV32(regX, loc) xMOVSSZX(regX, loc)
-#define xMOV64(regX, loc) xMOVUPS(regX, loc)
-#define xMOV128(regX, loc) xMOVUPS(regX, loc)
-
-static __pagealigned u8 nVifUpkExec[__pagesize*4];
-
-// Merges xmm vectors without modifying source reg
-void mergeVectors(int dest, int src, int temp, int xyzw) {
- if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15)
- || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) {
- mVUmergeRegs(dest, src, xyzw);
- }
- else {
- SSE_MOVAPS_XMM_to_XMM(temp, src);
- mVUmergeRegs(dest, temp, xyzw);
- }
-}
-
-// Loads Row/Col Data from vifRegs instead of g_vifmask
-// Useful for testing vifReg and g_vifmask inconsistency.
-void loadRowCol(nVifStruct& v) {
- xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
- xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
- xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
- xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
- xPSHUF.D(xmm0, xmm0, _v0);
- xPSHUF.D(xmm1, xmm1, _v0);
- xPSHUF.D(xmm2, xmm2, _v0);
- xPSHUF.D(xmm6, xmm6, _v0);
- mVUmergeRegs(XMM6, XMM0, 8);
- mVUmergeRegs(XMM6, XMM1, 4);
- mVUmergeRegs(XMM6, XMM2, 2);
- xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
- xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
- xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
- xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
- xPSHUF.D(xmm2, xmm2, _v0);
- xPSHUF.D(xmm3, xmm3, _v0);
- xPSHUF.D(xmm4, xmm4, _v0);
- xPSHUF.D(xmm5, xmm5, _v0);
-}
-
-// =====================================================================================================
-// VifUnpackSSE_Base Section
-// =====================================================================================================
-VifUnpackSSE_Base::VifUnpackSSE_Base()
- : dstIndirect(ecx) // parameter 1 of __fastcall
- , srcIndirect(edx) // parameter 2 of __fastcall
- , workReg( xmm1 )
- , destReg( xmm0 )
-{
-}
-
-void VifUnpackSSE_Base::xMovDest() const {
- if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
- else { doMaskWrite(destReg); }
-}
-
-void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
- if (usn) { xPSRL.D(regX, n); }
- else { xPSRA.D(regX, n); }
-}
-
-void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
- if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]);
- else xPMOVSX.BD(regX, ptr32[srcIndirect]);
-}
-
-void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
- if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]);
- else xPMOVSX.WD(regX, ptr64[srcIndirect]);
-}
-
-void VifUnpackSSE_Base::xUPK_S_32() const {
- xMOV32 (workReg, ptr32[srcIndirect]);
- xPSHUF.D (destReg, workReg, _v0);
-}
-
-void VifUnpackSSE_Base::xUPK_S_16() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX16 (workReg);
-}
-else {
- xMOV16 (workReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(workReg, workReg);
- xShiftR (workReg, 16);
-}
- xPSHUF.D (destReg, workReg, _v0);
-}
-
-void VifUnpackSSE_Base::xUPK_S_8() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX8 (workReg);
-}
-else {
- xMOV8 (workReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(workReg, workReg);
- xPUNPCK.LWD(workReg, workReg);
- xShiftR (workReg, 24);
-}
- xPSHUF.D (destReg, workReg, _v0);
-}
-
-void VifUnpackSSE_Base::xUPK_V2_32() const {
- xMOV64 (destReg, ptr32[srcIndirect]);
-}
-
-void VifUnpackSSE_Base::xUPK_V2_16() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX16 (destReg);
-}
-else {
- xMOV32 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 16);
-}
-}
-
-void VifUnpackSSE_Base::xUPK_V2_8() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX8 (destReg);
-}
-else {
- xMOV16 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(destReg, destReg);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 24);
-}
-}
-
-void VifUnpackSSE_Base::xUPK_V3_32() const {
- xMOV128 (destReg, ptr32[srcIndirect]);
-}
-
-void VifUnpackSSE_Base::xUPK_V3_16() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX16 (destReg);
-}
-else {
- xMOV64 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 16);
-}
-}
-
-void VifUnpackSSE_Base::xUPK_V3_8() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX8 (destReg);
-}
-else {
- xMOV32 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(destReg, destReg);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 24);
-}
-}
-
-void VifUnpackSSE_Base::xUPK_V4_32() const {
- xMOV128 (destReg, ptr32[srcIndirect]);
-}
-
-void VifUnpackSSE_Base::xUPK_V4_16() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX16 (destReg);
-}
-else {
- xMOV64 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 16);
-}
-}
-
-void VifUnpackSSE_Base::xUPK_V4_8() const {
-if (x86caps.hasStreamingSIMD4Extensions) {
- xPMOVXX8 (destReg);
-}
-else {
- xMOV32 (destReg, ptr32[srcIndirect]);
- xPUNPCK.LBW(destReg, destReg);
- xPUNPCK.LWD(destReg, destReg);
- xShiftR (destReg, 24);
-}
-}
-
-void VifUnpackSSE_Base::xUPK_V4_5() const {
- xMOV16 (workReg, ptr32[srcIndirect]);
- xPSHUF.D (workReg, workReg, _v0);
- xPSLL.D (workReg, 3); // ABG|R5.000
- xMOVAPS (destReg, workReg); // x|x|x|R
- xPSRL.D (workReg, 8); // ABG
- xPSLL.D (workReg, 3); // AB|G5.000
- mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R
- xPSRL.D (workReg, 8); // AB
- xPSLL.D (workReg, 3); // A|B5.000
- mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R
- xPSRL.D (workReg, 8); // A
- xPSLL.D (workReg, 7); // A.0000000
- mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R
- xPSLL.D (destReg, 24); // can optimize to
- xPSRL.D (destReg, 24); // single AND...
-}
-
-void VifUnpackSSE_Base::xUnpack( int upknum ) const
-{
- switch( upknum )
- {
- case 0: xUPK_S_32(); break;
- case 1: xUPK_S_16(); break;
- case 2: xUPK_S_8(); break;
-
- case 4: xUPK_V2_32(); break;
- case 5: xUPK_V2_16(); break;
- case 6: xUPK_V2_8(); break;
-
- case 8: xUPK_V3_32(); break;
- case 9: xUPK_V3_16(); break;
- case 10: xUPK_V3_8(); break;
-
- case 12: xUPK_V4_32(); break;
- case 13: xUPK_V4_16(); break;
- case 14: xUPK_V4_8(); break;
- case 15: xUPK_V4_5(); break;
-
- case 3:
- case 7:
- case 11:
- pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
- break;
- }
-}
-
-// =====================================================================================================
-// VifUnpackSSE_Simple
-// =====================================================================================================
-
-VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
-{
- curCycle = curCycle_;
- usn = usn_;
- doMask = domask_;
-}
-
-void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const {
- xMOVAPS(xmm7, ptr[dstIndirect]);
- int offX = aMin(curCycle, 3);
- xPAND(regX, ptr32[nVifMask[0][offX]]);
- xPAND(xmm7, ptr32[nVifMask[1][offX]]);
- xPOR (regX, ptr32[nVifMask[2][offX]]);
- xPOR (regX, xmm7);
- xMOVAPS(ptr[dstIndirect], regX);
-}
-
-// ecx = dest, edx = src
-static void nVifGen(int usn, int mask, int curCycle) {
-
- int usnpart = usn*2*16;
- int maskpart = mask*16;
-
- VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle );
-
- for( int i=0; i<16; ++i )
- {
- nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] );
- ucall = NULL;
- if( nVifT[i] == 0 ) continue;
-
- ucall = (nVifCall)xGetAlignedCallTarget();
- vpugen.xUnpack(i);
- vpugen.xMovDest();
- xRET();
-
- pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
- }
-}
-
-void VifUnpackSSE_Init()
-{
- HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
- memset8<0xcc>( nVifUpkExec );
-
- xSetPtr( nVifUpkExec );
-
- for (int a = 0; a < 2; a++) {
- for (int b = 0; b < 2; b++) {
- for (int c = 0; c < 4; c++) {
- nVifGen(a, b, c);
- }}}
-
- HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
-}
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see .
+ */
+
+#include "PrecompiledHeader.h"
+#include "newVif_UnpackSSE.h"
+
+#define xMOV8(regX, loc) xMOVSSZX(regX, loc) // the 8/16/32 names all expand to the same xMOVSSZX load;
+#define xMOV16(regX, loc) xMOVSSZX(regX, loc) // the number states how much of the loaded data
+#define xMOV32(regX, loc) xMOVSSZX(regX, loc) // the calling unpack routine goes on to use
+#define xMOV64(regX, loc) xMOVUPS(regX, loc) // 64 and 128 both expand to the same xMOVUPS load
+#define xMOV128(regX, loc) xMOVUPS(regX, loc)
+
+static __pagealigned u8 nVifUpkExec[__pagesize*4]; // buffer that VifUnpackSSE_Init fills with the generated unpack routines
+
+// Merges the lanes selected by 'xyzw' from xmm 'src' into xmm 'dest' without modifying 'src'.
+void mergeVectors(int dest, int src, int temp, int xyzw) {
+ // With SSE4, or for these lane selections, the merge is issued on 'src' itself.
+ const bool direct = x86caps.hasStreamingSIMD4Extensions
+  || (xyzw==15) || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3);
+ if (!direct) {
+  SSE_MOVAPS_XMM_to_XMM(temp, src); // otherwise the merge works on a copy held in 'temp'
+  src = temp;
+ }
+ mVUmergeRegs(dest, src, xyzw);
+}
+
+// Loads Row/Col Data from vifRegs instead of g_vifmask
+// Useful for testing vifReg and g_vifmask inconsistency.
+void loadRowCol(nVifStruct& v) { // emits loads of r0-r3 (combined into xmm6) and c0-c3 (into xmm2-xmm5)
+ xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
+ xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
+ xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
+ xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
+ xPSHUF.D(xmm0, xmm0, _v0);
+ xPSHUF.D(xmm1, xmm1, _v0);
+ xPSHUF.D(xmm2, xmm2, _v0);
+ xPSHUF.D(xmm6, xmm6, _v0);
+ mVUmergeRegs(XMM6, XMM0, 8); // fold r0, r1 and r2 into xmm6, one lane selector each
+ mVUmergeRegs(XMM6, XMM1, 4);
+ mVUmergeRegs(XMM6, XMM2, 2);
+ xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]); // xmm2 is free again after the merge above and now takes c0
+ xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
+ xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
+ xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
+ xPSHUF.D(xmm2, xmm2, _v0);
+ xPSHUF.D(xmm3, xmm3, _v0);
+ xPSHUF.D(xmm4, xmm4, _v0);
+ xPSHUF.D(xmm5, xmm5, _v0);
+}
+
+// =====================================================================================================
+// VifUnpackSSE_Base Section
+// =====================================================================================================
+VifUnpackSSE_Base::VifUnpackSSE_Base()
+ : dstIndirect(ecx) // parameter 1 of __fastcall
+ , srcIndirect(edx) // parameter 2 of __fastcall
+ , workReg( xmm1 ) // scratch register used by the unpack emitters
+ , destReg( xmm0 ) // register the unpacked vector is left in for xMovDest
+{
+}
+
+void VifUnpackSSE_Base::xMovDest() const {
+ if (!IsUnmaskedOp()) { doMaskWrite(destReg); } // masked: the subclass emits the merge and the store
+ else { xMOVAPS (ptr[dstIndirect], destReg); } // unmasked: plain aligned store of destReg
+}
+
+void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
+ if (!usn) { xPSRA.D(regX, n); } // signed data: PSRA right shift
+ else { xPSRL.D(regX, n); } // unsigned data: PSRL right shift
+}
+
+void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
+ if (!usn) xPMOVSX.BD(regX, ptr32[srcIndirect]); // signed data: PMOVSX byte -> dword load
+ else xPMOVZX.BD(regX, ptr32[srcIndirect]); // unsigned data: PMOVZX byte -> dword load
+}
+
+void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
+ if (!usn) xPMOVSX.WD(regX, ptr64[srcIndirect]); // signed data: PMOVSX word -> dword load
+ else xPMOVZX.WD(regX, ptr64[srcIndirect]); // unsigned data: PMOVZX word -> dword load
+}
+
+void VifUnpackSSE_Base::xUPK_S_32() const { // unpack type 0 (S-32): load one value, then shuffle it into destReg
+ xMOV32 (workReg, ptr32[srcIndirect]);
+ xPSHUF.D (destReg, workReg, _v0); // presumably broadcasts the first lane to all four (_v0 is defined elsewhere)
+}
+
+void VifUnpackSSE_Base::xUPK_S_16() const { // unpack type 1 (S-16): extend a 16-bit value to 32 bits, then shuffle it into destReg
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX16 (workReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV16 (workReg, ptr32[srcIndirect]);
+ xPUNPCK.LWD(workReg, workReg); // duplicate each word into both halves of a dword...
+ xShiftR (workReg, 16); // ...then shift it back down, which sign- or zero-extends it
+}
+ xPSHUF.D (destReg, workReg, _v0); // presumably broadcasts the first lane to all four (_v0 is defined elsewhere)
+}
+
+void VifUnpackSSE_Base::xUPK_S_8() const { // unpack type 2 (S-8): extend an 8-bit value to 32 bits, then shuffle it into destReg
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX8 (workReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV8 (workReg, ptr32[srcIndirect]);
+ xPUNPCK.LBW(workReg, workReg); // two self-interleaves replicate each byte across a dword...
+ xPUNPCK.LWD(workReg, workReg);
+ xShiftR (workReg, 24); // ...then shift it back down, which sign- or zero-extends it
+}
+ xPSHUF.D (destReg, workReg, _v0); // presumably broadcasts the first lane to all four (_v0 is defined elsewhere)
+}
+
+void VifUnpackSSE_Base::xUPK_V2_32() const { // unpack type 4 (V2-32): the 32-bit values need no conversion
+ xMOV64 (destReg, ptr32[srcIndirect]); // xMOV64 expands to xMOVUPS, so a whole xmm register is loaded from the source
+}
+
+void VifUnpackSSE_Base::xUPK_V2_16() const { // unpack type 5 (V2-16): extend 16-bit values to 32 bits in destReg
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX16 (destReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV32 (destReg, ptr32[srcIndirect]);
+ xPUNPCK.LWD(destReg, destReg); // duplicate each word into both halves of a dword...
+ xShiftR (destReg, 16); // ...then shift it back down, which sign- or zero-extends it
+}
+}
+
+void VifUnpackSSE_Base::xUPK_V2_8() const { // unpack type 6 (V2-8): extend 8-bit values to 32 bits in destReg
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX8 (destReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV16 (destReg, ptr32[srcIndirect]);
+ xPUNPCK.LBW(destReg, destReg); // two self-interleaves replicate each byte across a dword...
+ xPUNPCK.LWD(destReg, destReg);
+ xShiftR (destReg, 24); // ...then shift it back down, which sign- or zero-extends it
+}
+}
+
+void VifUnpackSSE_Base::xUPK_V3_32() const { // unpack type 8 (V3-32): same code as xUPK_V4_32
+ xMOV128 (destReg, ptr32[srcIndirect]); // NOTE(review): loads a full xmm register, so the 4th lane presumably comes from the data that follows -- confirm intended
+}
+
+void VifUnpackSSE_Base::xUPK_V3_16() const { // unpack type 9 (V3-16): same code as xUPK_V4_16
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX16 (destReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV64 (destReg, ptr32[srcIndirect]);
+ xPUNPCK.LWD(destReg, destReg); // duplicate each word into both halves of a dword...
+ xShiftR (destReg, 16); // ...then shift it back down, which sign- or zero-extends it
+}
+}
+
+void VifUnpackSSE_Base::xUPK_V3_8() const { // unpack type 10 (V3-8): same code as xUPK_V4_8
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX8 (destReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV32 (destReg, ptr32[srcIndirect]);
+ xPUNPCK.LBW(destReg, destReg); // two self-interleaves replicate each byte across a dword...
+ xPUNPCK.LWD(destReg, destReg);
+ xShiftR (destReg, 24); // ...then shift it back down, which sign- or zero-extends it
+}
+}
+
+void VifUnpackSSE_Base::xUPK_V4_32() const { // unpack type 12 (V4-32): the data is already four 32-bit values
+ xMOV128 (destReg, ptr32[srcIndirect]); // single unaligned load (xMOV128 expands to xMOVUPS)
+}
+
+void VifUnpackSSE_Base::xUPK_V4_16() const { // unpack type 13 (V4-16): extend four 16-bit values to 32 bits in destReg
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX16 (destReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV64 (destReg, ptr32[srcIndirect]);
+ xPUNPCK.LWD(destReg, destReg); // duplicate each word into both halves of a dword...
+ xShiftR (destReg, 16); // ...then shift it back down, which sign- or zero-extends it
+}
+}
+
+void VifUnpackSSE_Base::xUPK_V4_8() const { // unpack type 14 (V4-8): extend four 8-bit values to 32 bits in destReg
+if (x86caps.hasStreamingSIMD4Extensions) {
+ xPMOVXX8 (destReg); // SSE4: extending load, signed or unsigned per usn
+}
+else {
+ xMOV32 (destReg, ptr32[srcIndirect]);
+ xPUNPCK.LBW(destReg, destReg); // two self-interleaves replicate each byte across a dword...
+ xPUNPCK.LWD(destReg, destReg);
+ xShiftR (destReg, 24); // ...then shift it back down, which sign- or zero-extends it
+}
+}
+
+void VifUnpackSSE_Base::xUPK_V4_5() const { // unpack type 15 (V4-5): splits one packed 16-bit value into four lanes, one byte each
+ xMOV16 (workReg, ptr32[srcIndirect]);
+ xPSHUF.D (workReg, workReg, _v0); // presumably copies the loaded value to every lane (_v0 is defined elsewhere)
+ xPSLL.D (workReg, 3); // ABG|R5.000
+ xMOVAPS (destReg, workReg); // x|x|x|R
+ xPSRL.D (workReg, 8); // ABG
+ xPSLL.D (workReg, 3); // AB|G5.000
+ mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R
+ xPSRL.D (workReg, 8); // AB
+ xPSLL.D (workReg, 3); // A|B5.000
+ mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R
+ xPSRL.D (workReg, 8); // A
+ xPSLL.D (workReg, 7); // A.0000000
+ mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R
+ xPSLL.D (destReg, 24); // can optimize to
+ xPSRL.D (destReg, 24); // single AND...
+}
+
+void VifUnpackSSE_Base::xUnpack( int upknum ) const // emits the unpack sequence for type 'upknum'; values outside 0-15 emit nothing
+{
+ switch( upknum )
+ {
+ case 0: xUPK_S_32(); break;
+ case 1: xUPK_S_16(); break;
+ case 2: xUPK_S_8(); break;
+
+ case 4: xUPK_V2_32(); break;
+ case 5: xUPK_V2_16(); break;
+ case 6: xUPK_V2_8(); break;
+
+ case 8: xUPK_V3_32(); break;
+ case 9: xUPK_V3_16(); break;
+ case 10: xUPK_V3_8(); break;
+
+ case 12: xUPK_V4_32(); break;
+ case 13: xUPK_V4_16(); break;
+ case 14: xUPK_V4_8(); break;
+ case 15: xUPK_V4_5(); break;
+
+ case 3: // types 3, 7 and 11 have no unpack routine
+ case 7:
+ case 11:
+ pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
+ break;
+ }
+}
+
+// =====================================================================================================
+// VifUnpackSSE_Simple
+// =====================================================================================================
+
+VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
+{
+ usn = usn_; // unsigned flag
+ doMask = domask_; // masked-write flag
+ curCycle = curCycle_; // cycle this generator emits for; selects the nVifMask entry in doMaskWrite
+}
+
+void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { // emits: dest = (regX & mask[0]) | (dest & mask[1]) | mask[2]
+ xMOVAPS(xmm7, ptr[dstIndirect]); // current contents of the destination
+ int offX = aMin(curCycle, 3); // cycle index into nVifMask, clamped to 3
+ xPAND(regX, ptr32[nVifMask[0][offX]]); // keep the selected part of the new data
+ xPAND(xmm7, ptr32[nVifMask[1][offX]]); // keep the selected part of the old data
+ xPOR (regX, ptr32[nVifMask[2][offX]]); // OR in the third table; presumably pre-masked row/column data -- TODO confirm
+ xPOR (regX, xmm7);
+ xMOVAPS(ptr[dstIndirect], regX);
+}
+
+// ecx = dest, edx = src
+static void nVifGen(int usn, int mask, int curCycle) { // emits the 16 per-type routines for one (usn, mask, cycle) combination and records them in nVifUpk
+
+ int usnpart = usn*2*16; // table layout is [usn][mask][type][cycle]
+ int maskpart = mask*16;
+
+ VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle );
+
+ for( int i=0; i<16; ++i )
+ {
+ nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] );
+ ucall = NULL;
+ if( nVifT[i] == 0 ) continue; // types with a zero nVifT entry get no routine; their slot stays NULL
+
+ ucall = (nVifCall)xGetAlignedCallTarget();
+ vpugen.xUnpack(i);
+ vpugen.xMovDest();
+ xRET();
+
+ pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) ); // the emit pointer must still be inside nVifUpkExec
+ }
+}
+
+void VifUnpackSSE_Init()
+{
+ // Generates every simple unpack routine (usn x mask x cycle) into nVifUpkExec.
+ // The buffer is made writable and filled with 0xcc first, and is switched to
+ // Protect_ReadOnly once all routines have been emitted.
+ HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
+ memset8<0xcc>( nVifUpkExec );
+ xSetPtr( nVifUpkExec );
+
+ for (int usn = 0; usn < 2; usn++)
+  for (int mask = 0; mask < 2; mask++)
+   for (int cycle = 0; cycle < 4; cycle++)
+    nVifGen(usn, mask, cycle);
+ HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
+}
diff --git a/pcsx2/x86/VifUnpackSSE.h b/pcsx2/x86/newVif_UnpackSSE.h
similarity index 96%
rename from pcsx2/x86/VifUnpackSSE.h
rename to pcsx2/x86/newVif_UnpackSSE.h
index d8ea2b38fe..4da4454a5e 100644
--- a/pcsx2/x86/VifUnpackSSE.h
+++ b/pcsx2/x86/newVif_UnpackSSE.h
@@ -1,145 +1,145 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-#pragma once
-
-#include "Common.h"
-#include "VifDma_internal.h"
-#include "newVif.h"
-
-#include
-#include
-
-using namespace x86Emitter;
-
-extern void mergeVectors(int dest, int src, int temp, int xyzw);
-extern void loadRowCol(nVifStruct& v);
-
-// --------------------------------------------------------------------------------------
-// VifUnpackSSE_Base
-// --------------------------------------------------------------------------------------
-class VifUnpackSSE_Base
-{
-public:
- bool usn; // unsigned flag
- bool doMask; // masking write enable flag
-
-protected:
- xAddressInfo dstIndirect;
- xAddressInfo srcIndirect;
- xRegisterSSE workReg;
- xRegisterSSE destReg;
-
-public:
- VifUnpackSSE_Base();
- virtual ~VifUnpackSSE_Base() throw() {}
-
- virtual void xUnpack( int upktype ) const;
- virtual bool IsUnmaskedOp() const=0;
- virtual void xMovDest() const;
-
-protected:
- virtual void doMaskWrite(const xRegisterSSE& regX ) const=0;
-
- virtual void xShiftR(const xRegisterSSE& regX, int n) const;
- virtual void xPMOVXX8(const xRegisterSSE& regX) const;
- virtual void xPMOVXX16(const xRegisterSSE& regX) const;
-
- virtual void xUPK_S_32() const;
- virtual void xUPK_S_16() const;
- virtual void xUPK_S_8() const;
-
- virtual void xUPK_V2_32() const;
- virtual void xUPK_V2_16() const;
- virtual void xUPK_V2_8() const;
-
- virtual void xUPK_V3_32() const;
- virtual void xUPK_V3_16() const;
- virtual void xUPK_V3_8() const;
-
- virtual void xUPK_V4_32() const;
- virtual void xUPK_V4_16() const;
- virtual void xUPK_V4_8() const;
- virtual void xUPK_V4_5() const;
-
-};
-
-// --------------------------------------------------------------------------------------
-// VifUnpackSSE_Simple
-// --------------------------------------------------------------------------------------
-class VifUnpackSSE_Simple : public VifUnpackSSE_Base
-{
- typedef VifUnpackSSE_Base _parent;
-
-public:
- int curCycle;
-
-public:
- VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
- virtual ~VifUnpackSSE_Simple() throw() {}
-
- virtual bool IsUnmaskedOp() const{ return !doMask; }
-
-protected:
- virtual void doMaskWrite(const xRegisterSSE& regX ) const;
-};
-
-// --------------------------------------------------------------------------------------
-// VifUnpackSSE_Dynarec
-// --------------------------------------------------------------------------------------
-class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
-{
- typedef VifUnpackSSE_Base _parent;
-
-public:
- bool isFill;
- int doMode; // two bit value representing... something!
-
-protected:
- const nVifStruct& v; // vif0 or vif1
- const nVifBlock& vB; // some pre-collected data from VifStruct
- int vCL; // internal copy of vif->cl
-
-public:
- VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);
- VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor
- : _parent(src)
- , v(src.v)
- , vB(src.vB)
- {
- isFill = src.isFill;
- vCL = src.vCL;
- }
-
- virtual ~VifUnpackSSE_Dynarec() throw() {}
-
- virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; }
-
- void CompileRoutine();
-
-protected:
- virtual void doMaskWrite(const xRegisterSSE& regX) const;
- void SetMasks(int cS) const;
- void writeBackRow() const;
-
- static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src )
- {
- VifUnpackSSE_Dynarec fillingWrite( src );
- fillingWrite.doMask = true;
- fillingWrite.doMode = 0;
- return fillingWrite;
- }
-};
-
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "Common.h"
+#include "VifDma.h"
+#include "newVif.h"
+
+#include
+#include
+
+using namespace x86Emitter;
+
+extern void mergeVectors(int dest, int src, int temp, int xyzw);
+extern void loadRowCol(nVifStruct& v);
+
+// --------------------------------------------------------------------------------------
+// VifUnpackSSE_Base
+// --------------------------------------------------------------------------------------
+class VifUnpackSSE_Base // abstract base of the SSE unpack emitters; subclasses supply IsUnmaskedOp() and doMaskWrite()
+{
+public:
+ bool usn; // unsigned flag
+ bool doMask; // masking write enable flag
+
+protected:
+ xAddressInfo dstIndirect; // address operand that doMaskWrite implementations load from and store to
+ xAddressInfo srcIndirect; // presumably the matching source operand - confirm in the .cpp
+ xRegisterSSE workReg; // NOTE(review): role not visible in this header; presumably a scratch register
+ xRegisterSSE destReg; // NOTE(review): role not visible in this header; presumably holds the unpacked result
+
+public:
+ VifUnpackSSE_Base();
+ virtual ~VifUnpackSSE_Base() throw() {}
+
+ virtual void xUnpack( int upktype ) const; // dispatches on upktype to one of the xUPK_* emitters below
+ virtual bool IsUnmaskedOp() const=0; // pure virtual: each subclass decides when no mask handling applies
+ virtual void xMovDest() const;
+
+protected:
+ virtual void doMaskWrite(const xRegisterSSE& regX ) const=0; // pure virtual: subclass-specific masked store of regX
+
+ virtual void xShiftR(const xRegisterSSE& regX, int n) const;
+ virtual void xPMOVXX8(const xRegisterSSE& regX) const;
+ virtual void xPMOVXX16(const xRegisterSSE& regX) const;
+
+ virtual void xUPK_S_32() const; // xUPK_*: one emitter per unpack format; names presumably encode
+ virtual void xUPK_S_16() const; // the vector shape (S/V2/V3/V4) and element bit width - confirm
+ virtual void xUPK_S_8() const;
+
+ virtual void xUPK_V2_32() const;
+ virtual void xUPK_V2_16() const;
+ virtual void xUPK_V2_8() const;
+
+ virtual void xUPK_V3_32() const;
+ virtual void xUPK_V3_16() const;
+ virtual void xUPK_V3_8() const;
+
+ virtual void xUPK_V4_32() const;
+ virtual void xUPK_V4_16() const;
+ virtual void xUPK_V4_8() const;
+ virtual void xUPK_V4_5() const;
+
+};
+
+// --------------------------------------------------------------------------------------
+// VifUnpackSSE_Simple
+// --------------------------------------------------------------------------------------
+class VifUnpackSSE_Simple : public VifUnpackSSE_Base // emitter variant configured once with usn, doMask and a fixed curCycle
+{
+ typedef VifUnpackSSE_Base _parent;
+
+public:
+ int curCycle; // taken from the constructor argument
+
+public:
+ VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_); // stores the three arguments in usn, doMask and curCycle
+ virtual ~VifUnpackSSE_Simple() throw() {}
+
+ virtual bool IsUnmaskedOp() const{ return !doMask; } // unmasked exactly when the doMask flag is clear
+
+protected:
+ virtual void doMaskWrite(const xRegisterSSE& regX ) const; // implements the base class's pure virtual masked store
+};
+
+// --------------------------------------------------------------------------------------
+// VifUnpackSSE_Dynarec
+// --------------------------------------------------------------------------------------
+class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base // unpack emitter bound to one nVifStruct / nVifBlock pair
+{
+ typedef VifUnpackSSE_Base _parent;
+
+public:
+ bool isFill;
+ int doMode; // two bit value representing... something!
+
+protected:
+ const nVifStruct& v; // vif0 or vif1
+ const nVifBlock& vB; // some pre-collected data from VifStruct
+ int vCL; // internal copy of vif->cl
+
+public:
+ VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);
+ VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor; members listed in declaration order
+ : _parent(src)
+ , isFill(src.isFill)
+ , doMode(src.doMode) // must be copied: otherwise every copy carries an indeterminate doMode
+ , v(src.v)
+ , vB(src.vB)
+ , vCL(src.vCL)
+ {}
+
+ virtual ~VifUnpackSSE_Dynarec() throw() {}
+
+ virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; } // unmasked only when both doMode and doMask are clear
+
+ void CompileRoutine();
+
+protected:
+ virtual void doMaskWrite(const xRegisterSSE& regX) const;
+ void SetMasks(int cS) const;
+ void writeBackRow() const;
+
+ static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src ) // copy of src with doMask forced on and doMode forced to 0
+ {
+ VifUnpackSSE_Dynarec fillingWrite( src );
+ fillingWrite.doMask = true;
+ fillingWrite.doMode = 0;
+ return fillingWrite; // returned by value: may be copy-constructed again when the copy is not elided
+ }
+};
+