diff --git a/pcsx2/Vif0Dma.cpp b/pcsx2/Vif0Dma.cpp index 2e0d738176..6f3d882412 100644 --- a/pcsx2/Vif0Dma.cpp +++ b/pcsx2/Vif0Dma.cpp @@ -16,8 +16,7 @@ #include "PrecompiledHeader.h" #include "Common.h" - -#include "VifDma_internal.h" +#include "VifDma.h" #include "VUmicro.h" #include "newVif.h" diff --git a/pcsx2/Vif1Dma.cpp b/pcsx2/Vif1Dma.cpp index 5cfa00195e..bca9ebd154 100644 --- a/pcsx2/Vif1Dma.cpp +++ b/pcsx2/Vif1Dma.cpp @@ -16,9 +16,7 @@ #include "PrecompiledHeader.h" #include "Common.h" - -#include "VifDma_internal.h" - +#include "VifDma.h" #include "GS.h" #include "Gif.h" #include "VUmicro.h" diff --git a/pcsx2/VifDma.cpp b/pcsx2/VifDma.cpp index a542b213a6..e20c720f93 100644 --- a/pcsx2/VifDma.cpp +++ b/pcsx2/VifDma.cpp @@ -16,7 +16,7 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "VifDma_internal.h" +#include "VifDma.h" #include "VUmicro.h" int g_vifCycles = 0; diff --git a/pcsx2/VifDma.h b/pcsx2/VifDma.h index 0b6c58737a..d82d6d7c86 100644 --- a/pcsx2/VifDma.h +++ b/pcsx2/VifDma.h @@ -12,8 +12,9 @@ * You should have received a copy of the GNU General Public License along with PCSX2. * If not, see . */ -#ifndef __VIFDMA_H__ -#define __VIFDMA_H__ + +#pragma once +#include "Vif_Unpack.h" struct vifCode { u32 addr; @@ -43,9 +44,10 @@ struct vifStruct { u8 dmamode; }; -extern vifStruct vif0, vif1; -extern u8 schedulepath3msk; -static const int VifCycleVoodoo = 4; +extern vifStruct* vif; +extern vifStruct vif0, vif1; +extern u8 schedulepath3msk; +static const int VifCycleVoodoo = 4; extern void vif0Init(); extern void vif0Interrupt(); @@ -63,4 +65,20 @@ __forceinline static int _limit(int a, int max) return ((a > max) ? max : a); } -#endif +enum VifModes +{ + VIF_NORMAL_TO_MEM_MODE = 0, + VIF_NORMAL_FROM_MEM_MODE = 1, + VIF_CHAIN_MODE = 2 +}; + +// Generic constants +static const unsigned int VIF0intc = 4; +static const unsigned int VIF1intc = 5; + +extern int g_vifCycles; + +template void vuExecMicro(u32 addr); +extern void vif0FLUSH(); +extern void vif1FLUSH(); + diff --git a/pcsx2/VifDma_internal.h b/pcsx2/VifDma_internal.h index 991d924780..4e7157cb19 100644 --- a/pcsx2/VifDma_internal.h +++ b/pcsx2/VifDma_internal.h @@ -13,68 +13,8 @@ * If not, see . */ -#ifndef __VIFDMA_INTERNAL_H__ -#define __VIFDMA_INTERNAL_H__ +#pragma once #include "VifDma.h" -enum VifModes -{ - VIF_NORMAL_TO_MEM_MODE = 0, - VIF_NORMAL_FROM_MEM_MODE = 1, - VIF_CHAIN_MODE = 2 -}; -// Generic constants -static const unsigned int VIF0intc = 4; -static const unsigned int VIF1intc = 5; - -typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data); -typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size); -typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size); - -#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, u##bits *data); -#define create_unpack_odd_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, u##bits *data, int size); -#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, s##bits *data); -#define create_unpack_odd_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, s##bits *data, int size); - -#define create_some_unpacks(bits) \ - create_unpack_u_type(bits); \ - create_unpack_odd_u_type(bits); \ - create_unpack_s_type(bits); \ - create_unpack_odd_s_type(bits); - -create_some_unpacks(32); -create_some_unpacks(16); -create_some_unpacks(8); - -struct VIFUnpackFuncTable -{ - UNPACKFUNCTYPE funcU; - UNPACKFUNCTYPE funcS; - - UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed. - UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed. - - u8 bsize; // currently unused - u8 dsize; // byte size of one channel - u8 gsize; // size of data in bytes used for each write cycle - u8 qsize; // used for unpack parts, num of vectors that - // will be decompressed from data for 1 cycle -}; - -extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32]; - -extern int g_vifCycles; -extern vifStruct *vif; - -template void VIFunpack(u32 *data, vifCode *v, u32 size); -template void vuExecMicro(u32 addr); -extern void vif0FLUSH(); -extern void vif1FLUSH(); - -extern int nVifUnpack (int idx, u8 *data); -extern void initNewVif (int idx); -extern void resetNewVif(int idx); - -#endif diff --git a/pcsx2/VIFunpack.cpp b/pcsx2/Vif_Unpack.cpp similarity index 96% rename from pcsx2/VIFunpack.cpp rename to pcsx2/Vif_Unpack.cpp index 3ffbc838de..f66a545aca 100644 --- a/pcsx2/VIFunpack.cpp +++ b/pcsx2/Vif_Unpack.cpp @@ -1,296 +1,295 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2009 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - - -#include "PrecompiledHeader.h" -#include "Common.h" -#include "Vif.h" -#include "VifDma_internal.h" - -enum UnpackOffset { - OFFSET_X = 0, - OFFSET_Y = 1, - OFFSET_Z = 2, - OFFSET_W = 3 -}; - -static __forceinline u32 setVifRowRegs(u32 reg, u32 data) { - switch (reg) { - case 0: vifRegs->r0 = data; break; - case 1: vifRegs->r1 = data; break; - case 2: vifRegs->r2 = data; break; - case 3: vifRegs->r3 = data; break; - jNO_DEFAULT; - } - return data; -} - -static __forceinline u32 getVifRowRegs(u32 reg) { - switch (reg) { - case 0: return vifRegs->r0; break; - case 1: return vifRegs->r1; break; - case 2: return vifRegs->r2; break; - case 3: return vifRegs->r3; break; - jNO_DEFAULT; - } - return 0; // unreachable... -} - -static __forceinline u32 getVifColRegs(u32 reg) { - switch (reg) { - case 0: return vifRegs->c0; break; - case 1: return vifRegs->c1; break; - case 2: return vifRegs->c2; break; - default: return vifRegs->c3; break; - } - return 0; // unreachable... -} - -template< bool doMask > -static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { - u32 vifRowReg = getVifRowRegs(offnum); - int n = 0; - - if (doMask) { - switch (vif->cl) { - case 0: n = (vifRegs->mask >> (offnum * 2)) & 0x3; break; - case 1: n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; - case 2: n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; - default: n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; - } - } - - switch (n) { - case 0: - if ((vif->cmd & 0x6F) != 0x6f) { - switch (vifRegs->mode) { - case 1: dest = data + vifRowReg; break; - case 2: dest = setVifRowRegs(offnum, vifRowReg + data); break; - default: dest = data; break; - } - } - else dest = data; // v4-5 Unpack Mode - break; - case 1: dest = vifRowReg; break; - case 2: dest = getVifColRegs(vif->cl); break; - case 3: break; - } -} - -template < bool doMask, class T > -static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size) -{ - //S-# will always be a complete packet, no matter what. So we can skip the offset bits - writeXYZW(OFFSET_X, *dest++, *data); - writeXYZW(OFFSET_Y, *dest++, *data); - writeXYZW(OFFSET_Z, *dest++, *data); - writeXYZW(OFFSET_W, *dest , *data); -} - -template -static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size) -{ - if (vifRegs->offset == OFFSET_X) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_Y; - size--; - } - } - - if (vifRegs->offset == OFFSET_Y) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data); - vifRegs->offset = OFFSET_Z; - size--; - } - } - - if (vifRegs->offset == OFFSET_Z) - { - writeXYZW(vifRegs->offset, *dest++, *dest-2); - vifRegs->offset = OFFSET_W; - } - - if (vifRegs->offset == OFFSET_W) - { - writeXYZW(vifRegs->offset, *dest, *data); - vifRegs->offset = OFFSET_X; - } -} - -template -static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size) -{ - if(vifRegs->offset == OFFSET_X) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_Y; - size--; - } - } - - if(vifRegs->offset == OFFSET_Y) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_Z; - size--; - } - } - - if(vifRegs->offset == OFFSET_Z) - { - if (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset = OFFSET_W; - size--; - } - } - - if(vifRegs->offset == OFFSET_W) - { - // V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!) - // Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate - writeXYZW(vifRegs->offset, *dest, *data); - vifRegs->offset = OFFSET_X; - } -} - -template -static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size) -{ - while (size > 0) - { - writeXYZW(vifRegs->offset, *dest++, *data++); - vifRegs->offset++; - size--; - } - - if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; -} - -template< bool doMask > -static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) -{ - //As with S-#, this will always be a complete packet - writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); - writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); - writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); - writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); -} - -// ===================================================================================================== - -template < bool doMask, int size, class T > -static void __fastcall fUNPACK_S(u32 *dest, T *data) -{ - UNPACK_S( dest, data, size ); -} - -template -static void __fastcall fUNPACK_V2(u32 *dest, T *data) -{ - UNPACK_V2( dest, data, size ); -} - -template -static void __fastcall fUNPACK_V3(u32 *dest, T *data) -{ - UNPACK_V3( dest, data, size ); -} - -template -static void __fastcall fUNPACK_V4(u32 *dest, T *data) -{ - UNPACK_V4( dest, data, size ); -} - -template< bool doMask > -static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data) -{ - UNPACK_V4_5(dest, data, 0); // size is ignored. -} - -// -------------------------------------------------------------------------------------- -// Main table for function unpacking. -// -------------------------------------------------------------------------------------- -// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and -// disabled versions. This is probably simpler and more efficient than bothering -// to generate separate tables. -// -// The double-cast function pointer nonsense is to appease GCC, which gives some rather -// cryptic error about being unable to deduce the type parameters (I think it's a bug -// relating to __fastcall, which I recall having some other places as well). It's fixed -// by explicitly casting the function to itself prior to casting it to what we need it -// to be cast as. --air -// - -#define _upk (UNPACKFUNCTYPE) -#define _odd (UNPACKFUNCTYPE_ODD) -#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits) -#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits) -#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits) -#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits) - -// 32-bits versions are unsigned-only!! -#define UnpackFuncPair32( sizefac, vt, doMask ) \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, - -#define UnpackFuncPair( sizefac, vt, bits, doMask ) \ - (UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt, \ - (UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt, \ - (UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt, - -#define UnpackFuncSet( doMask ) \ - { UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \ - { UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \ - { UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \ - { NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \ - { UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \ - { UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \ - { UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \ - { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \ - { UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \ - { UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \ - { UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \ - { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \ - { UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \ - { UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \ - { UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \ - { /* 0xF - V4-5 */ \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \ - (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \ - (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \ - 6, 2, 2, 4 }, - -const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] = -{ - UnpackFuncSet( false ) - UnpackFuncSet( true ) -}; +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "PrecompiledHeader.h" +#include "Common.h" +#include "Vif.h" +#include "VifDma.h" + +enum UnpackOffset { + OFFSET_X = 0, + OFFSET_Y = 1, + OFFSET_Z = 2, + OFFSET_W = 3 +}; + +static __forceinline u32 setVifRowRegs(u32 reg, u32 data) { + switch (reg) { + case 0: vifRegs->r0 = data; break; + case 1: vifRegs->r1 = data; break; + case 2: vifRegs->r2 = data; break; + case 3: vifRegs->r3 = data; break; + jNO_DEFAULT; + } + return data; +} + +static __forceinline u32 getVifRowRegs(u32 reg) { + switch (reg) { + case 0: return vifRegs->r0; break; + case 1: return vifRegs->r1; break; + case 2: return vifRegs->r2; break; + case 3: return vifRegs->r3; break; + jNO_DEFAULT; + } + return 0; // unreachable... +} + +static __forceinline u32 getVifColRegs(u32 reg) { + switch (reg) { + case 0: return vifRegs->c0; break; + case 1: return vifRegs->c1; break; + case 2: return vifRegs->c2; break; + default: return vifRegs->c3; break; + } + return 0; // unreachable... +} + +template< bool doMask > +static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) { + u32 vifRowReg = getVifRowRegs(offnum); + int n = 0; + + if (doMask) { + switch (vif->cl) { + case 0: n = (vifRegs->mask >> (offnum * 2)) & 0x3; break; + case 1: n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3; break; + case 2: n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3; break; + default: n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3; break; + } + } + + switch (n) { + case 0: + if ((vif->cmd & 0x6F) != 0x6f) { + switch (vifRegs->mode) { + case 1: dest = data + vifRowReg; break; + case 2: dest = setVifRowRegs(offnum, vifRowReg + data); break; + default: dest = data; break; + } + } + else dest = data; // v4-5 Unpack Mode + break; + case 1: dest = vifRowReg; break; + case 2: dest = getVifColRegs(vif->cl); break; + case 3: break; + } +} + +template < bool doMask, class T > +static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size) +{ + //S-# will always be a complete packet, no matter what. So we can skip the offset bits + writeXYZW(OFFSET_X, *dest++, *data); + writeXYZW(OFFSET_Y, *dest++, *data); + writeXYZW(OFFSET_Z, *dest++, *data); + writeXYZW(OFFSET_W, *dest , *data); +} + +template +static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size) +{ + if (vifRegs->offset == OFFSET_X) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; + size--; + } + } + + if (vifRegs->offset == OFFSET_Y) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data); + vifRegs->offset = OFFSET_Z; + size--; + } + } + + if (vifRegs->offset == OFFSET_Z) + { + writeXYZW(vifRegs->offset, *dest++, *dest-2); + vifRegs->offset = OFFSET_W; + } + + if (vifRegs->offset == OFFSET_W) + { + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; + } +} + +template +static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size) +{ + if(vifRegs->offset == OFFSET_X) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Y; + size--; + } + } + + if(vifRegs->offset == OFFSET_Y) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_Z; + size--; + } + } + + if(vifRegs->offset == OFFSET_Z) + { + if (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset = OFFSET_W; + size--; + } + } + + if(vifRegs->offset == OFFSET_W) + { + // V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!) + // Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate + writeXYZW(vifRegs->offset, *dest, *data); + vifRegs->offset = OFFSET_X; + } +} + +template +static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size) +{ + while (size > 0) + { + writeXYZW(vifRegs->offset, *dest++, *data++); + vifRegs->offset++; + size--; + } + + if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X; +} + +template< bool doMask > +static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size) +{ + //As with S-#, this will always be a complete packet + writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3)); + writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2)); + writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7)); + writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8)); +} + +// ===================================================================================================== + +template < bool doMask, int size, class T > +static void __fastcall fUNPACK_S(u32 *dest, T *data) +{ + UNPACK_S( dest, data, size ); +} + +template +static void __fastcall fUNPACK_V2(u32 *dest, T *data) +{ + UNPACK_V2( dest, data, size ); +} + +template +static void __fastcall fUNPACK_V3(u32 *dest, T *data) +{ + UNPACK_V3( dest, data, size ); +} + +template +static void __fastcall fUNPACK_V4(u32 *dest, T *data) +{ + UNPACK_V4( dest, data, size ); +} + +template< bool doMask > +static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data) +{ + UNPACK_V4_5(dest, data, 0); // size is ignored. +} + +// -------------------------------------------------------------------------------------- +// Main table for function unpacking. +// -------------------------------------------------------------------------------------- +// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and +// disabled versions. This is probably simpler and more efficient than bothering +// to generate separate tables. +// +// The double-cast function pointer nonsense is to appease GCC, which gives some rather +// cryptic error about being unable to deduce the type parameters (I think it's a bug +// relating to __fastcall, which I recall having some other places as well). It's fixed +// by explicitly casting the function to itself prior to casting it to what we need it +// to be cast as. --air +// + +#define _upk (UNPACKFUNCTYPE) +#define _odd (UNPACKFUNCTYPE_ODD) +#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits) +#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits) +#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits) +#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits) + +// 32-bits versions are unsigned-only!! +#define UnpackFuncPair32( sizefac, vt, doMask ) \ + (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \ + (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt, \ + (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, \ + (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt, + +#define UnpackFuncPair( sizefac, vt, bits, doMask ) \ + (UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt, \ + (UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt, \ + (UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt, \ + (UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt, + +#define UnpackFuncSet( doMask ) \ + { UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \ + { UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \ + { UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \ + { NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \ + { UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \ + { UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \ + { UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \ + { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \ + { UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \ + { UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \ + { UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \ + { NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \ + { UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \ + { UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \ + { UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \ + { /* 0xF - V4-5 */ \ + (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \ + (UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5, \ + (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \ + (UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5, \ + 6, 2, 2, 4 }, + +const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] = +{ + UnpackFuncSet( false ) + UnpackFuncSet( true ) +}; diff --git a/pcsx2/Vif_Unpack.h b/pcsx2/Vif_Unpack.h new file mode 100644 index 0000000000..efb723b85c --- /dev/null +++ b/pcsx2/Vif_Unpack.h @@ -0,0 +1,56 @@ +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data); +typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size); +typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size); + +#define create_unpack_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, u##bits *data); +#define create_unpack_odd_u_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, u##bits *data, int size); +#define create_unpack_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, s##bits *data); +#define create_unpack_odd_s_type(bits) typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, s##bits *data, int size); + +#define create_some_unpacks(bits) \ + create_unpack_u_type(bits); \ + create_unpack_odd_u_type(bits); \ + create_unpack_s_type(bits); \ + create_unpack_odd_s_type(bits); + +create_some_unpacks(32); +create_some_unpacks(16); +create_some_unpacks(8); + +struct VIFUnpackFuncTable +{ + UNPACKFUNCTYPE funcU; + UNPACKFUNCTYPE funcS; + + UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed. + UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed. + + u8 bsize; // currently unused + u8 dsize; // byte size of one channel + u8 gsize; // size of data in bytes used for each write cycle + u8 qsize; // used for unpack parts, num of vectors that + // will be decompressed from data for 1 cycle +}; + +extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32]; + +extern int nVifUnpack (int idx, u8 *data); +extern void initNewVif (int idx); +extern void resetNewVif(int idx); diff --git a/pcsx2/x86/newVif_OldUnpack.inl b/pcsx2/Vif_Unpack.inl similarity index 95% rename from pcsx2/x86/newVif_OldUnpack.inl rename to pcsx2/Vif_Unpack.inl index 19ddcbd081..97698a777b 100644 --- a/pcsx2/x86/newVif_OldUnpack.inl +++ b/pcsx2/Vif_Unpack.inl @@ -1,161 +1,159 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2009 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -// Old Vif Unpack Code -// Only here for testing/reference -// If newVif is defined and newVif1 isn't, vif1 will use this code -// same goes for vif0... -template void VIFunpack<0>(u32 *data, vifCode *v, u32 size); -template void VIFunpack<1>(u32 *data, vifCode *v, u32 size); -template void VIFunpack(u32 *data, vifCode *v, u32 size) { - //if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data); - VURegs * VU; - u8 *cdata = (u8*)data; - u32 tempsize = 0; - const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000; - - if (VIFdmanum == 0) { - VU = &VU0; - vifRegs = vif0Regs; - vif = &vif0; - } - else { - VU = &VU1; - vifRegs = vif1Regs; - vif = &vif1; - } - - u32 *dest = (u32*)(VU->Mem + v->addr); - - const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] ); - UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS; - - size <<= 2; - - if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write - if (v->addr >= memlimit) { - DevCon.Warning("Overflown at the start"); - v->addr &= (memlimit - 1); - dest = (u32*)(VU->Mem + v->addr); - } - - size = std::min(size, vifRegs->num * ft.gsize); //size will always be the same or smaller - - tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) * - (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16); - - //Sanity Check (memory overflow) - if (tempsize > memlimit) { - if (((vifRegs->cycle.cl != vifRegs->cycle.wl) && - ((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) { - //It's a red herring, so ignore it! SSE unpacks will be much quicker. - DevCon.WriteLn("what!!!!!!!!!"); - //tempsize = 0; - tempsize = size; - size = 0; - } - else { - DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); - tempsize = size; - size = 0; - } - } - else { - tempsize = size; - size = 0; - } - if (tempsize) { - int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; - size = 0; - int addrstart = v->addr; - //if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize"); - - VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr); - - while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) { - if(v->addr >= memlimit) { - DevCon.Warning("Mem limit overflow"); - v->addr &= (memlimit - 1); - dest = (u32*)(VU->Mem + v->addr); - } - - func(dest, (u32*)cdata); - cdata += ft.gsize; - tempsize -= ft.gsize; - - vifRegs->num--; - vif->cl++; - - if (vif->cl == vifRegs->cycle.wl) { - dest += incdest; - v->addr +=(incdest * 4); - vif->cl = 0; - } - else { - dest += 4; - v->addr += 16; - } - } - if (v->addr >= memlimit) { - v->addr &=(memlimit - 1); - dest = (u32*)(VU->Mem + v->addr); - } - v->addr = addrstart; - if(tempsize > 0) size = tempsize; - } - - if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have - DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!"); - VIF_LOG("warning, end with size = %d", size); - // unpack one qword - //v->addr += (size / ft.dsize) * 4; - (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize); - size = 0; - VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr); - } - } - else { // filling write - if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P - if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num) - DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl); - - DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr); - while (vifRegs->num > 0) { - if (vif->cl == vifRegs->cycle.wl) { - vif->cl = 0; - } - // unpack one qword - if (vif->cl < vifRegs->cycle.cl) { - if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; } - func(dest, (u32*)cdata); - cdata += ft.gsize; - size -= ft.gsize; - vif->cl++; - vifRegs->num--; - if (vif->cl == vifRegs->cycle.wl) { - vif->cl = 0; - } - } - else { - func(dest, (u32*)cdata); - v->addr += 16; - vifRegs->num--; - vif->cl++; - } - dest += 4; - if (vifRegs->num == 0) break; - } - } -} +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +// Old Vif Unpack Code +// Only here for testing/reference +template void VIFunpack(u32 *data, vifCode *v, u32 size) { + //if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data); + VURegs * VU; + u8 *cdata = (u8*)data; + u32 tempsize = 0; + const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000; + + if (VIFdmanum == 0) { + VU = &VU0; + vifRegs = vif0Regs; + vif = &vif0; + } + else { + VU = &VU1; + vifRegs = vif1Regs; + vif = &vif1; + } + + u32 *dest = (u32*)(VU->Mem + v->addr); + + const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] ); + UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS; + + size <<= 2; + + if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write + if (v->addr >= memlimit) { + DevCon.Warning("Overflown at the start"); + v->addr &= (memlimit - 1); + dest = (u32*)(VU->Mem + v->addr); + } + + size = std::min(size, vifRegs->num * ft.gsize); //size will always be the same or smaller + + tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) * + (vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16); + + //Sanity Check (memory overflow) + if (tempsize > memlimit) { + if (((vifRegs->cycle.cl != vifRegs->cycle.wl) && + ((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) { + //It's a red herring, so ignore it! SSE unpacks will be much quicker. + DevCon.WriteLn("what!!!!!!!!!"); + //tempsize = 0; + tempsize = size; + size = 0; + } + else { + DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000); + tempsize = size; + size = 0; + } + } + else { + tempsize = size; + size = 0; + } + if (tempsize) { + int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4; + size = 0; + int addrstart = v->addr; + //if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize"); + + VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr); + + while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) { + if(v->addr >= memlimit) { + DevCon.Warning("Mem limit overflow"); + v->addr &= (memlimit - 1); + dest = (u32*)(VU->Mem + v->addr); + } + + func(dest, (u32*)cdata); + cdata += ft.gsize; + tempsize -= ft.gsize; + + vifRegs->num--; + vif->cl++; + + if (vif->cl == vifRegs->cycle.wl) { + dest += incdest; + v->addr +=(incdest * 4); + vif->cl = 0; + } + else { + dest += 4; + v->addr += 16; + } + } + if (v->addr >= memlimit) { + v->addr &=(memlimit - 1); + dest = (u32*)(VU->Mem + v->addr); + } + v->addr = addrstart; + if(tempsize > 0) size = tempsize; + } + + if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have + DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!"); + VIF_LOG("warning, end with size = %d", size); + // unpack one qword + //v->addr += (size / ft.dsize) * 4; + (vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize); + size = 0; + VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr); + } + } + else { // filling write + if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P + if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num) + DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl); + + DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr); + while (vifRegs->num > 0) { + if (vif->cl == vifRegs->cycle.wl) { + vif->cl = 0; + } + // unpack one qword + if (vif->cl < vifRegs->cycle.cl) { + if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; } + func(dest, (u32*)cdata); + cdata += ft.gsize; + size -= ft.gsize; + vif->cl++; + vifRegs->num--; + if (vif->cl == vifRegs->cycle.wl) { + vif->cl = 0; + } + } + else { + func(dest, (u32*)cdata); + v->addr += 16; + vifRegs->num--; + vif->cl++; + } + dest += 4; + if (vifRegs->num == 0) break; + } + } +} diff --git a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj index 7f7eec15eb..d505395ccb 100644 --- a/pcsx2/windows/VCprojects/pcsx2_2008.vcproj +++ b/pcsx2/windows/VCprojects/pcsx2_2008.vcproj @@ -820,52 +820,56 @@ RelativePath="..\..\VifDma.h" > - - - - - - - - + + + + + + + + + + diff --git a/pcsx2/x86/newVif.h b/pcsx2/x86/newVif.h index 06635804f1..5b7bd1f5a6 100644 --- a/pcsx2/x86/newVif.h +++ b/pcsx2/x86/newVif.h @@ -101,4 +101,3 @@ extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector] static const bool useOldUnpack = 0; // Use code in newVif_OldUnpack.inl static const bool newVifDynaRec = 1; // Use code in newVif_Dynarec.inl - diff --git a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp b/pcsx2/x86/newVif_Dynarec.cpp similarity index 96% rename from pcsx2/x86/VifUnpackSSE_Dynarec.cpp rename to pcsx2/x86/newVif_Dynarec.cpp index d27b153413..7ff3628588 100644 --- a/pcsx2/x86/VifUnpackSSE_Dynarec.cpp +++ b/pcsx2/x86/newVif_Dynarec.cpp @@ -1,267 +1,267 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2009 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets -// authors: cottonvibes(@gmail.com) -// Jake.Stine (@gmail.com) - -#include "PrecompiledHeader.h" -#include "VifUnpackSSE.h" - -static __aligned16 nVifBlock _vBlock = {0}; -static __pagealigned u8 nVifMemCmp[__pagesize]; - -void dVifInit(int idx) { - nVif[idx].numBlocks = 0; - nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache - nVif[idx].vifBlocks = new HashBucket<_tParams>(); - nVif[idx].recPtr = nVif[idx].vifCache->getBlock(); - nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone -} - -void dVifClose(int idx) { - nVif[idx].numBlocks = 0; - safe_delete(nVif[idx].vifCache); - safe_delete(nVif[idx].vifBlocks); -} - -VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_) - : v(vif_) - , vB(vifBlock_) -{ - isFill = (vB.cl < vB.wl); - usn = (vB.upkType>>5) & 1; - doMask = (vB.upkType>>4) & 1; - doMode = vB.mode & 3; -} - -#define makeMergeMask(x) { \ - x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \ -} - -_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const { - u32 m0 = vB.mask; - u32 m1 = m0 & 0xaaaaaaaa; - u32 m2 =(~m1>>1) & m0; - u32 m3 = (m1>>1) & ~m0; - u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0; - u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0; - if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); } - if (m3&&doMask) { - xMOVAPS(xmmCol0, ptr32[col]); - if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); - if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); - if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); - if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0); - } - //if (doMask||doMode) loadRowCol((nVifStruct&)v); -} - -void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { - pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking."); - int t = regX.Id ? 0 : 1; // Get Temp Reg - int cc = aMin(vCL, 3); - u32 m0 = (vB.mask >> (cc * 8)) & 0xff; - u32 m1 = m0 & 0xaa; - u32 m2 =(~m1>>1) & m0; - u32 m3 = (m1>>1) & ~m0; - u32 m4 = (m1>>1) & m0; - makeMergeMask(m2); - makeMergeMask(m3); - makeMergeMask(m4); - if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect - if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row - if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col - if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect - if (doMode) { - u32 m5 = (~m1>>1) & ~m0; - if (!doMask) m5 = 0xf; - else makeMergeMask(m5); - if (m5 < 0xf) { - xPXOR(xmmTemp, xmmTemp); - mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5); - xPADD.D(regX, xmmTemp); - if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5); - } - else if (m5 == 0xf) { - xPADD.D(regX, xmmRow); - if (doMode==2) xMOVAPS(xmmRow, regX); - } - } - xMOVAPS(ptr32[dstIndirect], regX); -} - -void VifUnpackSSE_Dynarec::writeBackRow() const { - u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0; - xMOVAPS(ptr32[row], xmmRow); - DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]"); - // ToDo: Do we need to write back to vifregs.rX too!? :/ -} - -static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg ) -{ - // Shifts the displacement factor of a given indirect address, so that the address - // remains in the optimal 0xf0 range (which allows for byte-form displacements when - // generating instructions). - - int addImm = 0; - while( addr.Displacement >= 0x80 ) - { - addImm += 0xf0; - addr -= 0xf0; - } - if(addImm) xADD(modReg, addImm); -} -static bool UsesTwoRegs[] = -{ - true, true, true, true, - false, false, false, false, - false, false, false, false, - false, false, false, true, - -}; - -void VifUnpackSSE_Dynarec::CompileRoutine() { - const int upkNum = v.vif->cmd & 0xf; - const u8& vift = nVifT[upkNum]; - const int cycleSize = isFill ? vB.cl : vB.wl; - const int blockSize = isFill ? vB.wl : vB.cl; - const int skipSize = blockSize - cycleSize; - - int vNum = v.vifRegs->num; - vCL = v.vif->cl; - doMode = upkNum == 0xf ? 0 : doMode; - - SetMasks(cycleSize); - - while (vNum) { - - ShiftDisplacementWindow( srcIndirect, edx ); - ShiftDisplacementWindow( dstIndirect, ecx ); - - if (vCL < cycleSize) { - xUnpack(upkNum); - xMovDest(); - - dstIndirect += 16; - srcIndirect += vift; - - if( IsUnmaskedOp() ) { - ++destReg; - ++workReg; - } - - vNum--; - if (++vCL == blockSize) vCL = 0; - } - else if (isFill) { - DevCon.WriteLn("filling mode!"); - VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) ); - fill.xUnpack(upkNum); - fill.xMovDest(); - - dstIndirect += 16; - vNum--; - if (++vCL == blockSize) vCL = 0; - } - else { - dstIndirect += (16 * skipSize); - vCL = 0; - } - } - - if (doMode==2) writeBackRow(); - xMOV(ptr32[&v.vif->cl], vCL); - xMOV(ptr32[&v.vifRegs->num], vNum); - xRET(); -} - -static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) { - u8* endPtr; // Check if we need to wrap around VU memory - u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit)); - if (!isFill) { // Account for skip-cycles - int skipSize = cl - wl; - int blocks = _vBlock.num / wl; - int skips = (blocks * skipSize + _vBlock.num) * 16; - endPtr = ptr + skips; - } - else endPtr = ptr + (_vBlock.num * 16); - if ( endPtr > v.vuMemEnd ) { - DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter."); - ptr = NULL; // Fall Back to Interpreters which have wrap-around logic - } - return ptr; -} - -static _f void dVifRecLimit(int idx) { - if (nVif[idx].recPtr > nVif[idx].recEnd) { - DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd); - nVif[idx].vifBlocks->clear(); - nVif[idx].recPtr = nVif[idx].vifCache->getBlock(); - } -} - -_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) { - - const nVifStruct& v = nVif[idx]; - const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5); - const int doMask = v.vif->cmd & 0x10; - const int cycle_cl = v.vifRegs->cycle.cl; - const int cycle_wl = v.vifRegs->cycle.wl; - const int cycleSize = isFill ? cycle_cl : cycle_wl; - const int blockSize = isFill ? cycle_wl : cycle_cl; - - if (v.vif->cl >= blockSize) v.vif->cl = 0; - - _vBlock.upkType = upkType; - _vBlock.num = (u8&)v.vifRegs->num; - _vBlock.mode = (u8&)v.vifRegs->mode; - _vBlock.scl = v.vif->cl; - _vBlock.cl = cycle_cl; - _vBlock.wl = cycle_wl; - - // Zero out the mask parameter if it's unused -- games leave random junk - // values here which cause false recblock cache misses. - _vBlock.mask = doMask ? v.vifRegs->mask : 0; - - if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { - if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) { - //DevCon.WriteLn("Running Recompiled Block!"); - ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data); - } - else { - //DevCon.WriteLn("Running Interpreter Block"); - _nVifUnpack(idx, data, size, isFill); - } - return; - } - DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++); - //DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]", - // _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode, - // doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored" - //); - - xSetPtr(v.recPtr); - _vBlock.startPtr = (uptr)xGetAlignedCallTarget(); - v.vifBlocks->add(_vBlock); - VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine(); - nVif[idx].recPtr = xGetPtr(); - - dVifRecLimit(idx); - - // Run the block we just compiled. Various conditions may force us to still use - // the interpreter unpacker though, so a recursive call is the safest way here... - dVifUnpack(idx, data, size, isFill); -} +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets +// authors: cottonvibes(@gmail.com) +// Jake.Stine (@gmail.com) + +#include "PrecompiledHeader.h" +#include "newVif_UnpackSSE.h" + +static __aligned16 nVifBlock _vBlock = {0}; +static __pagealigned u8 nVifMemCmp[__pagesize]; + +void dVifInit(int idx) { + nVif[idx].numBlocks = 0; + nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache + nVif[idx].vifBlocks = new HashBucket<_tParams>(); + nVif[idx].recPtr = nVif[idx].vifCache->getBlock(); + nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone +} + +void dVifClose(int idx) { + nVif[idx].numBlocks = 0; + safe_delete(nVif[idx].vifCache); + safe_delete(nVif[idx].vifBlocks); +} + +VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_) + : v(vif_) + , vB(vifBlock_) +{ + isFill = (vB.cl < vB.wl); + usn = (vB.upkType>>5) & 1; + doMask = (vB.upkType>>4) & 1; + doMode = vB.mode & 3; +} + +#define makeMergeMask(x) { \ + x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \ +} + +_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const { + u32 m0 = vB.mask; + u32 m1 = m0 & 0xaaaaaaaa; + u32 m2 =(~m1>>1) & m0; + u32 m3 = (m1>>1) & ~m0; + u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0; + u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0; + if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); } + if (m3&&doMask) { + xMOVAPS(xmmCol0, ptr32[col]); + if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1); + if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2); + if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3); + if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0); + } + //if (doMask||doMode) loadRowCol((nVifStruct&)v); +} + +void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const { + pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking."); + int t = regX.Id ? 0 : 1; // Get Temp Reg + int cc = aMin(vCL, 3); + u32 m0 = (vB.mask >> (cc * 8)) & 0xff; + u32 m1 = m0 & 0xaa; + u32 m2 =(~m1>>1) & m0; + u32 m3 = (m1>>1) & ~m0; + u32 m4 = (m1>>1) & m0; + makeMergeMask(m2); + makeMergeMask(m3); + makeMergeMask(m4); + if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect + if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row + if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col + if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect + if (doMode) { + u32 m5 = (~m1>>1) & ~m0; + if (!doMask) m5 = 0xf; + else makeMergeMask(m5); + if (m5 < 0xf) { + xPXOR(xmmTemp, xmmTemp); + mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5); + xPADD.D(regX, xmmTemp); + if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5); + } + else if (m5 == 0xf) { + xPADD.D(regX, xmmRow); + if (doMode==2) xMOVAPS(xmmRow, regX); + } + } + xMOVAPS(ptr32[dstIndirect], regX); +} + +void VifUnpackSSE_Dynarec::writeBackRow() const { + u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0; + xMOVAPS(ptr32[row], xmmRow); + DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]"); + // ToDo: Do we need to write back to vifregs.rX too!? :/ +} + +static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg ) +{ + // Shifts the displacement factor of a given indirect address, so that the address + // remains in the optimal 0xf0 range (which allows for byte-form displacements when + // generating instructions). + + int addImm = 0; + while( addr.Displacement >= 0x80 ) + { + addImm += 0xf0; + addr -= 0xf0; + } + if(addImm) xADD(modReg, addImm); +} +static bool UsesTwoRegs[] = +{ + true, true, true, true, + false, false, false, false, + false, false, false, false, + false, false, false, true, + +}; + +void VifUnpackSSE_Dynarec::CompileRoutine() { + const int upkNum = v.vif->cmd & 0xf; + const u8& vift = nVifT[upkNum]; + const int cycleSize = isFill ? vB.cl : vB.wl; + const int blockSize = isFill ? vB.wl : vB.cl; + const int skipSize = blockSize - cycleSize; + + int vNum = v.vifRegs->num; + vCL = v.vif->cl; + doMode = upkNum == 0xf ? 0 : doMode; + + SetMasks(cycleSize); + + while (vNum) { + + ShiftDisplacementWindow( srcIndirect, edx ); + ShiftDisplacementWindow( dstIndirect, ecx ); + + if (vCL < cycleSize) { + xUnpack(upkNum); + xMovDest(); + + dstIndirect += 16; + srcIndirect += vift; + + if( IsUnmaskedOp() ) { + ++destReg; + ++workReg; + } + + vNum--; + if (++vCL == blockSize) vCL = 0; + } + else if (isFill) { + DevCon.WriteLn("filling mode!"); + VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) ); + fill.xUnpack(upkNum); + fill.xMovDest(); + + dstIndirect += 16; + vNum--; + if (++vCL == blockSize) vCL = 0; + } + else { + dstIndirect += (16 * skipSize); + vCL = 0; + } + } + + if (doMode==2) writeBackRow(); + xMOV(ptr32[&v.vif->cl], vCL); + xMOV(ptr32[&v.vifRegs->num], vNum); + xRET(); +} + +static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) { + u8* endPtr; // Check if we need to wrap around VU memory + u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit)); + if (!isFill) { // Account for skip-cycles + int skipSize = cl - wl; + int blocks = _vBlock.num / wl; + int skips = (blocks * skipSize + _vBlock.num) * 16; + endPtr = ptr + skips; + } + else endPtr = ptr + (_vBlock.num * 16); + if ( endPtr > v.vuMemEnd ) { + DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter."); + ptr = NULL; // Fall Back to Interpreters which have wrap-around logic + } + return ptr; +} + +static _f void dVifRecLimit(int idx) { + if (nVif[idx].recPtr > nVif[idx].recEnd) { + DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd); + nVif[idx].vifBlocks->clear(); + nVif[idx].recPtr = nVif[idx].vifCache->getBlock(); + } +} + +_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) { + + const nVifStruct& v = nVif[idx]; + const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5); + const int doMask = v.vif->cmd & 0x10; + const int cycle_cl = v.vifRegs->cycle.cl; + const int cycle_wl = v.vifRegs->cycle.wl; + const int cycleSize = isFill ? cycle_cl : cycle_wl; + const int blockSize = isFill ? cycle_wl : cycle_cl; + + if (v.vif->cl >= blockSize) v.vif->cl = 0; + + _vBlock.upkType = upkType; + _vBlock.num = (u8&)v.vifRegs->num; + _vBlock.mode = (u8&)v.vifRegs->mode; + _vBlock.scl = v.vif->cl; + _vBlock.cl = cycle_cl; + _vBlock.wl = cycle_wl; + + // Zero out the mask parameter if it's unused -- games leave random junk + // values here which cause false recblock cache misses. + _vBlock.mask = doMask ? v.vifRegs->mask : 0; + + if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) { + if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) { + //DevCon.WriteLn("Running Recompiled Block!"); + ((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data); + } + else { + //DevCon.WriteLn("Running Interpreter Block"); + _nVifUnpack(idx, data, size, isFill); + } + return; + } + DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++); + //DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]", + // _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode, + // doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored" + //); + + xSetPtr(v.recPtr); + _vBlock.startPtr = (uptr)xGetAlignedCallTarget(); + v.vifBlocks->add(_vBlock); + VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine(); + nVif[idx].recPtr = xGetPtr(); + + dVifRecLimit(idx); + + // Run the block we just compiled. Various conditions may force us to still use + // the interpreter unpacker though, so a recursive call is the safest way here... + dVifUnpack(idx, data, size, isFill); +} diff --git a/pcsx2/x86/newVif_Unpack.cpp b/pcsx2/x86/newVif_Unpack.cpp index f4e4d09143..64e81fbd10 100644 --- a/pcsx2/x86/newVif_Unpack.cpp +++ b/pcsx2/x86/newVif_Unpack.cpp @@ -19,9 +19,9 @@ #include "PrecompiledHeader.h" #include "Common.h" -#include "VifDma_internal.h" +#include "VifDma.h" #include "newVif.h" -#include "newVif_OldUnpack.inl" +#include "Vif_Unpack.inl" __aligned16 nVifStruct nVif[2]; __aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle] diff --git a/pcsx2/x86/VifUnpackSSE.cpp b/pcsx2/x86/newVif_UnpackSSE.cpp similarity index 96% rename from pcsx2/x86/VifUnpackSSE.cpp rename to pcsx2/x86/newVif_UnpackSSE.cpp index 075a477868..9ddd912e37 100644 --- a/pcsx2/x86/VifUnpackSSE.cpp +++ b/pcsx2/x86/newVif_UnpackSSE.cpp @@ -1,310 +1,310 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2009 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#include "PrecompiledHeader.h" -#include "VifUnpackSSE.h" - -#define xMOV8(regX, loc) xMOVSSZX(regX, loc) -#define xMOV16(regX, loc) xMOVSSZX(regX, loc) -#define xMOV32(regX, loc) xMOVSSZX(regX, loc) -#define xMOV64(regX, loc) xMOVUPS(regX, loc) -#define xMOV128(regX, loc) xMOVUPS(regX, loc) - -static __pagealigned u8 nVifUpkExec[__pagesize*4]; - -// Merges xmm vectors without modifying source reg -void mergeVectors(int dest, int src, int temp, int xyzw) { - if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15) - || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) { - mVUmergeRegs(dest, src, xyzw); - } - else { - SSE_MOVAPS_XMM_to_XMM(temp, src); - mVUmergeRegs(dest, temp, xyzw); - } -} - -// Loads Row/Col Data from vifRegs instead of g_vifmask -// Useful for testing vifReg and g_vifmask inconsistency. -void loadRowCol(nVifStruct& v) { - xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]); - xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]); - xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]); - xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]); - xPSHUF.D(xmm0, xmm0, _v0); - xPSHUF.D(xmm1, xmm1, _v0); - xPSHUF.D(xmm2, xmm2, _v0); - xPSHUF.D(xmm6, xmm6, _v0); - mVUmergeRegs(XMM6, XMM0, 8); - mVUmergeRegs(XMM6, XMM1, 4); - mVUmergeRegs(XMM6, XMM2, 2); - xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]); - xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]); - xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]); - xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]); - xPSHUF.D(xmm2, xmm2, _v0); - xPSHUF.D(xmm3, xmm3, _v0); - xPSHUF.D(xmm4, xmm4, _v0); - xPSHUF.D(xmm5, xmm5, _v0); -} - -// ===================================================================================================== -// VifUnpackSSE_Base Section -// ===================================================================================================== -VifUnpackSSE_Base::VifUnpackSSE_Base() - : dstIndirect(ecx) // parameter 1 of __fastcall - , srcIndirect(edx) // parameter 2 of __fastcall - , workReg( xmm1 ) - , destReg( xmm0 ) -{ -} - -void VifUnpackSSE_Base::xMovDest() const { - if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); } - else { doMaskWrite(destReg); } -} - -void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const { - if (usn) { xPSRL.D(regX, n); } - else { xPSRA.D(regX, n); } -} - -void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const { - if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]); - else xPMOVSX.BD(regX, ptr32[srcIndirect]); -} - -void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const { - if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]); - else xPMOVSX.WD(regX, ptr64[srcIndirect]); -} - -void VifUnpackSSE_Base::xUPK_S_32() const { - xMOV32 (workReg, ptr32[srcIndirect]); - xPSHUF.D (destReg, workReg, _v0); -} - -void VifUnpackSSE_Base::xUPK_S_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (workReg); -} -else { - xMOV16 (workReg, ptr32[srcIndirect]); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 16); -} - xPSHUF.D (destReg, workReg, _v0); -} - -void VifUnpackSSE_Base::xUPK_S_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (workReg); -} -else { - xMOV8 (workReg, ptr32[srcIndirect]); - xPUNPCK.LBW(workReg, workReg); - xPUNPCK.LWD(workReg, workReg); - xShiftR (workReg, 24); -} - xPSHUF.D (destReg, workReg, _v0); -} - -void VifUnpackSSE_Base::xUPK_V2_32() const { - xMOV64 (destReg, ptr32[srcIndirect]); -} - -void VifUnpackSSE_Base::xUPK_V2_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (destReg); -} -else { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); -} -} - -void VifUnpackSSE_Base::xUPK_V2_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (destReg); -} -else { - xMOV16 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); -} -} - -void VifUnpackSSE_Base::xUPK_V3_32() const { - xMOV128 (destReg, ptr32[srcIndirect]); -} - -void VifUnpackSSE_Base::xUPK_V3_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (destReg); -} -else { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); -} -} - -void VifUnpackSSE_Base::xUPK_V3_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (destReg); -} -else { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); -} -} - -void VifUnpackSSE_Base::xUPK_V4_32() const { - xMOV128 (destReg, ptr32[srcIndirect]); -} - -void VifUnpackSSE_Base::xUPK_V4_16() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX16 (destReg); -} -else { - xMOV64 (destReg, ptr32[srcIndirect]); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 16); -} -} - -void VifUnpackSSE_Base::xUPK_V4_8() const { -if (x86caps.hasStreamingSIMD4Extensions) { - xPMOVXX8 (destReg); -} -else { - xMOV32 (destReg, ptr32[srcIndirect]); - xPUNPCK.LBW(destReg, destReg); - xPUNPCK.LWD(destReg, destReg); - xShiftR (destReg, 24); -} -} - -void VifUnpackSSE_Base::xUPK_V4_5() const { - xMOV16 (workReg, ptr32[srcIndirect]); - xPSHUF.D (workReg, workReg, _v0); - xPSLL.D (workReg, 3); // ABG|R5.000 - xMOVAPS (destReg, workReg); // x|x|x|R - xPSRL.D (workReg, 8); // ABG - xPSLL.D (workReg, 3); // AB|G5.000 - mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R - xPSRL.D (workReg, 8); // AB - xPSLL.D (workReg, 3); // A|B5.000 - mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R - xPSRL.D (workReg, 8); // A - xPSLL.D (workReg, 7); // A.0000000 - mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R - xPSLL.D (destReg, 24); // can optimize to - xPSRL.D (destReg, 24); // single AND... -} - -void VifUnpackSSE_Base::xUnpack( int upknum ) const -{ - switch( upknum ) - { - case 0: xUPK_S_32(); break; - case 1: xUPK_S_16(); break; - case 2: xUPK_S_8(); break; - - case 4: xUPK_V2_32(); break; - case 5: xUPK_V2_16(); break; - case 6: xUPK_V2_8(); break; - - case 8: xUPK_V3_32(); break; - case 9: xUPK_V3_16(); break; - case 10: xUPK_V3_8(); break; - - case 12: xUPK_V4_32(); break; - case 13: xUPK_V4_16(); break; - case 14: xUPK_V4_8(); break; - case 15: xUPK_V4_5(); break; - - case 3: - case 7: - case 11: - pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) ); - break; - } -} - -// ===================================================================================================== -// VifUnpackSSE_Simple -// ===================================================================================================== - -VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_) -{ - curCycle = curCycle_; - usn = usn_; - doMask = domask_; -} - -void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { - xMOVAPS(xmm7, ptr[dstIndirect]); - int offX = aMin(curCycle, 3); - xPAND(regX, ptr32[nVifMask[0][offX]]); - xPAND(xmm7, ptr32[nVifMask[1][offX]]); - xPOR (regX, ptr32[nVifMask[2][offX]]); - xPOR (regX, xmm7); - xMOVAPS(ptr[dstIndirect], regX); -} - -// ecx = dest, edx = src -static void nVifGen(int usn, int mask, int curCycle) { - - int usnpart = usn*2*16; - int maskpart = mask*16; - - VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle ); - - for( int i=0; i<16; ++i ) - { - nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] ); - ucall = NULL; - if( nVifT[i] == 0 ) continue; - - ucall = (nVifCall)xGetAlignedCallTarget(); - vpugen.xUnpack(i); - vpugen.xMovDest(); - xRET(); - - pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) ); - } -} - -void VifUnpackSSE_Init() -{ - HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false); - memset8<0xcc>( nVifUpkExec ); - - xSetPtr( nVifUpkExec ); - - for (int a = 0; a < 2; a++) { - for (int b = 0; b < 2; b++) { - for (int c = 0; c < 4; c++) { - nVifGen(a, b, c); - }}} - - HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true); -} +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#include "PrecompiledHeader.h" +#include "newVif_UnpackSSE.h" + +#define xMOV8(regX, loc) xMOVSSZX(regX, loc) +#define xMOV16(regX, loc) xMOVSSZX(regX, loc) +#define xMOV32(regX, loc) xMOVSSZX(regX, loc) +#define xMOV64(regX, loc) xMOVUPS(regX, loc) +#define xMOV128(regX, loc) xMOVUPS(regX, loc) + +static __pagealigned u8 nVifUpkExec[__pagesize*4]; + +// Merges xmm vectors without modifying source reg +void mergeVectors(int dest, int src, int temp, int xyzw) { + if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15) + || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) { + mVUmergeRegs(dest, src, xyzw); + } + else { + SSE_MOVAPS_XMM_to_XMM(temp, src); + mVUmergeRegs(dest, temp, xyzw); + } +} + +// Loads Row/Col Data from vifRegs instead of g_vifmask +// Useful for testing vifReg and g_vifmask inconsistency. +void loadRowCol(nVifStruct& v) { + xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]); + xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]); + xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]); + xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]); + xPSHUF.D(xmm0, xmm0, _v0); + xPSHUF.D(xmm1, xmm1, _v0); + xPSHUF.D(xmm2, xmm2, _v0); + xPSHUF.D(xmm6, xmm6, _v0); + mVUmergeRegs(XMM6, XMM0, 8); + mVUmergeRegs(XMM6, XMM1, 4); + mVUmergeRegs(XMM6, XMM2, 2); + xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]); + xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]); + xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]); + xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]); + xPSHUF.D(xmm2, xmm2, _v0); + xPSHUF.D(xmm3, xmm3, _v0); + xPSHUF.D(xmm4, xmm4, _v0); + xPSHUF.D(xmm5, xmm5, _v0); +} + +// ===================================================================================================== +// VifUnpackSSE_Base Section +// ===================================================================================================== +VifUnpackSSE_Base::VifUnpackSSE_Base() + : dstIndirect(ecx) // parameter 1 of __fastcall + , srcIndirect(edx) // parameter 2 of __fastcall + , workReg( xmm1 ) + , destReg( xmm0 ) +{ +} + +void VifUnpackSSE_Base::xMovDest() const { + if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); } + else { doMaskWrite(destReg); } +} + +void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const { + if (usn) { xPSRL.D(regX, n); } + else { xPSRA.D(regX, n); } +} + +void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const { + if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]); + else xPMOVSX.BD(regX, ptr32[srcIndirect]); +} + +void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const { + if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]); + else xPMOVSX.WD(regX, ptr64[srcIndirect]); +} + +void VifUnpackSSE_Base::xUPK_S_32() const { + xMOV32 (workReg, ptr32[srcIndirect]); + xPSHUF.D (destReg, workReg, _v0); +} + +void VifUnpackSSE_Base::xUPK_S_16() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX16 (workReg); +} +else { + xMOV16 (workReg, ptr32[srcIndirect]); + xPUNPCK.LWD(workReg, workReg); + xShiftR (workReg, 16); +} + xPSHUF.D (destReg, workReg, _v0); +} + +void VifUnpackSSE_Base::xUPK_S_8() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX8 (workReg); +} +else { + xMOV8 (workReg, ptr32[srcIndirect]); + xPUNPCK.LBW(workReg, workReg); + xPUNPCK.LWD(workReg, workReg); + xShiftR (workReg, 24); +} + xPSHUF.D (destReg, workReg, _v0); +} + +void VifUnpackSSE_Base::xUPK_V2_32() const { + xMOV64 (destReg, ptr32[srcIndirect]); +} + +void VifUnpackSSE_Base::xUPK_V2_16() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX16 (destReg); +} +else { + xMOV32 (destReg, ptr32[srcIndirect]); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 16); +} +} + +void VifUnpackSSE_Base::xUPK_V2_8() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX8 (destReg); +} +else { + xMOV16 (destReg, ptr32[srcIndirect]); + xPUNPCK.LBW(destReg, destReg); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 24); +} +} + +void VifUnpackSSE_Base::xUPK_V3_32() const { + xMOV128 (destReg, ptr32[srcIndirect]); +} + +void VifUnpackSSE_Base::xUPK_V3_16() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX16 (destReg); +} +else { + xMOV64 (destReg, ptr32[srcIndirect]); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 16); +} +} + +void VifUnpackSSE_Base::xUPK_V3_8() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX8 (destReg); +} +else { + xMOV32 (destReg, ptr32[srcIndirect]); + xPUNPCK.LBW(destReg, destReg); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 24); +} +} + +void VifUnpackSSE_Base::xUPK_V4_32() const { + xMOV128 (destReg, ptr32[srcIndirect]); +} + +void VifUnpackSSE_Base::xUPK_V4_16() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX16 (destReg); +} +else { + xMOV64 (destReg, ptr32[srcIndirect]); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 16); +} +} + +void VifUnpackSSE_Base::xUPK_V4_8() const { +if (x86caps.hasStreamingSIMD4Extensions) { + xPMOVXX8 (destReg); +} +else { + xMOV32 (destReg, ptr32[srcIndirect]); + xPUNPCK.LBW(destReg, destReg); + xPUNPCK.LWD(destReg, destReg); + xShiftR (destReg, 24); +} +} + +void VifUnpackSSE_Base::xUPK_V4_5() const { + xMOV16 (workReg, ptr32[srcIndirect]); + xPSHUF.D (workReg, workReg, _v0); + xPSLL.D (workReg, 3); // ABG|R5.000 + xMOVAPS (destReg, workReg); // x|x|x|R + xPSRL.D (workReg, 8); // ABG + xPSLL.D (workReg, 3); // AB|G5.000 + mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R + xPSRL.D (workReg, 8); // AB + xPSLL.D (workReg, 3); // A|B5.000 + mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R + xPSRL.D (workReg, 8); // A + xPSLL.D (workReg, 7); // A.0000000 + mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R + xPSLL.D (destReg, 24); // can optimize to + xPSRL.D (destReg, 24); // single AND... +} + +void VifUnpackSSE_Base::xUnpack( int upknum ) const +{ + switch( upknum ) + { + case 0: xUPK_S_32(); break; + case 1: xUPK_S_16(); break; + case 2: xUPK_S_8(); break; + + case 4: xUPK_V2_32(); break; + case 5: xUPK_V2_16(); break; + case 6: xUPK_V2_8(); break; + + case 8: xUPK_V3_32(); break; + case 9: xUPK_V3_16(); break; + case 10: xUPK_V3_8(); break; + + case 12: xUPK_V4_32(); break; + case 13: xUPK_V4_16(); break; + case 14: xUPK_V4_8(); break; + case 15: xUPK_V4_5(); break; + + case 3: + case 7: + case 11: + pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) ); + break; + } +} + +// ===================================================================================================== +// VifUnpackSSE_Simple +// ===================================================================================================== + +VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_) +{ + curCycle = curCycle_; + usn = usn_; + doMask = domask_; +} + +void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const { + xMOVAPS(xmm7, ptr[dstIndirect]); + int offX = aMin(curCycle, 3); + xPAND(regX, ptr32[nVifMask[0][offX]]); + xPAND(xmm7, ptr32[nVifMask[1][offX]]); + xPOR (regX, ptr32[nVifMask[2][offX]]); + xPOR (regX, xmm7); + xMOVAPS(ptr[dstIndirect], regX); +} + +// ecx = dest, edx = src +static void nVifGen(int usn, int mask, int curCycle) { + + int usnpart = usn*2*16; + int maskpart = mask*16; + + VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle ); + + for( int i=0; i<16; ++i ) + { + nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] ); + ucall = NULL; + if( nVifT[i] == 0 ) continue; + + ucall = (nVifCall)xGetAlignedCallTarget(); + vpugen.xUnpack(i); + vpugen.xMovDest(); + xRET(); + + pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) ); + } +} + +void VifUnpackSSE_Init() +{ + HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false); + memset8<0xcc>( nVifUpkExec ); + + xSetPtr( nVifUpkExec ); + + for (int a = 0; a < 2; a++) { + for (int b = 0; b < 2; b++) { + for (int c = 0; c < 4; c++) { + nVifGen(a, b, c); + }}} + + HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true); +} diff --git a/pcsx2/x86/VifUnpackSSE.h b/pcsx2/x86/newVif_UnpackSSE.h similarity index 96% rename from pcsx2/x86/VifUnpackSSE.h rename to pcsx2/x86/newVif_UnpackSSE.h index d8ea2b38fe..4da4454a5e 100644 --- a/pcsx2/x86/VifUnpackSSE.h +++ b/pcsx2/x86/newVif_UnpackSSE.h @@ -1,145 +1,145 @@ -/* PCSX2 - PS2 Emulator for PCs - * Copyright (C) 2002-2009 PCSX2 Dev Team - * - * PCSX2 is free software: you can redistribute it and/or modify it under the terms - * of the GNU Lesser General Public License as published by the Free Software Found- - * ation, either version 3 of the License, or (at your option) any later version. - * - * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; - * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR - * PURPOSE. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License along with PCSX2. - * If not, see . - */ - -#pragma once - -#include "Common.h" -#include "VifDma_internal.h" -#include "newVif.h" - -#include -#include - -using namespace x86Emitter; - -extern void mergeVectors(int dest, int src, int temp, int xyzw); -extern void loadRowCol(nVifStruct& v); - -// -------------------------------------------------------------------------------------- -// VifUnpackSSE_Base -// -------------------------------------------------------------------------------------- -class VifUnpackSSE_Base -{ -public: - bool usn; // unsigned flag - bool doMask; // masking write enable flag - -protected: - xAddressInfo dstIndirect; - xAddressInfo srcIndirect; - xRegisterSSE workReg; - xRegisterSSE destReg; - -public: - VifUnpackSSE_Base(); - virtual ~VifUnpackSSE_Base() throw() {} - - virtual void xUnpack( int upktype ) const; - virtual bool IsUnmaskedOp() const=0; - virtual void xMovDest() const; - -protected: - virtual void doMaskWrite(const xRegisterSSE& regX ) const=0; - - virtual void xShiftR(const xRegisterSSE& regX, int n) const; - virtual void xPMOVXX8(const xRegisterSSE& regX) const; - virtual void xPMOVXX16(const xRegisterSSE& regX) const; - - virtual void xUPK_S_32() const; - virtual void xUPK_S_16() const; - virtual void xUPK_S_8() const; - - virtual void xUPK_V2_32() const; - virtual void xUPK_V2_16() const; - virtual void xUPK_V2_8() const; - - virtual void xUPK_V3_32() const; - virtual void xUPK_V3_16() const; - virtual void xUPK_V3_8() const; - - virtual void xUPK_V4_32() const; - virtual void xUPK_V4_16() const; - virtual void xUPK_V4_8() const; - virtual void xUPK_V4_5() const; - -}; - -// -------------------------------------------------------------------------------------- -// VifUnpackSSE_Simple -// -------------------------------------------------------------------------------------- -class VifUnpackSSE_Simple : public VifUnpackSSE_Base -{ - typedef VifUnpackSSE_Base _parent; - -public: - int curCycle; - -public: - VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_); - virtual ~VifUnpackSSE_Simple() throw() {} - - virtual bool IsUnmaskedOp() const{ return !doMask; } - -protected: - virtual void doMaskWrite(const xRegisterSSE& regX ) const; -}; - -// -------------------------------------------------------------------------------------- -// VifUnpackSSE_Dynarec -// -------------------------------------------------------------------------------------- -class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base -{ - typedef VifUnpackSSE_Base _parent; - -public: - bool isFill; - int doMode; // two bit value representing... something! - -protected: - const nVifStruct& v; // vif0 or vif1 - const nVifBlock& vB; // some pre-collected data from VifStruct - int vCL; // internal copy of vif->cl - -public: - VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_); - VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor - : _parent(src) - , v(src.v) - , vB(src.vB) - { - isFill = src.isFill; - vCL = src.vCL; - } - - virtual ~VifUnpackSSE_Dynarec() throw() {} - - virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; } - - void CompileRoutine(); - -protected: - virtual void doMaskWrite(const xRegisterSSE& regX) const; - void SetMasks(int cS) const; - void writeBackRow() const; - - static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src ) - { - VifUnpackSSE_Dynarec fillingWrite( src ); - fillingWrite.doMask = true; - fillingWrite.doMode = 0; - return fillingWrite; - } -}; - +/* PCSX2 - PS2 Emulator for PCs + * Copyright (C) 2002-2009 PCSX2 Dev Team + * + * PCSX2 is free software: you can redistribute it and/or modify it under the terms + * of the GNU Lesser General Public License as published by the Free Software Found- + * ation, either version 3 of the License, or (at your option) any later version. + * + * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; + * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + * PURPOSE. See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along with PCSX2. + * If not, see . + */ + +#pragma once + +#include "Common.h" +#include "VifDma.h" +#include "newVif.h" + +#include +#include + +using namespace x86Emitter; + +extern void mergeVectors(int dest, int src, int temp, int xyzw); +extern void loadRowCol(nVifStruct& v); + +// -------------------------------------------------------------------------------------- +// VifUnpackSSE_Base +// -------------------------------------------------------------------------------------- +class VifUnpackSSE_Base +{ +public: + bool usn; // unsigned flag + bool doMask; // masking write enable flag + +protected: + xAddressInfo dstIndirect; + xAddressInfo srcIndirect; + xRegisterSSE workReg; + xRegisterSSE destReg; + +public: + VifUnpackSSE_Base(); + virtual ~VifUnpackSSE_Base() throw() {} + + virtual void xUnpack( int upktype ) const; + virtual bool IsUnmaskedOp() const=0; + virtual void xMovDest() const; + +protected: + virtual void doMaskWrite(const xRegisterSSE& regX ) const=0; + + virtual void xShiftR(const xRegisterSSE& regX, int n) const; + virtual void xPMOVXX8(const xRegisterSSE& regX) const; + virtual void xPMOVXX16(const xRegisterSSE& regX) const; + + virtual void xUPK_S_32() const; + virtual void xUPK_S_16() const; + virtual void xUPK_S_8() const; + + virtual void xUPK_V2_32() const; + virtual void xUPK_V2_16() const; + virtual void xUPK_V2_8() const; + + virtual void xUPK_V3_32() const; + virtual void xUPK_V3_16() const; + virtual void xUPK_V3_8() const; + + virtual void xUPK_V4_32() const; + virtual void xUPK_V4_16() const; + virtual void xUPK_V4_8() const; + virtual void xUPK_V4_5() const; + +}; + +// -------------------------------------------------------------------------------------- +// VifUnpackSSE_Simple +// -------------------------------------------------------------------------------------- +class VifUnpackSSE_Simple : public VifUnpackSSE_Base +{ + typedef VifUnpackSSE_Base _parent; + +public: + int curCycle; + +public: + VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_); + virtual ~VifUnpackSSE_Simple() throw() {} + + virtual bool IsUnmaskedOp() const{ return !doMask; } + +protected: + virtual void doMaskWrite(const xRegisterSSE& regX ) const; +}; + +// -------------------------------------------------------------------------------------- +// VifUnpackSSE_Dynarec +// -------------------------------------------------------------------------------------- +class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base +{ + typedef VifUnpackSSE_Base _parent; + +public: + bool isFill; + int doMode; // two bit value representing... something! + +protected: + const nVifStruct& v; // vif0 or vif1 + const nVifBlock& vB; // some pre-collected data from VifStruct + int vCL; // internal copy of vif->cl + +public: + VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_); + VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src) // copy constructor + : _parent(src) + , v(src.v) + , vB(src.vB) + { + isFill = src.isFill; + vCL = src.vCL; + } + + virtual ~VifUnpackSSE_Dynarec() throw() {} + + virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; } + + void CompileRoutine(); + +protected: + virtual void doMaskWrite(const xRegisterSSE& regX) const; + void SetMasks(int cS) const; + void writeBackRow() const; + + static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src ) + { + VifUnpackSSE_Dynarec fillingWrite( src ); + fillingWrite.doMask = true; + fillingWrite.doMode = 0; + return fillingWrite; + } +}; +