Vif Cleanups - did some code refactoring so things make more sense.

I need to do more later on...

I mostly separated the unpack code from the DMA/transfer code in this commit.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2485 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
cottonvibes 2010-01-23 02:30:00 +00:00
parent 12200c2c10
commit 84bc805761
14 changed files with 1290 additions and 1279 deletions

View File

@ -16,8 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "VifDma_internal.h"
#include "VifDma.h"
#include "VUmicro.h"
#include "newVif.h"

View File

@ -16,9 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "VifDma_internal.h"
#include "VifDma.h"
#include "GS.h"
#include "Gif.h"
#include "VUmicro.h"

View File

@ -16,7 +16,7 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "VifDma_internal.h"
#include "VifDma.h"
#include "VUmicro.h"
int g_vifCycles = 0;

View File

@ -12,8 +12,9 @@
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __VIFDMA_H__
#define __VIFDMA_H__
#pragma once
#include "Vif_Unpack.h"
struct vifCode {
u32 addr;
@ -43,9 +44,10 @@ struct vifStruct {
u8 dmamode;
};
extern vifStruct vif0, vif1;
extern u8 schedulepath3msk;
static const int VifCycleVoodoo = 4;
extern vifStruct* vif;
extern vifStruct vif0, vif1;
extern u8 schedulepath3msk;
static const int VifCycleVoodoo = 4;
extern void vif0Init();
extern void vif0Interrupt();
@ -63,4 +65,20 @@ __forceinline static int _limit(int a, int max)
return ((a > max) ? max : a);
}
#endif
// Mode identifiers for VIF transfers. Enumerators are numbered implicitly
// from zero, in declaration order.
enum VifModes
{
	VIF_NORMAL_TO_MEM_MODE,		// 0
	VIF_NORMAL_FROM_MEM_MODE,	// 1
	VIF_CHAIN_MODE				// 2
};

// Generic constants (one per VIF unit).
// NOTE(review): presumably interrupt-controller line numbers -- confirm against users.
static const unsigned int VIF0intc = 4;
static const unsigned int VIF1intc = 5;
// Global VIF cycle counter; declared here, defined in one of the Vif source files.
extern int g_vifCycles;
// Declared here, defined elsewhere. The template argument is a VIF unit number.
// NOTE(review): presumably starts VU microprogram execution at `addr` -- confirm at the definition.
template<const u32 VIFdmanum> void vuExecMicro(u32 addr);
// Per-unit flush entry points; declared here, defined elsewhere.
extern void vif0FLUSH();
extern void vif1FLUSH();

View File

@ -13,68 +13,8 @@
* If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __VIFDMA_INTERNAL_H__
#define __VIFDMA_INTERNAL_H__
#pragma once
#include "VifDma.h"
// Mode identifiers for VIF transfers. Enumerators are numbered implicitly
// from zero, in declaration order.
enum VifModes
{
	VIF_NORMAL_TO_MEM_MODE,		// 0
	VIF_NORMAL_FROM_MEM_MODE,	// 1
	VIF_CHAIN_MODE				// 2
};

// Generic constants (one per VIF unit).
// NOTE(review): presumably interrupt-controller line numbers -- confirm against users.
static const unsigned int VIF0intc = 4;
static const unsigned int VIF1intc = 5;
// Function-pointer signatures used by the unpack function table.
//   UNPACKFUNCTYPE        - (dest, data): unpack with a size fixed by the callee
//   UNPACKFUNCTYPE_ODD    - (dest, data, size): unpack with an explicit element count
//   UNPACKPARTFUNCTYPESSE - (dest, data, size) returning int; not __fastcall
typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data);
typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size);
typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size);

// Generators for the per-width (u8/u16/u32, s8/s16/s32) variants of the two
// signatures above. The macro bodies intentionally carry NO trailing ';': the
// terminator is supplied at the point of use, so an invocation written as
// `create_some_unpacks(32);` expands to exactly one ';' per typedef instead of
// leaving stray empty declarations (";;") at namespace scope.
#define create_unpack_u_type(bits)		typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, u##bits *data)
#define create_unpack_odd_u_type(bits)	typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, u##bits *data, int size)
#define create_unpack_s_type(bits)		typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, s##bits *data)
#define create_unpack_odd_s_type(bits)	typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, s##bits *data, int size)

// Emits all four typedefs for one element width; the last one is terminated
// by the ';' written after the invocation.
#define create_some_unpacks(bits) \
	create_unpack_u_type(bits); \
	create_unpack_odd_u_type(bits); \
	create_unpack_s_type(bits); \
	create_unpack_odd_s_type(bits)

create_some_unpacks(32);
create_some_unpacks(16);
create_some_unpacks(8);
// One entry of the unpack dispatch table (VIFfuncTable). The member order is
// relied upon by the aggregate initializers that build the table.
struct VIFUnpackFuncTable
{
// Fixed-size unpackers; VIFunpack picks funcU when vif->usn is set, funcS otherwise.
UNPACKFUNCTYPE funcU;
UNPACKFUNCTYPE funcS;
// Variable-size unpackers taking an explicit element count (same usn selection).
UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed.
UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed.
u8 bsize; // currently unused
u8 dsize; // byte size of one channel
u8 gsize; // size of data in bytes used for each write cycle
u8 qsize; // used for unpack parts, num of vectors that
// will be decompressed from data for 1 cycle
};
// The dispatch table itself: 32 entries, indexed by (cmd & 0x1f) in VIFunpack.
extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
// Global VIF cycle counter.
extern int g_vifCycles;
// Currently selected VIF state; VIFunpack points it at vif0 or vif1.
extern vifStruct *vif;
// Interpreter unpack routine; the template argument is the VIF unit number (0 or 1).
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size);
// Declared here, defined elsewhere; the template argument is the VIF unit number.
template<const u32 VIFdmanum> void vuExecMicro(u32 addr);
// Per-unit flush entry points; declared here, defined elsewhere.
extern void vif0FLUSH();
extern void vif1FLUSH();
// newVif entry points; declared here, defined elsewhere.
// NOTE(review): idx presumably selects the VIF unit (0 or 1) -- confirm at the definitions.
extern int nVifUnpack (int idx, u8 *data);
extern void initNewVif (int idx);
extern void resetNewVif(int idx);
#endif

View File

@ -1,296 +1,295 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Vif.h"
#include "VifDma_internal.h"
// Lane indices of a four-component vector, numbered implicitly from zero.
enum UnpackOffset {
	OFFSET_X,	// 0
	OFFSET_Y,	// 1
	OFFSET_Z,	// 2
	OFFSET_W	// 3
};
// Stores `data` into row register r0..r3 (chosen by `reg`) of the active VIF
// register block and returns the stored value. Any other `reg` hits
// jNO_DEFAULT.
static __forceinline u32 setVifRowRegs(u32 reg, u32 data) {
	switch (reg) {
		case 0:
			vifRegs->r0 = data;
			return data;
		case 1:
			vifRegs->r1 = data;
			return data;
		case 2:
			vifRegs->r2 = data;
			return data;
		case 3:
			vifRegs->r3 = data;
			return data;
		jNO_DEFAULT;
	}
	return data;
}
// Returns row register r0..r3 (chosen by `reg`) of the active VIF register
// block. Any other `reg` hits jNO_DEFAULT; the fallback result is 0.
static __forceinline u32 getVifRowRegs(u32 reg) {
	u32 rowValue = 0;
	switch (reg) {
		case 0: rowValue = vifRegs->r0; break;
		case 1: rowValue = vifRegs->r1; break;
		case 2: rowValue = vifRegs->r2; break;
		case 3: rowValue = vifRegs->r3; break;
		jNO_DEFAULT;
	}
	return rowValue;
}
// Returns column register c0..c2 for reg 0..2; every other value of `reg`
// yields c3.
static __forceinline u32 getVifColRegs(u32 reg) {
	if (reg == 0) return vifRegs->c0;
	if (reg == 1) return vifRegs->c1;
	if (reg == 2) return vifRegs->c2;
	return vifRegs->c3;
}
// Writes one 32-bit lane of an unpacked vector.
//   offnum - lane index; selects the row register and the 2-bit mask field
//   dest   - destination word, updated in place (left untouched for selector 3)
//   data   - decoded source value for this lane
// With doMask the 2-bit selector is taken from vifRegs->mask: 8 bits per write
// cycle (vif->cl, where every value other than 0..2 uses the top group) and
// 2 bits per lane. Without doMask the selector is always 0.
template< bool doMask >
static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) {
	const u32 rowValue = getVifRowRegs(offnum);

	int selector = 0;
	if (doMask) {
		// The unsigned compare also routes any out-of-range cl to the last group.
		const u32 cycleGroup = ((u32)vif->cl <= 2) ? (u32)vif->cl : 3;
		selector = (vifRegs->mask >> ((cycleGroup * 8) + (offnum * 2))) & 0x3;
	}

	if (selector == 1) {
		dest = rowValue;
		return;
	}
	if (selector == 2) {
		dest = getVifColRegs(vif->cl);
		return;
	}
	if (selector != 0) {
		return;		// selector 3: destination keeps its current contents
	}

	// selector 0: store the data itself, optionally combined with the row register.
	if ((vif->cmd & 0x6F) == 0x6f) {
		dest = data; // v4-5 Unpack Mode
		return;
	}

	switch (vifRegs->mode) {
		case 1:
			dest = data + rowValue;
			break;
		case 2:
			// Accumulates into the row register and stores the new total.
			dest = setVifRowRegs(offnum, rowValue + data);
			break;
		default:
			dest = data;
			break;
	}
}
// S-# unpack: the single source element is written to all four lanes of the
// destination vector. `size` is not used; an S-# packet is always complete,
// so the offset register is not consulted.
template < bool doMask, class T >
static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size)
{
	writeXYZW<doMask>(OFFSET_X, dest[0], *data);
	writeXYZW<doMask>(OFFSET_Y, dest[1], *data);
	writeXYZW<doMask>(OFFSET_Z, dest[2], *data);
	writeXYZW<doMask>(OFFSET_W, dest[3], *data);
}
// V2-# unpack: writes up to two source elements (X, Y) and then fills Z and W.
// Progress is tracked in vifRegs->offset so a vector can be resumed part-way:
// each stanza only runs when the offset register names its lane.
//   dest - destination vector (advanced lane by lane)
//   data - source elements of type T
//   size - number of source elements still available
template <bool doMask, class T>
static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
{
if (vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
if (vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
// `data` is deliberately not advanced here: the W stanza below re-reads this element.
writeXYZW<doMask>(vifRegs->offset, *dest++, *data);
vifRegs->offset = OFFSET_Z;
size--;
}
}
if (vifRegs->offset == OFFSET_Z)
{
// NOTE(review): `*dest-2` parses as `(*dest) - 2`, and `dest` is both read and
// post-incremented within one argument list, so the value passed depends on
// argument evaluation order. Looks like `*(dest - 2)` (the X lane) may have been
// intended -- confirm against hardware behaviour before changing.
writeXYZW<doMask>(vifRegs->offset, *dest++, *dest-2);
vifRegs->offset = OFFSET_W;
}
if (vifRegs->offset == OFFSET_W)
{
writeXYZW<doMask>(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
}
// V3-# unpack: writes up to three source elements (X, Y, Z) and then W.
// vifRegs->offset records the next lane to write, so a vector interrupted by
// running out of source elements (`size`) can be resumed by a later call.
template <bool doMask, class T>
static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
{
	// X, Y and Z are handled identically: each consumes one source element,
	// but only when it is the pending lane and data remains.
	for (u32 lane = OFFSET_X; lane <= OFFSET_Z; lane++)
	{
		if (vifRegs->offset != lane) continue;
		if (size <= 0) continue;

		writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
		vifRegs->offset = lane + 1;
		size--;
	}

	if (vifRegs->offset == OFFSET_W)
	{
		// V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!)
		// Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate
		// W is written from whatever `data` currently points at; no element is consumed.
		writeXYZW<doMask>(vifRegs->offset, *dest, *data);
		vifRegs->offset = OFFSET_X;
	}
}
// V4-# unpack: writes `size` consecutive source elements, starting at the lane
// named by vifRegs->offset and advancing it once per element. The offset is
// wrapped back to X only after the loop, once it has moved past W.
template <bool doMask, class T>
static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
{
	for (; size > 0; size--)
	{
		writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
		vifRegs->offset++;
	}

	if (vifRegs->offset > OFFSET_W)
	{
		vifRegs->offset = OFFSET_X;
	}
}
// V4-5 unpack: splits the low 16 bits of *data into three 5-bit fields and one
// 1-bit field, each shifted so it lands in the top bits of a byte, and writes
// them to lanes X..W. `size` is not used; as with S-#, the packet is always
// complete.
template< bool doMask >
static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
{
	writeXYZW<doMask>(OFFSET_X, dest[0], ((*data & 0x001f) << 3));	// bits  0..4  -> bits 3..7
	writeXYZW<doMask>(OFFSET_Y, dest[1], ((*data & 0x03e0) >> 2));	// bits  5..9  -> bits 3..7
	writeXYZW<doMask>(OFFSET_Z, dest[2], ((*data & 0x7c00) >> 7));	// bits 10..14 -> bits 3..7
	writeXYZW<doMask>(OFFSET_W, dest[3], ((*data & 0x8000) >> 8));	// bit  15     -> bit 7
}
// =====================================================================================================
// Fixed-size entry points: each forwards to the matching UNPACK_* routine with
// the element count supplied as the `size` template argument, giving a
// two-argument signature suitable for the funcU/funcS table slots.
template < bool doMask, int size, class T >
static void __fastcall fUNPACK_S(u32 *dest, T *data)
{
	UNPACK_S<doMask, T>(dest, data, size);
}

template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V2(u32 *dest, T *data)
{
	UNPACK_V2<doMask, T>(dest, data, size);
}

template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V3(u32 *dest, T *data)
{
	UNPACK_V3<doMask, T>(dest, data, size);
}

template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V4(u32 *dest, T *data)
{
	UNPACK_V4<doMask, T>(dest, data, size);
}

// V4-5 has no element-type or size parameter; UNPACK_V4_5 ignores the count.
template< bool doMask >
static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data)
{
	UNPACK_V4_5<doMask>(dest, data, 0); // size is ignored.
}
// --------------------------------------------------------------------------------------
// Main table for function unpacking.
// --------------------------------------------------------------------------------------
// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and
// disabled versions. This is probably simpler and more efficient than bothering
// to generate separate tables.
//
// The double-cast function pointer nonsense is to appease GCC, which gives some rather
// cryptic error about being unable to deduce the type parameters (I think it's a bug
// relating to __fastcall, which I recall having some other places as well). It's fixed
// by explicitly casting the function to itself prior to casting it to what we need it
// to be cast as. --air
//
// Cast helpers. _upk and _odd are not referenced by the macros below.
#define _upk (UNPACKFUNCTYPE)
#define _odd (UNPACKFUNCTYPE_ODD)
// Casts to the width-specific function-pointer typedefs (signed / unsigned,
// fixed-size / explicit-size); these form the inner half of the double cast.
#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits)
#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits)
#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits)
#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits)
// Expands to the four function-pointer initializers of one table entry
// (funcU, funcS, oddU, oddS), each followed by a comma so the size fields
// can be appended directly. The unsigned function fills both slots.
// 32-bits versions are unsigned-only!!
#define UnpackFuncPair32( sizefac, vt, doMask ) \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt<doMask, sizefac, u32>, \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt<doMask, sizefac, u32>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt<doMask, u32>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt<doMask, u32>,
// Same as UnpackFuncPair32 for 16- and 8-bit elements, with distinct
// unsigned (u##bits) and signed (s##bits) instantiations.
#define UnpackFuncPair( sizefac, vt, bits, doMask ) \
(UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt<doMask, sizefac, u##bits>, \
(UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt<doMask, sizefac, s##bits>, \
(UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt<doMask, u##bits>, \
(UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt<doMask, s##bits>,
// Expands to 16 table entries, one per unpack type 0x0..0xF. The four numbers
// after each function group initialize bsize, dsize, gsize and qsize, in that
// order. Types 0x3, 0x7 and 0xB have no unpacker and are all-NULL/zero.
#define UnpackFuncSet( doMask ) \
{ UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \
{ UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \
{ UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \
{ NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \
{ UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \
{ UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \
{ UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \
{ UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \
{ UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \
{ UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \
{ UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \
{ UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \
{ UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \
{ /* 0xF - V4-5 */ \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5<doMask>, \
6, 2, 2, 4 },
// The dispatch table: entries 0..15 are the unmasked unpackers, entries 16..31
// the masked ones, so bit 4 of the index selects masking.
const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] =
{
UnpackFuncSet( false )
UnpackFuncSet( true )
};
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "Common.h"
#include "Vif.h"
#include "VifDma.h"
// Lane indices of a four-component vector, numbered implicitly from zero.
enum UnpackOffset {
	OFFSET_X,	// 0
	OFFSET_Y,	// 1
	OFFSET_Z,	// 2
	OFFSET_W	// 3
};
// Stores `data` into row register r0..r3 (chosen by `reg`) of the active VIF
// register block and returns the stored value. Any other `reg` hits
// jNO_DEFAULT.
static __forceinline u32 setVifRowRegs(u32 reg, u32 data) {
	switch (reg) {
		case 0:
			vifRegs->r0 = data;
			return data;
		case 1:
			vifRegs->r1 = data;
			return data;
		case 2:
			vifRegs->r2 = data;
			return data;
		case 3:
			vifRegs->r3 = data;
			return data;
		jNO_DEFAULT;
	}
	return data;
}
// Returns row register r0..r3 (chosen by `reg`) of the active VIF register
// block. Any other `reg` hits jNO_DEFAULT; the fallback result is 0.
static __forceinline u32 getVifRowRegs(u32 reg) {
	u32 rowValue = 0;
	switch (reg) {
		case 0: rowValue = vifRegs->r0; break;
		case 1: rowValue = vifRegs->r1; break;
		case 2: rowValue = vifRegs->r2; break;
		case 3: rowValue = vifRegs->r3; break;
		jNO_DEFAULT;
	}
	return rowValue;
}
// Returns column register c0..c2 for reg 0..2; every other value of `reg`
// yields c3.
static __forceinline u32 getVifColRegs(u32 reg) {
	if (reg == 0) return vifRegs->c0;
	if (reg == 1) return vifRegs->c1;
	if (reg == 2) return vifRegs->c2;
	return vifRegs->c3;
}
// Writes one 32-bit lane of an unpacked vector.
//   offnum - lane index; selects the row register and the 2-bit mask field
//   dest   - destination word, updated in place (left untouched for selector 3)
//   data   - decoded source value for this lane
// With doMask the 2-bit selector is taken from vifRegs->mask: 8 bits per write
// cycle (vif->cl, where every value other than 0..2 uses the top group) and
// 2 bits per lane. Without doMask the selector is always 0.
template< bool doMask >
static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data) {
	const u32 rowValue = getVifRowRegs(offnum);

	int selector = 0;
	if (doMask) {
		// The unsigned compare also routes any out-of-range cl to the last group.
		const u32 cycleGroup = ((u32)vif->cl <= 2) ? (u32)vif->cl : 3;
		selector = (vifRegs->mask >> ((cycleGroup * 8) + (offnum * 2))) & 0x3;
	}

	if (selector == 1) {
		dest = rowValue;
		return;
	}
	if (selector == 2) {
		dest = getVifColRegs(vif->cl);
		return;
	}
	if (selector != 0) {
		return;		// selector 3: destination keeps its current contents
	}

	// selector 0: store the data itself, optionally combined with the row register.
	if ((vif->cmd & 0x6F) == 0x6f) {
		dest = data; // v4-5 Unpack Mode
		return;
	}

	switch (vifRegs->mode) {
		case 1:
			dest = data + rowValue;
			break;
		case 2:
			// Accumulates into the row register and stores the new total.
			dest = setVifRowRegs(offnum, rowValue + data);
			break;
		default:
			dest = data;
			break;
	}
}
// S-# unpack: the single source element is written to all four lanes of the
// destination vector. `size` is not used; an S-# packet is always complete,
// so the offset register is not consulted.
template < bool doMask, class T >
static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size)
{
	writeXYZW<doMask>(OFFSET_X, dest[0], *data);
	writeXYZW<doMask>(OFFSET_Y, dest[1], *data);
	writeXYZW<doMask>(OFFSET_Z, dest[2], *data);
	writeXYZW<doMask>(OFFSET_W, dest[3], *data);
}
// V2-# unpack: writes up to two source elements (X, Y) and then fills Z and W.
// Progress is tracked in vifRegs->offset so a vector can be resumed part-way:
// each stanza only runs when the offset register names its lane.
//   dest - destination vector (advanced lane by lane)
//   data - source elements of type T
//   size - number of source elements still available
template <bool doMask, class T>
static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
{
if (vifRegs->offset == OFFSET_X)
{
if (size > 0)
{
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
vifRegs->offset = OFFSET_Y;
size--;
}
}
if (vifRegs->offset == OFFSET_Y)
{
if (size > 0)
{
// `data` is deliberately not advanced here: the W stanza below re-reads this element.
writeXYZW<doMask>(vifRegs->offset, *dest++, *data);
vifRegs->offset = OFFSET_Z;
size--;
}
}
if (vifRegs->offset == OFFSET_Z)
{
// NOTE(review): `*dest-2` parses as `(*dest) - 2`, and `dest` is both read and
// post-incremented within one argument list, so the value passed depends on
// argument evaluation order. Looks like `*(dest - 2)` (the X lane) may have been
// intended -- confirm against hardware behaviour before changing.
writeXYZW<doMask>(vifRegs->offset, *dest++, *dest-2);
vifRegs->offset = OFFSET_W;
}
if (vifRegs->offset == OFFSET_W)
{
writeXYZW<doMask>(vifRegs->offset, *dest, *data);
vifRegs->offset = OFFSET_X;
}
}
// V3-# unpack: writes up to three source elements (X, Y, Z) and then W.
// vifRegs->offset records the next lane to write, so a vector interrupted by
// running out of source elements (`size`) can be resumed by a later call.
template <bool doMask, class T>
static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
{
	// X, Y and Z are handled identically: each consumes one source element,
	// but only when it is the pending lane and data remains.
	for (u32 lane = OFFSET_X; lane <= OFFSET_Z; lane++)
	{
		if (vifRegs->offset != lane) continue;
		if (size <= 0) continue;

		writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
		vifRegs->offset = lane + 1;
		size--;
	}

	if (vifRegs->offset == OFFSET_W)
	{
		// V3-# does some bizarre thing with alignment, every 6qw of data the W becomes 0 (strange console!)
		// Ape Escape doesn't seem to like it tho (what the hell?) gonna have to investigate
		// W is written from whatever `data` currently points at; no element is consumed.
		writeXYZW<doMask>(vifRegs->offset, *dest, *data);
		vifRegs->offset = OFFSET_X;
	}
}
// V4-# unpack: writes `size` consecutive source elements, starting at the lane
// named by vifRegs->offset and advancing it once per element. The offset is
// wrapped back to X only after the loop, once it has moved past W.
template <bool doMask, class T>
static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
{
	for (; size > 0; size--)
	{
		writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
		vifRegs->offset++;
	}

	if (vifRegs->offset > OFFSET_W)
	{
		vifRegs->offset = OFFSET_X;
	}
}
// V4-5 unpack: splits the low 16 bits of *data into three 5-bit fields and one
// 1-bit field, each shifted so it lands in the top bits of a byte, and writes
// them to lanes X..W. `size` is not used; as with S-#, the packet is always
// complete.
template< bool doMask >
static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
{
	writeXYZW<doMask>(OFFSET_X, dest[0], ((*data & 0x001f) << 3));	// bits  0..4  -> bits 3..7
	writeXYZW<doMask>(OFFSET_Y, dest[1], ((*data & 0x03e0) >> 2));	// bits  5..9  -> bits 3..7
	writeXYZW<doMask>(OFFSET_Z, dest[2], ((*data & 0x7c00) >> 7));	// bits 10..14 -> bits 3..7
	writeXYZW<doMask>(OFFSET_W, dest[3], ((*data & 0x8000) >> 8));	// bit  15     -> bit 7
}
// =====================================================================================================
// Fixed-size entry points: each forwards to the matching UNPACK_* routine with
// the element count supplied as the `size` template argument, giving a
// two-argument signature suitable for the funcU/funcS table slots.
template < bool doMask, int size, class T >
static void __fastcall fUNPACK_S(u32 *dest, T *data)
{
	UNPACK_S<doMask, T>(dest, data, size);
}

template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V2(u32 *dest, T *data)
{
	UNPACK_V2<doMask, T>(dest, data, size);
}

template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V3(u32 *dest, T *data)
{
	UNPACK_V3<doMask, T>(dest, data, size);
}

template <bool doMask, int size, class T>
static void __fastcall fUNPACK_V4(u32 *dest, T *data)
{
	UNPACK_V4<doMask, T>(dest, data, size);
}

// V4-5 has no element-type or size parameter; UNPACK_V4_5 ignores the count.
template< bool doMask >
static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data)
{
	UNPACK_V4_5<doMask>(dest, data, 0); // size is ignored.
}
// --------------------------------------------------------------------------------------
// Main table for function unpacking.
// --------------------------------------------------------------------------------------
// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and
// disabled versions. This is probably simpler and more efficient than bothering
// to generate separate tables.
//
// The double-cast function pointer nonsense is to appease GCC, which gives some rather
// cryptic error about being unable to deduce the type parameters (I think it's a bug
// relating to __fastcall, which I recall having some other places as well). It's fixed
// by explicitly casting the function to itself prior to casting it to what we need it
// to be cast as. --air
//
// Cast helpers. _upk and _odd are not referenced by the macros below.
#define _upk (UNPACKFUNCTYPE)
#define _odd (UNPACKFUNCTYPE_ODD)
// Casts to the width-specific function-pointer typedefs (signed / unsigned,
// fixed-size / explicit-size); these form the inner half of the double cast.
#define _unpk_s(bits) (UNPACKFUNCTYPE_S##bits)
#define _odd_s(bits) (UNPACKFUNCTYPE_ODD_S##bits)
#define _unpk_u(bits) (UNPACKFUNCTYPE_U##bits)
#define _odd_u(bits) (UNPACKFUNCTYPE_ODD_U##bits)
// Expands to the four function-pointer initializers of one table entry
// (funcU, funcS, oddU, oddS), each followed by a comma so the size fields
// can be appended directly. The unsigned function fills both slots.
// 32-bits versions are unsigned-only!!
#define UnpackFuncPair32( sizefac, vt, doMask ) \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt<doMask, sizefac, u32>, \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_##vt<doMask, sizefac, u32>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt<doMask, u32>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_##vt<doMask, u32>,
// Same as UnpackFuncPair32 for 16- and 8-bit elements, with distinct
// unsigned (u##bits) and signed (s##bits) instantiations.
#define UnpackFuncPair( sizefac, vt, bits, doMask ) \
(UNPACKFUNCTYPE)_unpk_u(bits) fUNPACK_##vt<doMask, sizefac, u##bits>, \
(UNPACKFUNCTYPE)_unpk_s(bits) fUNPACK_##vt<doMask, sizefac, s##bits>, \
(UNPACKFUNCTYPE_ODD)_odd_u(bits) UNPACK_##vt<doMask, u##bits>, \
(UNPACKFUNCTYPE_ODD)_odd_s(bits) UNPACK_##vt<doMask, s##bits>,
// Expands to 16 table entries, one per unpack type 0x0..0xF. The four numbers
// after each function group initialize bsize, dsize, gsize and qsize, in that
// order. Types 0x3, 0x7 and 0xB have no unpacker and are all-NULL/zero.
#define UnpackFuncSet( doMask ) \
{ UnpackFuncPair32( 4, S, doMask ) 1, 4, 4, 4 }, /* 0x0 - S-32 */ \
{ UnpackFuncPair ( 4, S, 16, doMask ) 2, 2, 2, 4 }, /* 0x1 - S-16 */ \
{ UnpackFuncPair ( 4, S, 8, doMask ) 4, 1, 1, 4 }, /* 0x2 - S-8 */ \
{ NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \
{ UnpackFuncPair32( 2, V2, doMask ) 24, 4, 8, 2 }, /* 0x4 - V2-32 */ \
{ UnpackFuncPair ( 2, V2, 16, doMask ) 12, 2, 4, 2 }, /* 0x5 - V2-16 */ \
{ UnpackFuncPair ( 2, V2, 8, doMask ) 6, 1, 2, 2 }, /* 0x6 - V2-8 */ \
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \
{ UnpackFuncPair32( 3, V3, doMask ) 36, 4, 12, 3 }, /* 0x8 - V3-32 */ \
{ UnpackFuncPair ( 3, V3, 16, doMask ) 18, 2, 6, 3 }, /* 0x9 - V3-16 */ \
{ UnpackFuncPair ( 3, V3, 8, doMask ) 9, 1, 3, 3 }, /* 0xA - V3-8 */ \
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \
{ UnpackFuncPair32( 4, V4, doMask ) 48, 4, 16, 4 }, /* 0xC - V4-32 */ \
{ UnpackFuncPair ( 4, V4, 16, doMask ) 24, 2, 8, 4 }, /* 0xD - V4-16 */ \
{ UnpackFuncPair ( 4, V4, 8, doMask ) 12, 1, 4, 4 }, /* 0xE - V4-8 */ \
{ /* 0xF - V4-5 */ \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE)_unpk_u(32) fUNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5<doMask>, \
(UNPACKFUNCTYPE_ODD)_odd_u(32) UNPACK_V4_5<doMask>, \
6, 2, 2, 4 },
// The dispatch table: entries 0..15 are the unmasked unpackers, entries 16..31
// the masked ones, so bit 4 of the index selects masking.
const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] =
{
UnpackFuncSet( false )
UnpackFuncSet( true )
};

56
pcsx2/Vif_Unpack.h Normal file
View File

@ -0,0 +1,56 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Function-pointer signatures used by the unpack function table.
//   UNPACKFUNCTYPE        - (dest, data): unpack with a size fixed by the callee
//   UNPACKFUNCTYPE_ODD    - (dest, data, size): unpack with an explicit element count
//   UNPACKPARTFUNCTYPESSE - (dest, data, size) returning int; not __fastcall
typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data);
typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size);
typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size);

// Generators for the per-width (u8/u16/u32, s8/s16/s32) variants of the two
// signatures above. The macro bodies intentionally carry NO trailing ';': the
// terminator is supplied at the point of use, so an invocation written as
// `create_some_unpacks(32);` expands to exactly one ';' per typedef instead of
// leaving stray empty declarations (";;") at namespace scope.
#define create_unpack_u_type(bits)		typedef void (__fastcall *UNPACKFUNCTYPE_U##bits)(u32 *dest, u##bits *data)
#define create_unpack_odd_u_type(bits)	typedef void (__fastcall *UNPACKFUNCTYPE_ODD_U##bits)(u32 *dest, u##bits *data, int size)
#define create_unpack_s_type(bits)		typedef void (__fastcall *UNPACKFUNCTYPE_S##bits)(u32 *dest, s##bits *data)
#define create_unpack_odd_s_type(bits)	typedef void (__fastcall *UNPACKFUNCTYPE_ODD_S##bits)(u32 *dest, s##bits *data, int size)

// Emits all four typedefs for one element width; the last one is terminated
// by the ';' written after the invocation.
#define create_some_unpacks(bits) \
	create_unpack_u_type(bits); \
	create_unpack_odd_u_type(bits); \
	create_unpack_s_type(bits); \
	create_unpack_odd_s_type(bits)

create_some_unpacks(32);
create_some_unpacks(16);
create_some_unpacks(8);
// One entry of the unpack dispatch table (VIFfuncTable). The member order is
// relied upon by the aggregate initializers that build the table.
struct VIFUnpackFuncTable
{
// Fixed-size unpackers; VIFunpack picks funcU when vif->usn is set, funcS otherwise.
UNPACKFUNCTYPE funcU;
UNPACKFUNCTYPE funcS;
// Variable-size unpackers taking an explicit element count (same usn selection).
UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed.
UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed.
u8 bsize; // currently unused
u8 dsize; // byte size of one channel
u8 gsize; // size of data in bytes used for each write cycle
u8 qsize; // used for unpack parts, num of vectors that
// will be decompressed from data for 1 cycle
};
// The dispatch table itself: 32 entries, indexed by (cmd & 0x1f) in VIFunpack.
extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
// newVif entry points; declared here, defined elsewhere.
// NOTE(review): idx presumably selects the VIF unit (0 or 1) -- confirm at the definitions.
extern int nVifUnpack (int idx, u8 *data);
extern void initNewVif (int idx);
extern void resetNewVif(int idx);

View File

@ -1,161 +1,159 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// Old Vif Unpack Code
// Only here for testing/reference
// If newVif is defined and newVif1 isn't, vif1 will use this code
// same goes for vif0...
// Explicit instantiations for both VIF units.
template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
// Interpreter ("old") implementation of the VIF UNPACK command.
//   VIFdmanum - 0 selects VU0/vif0 (memory limit 0x1000), anything else VU1/vif1 (0x4000)
//   data      - packed source data
//   v         - VIF code being executed; v->addr is the byte offset into VU memory and
//               (v->cmd & 0x1f) selects the VIFfuncTable entry
//   size      - amount of source data; multiplied by 4 below before being compared
//               with the table's byte sizes (presumably a count of 32-bit words)
// Side effects: points the globals vifRegs/vif at the selected unit, decrements
// vifRegs->num for every vector written and updates vif->cl.
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
VURegs * VU;
u8 *cdata = (u8*)data;
u32 tempsize = 0;
const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000;
if (VIFdmanum == 0) {
VU = &VU0;
vifRegs = vif0Regs;
vif = &vif0;
}
else {
VU = &VU1;
vifRegs = vif1Regs;
vif = &vif1;
}
u32 *dest = (u32*)(VU->Mem + v->addr);
const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
size <<= 2;
// NOTE(review): cycle.wl == 0 also takes this branch and wl is used as a divisor
// a few lines below -- confirm callers never pass wl == 0.
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
if (v->addr >= memlimit) {
DevCon.Warning("Overflown at the start");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
size = std::min<u32>(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
// tempsize temporarily holds the projected end address of the whole transfer,
// including the (cl - wl) quadwords skipped after every wl writes.
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
//Sanity Check (memory overflow)
// All three outcomes below do the same work (tempsize = size; size = 0);
// they differ only in what gets logged.
if (tempsize > memlimit) {
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
DevCon.WriteLn("what!!!!!!!!!");
//tempsize = 0;
tempsize = size;
size = 0;
}
else {
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
tempsize = size;
size = 0;
}
}
else {
tempsize = size;
size = 0;
}
if (tempsize) {
// Destination step (in u32 units) taken after the wl-th write of a cycle:
// one vector plus the skipped (cl - wl) vectors.
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
size = 0;
int addrstart = v->addr;
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
// Unpack whole vectors while both source bytes and vectors to write remain.
while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) {
if(v->addr >= memlimit) {
DevCon.Warning("Mem limit overflow");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
func(dest, (u32*)cdata);
cdata += ft.gsize;
tempsize -= ft.gsize;
vifRegs->num--;
vif->cl++;
if (vif->cl == vifRegs->cycle.wl) {
dest += incdest;
v->addr +=(incdest * 4);
vif->cl = 0;
}
else {
dest += 4;
v->addr += 16;
}
}
if (v->addr >= memlimit) {
v->addr &=(memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
// v->addr is put back to its value from before the loop; only dest keeps the progress.
v->addr = addrstart;
if(tempsize > 0) size = tempsize;
}
// Leftover bytes that do not form a whole vector go through the explicit-size unpacker.
if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have
DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
VIF_LOG("warning, end with size = %d", size);
// unpack one qword
//v->addr += (size / ft.dsize) * 4;
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
size = 0;
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
}
}
else { // filling write
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr);
while (vifRegs->num > 0) {
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
// unpack one qword
// First cycle.cl writes of each cycle consume source data ...
if (vif->cl < vifRegs->cycle.cl) {
if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
func(dest, (u32*)cdata);
cdata += ft.gsize;
size -= ft.gsize;
vif->cl++;
vifRegs->num--;
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
}
// ... the remaining writes call func on the current source position without
// consuming it, and are the only place this branch advances v->addr.
else {
func(dest, (u32*)cdata);
v->addr += 16;
vifRegs->num--;
vif->cl++;
}
dest += 4;
if (vifRegs->num == 0) break;
}
}
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Old Vif Unpack Code
// Only here for testing/reference
// Interpreter ("old") implementation of the VIF UNPACK command.
//   VIFdmanum - 0 selects VU0/vif0 (memory limit 0x1000), anything else VU1/vif1 (0x4000)
//   data      - packed source data
//   v         - VIF code being executed; v->addr is the byte offset into VU memory and
//               (v->cmd & 0x1f) selects the VIFfuncTable entry
//   size      - amount of source data; multiplied by 4 below before being compared
//               with the table's byte sizes (presumably a count of 32-bit words)
// Side effects: points the globals vifRegs/vif at the selected unit, decrements
// vifRegs->num for every vector written and updates vif->cl.
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
VURegs * VU;
u8 *cdata = (u8*)data;
u32 tempsize = 0;
const u32 memlimit = (VIFdmanum == 0) ? 0x1000 : 0x4000;
if (VIFdmanum == 0) {
VU = &VU0;
vifRegs = vif0Regs;
vif = &vif0;
}
else {
VU = &VU1;
vifRegs = vif1Regs;
vif = &vif1;
}
u32 *dest = (u32*)(VU->Mem + v->addr);
const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
size <<= 2;
// NOTE(review): cycle.wl == 0 also takes this branch and wl is used as a divisor
// a few lines below -- confirm callers never pass wl == 0.
if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
if (v->addr >= memlimit) {
DevCon.Warning("Overflown at the start");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
size = std::min<u32>(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
// tempsize temporarily holds the projected end address of the whole transfer,
// including the (cl - wl) quadwords skipped after every wl writes.
tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
//Sanity Check (memory overflow)
// All three outcomes below do the same work (tempsize = size; size = 0);
// they differ only in what gets logged.
if (tempsize > memlimit) {
if (((vifRegs->cycle.cl != vifRegs->cycle.wl) &&
((memlimit + (vifRegs->cycle.cl - vifRegs->cycle.wl) * 16) == tempsize))) {
//It's a red herring, so ignore it! SSE unpacks will be much quicker.
DevCon.WriteLn("what!!!!!!!!!");
//tempsize = 0;
tempsize = size;
size = 0;
}
else {
DevCon.Warning("VIF%x Unpack ending %x > %x", VIFdmanum, tempsize, VIFdmanum ? 0x4000 : 0x1000);
tempsize = size;
size = 0;
}
}
else {
tempsize = size;
size = 0;
}
if (tempsize) {
// Destination step (in u32 units) taken after the wl-th write of a cycle:
// one vector plus the skipped (cl - wl) vectors.
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
size = 0;
int addrstart = v->addr;
//if((tempsize >> 2) != v->size) DevCon.Warning("split when size != tagsize");
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);
// Unpack whole vectors while both source bytes and vectors to write remain.
while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) {
if(v->addr >= memlimit) {
DevCon.Warning("Mem limit overflow");
v->addr &= (memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
func(dest, (u32*)cdata);
cdata += ft.gsize;
tempsize -= ft.gsize;
vifRegs->num--;
vif->cl++;
if (vif->cl == vifRegs->cycle.wl) {
dest += incdest;
v->addr +=(incdest * 4);
vif->cl = 0;
}
else {
dest += 4;
v->addr += 16;
}
}
if (v->addr >= memlimit) {
v->addr &=(memlimit - 1);
dest = (u32*)(VU->Mem + v->addr);
}
// v->addr is put back to its value from before the loop; only dest keeps the progress.
v->addr = addrstart;
if(tempsize > 0) size = tempsize;
}
// Leftover bytes that do not form a whole vector go through the explicit-size unpacker.
if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have
DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
VIF_LOG("warning, end with size = %d", size);
// unpack one qword
//v->addr += (size / ft.dsize) * 4;
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
size = 0;
VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
}
}
else { // filling write
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr);
while (vifRegs->num > 0) {
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
// unpack one qword
// First cycle.cl writes of each cycle consume source data ...
if (vif->cl < vifRegs->cycle.cl) {
if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
func(dest, (u32*)cdata);
cdata += ft.gsize;
size -= ft.gsize;
vif->cl++;
vifRegs->num--;
if (vif->cl == vifRegs->cycle.wl) {
vif->cl = 0;
}
}
// ... the remaining writes call func on the current source position without
// consuming it, and are the only place this branch advances v->addr.
else {
func(dest, (u32*)cdata);
v->addr += 16;
vifRegs->num--;
vif->cl++;
}
dest += 4;
if (vifRegs->num == 0) break;
}
}
}

View File

@ -820,52 +820,56 @@
RelativePath="..\..\VifDma.h"
>
</File>
<File
RelativePath="..\..\VifDma_internal.h"
>
</File>
<File
RelativePath="..\..\VIFunpack.cpp"
>
</File>
<Filter
Name="newVif"
Name="Unpack"
>
<File
RelativePath="..\..\x86\newVif.h"
RelativePath="..\..\Vif_Unpack.cpp"
>
</File>
<File
RelativePath="..\..\x86\newVif_BlockBuffer.h"
RelativePath="..\..\Vif_Unpack.h"
>
</File>
<File
RelativePath="..\..\x86\newVif_HashBucket.h"
>
</File>
<File
RelativePath="..\..\x86\newVif_OldUnpack.inl"
>
</File>
<File
RelativePath="..\..\x86\newVif_Unpack.cpp"
RelativePath="..\..\Vif_Unpack.inl"
>
</File>
<Filter
Name="Dynarec"
Name="newVif"
>
<File
RelativePath="..\..\x86\VifUnpackSSE.cpp"
RelativePath="..\..\x86\newVif.h"
>
</File>
<File
RelativePath="..\..\x86\VifUnpackSSE.h"
RelativePath="..\..\x86\newVif_BlockBuffer.h"
>
</File>
<File
RelativePath="..\..\x86\VifUnpackSSE_Dynarec.cpp"
RelativePath="..\..\x86\newVif_HashBucket.h"
>
</File>
<File
RelativePath="..\..\x86\newVif_Unpack.cpp"
>
</File>
<Filter
Name="Dynarec"
>
<File
RelativePath="..\..\x86\newVif_Dynarec.cpp"
>
</File>
<File
RelativePath="..\..\x86\newVif_UnpackSSE.cpp"
>
</File>
<File
RelativePath="..\..\x86\newVif_UnpackSSE.h"
>
</File>
</Filter>
</Filter>
</Filter>
</Filter>

View File

@ -101,4 +101,3 @@ extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
static const bool useOldUnpack = 0; // Use the old unpack code in Vif_Unpack.inl (formerly newVif_OldUnpack.inl)
static const bool newVifDynaRec = 1; // Use the dynarec code in newVif_Dynarec.cpp

View File

@ -1,267 +1,267 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
// authors: cottonvibes(@gmail.com)
// Jake.Stine (@gmail.com)
#include "PrecompiledHeader.h"
#include "VifUnpackSSE.h"
static __aligned16 nVifBlock _vBlock = {0};
static __pagealigned u8 nVifMemCmp[__pagesize];
void dVifInit(int idx) {
nVif[idx].numBlocks = 0;
nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
nVif[idx].vifBlocks = new HashBucket<_tParams>();
nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
}
void dVifClose(int idx) {
nVif[idx].numBlocks = 0;
safe_delete(nVif[idx].vifCache);
safe_delete(nVif[idx].vifBlocks);
}
VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
: v(vif_)
, vB(vifBlock_)
{
isFill = (vB.cl < vB.wl);
usn = (vB.upkType>>5) & 1;
doMask = (vB.upkType>>4) & 1;
doMode = vB.mode & 3;
}
#define makeMergeMask(x) { \
x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
}
_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
u32 m0 = vB.mask;
u32 m1 = m0 & 0xaaaaaaaa;
u32 m2 =(~m1>>1) & m0;
u32 m3 = (m1>>1) & ~m0;
u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); }
if (m3&&doMask) {
xMOVAPS(xmmCol0, ptr32[col]);
if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
}
//if (doMask||doMode) loadRowCol((nVifStruct&)v);
}
void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
int t = regX.Id ? 0 : 1; // Get Temp Reg
int cc = aMin(vCL, 3);
u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
u32 m1 = m0 & 0xaa;
u32 m2 =(~m1>>1) & m0;
u32 m3 = (m1>>1) & ~m0;
u32 m4 = (m1>>1) & m0;
makeMergeMask(m2);
makeMergeMask(m3);
makeMergeMask(m4);
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row
if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col
if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect
if (doMode) {
u32 m5 = (~m1>>1) & ~m0;
if (!doMask) m5 = 0xf;
else makeMergeMask(m5);
if (m5 < 0xf) {
xPXOR(xmmTemp, xmmTemp);
mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5);
xPADD.D(regX, xmmTemp);
if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5);
}
else if (m5 == 0xf) {
xPADD.D(regX, xmmRow);
if (doMode==2) xMOVAPS(xmmRow, regX);
}
}
xMOVAPS(ptr32[dstIndirect], regX);
}
void VifUnpackSSE_Dynarec::writeBackRow() const {
u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
xMOVAPS(ptr32[row], xmmRow);
DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
// ToDo: Do we need to write back to vifregs.rX too!? :/
}
static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
{
// Shifts the displacement factor of a given indirect address, so that the address
// remains in the optimal 0xf0 range (which allows for byte-form displacements when
// generating instructions).
int addImm = 0;
while( addr.Displacement >= 0x80 )
{
addImm += 0xf0;
addr -= 0xf0;
}
if(addImm) xADD(modReg, addImm);
}
static bool UsesTwoRegs[] =
{
true, true, true, true,
false, false, false, false,
false, false, false, false,
false, false, false, true,
};
void VifUnpackSSE_Dynarec::CompileRoutine() {
const int upkNum = v.vif->cmd & 0xf;
const u8& vift = nVifT[upkNum];
const int cycleSize = isFill ? vB.cl : vB.wl;
const int blockSize = isFill ? vB.wl : vB.cl;
const int skipSize = blockSize - cycleSize;
int vNum = v.vifRegs->num;
vCL = v.vif->cl;
doMode = upkNum == 0xf ? 0 : doMode;
SetMasks(cycleSize);
while (vNum) {
ShiftDisplacementWindow( srcIndirect, edx );
ShiftDisplacementWindow( dstIndirect, ecx );
if (vCL < cycleSize) {
xUnpack(upkNum);
xMovDest();
dstIndirect += 16;
srcIndirect += vift;
if( IsUnmaskedOp() ) {
++destReg;
++workReg;
}
vNum--;
if (++vCL == blockSize) vCL = 0;
}
else if (isFill) {
DevCon.WriteLn("filling mode!");
VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) );
fill.xUnpack(upkNum);
fill.xMovDest();
dstIndirect += 16;
vNum--;
if (++vCL == blockSize) vCL = 0;
}
else {
dstIndirect += (16 * skipSize);
vCL = 0;
}
}
if (doMode==2) writeBackRow();
xMOV(ptr32[&v.vif->cl], vCL);
xMOV(ptr32[&v.vifRegs->num], vNum);
xRET();
}
static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
u8* endPtr; // Check if we need to wrap around VU memory
u8* ptr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit));
if (!isFill) { // Account for skip-cycles
int skipSize = cl - wl;
int blocks = _vBlock.num / wl;
int skips = (blocks * skipSize + _vBlock.num) * 16;
endPtr = ptr + skips;
}
else endPtr = ptr + (_vBlock.num * 16);
if ( endPtr > v.vuMemEnd ) {
DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
}
return ptr;
}
static _f void dVifRecLimit(int idx) {
if (nVif[idx].recPtr > nVif[idx].recEnd) {
DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd);
nVif[idx].vifBlocks->clear();
nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
}
}
_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
const nVifStruct& v = nVif[idx];
const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5);
const int doMask = v.vif->cmd & 0x10;
const int cycle_cl = v.vifRegs->cycle.cl;
const int cycle_wl = v.vifRegs->cycle.wl;
const int cycleSize = isFill ? cycle_cl : cycle_wl;
const int blockSize = isFill ? cycle_wl : cycle_cl;
if (v.vif->cl >= blockSize) v.vif->cl = 0;
_vBlock.upkType = upkType;
_vBlock.num = (u8&)v.vifRegs->num;
_vBlock.mode = (u8&)v.vifRegs->mode;
_vBlock.scl = v.vif->cl;
_vBlock.cl = cycle_cl;
_vBlock.wl = cycle_wl;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? v.vifRegs->mask : 0;
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
//DevCon.WriteLn("Running Recompiled Block!");
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
}
else {
//DevCon.WriteLn("Running Interpreter Block");
_nVifUnpack(idx, data, size, isFill);
}
return;
}
DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++);
//DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]",
// _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode,
// doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
//);
xSetPtr(v.recPtr);
_vBlock.startPtr = (uptr)xGetAlignedCallTarget();
v.vifBlocks->add(_vBlock);
VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
nVif[idx].recPtr = xGetPtr();
dVifRecLimit(idx);
// Run the block we just compiled. Various conditions may force us to still use
// the interpreter unpacker though, so a recursive call is the safest way here...
dVifUnpack(idx, data, size, isFill);
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
// authors: cottonvibes(@gmail.com)
// Jake.Stine (@gmail.com)
#include "PrecompiledHeader.h"
#include "newVif_UnpackSSE.h"
// Scratch block descriptor.  dVifUnpack fills it in for the packet being processed and
// uses it both as the lookup key and as the value inserted into the block table;
// dVifsetVUptr reads its 'num' field.
static __aligned16 nVifBlock _vBlock = {0};
// Page-sized, page-aligned buffer.  Not referenced anywhere else in this file.
static __pagealigned u8 nVifMemCmp[__pagesize];
void dVifInit(int idx) {
nVif[idx].numBlocks = 0;
nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
nVif[idx].vifBlocks = new HashBucket<_tParams>();
nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
}
void dVifClose(int idx) {
nVif[idx].numBlocks = 0;
safe_delete(nVif[idx].vifCache);
safe_delete(nVif[idx].vifBlocks);
}
// Builds a recompiler for one block description.
//   vif_      - per-unit VIF state the generated code reads from / writes to
//   vifBlock_ - block key; its upkType, mode, cl and wl fields select the code shape
VifUnpackSSE_Dynarec::VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
	: v(vif_)
	, vB(vifBlock_)
{
	// upkType bit 4 = masking enabled, bit 5 = unsigned unpack.
	doMask = (vB.upkType & 0x10) != 0;
	usn    = (vB.upkType & 0x20) != 0;
	doMode = vB.mode & 3;
	// Filling write: the write cycle (wl) is longer than the data cycle (cl).
	isFill = (vB.wl > vB.cl);
}
// Packs the even bits of a 2-bit-per-field mask into a 4-bit merge mask, reversing
// the field order:  bit 0 -> bit 3,  bit 2 -> bit 2,  bit 4 -> bit 1,  bit 6 -> bit 0.
// All other bits of the input are discarded.  'x' must be a modifiable lvalue; it is
// overwritten with the result.
// The argument is parenthesized and the body wrapped in do/while(0) so the macro acts
// as one statement (safe in an un-braced if/else) and accepts any lvalue expression.
#define makeMergeMask(x) do { \
	(x) = (((x)&0x40)>>6) | (((x)&0x10)>>3) | ((x)&4) | (((x)&1)<<3); \
} while (0)
// Emits the loads that preload the row/column registers needed by this block.
//   cS - number of cycles for which column registers may be needed (the caller in
//        this file passes cycleSize).
// vB.mask is treated as sixteen 2-bit fields, one byte (four fields) per cycle.
_f void VifUnpackSSE_Dynarec::SetMasks(int cS) const {
u32 m0 = vB.mask;
// High bit of every 2-bit field.
u32 m1 = m0 & 0xaaaaaaaa;
// Even bits: set where the field is 01 (the "row" case in doMaskWrite).  The odd bits
// also carry a copy of m0's high bits, so the non-zero test below is conservative: it
// can load the row register when no field actually selects it.
u32 m2 =(~m1>>1) & m0;
// Even bits only: set where the field is 10 (the "col" case in doMaskWrite).
u32 m3 = (m1>>1) & ~m0;
u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
// The row register is also needed whenever a mode (doMode != 0) is active.
if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); }
if (m3&&doMask) {
xMOVAPS(xmmCol0, ptr32[col]);
// One column register per cycle, derived from xmmCol0 only when that cycle's mask byte
// selects a column.  xmmCol0 itself is shuffled last because it is the source operand
// of the three shuffles before it.
// NOTE(review): _v0.._v3 presumably broadcast element 0..3 -- confirm.
if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
}
//if (doMask||doMode) loadRowCol((nVifStruct&)v);
}
// Emits the masked/mode-adjusted store of one unpacked vector to [dstIndirect].
//   regX - register holding the unpacked data; must be xmm0 or xmm1 (asserted below).
// The mask byte for the current cycle (vCL, clamped to 3) is split into four 2-bit
// fields.  Per field: 00 keeps the unpacked data (and receives the mode add), 01 takes
// the row register, 10 takes the column register for this cycle, 11 keeps what is
// already in destination memory.
void VifUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
// The other one of xmm0/xmm1 serves as scratch for mergeVectors.
int t = regX.Id ? 0 : 1; // Get Temp Reg
// Cycles past the fourth reuse the fourth mask byte.
int cc = aMin(vCL, 3);
u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
u32 m1 = m0 & 0xaa;
// Only the even bits of m2..m5 are meaningful; makeMergeMask discards the rest.
u32 m2 =(~m1>>1) & m0;
u32 m3 = (m1>>1) & ~m0;
u32 m4 = (m1>>1) & m0;
makeMergeMask(m2);
makeMergeMask(m3);
makeMergeMask(m4);
// The destination is read before any merge so xmmTemp holds the old memory contents
// until the write-protect merge below.
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
if (doMask&&m2) { mergeVectors(regX.Id, xmmRow.Id, t, m2); } // Merge Row
if (doMask&&m3) { mergeVectors(regX.Id, xmmCol0.Id+cc, t, m3); } // Merge Col
if (doMask&&m4) { mergeVectors(regX.Id, xmmTemp.Id, t, m4); } // Merge Write Protect
if (doMode) {
// m5 selects the fields the mode applies to: the 00 fields when masking, all four
// fields otherwise.
u32 m5 = (~m1>>1) & ~m0;
if (!doMask) m5 = 0xf;
else makeMergeMask(m5);
if (m5 < 0xf) {
// Partial case: build a vector that is the row value in the m5 fields and zero
// elsewhere, then add it.  xmmTemp is free again here (write protect already merged).
xPXOR(xmmTemp, xmmTemp);
mergeVectors(xmmTemp.Id, xmmRow.Id, t, m5);
xPADD.D(regX, xmmTemp);
// doMode 2 also stores the sums back into the row register.
if (doMode==2) mergeVectors(xmmRow.Id, regX.Id, t, m5);
}
else if (m5 == 0xf) {
// Whole-vector case: add the row register directly.
xPADD.D(regX, xmmRow);
if (doMode==2) xMOVAPS(xmmRow, regX);
}
}
xMOVAPS(ptr32[dstIndirect], regX);
}
void VifUnpackSSE_Dynarec::writeBackRow() const {
u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
xMOVAPS(ptr32[row], xmmRow);
DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
// ToDo: Do we need to write back to vifregs.rX too!? :/
}
// Keeps the displacement of an indirect address small enough for a byte-form
// displacement: while it is 0x80 or more, 0xf0 is taken off the displacement.  The
// total removed is then added to 'modReg' with a single emitted ADD, so the effective
// address is unchanged.  Nothing is emitted when no adjustment was needed.
static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
{
	int shifted = 0;
	for( ; addr.Displacement >= 0x80; addr -= 0xf0 )
		shifted += 0xf0;

	if( shifted != 0 )
		xADD(modReg, shifted);
}
// 16-entry lookup table; entries 0-3 and 15 are true.  Nothing in this file reads it.
// NOTE(review): presumably indexed by unpack type (cmd & 0xf), marking the unpacks
// whose generated code uses both the work and the dest register -- confirm before use.
// Declared const: it is a fixed table and must not be modified at run time.
static const bool UsesTwoRegs[] =
{
	true,  true,  true,  true,
	false, false, false, false,
	false, false, false, false,
	false, false, false, true,
};
// Emits one fully unrolled routine that unpacks every vector of the current packet.
// The unpack type, vector count and starting cycle are read from the live VIF state
// (v.vif / v.vifRegs) at compile time; cl/wl come from the block key vB.
// The emitted code ends by storing the final cycle counter and a vector count of zero
// back into the VIF state, followed by RET.
void VifUnpackSSE_Dynarec::CompileRoutine() {
const int upkNum = v.vif->cmd & 0xf;
// Amount added to the source displacement after each unpacked vector.
const u8& vift = nVifT[upkNum];
// cycleSize = cycles that consume source data, blockSize = full cycle length.
const int cycleSize = isFill ? vB.cl : vB.wl;
const int blockSize = isFill ? vB.wl : vB.cl;
const int skipSize = blockSize - cycleSize;
int vNum = v.vifRegs->num;
vCL = v.vif->cl;
// Unpack type 0xf (V4-5) never gets a mode applied.
doMode = upkNum == 0xf ? 0 : doMode;
SetMasks(cycleSize);
// NOTE(review): vNum is only decremented in the first two branches.  With
// cycleSize == 0 and !isFill neither is ever taken, so this loop would not terminate.
while (vNum) {
ShiftDisplacementWindow( srcIndirect, edx );
ShiftDisplacementWindow( dstIndirect, ecx );
if (vCL < cycleSize) {
// Data cycle: unpack one vector from the source and store it.
xUnpack(upkNum);
xMovDest();
dstIndirect += 16;
srcIndirect += vift;
// Unmasked ops move on to the next register pair for the following vector.
if( IsUnmaskedOp() ) {
++destReg;
++workReg;
}
vNum--;
if (++vCL == blockSize) vCL = 0;
}
else if (isFill) {
// Filling write: store another vector without advancing the source.
DevCon.WriteLn("filling mode!");
VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) );
fill.xUnpack(upkNum);
fill.xMovDest();
dstIndirect += 16;
vNum--;
if (++vCL == blockSize) vCL = 0;
}
else {
// Skipping write: jump the destination over the skipped quadwords, restart the cycle.
dstIndirect += (16 * skipSize);
vCL = 0;
}
}
if (doMode==2) writeBackRow();
xMOV(ptr32[&v.vif->cl], vCL);
xMOV(ptr32[&v.vifRegs->num], vNum);
xRET();
}
// Computes the VU memory destination for the current packet and checks that the whole
// transfer fits before the end of VU memory.
//   cl, wl - cycle register values; in skipping mode the skipped quadwords are counted
//            in, and wl is used as a divisor there (so it must be non-zero)
//   isFill - true for filling-write mode (no skipped quadwords)
// Returns the destination pointer, or NULL when the transfer would run past
// v.vuMemEnd; callers then use the interpreter, which has wrap-around logic.
static _f u8* dVifsetVUptr(const nVifStruct& v, int cl, int wl, bool isFill) {
	u8* const startPtr = (u8*)(v.VU->Mem + (v.vif->tag.addr & v.vuMemLimit));

	// Quadwords covered by the transfer, including skipped ones.
	int qwords = _vBlock.num;
	if (!isFill) {
		qwords += (_vBlock.num / wl) * (cl - wl);
	}

	u8* const endPtr = startPtr + (qwords * 16);
	if (endPtr <= v.vuMemEnd) {
		return startPtr;
	}

	DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
	return NULL;
}
// Resets the recompiler cache of unit 'idx' once the emit cursor has passed the limit
// set in dVifInit: every known block is forgotten and emission restarts at the
// beginning of the cache.
static _f void dVifRecLimit(int idx) {
	nVifStruct& unit = nVif[idx];
	if (unit.recPtr <= unit.recEnd) return;

	DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", unit.recPtr, unit.recEnd);
	unit.vifBlocks->clear();
	unit.recPtr = unit.vifCache->getBlock();
}
// Runs one VIF unpack packet for unit 'idx' through the dynarec.
//   data   - source pointer passed to the compiled routine or the interpreter
//   size   - source size; only forwarded to the interpreter fallback
//   isFill - true for filling-write mode; selects which of CL/WL is the block size
// The current VIF state is turned into a block key.  If a compiled block exists for
// that key it is executed, unless the destination would run past the end of VU memory,
// in which case the interpreter is used.  Otherwise a block is compiled and the
// function calls itself once more to execute it.
_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
	const nVifStruct& v = nVif[idx];
	// Key layout: bits 0-3 unpack type, bit 4 masking, bit 5 unsigned.
	const u8  upkType   = (v.vif->cmd & 0x1f) | ((!!v.vif->usn) << 5);
	const int doMask    = v.vif->cmd & 0x10;
	const int cycle_cl  = v.vifRegs->cycle.cl;
	const int cycle_wl  = v.vifRegs->cycle.wl;
	const int blockSize = isFill ? cycle_wl : cycle_cl;

	// A cycle counter left outside the current block size restarts at zero.
	if (v.vif->cl >= blockSize) v.vif->cl = 0;

	_vBlock.upkType = upkType;
	_vBlock.num     = (u8&)v.vifRegs->num;
	_vBlock.mode    = (u8&)v.vifRegs->mode;
	_vBlock.scl     = v.vif->cl;
	_vBlock.cl      = cycle_cl;
	_vBlock.wl      = cycle_wl;
	// Zero out the mask parameter if it's unused -- games leave random junk
	// values here which cause false recblock cache misses.
	_vBlock.mask    = doMask ? v.vifRegs->mask : 0;

	if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
		if (u8* dest = dVifsetVUptr(v, cycle_cl, cycle_wl, isFill)) {
			//DevCon.WriteLn("Running Recompiled Block!");
			((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
		}
		else {
			//DevCon.WriteLn("Running Interpreter Block");
			_nVifUnpack(idx, data, size, isFill);
		}
		return;
	}

	DevCon.WriteLn("nVif%d: Recompiled Block! [%d]", idx, nVif[idx].numBlocks++);
	//DevCon.WriteLn(L"[num=% 3d][upkType=0x%02x][scl=%d][cl=%d][wl=%d][mode=%d][m=%d][mask=%s]",
	// _vBlock.num, _vBlock.upkType, _vBlock.scl, _vBlock.cl, _vBlock.wl, _vBlock.mode,
	// doMask >> 4, doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
	//);

	// The block is registered before its code is emitted; startPtr is the aligned
	// address the emitter is about to write to.
	xSetPtr(v.recPtr);
	_vBlock.startPtr = (uptr)xGetAlignedCallTarget();
	v.vifBlocks->add(_vBlock);
	VifUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
	nVif[idx].recPtr = xGetPtr();

	dVifRecLimit(idx);

	// Run the block we just compiled. Various conditions may force us to still use
	// the interpreter unpacker though, so a recursive call is the safest way here...
	dVifUnpack(idx, data, size, isFill);
}

View File

@ -19,9 +19,9 @@
#include "PrecompiledHeader.h"
#include "Common.h"
#include "VifDma_internal.h"
#include "VifDma.h"
#include "newVif.h"
#include "newVif_OldUnpack.inl"
#include "Vif_Unpack.inl"
__aligned16 nVifStruct nVif[2];
__aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]

View File

@ -1,310 +1,310 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "VifUnpackSSE.h"
#define xMOV8(regX, loc) xMOVSSZX(regX, loc)
#define xMOV16(regX, loc) xMOVSSZX(regX, loc)
#define xMOV32(regX, loc) xMOVSSZX(regX, loc)
#define xMOV64(regX, loc) xMOVUPS(regX, loc)
#define xMOV128(regX, loc) xMOVUPS(regX, loc)
static __pagealigned u8 nVifUpkExec[__pagesize*4];
// Merges xmm vectors without modifying source reg
void mergeVectors(int dest, int src, int temp, int xyzw) {
if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15)
|| (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) {
mVUmergeRegs(dest, src, xyzw);
}
else {
SSE_MOVAPS_XMM_to_XMM(temp, src);
mVUmergeRegs(dest, temp, xyzw);
}
}
// Loads Row/Col Data from vifRegs instead of g_vifmask
// Useful for testing vifReg and g_vifmask inconsistency.
void loadRowCol(nVifStruct& v) {
xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
xPSHUF.D(xmm0, xmm0, _v0);
xPSHUF.D(xmm1, xmm1, _v0);
xPSHUF.D(xmm2, xmm2, _v0);
xPSHUF.D(xmm6, xmm6, _v0);
mVUmergeRegs(XMM6, XMM0, 8);
mVUmergeRegs(XMM6, XMM1, 4);
mVUmergeRegs(XMM6, XMM2, 2);
xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
xPSHUF.D(xmm2, xmm2, _v0);
xPSHUF.D(xmm3, xmm3, _v0);
xPSHUF.D(xmm4, xmm4, _v0);
xPSHUF.D(xmm5, xmm5, _v0);
}
// =====================================================================================================
// VifUnpackSSE_Base Section
// =====================================================================================================
VifUnpackSSE_Base::VifUnpackSSE_Base()
: dstIndirect(ecx) // parameter 1 of __fastcall
, srcIndirect(edx) // parameter 2 of __fastcall
, workReg( xmm1 )
, destReg( xmm0 )
{
}
void VifUnpackSSE_Base::xMovDest() const {
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
else { doMaskWrite(destReg); }
}
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
if (usn) { xPSRL.D(regX, n); }
else { xPSRA.D(regX, n); }
}
void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]);
else xPMOVSX.BD(regX, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]);
else xPMOVSX.WD(regX, ptr64[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_S_32() const {
xMOV32 (workReg, ptr32[srcIndirect]);
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_S_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (workReg);
}
else {
xMOV16 (workReg, ptr32[srcIndirect]);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 16);
}
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_S_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (workReg);
}
else {
xMOV8 (workReg, ptr32[srcIndirect]);
xPUNPCK.LBW(workReg, workReg);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 24);
}
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_V2_32() const {
xMOV64 (destReg, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_V2_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (destReg);
}
else {
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
}
void VifUnpackSSE_Base::xUPK_V2_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (destReg);
}
else {
xMOV16 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
}
void VifUnpackSSE_Base::xUPK_V3_32() const {
xMOV128 (destReg, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_V3_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (destReg);
}
else {
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
}
void VifUnpackSSE_Base::xUPK_V3_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (destReg);
}
else {
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
}
void VifUnpackSSE_Base::xUPK_V4_32() const {
xMOV128 (destReg, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_V4_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (destReg);
}
else {
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
}
void VifUnpackSSE_Base::xUPK_V4_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (destReg);
}
else {
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
}
void VifUnpackSSE_Base::xUPK_V4_5() const {
xMOV16 (workReg, ptr32[srcIndirect]);
xPSHUF.D (workReg, workReg, _v0);
xPSLL.D (workReg, 3); // ABG|R5.000
xMOVAPS (destReg, workReg); // x|x|x|R
xPSRL.D (workReg, 8); // ABG
xPSLL.D (workReg, 3); // AB|G5.000
mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R
xPSRL.D (workReg, 8); // AB
xPSLL.D (workReg, 3); // A|B5.000
mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R
xPSRL.D (workReg, 8); // A
xPSLL.D (workReg, 7); // A.0000000
mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R
xPSLL.D (destReg, 24); // can optimize to
xPSRL.D (destReg, 24); // single AND...
}
void VifUnpackSSE_Base::xUnpack( int upknum ) const
{
switch( upknum )
{
case 0: xUPK_S_32(); break;
case 1: xUPK_S_16(); break;
case 2: xUPK_S_8(); break;
case 4: xUPK_V2_32(); break;
case 5: xUPK_V2_16(); break;
case 6: xUPK_V2_8(); break;
case 8: xUPK_V3_32(); break;
case 9: xUPK_V3_16(); break;
case 10: xUPK_V3_8(); break;
case 12: xUPK_V4_32(); break;
case 13: xUPK_V4_16(); break;
case 14: xUPK_V4_8(); break;
case 15: xUPK_V4_5(); break;
case 3:
case 7:
case 11:
pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
break;
}
}
// =====================================================================================================
// VifUnpackSSE_Simple
// =====================================================================================================
// Configures a generator for one fixed combination of signedness, masking and cycle.
//   usn_      - true for unsigned unpacks
//   domask_   - true when the masked store path is wanted
//   curCycle_ - cycle index the generated routine is specialised for
VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
{
	usn      = usn_;
	doMask   = domask_;
	curCycle = curCycle_;
}
// Emits a masked store of regX to [dstIndirect] using the nVifMask tables.
// With offX = min(curCycle, 3) the stored value is
//   (regX & nVifMask[0][offX]) | (old destination & nVifMask[1][offX]) | nVifMask[2][offX]
// xmm7 is used as scratch for the old destination contents.
void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const {
xMOVAPS(xmm7, ptr[dstIndirect]);
// Cycles past the fourth reuse the fourth set of masks.
int offX = aMin(curCycle, 3);
xPAND(regX, ptr32[nVifMask[0][offX]]);
xPAND(xmm7, ptr32[nVifMask[1][offX]]);
xPOR (regX, ptr32[nVifMask[2][offX]]);
xPOR (regX, xmm7);
xMOVAPS(ptr[dstIndirect], regX);
}
// Generates the sixteen unpack routines for one (usn, mask, curCycle) combination and
// records their entry points in nVifUpk.  Table slot for unpack type 'upk':
//   ((usn*2*16 + mask*16 + upk) * 4) + curCycle
// Unpack types whose nVifT entry is zero get a NULL slot and no code.
// Generated routines take: ecx = dest, edx = src
static void nVifGen(int usn, int mask, int curCycle) {
	const int tableBase = (usn*2*16) + (mask*16);
	VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle );

	for( int upk=0; upk<16; ++upk )
	{
		nVifCall& ucall( nVifUpk[((tableBase+upk) * 4) + curCycle] );
		if( nVifT[upk] == 0 )
		{
			ucall = NULL;
			continue;
		}

		ucall = (nVifCall)xGetAlignedCallTarget();
		vpugen.xUnpack(upk);
		vpugen.xMovDest();
		xRET();

		// The routines must stay inside the nVifUpkExec buffer.
		pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
	}
}
// Builds every static unpack routine into nVifUpkExec.  The buffer is made writable,
// filled with 0xcc, populated through nVifGen for all usn (2) x mask (2) x cycle (4)
// combinations, and finally re-protected with Protect_ReadOnly.
void VifUnpackSSE_Init()
{
	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
	memset8<0xcc>( nVifUpkExec );

	xSetPtr( nVifUpkExec );

	// combo = usn*8 + mask*4 + cycle; visits the combinations in the same order as
	// three nested loops with usn outermost and cycle innermost.
	for (int combo = 0; combo < 2*2*4; combo++) {
		nVifGen(combo >> 3, (combo >> 2) & 1, combo & 3);
	}

	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
}
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#include "PrecompiledHeader.h"
#include "newVif_UnpackSSE.h"
// Width-named load helpers.  The names record the amount of data the caller wants,
// not the size of the emitted access: the 8/16/32-bit forms all expand to xMOVSSZX and
// the 64/128-bit forms both expand to xMOVUPS.
#define xMOV8(regX, loc) xMOVSSZX(regX, loc)
#define xMOV16(regX, loc) xMOVSSZX(regX, loc)
#define xMOV32(regX, loc) xMOVSSZX(regX, loc)
#define xMOV64(regX, loc) xMOVUPS(regX, loc)
#define xMOV128(regX, loc) xMOVUPS(regX, loc)
// Page-aligned buffer of four pages.
// NOTE(review): presumably the emit target for the generated unpack routines -- confirm.
static __pagealigned u8 nVifUpkExec[__pagesize*4];
// Merges the fields of register 'src' selected by 'xyzw' into register 'dest' without
// modifying 'src'.  With SSE4, or for the masks 15, 12, 11, 8 and 3, mVUmergeRegs is
// called on 'src' directly; for any other mask 'src' is first copied into 'temp' and
// the copy is merged instead.  All register arguments are xmm register ids.
void mergeVectors(int dest, int src, int temp, int xyzw) {
	const bool mergeDirect = x86caps.hasStreamingSIMD4Extensions
		|| (xyzw==15) || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3);

	if (!mergeDirect) {
		SSE_MOVAPS_XMM_to_XMM(temp, src);
		src = temp;
	}
	mVUmergeRegs(dest, src, xyzw);
}
// Loads Row/Col Data from vifRegs instead of g_vifmask
// Useful for testing vifReg and g_vifmask inconsistency.
// Emitted code leaves the combined row values in xmm6 and one column value each in
// xmm2..xmm5; xmm0 and xmm1 are used as scratch.
void loadRowCol(nVifStruct& v) {
xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
// NOTE(review): _v0 presumably broadcasts element 0 to all four elements -- confirm.
xPSHUF.D(xmm0, xmm0, _v0);
xPSHUF.D(xmm1, xmm1, _v0);
xPSHUF.D(xmm2, xmm2, _v0);
xPSHUF.D(xmm6, xmm6, _v0);
// Fold r0, r1 and r2 into xmm6 (which already holds r3), one field each.
mVUmergeRegs(XMM6, XMM0, 8);
mVUmergeRegs(XMM6, XMM1, 4);
mVUmergeRegs(XMM6, XMM2, 2);
// xmm2 is free again once r2 has been merged, so it is reused for c0.
xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
xPSHUF.D(xmm2, xmm2, _v0);
xPSHUF.D(xmm3, xmm3, _v0);
xPSHUF.D(xmm4, xmm4, _v0);
xPSHUF.D(xmm5, xmm5, _v0);
}
// =====================================================================================================
// VifUnpackSSE_Base Section
// =====================================================================================================
// Sets the default operands for generated routines: destination pointer in ecx, source
// pointer in edx, unpacked result in xmm0 with xmm1 as working register.
VifUnpackSSE_Base::VifUnpackSSE_Base()
: dstIndirect(ecx) // parameter 1 of __fastcall
, srcIndirect(edx) // parameter 2 of __fastcall
, workReg( xmm1 )
, destReg( xmm0 )
{
}
// Emits the store of destReg to [dstIndirect]: a plain aligned store for unmasked
// operations, otherwise whatever the subclass's doMaskWrite generates.
void VifUnpackSSE_Base::xMovDest() const {
	if (!IsUnmaskedOp()) {
		doMaskWrite(destReg);
		return;
	}
	xMOVAPS(ptr[dstIndirect], destReg);
}
// Emits a right shift of every dword in regX by n bits: arithmetic (PSRA) for signed
// unpacks, logical (PSRL) when usn is set.
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
	if (!usn) {
		xPSRA.D(regX, n);
	}
	else {
		xPSRL.D(regX, n);
	}
}
// Emits a byte-to-dword extending load from [srcIndirect] into regX: sign-extending
// (PMOVSX) for signed unpacks, zero-extending (PMOVZX) when usn is set.
void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
	if (!usn) {
		xPMOVSX.BD(regX, ptr32[srcIndirect]);
	}
	else {
		xPMOVZX.BD(regX, ptr32[srcIndirect]);
	}
}
// Emits a word-to-dword extending load from [srcIndirect] into regX: sign-extending
// (PMOVSX) for signed unpacks, zero-extending (PMOVZX) when usn is set.
void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
	if (!usn) {
		xPMOVSX.WD(regX, ptr64[srcIndirect]);
	}
	else {
		xPMOVZX.WD(regX, ptr64[srcIndirect]);
	}
}
// S-32: loads one 32-bit value from the source into workReg and shuffles it into
// destReg.  NOTE(review): _v0 presumably replicates element 0 into all four elements.
void VifUnpackSSE_Base::xUPK_S_32() const {
xMOV32 (workReg, ptr32[srcIndirect]);
xPSHUF.D (destReg, workReg, _v0);
}
// S-16: extends 16-bit source data to dwords in workReg, then shuffles element 0 into
// destReg.  With SSE4 a single extending load is used; otherwise each word is
// duplicated into a dword and shifted right by 16 (signed or unsigned per usn).
void VifUnpackSSE_Base::xUPK_S_16() const {
	if (!x86caps.hasStreamingSIMD4Extensions) {
		xMOV16     (workReg, ptr32[srcIndirect]);
		xPUNPCK.LWD(workReg, workReg);
		xShiftR    (workReg, 16);
	}
	else {
		xPMOVXX16  (workReg);
	}
	xPSHUF.D(destReg, workReg, _v0);
}
// S-8: extends 8-bit source data to dwords in workReg, then shuffles element 0 into
// destReg.  With SSE4 a single extending load is used; otherwise each byte is
// replicated across a dword and shifted right by 24 (signed or unsigned per usn).
void VifUnpackSSE_Base::xUPK_S_8() const {
	if (!x86caps.hasStreamingSIMD4Extensions) {
		xMOV8      (workReg, ptr32[srcIndirect]);
		xPUNPCK.LBW(workReg, workReg);
		xPUNPCK.LWD(workReg, workReg);
		xShiftR    (workReg, 24);
	}
	else {
		xPMOVXX8   (workReg);
	}
	xPSHUF.D(destReg, workReg, _v0);
}
// V2-32: loads the source straight into destReg.  xMOV64 expands to the same unaligned
// full-register load as xMOV128.
void VifUnpackSSE_Base::xUPK_V2_32() const {
xMOV64 (destReg, ptr32[srcIndirect]);
}
// V2-16: extends 16-bit source elements to dwords directly in destReg.  With SSE4 a
// single extending load is used; otherwise each word is duplicated into a dword and
// shifted right by 16 (signed or unsigned per usn).
void VifUnpackSSE_Base::xUPK_V2_16() const {
	if (!x86caps.hasStreamingSIMD4Extensions) {
		xMOV32     (destReg, ptr32[srcIndirect]);
		xPUNPCK.LWD(destReg, destReg);
		xShiftR    (destReg, 16);
	}
	else {
		xPMOVXX16  (destReg);
	}
}
// V2-8: extends 8-bit source elements to dwords directly in destReg.  With SSE4 a
// single extending load is used; otherwise each byte is replicated across a dword and
// shifted right by 24 (signed or unsigned per usn).
void VifUnpackSSE_Base::xUPK_V2_8() const {
	if (!x86caps.hasStreamingSIMD4Extensions) {
		xMOV16     (destReg, ptr32[srcIndirect]);
		xPUNPCK.LBW(destReg, destReg);
		xPUNPCK.LWD(destReg, destReg);
		xShiftR    (destReg, 24);
	}
	else {
		xPMOVXX8   (destReg);
	}
}
// V3-32: loads a full 128-bit vector from the source into destReg.  The emitted code is
// identical to xUPK_V4_32.
void VifUnpackSSE_Base::xUPK_V3_32() const {
xMOV128 (destReg, ptr32[srcIndirect]);
}
// V3-16: extends 16-bit source elements to dwords directly in destReg.  With SSE4 a
// single extending load is used; otherwise each word is duplicated into a dword and
// shifted right by 16 (signed or unsigned per usn).  Same code as xUPK_V4_16.
void VifUnpackSSE_Base::xUPK_V3_16() const {
	if (!x86caps.hasStreamingSIMD4Extensions) {
		xMOV64     (destReg, ptr32[srcIndirect]);
		xPUNPCK.LWD(destReg, destReg);
		xShiftR    (destReg, 16);
	}
	else {
		xPMOVXX16  (destReg);
	}
}
// V3-8: extends 8-bit source elements to dwords directly in destReg.  With SSE4 a
// single extending load is used; otherwise each byte is replicated across a dword and
// shifted right by 24 (signed or unsigned per usn).  Same code as xUPK_V4_8.
void VifUnpackSSE_Base::xUPK_V3_8() const {
	if (!x86caps.hasStreamingSIMD4Extensions) {
		xMOV32     (destReg, ptr32[srcIndirect]);
		xPUNPCK.LBW(destReg, destReg);
		xPUNPCK.LWD(destReg, destReg);
		xShiftR    (destReg, 24);
	}
	else {
		xPMOVXX8   (destReg);
	}
}
// V4-32 unpack: emits a full 128-bit load from the source directly into destReg.
// No widening is needed because the elements are already 32 bits wide.
void VifUnpackSSE_Base::xUPK_V4_32() const {
xMOV128 (destReg, ptr32[srcIndirect]);
}
// V4-16 unpack: widens four 16-bit source elements to 32 bits directly in destReg.
void VifUnpackSSE_Base::xUPK_V4_16() const {
	if (x86caps.hasStreamingSIMD4Extensions) {
		// SSE4 path: a single widening load does the extension.
		xPMOVXX16(destReg);
		return;
	}

	// Fallback: load 64 bits (four words), duplicate each word into both halves of
	// a dword, then shift right by 16 to leave the widened values.
	xMOV64     (destReg, ptr32[srcIndirect]);
	xPUNPCK.LWD(destReg, destReg);
	xShiftR    (destReg, 16);
}
// V4-8 unpack: widens four 8-bit source elements to 32 bits directly in destReg.
void VifUnpackSSE_Base::xUPK_V4_8() const {
	if (x86caps.hasStreamingSIMD4Extensions) {
		// SSE4 path: a single widening load does the extension.
		xPMOVXX8(destReg);
		return;
	}

	// Fallback: load 32 bits (four bytes), interleave each byte with itself up to
	// dword width, then shift right by 24 to leave the widened values.
	xMOV32     (destReg, ptr32[srcIndirect]);
	xPUNPCK.LBW(destReg, destReg);
	xPUNPCK.LWD(destReg, destReg);
	xShiftR    (destReg, 24);
}
// V4-5 unpack: splits one 16-bit source value into four fields (labelled R, G, B and A
// in the step comments below) and places one field in each 32-bit lane of destReg.
//
// The sequence works on a copy of the value in every lane of workReg (after the _v0
// shuffle -- presumably a broadcast of element 0; _v0 is defined elsewhere). Each step
// shifts the next field so that it sits in the top bits of the low byte, then merges a
// single lane of workReg into destReg via mVUmergeRegs (mask 0x4, 0x2, 0x1 in turn).
// R, G and B are shifted left by 3; A is shifted left by 7.
// The statement order is significant: every step depends on the previous shift state.
void VifUnpackSSE_Base::xUPK_V4_5() const {
xMOV16 (workReg, ptr32[srcIndirect]);
xPSHUF.D (workReg, workReg, _v0);
xPSLL.D (workReg, 3); // ABG|R5.000
xMOVAPS (destReg, workReg); // x|x|x|R
xPSRL.D (workReg, 8); // ABG
xPSLL.D (workReg, 3); // AB|G5.000
mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R
xPSRL.D (workReg, 8); // AB
xPSLL.D (workReg, 3); // A|B5.000
mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R
xPSRL.D (workReg, 8); // A
xPSLL.D (workReg, 7); // A.0000000
mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R
// Shifting left then right by 24 clears everything above the low byte of each dword.
xPSLL.D (destReg, 24); // can optimize to
xPSRL.D (destReg, 24); // single AND...
}
// Emits the unpack instruction sequence for the given unpack type index by
// dispatching to the matching xUPK_* helper.
//
//   upknum - unpack type index. 0-2 = S-32/16/8, 4-6 = V2-32/16/8, 8-10 = V3-32/16/8,
//            12-14 = V4-32/16/8, 15 = V4-5.
//
// Indices 3, 7 and 11 have no unpack helper and are reported through pxFailRel. Any
// other value outside the table is reported the same way; previously such a value
// fell out of the switch and silently emitted no unpack code at all.
void VifUnpackSSE_Base::xUnpack( int upknum ) const
{
	switch( upknum )
	{
		case 0:  xUPK_S_32();  break;
		case 1:  xUPK_S_16();  break;
		case 2:  xUPK_S_8();   break;

		case 4:  xUPK_V2_32(); break;
		case 5:  xUPK_V2_16(); break;
		case 6:  xUPK_V2_8();  break;

		case 8:  xUPK_V3_32(); break;
		case 9:  xUPK_V3_16(); break;
		case 10: xUPK_V3_8();  break;

		case 12: xUPK_V4_32(); break;
		case 13: xUPK_V4_16(); break;
		case 14: xUPK_V4_8();  break;
		case 15: xUPK_V4_5();  break;

		case 3:
		case 7:
		case 11:
		default:
			pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
		break;
	}
}
// =====================================================================================================
// VifUnpackSSE_Simple
// =====================================================================================================
// Builds a generator for one fixed (unsigned, mask, cycle) combination.
//   usn_      - unsigned flag
//   domask_   - masking write enable flag
//   curCycle_ - cycle index used by doMaskWrite to select the mask column
VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
	: curCycle(curCycle_)
{
	// usn and doMask are members of the base class, so they are assigned here
	// rather than in the initializer list.
	usn    = usn_;
	doMask = domask_;
}
// Emits a masked store of regX to the destination. The emitted code computes
//     (regX & nVifMask[0]) | (dest & nVifMask[1]) | nVifMask[2]
// and writes the result back to the destination address. The mask column is the
// current cycle clamped to 3. xmm7 is used as scratch for the old destination value.
void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const {
	const int maskCol = aMin(curCycle, 3);

	xMOVAPS(xmm7, ptr[dstIndirect]);              // xmm7 = current destination contents
	xPAND  (regX, ptr32[nVifMask[0][maskCol]]);
	xPAND  (xmm7, ptr32[nVifMask[1][maskCol]]);
	xPOR   (regX, ptr32[nVifMask[2][maskCol]]);
	xPOR   (regX, xmm7);
	xMOVAPS(ptr[dstIndirect], regX);
}
// ecx = dest, edx = src
static void nVifGen(int usn, int mask, int curCycle) {
int usnpart = usn*2*16;
int maskpart = mask*16;
VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle );
for( int i=0; i<16; ++i )
{
nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] );
ucall = NULL;
if( nVifT[i] == 0 ) continue;
ucall = (nVifCall)xGetAlignedCallTarget();
vpugen.xUnpack(i);
vpugen.xMovDest();
xRET();
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
}
}
// Generates every unpack routine variant into nVifUpkExec.
// The buffer is made writable, filled with 0xcc, populated by nVifGen for each
// usn (2) x mask (2) x cycle (4) combination, and then switched to read-only.
void VifUnpackSSE_Init()
{
	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
	memset8<0xcc>( nVifUpkExec );

	xSetPtr( nVifUpkExec );

	// 16 variants, visited in the same order as nested loops with usn outermost,
	// mask in the middle and cycle innermost.
	for (int variant = 0; variant < 16; variant++) {
		const int usn   = variant >> 3;
		const int mask  = (variant >> 2) & 1;
		const int cycle = variant & 3;
		nVifGen(usn, mask, cycle);
	}

	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
}

View File

@ -1,145 +1,145 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "Common.h"
#include "VifDma_internal.h"
#include "newVif.h"
#include <xmmintrin.h>
#include <emmintrin.h>
using namespace x86Emitter;
extern void mergeVectors(int dest, int src, int temp, int xyzw);
extern void loadRowCol(nVifStruct& v);
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Base
// --------------------------------------------------------------------------------------
// Common base for the SSE unpack code generators. It holds the operands shared by
// all unpack routines and the xUPK_* helpers that emit one unpack format each.
// Subclasses supply the masking policy (IsUnmaskedOp / doMaskWrite).
class VifUnpackSSE_Base
{
public:
bool usn; // unsigned flag
bool doMask; // masking write enable flag
protected:
// Address operands for the destination and source of the generated code.
xAddressInfo dstIndirect;
xAddressInfo srcIndirect;
// workReg is used as a staging register by the S-* and V4-5 unpacks;
// destReg receives the unpacked result.
xRegisterSSE workReg;
xRegisterSSE destReg;
public:
VifUnpackSSE_Base();
virtual ~VifUnpackSSE_Base() throw() {}
// Emits the unpack sequence for the given unpack type index (0-15).
virtual void xUnpack( int upktype ) const;
// Implemented by subclasses from their mask/mode flags.
virtual bool IsUnmaskedOp() const=0;
// NOTE(review): implementation is not in view here; presumably emits the store of
// destReg to the destination (masked or not) -- confirm.
virtual void xMovDest() const;
protected:
// Emits a masked write of regX to the destination; policy is subclass-specific.
virtual void doMaskWrite(const xRegisterSSE& regX ) const=0;
// NOTE(review): implementation is not in view here; presumably a right shift by n
// whose type depends on 'usn' -- confirm.
virtual void xShiftR(const xRegisterSSE& regX, int n) const;
// Widening loads (byte/word -> dword) from the source; zero- or sign-extending per 'usn'.
virtual void xPMOVXX8(const xRegisterSSE& regX) const;
virtual void xPMOVXX16(const xRegisterSSE& regX) const;
// One emitter per unpack format: S/V2/V3/V4 with 32-, 16- or 8-bit elements, plus V4-5.
virtual void xUPK_S_32() const;
virtual void xUPK_S_16() const;
virtual void xUPK_S_8() const;
virtual void xUPK_V2_32() const;
virtual void xUPK_V2_16() const;
virtual void xUPK_V2_8() const;
virtual void xUPK_V3_32() const;
virtual void xUPK_V3_16() const;
virtual void xUPK_V3_8() const;
virtual void xUPK_V4_32() const;
virtual void xUPK_V4_16() const;
virtual void xUPK_V4_8() const;
virtual void xUPK_V4_5() const;
};
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Simple
// --------------------------------------------------------------------------------------
// Unpack generator for a fixed (unsigned, mask, cycle) combination.
// Masking is controlled by doMask alone.
class VifUnpackSSE_Simple : public VifUnpackSSE_Base
{
typedef VifUnpackSSE_Base _parent;
public:
// Cycle index; doMaskWrite clamps it to 3 to select the mask column.
int curCycle;
public:
VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
virtual ~VifUnpackSSE_Simple() throw() {}
// The operation is unmasked exactly when doMask is false.
virtual bool IsUnmaskedOp() const{ return !doMask; }
protected:
// Emits the masked store of regX to the destination.
virtual void doMaskWrite(const xRegisterSSE& regX ) const;
};
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Dynarec
// --------------------------------------------------------------------------------------
// Unpack generator used by the dynamic recompiler. It is bound to one VIF unit and
// one pre-collected block description, and adds a mode setting on top of the mask
// flag inherited from the base class.
class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
{
	typedef VifUnpackSSE_Base _parent;

public:
	bool	isFill;
	int		doMode;		// two bit value representing... something!

protected:
	const nVifStruct&	v;		// vif0 or vif1
	const nVifBlock&	vB;		// some pre-collected data from VifStruct
	int					vCL;	// internal copy of vif->cl

public:
	VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);

	// Copy constructor. Every data member is copied, including doMode. It used to be
	// skipped, which left the copy with an indeterminate doMode; that also affected
	// the object returned by FillingWrite() whenever the return copy is not elided.
	// Initializers are listed in declaration order.
	VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src)
		: _parent(src)
		, isFill(src.isFill)
		, doMode(src.doMode)
		, v(src.v)
		, vB(src.vB)
		, vCL(src.vCL)
	{
	}

	virtual ~VifUnpackSSE_Dynarec() throw() {}

	// The operation is unmasked only when both the mode and the mask flag are off.
	virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; }

	void CompileRoutine();

protected:
	virtual void doMaskWrite(const xRegisterSSE& regX) const;
	void SetMasks(int cS) const;
	void writeBackRow() const;

	// Returns a copy of src with masking forced on and the mode cleared.
	static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src )
	{
		VifUnpackSSE_Dynarec fillingWrite( src );
		fillingWrite.doMask = true;
		fillingWrite.doMode = 0;
		return fillingWrite;
	}
};
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2009 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "Common.h"
#include "VifDma.h"
#include "newVif.h"
#include <xmmintrin.h>
#include <emmintrin.h>
using namespace x86Emitter;
extern void mergeVectors(int dest, int src, int temp, int xyzw);
extern void loadRowCol(nVifStruct& v);
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Base
// --------------------------------------------------------------------------------------
// Common base for the SSE unpack code generators. It holds the operands shared by
// all unpack routines and the xUPK_* helpers that emit one unpack format each.
// Subclasses supply the masking policy (IsUnmaskedOp / doMaskWrite).
class VifUnpackSSE_Base
{
public:
bool usn; // unsigned flag
bool doMask; // masking write enable flag
protected:
// Address operands for the destination and source of the generated code.
xAddressInfo dstIndirect;
xAddressInfo srcIndirect;
// workReg is used as a staging register by the S-* and V4-5 unpacks;
// destReg receives the unpacked result.
xRegisterSSE workReg;
xRegisterSSE destReg;
public:
VifUnpackSSE_Base();
virtual ~VifUnpackSSE_Base() throw() {}
// Emits the unpack sequence for the given unpack type index (0-15).
virtual void xUnpack( int upktype ) const;
// Implemented by subclasses from their mask/mode flags.
virtual bool IsUnmaskedOp() const=0;
// NOTE(review): implementation is not in view here; presumably emits the store of
// destReg to the destination (masked or not) -- confirm.
virtual void xMovDest() const;
protected:
// Emits a masked write of regX to the destination; policy is subclass-specific.
virtual void doMaskWrite(const xRegisterSSE& regX ) const=0;
// NOTE(review): implementation is not in view here; presumably a right shift by n
// whose type depends on 'usn' -- confirm.
virtual void xShiftR(const xRegisterSSE& regX, int n) const;
// Widening loads (byte/word -> dword) from the source; zero- or sign-extending per 'usn'.
virtual void xPMOVXX8(const xRegisterSSE& regX) const;
virtual void xPMOVXX16(const xRegisterSSE& regX) const;
// One emitter per unpack format: S/V2/V3/V4 with 32-, 16- or 8-bit elements, plus V4-5.
virtual void xUPK_S_32() const;
virtual void xUPK_S_16() const;
virtual void xUPK_S_8() const;
virtual void xUPK_V2_32() const;
virtual void xUPK_V2_16() const;
virtual void xUPK_V2_8() const;
virtual void xUPK_V3_32() const;
virtual void xUPK_V3_16() const;
virtual void xUPK_V3_8() const;
virtual void xUPK_V4_32() const;
virtual void xUPK_V4_16() const;
virtual void xUPK_V4_8() const;
virtual void xUPK_V4_5() const;
};
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Simple
// --------------------------------------------------------------------------------------
// Unpack generator for a fixed (unsigned, mask, cycle) combination.
// Masking is controlled by doMask alone.
class VifUnpackSSE_Simple : public VifUnpackSSE_Base
{
typedef VifUnpackSSE_Base _parent;
public:
// Cycle index; doMaskWrite clamps it to 3 to select the mask column.
int curCycle;
public:
VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
virtual ~VifUnpackSSE_Simple() throw() {}
// The operation is unmasked exactly when doMask is false.
virtual bool IsUnmaskedOp() const{ return !doMask; }
protected:
// Emits the masked store of regX to the destination.
virtual void doMaskWrite(const xRegisterSSE& regX ) const;
};
// --------------------------------------------------------------------------------------
// VifUnpackSSE_Dynarec
// --------------------------------------------------------------------------------------
// Unpack generator used by the dynamic recompiler. It is bound to one VIF unit and
// one pre-collected block description, and adds a mode setting on top of the mask
// flag inherited from the base class.
class VifUnpackSSE_Dynarec : public VifUnpackSSE_Base
{
	typedef VifUnpackSSE_Base _parent;

public:
	bool	isFill;
	int		doMode;		// two bit value representing... something!

protected:
	const nVifStruct&	v;		// vif0 or vif1
	const nVifBlock&	vB;		// some pre-collected data from VifStruct
	int					vCL;	// internal copy of vif->cl

public:
	VifUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);

	// Copy constructor. Every data member is copied, including doMode. It used to be
	// skipped, which left the copy with an indeterminate doMode; that also affected
	// the object returned by FillingWrite() whenever the return copy is not elided.
	// Initializers are listed in declaration order.
	VifUnpackSSE_Dynarec(const VifUnpackSSE_Dynarec& src)
		: _parent(src)
		, isFill(src.isFill)
		, doMode(src.doMode)
		, v(src.v)
		, vB(src.vB)
		, vCL(src.vCL)
	{
	}

	virtual ~VifUnpackSSE_Dynarec() throw() {}

	// The operation is unmasked only when both the mode and the mask flag are off.
	virtual bool IsUnmaskedOp() const{ return !doMode && !doMask; }

	void CompileRoutine();

protected:
	virtual void doMaskWrite(const xRegisterSSE& regX) const;
	void SetMasks(int cS) const;
	void writeBackRow() const;

	// Returns a copy of src with masking forced on and the mode cleared.
	static VifUnpackSSE_Dynarec FillingWrite( const VifUnpackSSE_Dynarec& src )
	{
		VifUnpackSSE_Dynarec fillingWrite( src );
		fillingWrite.doMask = true;
		fillingWrite.doMode = 0;
		return fillingWrite;
	}
};