mirror of https://github.com/PCSX2/pcsx2.git
* Significant optimizations to the VIFunpack interpreter (employs templated maskmode and cyclesize constants).
* Minor optimizations to newVifUnpackSSE, and more optimization notes. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2352 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
2b3b60511b
commit
f34f3ac0c4
|
@ -618,7 +618,6 @@ TraceLogFilters& SetTraceConfig();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define EE_CONST_PROP // rec2 - enables constant propagation (faster)
|
#define EE_CONST_PROP // rec2 - enables constant propagation (faster)
|
||||||
//#define NON_SSE_UNPACKS // Turns off SSE Unpacks (slower)
|
|
||||||
|
|
||||||
// Uncomment this if working on getting PS1 emulation working.
|
// Uncomment this if working on getting PS1 emulation working.
|
||||||
// This disables the exception normally caused by trying to load PS1
|
// This disables the exception normally caused by trying to load PS1
|
||||||
|
|
|
@ -311,6 +311,7 @@
|
||||||
<Unit filename="../Utilities/AsciiFile.h" />
|
<Unit filename="../Utilities/AsciiFile.h" />
|
||||||
<Unit filename="../Utilities/FileUtils.cpp" />
|
<Unit filename="../Utilities/FileUtils.cpp" />
|
||||||
<Unit filename="../Utilities/folderdesc.txt" />
|
<Unit filename="../Utilities/folderdesc.txt" />
|
||||||
|
<Unit filename="../VIFunpack.cpp" />
|
||||||
<Unit filename="../VU.h" />
|
<Unit filename="../VU.h" />
|
||||||
<Unit filename="../VU0.cpp" />
|
<Unit filename="../VU0.cpp" />
|
||||||
<Unit filename="../VU0micro.cpp" />
|
<Unit filename="../VU0micro.cpp" />
|
||||||
|
|
|
@ -0,0 +1,385 @@
|
||||||
|
/* PCSX2 - PS2 Emulator for PCs
|
||||||
|
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
||||||
|
*
|
||||||
|
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
||||||
|
* of the GNU Lesser General Public License as published by the Free Software Found-
|
||||||
|
* ation, either version 3 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
||||||
|
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE. See the GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License along with PCSX2.
|
||||||
|
* If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
#include "PrecompiledHeader.h"
|
||||||
|
#include "Common.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
#include "Vif.h"
|
||||||
|
#include "VifDma_internal.h"
|
||||||
|
|
||||||
|
enum UnpackOffset
|
||||||
|
{
|
||||||
|
OFFSET_X = 0,
|
||||||
|
OFFSET_Y = 1,
|
||||||
|
OFFSET_Z = 2,
|
||||||
|
OFFSET_W = 3
|
||||||
|
};
|
||||||
|
|
||||||
|
static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
|
||||||
|
{
|
||||||
|
switch (reg)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
vifRegs->r0 = data;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
vifRegs->r1 = data;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
vifRegs->r2 = data;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
vifRegs->r3 = data;
|
||||||
|
break;
|
||||||
|
jNO_DEFAULT;
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline u32 getVifRowRegs(u32 reg)
|
||||||
|
{
|
||||||
|
switch (reg)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return vifRegs->r0;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
return vifRegs->r1;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
return vifRegs->r2;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
return vifRegs->r3;
|
||||||
|
break;
|
||||||
|
jNO_DEFAULT;
|
||||||
|
}
|
||||||
|
return 0; // unreachable...
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline u32 setVifColRegs(u32 reg, u32 data)
|
||||||
|
{
|
||||||
|
switch (reg)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
vifRegs->c0 = data;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
vifRegs->c1 = data;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
vifRegs->c2 = data;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
vifRegs->c3 = data;
|
||||||
|
break;
|
||||||
|
jNO_DEFAULT;
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __forceinline u32 getVifColRegs(u32 reg)
|
||||||
|
{
|
||||||
|
switch (reg)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return vifRegs->c0;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
return vifRegs->c1;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
return vifRegs->c2;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
return vifRegs->c3;
|
||||||
|
break;
|
||||||
|
jNO_DEFAULT;
|
||||||
|
}
|
||||||
|
return 0; // unreachable...
|
||||||
|
}
|
||||||
|
|
||||||
|
template< bool doMask >
|
||||||
|
static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
|
||||||
|
{
|
||||||
|
int n;
|
||||||
|
u32 vifRowReg = getVifRowRegs(offnum);
|
||||||
|
|
||||||
|
if (doMask)
|
||||||
|
{
|
||||||
|
switch (vif->cl)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if (offnum == OFFSET_X)
|
||||||
|
n = (vifRegs->mask) & 0x3;
|
||||||
|
else
|
||||||
|
n = (vifRegs->mask >> (offnum * 2)) & 0x3;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else n = 0;
|
||||||
|
|
||||||
|
switch (n)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if ((vif->cmd & 0x6F) == 0x6f)
|
||||||
|
{
|
||||||
|
dest = data;
|
||||||
|
}
|
||||||
|
else switch (vifRegs->mode)
|
||||||
|
{
|
||||||
|
case 1:
|
||||||
|
dest = data + vifRowReg;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
// vifRowReg isn't used after this, or I would make it equal to dest here.
|
||||||
|
dest = setVifRowRegs(offnum, vifRowReg + data);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
dest = data;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
dest = vifRowReg;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template < bool doMask, class T >
|
||||||
|
static __forceinline void __fastcall UNPACK_S(u32 *dest, T *data, int size)
|
||||||
|
{
|
||||||
|
//S-# will always be a complete packet, no matter what. So we can skip the offset bits
|
||||||
|
writeXYZW<doMask>(OFFSET_X, *dest++, *data);
|
||||||
|
writeXYZW<doMask>(OFFSET_Y, *dest++, *data);
|
||||||
|
writeXYZW<doMask>(OFFSET_Z, *dest++, *data);
|
||||||
|
writeXYZW<doMask>(OFFSET_W, *dest , *data);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool doMask, class T>
|
||||||
|
static __forceinline void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
|
||||||
|
{
|
||||||
|
if (vifRegs->offset == OFFSET_X)
|
||||||
|
{
|
||||||
|
if (size > 0)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
|
||||||
|
vifRegs->offset = OFFSET_Y;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vifRegs->offset == OFFSET_Y)
|
||||||
|
{
|
||||||
|
if (size > 0)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *data);
|
||||||
|
vifRegs->offset = OFFSET_Z;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vifRegs->offset == OFFSET_Z)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *dest-2);
|
||||||
|
vifRegs->offset = OFFSET_W;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vifRegs->offset == OFFSET_W)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest, *data);
|
||||||
|
vifRegs->offset = OFFSET_X;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool doMask, class T>
|
||||||
|
static __forceinline void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
|
||||||
|
{
|
||||||
|
if(vifRegs->offset == OFFSET_X)
|
||||||
|
{
|
||||||
|
if (size > 0)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
|
||||||
|
vifRegs->offset = OFFSET_Y;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vifRegs->offset == OFFSET_Y)
|
||||||
|
{
|
||||||
|
if (size > 0)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
|
||||||
|
vifRegs->offset = OFFSET_Z;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vifRegs->offset == OFFSET_Z)
|
||||||
|
{
|
||||||
|
if (size > 0)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
|
||||||
|
vifRegs->offset = OFFSET_W;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(vifRegs->offset == OFFSET_W)
|
||||||
|
{
|
||||||
|
//V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!)
|
||||||
|
//Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest, *data);
|
||||||
|
vifRegs->offset = OFFSET_X;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool doMask, class T>
|
||||||
|
static __forceinline void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
|
||||||
|
{
|
||||||
|
while (size > 0)
|
||||||
|
{
|
||||||
|
writeXYZW<doMask>(vifRegs->offset, *dest++, *data++);
|
||||||
|
vifRegs->offset++;
|
||||||
|
size--;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
|
||||||
|
}
|
||||||
|
|
||||||
|
template< bool doMask >
|
||||||
|
static __releaseinline void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
|
||||||
|
{
|
||||||
|
//As with S-#, this will always be a complete packet
|
||||||
|
writeXYZW<doMask>(OFFSET_X, *dest++, ((*data & 0x001f) << 3));
|
||||||
|
writeXYZW<doMask>(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2));
|
||||||
|
writeXYZW<doMask>(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7));
|
||||||
|
writeXYZW<doMask>(OFFSET_W, *dest, ((*data & 0x8000) >> 8));
|
||||||
|
}
|
||||||
|
|
||||||
|
// =====================================================================================================
|
||||||
|
|
||||||
|
template < bool doMask, int size, class T >
|
||||||
|
static void __fastcall fUNPACK_S(u32 *dest, T *data)
|
||||||
|
{
|
||||||
|
UNPACK_S<doMask>( dest, data, size );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool doMask, int size, class T>
|
||||||
|
static void __fastcall fUNPACK_V2(u32 *dest, T *data)
|
||||||
|
{
|
||||||
|
UNPACK_V2<doMask>( dest, data, size );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool doMask, int size, class T>
|
||||||
|
static void __fastcall fUNPACK_V3(u32 *dest, T *data)
|
||||||
|
{
|
||||||
|
UNPACK_V3<doMask>( dest, data, size );
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool doMask, int size, class T>
|
||||||
|
static void __fastcall fUNPACK_V4(u32 *dest, T *data)
|
||||||
|
{
|
||||||
|
UNPACK_V4<doMask>( dest, data, size );
|
||||||
|
}
|
||||||
|
|
||||||
|
template< bool doMask >
|
||||||
|
static void __fastcall fUNPACK_V4_5(u32 *dest, u32 *data)
|
||||||
|
{
|
||||||
|
UNPACK_V4_5<doMask>(dest, data, 0); // size is ignored.
|
||||||
|
}
|
||||||
|
|
||||||
|
#define _upk (UNPACKFUNCTYPE)
|
||||||
|
#define _odd (UNPACKFUNCTYPE_ODD)
|
||||||
|
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// Main table for function unpacking.
|
||||||
|
// --------------------------------------------------------------------------------------
|
||||||
|
// The extra data bsize/dsize/etc are all duplicated between the doMask enabled and
|
||||||
|
// disabled versions. This is probably simpler and more efficient than bothering
|
||||||
|
// to generate separate tables.
|
||||||
|
|
||||||
|
// 32-bits versions are unsigned-only!!
|
||||||
|
#define UnpackFuncPair32( sizefac, vt, doMask ) \
|
||||||
|
_upk fUNPACK_##vt<doMask, sizefac, u32>, \
|
||||||
|
_upk fUNPACK_##vt<doMask, sizefac, u32>, \
|
||||||
|
_odd UNPACK_##vt<doMask, u32>, \
|
||||||
|
_odd UNPACK_##vt<doMask, u32>,
|
||||||
|
|
||||||
|
#define UnpackFuncPair( sizefac, vt, bits, doMask ) \
|
||||||
|
_upk fUNPACK_##vt<doMask, sizefac, u##bits>, \
|
||||||
|
_upk fUNPACK_##vt<doMask, sizefac, s##bits>, \
|
||||||
|
_odd UNPACK_##vt<doMask, u##bits>, \
|
||||||
|
_odd UNPACK_##vt<doMask, s##bits>,
|
||||||
|
|
||||||
|
#define UnpackFuncSet( doMask ) \
|
||||||
|
{ UnpackFuncPair32( 4, S, doMask ) /* 0x0 - S-32 */ \
|
||||||
|
1, 4, 4, 4 }, \
|
||||||
|
{ UnpackFuncPair ( 4, S, 16, doMask ) /* 0x1 - S-16 */ \
|
||||||
|
2, 2, 2, 4 }, \
|
||||||
|
{ UnpackFuncPair ( 4, S, 8, doMask ) /* 0x2 - S-8 */ \
|
||||||
|
4, 1, 1, 4 }, \
|
||||||
|
{ NULL, NULL, NULL, NULL, 0, 0, 0, 0 }, /* 0x3 (NULL) */ \
|
||||||
|
\
|
||||||
|
{ UnpackFuncPair32( 2, V2, doMask ) /* 0x4 - V2-32 */ \
|
||||||
|
24, 4, 8, 2 }, \
|
||||||
|
{ UnpackFuncPair ( 2, V2, 16, doMask ) /* 0x5 - V2-16 */ \
|
||||||
|
12, 2, 4, 2 }, \
|
||||||
|
{ UnpackFuncPair ( 2, V2, 8, doMask ) /* 0x6 - V2-8 */ \
|
||||||
|
6, 1, 2, 2 }, \
|
||||||
|
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0x7 (NULL) */ \
|
||||||
|
\
|
||||||
|
{ UnpackFuncPair32( 3, V3, doMask ) /* 0x8 - V3-32 */ \
|
||||||
|
36, 4, 12, 3 }, \
|
||||||
|
{ UnpackFuncPair ( 3, V3, 16, doMask ) /* 0x9 - V3-16 */ \
|
||||||
|
18, 2, 6, 3 }, \
|
||||||
|
{ UnpackFuncPair ( 3, V3, 8, doMask ) /* 0xA - V3-8 */ \
|
||||||
|
9, 1, 3, 3 }, \
|
||||||
|
{ NULL, NULL, NULL, NULL,0, 0, 0, 0 }, /* 0xB (NULL) */ \
|
||||||
|
\
|
||||||
|
{ UnpackFuncPair32( 4, V4, doMask ) /* 0xC - V4-32 */ \
|
||||||
|
48, 4, 16, 4 }, \
|
||||||
|
{ UnpackFuncPair ( 4, V4, 16, doMask ) /* 0xD - V4-16 */ \
|
||||||
|
24, 2, 8, 4 }, \
|
||||||
|
{ UnpackFuncPair ( 4, V4, 8, doMask ) /* 0xE - V4-8 */ \
|
||||||
|
12, 1, 4, 4 }, \
|
||||||
|
{ /* 0xF - V4-5 */ \
|
||||||
|
_upk fUNPACK_V4_5<doMask>, _upk fUNPACK_V4_5<doMask>, \
|
||||||
|
_odd UNPACK_V4_5<doMask>, _odd UNPACK_V4_5<doMask>, \
|
||||||
|
6, 2, 2, 4 },
|
||||||
|
|
||||||
|
const __aligned16 VIFUnpackFuncTable VIFfuncTable[32] =
|
||||||
|
{
|
||||||
|
UnpackFuncSet( false )
|
||||||
|
UnpackFuncSet( true )
|
||||||
|
};
|
381
pcsx2/Vif.cpp
381
pcsx2/Vif.cpp
|
@ -18,7 +18,6 @@
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
#include "Vif.h"
|
#include "Vif.h"
|
||||||
#include "VifDma.h"
|
#include "VifDma.h"
|
||||||
|
@ -33,386 +32,6 @@ __aligned16 VifMaskTypes g_vifmask;
|
||||||
|
|
||||||
extern int g_vifCycles;
|
extern int g_vifCycles;
|
||||||
|
|
||||||
enum UnpackOffset
|
|
||||||
{
|
|
||||||
OFFSET_X = 0,
|
|
||||||
OFFSET_Y = 1,
|
|
||||||
OFFSET_Z = 2,
|
|
||||||
OFFSET_W = 3
|
|
||||||
};
|
|
||||||
|
|
||||||
static __forceinline u32 setVifRowRegs(u32 reg, u32 data)
|
|
||||||
{
|
|
||||||
switch (reg)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
vifRegs->r0 = data;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
vifRegs->r1 = data;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
vifRegs->r2 = data;
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
vifRegs->r3 = data;
|
|
||||||
break;
|
|
||||||
jNO_DEFAULT;
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getVifRowRegs(u32 reg)
|
|
||||||
{
|
|
||||||
switch (reg)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
return vifRegs->r0;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
return vifRegs->r1;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
return vifRegs->r2;
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
return vifRegs->r3;
|
|
||||||
break;
|
|
||||||
jNO_DEFAULT;
|
|
||||||
}
|
|
||||||
return 0; // unreachable...
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 setVifColRegs(u32 reg, u32 data)
|
|
||||||
{
|
|
||||||
switch (reg)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
vifRegs->c0 = data;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
vifRegs->c1 = data;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
vifRegs->c2 = data;
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
vifRegs->c3 = data;
|
|
||||||
break;
|
|
||||||
jNO_DEFAULT;
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline u32 getVifColRegs(u32 reg)
|
|
||||||
{
|
|
||||||
switch (reg)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
return vifRegs->c0;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
return vifRegs->c1;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
return vifRegs->c2;
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
return vifRegs->c3;
|
|
||||||
break;
|
|
||||||
jNO_DEFAULT;
|
|
||||||
}
|
|
||||||
return 0; // unreachable...
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static __releaseinline void writeXYZW(u32 offnum, u32 &dest, u32 data)
|
|
||||||
{
|
|
||||||
int n;
|
|
||||||
u32 vifRowReg = getVifRowRegs(offnum);
|
|
||||||
|
|
||||||
if (vifRegs->code & 0x10000000)
|
|
||||||
{
|
|
||||||
switch (vif->cl)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
if (offnum == OFFSET_X)
|
|
||||||
n = (vifRegs->mask) & 0x3;
|
|
||||||
else
|
|
||||||
n = (vifRegs->mask >> (offnum * 2)) & 0x3;
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
n = (vifRegs->mask >> ( 8 + (offnum * 2))) & 0x3;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
n = (vifRegs->mask >> (16 + (offnum * 2))) & 0x3;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
n = (vifRegs->mask >> (24 + (offnum * 2))) & 0x3;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else n = 0;
|
|
||||||
|
|
||||||
switch (n)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
if ((vif->cmd & 0x6F) == 0x6f)
|
|
||||||
{
|
|
||||||
dest = data;
|
|
||||||
}
|
|
||||||
else switch (vifRegs->mode)
|
|
||||||
{
|
|
||||||
case 1:
|
|
||||||
dest = data + vifRowReg;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
// vifRowReg isn't used after this, or I would make it equal to dest here.
|
|
||||||
dest = setVifRowRegs(offnum, vifRowReg + data);
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
dest = data;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
dest = vifRowReg;
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
dest = getVifColRegs((vif->cl > 2) ? 3 : vif->cl);
|
|
||||||
break;
|
|
||||||
case 3:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// VIF_LOG("writeX %8.8x : Mode %d, r0 = %x, data %8.8x", *dest,vifRegs->mode,vifRegs->r0,data);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
void __fastcall UNPACK_S(u32 *dest, T *data, int size)
|
|
||||||
{
|
|
||||||
//S-# will always be a complete packet, no matter what. So we can skip the offset bits
|
|
||||||
writeXYZW(OFFSET_X, *dest++, *data);
|
|
||||||
writeXYZW(OFFSET_Y, *dest++, *data);
|
|
||||||
writeXYZW(OFFSET_Z, *dest++, *data);
|
|
||||||
writeXYZW(OFFSET_W, *dest , *data);
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
void __fastcall UNPACK_V2(u32 *dest, T *data, int size)
|
|
||||||
{
|
|
||||||
if (vifRegs->offset == OFFSET_X)
|
|
||||||
{
|
|
||||||
if (size > 0)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *data++);
|
|
||||||
vifRegs->offset = OFFSET_Y;
|
|
||||||
size--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vifRegs->offset == OFFSET_Y)
|
|
||||||
{
|
|
||||||
if (size > 0)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *data);
|
|
||||||
vifRegs->offset = OFFSET_Z;
|
|
||||||
size--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vifRegs->offset == OFFSET_Z)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *dest-2);
|
|
||||||
vifRegs->offset = OFFSET_W;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vifRegs->offset == OFFSET_W)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest, *data);
|
|
||||||
vifRegs->offset = OFFSET_X;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
void __fastcall UNPACK_V3(u32 *dest, T *data, int size)
|
|
||||||
{
|
|
||||||
if(vifRegs->offset == OFFSET_X)
|
|
||||||
{
|
|
||||||
if (size > 0)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *data++);
|
|
||||||
vifRegs->offset = OFFSET_Y;
|
|
||||||
size--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(vifRegs->offset == OFFSET_Y)
|
|
||||||
{
|
|
||||||
if (size > 0)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *data++);
|
|
||||||
vifRegs->offset = OFFSET_Z;
|
|
||||||
size--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(vifRegs->offset == OFFSET_Z)
|
|
||||||
{
|
|
||||||
if (size > 0)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *data++);
|
|
||||||
vifRegs->offset = OFFSET_W;
|
|
||||||
size--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(vifRegs->offset == OFFSET_W)
|
|
||||||
{
|
|
||||||
//V3-# does some bizzare thing with alignment, every 6qw of data the W becomes 0 (strange console!)
|
|
||||||
//Ape Escape doesnt seem to like it tho (what the hell?) gonna have to investigate
|
|
||||||
writeXYZW(vifRegs->offset, *dest, *data);
|
|
||||||
vifRegs->offset = OFFSET_X;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class T>
|
|
||||||
void __fastcall UNPACK_V4(u32 *dest, T *data , int size)
|
|
||||||
{
|
|
||||||
while (size > 0)
|
|
||||||
{
|
|
||||||
writeXYZW(vifRegs->offset, *dest++, *data++);
|
|
||||||
vifRegs->offset++;
|
|
||||||
size--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (vifRegs->offset > OFFSET_W) vifRegs->offset = OFFSET_X;
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_5(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
//As with S-#, this will always be a complete packet
|
|
||||||
writeXYZW(OFFSET_X, *dest++, ((*data & 0x001f) << 3));
|
|
||||||
writeXYZW(OFFSET_Y, *dest++, ((*data & 0x03e0) >> 2));
|
|
||||||
writeXYZW(OFFSET_Z, *dest++, ((*data & 0x7c00) >> 7));
|
|
||||||
writeXYZW(OFFSET_W, *dest, ((*data & 0x8000) >> 8));
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_32(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
UNPACK_S(dest, data, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_16s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s16 *sdata = (s16*)data;
|
|
||||||
UNPACK_S(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_16u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u16 *sdata = (u16*)data;
|
|
||||||
UNPACK_S(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_8s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s8 *cdata = (s8*)data;
|
|
||||||
UNPACK_S(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_8u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u8 *cdata = (u8*)data;
|
|
||||||
UNPACK_S(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_32(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
UNPACK_V2(dest, data, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_16s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s16 *sdata = (s16*)data;
|
|
||||||
UNPACK_V2(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_16u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u16 *sdata = (u16*)data;
|
|
||||||
UNPACK_V2(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_8s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s8 *cdata = (s8*)data;
|
|
||||||
UNPACK_V2(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_8u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u8 *cdata = (u8*)data;
|
|
||||||
UNPACK_V2(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_32(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
UNPACK_V3(dest, data, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_16s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s16 *sdata = (s16*)data;
|
|
||||||
UNPACK_V3(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_16u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u16 *sdata = (u16*)data;
|
|
||||||
UNPACK_V3(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_8s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s8 *cdata = (s8*)data;
|
|
||||||
UNPACK_V3(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_8u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u8 *cdata = (u8*)data;
|
|
||||||
UNPACK_V3(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_32(u32 *dest, u32 *data , int size)
|
|
||||||
{
|
|
||||||
UNPACK_V4(dest, data, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_16s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s16 *sdata = (s16*)data;
|
|
||||||
UNPACK_V4(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_16u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u16 *sdata = (u16*)data;
|
|
||||||
UNPACK_V4(dest, sdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_8s(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
s8 *cdata = (s8*)data;
|
|
||||||
UNPACK_V4(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_8u(u32 *dest, u32 *data, int size)
|
|
||||||
{
|
|
||||||
u8 *cdata = (u8*)data;
|
|
||||||
UNPACK_V4(dest, cdata, size);
|
|
||||||
}
|
|
||||||
|
|
||||||
static __forceinline bool mfifoVIF1rbTransfer()
|
static __forceinline bool mfifoVIF1rbTransfer()
|
||||||
{
|
{
|
||||||
u32 maddr = dmacRegs->rbor.ADDR;
|
u32 maddr = dmacRegs->rbor.ADDR;
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
#include "VifDma.h"
|
|
||||||
#include "VifDma_internal.h"
|
#include "VifDma_internal.h"
|
||||||
|
|
||||||
#include "VUmicro.h"
|
#include "VUmicro.h"
|
||||||
|
|
|
@ -17,7 +17,6 @@
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
#include "VifDma.h"
|
|
||||||
#include "VifDma_internal.h"
|
#include "VifDma_internal.h"
|
||||||
|
|
||||||
#include "GS.h"
|
#include "GS.h"
|
||||||
|
|
170
pcsx2/VifDma.cpp
170
pcsx2/VifDma.cpp
|
@ -16,7 +16,6 @@
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
#include "VifDma.h"
|
|
||||||
#include "VifDma_internal.h"
|
#include "VifDma_internal.h"
|
||||||
#include "VUmicro.h"
|
#include "VUmicro.h"
|
||||||
|
|
||||||
|
@ -32,53 +31,9 @@ extern "C"
|
||||||
extern u32* vifRow;
|
extern u32* vifRow;
|
||||||
}
|
}
|
||||||
|
|
||||||
extern vifStruct *vif;
|
|
||||||
|
|
||||||
int g_vifCycles = 0;
|
int g_vifCycles = 0;
|
||||||
u8 s_maskwrite[256];
|
u8 s_maskwrite[256];
|
||||||
|
|
||||||
/* block size; data size; group size; qword size; */
|
|
||||||
#define _UNPACK_TABLE32(name, bsize, dsize, gsize, qsize) \
|
|
||||||
{ UNPACK_##name, UNPACK_##name, \
|
|
||||||
bsize, dsize, gsize, qsize },
|
|
||||||
|
|
||||||
#define _UNPACK_TABLE(name, bsize, dsize, gsize, qsize) \
|
|
||||||
{ UNPACK_##name##u, UNPACK_##name##s, \
|
|
||||||
bsize, dsize, gsize, qsize },
|
|
||||||
|
|
||||||
// Main table for function unpacking
|
|
||||||
const VIFUnpackFuncTable VIFfuncTable[16] =
|
|
||||||
{
|
|
||||||
_UNPACK_TABLE32(S_32, 1, 4, 4, 4) // 0x0 - S-32
|
|
||||||
_UNPACK_TABLE(S_16, 2, 2, 2, 4) // 0x1 - S-16
|
|
||||||
_UNPACK_TABLE(S_8, 4, 1, 1, 4) // 0x2 - S-8
|
|
||||||
{
|
|
||||||
NULL, NULL, 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
, // 0x3
|
|
||||||
|
|
||||||
_UNPACK_TABLE32(V2_32, 24, 4, 8, 2) // 0x4 - V2-32
|
|
||||||
_UNPACK_TABLE(V2_16, 12, 2, 4, 2) // 0x5 - V2-16
|
|
||||||
_UNPACK_TABLE(V2_8, 6, 1, 2, 2) // 0x6 - V2-8
|
|
||||||
{
|
|
||||||
NULL, NULL, 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
, // 0x7
|
|
||||||
|
|
||||||
_UNPACK_TABLE32(V3_32, 36, 4, 12, 3) // 0x8 - V3-32
|
|
||||||
_UNPACK_TABLE(V3_16, 18, 2, 6, 3) // 0x9 - V3-16
|
|
||||||
_UNPACK_TABLE(V3_8, 9, 1, 3, 3) // 0xA - V3-8
|
|
||||||
{
|
|
||||||
NULL, NULL, 0, 0, 0, 0
|
|
||||||
}
|
|
||||||
, // 0xB
|
|
||||||
|
|
||||||
_UNPACK_TABLE32(V4_32, 48, 4, 16, 4) // 0xC - V4-32
|
|
||||||
_UNPACK_TABLE(V4_16, 24, 2, 8, 4) // 0xD - V4-16
|
|
||||||
_UNPACK_TABLE(V4_8, 12, 1, 4, 4) // 0xE - V4-8
|
|
||||||
_UNPACK_TABLE32(V4_5, 6, 2, 2, 4) // 0xF - V4-5
|
|
||||||
};
|
|
||||||
|
|
||||||
struct VIFSSEUnpackTable
|
struct VIFSSEUnpackTable
|
||||||
{
|
{
|
||||||
// regular 0, 1, 2; mask 0, 1, 2
|
// regular 0, 1, 2; mask 0, 1, 2
|
||||||
|
@ -171,6 +126,9 @@ template<const u32 VIFdmanum> void ProcessMemSkip(u32 size, u32 unpackType)
|
||||||
{
|
{
|
||||||
const VIFUnpackFuncTable *unpack;
|
const VIFUnpackFuncTable *unpack;
|
||||||
|
|
||||||
|
// unpackType is only 0->0xf but that's ok, because the data we're using here is
|
||||||
|
// just duplicated in 0x10->0x1f.
|
||||||
|
|
||||||
unpack = &VIFfuncTable[ unpackType ];
|
unpack = &VIFfuncTable[ unpackType ];
|
||||||
|
|
||||||
switch (unpackType)
|
switch (unpackType)
|
||||||
|
@ -259,9 +217,6 @@ template u32 VIFalign<1>(u32 *data, vifCode *v, u32 size);
|
||||||
template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
{
|
{
|
||||||
u32 *dest;
|
u32 *dest;
|
||||||
u32 unpackType;
|
|
||||||
UNPACKFUNCTYPE func;
|
|
||||||
const VIFUnpackFuncTable *ft;
|
|
||||||
VURegs * VU;
|
VURegs * VU;
|
||||||
u8 *cdata = (u8*)data;
|
u8 *cdata = (u8*)data;
|
||||||
|
|
||||||
|
@ -290,11 +245,8 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
|
VIF_LOG("VIF%d UNPACK Align: Mode=%x, v->size=%d, size=%d, v->addr=%x v->num=%x",
|
||||||
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
|
VIFdmanum, v->cmd & 0xf, v->size, size, v->addr, vifRegs->num);
|
||||||
|
|
||||||
// The unpack type
|
const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
|
||||||
unpackType = v->cmd & 0xf;
|
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
|
||||||
|
|
||||||
ft = &VIFfuncTable[ unpackType ];
|
|
||||||
func = vif->usn ? ft->funcU : ft->funcS;
|
|
||||||
|
|
||||||
size <<= 2;
|
size <<= 2;
|
||||||
memsize = size;
|
memsize = size;
|
||||||
|
@ -311,17 +263,17 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
|
|
||||||
VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
VIFUNPACK_LOG("Aligning packet size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
||||||
|
|
||||||
if (((u32)size / (u32)ft->dsize) < ((u32)ft->qsize - vifRegs->offset))
|
if (((u32)size / (u32)ft.dsize) < ((u32)ft.qsize - vifRegs->offset))
|
||||||
{
|
{
|
||||||
DevCon.Error("Wasn't enough left size/dsize = %x left to write %x", (size / ft->dsize), (ft->qsize - vifRegs->offset));
|
DevCon.Error("Wasn't enough left size/dsize = %x left to write %x", (size / ft.dsize), (ft.qsize - vifRegs->offset));
|
||||||
}
|
}
|
||||||
unpacksize = min((size / ft->dsize), (ft->qsize - vifRegs->offset));
|
unpacksize = min((size / ft.dsize), (ft.qsize - vifRegs->offset));
|
||||||
|
|
||||||
|
|
||||||
VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft->qsize) + unpacksize, vifRegs->offset);
|
VIFUNPACK_LOG("Increasing dest by %x from offset %x", (4 - ft.qsize) + unpacksize, vifRegs->offset);
|
||||||
|
|
||||||
func(dest, (u32*)cdata, unpacksize);
|
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, unpacksize);
|
||||||
size -= unpacksize * ft->dsize;
|
size -= unpacksize * ft.dsize;
|
||||||
|
|
||||||
if(vifRegs->offset == 0)
|
if(vifRegs->offset == 0)
|
||||||
{
|
{
|
||||||
|
@ -339,13 +291,13 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
{
|
{
|
||||||
if (vifRegs->cycle.cl != vifRegs->cycle.wl)
|
if (vifRegs->cycle.cl != vifRegs->cycle.wl)
|
||||||
{
|
{
|
||||||
vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft->qsize) + unpacksize)) * 4;
|
vif->tag.addr += (((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + ((4 - ft.qsize) + unpacksize)) * 4;
|
||||||
dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + (4 - ft->qsize) + unpacksize;
|
dest += ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + (4 - ft.qsize) + unpacksize;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
|
vif->tag.addr += ((4 - ft.qsize) + unpacksize) * 4;
|
||||||
dest += (4 - ft->qsize) + unpacksize;
|
dest += (4 - ft.qsize) + unpacksize;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
|
if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
|
||||||
|
@ -354,7 +306,7 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
dest = (u32*)(VU->Mem + v->addr);
|
dest = (u32*)(VU->Mem + v->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
cdata += unpacksize * ft->dsize;
|
cdata += unpacksize * ft.dsize;
|
||||||
vif->cl = 0;
|
vif->cl = 0;
|
||||||
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
||||||
if ((size & 0xf) == 0) return size >> 2;
|
if ((size & 0xf) == 0) return size >> 2;
|
||||||
|
@ -362,8 +314,8 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vif->tag.addr += ((4 - ft->qsize) + unpacksize) * 4;
|
vif->tag.addr += ((4 - ft.qsize) + unpacksize) * 4;
|
||||||
dest += (4 - ft->qsize) + unpacksize;
|
dest += (4 - ft.qsize) + unpacksize;
|
||||||
|
|
||||||
if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
|
if (vif->tag.addr >= (u32)vif_size(VIFdmanum))
|
||||||
{
|
{
|
||||||
|
@ -371,7 +323,7 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
dest = (u32*)(VU->Mem + v->addr);
|
dest = (u32*)(VU->Mem + v->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
cdata += unpacksize * ft->dsize;
|
cdata += unpacksize * ft.dsize;
|
||||||
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
VIFUNPACK_LOG("Aligning packet done size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -391,11 +343,11 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
VIFUNPACK_LOG("Continuing last stream size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
VIFUNPACK_LOG("Continuing last stream size = %d offset %d addr %x", size, vifRegs->offset, vif->tag.addr);
|
||||||
incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
|
incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
|
||||||
|
|
||||||
while ((size >= ft->gsize) && (vifRegs->num > 0))
|
while ((size >= ft.gsize) && (vifRegs->num > 0))
|
||||||
{
|
{
|
||||||
func(dest, (u32*)cdata, ft->qsize);
|
func(dest, (u32*)cdata);
|
||||||
cdata += ft->gsize;
|
cdata += ft.gsize;
|
||||||
size -= ft->gsize;
|
size -= ft.gsize;
|
||||||
|
|
||||||
vifRegs->num--;
|
vifRegs->num--;
|
||||||
++vif->cl;
|
++vif->cl;
|
||||||
|
@ -431,20 +383,20 @@ template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size)
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
if (size >= ft->dsize && vifRegs->num > 0 && ((size & 0xf) != 0 || vif->cl != 0))
|
if (size >= ft.dsize && vifRegs->num > 0 && ((size & 0xf) != 0 || vif->cl != 0))
|
||||||
{
|
{
|
||||||
//VIF_LOG("warning, end with size = %d", size);
|
//VIF_LOG("warning, end with size = %d", size);
|
||||||
/* unpack one qword */
|
/* unpack one qword */
|
||||||
if(vif->tag.addr + ((size / ft->dsize) * 4) >= (u32)vif_size(VIFdmanum))
|
if(vif->tag.addr + ((size / ft.dsize) * 4) >= (u32)vif_size(VIFdmanum))
|
||||||
{
|
{
|
||||||
//DevCon.Warning("Overflow");
|
//DevCon.Warning("Overflow");
|
||||||
vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
|
vif->tag.addr &= (u32)(vif_size(VIFdmanum) - 1);
|
||||||
dest = (u32*)(VU->Mem + v->addr);
|
dest = (u32*)(VU->Mem + v->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
vif->tag.addr += (size / ft->dsize) * 4;
|
vif->tag.addr += (size / ft.dsize) * 4;
|
||||||
|
|
||||||
func(dest, (u32*)cdata, size / ft->dsize);
|
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
|
||||||
size = 0;
|
size = 0;
|
||||||
|
|
||||||
if(vifRegs->mode == 2)
|
if(vifRegs->mode == 2)
|
||||||
|
@ -468,9 +420,6 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
{
|
{
|
||||||
//DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
//DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
||||||
u32 *dest;
|
u32 *dest;
|
||||||
u32 unpackType;
|
|
||||||
UNPACKFUNCTYPE func;
|
|
||||||
const VIFUnpackFuncTable *ft;
|
|
||||||
VURegs * VU;
|
VURegs * VU;
|
||||||
u8 *cdata = (u8*)data;
|
u8 *cdata = (u8*)data;
|
||||||
u32 tempsize = 0;
|
u32 tempsize = 0;
|
||||||
|
@ -507,13 +456,10 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
|
|
||||||
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
|
VIFUNPACK_LOG("USN %x Masking %x Mask %x Mode %x CL %x WL %x Offset %x", vif->usn, (vifRegs->code & 0x10000000) >> 28, vifRegs->mask, vifRegs->mode, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->offset);
|
||||||
|
|
||||||
// The unpack type
|
|
||||||
unpackType = v->cmd & 0xf;
|
|
||||||
|
|
||||||
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
|
_mm_prefetch((char*)data + 128, _MM_HINT_NTA);
|
||||||
|
|
||||||
ft = &VIFfuncTable[ unpackType ];
|
const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
|
||||||
func = vif->usn ? ft->funcU : ft->funcS;
|
UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;
|
||||||
|
|
||||||
size <<= 2;
|
size <<= 2;
|
||||||
|
|
||||||
|
@ -528,12 +474,12 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
dest = (u32*)(VU->Mem + v->addr);
|
dest = (u32*)(VU->Mem + v->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller
|
size = std::min<u32>(size, vifRegs->num * ft.gsize); //size will always be the same or smaller
|
||||||
|
|
||||||
tempsize = vif->tag.addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
|
tempsize = vif->tag.addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
|
||||||
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
|
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
|
||||||
|
|
||||||
/*tempsize = vif->tag.addr + (((size / (ft->gsize * vifRegs->cycle.wl)) *
|
/*tempsize = vif->tag.addr + (((size / (ft.gsize * vifRegs->cycle.wl)) *
|
||||||
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);*/
|
(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);*/
|
||||||
|
|
||||||
//Sanity Check (memory overflow)
|
//Sanity Check (memory overflow)
|
||||||
|
@ -562,7 +508,7 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size >= ft->gsize)
|
if (size >= ft.gsize)
|
||||||
{
|
{
|
||||||
const UNPACKPARTFUNCTYPESSE* pfn;
|
const UNPACKPARTFUNCTYPESSE* pfn;
|
||||||
int writemask;
|
int writemask;
|
||||||
|
@ -612,7 +558,7 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
vifRegs->cycle.cl = vifRegs->cycle.wl = 1;
|
vifRegs->cycle.cl = vifRegs->cycle.wl = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
pfn = vif->usn ? VIFfuncTableSSE[unpackType].funcU : VIFfuncTableSSE[unpackType].funcS;
|
pfn = vif->usn ? VIFfuncTableSSE[v->cmd & 0xf].funcU : VIFfuncTableSSE[v->cmd & 0xf].funcS;
|
||||||
writemask = VIFdmanum ? g_vif1HasMask3[min(vifRegs->cycle.wl,(u8)3)] : g_vif0HasMask3[min(vifRegs->cycle.wl,(u8)3)];
|
writemask = VIFdmanum ? g_vif1HasMask3[min(vifRegs->cycle.wl,(u8)3)] : g_vif0HasMask3[min(vifRegs->cycle.wl,(u8)3)];
|
||||||
writemask = pfn[(((vifRegs->code & 0x10000000)>>28)<<writemask)*3+vifRegs->mode](dest, (u32*)cdata, size);
|
writemask = pfn[(((vifRegs->code & 0x10000000)>>28)<<writemask)*3+vifRegs->mode](dest, (u32*)cdata, size);
|
||||||
|
|
||||||
|
@ -630,20 +576,20 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
// if size is left over, update the src,dst pointers
|
// if size is left over, update the src,dst pointers
|
||||||
if (writemask > 0)
|
if (writemask > 0)
|
||||||
{
|
{
|
||||||
int left = (size - writemask) / ft->gsize;
|
int left = (size - writemask) / ft.gsize;
|
||||||
cdata += left * ft->gsize;
|
cdata += left * ft.gsize;
|
||||||
dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
|
dest = (u32*)((u8*)dest + ((left / vifRegs->cycle.wl) * vifRegs->cycle.cl + left % vifRegs->cycle.wl) * 16);
|
||||||
vifRegs->num -= left;
|
vifRegs->num -= left;
|
||||||
vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
|
vif->cl = (size % (ft.gsize * vifRegs->cycle.wl)) / ft.gsize;
|
||||||
size = writemask;
|
size = writemask;
|
||||||
|
|
||||||
if (size >= ft->dsize && vifRegs->num > 0)
|
if (size >= ft.dsize && vifRegs->num > 0)
|
||||||
{
|
{
|
||||||
VIF_LOG("warning, end with size = %d", size);
|
VIF_LOG("warning, end with size = %d", size);
|
||||||
|
|
||||||
/* unpack one qword */
|
/* unpack one qword */
|
||||||
//vif->tag.addr += (size / ft->dsize) * 4;
|
//vif->tag.addr += (size / ft.dsize) * 4;
|
||||||
func(dest, (u32*)cdata, size / ft->dsize);
|
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
|
||||||
size = 0;
|
size = 0;
|
||||||
|
|
||||||
if(vifRegs->mode == 2)
|
if(vifRegs->mode == 2)
|
||||||
|
@ -659,8 +605,8 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
vifRegs->num -= size / ft->gsize;
|
vifRegs->num -= size / ft.gsize;
|
||||||
if (vifRegs->num > 0) vif->cl = (size % (ft->gsize * vifRegs->cycle.wl)) / ft->gsize;
|
if (vifRegs->num > 0) vif->cl = (size % (ft.gsize * vifRegs->cycle.wl)) / ft.gsize;
|
||||||
size = 0;
|
size = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -669,11 +615,14 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
|
int incdest = ((vifRegs->cycle.cl - vifRegs->cycle.wl) << 2) + 4;
|
||||||
size = 0;
|
size = 0;
|
||||||
int addrstart = v->addr;
|
int addrstart = v->addr;
|
||||||
if((tempsize >> 2) != vif->tag.size) DevCon.Warning("split when size != tagsize");
|
|
||||||
|
#ifndef NON_SSE_UNPACKS // spams pointlessly when SSE unpacks are disabled
|
||||||
|
//if((tempsize >> 2) != vif->tag.size) DevCon.Warning("split when size != tagsize");
|
||||||
|
#endif
|
||||||
|
|
||||||
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, vif->tag.addr);
|
VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, vif->tag.addr);
|
||||||
|
|
||||||
while ((tempsize >= ft->gsize) && (vifRegs->num > 0))
|
while ((tempsize >= ft.gsize) && (vifRegs->num > 0))
|
||||||
{
|
{
|
||||||
if(v->addr >= memlimit)
|
if(v->addr >= memlimit)
|
||||||
{
|
{
|
||||||
|
@ -682,9 +631,9 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
dest = (u32*)(VU->Mem + v->addr);
|
dest = (u32*)(VU->Mem + v->addr);
|
||||||
}
|
}
|
||||||
|
|
||||||
func(dest, (u32*)cdata, ft->qsize);
|
func(dest, (u32*)cdata);
|
||||||
cdata += ft->gsize;
|
cdata += ft.gsize;
|
||||||
tempsize -= ft->gsize;
|
tempsize -= ft.gsize;
|
||||||
|
|
||||||
vifRegs->num--;
|
vifRegs->num--;
|
||||||
++vif->cl;
|
++vif->cl;
|
||||||
|
@ -721,13 +670,13 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
if(tempsize > 0) size = tempsize;
|
if(tempsize > 0) size = tempsize;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (size >= ft->dsize && vifRegs->num > 0) //Else write what we do have
|
if (size >= ft.dsize && vifRegs->num > 0) //Else write what we do have
|
||||||
{
|
{
|
||||||
VIF_LOG("warning, end with size = %d", size);
|
VIF_LOG("warning, end with size = %d", size);
|
||||||
|
|
||||||
/* unpack one qword */
|
/* unpack one qword */
|
||||||
//vif->tag.addr += (size / ft->dsize) * 4;
|
//vif->tag.addr += (size / ft.dsize) * 4;
|
||||||
func(dest, (u32*)cdata, size / ft->dsize);
|
(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
|
||||||
size = 0;
|
size = 0;
|
||||||
|
|
||||||
if(vifRegs->mode == 2)
|
if(vifRegs->mode == 2)
|
||||||
|
@ -745,8 +694,8 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
{
|
{
|
||||||
|
|
||||||
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
|
if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
|
||||||
if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
|
if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
|
||||||
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
|
DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
|
||||||
|
|
||||||
//DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
|
//DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
|
||||||
while (vifRegs->num > 0)
|
while (vifRegs->num > 0)
|
||||||
|
@ -758,15 +707,16 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
|
|
||||||
if (vif->cl < vifRegs->cycle.cl) /* unpack one qword */
|
if (vif->cl < vifRegs->cycle.cl) /* unpack one qword */
|
||||||
{
|
{
|
||||||
if(size < ft->gsize)
|
if(size < ft.gsize)
|
||||||
{
|
{
|
||||||
VIF_LOG("Out of Filling write data");
|
VIF_LOG("Out of Filling write data");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
func(dest, (u32*)cdata, ft->qsize);
|
func(dest, (u32*)cdata);
|
||||||
cdata += ft->gsize;
|
cdata += ft.gsize;
|
||||||
size -= ft->gsize;
|
size -= ft.gsize;
|
||||||
|
|
||||||
vif->cl++;
|
vif->cl++;
|
||||||
vifRegs->num--;
|
vifRegs->num--;
|
||||||
|
|
||||||
|
@ -777,7 +727,7 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
func(dest, (u32*)cdata, ft->qsize);
|
func(dest, (u32*)cdata);
|
||||||
vif->tag.addr += 16;
|
vif->tag.addr += 16;
|
||||||
vifRegs->num--;
|
vifRegs->num--;
|
||||||
++vif->cl;
|
++vif->cl;
|
||||||
|
|
|
@ -47,40 +47,6 @@ extern vifStruct vif0, vif1;
|
||||||
extern u8 schedulepath3msk;
|
extern u8 schedulepath3msk;
|
||||||
static const int VifCycleVoodoo = 4;
|
static const int VifCycleVoodoo = 4;
|
||||||
|
|
||||||
void __fastcall UNPACK_S_32( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_16u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_S_16s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_S_8u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_S_8s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_32( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_16u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_V2_16s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V2_8u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_V2_8s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_32( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_16u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_V3_16s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V3_8u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_V3_8s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_32( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_16u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_V4_16s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_8u( u32 *dest, u32 *data, int size );
|
|
||||||
void __fastcall UNPACK_V4_8s( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
void __fastcall UNPACK_V4_5( u32 *dest, u32 *data, int size );
|
|
||||||
|
|
||||||
extern void vifDmaInit();
|
extern void vifDmaInit();
|
||||||
|
|
||||||
extern void vif0Init();
|
extern void vif0Init();
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
#ifndef __VIFDMA_INTERNAL_H__
|
#ifndef __VIFDMA_INTERNAL_H__
|
||||||
#define __VIFDMA_INTERNAL_H__
|
#define __VIFDMA_INTERNAL_H__
|
||||||
|
|
||||||
|
#include "VifDma.h"
|
||||||
|
|
||||||
enum VifModes
|
enum VifModes
|
||||||
{
|
{
|
||||||
VIF_NORMAL_TO_MEM_MODE = 0,
|
VIF_NORMAL_TO_MEM_MODE = 0,
|
||||||
|
@ -27,7 +29,8 @@ enum VifModes
|
||||||
static const unsigned int VIF0intc = 4;
|
static const unsigned int VIF0intc = 4;
|
||||||
static const unsigned int VIF1intc = 5;
|
static const unsigned int VIF1intc = 5;
|
||||||
|
|
||||||
typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data, int size);
|
typedef void (__fastcall *UNPACKFUNCTYPE)(u32 *dest, u32 *data);
|
||||||
|
typedef void (__fastcall *UNPACKFUNCTYPE_ODD)(u32 *dest, u32 *data, int size);
|
||||||
typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size);
|
typedef int (*UNPACKPARTFUNCTYPESSE)(u32 *dest, u32 *data, int size);
|
||||||
|
|
||||||
struct VIFUnpackFuncTable
|
struct VIFUnpackFuncTable
|
||||||
|
@ -35,18 +38,23 @@ struct VIFUnpackFuncTable
|
||||||
UNPACKFUNCTYPE funcU;
|
UNPACKFUNCTYPE funcU;
|
||||||
UNPACKFUNCTYPE funcS;
|
UNPACKFUNCTYPE funcS;
|
||||||
|
|
||||||
u32 bsize; // currently unused
|
UNPACKFUNCTYPE_ODD oddU; // needed for old-style vif only, remove when old vif is removed.
|
||||||
u32 dsize; // byte size of one channel
|
UNPACKFUNCTYPE_ODD oddS; // needed for old-style vif only, remove when old vif is removed.
|
||||||
u32 gsize; // size of data in bytes used for each write cycle
|
|
||||||
u32 qsize; // used for unpack parts, num of vectors that
|
u8 bsize; // currently unused
|
||||||
|
u8 dsize; // byte size of one channel
|
||||||
|
u8 gsize; // size of data in bytes used for each write cycle
|
||||||
|
u8 qsize; // used for unpack parts, num of vectors that
|
||||||
// will be decompressed from data for 1 cycle
|
// will be decompressed from data for 1 cycle
|
||||||
};
|
};
|
||||||
|
|
||||||
extern const VIFUnpackFuncTable VIFfuncTable[16];
|
extern const __aligned16 VIFUnpackFuncTable VIFfuncTable[32];
|
||||||
|
|
||||||
extern __aligned16 u32 g_vif0Masks[64], g_vif1Masks[64];
|
extern __aligned16 u32 g_vif0Masks[64], g_vif1Masks[64];
|
||||||
extern u32 g_vif0HasMask3[4], g_vif1HasMask3[4];
|
extern u32 g_vif0HasMask3[4], g_vif1HasMask3[4];
|
||||||
extern int g_vifCycles;
|
extern int g_vifCycles;
|
||||||
extern u8 s_maskwrite[256];
|
extern u8 s_maskwrite[256];
|
||||||
|
extern vifStruct *vif;
|
||||||
|
|
||||||
template<const u32 VIFdmanum> void ProcessMemSkip(u32 size, u32 unpackType);
|
template<const u32 VIFdmanum> void ProcessMemSkip(u32 size, u32 unpackType);
|
||||||
template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size);
|
template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size);
|
||||||
|
@ -63,4 +71,9 @@ static __forceinline u32 vif_size(u8 num)
|
||||||
//#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
|
//#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
|
||||||
//#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
|
//#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
|
||||||
//#define newVif0 // Use New Code for Vif0 Unpacks (not implemented)
|
//#define newVif0 // Use New Code for Vif0 Unpacks (not implemented)
|
||||||
|
|
||||||
|
#ifndef newVif
|
||||||
|
//# define NON_SSE_UNPACKS // Turns off SSE Unpacks (slower)
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -824,6 +824,10 @@
|
||||||
RelativePath="..\..\VifDma_internal.h"
|
RelativePath="..\..\VifDma_internal.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\VIFunpack.cpp"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<Filter
|
<Filter
|
||||||
Name="Dynarec"
|
Name="Dynarec"
|
||||||
>
|
>
|
||||||
|
|
|
@ -24,8 +24,8 @@ extern void _nVifUnpack(int idx, u8 *data, u32 size);
|
||||||
|
|
||||||
typedef u32 (__fastcall *nVifCall)(void*, void*);
|
typedef u32 (__fastcall *nVifCall)(void*, void*);
|
||||||
|
|
||||||
static __pagealigned u8 nVifUpkExec[__pagesize*16];
|
static __pagealigned u8 nVifUpkExec[__pagesize*4];
|
||||||
static __aligned16 nVifCall nVifUpk[(2*2*16)*4]; // ([USN][Masking][Unpack Type]) [curCycle]
|
static __aligned16 nVifCall nVifUpk[(2*2*16) *4 ]; // ([USN][Masking][Unpack Type]) [curCycle]
|
||||||
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
|
static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
|
||||||
|
|
||||||
#define _1mb (0x100000)
|
#define _1mb (0x100000)
|
||||||
|
@ -57,7 +57,30 @@ struct nVifStruct {
|
||||||
BlockBuffer* vifCache; // Block Buffer
|
BlockBuffer* vifCache; // Block Buffer
|
||||||
};
|
};
|
||||||
|
|
||||||
static const u32 nVifT[16] = {
|
// Contents of this table are doubled up for doMast(false) and doMask(true) lookups.
|
||||||
|
// (note: currently unused, I'm using gsize in the interp tables instead since it
|
||||||
|
// seems to be faster for now, which may change when nVif isn't reliant on interpreted
|
||||||
|
// unpackers anymore --air)
|
||||||
|
static const u32 nVifT[32] = {
|
||||||
|
4, // S-32
|
||||||
|
2, // S-16
|
||||||
|
1, // S-8
|
||||||
|
0, // ----
|
||||||
|
8, // V2-32
|
||||||
|
4, // V2-16
|
||||||
|
2, // V2-8
|
||||||
|
0, // ----
|
||||||
|
12,// V3-32
|
||||||
|
6, // V3-16
|
||||||
|
3, // V3-8
|
||||||
|
0, // ----
|
||||||
|
16,// V4-32
|
||||||
|
8, // V4-16
|
||||||
|
4, // V4-8
|
||||||
|
2, // V4-5
|
||||||
|
|
||||||
|
// Second verse, same as the first!
|
||||||
|
|
||||||
4, // S-32
|
4, // S-32
|
||||||
2, // S-16
|
2, // S-16
|
||||||
1, // S-8
|
1, // S-8
|
||||||
|
@ -77,8 +100,8 @@ static const u32 nVifT[16] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "newVif_OldUnpack.inl"
|
#include "newVif_OldUnpack.inl"
|
||||||
#include "newVif_UnpackGen.inl"
|
|
||||||
#include "newVif_Unpack.inl"
|
#include "newVif_Unpack.inl"
|
||||||
|
#include "newVif_UnpackGen.inl"
|
||||||
|
|
||||||
//#include "newVif_Dynarec.inl"
|
//#include "newVif_Dynarec.inl"
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,6 @@ template void VIFunpack<0>(u32 *data, vifCode *v, u32 size);
|
||||||
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
|
template void VIFunpack<1>(u32 *data, vifCode *v, u32 size);
|
||||||
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
|
template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
|
||||||
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
//if (!VIFdmanum) DevCon.WriteLn("vif#%d, size = %d [%x]", VIFdmanum, size, data);
|
||||||
UNPACKFUNCTYPE func;
|
|
||||||
const VIFUnpackFuncTable *ft;
|
|
||||||
VURegs * VU;
|
VURegs * VU;
|
||||||
u8 *cdata = (u8*)data;
|
u8 *cdata = (u8*)data;
|
||||||
u32 tempsize = 0;
|
u32 tempsize = 0;
|
||||||
|
@@ -44,10 +42,10 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
 	}

 	u32 *dest = (u32*)(VU->Mem + v->addr);
-	u32 unpackType = v->cmd & 0xf;

-	ft = &VIFfuncTable[ unpackType ];
-	func = vif->usn ? ft->funcU : ft->funcS;
+	const VIFUnpackFuncTable& ft( VIFfuncTable[ v->cmd & 0x1f ] );
+	UNPACKFUNCTYPE func = vif->usn ? ft.funcU : ft.funcS;

 	size <<= 2;

 	if (vifRegs->cycle.cl >= vifRegs->cycle.wl) { // skipping write
@@ -57,7 +55,7 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
 			dest = (u32*)(VU->Mem + v->addr);
 		}

-		size = min(size, (int)vifRegs->num * ft->gsize); //size will always be the same or smaller
+		size = std::min<u32>(size, vifRegs->num * ft.gsize); //size will always be the same or smaller

 		tempsize = v->addr + ((((vifRegs->num-1) / vifRegs->cycle.wl) *
 			(vifRegs->cycle.cl - vifRegs->cycle.wl)) * 16) + (vifRegs->num * 16);
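The tempsize expression above is the skipping-write span: every cycle.wl vectors written, the destination jumps ahead by (cycle.cl - cycle.wl) extra quadwords, and each quadword is 16 bytes. A worked sketch of that arithmetic (hypothetical helper, not from the source, assumes wl >= 1) is:

    #include <cstdint>

    // Hypothetical sketch of the skipping-write span computed by tempsize above:
    // vectors are written in groups of wl; after each completed group the write
    // address skips (cl - wl) untouched quadwords (skipping write, so cl >= wl).
    static inline uint32_t skippingWriteEnd(uint32_t addr, uint32_t num,
                                            uint32_t cl, uint32_t wl)
    {
        const uint32_t fullBlocks = (num - 1) / wl;          // completed groups before the last vector
        const uint32_t skipped    = fullBlocks * (cl - wl);  // quadwords skipped between groups
        return addr + (skipped + num) * 16;                  // 16 bytes per quadword
    }

    // e.g. addr=0, num=8, cl=4, wl=2: 3 completed groups skip 3*2 qwords,
    // so the unpack touches memory up to (6 + 8) * 16 = 224 bytes past addr.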
@@ -90,16 +88,16 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {

 			VIFUNPACK_LOG("sorting tempsize :p, size %d, vifnum %d, addr %x", tempsize, vifRegs->num, v->addr);

-			while ((tempsize >= ft->gsize) && (vifRegs->num > 0)) {
+			while ((tempsize >= ft.gsize) && (vifRegs->num > 0)) {
 				if(v->addr >= memlimit) {
 					DevCon.Warning("Mem limit overflow");
 					v->addr &= (memlimit - 1);
 					dest = (u32*)(VU->Mem + v->addr);
 				}

-				func(dest, (u32*)cdata, ft->qsize);
-				cdata += ft->gsize;
-				tempsize -= ft->gsize;
+				func(dest, (u32*)cdata);
+				cdata += ft.gsize;
+				tempsize -= ft.gsize;

 				vifRegs->num--;
 				vif->cl++;
@@ -122,32 +120,32 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
 				if(tempsize > 0) size = tempsize;
 			}

-			if (size >= ft->dsize && vifRegs->num > 0) { //Else write what we do have
+			if (size >= ft.dsize && vifRegs->num > 0) { //Else write what we do have
 				DevCon.Warning("huh!!!!!!!!!!!!!!!!!!!!!!");
 				VIF_LOG("warning, end with size = %d", size);
 				// unpack one qword
-				//v->addr += (size / ft->dsize) * 4;
-				func(dest, (u32*)cdata, size / ft->dsize);
+				//v->addr += (size / ft.dsize) * 4;
+				(vif->usn ? ft.oddU : ft.oddS)(dest, (u32*)cdata, size / ft.dsize);
 				size = 0;
 				VIFUNPACK_LOG("leftover done, size %d, vifnum %d, addr %x", size, vifRegs->num, v->addr);
 			}
 		}
 		else { // filling write
 			if(vifRegs->cycle.cl > 0) // Quicker and avoids zero division :P
-				if((u32)(((size / ft->gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
-					DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft->gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);
+				if((u32)(((size / ft.gsize) / vifRegs->cycle.cl) * vifRegs->cycle.wl) < vifRegs->num)
+					DevCon.Warning("Filling write warning! %x < %x and CL = %x WL = %x", (size / ft.gsize), vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl);

-			DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, unpackType, vif->tag.addr);
+			DevCon.Warning("filling write %d cl %d, wl %d mask %x mode %x unpacktype %x addr %x", vifRegs->num, vifRegs->cycle.cl, vifRegs->cycle.wl, vifRegs->mask, vifRegs->mode, v->cmd & 0xf, vif->tag.addr);
 			while (vifRegs->num > 0) {
 				if (vif->cl == vifRegs->cycle.wl) {
 					vif->cl = 0;
 				}
 				// unpack one qword
 				if (vif->cl < vifRegs->cycle.cl) {
-					if(size < ft->gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
-					func(dest, (u32*)cdata, ft->qsize);
-					cdata += ft->gsize;
-					size -= ft->gsize;
+					if(size < ft.gsize) { DevCon.WriteLn("Out of Filling write data!"); break; }
+					func(dest, (u32*)cdata);
+					cdata += ft.gsize;
+					size -= ft.gsize;
 					vif->cl++;
 					vifRegs->num--;
 					if (vif->cl == vifRegs->cycle.wl) {
@@ -155,7 +153,7 @@ template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size) {
 					}
 				}
 				else {
-					func(dest, (u32*)cdata, ft->qsize);
+					func(dest, (u32*)cdata);
 					v->addr += 16;
 					vifRegs->num--;
 					vif->cl++;
@@ -21,29 +21,6 @@
 static __aligned16 nVifStruct nVif[2];

-void initNewVif(int idx) {
-	nVif[idx].idx = idx;
-	nVif[idx].VU = idx ? &VU1 : &VU0;
-	nVif[idx].vif = idx ? &vif1 : &vif0;
-	nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
-	nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
-	nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
-	nVif[idx].vifCache = NULL;
-
-	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
-	memset8<0xcc>( nVifUpkExec );
-
-	xSetPtr( nVifUpkExec );
-
-	for (int a = 0; a < 2; a++) {
-	for (int b = 0; b < 2; b++) {
-	for (int c = 0; c < 4; c++) {
-		nVifGen(a, b, c);
-	}}}
-
-	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
-}
-
 int nVifUnpack(int idx, u32 *data) {
 	XMMRegisters::Freeze();
 	int ret = aMin(vif1.vifpacketsize, vif1.tag.size);
@@ -108,65 +85,76 @@ static void setMasks(int idx, const VIFregisters& v) {
 // ----------------------------------------------------------------------------
 // Unpacking Optimization notes:
 // ----------------------------------------------------------------------------
-// Some games send a LOT of small packets. This is a problem because the new VIF unpacker
-// has a lot of setup code to establish which unpack function to call. The best way to
-// optimize this is to cache the unpack function's base (see fnbase below) and update it
-// when the variables it depends on are modified: writes to vif->tag.cmd and vif->usn.
-// Problem: vif->tag.cmd is modified a lot. Like, constantly. So won't work.
+// Some games send a LOT of single-cycle packets (God of War, SotC, TriAce games, etc),
+// so we always need to be wary of keeping loop setup code optimized. It's not always
+// a "win" to move code outside the loop, like normally in most other loop scenarios.
 //
-// A secondary optimization would be adding special handlers for packets where vifRegs->num==1.
-// (which would remove the loop, simplify the incVUptr code, etc). But checking for it has
-// to be simple enough that it doesn't offset the benefits (which I'm not sure is possible).
-// -- air
+// The biggest bottleneck of the current code is the call/ret needed to invoke the SSE
+// unpackers. A better option is to generate the entire vifRegs->num loop code as part
+// of the SSE template, and inline the SSE code into the heart of it. This both avoids
+// the call/ret and opens the door for resolving some register dependency chains in the
+// current emitted functions. (this is what zero's SSE does to get its final bit of
+// speed advantage over the new vif). --air
+//
+// As a secondary optimization to above, special handlers could be generated for the
+// cycleSize==1 case, which is used frequently enough, and results in enough code
+// elimination that it would probably be a win in most cases (and for sure in many
+// "slow" games that need it most). --air

 template< int idx, bool doMode, bool isFill >
 __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {

-	const int usn = !!(vif->usn);
-	const int doMask = !!(vif->tag.cmd & 0x10);
-	const int upkNum = vif->tag.cmd & 0xf;
-	const u32& vift = nVifT[upkNum];
-
-	u8* dest = setVUptr(idx, vif->tag.addr);
-	const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
-	UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;
-
-	// Did a bunch of work to make it so I could optimize this index lookup to outside
-	// the main loop but it was for naught -- too often the loop is only 1-2 iterations,
-	// so this setup code ends up being slower (1 iter) or same speed (2 iters).
-	const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + (doMask*16) + (upkNum)) * (4*1) ];

 	const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
 	const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
 	const int skipSize  = blockSize - cycleSize;

 	//if (skipSize > 2)
 	//DevCon.WriteLn("[num = %d][cl = %d][bl = %d][diff = %d]", vifRegs->num, vif->cl, blockSize, skipSize);

-	// This condition doesn't appear to ever occur, and really it never should.
-	// Normally it wouldn't matter, but even simple setup code matters here (see
-	// optimization notes above) >_<
+	if (vif->cmd & 0x10) setMasks(idx, *vifRegs);
+	const int usn = !!(vif->usn);
+	const int upkNum = vif->cmd & 0x1f;
+	//const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished.
+
+	// Recompiled Unpacker, used when doMode is false.
+	// Did a bunch of work to make it so I could optimize this index lookup to outside
+	// the main loop but it was for naught -- too often the loop is only 1-2 iterations,
+	// so this setup code ends up being slower (1 iter) or same speed (2 iters).
+	const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
+
+	// Interpreted Unpacker, used if doMode is true OR if isFill is true. Lookup is
+	// always performed for now, due to ft.gsize reference (seems faster than using
+	// nVifT for now)
+	const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
+	UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;
+
+	u8* dest = setVUptr(idx, vif->tag.addr);

 	if (vif->cl >= blockSize) vif->cl = 0;
-	if (doMask) setMasks(idx, *vifRegs);

 	while (vifRegs->num /*&& size*/) {
 		if (vif->cl < cycleSize) {
 			if (doMode /*|| doMask*/) {
 				//if (doMask)
 				//DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
-				func((u32*)dest, (u32*)data, ft.qsize);
+				func((u32*)dest, (u32*)data);
 			}
 			else {
 				//DevCon.WriteLn("SSE Unpack!");
-				fnbase[aMin(vif->cl, 4)](dest, data);
+				// Opt note: removing this min check (which isn't needed right now?) is +1%
+				// or more. Just something to keep in mind. :) --air
+				fnbase[0/*aMin(vif->cl, 4)*/](dest, data);
 			}
-			data += vift;
-			size -= vift;
+			data += ft.gsize;
+			size -= ft.gsize;
 			vifRegs->num--;
 			incVUptr(idx, dest, 16);
 			if (++vif->cl == blockSize) vif->cl = 0;
 		}
 		else if (isFill) {
-			func((u32*)dest, (u32*)data, ft.qsize);
+			func((u32*)dest, (u32*)data);
 			vifRegs->num--;
 			incVUptr(idx, dest, 16);
 			if (++vif->cl == blockSize) vif->cl = 0;
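To make the cycleSize/blockSize/skipSize roles above concrete: in a skipping write the packet supplies wl vectors per block and the destination then hops over skipSize quadwords, while in a filling write the remaining vectors of each block are produced without consuming packet data. The trace below is a simplified, hypothetical model (destination reduced to a quadword index, cl and wl assumed >= 1), not the emulator loop itself:

    #include <cstdio>

    // Hypothetical trace of the write pattern in _nVifUnpackLoop, with the VU
    // destination reduced to a plain quadword index (assumes cl, wl >= 1).
    static void traceUnpackWrites(int num, int cl, int wl)
    {
        const bool isFill    = cl < wl;
        const int  cycleSize = isFill ? cl : wl;  // vectors taken from the packet per block
        const int  blockSize = isFill ? wl : cl;  // total quadwords per block
        const int  skipSize  = blockSize - cycleSize;

        int dest = 0, curCycle = 0;
        while (num > 0) {
            if (curCycle < cycleSize) {
                // Packet data is unpacked into this quadword (both write modes).
                std::printf("qword %3d <- packet data\n", dest);
                num--; dest++;
                if (++curCycle == blockSize) curCycle = 0;
            }
            else if (isFill) {
                // Filling write: no packet data consumed; row/col registers fill the gap.
                std::printf("qword %3d <- fill (no packet data)\n", dest);
                num--; dest++;
                if (++curCycle == blockSize) curCycle = 0;
            }
            else {
                // Skipping write: hop over the rest of the block without writing.
                dest += skipSize;
                curCycle = 0;
            }
        }
    }

    // e.g. traceUnpackWrites(8, 4, 2) shows 2 qwords written, 2 skipped, repeated.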
@@ -179,7 +167,24 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
 	//if (size > 0) DevCon.WriteLn("size = %d", size);
 }

-void _nVifUnpack(int idx, u8 *data, u32 size) {
+typedef void (__fastcall* Fnptr_VifUnpackLoop)(u8 *data, u32 size);
+
+static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] =
+{
+	{
+		{ _nVifUnpackLoop<0,false,false>, _nVifUnpackLoop<0,false,true> },
+		{ _nVifUnpackLoop<0,true,false>,  _nVifUnpackLoop<0,true,true>  },
+	},
+
+	{
+		{ _nVifUnpackLoop<1,false,false>, _nVifUnpackLoop<1,false,true> },
+		{ _nVifUnpackLoop<1,true,false>,  _nVifUnpackLoop<1,true,true>  },
+	},
+};
+
+static _f void _nVifUnpack(int idx, u8 *data, u32 size) {
 	/*if (nVif[idx].vifRegs->cycle.cl >= nVif[idx].vifRegs->cycle.wl) { // skipping write
 		if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
 		else      VIFunpack<1>((u32*)data, &vif1.tag, size>>2);
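The UnpackLoopTable above turns the old nested if/else dispatch into one indexed call, with each (vif index, doMode, isFill) combination compiled as its own specialized loop. A minimal standalone sketch of the same pattern, using hypothetical names and a trivial loop body, is:

    #include <cstdio>

    // Minimal sketch of compile-time specialized loops dispatched via a table,
    // mirroring the UnpackLoopTable<idx, doMode, isFill> pattern above.
    template<int idx, bool doMode, bool isFill>
    static void unpackLoop(const unsigned char* data, unsigned size)
    {
        (void)data;
        // In the real code each specialization inlines only the work it needs;
        // here we just report which specialization ran.
        std::printf("vif%d loop: doMode=%d isFill=%d, %u bytes\n",
                    idx, (int)doMode, (int)isFill, size);
    }

    typedef void (*UnpackLoopFn)(const unsigned char*, unsigned);

    static const UnpackLoopFn kUnpackLoopTable[2][2][2] = {
        { { unpackLoop<0,false,false>, unpackLoop<0,false,true> },
          { unpackLoop<0,true, false>, unpackLoop<0,true, true> } },
        { { unpackLoop<1,false,false>, unpackLoop<1,false,true> },
          { unpackLoop<1,true, false>, unpackLoop<1,true, true> } },
    };

    int main()
    {
        unsigned char packet[16] = {0};
        const int  idx    = 1;
        const bool doMode = false;
        const bool isFill = true;
        kUnpackLoopTable[idx][doMode][isFill](packet, sizeof(packet)); // one indexed call, no branches
        return 0;
    }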
@@ -192,19 +197,7 @@ void _nVifUnpack(int idx, u8 *data, u32 size) {
 	const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
 	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);

-	//UnpackLoopTable[idx][doMode][isFill]( data, size );
-
-	if (idx) {
-		if (doMode) {
-			if (isFill) _nVifUnpackLoop<1,true,true>  (data, size);
-			else        _nVifUnpackLoop<1,true,false> (data, size);
-		}
-		else {
-			if (isFill) _nVifUnpackLoop<1,false,true> (data, size);
-			else        _nVifUnpackLoop<1,false,false>(data, size);
-		}
-	}
-	else pxFailDev( "No VIF0 support yet, sorry!" );
+	UnpackLoopTable[idx][doMode][isFill]( data, size );

 	//if (isFill)
 	//DevCon.WriteLn("%s Write! [num = %d][%s]", (isFill?"Filling":"Skipping"), vifRegs->num, (vifRegs->num%3 ? "bad!" : "ok"));
@@ -43,7 +43,7 @@ struct VifUnpackIndexer {
 		int packpart = packType;
 		int curpart = curCycle;

-		return nVifUpk[((usnpart+maskpart+packpart)*4) + (curpart)];
+		return nVifUpk[((usnpart+maskpart+packpart) * 4) + (curpart)];
 	}

 	void xSetCall(int packType) const {
@@ -158,6 +158,12 @@ void nVifGen(int usn, int mask, int curCycle) {

 	// A | B5 | G5 | R5
 	// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000

+	// Optimization: This function has a *really* long dependency chain.
+	// It would be better if [edx] were loaded into multiple regs and the
+	// regs then shifted independently, instead of using the progressive
+	// shift->move pattern below. --air
+
 	indexer.xSetCall(0xf); // V4-5
 	xMOV16 (xmm0, ptr32[edx]);
 	xMOVAPS (xmm1, xmm0);
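The dependency-chain note concerns the V4-5 format: a 16-bit A|B5|G5|R5 value expanded into four 32-bit fields, each color field left-aligned within a byte as the layout comment shows. A scalar sketch of the independent-extraction idea (hypothetical helper, not the emitted SSE) looks like this:

    #include <cstdint>

    // Hypothetical scalar model of the V4-5 expansion, following the bit layout in
    // the comment above: each 5-bit field lands at bits 3-7 of its 32-bit word and
    // the alpha bit at bit 7. Every field is extracted from 'src' independently,
    // so no extraction waits on the result of another.
    static inline void unpackV4_5(uint16_t src, uint32_t out[4])
    {
        out[0] = (uint32_t)(( src        & 0x1f) << 3); // R5 -> ..0.. R 000
        out[1] = (uint32_t)(((src >>  5) & 0x1f) << 3); // G5 -> ..0.. G 000
        out[2] = (uint32_t)(((src >> 10) & 0x1f) << 3); // B5 -> ..0.. B 000
        out[3] = (uint32_t)(((src >> 15) & 0x01) << 7); // A  -> ..0.. A 0000000
    }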
@@ -184,3 +190,27 @@ void nVifGen(int usn, int mask, int curCycle) {

 	pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
 }
+
+void initNewVif(int idx) {
+	nVif[idx].idx = idx;
+	nVif[idx].VU = idx ? &VU1 : &VU0;
+	nVif[idx].vif = idx ? &vif1 : &vif0;
+	nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
+	nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
+	nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
+	nVif[idx].vifCache = NULL;
+
+	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
+	memset8<0xcc>( nVifUpkExec );
+
+	xSetPtr( nVifUpkExec );
+
+	for (int a = 0; a < 2; a++) {
+	for (int b = 0; b < 2; b++) {
+	for (int c = 0; c < 4; c++) {
+		nVifGen(a, b, c);
+	}
+	}}
+
+	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
+}
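The initNewVif sequence above uses a common pattern for a static emit buffer: unprotect it, fill it with 0xcc (int3) so stray jumps fault immediately, emit every unpacker variant, then lock the pages again. A POSIX-flavored sketch of the same sequence, substituting mprotect for PCSX2's HostSys wrapper and assuming 4 KB pages, is:

    #include <sys/mman.h>
    #include <cstring>
    #include <cstdint>

    // Hypothetical stand-in for the nVifUpkExec setup; PCSX2 itself goes through
    // HostSys::MemProtectStatic instead of raw mprotect calls.
    alignas(4096) static uint8_t codeBuf[4096 * 4];

    static void emitUnpackers(uint8_t* /*dst*/)
    {
        // ... code emission would go here (xSetPtr/nVifGen in the real source) ...
    }

    static void initCodeBuffer()
    {
        // Writable while emitting; 0xcc (int3) padding makes stray jumps fault loudly.
        mprotect(codeBuf, sizeof(codeBuf), PROT_READ | PROT_WRITE);
        std::memset(codeBuf, 0xcc, sizeof(codeBuf));
        emitUnpackers(codeBuf);
        // Lock the buffer once emission is done.
        mprotect(codeBuf, sizeof(codeBuf), PROT_READ | PROT_EXEC);
    }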