mirror of https://github.com/PCSX2/pcsx2.git
newVif: I'm a terrible person. What started out as an optimization turned into this.

* Optimized codegen of the VPU recompiler using displaced memory offsets (1-2% speedup)
* Undid a lot of the inl stuff for more traditional cpp code layout (explained below)
* Removed some redundant code and turned some macros into functions.
* Renamed a few things to VPU (Vector Processing Unit, which is the specific name of the logic core that performs VIF Command Processing and Unpacking on the PS2)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2387 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent 5c8f4ded22
commit b27b89b162
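The "displaced memory offsets" optimization named above works by keeping indirect operands of the form [reg+disp] inside the one-byte (disp8) range of x86 instruction encoding. The following sketch is lifted, lightly commented, from the ShiftDisplacementWindow() helper this commit adds in VpuUnpackSSE_Dynarec.cpp; xAddressInfo, xRegister32, and xADD are PCSX2 x86emitter names:

```cpp
// Each unpack step advances src/dest by bumping a displacement
// (dstIndirect += 16, srcIndirect += vift) instead of emitting an ADD per
// step. Once the displacement would no longer fit in a signed byte, 0xf0 of
// it is folded back into the base register with a single real ADD, so that
// subsequent instructions keep their compact disp8 form.
static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
{
	int addImm = 0;
	while( addr.Displacement >= 0x80 )
	{
		addImm += 0xf0;   // amount folded into the base register
		addr   -= 0xf0;   // displacement shrinks back into disp8 range
	}
	if (addImm) xADD(modReg, addImm);
}
```

Note that the `addr -= 0xf0` line is only possible because of the operator-= overload this same commit adds to xAddressInfo in the x86emitter header.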
@@ -99,15 +99,16 @@ extern pxDoAssertFnType* pxDoAssert;
 // rear ugly heads in optimized builds, this is one of the few tools we have.
 
 #define pxAssertRel(cond, msg) ( (likely(cond)) || (pxOnAssert(pxAssertSpot(cond), msg), false) )
-#define pxAssumeMsg(cond, msg) ((void) ( (!likely(cond)) && (pxOnAssert(pxAssertSpot(cond), msg), false) ))
+#define pxAssumeRel(cond, msg) ((void) ( (!likely(cond)) && (pxOnAssert(pxAssertSpot(cond), msg), false) ))
+#define pxFailRel(msg) pxAssumeRel(false, msg)
 
 #if defined(PCSX2_DEBUG)
 
 # define pxAssertMsg(cond, msg) pxAssertRel(cond, msg)
 # define pxAssertDev(cond, msg) pxAssertMsg(cond, msg)
 
-# define pxAssume(cond) pxAssumeMsg(cond, wxNullChar)
-# define pxAssumeDev(cond, msg) pxAssumeMsg(cond, msg)
+# define pxAssumeMsg(cond, msg) pxAssumeRel(cond, msg)
+# define pxAssumeDev(cond, msg) pxAssumeRel(cond, msg)
 
 # define pxFail(msg) pxAssumeMsg(false, msg)
 # define pxFailDev(msg) pxAssumeDev(false, msg)
@@ -120,7 +121,7 @@ extern pxDoAssertFnType* pxDoAssert;
 # define pxAssertMsg(cond, msg) (likely(cond))
 # define pxAssertDev(cond, msg) pxAssertRel(cond, msg)
 
-# define pxAssume(cond) (__assume(cond))
+# define pxAssumeMsg(cond, msg) (__assume(cond))
 # define pxAssumeDev(cond, msg) pxAssumeMsg(cond, msg)
 
 # define pxFail(msg) (__assume(false))
@@ -134,7 +135,7 @@ extern pxDoAssertFnType* pxDoAssert;
 # define pxAssertMsg(cond, msg) (likely(cond))
 # define pxAssertDev(cond, msg) (likely(cond))
 
-# define pxAssume(cond) (__assume(cond))
+# define pxAssumeMsg(cond, msg) (__assume(cond))
 # define pxAssumeDev(cond, msg) (__assume(cond))
 
 # define pxFail(msg) (__assume(false))
@@ -143,6 +144,7 @@ extern pxDoAssertFnType* pxDoAssert;
 #endif
 
 #define pxAssert(cond) pxAssertMsg(cond, wxNullChar)
+#define pxAssume(cond) pxAssumeMsg(cond, wxNullChar)
 
 #define pxAssertRelease( cond, msg )
 
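The practical difference between the pxAssert and pxAssume families being reshuffled above: pxAssert stays a runtime check in debug builds, while pxAssume degrades to MSVC's __assume() in optimized builds, handing the condition to the optimizer as an invariant rather than testing it. A hedged usage sketch; the helper function and sizes here are hypothetical illustrations, not part of the commit:

```cpp
int unpackSizeOf(int upknum)   // hypothetical helper, for illustration only
{
	// Debug builds: a real assertion with message support.
	// Release builds: expands to __assume(upknum < 16), letting the
	// compiler drop range checks and treat the default case as dead.
	pxAssume(upknum < 16);

	switch (upknum)
	{
		case 0:  return 4;   // S-32 element size, mirroring the nVifT[] table
		case 1:  return 2;   // S-16
		case 2:  return 1;   // S-8
		default: pxFail(L"unreachable unpack type"); return 0;
	}
}
```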
@@ -490,6 +490,11 @@ template< typename T > void xWrite( T val );
 __forceinline xAddressInfo operator+( s32 imm ) const { return xAddressInfo( *this ).Add( imm ); }
 __forceinline xAddressInfo operator-( s32 imm ) const { return xAddressInfo( *this ).Add( -imm ); }
 __forceinline xAddressInfo operator+( const void* addr ) const { return xAddressInfo( *this ).Add( (uptr)addr ); }
 
+__forceinline void operator+=( const xAddressReg& right ) { Add( right ); }
+__forceinline void operator+=( const xAddressInfo& right ) { Add( right ); }
+__forceinline void operator+=( s32 imm ) { Add( imm ); }
+__forceinline void operator-=( s32 imm ) { Add( -imm ); }
 };
 
 extern const xRegisterSSE
 
@@ -515,6 +515,10 @@
 <Unit filename="../x86/ix86-32/iR5900Shift.cpp" />
 <Unit filename="../x86/ix86-32/iR5900Templates.cpp" />
 <Unit filename="../x86/ix86-32/recVTLB.cpp" />
+<Unit filename="../x86/VpuUnpackSSE.cpp" />
+<Unit filename="../x86/VpuUnpackSSE.h" />
+<Unit filename="../x86/VpuUnpackSSE_Dynarec.cpp" />
+<Unit filename="../x86/newVof_Unpack.cpp" />
 <Unit filename="../x86/microVU.cpp" />
 <Unit filename="../x86/microVU.h" />
 <Unit filename="../x86/microVU_Alloc.inl" />
@@ -534,12 +538,8 @@
 <Unit filename="../x86/microVU_Upper.inl" />
 <Unit filename="../x86/newVif.h" />
 <Unit filename="../x86/newVif_BlockBuffer.h" />
-<Unit filename="../x86/newVif_Dynarec.inl" />
 <Unit filename="../x86/newVif_HashBucket.h" />
 <Unit filename="../x86/newVif_OldUnpack.inl" />
-<Unit filename="../x86/newVif_Tables.inl" />
-<Unit filename="../x86/newVif_Unpack.inl" />
-<Unit filename="../x86/newVif_UnpackGen.inl" />
 <Unit filename="../x86/sVU_Debug.h" />
 <Unit filename="../x86/sVU_Lower.cpp" />
 <Unit filename="../x86/sVU_Micro.cpp" />
@@ -320,7 +320,6 @@ static int __fastcall Vif1TransDirectHL(u32 *data)
 static int __fastcall Vif1TransUnpack(u32 *data)
 {
 #ifdef newVif1
-extern int nVifUnpack(int idx, u8 *data);
 return nVifUnpack(1, (u8*)data);
 #endif
 
@@ -75,19 +75,21 @@ template<const u32 VIFdmanum> void ProcessMemSkip(u32 size, u32 unpackType);
 template<const u32 VIFdmanum> u32 VIFalign(u32 *data, vifCode *v, u32 size);
 template<const u32 VIFdmanum> void VIFunpack(u32 *data, vifCode *v, u32 size);
 template<const u32 VIFdmanum> void vuExecMicro(u32 addr);
-extern __forceinline void vif0FLUSH();
-extern __forceinline void vif1FLUSH();
+extern void vif0FLUSH();
+extern void vif1FLUSH();
 
 static __forceinline u32 vif_size(u8 num)
 {
 return (num == 0) ? 0x1000 : 0x4000;
 }
 
-//#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
-//#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
+#define newVif // Enable 'newVif' Code (if the below macros are not defined, it will use old non-sse code)
+#define newVif1 // Use New Code for Vif1 Unpacks (needs newVif defined)
 //#define newVif0 // Use New Code for Vif0 Unpacks (not implemented)
 
-#ifndef newVif
+#ifdef newVif
+extern int nVifUnpack(int idx, u8 *data);
+#else
 //# define NON_SSE_UNPACKS // Turns off SSE Unpacks (slower)
 #endif
 
@@ -860,7 +860,7 @@
 >
 </File>
 <File
-RelativePath="..\..\x86\newVif_Unpack.inl"
+RelativePath="..\..\x86\newVif_Unpack.cpp"
 >
 </File>
 <File
@@ -871,11 +871,19 @@
 Name="Dynarec"
 >
 <File
-RelativePath="..\..\x86\newVif_Dynarec.inl"
+RelativePath="..\..\x86\newVif_Tables.inl"
 >
 </File>
 <File
-RelativePath="..\..\x86\newVif_Tables.inl"
+RelativePath="..\..\x86\VpuUnpackSSE.cpp"
+>
+</File>
+<File
+RelativePath="..\..\x86\VpuUnpackSSE.h"
+>
+</File>
+<File
+RelativePath="..\..\x86\VpuUnpackSSE_Dynarec.cpp"
 >
 </File>
 </Filter>
@@ -0,0 +1,285 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "PrecompiledHeader.h"
+#include "VpuUnpackSSE.h"
+
+#define xMOV8(regX, loc) xMOVSSZX(regX, loc)
+#define xMOV16(regX, loc) xMOVSSZX(regX, loc)
+#define xMOV32(regX, loc) xMOVSSZX(regX, loc)
+#define xMOV64(regX, loc) xMOVUPS(regX, loc)
+#define xMOV128(regX, loc) xMOVUPS(regX, loc)
+
+static __pagealigned u8 nVifUpkExec[__pagesize*4];
+
+// =====================================================================================================
+// VpuUnpackSSE_Base Section
+// =====================================================================================================
+VpuUnpackSSE_Base::VpuUnpackSSE_Base()
+: dstIndirect(ecx) // parameter 1 of __fastcall
+, srcIndirect(edx) // parameter 2 of __fastcall
+{
+}
+
+void VpuUnpackSSE_Base::xMovDest(const xRegisterSSE& srcReg) const {
+if (!doMode && !doMask) { xMOVAPS (ptr[dstIndirect], srcReg); }
+else { doMaskWrite(srcReg); }
+}
+
+void VpuUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
+if (usn) { xPSRL.D(regX, n); }
+else { xPSRA.D(regX, n); }
+}
+
+void VpuUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
+if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]);
+else xPMOVSX.BD(regX, ptr32[srcIndirect]);
+}
+
+void VpuUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
+if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]);
+else xPMOVSX.WD(regX, ptr64[srcIndirect]);
+}
+
+void VpuUnpackSSE_Base::xUPK_S_32() const {
+xMOV32 (xmm0, ptr32[srcIndirect]);
+xPSHUF.D (xmm1, xmm0, _v0);
+xMovDest (xmm1);
+}
+
+void VpuUnpackSSE_Base::xUPK_S_16() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX16 (xmm0);
+}
+else {
+xMOV16 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 16);
+}
+xPSHUF.D (xmm1, xmm0, _v0);
+xMovDest (xmm1);
+}
+
+void VpuUnpackSSE_Base::xUPK_S_8() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX8 (xmm0);
+}
+else {
+xMOV8 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LBW(xmm0, xmm0);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 24);
+}
+xPSHUF.D (xmm1, xmm0, _v0);
+xMovDest (xmm1);
+}
+
+void VpuUnpackSSE_Base::xUPK_V2_32() const {
+xMOV64 (xmm0, ptr32[srcIndirect]);
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V2_16() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX16 (xmm0);
+}
+else {
+xMOV32 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 16);
+}
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V2_8() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX8 (xmm0);
+}
+else {
+xMOV16 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LBW(xmm0, xmm0);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 24);
+}
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V3_32() const {
+xMOV128 (xmm0, ptr32[srcIndirect]);
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V3_16() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX16 (xmm0);
+}
+else {
+xMOV64 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 16);
+}
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V3_8() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX8 (xmm0);
+}
+else {
+xMOV32 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LBW(xmm0, xmm0);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 24);
+}
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V4_32() const {
+xMOV128 (xmm0, ptr32[srcIndirect]);
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V4_16() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX16 (xmm0);
+}
+else {
+xMOV64 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 16);
+}
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V4_8() const {
+if (x86caps.hasStreamingSIMD4Extensions) {
+xPMOVXX8 (xmm0);
+}
+else {
+xMOV32 (xmm0, ptr32[srcIndirect]);
+xPUNPCK.LBW(xmm0, xmm0);
+xPUNPCK.LWD(xmm0, xmm0);
+xShiftR (xmm0, 24);
+}
+xMovDest (xmm0);
+}
+
+void VpuUnpackSSE_Base::xUPK_V4_5() const {
+xMOV16 (xmm0, ptr32[srcIndirect]);
+xPSHUF.D (xmm0, xmm0, _v0);
+xPSLL.D (xmm0, 3); // ABG|R5.000
+xMOVAPS (xmm1, xmm0); // x|x|x|R
+xPSRL.D (xmm0, 8); // ABG
+xPSLL.D (xmm0, 3); // AB|G5.000
+mVUmergeRegs(XMM1, XMM0, 0x4); // x|x|G|R
+xPSRL.D (xmm0, 8); // AB
+xPSLL.D (xmm0, 3); // A|B5.000
+mVUmergeRegs(XMM1, XMM0, 0x2); // x|B|G|R
+xPSRL.D (xmm0, 8); // A
+xPSLL.D (xmm0, 7); // A.0000000
+mVUmergeRegs(XMM1, XMM0, 0x1); // A|B|G|R
+xPSLL.D (xmm1, 24); // can optimize to
+xPSRL.D (xmm1, 24); // single AND...
+xMovDest (xmm1);
+}
+
+void VpuUnpackSSE_Base::xUnpack( int upknum )
+{
+switch( upknum )
+{
+case 0: xUPK_S_32(); break;
+case 1: xUPK_S_16(); break;
+case 2: xUPK_S_8(); break;
+
+case 4: xUPK_V2_32(); break;
+case 5: xUPK_V2_16(); break;
+case 6: xUPK_V2_8(); break;
+
+case 8: xUPK_V3_32(); break;
+case 9: xUPK_V3_16(); break;
+case 10: xUPK_V3_8(); break;
+
+case 12: xUPK_V4_32(); break;
+case 13: xUPK_V4_16(); break;
+case 14: xUPK_V4_8(); break;
+case 15: xUPK_V4_5(); break;
+
+case 3:
+case 7:
+case 11:
+pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
+break;
+}
+}
+
+// =====================================================================================================
+// VpuUnpackSSE_Simple
+// =====================================================================================================
+
+VpuUnpackSSE_Simple::VpuUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
+{
+curCycle = curCycle_;
+usn = usn_;
+doMask = domask_;
+}
+
+void VpuUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const {
+xMOVAPS(xmm7, ptr[dstIndirect]);
+int offX = aMin(curCycle, 3);
+xPAND(regX, ptr32[nVifMask[0][offX]]);
+xPAND(xmm7, ptr32[nVifMask[1][offX]]);
+xPOR (regX, ptr32[nVifMask[2][offX]]);
+xPOR (regX, xmm7);
+xMOVAPS(ptr[dstIndirect], regX);
+}
+
+// ecx = dest, edx = src
+static void nVifGen(int usn, int mask, int curCycle) {
+
+int usnpart = usn*2*16;
+int maskpart = mask*16;
+int curpart = curCycle;
+
+VpuUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle );
+
+for( int i=0; i<16; ++i )
+{
+nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + (curpart)] );
+ucall = NULL;
+if( nVifT[i] == 0 ) continue;
+
+ucall = (nVifCall)xGetAlignedCallTarget();
+vpugen.xUnpack(i);
+xRET();
+
+pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
+}
+}
+
+void VpuUnpackSSE_Init()
+{
+HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
+memset8<0xcc>( nVifUpkExec );
+
+xSetPtr( nVifUpkExec );
+
+for (int a = 0; a < 2; a++) {
+for (int b = 0; b < 2; b++) {
+for (int c = 0; c < 4; c++) {
+nVifGen(a, b, c);
+}}}
+
+HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
+}
@@ -0,0 +1,134 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "Common.h"
+#include "VifDma_internal.h"
+#include "newVif.h"
+
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+using namespace x86Emitter;
+
+// --------------------------------------------------------------------------------------
+// VpuUnpackSSE_Base
+// --------------------------------------------------------------------------------------
+class VpuUnpackSSE_Base
+{
+public:
+bool usn; // unsigned flag
+bool doMask; // masking write enable flag
+int doMode; // two bit value representing... something!
+
+protected:
+xAddressInfo dstIndirect;
+xAddressInfo srcIndirect;
+
+public:
+VpuUnpackSSE_Base();
+virtual ~VpuUnpackSSE_Base() throw() {}
+
+void xUnpack( int upktype );
+
+protected:
+virtual void doMaskWrite(const xRegisterSSE& regX ) const=0;
+
+virtual void xMovDest(const xRegisterSSE& srcReg) const;
+virtual void xShiftR(const xRegisterSSE& regX, int n) const;
+virtual void xPMOVXX8(const xRegisterSSE& regX) const;
+virtual void xPMOVXX16(const xRegisterSSE& regX) const;
+
+virtual void xUPK_S_32() const;
+virtual void xUPK_S_16() const;
+virtual void xUPK_S_8() const;
+
+virtual void xUPK_V2_32() const;
+virtual void xUPK_V2_16() const;
+virtual void xUPK_V2_8() const;
+
+virtual void xUPK_V3_32() const;
+virtual void xUPK_V3_16() const;
+virtual void xUPK_V3_8() const;
+
+virtual void xUPK_V4_32() const;
+virtual void xUPK_V4_16() const;
+virtual void xUPK_V4_8() const;
+virtual void xUPK_V4_5() const;
+
+};
+
+// --------------------------------------------------------------------------------------
+// VpuUnpackSSE_Simple
+// --------------------------------------------------------------------------------------
+class VpuUnpackSSE_Simple : public VpuUnpackSSE_Base
+{
+typedef VpuUnpackSSE_Base _parent;
+
+public:
+int curCycle;
+
+public:
+VpuUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_);
+virtual ~VpuUnpackSSE_Simple() throw() {}
+
+protected:
+virtual void doMaskWrite(const xRegisterSSE& regX ) const;
+};
+
+// --------------------------------------------------------------------------------------
+// VpuUnpackSSE_Dynarec
+// --------------------------------------------------------------------------------------
+class VpuUnpackSSE_Dynarec : public VpuUnpackSSE_Base
+{
+typedef VpuUnpackSSE_Base _parent;
+
+public:
+bool isFill;
+
+protected:
+const nVifStruct& v; // vif0 or vif1
+const nVifBlock& vB; // some pre-collected data from VifStruct
+int vCL; // internal copy of vif->cl
+
+public:
+VpuUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_);
+VpuUnpackSSE_Dynarec(const VpuUnpackSSE_Dynarec& src) // copy constructor
+: _parent(src)
+, v(src.v)
+, vB(src.vB)
+{
+isFill = src.isFill;
+vCL = src.vCL;
+}
+
+virtual ~VpuUnpackSSE_Dynarec() throw() {}
+
+void CompileRoutine();
+
+protected:
+virtual void doMaskWrite(const xRegisterSSE& regX) const;
+void SetMasks(int cS) const;
+void writeBackRow() const;
+
+static VpuUnpackSSE_Dynarec FillingWrite( const VpuUnpackSSE_Dynarec& src )
+{
+VpuUnpackSSE_Dynarec fillingWrite( src );
+fillingWrite.doMask = true;
+fillingWrite.doMode = 0;
+return fillingWrite;
+}
+};
@@ -0,0 +1,278 @@
+/* PCSX2 - PS2 Emulator for PCs
+ * Copyright (C) 2002-2009 PCSX2 Dev Team
+ *
+ * PCSX2 is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU Lesser General Public License as published by the Free Software Found-
+ * ation, either version 3 of the License, or (at your option) any later version.
+ *
+ * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with PCSX2.
+ * If not, see <http://www.gnu.org/licenses/>.
+ */
+
+// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
+// authors: cottonvibes(@gmail.com)
+// Jake.Stine (@gmail.com)
+
+#include "PrecompiledHeader.h"
+#include "VpuUnpackSSE.h"
+
+static __aligned16 nVifBlock _vBlock = {0};
+static __pagealigned u8 nVifMemCmp[__pagesize];
+
+static void emitCustomCompare() {
+HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
+memset8<0xcc>(nVifMemCmp);
+xSetPtr(nVifMemCmp);
+
+xMOVAPS (xmm0, ptr32[ecx]);
+xPCMP.EQD(xmm0, ptr32[edx]);
+xMOVMSKPS(eax, xmm0);
+xAND (eax, 0x7); // ignore top 4 bytes (recBlock pointer)
+
+xRET();
+HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
+}
+
+void dVifInit(int idx) {
+nVif[idx].idx = idx;
+nVif[idx].VU = idx ? &VU1 : &VU0;
+nVif[idx].vif = idx ? &vif1 : &vif0;
+nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
+nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
+nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
+nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
+nVif[idx].vifBlocks = new HashBucket<_tParams>();
+nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
+nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
+//emitCustomCompare();
+}
+
+// Loads Row/Col Data from vifRegs instead of g_vifmask
+// Useful for testing vifReg and g_vifmask inconsistency.
+static void loadRowCol(nVifStruct& v) {
+xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
+xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
+xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
+xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
+xPSHUF.D(xmm0, xmm0, _v0);
+xPSHUF.D(xmm1, xmm1, _v0);
+xPSHUF.D(xmm2, xmm2, _v0);
+xPSHUF.D(xmm6, xmm6, _v0);
+mVUmergeRegs(XMM6, XMM0, 8);
+mVUmergeRegs(XMM6, XMM1, 4);
+mVUmergeRegs(XMM6, XMM2, 2);
+xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
+xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
+xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
+xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
+xPSHUF.D(xmm2, xmm2, _v0);
+xPSHUF.D(xmm3, xmm3, _v0);
+xPSHUF.D(xmm4, xmm4, _v0);
+xPSHUF.D(xmm5, xmm5, _v0);
+}
+
+VpuUnpackSSE_Dynarec::VpuUnpackSSE_Dynarec(const nVifStruct& vif_, const nVifBlock& vifBlock_)
+: v(vif_)
+, vB(vifBlock_)
+{
+isFill = (vB.cl < vB.wl);
+usn = (vB.upkType>>5) & 1;
+doMask = (vB.upkType>>4) & 1;
+doMode = vB.mode & 3;
+}
+
+#define makeMergeMask(x) { \
+x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
+}
+
+_f void VpuUnpackSSE_Dynarec::SetMasks(int cS) const {
+u32 m0 = vB.mask;
+u32 m1 = m0 & 0xaaaaaaaa;
+u32 m2 =(~m1>>1) & m0;
+u32 m3 = (m1>>1) & ~m0;
+u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
+u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
+if((m2&&doMask) || doMode) { xMOVAPS(xmmRow, ptr32[row]); }
+if (m3&&doMask) {
+xMOVAPS(xmmCol0, ptr32[col]);
+if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
+if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
+if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
+if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
+}
+//if (mask||mode) loadRowCol(v);
+}
+
+void VpuUnpackSSE_Dynarec::doMaskWrite(const xRegisterSSE& regX) const {
+pxAssumeDev(regX.Id <= 1, "Reg Overflow! XMM2 thru XMM6 are reserved for masking.");
+int cc = aMin(vCL, 3);
+u32 m0 = (vB.mask >> (cc * 8)) & 0xff;
+u32 m1 = m0 & 0xaaaa;
+u32 m2 =(~m1>>1) & m0;
+u32 m3 = (m1>>1) & ~m0;
+u32 m4 = (m1>>1) & m0;
+makeMergeMask(m2);
+makeMergeMask(m3);
+makeMergeMask(m4);
+if (doMask&&m4) { xMOVAPS(xmmTemp, ptr[dstIndirect]); } // Load Write Protect
+if (doMask&&m2) { mVUmergeRegs(regX.Id, xmmRow.Id, m2); } // Merge Row
+if (doMask&&m3) { mVUmergeRegs(regX.Id, xmmCol0.Id+cc, m3); } // Merge Col
+if (doMask&&m4) { mVUmergeRegs(regX.Id, xmmTemp.Id, m4); } // Merge Write Protect
+if (doMode) {
+u32 m5 = (~m1>>1) & ~m0;
+if (!doMask) m5 = 0xf;
+else makeMergeMask(m5);
+if (m5 < 0xf) {
+xPXOR(xmmTemp, xmmTemp);
+mVUmergeRegs(xmmTemp.Id, xmmRow.Id, m5);
+xPADD.D(regX, xmmTemp);
+if (doMode==2) mVUmergeRegs(xmmRow.Id, regX.Id, m5);
+}
+else if (m5 == 0xf) {
+xPADD.D(regX, xmmRow);
+if (doMode==2) xMOVAPS(xmmRow, regX);
+}
+}
+xMOVAPS(ptr32[dstIndirect], regX);
+}
+
+void VpuUnpackSSE_Dynarec::writeBackRow() const {
+u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
+xMOVAPS(ptr32[row], xmmRow);
+DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
+// ToDo: Do we need to write back to vifregs.rX too!? :/
+}
+
+static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modReg )
+{
+// Shifts the displacement factor of a given indirect address, so that the address
+// remains in the optimal 0xf0 range (which allows for byte-form displacements when
+// generating instructions).
+
+int addImm = 0;
+while( addr.Displacement >= 0x80 )
+{
+addImm += 0xf0;
+addr -= 0xf0;
+}
+if(addImm) xADD(modReg, addImm);
+}
+
+void VpuUnpackSSE_Dynarec::CompileRoutine() {
+const int upkNum = vB.upkType & 0xf;
+const u8& vift = nVifT[upkNum];
+const int cycleSize = isFill ? vB.cl : vB.wl;
+const int blockSize = isFill ? vB.wl : vB.cl;
+const int skipSize = blockSize - cycleSize;
+
+int vNum = vifRegs->num;
+vCL = vif->cl;
+
+SetMasks(cycleSize);
+
+while (vNum) {
+
+ShiftDisplacementWindow( srcIndirect, edx );
+ShiftDisplacementWindow( dstIndirect, ecx );
+
+if (vCL < cycleSize) {
+xUnpack(upkNum);
+srcIndirect += vift;
+dstIndirect += 16;
+vNum--;
+if (++vCL == blockSize) vCL = 0;
+}
+else if (isFill) {
+DevCon.WriteLn("filling mode!");
+VpuUnpackSSE_Dynarec::FillingWrite( *this ).xUnpack(upkNum);
+dstIndirect += 16;
+vNum--;
+if (++vCL == blockSize) vCL = 0;
+}
+else {
+dstIndirect += (16 * skipSize);
+vCL = 0;
+}
+}
+if (doMode==2) writeBackRow();
+xMOV(ptr32[&vif->cl], vCL);
+xMOV(ptr32[&vifRegs->num], vNum);
+xRET();
+}
+
+static _f u8* dVifsetVUptr(const nVifStruct& v, int offset) {
+u8* ptr = (u8*)(v.VU->Mem + (offset & v.vuMemLimit));
+u8* endPtr = ptr + _vBlock.num * 16;
+if (endPtr > v.vuMemEnd) {
+DevCon.WriteLn("nVif - VU Mem Ptr Overflow; falling back to interpreter.");
+ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
+}
+return ptr;
+}
+
+static _f void dVifRecLimit(int idx) {
+if (nVif[idx].recPtr > nVif[idx].recEnd) {
+DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd);
+nVif[idx].vifBlocks->clear();
+nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
+}
+}
+
+_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
+
+const nVifStruct& v = nVif[idx];
+const u8 upkType = vif->cmd & 0x1f | ((!!vif->usn) << 5);
+const int doMask = (upkType>>4) & 1;
+
+const int cycle_cl = vifRegs->cycle.cl;
+const int cycle_wl = vifRegs->cycle.wl;
+const int cycleSize = isFill ? cycle_cl : cycle_wl;
+const int blockSize = isFill ? cycle_wl : cycle_cl;
+
+if (vif->cl >= blockSize) vif->cl = 0;
+
+_vBlock.upkType = upkType;
+_vBlock.num = *(u8*)&vifRegs->num;
+_vBlock.mode = *(u8*)&vifRegs->mode;
+_vBlock.scl = vif->cl;
+_vBlock.cl = cycle_cl;
+_vBlock.wl = cycle_wl;
+
+// Zero out the mask parameter if it's unused -- games leave random junk
+// values here which cause false recblock cache misses.
+_vBlock.mask = doMask ? vifRegs->mask : 0x00;
+
+if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
+if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
+//DevCon.WriteLn("Running Recompiled Block!");
+((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
+}
+else {
+//DevCon.WriteLn("Running Interpreter Block");
+_nVifUnpack(idx, data, size, isFill);
+}
+return;
+}
+static int recBlockNum = 0;
+DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
+DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl/wl=0x%x/0x%x, mask=%s)",
+_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
+doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
+);
+
+xSetPtr(v.recPtr);
+_vBlock.startPtr = (uptr)xGetAlignedCallTarget();
+v.vifBlocks->add(_vBlock);
+VpuUnpackSSE_Dynarec( v, _vBlock ).CompileRoutine();
+nVif[idx].recPtr = xGetPtr();
+
+dVifRecLimit(idx);
+
+// Run the block we just compiled. Various conditions may force us to still use
+// the interpreter unpacker though, so a recursive call is the safest way here...
+dVifUnpack(idx, data, size, isFill);
+}
@@ -15,6 +15,12 @@
 
 #pragma once
 
+#include "Vif.h"
+#include "VU.h"
+
+#include "x86emitter/x86emitter.h"
+using namespace x86Emitter;
+
 #ifdef newVif
 
 // newVif_HashBucket.h uses this typedef, so it has to be decared first.
@@ -23,17 +29,12 @@ typedef void (__fastcall *nVifrecCall)(uptr dest, uptr src);
 
 #include "newVif_BlockBuffer.h"
 #include "newVif_HashBucket.h"
-#include "x86emitter/x86emitter.h"
-
-using namespace x86Emitter;
-
-extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
-extern void nVifGen (int usn, int mask, int curCycle);
-extern void _nVifUnpack (int idx, u8 *data, u32 size);
-extern void dVifUnpack (int idx, u8 *data, u32 size);
-extern void dVifInit (int idx);
 
-static __pagealigned u8 nVifUpkExec[__pagesize*4];
-static __aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
-static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
+extern void mVUmergeRegs(int dest, int src, int xyzw, bool modXYZW = 0);
+extern void _nVifUnpack (int idx, u8 *data, u32 size, bool isFill);
+extern void dVifUnpack (int idx, u8 *data, u32 size, bool isFill);
+extern void dVifInit (int idx);
+extern void VpuUnpackSSE_Init();
 
 #define VUFT VIFUnpackFuncTable
 #define _1mb (0x100000)
@@ -56,7 +57,10 @@ static __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
 # pragma warning(disable:4996) // 'function': was declared deprecated
 #endif
 
-struct __aligned16 nVifBlock { // Ordered for Hashing
+// nVifBlock - Ordered for Hashing; the 'num' field and the lower 6 bits of upkType are
+// used as the hash bucke selector.
+//
+struct __aligned16 nVifBlock {
 u8 num; // [00] Num Field
 u8 upkType; // [01] Unpack Type [usn*1:mask*1:upk*4]
 u8 mode; // [02] Mode Field
@@ -88,63 +92,14 @@ struct nVifStruct {
 u8* recEnd; // End of Rec Cache
 BlockBuffer* vifCache; // Block Buffer
 HashBucket<_tParams>* vifBlocks; // Vif Blocks
-nVifBlock* vifBlock; // Current Vif Block Ptr
 };
 
-// Contents of this table are doubled up for doMask(false) and doMask(true) lookups.
-// (note: currently unused, I'm using gsize in the interp tables instead since it
-// seems to be faster for now, which may change when nVif isn't reliant on interpreted
-// unpackers anymore --air)
-static const u32 nVifT[32] = {
-4, // S-32
-2, // S-16
-1, // S-8
-0, // ----
-8, // V2-32
-4, // V2-16
-2, // V2-8
-0, // ----
-12,// V3-32
-6, // V3-16
-3, // V3-8
-0, // ----
-16,// V4-32
-8, // V4-16
-4, // V4-8
-2, // V4-5
-
-// Second verse, same as the first!
-4,2,1,0,8,4,2,0,12,6,3,0,16,8,4,2
-};
-
-template< int idx, bool doMode, bool isFill, bool singleUnpack >
-__releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size);
-
-typedef void (__fastcall* Fnptr_VifUnpackLoop)(u8 *data, u32 size);
-
-// Unpacks Until 'Num' is 0
-static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
-{{ _nVifUnpackLoop<0,0,0,0>, _nVifUnpackLoop<0,0,1,0> },
-{ _nVifUnpackLoop<0,1,0,0>, _nVifUnpackLoop<0,1,1,0> },},
-{{ _nVifUnpackLoop<1,0,0,0>, _nVifUnpackLoop<1,0,1,0> },
-{ _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },},
-};
-
-// Unpacks until 1 normal write cycle unpack has been written to VU mem
-static const __aligned16 Fnptr_VifUnpackLoop UnpackSingleTable[2][2][2] = {
-{{ _nVifUnpackLoop<0,0,0,1>, _nVifUnpackLoop<0,0,1,1> },
-{ _nVifUnpackLoop<0,1,0,1>, _nVifUnpackLoop<0,1,1,1> },},
-{{ _nVifUnpackLoop<1,0,0,1>, _nVifUnpackLoop<1,0,1,1> },
-{ _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },},
-};
-
-#define useOldUnpack 0 // Use code in newVif_OldUnpack.inl
-#define newVifDynaRec 1 // Use code in newVif_Dynarec.inl
-#include "newVif_OldUnpack.inl"
-#include "newVif_Unpack.inl"
-#include "newVif_UnpackGen.inl"
-
-#include "newVif_Tables.inl"
-#include "newVif_Dynarec.inl"
+extern __aligned16 nVifStruct nVif[2];
+extern __aligned16 const u8 nVifT[32];
+extern __aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
+extern __aligned16 u32 nVifMask[3][4][4]; // [MaskNumber][CycleNumber][Vector]
+
+static const bool useOldUnpack = false; // Use code in newVif_OldUnpack.inl
+static const bool newVifDynaRec = true; // Use code in newVif_Dynarec.inl
 
 #endif
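The new comment on nVifBlock above ("the 'num' field and the lower 6 bits of upkType are used as the hash bucke selector") is realized by the HashBucket changes later in this diff, which reinterpret the first 32 bits of the struct as the key. A minimal standalone sketch of that scheme, with a hypothetical bucket count:

```cpp
#include <cstdint>
#include <cstring>

// Field order matters: num, upkType, mode, scl occupy bytes 0..3, so the
// struct's first 32 bits double as the hash key (as in newVif_HashBucket.h).
struct nVifBlockKey {
	uint8_t num;      // [00] Num Field
	uint8_t upkType;  // [01] Unpack Type [usn*1:mask*1:upk*4]
	uint8_t mode;     // [02] Mode Field
	uint8_t scl;      // [03] hypothetical name for the 4th byte (vif->cl snapshot)
};

// Mirrors `u32 d = (u32&)dataPtr; ... d % hSize` from the header's add(), but
// uses memcpy instead of the reinterpreting cast to keep the sketch well-defined.
static uint32_t bucketIndex(const nVifBlockKey& b, uint32_t hSize /* hypothetical */)
{
	uint32_t d;
	std::memcpy(&d, &b, sizeof(d));
	return d % hSize;
}
```

This is also why the commit zeroes _vBlock.mask when masking is disabled: stale junk in non-key fields would otherwise cause spurious cache misses on lookup.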
@@ -25,7 +25,7 @@
 // just use 'new' and 'delete' for initialization and
 // deletion/cleanup respectfully...
 class BlockBuffer {
-private:
+protected:
 u32 mSize; // Cur Size
 u32 mSizeT; // Total Size
 u8* mData; // Data Ptr
@@ -1,163 +0,0 @@
-/* PCSX2 - PS2 Emulator for PCs
- * Copyright (C) 2002-2009 PCSX2 Dev Team
- *
- * PCSX2 is free software: you can redistribute it and/or modify it under the terms
- * of the GNU Lesser General Public License as published by the Free Software Found-
- * ation, either version 3 of the License, or (at your option) any later version.
- *
- * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
- * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
- * PURPOSE. See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along with PCSX2.
- * If not, see <http://www.gnu.org/licenses/>.
- */
-
-// newVif Dynarec - Dynamically Recompiles Vif 'unpack' Packets
-// authors: cottonvibes(@gmail.com)
-// Jake.Stine (@gmail.com)
-
-#pragma once
-
-void dVifInit(int idx) {
-nVif[idx].idx = idx;
-nVif[idx].VU = idx ? &VU1 : &VU0;
-nVif[idx].vif = idx ? &vif1 : &vif0;
-nVif[idx].vifRegs = idx ? vif1Regs : vif0Regs;
-nVif[idx].vuMemEnd = idx ? ((u8*)(VU1.Mem + 0x4000)) : ((u8*)(VU0.Mem + 0x1000));
-nVif[idx].vuMemLimit= idx ? 0x3ff0 : 0xff0;
-nVif[idx].vifCache = new BlockBuffer(_1mb*4); // 4mb Rec Cache
-nVif[idx].vifBlocks = new HashBucket<_tParams>();
-nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
-nVif[idx].recEnd = &nVif[idx].recPtr[nVif[idx].vifCache->getSize()-(_1mb/4)]; // .25mb Safe Zone
-emitCustomCompare();
-}
-
-_f void dVifRecLimit(int idx) {
-if (nVif[idx].recPtr > nVif[idx].recEnd) {
-DevCon.WriteLn("nVif Rec - Out of Rec Cache! [%x > %x]", nVif[idx].recPtr, nVif[idx].recEnd);
-nVif[idx].vifBlocks->clear();
-nVif[idx].recPtr = nVif[idx].vifCache->getBlock();
-}
-}
-
-_f void dVifSetMasks(nVifStruct& v, int mask, int mode, int cS) {
-u32 m0 = v.vifBlock->mask;
-u32 m1 = m0 & 0xaaaaaaaa;
-u32 m2 =(~m1>>1) & m0;
-u32 m3 = (m1>>1) & ~m0;
-u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
-u32* col = (v.idx) ? g_vifmask.Col1 : g_vifmask.Col0;
-if((m2&&mask) || mode) { xMOVAPS(xmmRow, ptr32[row]); }
-if (m3&&mask) {
-xMOVAPS(xmmCol0, ptr32[col]);
-if ((cS>=2) && (m3&0x0000ff00)) xPSHUF.D(xmmCol1, xmmCol0, _v1);
-if ((cS>=3) && (m3&0x00ff0000)) xPSHUF.D(xmmCol2, xmmCol0, _v2);
-if ((cS>=4) && (m3&0xff000000)) xPSHUF.D(xmmCol3, xmmCol0, _v3);
-if ((cS>=1) && (m3&0x000000ff)) xPSHUF.D(xmmCol0, xmmCol0, _v0);
-}
-//if (mask||mode) loadRowCol(v);
-}
-
-void dVifRecompile(nVifStruct& v, nVifBlock* vB) {
-const bool isFill = (vB->cl < vB->wl);
-const int usn = (vB->upkType>>5)&1;
-const int doMask = (vB->upkType>>4)&1;
-const int upkNum = vB->upkType & 0xf;
-const u32& vift = nVifT[upkNum];
-const int doMode = vifRegs->mode & 3;
-const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
-const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
-const int skipSize = blockSize - cycleSize;
-const bool simpleBlock = (vifRegs->num == 1);
-const int backupCL = vif->cl;
-const int backupNum = vifRegs->num;
-if (vif->cl >= blockSize) vif->cl = 0;
-
-v.vifBlock = vB;
-xSetPtr(v.recPtr);
-xAlignPtr(16);
-vB->startPtr = (uptr)xGetPtr();
-dVifSetMasks(v, doMask, doMode, cycleSize);
-
-while (vifRegs->num) {
-if (vif->cl < cycleSize) {
-xUnpack[upkNum](&v, doMode<<1 | doMask);
-if (!simpleBlock) xADD(edx, vift);
-if (!simpleBlock) xADD(ecx, 16);
-vifRegs->num--;
-if (++vif->cl == blockSize) vif->cl = 0;
-}
-else if (isFill) {
-DevCon.WriteLn("filling mode!");
-xUnpack[upkNum](&v, 1);
-xADD(ecx, 16);
-vifRegs->num--;
-if (++vif->cl == blockSize) vif->cl = 0;
-}
-else {
-xADD(ecx, 16 * skipSize);
-vif->cl = 0;
-}
-}
-if (doMode==2) writeBackRow(v);
-xMOV(ptr32[&vif->cl], vif->cl);
-xMOV(ptr32[&vifRegs->num], vifRegs->num);
-xRET();
-v.recPtr = xGetPtr();
-vif->cl = backupCL;
-vifRegs->num = backupNum;
-}
-
-static __aligned16 nVifBlock _vBlock = {0};
-
-_f u8* dVifsetVUptr(nVifStruct& v, int offset) {
-u8* ptr = (u8*)(v.VU->Mem + (offset & v.vuMemLimit));
-u8* endPtr = ptr + _vBlock.num * 16;
-if (endPtr > v.vuMemEnd) {
-DevCon.WriteLn("nVif - VU Mem Ptr Overflow!");
-ptr = NULL; // Fall Back to Interpreters which have wrap-around logic
-}
-return ptr;
-}
-
-void dVifUnpack(int idx, u8 *data, u32 size) {
-
-nVifStruct& v = nVif[idx];
-const u8 upkType = vif->cmd & 0x1f | ((!!(vif->usn)) << 5);
-const int doMask = (upkType>>4)&1;
-
-_vBlock.upkType = upkType;
-_vBlock.num = *(u8*)&vifRegs->num;
-_vBlock.mode = *(u8*)&vifRegs->mode;
-_vBlock.scl = vif->cl;
-_vBlock.cl = vifRegs->cycle.cl;
-_vBlock.wl = vifRegs->cycle.wl;
-
-// Zero out the mask parameter if it's unused -- games leave random junk
-// values here which cause false recblock cache misses.
-_vBlock.mask = doMask ? vifRegs->mask : 0x00;
-
-if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
-if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
-//DevCon.WriteLn("Running Recompiled Block!");
-((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
-}
-else {
-//DevCon.WriteLn("Running Interpreter Block");
-_nVifUnpack(idx, data, size);
-}
-return;
-}
-static int recBlockNum = 0;
-DevCon.WriteLn("nVif: Recompiled Block! [%d]", recBlockNum++);
-DevCon.WriteLn(L"\t(num=0x%02x, upkType=0x%02x, mode=0x%02x, scl=0x%02x, cl=0x%x, wl=0x%x, mask=%s)",
-_vBlock.num, _vBlock.upkType, _vBlock.mode, _vBlock.scl, _vBlock.cl, _vBlock.wl,
-doMask ? wxsFormat( L"0x%08x", _vBlock.mask ).c_str() : L"ignored"
-);
-
-dVifRecompile(v, &_vBlock);
-v.vifBlocks->add(&_vBlock);
-dVifRecLimit(idx);
-dVifUnpack(idx, data, size);
-}
@@ -15,8 +15,6 @@
 
 #pragma once
 
-static __pagealigned u8 nVifMemCmp[__pagesize];
-
 template< typename T >
 struct SizeChain
 {
@@ -66,8 +64,8 @@ public:
 if( bucket.Size > 3 ) DevCon.Warning( "recVifUnpk: Bucket 0x%04x has %d micro-programs", d % hSize, bucket.Size );
 return NULL;
 }
-__forceinline void add(T* dataPtr) {
-u32 d = *(u32*)dataPtr;
+__forceinline void add(const T& dataPtr) {
+u32 d = (u32&)dataPtr;
 SizeChain<T>& bucket( mBucket[d % hSize] );
 
 if( bucket.Chain = (T*)_aligned_realloc( bucket.Chain, sizeof(T)*(bucket.Size+1), 16), bucket.Chain==NULL ) {
@@ -76,7 +74,7 @@ public:
 wxEmptyString
 );
 }
-memcpy_fast(&bucket.Chain[bucket.Size++], dataPtr, sizeof(T));
+memcpy_fast(&bucket.Chain[bucket.Size++], &dataPtr, sizeof(T));
 }
 void clear() {
 for (int i = 0; i < hSize; i++) {
@ -1,287 +0,0 @@
|
||||||
/* PCSX2 - PS2 Emulator for PCs
|
|
||||||
* Copyright (C) 2002-2009 PCSX2 Dev Team
|
|
||||||
*
|
|
||||||
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
|
|
||||||
* of the GNU Lesser General Public License as published by the Free Software Found-
|
|
||||||
* ation, either version 3 of the License, or (at your option) any later version.
|
|
||||||
*
|
|
||||||
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
|
|
||||||
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
||||||
* PURPOSE. See the GNU General Public License for more details.
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License along with PCSX2.
|
|
||||||
* If not, see <http://www.gnu.org/licenses/>.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#define vUPK(x) void x(nVifStruct* v, int doMask)
|
|
||||||
#define _doUSN (v->vifBlock->upkType & 0x20)
|
|
||||||
#undef xMovDest
|
|
||||||
#undef xShiftR
|
|
||||||
#undef xPMOVXX8
|
|
||||||
#undef xPMOVXX16
|
|
||||||
#undef xMaskWrite
|
|
||||||
#define makeMergeMask(x) { \
|
|
||||||
x = ((x&0x40)>>6) | ((x&0x10)>>3) | (x&4) | ((x&1)<<3); \
|
|
||||||
}
|
|
||||||
void doMaskWrite(const xRegisterSSE& regX, nVifStruct* v, int doMask) {
|
|
||||||
if (regX.Id > 1) DevCon.WriteLn("Reg Overflow!!!");
|
|
||||||
int doMode = doMask>>1; doMask &= 1;
|
|
||||||
int cc = aMin(v->vif->cl, 3);
|
|
||||||
u32 m0 = (v->vifBlock->mask >> (cc * 8)) & 0xff;
|
|
||||||
u32 m1 = m0 & 0xaaaa;
|
|
||||||
u32 m2 =(~m1>>1) & m0;
|
|
||||||
u32 m3 = (m1>>1) & ~m0;
|
|
||||||
u32 m4 = (m1>>1) & m0;
|
|
||||||
makeMergeMask(m2);
|
|
||||||
makeMergeMask(m3);
|
|
||||||
makeMergeMask(m4);
|
|
||||||
if (doMask&&m4) { xMOVAPS(xmmTemp, ptr32[ecx]); } // Load Write Protect
|
|
||||||
if (doMask&&m2) { mVUmergeRegs(regX.Id, xmmRow.Id, m2); } // Merge Row
|
|
||||||
if (doMask&&m3) { mVUmergeRegs(regX.Id, xmmCol0.Id+cc, m3); } // Merge Col
|
|
||||||
if (doMask&&m4) { mVUmergeRegs(regX.Id, xmmTemp.Id, m4); } // Merge Write Protect
|
|
||||||
if (doMode) {
|
|
||||||
u32 m5 = (~m1>>1) & ~m0;
|
|
||||||
if (!doMask) m5 = 0xf;
|
|
||||||
else makeMergeMask(m5);
|
|
||||||
if (m5 < 0xf) {
|
|
||||||
xPXOR(xmmTemp, xmmTemp);
|
|
||||||
mVUmergeRegs(xmmTemp.Id, xmmRow.Id, m5);
|
|
||||||
xPADD.D(regX, xmmTemp);
|
|
||||||
if (doMode==2) mVUmergeRegs(xmmRow.Id, regX.Id, m5);
|
|
||||||
}
|
|
||||||
else if (m5 == 0xf) {
|
|
||||||
xPADD.D(regX, xmmRow);
|
|
||||||
if (doMode==2) xMOVAPS(xmmRow, regX);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
xMOVAPS(ptr32[ecx], regX);
|
|
||||||
}
|
|
||||||
#define xMovDest(regX) { \
|
|
||||||
if (!doMask){ xMOVAPS (ptr32[ecx], regX); } \
|
|
||||||
else { doMaskWrite(regX, v, doMask); } \
|
|
||||||
}
|
|
||||||
#define xShiftR(regX, n) { \
|
|
||||||
if (_doUSN) { xPSRL.D(regX, n); } \
|
|
||||||
else { xPSRA.D(regX, n); } \
|
|
||||||
}
|
|
||||||
#define xPMOVXX8(regX, src) { \
|
|
||||||
if (_doUSN) xPMOVZX.BD(regX, src); \
|
|
||||||
else xPMOVSX.BD(regX, src); \
|
|
||||||
}
|
|
||||||
#define xPMOVXX16(regX, src) { \
|
|
||||||
if (_doUSN) xPMOVZX.WD(regX, src); \
|
|
||||||
else xPMOVSX.WD(regX, src); \
|
|
||||||
}

// ecx = dest, edx = src
vUPK(nVif_S_32) {
	xMOV32   (xmm0, ptr32[edx]);
	xPSHUF.D (xmm1, xmm0, _v0);
	xMovDest (xmm1);
}

vUPK(nVif_S_16) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV16     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xPSHUF.D (xmm1, xmm0, _v0);
	xMovDest (xmm1);
}

vUPK(nVif_S_8) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV8      (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xPSHUF.D (xmm1, xmm0, _v0);
	xMovDest (xmm1);
}

vUPK(nVif_V2_32) {
	xMOV64   (xmm0, ptr32[edx]);
	xMovDest (xmm0);
}

vUPK(nVif_V2_16) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV32     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xMovDest (xmm0);
}

vUPK(nVif_V2_8) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV16     (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xMovDest (xmm0);
}

vUPK(nVif_V3_32) {
	xMOV128  (xmm0, ptr32[edx]);
	xMovDest (xmm0);
}

vUPK(nVif_V3_16) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV64     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xMovDest (xmm0);
}

vUPK(nVif_V3_8) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV32     (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xMovDest (xmm0);
}

vUPK(nVif_V4_32) {
	xMOV128  (xmm0, ptr32[edx]);
	xMovDest (xmm0);
}

vUPK(nVif_V4_16) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV64     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xMovDest (xmm0);
}

vUPK(nVif_V4_8) {
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV32     (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xMovDest (xmm0);
}

vUPK(nVif_V4_5) {
	xMOV16   (xmm0, ptr32[edx]);
	xPSHUF.D (xmm0, xmm0, _v0);
	xPSLL.D  (xmm0, 3);            // ABG|R5.000
	xMOVAPS  (xmm1, xmm0);         // x|x|x|R
	xPSRL.D  (xmm0, 8);            // ABG
	xPSLL.D  (xmm0, 3);            // AB|G5.000
	mVUmergeRegs(XMM1, XMM0, 0x4); // x|x|G|R
	xPSRL.D  (xmm0, 8);            // AB
	xPSLL.D  (xmm0, 3);            // A|B5.000
	mVUmergeRegs(XMM1, XMM0, 0x2); // x|B|G|R
	xPSRL.D  (xmm0, 8);            // A
	xPSLL.D  (xmm0, 7);            // A.0000000
	mVUmergeRegs(XMM1, XMM0, 0x1); // A|B|G|R
	xPSLL.D  (xmm1, 24);           // can optimize to
	xPSRL.D  (xmm1, 24);           // single AND...
	xMovDest (xmm1);
}
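
// Illustrative note (ours): judging from the call sites above, mVUmergeRegs
// copies the 32-bit lanes of src selected by a 4-bit mask into dest
// (bit value 8 = x, 4 = y, 2 = z, 1 = w). A scalar sketch of that behavior:
static void mergeLanes(u32 dest[4], const u32 src[4], int mask)
{
	for (int i = 0; i < 4; i++)              // i: 0=x .. 3=w
		if (mask & (8 >> i)) dest[i] = src[i];
}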

vUPK(nVif_unknown) {
	Console.Error("nVif%d - Invalid Unpack! [%d]", v->idx, v->vif->tag.cmd & 0xf);
}

void (*xUnpack[16])(nVifStruct* v, int doMask) = {
	nVif_S_32,
	nVif_S_16,
	nVif_S_8,
	nVif_unknown,
	nVif_V2_32,
	nVif_V2_16,
	nVif_V2_8,
	nVif_unknown,
	nVif_V3_32,
	nVif_V3_16,
	nVif_V3_8,
	nVif_unknown,
	nVif_V4_32,
	nVif_V4_16,
	nVif_V4_8,
	nVif_V4_5,
};
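
// Illustrative note (ours): the table above is indexed with the low nibble
// of the VIFcode, which packs the element count and element width:
static int unpackIndex(u8 cmd)
{
	int vl = cmd & 0x3;        // width: 0=32bit, 1=16bit, 2=8bit, 3=V4-5
	int vn = (cmd >> 2) & 0x3; // count: 0=S, 1=V2, 2=V3, 3=V4
	return (vn << 2) | vl;     // == cmd & 0xf, the xUnpack[] index
}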

// Loads Row/Col data from vifRegs instead of g_vifmask.
// Useful for testing vifReg and g_vifmask inconsistency.
void loadRowCol(nVifStruct& v) {
	xMOVAPS(xmm0, ptr32[&v.vifRegs->r0]);
	xMOVAPS(xmm1, ptr32[&v.vifRegs->r1]);
	xMOVAPS(xmm2, ptr32[&v.vifRegs->r2]);
	xMOVAPS(xmm6, ptr32[&v.vifRegs->r3]);
	xPSHUF.D(xmm0, xmm0, _v0);
	xPSHUF.D(xmm1, xmm1, _v0);
	xPSHUF.D(xmm2, xmm2, _v0);
	xPSHUF.D(xmm6, xmm6, _v0);
	mVUmergeRegs(XMM6, XMM0, 8);
	mVUmergeRegs(XMM6, XMM1, 4);
	mVUmergeRegs(XMM6, XMM2, 2);
	xMOVAPS(xmm2, ptr32[&v.vifRegs->c0]);
	xMOVAPS(xmm3, ptr32[&v.vifRegs->c1]);
	xMOVAPS(xmm4, ptr32[&v.vifRegs->c2]);
	xMOVAPS(xmm5, ptr32[&v.vifRegs->c3]);
	xPSHUF.D(xmm2, xmm2, _v0);
	xPSHUF.D(xmm3, xmm3, _v0);
	xPSHUF.D(xmm4, xmm4, _v0);
	xPSHUF.D(xmm5, xmm5, _v0);
}

void writeBackRow(nVifStruct& v) {
	u32* row = (v.idx) ? g_vifmask.Row1 : g_vifmask.Row0;
	xMOVAPS(ptr32[row], xmmRow);
	DevCon.WriteLn("nVif: writing back row reg! [doMode = 2]");
	// ToDo: Do we need to write back to vifregs.rX too!? :/
}

void emitCustomCompare() {
	HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadWrite, false);
	memset8<0xcc>(nVifMemCmp);
	xSetPtr(nVifMemCmp);

	xMOVAPS  (xmm0, ptr32[ecx]);
	xPCMP.EQD(xmm0, ptr32[edx]);
	xMOVMSKPS(eax, xmm0);
	xAND     (eax, 0x7); // ignore top 4 bytes (recBlock pointer)

	xRET();
	HostSys::MemProtectStatic(nVifMemCmp, Protect_ReadOnly, true);
}
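
// Illustrative note (ours): the stub above compares two 16-byte blocks but
// masks off the fourth dword, which holds the recBlock pointer. Scalar
// equivalent of the value it returns in eax:
static int vifMemCmp3(const u32* a, const u32* b)
{
	int mask = 0;
	for (int i = 0; i < 3; i++)
		if (a[i] == b[i]) mask |= (1 << i); // PCMPEQD + MOVMSKPS, then AND 0x7
	return mask; // 0x7 => the 12 compared bytes match
}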

@@ -17,9 +17,66 @@
// authors: cottonvibes(@gmail.com)
//          Jake.Stine (@gmail.com)
#pragma once
#include "PrecompiledHeader.h"
#include "Common.h"
#include "VifDma_internal.h"
#include "newVif.h"

static __aligned16 nVifStruct nVif[2];
#ifdef newVif
#include "newVif_OldUnpack.inl"

__aligned16 nVifStruct nVif[2];
__aligned16 nVifCall nVifUpk[(2*2*16) * 4]; // ([USN][Masking][Unpack Type]) [curCycle]
__aligned16 u32 nVifMask[3][4][4] = {0};    // [MaskNumber][CycleNumber][Vector]

// Contents of this table are doubled up for doMask(false) and doMask(true) lookups.
// (note: currently unused; I'm using gsize in the interp tables instead, since it
// seems to be faster for now, which may change when nVif isn't reliant on interpreted
// unpackers anymore --air)
__aligned16 const u8 nVifT[32] = {
	4,  // S-32
	2,  // S-16
	1,  // S-8
	0,  // ----
	8,  // V2-32
	4,  // V2-16
	2,  // V2-8
	0,  // ----
	12, // V3-32
	6,  // V3-16
	3,  // V3-8
	0,  // ----
	16, // V4-32
	8,  // V4-16
	4,  // V4-8
	2,  // V4-5

	// Second verse, same as the first!
	4,2,1,0, 8,4,2,0, 12,6,3,0, 16,8,4,2
};
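
// Illustrative note (ours): the entries above follow directly from the
// command encoding; V4-5 is the only irregular case (a whole RGB5A1 vector
// packed into 16 bits).
static u8 unpackSizeBytes(u8 cmd)
{
	int vl = cmd & 0x3;
	int vn = (cmd >> 2) & 0x3;
	if (vl == 3) return (vn == 3) ? 2 : 0;    // only V4-5 is defined
	return (u8)(((vn + 1) * (32 >> vl)) / 8); // element count * element bits / 8
}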

// ----------------------------------------------------------------------------
template< int idx, bool doMode, bool isFill, bool singleUnpack >
__releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size);

typedef void (__fastcall* Fnptr_VifUnpackLoop)(u8 *data, u32 size);

// Unpacks until 'Num' is 0
static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
	{{ _nVifUnpackLoop<0,0,0,0>, _nVifUnpackLoop<0,0,1,0> },
	 { _nVifUnpackLoop<0,1,0,0>, _nVifUnpackLoop<0,1,1,0> },},
	{{ _nVifUnpackLoop<1,0,0,0>, _nVifUnpackLoop<1,0,1,0> },
	 { _nVifUnpackLoop<1,1,0,0>, _nVifUnpackLoop<1,1,1,0> },},
};

// Unpacks until one normal write-cycle unpack has been written to VU mem
static const __aligned16 Fnptr_VifUnpackLoop UnpackSingleTable[2][2][2] = {
	{{ _nVifUnpackLoop<0,0,0,1>, _nVifUnpackLoop<0,0,1,1> },
	 { _nVifUnpackLoop<0,1,0,1>, _nVifUnpackLoop<0,1,1,1> },},
	{{ _nVifUnpackLoop<1,0,0,1>, _nVifUnpackLoop<1,0,1,1> },
	 { _nVifUnpackLoop<1,1,0,1>, _nVifUnpackLoop<1,1,1,1> },},
};
// ----------------------------------------------------------------------------
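
// Illustrative note (ours): the three indices select a specialization of
// _nVifUnpackLoop.
//   idx    - which VIF unit (VIF0 or VIF1)
//   doMode - row/col addition is active (vifRegs->mode set and the unpack
//            is not a masked one, per the call sites below)
//   isFill - fill write cycles are needed (cycle.cl < cycle.wl)
// Dispatch sketch matching the call sites later in this file:
static void runUnpackLoop(int idx, bool doMode, bool isFill, u8* data, u32 size)
{
	UnpackLoopTable[idx][doMode][isFill](data, size);
}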

void initNewVif(int idx) {
	nVif[idx].idx = idx;
@@ -31,26 +88,15 @@ void initNewVif(int idx) {
	nVif[idx].vifCache     = NULL;
	nVif[idx].partTransfer = 0;

	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadWrite, false);
	VpuUnpackSSE_Init();
	memset8<0xcc>( nVifUpkExec );
	xSetPtr( nVifUpkExec );

	for (int a = 0; a < 2; a++) {
	for (int b = 0; b < 2; b++) {
	for (int c = 0; c < 4; c++) {
		nVifGen(a, b, c);
	}}}

	HostSys::MemProtectStatic(nVifUpkExec, Protect_ReadOnly, true);
	if (newVifDynaRec) dVifInit(idx);
}

_f u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) {
static _f u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) {
	return (u8*)(vuMemBase + ( offset & (vuidx ? 0x3ff0 : 0xff0) ));
}

_f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
static _f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
	pxAssert( ((uptr)ptr & 0xf) == 0 ); // alignment check
	ptr += amount;
	int diff = ptr - (vuMemBase + (vuidx ? 0x4000 : 0x1000));
@@ -59,7 +105,7 @@ _f void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
	}
}

_f void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) {
static _f void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) {
	pxAssert( ((uptr)ptr & 0xf) == 0 ); // alignment check
	ptr += 16;
	if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) )
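
// Illustrative note (ours; the wrap branch itself is elided by the hunk
// above): both increment helpers keep the write pointer inside VU memory
// (4KB for VU0, 16KB for VU1), wrapping back to the base once the end is
// reached. Condensed scalar form of that invariant:
static u8* wrapVUptr(int vuidx, const u8* vuMemBase, u8* ptr)
{
	const uptr memSize = vuidx ? 0x4000 : 0x1000;
	uptr off = (uptr)(ptr - vuMemBase);
	return (u8*)vuMemBase + (off & (memSize - 1)); // sizes are powers of two
}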
@@ -73,16 +119,16 @@ int nVifUnpack(int idx, u8* data) {
	vifRegs = v.vifRegs;
	int ret  = aMin(vif->vifpacketsize, vif->tag.size);
	s32 size = ret << 2;
	u32 vifT = nVifT[vif->cmd & 0xf];
	const u8& vifT = nVifT[vif->cmd & 0xf];

	vif->tag.size -= ret;

	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);

	if (v.partTransfer) { // Last transfer was a partial vector transfer...
		const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
		const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
		const u8 upkNum = vif->cmd & 0x1f;
		const VUFT& ft = VIFfuncTable[upkNum];
		const int diff = vifT - v.partTransfer;
		const int diff = ft.gsize - v.partTransfer;
		memcpy(&v.partBuffer[v.partTransfer], data, diff);
		UnpackSingleTable[idx][doMode][isFill]( v.partBuffer, size );
		data += diff;
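
// Illustrative note (ours): when a vector straddles two VIF packets, its
// head bytes wait in partBuffer; the code above tops the buffer up from the
// new packet and unpacks the completed vector before the main loop resumes.
// Condensed sketch with hypothetical names:
static int finishPartialVector(u8* partBuffer, int& partTransfer, const u8* data, int vecSize)
{
	int diff = vecSize - partTransfer;             // bytes still missing
	memcpy(partBuffer + partTransfer, data, diff); // complete the buffered vector
	partTransfer = 0;                              // buffer now holds one whole vector
	return diff;                                   // caller advances 'data' by this much
}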
@@ -95,8 +141,8 @@ int nVifUnpack(int idx, u8* data) {
	u32 oldNum = vifRegs->num;

	if (size > 0) {
		if (newVifDynaRec) dVifUnpack(idx, data, size);
		if (newVifDynaRec) dVifUnpack(idx, data, size, isFill);
		else			   _nVifUnpack(idx, data, size);
		else			   _nVifUnpack(idx, data, size, isFill);
	}

	u32 s = (size/vifT) * vifT;
@@ -230,7 +276,7 @@ __releaseinline void __fastcall _nVifUnpackLoop(u8 *data, u32 size) {
	}
}

_f void _nVifUnpack(int idx, u8 *data, u32 size) {
_f void _nVifUnpack(int idx, u8 *data, u32 size, bool isFill) {

	if (useOldUnpack) {
		if (!idx) VIFunpack<0>((u32*)data, &vif0.tag, size>>2);
@@ -239,7 +285,6 @@ _f void _nVifUnpack(int idx, u8 *data, u32 size) {
	}

	const bool doMode = vifRegs->mode && !(vif->tag.cmd & 0x10);
	const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);

	UnpackLoopTable[idx][doMode][isFill]( data, size );
}
#endif

@@ -1,240 +0,0 @@
/* PCSX2 - PS2 Emulator for PCs
 * Copyright (C) 2002-2009 PCSX2 Dev Team
 *
 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Found-
 * ation, either version 3 of the License, or (at your option) any later version.
 *
 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with PCSX2.
 * If not, see <http://www.gnu.org/licenses/>.
 */

#pragma once

#define xMaskWrite(regX) { \
	xMOVAPS(xmm7, ptr32[ecx]); \
	int offX = aMin(curCycle, 3); \
	xPAND(regX, ptr32[nVifMask[0][offX]]); \
	xPAND(xmm7, ptr32[nVifMask[1][offX]]); \
	xPOR (regX, ptr32[nVifMask[2][offX]]); \
	xPOR (regX, xmm7); \
	xMOVAPS(ptr32[ecx], regX); \
}
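
// Illustrative note (ours): per 32-bit lane, the masked write above computes
//   out = (newData & nVifMask[0]) | (oldData & nVifMask[1]) | nVifMask[2]
// where the third table ORs in constant Row/Col data. Scalar form:
static u32 maskedLane(u32 newData, u32 oldData, u32 keepNew, u32 keepOld, u32 orConst)
{
	return (newData & keepNew) | (oldData & keepOld) | orConst;
}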

#define xMovDest(regX) { \
	if (!mask) { xMOVAPS (ptr32[ecx], regX); } \
	else	   { xMaskWrite(regX); } \
}
#define xShiftR(regX, n) { \
	if (usn) { xPSRL.D(regX, n); } \
	else	 { xPSRA.D(regX, n); } \
}
#define xPMOVXX8(regX, src) { \
	if (usn) xPMOVZX.BD(regX, src); \
	else	 xPMOVSX.BD(regX, src); \
}
#define xPMOVXX16(regX, src) { \
	if (usn) xPMOVZX.WD(regX, src); \
	else	 xPMOVSX.WD(regX, src); \
}

struct VifUnpackIndexer {
	int usn, mask;
	int curCycle, cyclesToWrite;

	nVifCall& GetCall(int packType) const {
		int usnpart  = usn*2*16;
		int maskpart = mask*16;
		int packpart = packType;
		int curpart  = curCycle;

		return nVifUpk[((usnpart+maskpart+packpart) * 4) + (curpart)];
	}

	void xSetCall(int packType) const {
		GetCall( packType ) = (nVifCall)xGetAlignedCallTarget();
	}

	void xSetNullCall(int packType) const {
		GetCall( packType ) = NULL;
	}
};
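
// Worked example of GetCall's indexing (ours): with usn=1, mask=0,
// packType=0x9 (V3-16), and curCycle=2, the call lands in slot
//   ((1*32 + 0*16 + 0x9) * 4) + 2 == 166
// of the (2*2*16)*4 == 256-entry nVifUpk table.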

// xMOVSS doesn't seem to have all overloads defined with the new emitter
#define xMOVSSS(regX, loc) SSE_MOVSS_Rm_to_XMM(0, 2, 0)

#define xMOV8(regX, loc)   xMOVSSS(regX, loc)
#define xMOV16(regX, loc)  xMOVSSS(regX, loc)
#define xMOV32(regX, loc)  xMOVSSS(regX, loc)
#define xMOV64(regX, loc)  xMOVUPS(regX, loc)
#define xMOV128(regX, loc) xMOVUPS(regX, loc)

// ecx = dest, edx = src
void nVifGen(int usn, int mask, int curCycle) {
	const VifUnpackIndexer indexer = { usn, mask, curCycle, 0 };

	indexer.xSetCall(0x0); // S-32
	xMOV32   (xmm0, ptr32[edx]);
	xPSHUF.D (xmm1, xmm0, _v0);
	xMovDest (xmm1);
	xRET();

	indexer.xSetCall(0x1); // S-16
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV16     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xPSHUF.D (xmm1, xmm0, _v0);
	xMovDest (xmm1);
	xRET();

	indexer.xSetCall(0x2); // S-8
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV8      (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xPSHUF.D (xmm1, xmm0, _v0);
	xMovDest (xmm1);
	xRET();

	indexer.xSetNullCall(0x3); // ----

	indexer.xSetCall(0x4); // V2-32
	xMOV64   (xmm0, ptr32[edx]);
	xMovDest (xmm0);
	xRET();

	indexer.xSetCall(0x5); // V2-16
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV32     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xMovDest (xmm0);
	xRET();

	indexer.xSetCall(0x6); // V2-8
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV16     (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xMovDest (xmm0);
	xRET();

	indexer.xSetNullCall(0x7); // ----

	indexer.xSetCall(0x8); // V3-32
	xMOV128  (xmm0, ptr32[edx]);
	xMovDest (xmm0);
	xRET();

	indexer.xSetCall(0x9); // V3-16
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV64     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xMovDest (xmm0);
	xRET();

	indexer.xSetCall(0xa); // V3-8
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV32     (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xMovDest (xmm0);
	xRET();

	indexer.xSetNullCall(0xb); // ----

	indexer.xSetCall(0xc); // V4-32
	xMOV128  (xmm0, ptr32[edx]);
	xMovDest (xmm0);
	xRET();

	indexer.xSetCall(0xd); // V4-16
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX16 (xmm0, ptr64[edx]);
	}
	else {
		xMOV64     (xmm0, ptr32[edx]);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 16);
	}
	xMovDest (xmm0);
	xRET();

	indexer.xSetCall(0xe); // V4-8
	if (x86caps.hasStreamingSIMD4Extensions) {
		xPMOVXX8 (xmm0, ptr32[edx]);
	}
	else {
		xMOV32     (xmm0, ptr32[edx]);
		xPUNPCK.LBW(xmm0, xmm0);
		xPUNPCK.LWD(xmm0, xmm0);
		xShiftR    (xmm0, 24);
	}
	xMovDest (xmm0);
	xRET();

	// A | B5 | G5 | R5
	// ..0.. A 0000000 | ..0.. B 000 | ..0.. G 000 | ..0.. R 000

	// Optimization: This function has a *really* long dependency chain.
	// It would be better if [edx] were loaded into multiple regs and the
	// regs were each shifted independently, instead of using the
	// progressive shift->move pattern below. --air

	indexer.xSetCall(0xf); // V4-5
	xMOV16   (xmm0, ptr32[edx]);
	xMOVAPS  (xmm1, xmm0);
	xPSLL.D  (xmm1, 3);    // ABG|R5.000
	xMOVAPS  (xmm2, xmm1); // R5.000 (garbage upper bits)
	xPSRL.D  (xmm1, 8);    // ABG
	xPSLL.D  (xmm1, 3);    // AB|G5.000
	xMOVAPS  (xmm3, xmm1); // G5.000 (garbage upper bits)
	xPSRL.D  (xmm1, 8);    // AB
	xPSLL.D  (xmm1, 3);    // A|B5.000
	xMOVAPS  (xmm4, xmm1); // B5.000 (garbage upper bits)
	xPSRL.D  (xmm1, 8);    // A
	xPSLL.D  (xmm1, 7);    // A.0000000
	xPSHUF.D (xmm1, xmm1, _v0); // A|A|A|A
	xPSHUF.D (xmm3, xmm3, _v0); // G|G|G|G
	xPSHUF.D (xmm4, xmm4, _v0); // B|B|B|B
	mVUmergeRegs(XMM2, XMM1, 0x3); // A|x|x|R
	mVUmergeRegs(XMM2, XMM3, 0x4); // A|x|G|R
	mVUmergeRegs(XMM2, XMM4, 0x2); // A|B|G|R
	xPSLL.D  (xmm2, 24); // can optimize to
	xPSRL.D  (xmm2, 24); // single AND...
	xMovDest (xmm2);
	xRET();

	pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
}
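
// Illustrative note (ours): the "--air" comment above suggests shortening
// the dependency chain by giving each field its own register and shifting
// them independently. A scalar sketch of that shape, where the four lines
// below have no dependencies on one another until the results are merged:
static void unpackV4_5(u32 c, u32 rgba[4])
{
	rgba[0] = ( c        & 0x1f) << 3; // R5 -> bits 3..7
	rgba[1] = ((c >>  5) & 0x1f) << 3; // G5
	rgba[2] = ((c >> 10) & 0x1f) << 3; // B5
	rgba[3] = ((c >> 15) & 0x01) << 7; // A1 -> bit 7
}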