mirror of https://github.com/PCSX2/pcsx2.git
Bugfix for assertion breaks not working in Devbuilds.
newVif: * Bugfix to HashBucket::find() cuts microprogram cache misses in half. * Dynarec version now uses alternating XMM registers for unmasked unpacks (very minor speedup, ~1%). git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2397 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
9473e69b7f
commit
de637fc921
|
@ -356,6 +356,18 @@ template< typename T > void xWrite( T val );
|
|||
|
||||
bool operator==( const xRegisterSSE& src ) const { return this->Id == src.Id; }
|
||||
bool operator!=( const xRegisterSSE& src ) const { return this->Id != src.Id; }
|
||||
|
||||
// Pre-increment: steps this register to the next XMM register id and returns *this.
// The id is masked with (iREGCNT_XMM-1), so it wraps around instead of running past
// the register file.  NOTE(review): the mask only wraps correctly when iREGCNT_XMM
// is a power of two -- confirm against its definition.
xRegisterSSE& operator++()
{
	// One plain assignment rather than `++Id &= mask`: the old form modified Id twice
	// within a single expression, which C++03 leaves unsequenced.
	Id = (Id + 1) & (iREGCNT_XMM-1);
	return *this;
}
|
||||
|
||||
// Pre-decrement: steps this register to the previous XMM register id and returns
// *this.  The id is masked with (iREGCNT_XMM-1), so stepping below the first register
// wraps around.  NOTE(review): the mask only wraps correctly when iREGCNT_XMM is a
// power of two -- confirm against its definition.
xRegisterSSE& operator--()
{
	// One plain assignment rather than `--Id &= mask`: the old form modified Id twice
	// within a single expression, which C++03 leaves unsequenced.
	Id = (Id - 1) & (iREGCNT_XMM-1);
	return *this;
}
|
||||
};
|
||||
|
||||
class xRegisterCL : public xRegister8
|
||||
|
|
|
@ -76,6 +76,26 @@ bool pxAssertImpl_LogIt( const DiagnosticOrigin& origin, const wxChar *msg )
|
|||
return false;
|
||||
}
|
||||
|
||||
// Issues a platform-specific breakpoint trap.  This stands in for wxTrap, because
// wxTrap isn't available on Linux builds of wxWidgets (non-Debug, typically).
// Exactly one branch of the chain below is compiled in; on a platform that matches
// none of the checks the function body is empty and no trap is raised.
void pxTrap()
{
#if defined(__WXMSW__) && !defined(__WXMICROWIN__)
	// Windows (excluding MicroWindows).
	__debugbreak();
#elif defined(__WXMAC__) && !defined(__DARWIN__)
	// Non-Darwin Mac builds: the call depends on __powerc.
#	if __powerc
	Debugger();
#	else
	SysBreak();
#	endif
#elif defined(_MSL_USING_MW_C_HEADERS) && _MSL_USING_MW_C_HEADERS
	Debugger();
#elif defined(__UNIX__)
	raise(SIGTRAP);
#else
	// TODO
#endif // Win/Unix
}
|
||||
|
||||
DEVASSERT_INLINE void pxOnAssert( const DiagnosticOrigin& origin, const wxChar* msg )
|
||||
{
|
||||
RecursionGuard guard( s_assert_guard );
|
||||
|
@ -98,7 +118,7 @@ DEVASSERT_INLINE void pxOnAssert( const DiagnosticOrigin& origin, const wxChar*
|
|||
trapit = pxDoAssert( origin, msg );
|
||||
}
|
||||
|
||||
if( trapit ) { wxTrap(); }
|
||||
if( trapit ) { pxTrap(); }
|
||||
}
|
||||
|
||||
__forceinline void pxOnAssert( const DiagnosticOrigin& origin, const char* msg)
|
||||
|
|
|
@ -863,10 +863,6 @@
|
|||
RelativePath="..\..\x86\newVif_Unpack.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\x86\newVif_UnpackGen.inl"
|
||||
>
|
||||
</File>
|
||||
<Filter
|
||||
Name="Dynarec"
|
||||
>
|
||||
|
|
|
@ -32,12 +32,14 @@ static __pagealigned u8 nVifUpkExec[__pagesize*4];
|
|||
// Default setup for the unpack code generator: the destination and source addresses
// come from the two __fastcall argument registers, xmm1 is the intermediate (work)
// register and xmm0 is the register holding the value that xMovDest() stores.
VifUnpackSSE_Base::VifUnpackSSE_Base()
	: dstIndirect( ecx )	// parameter 1 of __fastcall
	, srcIndirect( edx )	// parameter 2 of __fastcall
	, workReg    ( xmm1 )
	, destReg    ( xmm0 )
{
	// Nothing else to initialize.
}
|
||||
|
||||
void VifUnpackSSE_Base::xMovDest(const xRegisterSSE& srcReg) const {
|
||||
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], srcReg); }
|
||||
else { doMaskWrite(srcReg); }
|
||||
void VifUnpackSSE_Base::xMovDest() const {
|
||||
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
|
||||
else { doMaskWrite(destReg); }
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
|
||||
|
@ -56,145 +58,132 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
|
|||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_32() const {
|
||||
xMOV32 (xmm0, ptr32[srcIndirect]);
|
||||
xPSHUF.D (xmm1, xmm0, _v0);
|
||||
xMovDest (xmm1);
|
||||
xMOV32 (workReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX16 (xmm0);
|
||||
xPMOVXX16 (workReg);
|
||||
}
|
||||
else {
|
||||
xMOV16 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 16);
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 16);
|
||||
}
|
||||
xPSHUF.D (xmm1, xmm0, _v0);
|
||||
xMovDest (xmm1);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_S_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX8 (xmm0);
|
||||
xPMOVXX8 (workReg);
|
||||
}
|
||||
else {
|
||||
xMOV8 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(xmm0, xmm0);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 24);
|
||||
xMOV8 (workReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(workReg, workReg);
|
||||
xPUNPCK.LWD(workReg, workReg);
|
||||
xShiftR (workReg, 24);
|
||||
}
|
||||
xPSHUF.D (xmm1, xmm0, _v0);
|
||||
xMovDest (xmm1);
|
||||
xPSHUF.D (destReg, workReg, _v0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_32() const {
|
||||
xMOV64 (xmm0, ptr32[srcIndirect]);
|
||||
xMovDest (xmm0);
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX16 (xmm0);
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else {
|
||||
xMOV32 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 16);
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
}
|
||||
xMovDest (xmm0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V2_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX8 (xmm0);
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else {
|
||||
xMOV16 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(xmm0, xmm0);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 24);
|
||||
xMOV16 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
}
|
||||
xMovDest (xmm0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_32() const {
|
||||
xMOV128 (xmm0, ptr32[srcIndirect]);
|
||||
xMovDest (xmm0);
|
||||
xMOV128 (destReg, ptr32[srcIndirect]);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX16 (xmm0);
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else {
|
||||
xMOV64 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 16);
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
}
|
||||
xMovDest (xmm0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V3_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX8 (xmm0);
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else {
|
||||
xMOV32 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(xmm0, xmm0);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 24);
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
}
|
||||
xMovDest (xmm0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_32() const {
|
||||
xMOV128 (xmm0, ptr32[srcIndirect]);
|
||||
xMovDest (xmm0);
|
||||
xMOV128 (destReg, ptr32[srcIndirect]);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_16() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX16 (xmm0);
|
||||
xPMOVXX16 (destReg);
|
||||
}
|
||||
else {
|
||||
xMOV64 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 16);
|
||||
xMOV64 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 16);
|
||||
}
|
||||
xMovDest (xmm0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_8() const {
|
||||
if (x86caps.hasStreamingSIMD4Extensions) {
|
||||
xPMOVXX8 (xmm0);
|
||||
xPMOVXX8 (destReg);
|
||||
}
|
||||
else {
|
||||
xMOV32 (xmm0, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(xmm0, xmm0);
|
||||
xPUNPCK.LWD(xmm0, xmm0);
|
||||
xShiftR (xmm0, 24);
|
||||
xMOV32 (destReg, ptr32[srcIndirect]);
|
||||
xPUNPCK.LBW(destReg, destReg);
|
||||
xPUNPCK.LWD(destReg, destReg);
|
||||
xShiftR (destReg, 24);
|
||||
}
|
||||
xMovDest (xmm0);
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUPK_V4_5() const {
|
||||
xMOV16 (xmm0, ptr32[srcIndirect]);
|
||||
xPSHUF.D (xmm0, xmm0, _v0);
|
||||
xPSLL.D (xmm0, 3); // ABG|R5.000
|
||||
xMOVAPS (xmm1, xmm0); // x|x|x|R
|
||||
xPSRL.D (xmm0, 8); // ABG
|
||||
xPSLL.D (xmm0, 3); // AB|G5.000
|
||||
mVUmergeRegs(XMM1, XMM0, 0x4); // x|x|G|R
|
||||
xPSRL.D (xmm0, 8); // AB
|
||||
xPSLL.D (xmm0, 3); // A|B5.000
|
||||
mVUmergeRegs(XMM1, XMM0, 0x2); // x|B|G|R
|
||||
xPSRL.D (xmm0, 8); // A
|
||||
xPSLL.D (xmm0, 7); // A.0000000
|
||||
mVUmergeRegs(XMM1, XMM0, 0x1); // A|B|G|R
|
||||
xPSLL.D (xmm1, 24); // can optimize to
|
||||
xPSRL.D (xmm1, 24); // single AND...
|
||||
xMovDest (xmm1);
|
||||
xMOV16 (workReg, ptr32[srcIndirect]);
|
||||
xPSHUF.D (workReg, workReg, _v0);
|
||||
xPSLL.D (workReg, 3); // ABG|R5.000
|
||||
xMOVAPS (destReg, workReg); // x|x|x|R
|
||||
xPSRL.D (workReg, 8); // ABG
|
||||
xPSLL.D (workReg, 3); // AB|G5.000
|
||||
mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R
|
||||
xPSRL.D (workReg, 8); // AB
|
||||
xPSLL.D (workReg, 3); // A|B5.000
|
||||
mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R
|
||||
xPSRL.D (workReg, 8); // A
|
||||
xPSLL.D (workReg, 7); // A.0000000
|
||||
mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R
|
||||
xPSLL.D (destReg, 24); // can optimize to
|
||||
xPSRL.D (destReg, 24); // single AND...
|
||||
}
|
||||
|
||||
void VifUnpackSSE_Base::xUnpack( int upknum ) const
|
||||
|
@ -263,6 +252,7 @@ static void nVifGen(int usn, int mask, int curCycle) {
|
|||
|
||||
ucall = (nVifCall)xGetAlignedCallTarget();
|
||||
vpugen.xUnpack(i);
|
||||
vpugen.xMovDest();
|
||||
xRET();
|
||||
|
||||
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
|
||||
|
|
|
@ -38,18 +38,20 @@ public:
|
|||
protected:
|
||||
xAddressInfo dstIndirect;
|
||||
xAddressInfo srcIndirect;
|
||||
|
||||
xRegisterSSE workReg;
|
||||
xRegisterSSE destReg;
|
||||
|
||||
public:
|
||||
VifUnpackSSE_Base();
|
||||
virtual ~VifUnpackSSE_Base() throw() {}
|
||||
|
||||
virtual void xUnpack( int upktype ) const;
|
||||
virtual bool IsUnmaskedOp() const=0;
|
||||
virtual void xMovDest() const;
|
||||
|
||||
protected:
|
||||
virtual void doMaskWrite(const xRegisterSSE& regX ) const=0;
|
||||
|
||||
virtual void xMovDest(const xRegisterSSE& srcReg) const;
|
||||
virtual void xShiftR(const xRegisterSSE& regX, int n) const;
|
||||
virtual void xPMOVXX8(const xRegisterSSE& regX) const;
|
||||
virtual void xPMOVXX16(const xRegisterSSE& regX) const;
|
||||
|
|
|
@ -163,16 +163,24 @@ static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modR
|
|||
}
|
||||
if(addImm) xADD(modReg, addImm);
|
||||
}
|
||||
// Read-only lookup table with 16 entries: true where the unpack routine needs two
// XMM registers (workReg as an intermediate plus destReg), false where it builds the
// result directly in destReg.
// NOTE(review): presumably indexed by the 4-bit unpack type (upkNum = cmd & 0xf), which
// would make the `true` rows the S-32/S-16/S-8 group and V4-5 -- confirm against the
// code that reads this table.
static const bool UsesTwoRegs[] =
{
	true,  true,  true,  true,		// 0x0 - 0x3
	false, false, false, false,		// 0x4 - 0x7
	false, false, false, false,		// 0x8 - 0xb
	false, false, false, true,		// 0xc - 0xf
};
|
||||
|
||||
void VifUnpackSSE_Dynarec::CompileRoutine() {
|
||||
const int upkNum = vB.upkType & 0xf;
|
||||
const int upkNum = v.vif->cmd & 0xf;
|
||||
const u8& vift = nVifT[upkNum];
|
||||
const int cycleSize = isFill ? vB.cl : vB.wl;
|
||||
const int blockSize = isFill ? vB.wl : vB.cl;
|
||||
const int skipSize = blockSize - cycleSize;
|
||||
|
||||
int vNum = vifRegs->num;
|
||||
vCL = vif->cl;
|
||||
int vNum = v.vifRegs->num;
|
||||
vCL = v.vif->cl;
|
||||
|
||||
SetMasks(cycleSize);
|
||||
|
||||
|
@ -183,14 +191,25 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
|
|||
|
||||
if (vCL < cycleSize) {
|
||||
xUnpack(upkNum);
|
||||
srcIndirect += vift;
|
||||
xMovDest();
|
||||
|
||||
dstIndirect += 16;
|
||||
srcIndirect += vift;
|
||||
|
||||
if( IsUnmaskedOp() ) {
|
||||
++destReg;
|
||||
++workReg;
|
||||
}
|
||||
|
||||
vNum--;
|
||||
if (++vCL == blockSize) vCL = 0;
|
||||
}
|
||||
else if (isFill) {
|
||||
DevCon.WriteLn("filling mode!");
|
||||
VifUnpackSSE_Dynarec::FillingWrite( *this ).xUnpack(upkNum);
|
||||
VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) );
|
||||
fill.xUnpack(upkNum);
|
||||
fill.xMovDest();
|
||||
|
||||
dstIndirect += 16;
|
||||
vNum--;
|
||||
if (++vCL == blockSize) vCL = 0;
|
||||
|
@ -200,9 +219,10 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
|
|||
vCL = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (doMode==2) writeBackRow();
|
||||
xMOV(ptr32[&vif->cl], vCL);
|
||||
xMOV(ptr32[&vifRegs->num], vNum);
|
||||
xMOV(ptr32[&v.vif->cl], vCL);
|
||||
xMOV(ptr32[&v.vifRegs->num], vNum);
|
||||
xRET();
|
||||
}
|
||||
|
||||
|
@ -227,29 +247,29 @@ static _f void dVifRecLimit(int idx) {
|
|||
_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
|
||||
|
||||
const nVifStruct& v = nVif[idx];
|
||||
const u8 upkType = vif->cmd & 0x1f | ((!!vif->usn) << 5);
|
||||
const int doMask = (upkType>>4) & 1;
|
||||
const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5);
|
||||
const int doMask = v.vif->cmd & 0x10;
|
||||
|
||||
const int cycle_cl = vifRegs->cycle.cl;
|
||||
const int cycle_wl = vifRegs->cycle.wl;
|
||||
const int cycle_cl = v.vifRegs->cycle.cl;
|
||||
const int cycle_wl = v.vifRegs->cycle.wl;
|
||||
const int cycleSize = isFill ? cycle_cl : cycle_wl;
|
||||
const int blockSize = isFill ? cycle_wl : cycle_cl;
|
||||
|
||||
if (vif->cl >= blockSize) vif->cl = 0;
|
||||
if (v.vif->cl >= blockSize) v.vif->cl = 0;
|
||||
|
||||
_vBlock.upkType = upkType;
|
||||
_vBlock.num = *(u8*)&vifRegs->num;
|
||||
_vBlock.mode = *(u8*)&vifRegs->mode;
|
||||
_vBlock.scl = vif->cl;
|
||||
_vBlock.num = *(u8*)&v.vifRegs->num;
|
||||
_vBlock.mode = *(u8*)&v.vifRegs->mode;
|
||||
_vBlock.scl = v.vif->cl;
|
||||
_vBlock.cl = cycle_cl;
|
||||
_vBlock.wl = cycle_wl;
|
||||
|
||||
// Zero out the mask parameter if it's unused -- games leave random junk
|
||||
// values here which cause false recblock cache misses.
|
||||
_vBlock.mask = doMask ? vifRegs->mask : 0x00;
|
||||
_vBlock.mask = (doMask || ((_vBlock.mode&3)!=0) ) ? v.vifRegs->mask : 0x00;
|
||||
|
||||
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
|
||||
if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
|
||||
if( u8* dest = dVifsetVUptr(v, v.vif->tag.addr) ) {
|
||||
//DevCon.WriteLn("Running Recompiled Block!");
|
||||
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
|
||||
}
|
||||
|
|
|
@ -65,11 +65,11 @@ public:
|
|||
u32 d = *((u32*)dataPtr);
|
||||
const SizeChain<T>& bucket( mBucket[d % hSize] );
|
||||
|
||||
for (int i=bucket.Size; i; --i) {
|
||||
for (int i=bucket.Size-1; i>0; --i) {
|
||||
// This inline version seems about 1-2% faster in tests of games that average 1
|
||||
// program per bucket. Games that average more should see a bigger improvement --air
|
||||
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
|
||||
if( result == 0x7 ) return &bucket.Chain[i];
|
||||
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) );
|
||||
if( (result&0x7) == 0x7 ) return &bucket.Chain[i];
|
||||
|
||||
// Dynamically generated function version, can't be inlined. :(
|
||||
//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
|
||||
|
|
Loading…
Reference in New Issue