Bugfix for assertion breaks not working in Devbuilds.

newVif:
 * Bugfix to HashBucket::find() cuts microprogram cache misses in half.
 * Dynarec version now uses alternating XMM registers for unmasked unpacks (very minor speedup, ~1%).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2397 96395faa-99c1-11dd-bbfe-3dabce05a288
Jake.Stine 2009-12-25 18:00:51 +00:00
parent 9473e69b7f
commit de637fc921
7 changed files with 141 additions and 101 deletions

View File

@@ -356,6 +356,18 @@ template< typename T > void xWrite( T val );
bool operator==( const xRegisterSSE& src ) const { return this->Id == src.Id; }
bool operator!=( const xRegisterSSE& src ) const { return this->Id != src.Id; }
xRegisterSSE& operator++()
{
++Id &= (iREGCNT_XMM-1);
return *this;
}
xRegisterSSE& operator--()
{
--Id &= (iREGCNT_XMM-1);
return *this;
}
};
class xRegisterCL : public xRegister8
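The wrap-around in operator++/operator-- above is what lets the unpack dynarec hand out a fresh XMM register on each unmasked iteration (see the ++destReg / ++workReg hunk further down). A standalone sketch of the behavior, assuming iREGCNT_XMM is 8 (a power of two, as in the 32-bit emitter); RegId is an illustrative stand-in, not a PCSX2 type:

#include <cstdio>

// Mimics xRegisterSSE::operator++ from the hunk above: the id is masked back
// into range, so incrementing past the last register wraps around to 0 (xmm0).
static const int iREGCNT_XMM = 8;

struct RegId
{
    int Id;
    RegId& operator++() { ++Id &= (iREGCNT_XMM-1); return *this; }
};

int main()
{
    RegId reg = { 6 };
    ++reg;  std::printf("%d\n", reg.Id);   // 7
    ++reg;  std::printf("%d\n", reg.Id);   // 0 -- wrapped
    return 0;
}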

View File

@@ -76,6 +76,26 @@ bool pxAssertImpl_LogIt( const DiagnosticOrigin& origin, const wxChar *msg )
return false;
}
// Because wxTrap isn't available on Linux builds of wxWidgets (non-Debug, typically)
void pxTrap()
{
#if defined(__WXMSW__) && !defined(__WXMICROWIN__)
__debugbreak();
#elif defined(__WXMAC__) && !defined(__DARWIN__)
#if __powerc
Debugger();
#else
SysBreak();
#endif
#elif defined(_MSL_USING_MW_C_HEADERS) && _MSL_USING_MW_C_HEADERS
Debugger();
#elif defined(__UNIX__)
raise(SIGTRAP);
#else
// TODO
#endif // Win/Unix
}
DEVASSERT_INLINE void pxOnAssert( const DiagnosticOrigin& origin, const wxChar* msg )
{
RecursionGuard guard( s_assert_guard );
@@ -98,7 +118,7 @@ DEVASSERT_INLINE void pxOnAssert( const DiagnosticOrigin& origin, const wxChar*
trapit = pxDoAssert( origin, msg );
}
if( trapit ) { wxTrap(); }
if( trapit ) { pxTrap(); }
}
__forceinline void pxOnAssert( const DiagnosticOrigin& origin, const char* msg)
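The net effect of the changes in this file: a failed devel-build assertion now reaches a real debugger break via pxTrap() instead of wxTrap(), which (as the comment above notes) is unavailable in non-debug wxWidgets builds. A minimal standalone sketch of the same pattern, assuming a POSIX or MSVC target; the my*/MY_* names are illustrative, not PCSX2's:

#include <cstdio>
#include <csignal>
#if defined(_MSC_VER)
#   include <intrin.h>
#endif

// Decide whether the failure should break into the debugger (log, prompt, etc.).
static bool myDoAssert(const char* msg)
{
    std::fprintf(stderr, "Assertion failed: %s\n", msg);
    return true;
}

// Portable trap, analogous to pxTrap(): __debugbreak() on MSVC, SIGTRAP elsewhere.
static void myTrap()
{
#if defined(_MSC_VER)
    __debugbreak();
#else
    raise(SIGTRAP);
#endif
}

#define MY_ASSERT_DEV(cond, msg) \
    do { if (!(cond) && myDoAssert(msg)) myTrap(); } while (0)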

View File

@@ -863,10 +863,6 @@
RelativePath="..\..\x86\newVif_Unpack.cpp"
>
</File>
<File
RelativePath="..\..\x86\newVif_UnpackGen.inl"
>
</File>
<Filter
Name="Dynarec"
>

View File

@@ -32,12 +32,14 @@ static __pagealigned u8 nVifUpkExec[__pagesize*4];
VifUnpackSSE_Base::VifUnpackSSE_Base()
: dstIndirect(ecx) // parameter 1 of __fastcall
, srcIndirect(edx) // parameter 2 of __fastcall
, workReg( xmm1 )
, destReg( xmm0 )
{
}
void VifUnpackSSE_Base::xMovDest(const xRegisterSSE& srcReg) const {
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], srcReg); }
else { doMaskWrite(srcReg); }
void VifUnpackSSE_Base::xMovDest() const {
if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
else { doMaskWrite(destReg); }
}
void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
@@ -56,145 +58,132 @@ void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
}
void VifUnpackSSE_Base::xUPK_S_32() const {
xMOV32 (xmm0, ptr32[srcIndirect]);
xPSHUF.D (xmm1, xmm0, _v0);
xMovDest (xmm1);
xMOV32 (workReg, ptr32[srcIndirect]);
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_S_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (xmm0);
xPMOVXX16 (workReg);
}
else {
xMOV16 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 16);
xMOV16 (workReg, ptr32[srcIndirect]);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 16);
}
xPSHUF.D (xmm1, xmm0, _v0);
xMovDest (xmm1);
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_S_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (xmm0);
xPMOVXX8 (workReg);
}
else {
xMOV8 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LBW(xmm0, xmm0);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 24);
xMOV8 (workReg, ptr32[srcIndirect]);
xPUNPCK.LBW(workReg, workReg);
xPUNPCK.LWD(workReg, workReg);
xShiftR (workReg, 24);
}
xPSHUF.D (xmm1, xmm0, _v0);
xMovDest (xmm1);
xPSHUF.D (destReg, workReg, _v0);
}
void VifUnpackSSE_Base::xUPK_V2_32() const {
xMOV64 (xmm0, ptr32[srcIndirect]);
xMovDest (xmm0);
xMOV64 (destReg, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_V2_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (xmm0);
xPMOVXX16 (destReg);
}
else {
xMOV32 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 16);
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
xMovDest (xmm0);
}
void VifUnpackSSE_Base::xUPK_V2_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (xmm0);
xPMOVXX8 (destReg);
}
else {
xMOV16 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LBW(xmm0, xmm0);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 24);
xMOV16 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
xMovDest (xmm0);
}
void VifUnpackSSE_Base::xUPK_V3_32() const {
xMOV128 (xmm0, ptr32[srcIndirect]);
xMovDest (xmm0);
xMOV128 (destReg, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_V3_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (xmm0);
xPMOVXX16 (destReg);
}
else {
xMOV64 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 16);
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
xMovDest (xmm0);
}
void VifUnpackSSE_Base::xUPK_V3_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (xmm0);
xPMOVXX8 (destReg);
}
else {
xMOV32 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LBW(xmm0, xmm0);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 24);
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
xMovDest (xmm0);
}
void VifUnpackSSE_Base::xUPK_V4_32() const {
xMOV128 (xmm0, ptr32[srcIndirect]);
xMovDest (xmm0);
xMOV128 (destReg, ptr32[srcIndirect]);
}
void VifUnpackSSE_Base::xUPK_V4_16() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX16 (xmm0);
xPMOVXX16 (destReg);
}
else {
xMOV64 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 16);
xMOV64 (destReg, ptr32[srcIndirect]);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 16);
}
xMovDest (xmm0);
}
void VifUnpackSSE_Base::xUPK_V4_8() const {
if (x86caps.hasStreamingSIMD4Extensions) {
xPMOVXX8 (xmm0);
xPMOVXX8 (destReg);
}
else {
xMOV32 (xmm0, ptr32[srcIndirect]);
xPUNPCK.LBW(xmm0, xmm0);
xPUNPCK.LWD(xmm0, xmm0);
xShiftR (xmm0, 24);
xMOV32 (destReg, ptr32[srcIndirect]);
xPUNPCK.LBW(destReg, destReg);
xPUNPCK.LWD(destReg, destReg);
xShiftR (destReg, 24);
}
xMovDest (xmm0);
}
void VifUnpackSSE_Base::xUPK_V4_5() const {
xMOV16 (xmm0, ptr32[srcIndirect]);
xPSHUF.D (xmm0, xmm0, _v0);
xPSLL.D (xmm0, 3); // ABG|R5.000
xMOVAPS (xmm1, xmm0); // x|x|x|R
xPSRL.D (xmm0, 8); // ABG
xPSLL.D (xmm0, 3); // AB|G5.000
mVUmergeRegs(XMM1, XMM0, 0x4); // x|x|G|R
xPSRL.D (xmm0, 8); // AB
xPSLL.D (xmm0, 3); // A|B5.000
mVUmergeRegs(XMM1, XMM0, 0x2); // x|B|G|R
xPSRL.D (xmm0, 8); // A
xPSLL.D (xmm0, 7); // A.0000000
mVUmergeRegs(XMM1, XMM0, 0x1); // A|B|G|R
xPSLL.D (xmm1, 24); // can optimize to
xPSRL.D (xmm1, 24); // single AND...
xMovDest (xmm1);
xMOV16 (workReg, ptr32[srcIndirect]);
xPSHUF.D (workReg, workReg, _v0);
xPSLL.D (workReg, 3); // ABG|R5.000
xMOVAPS (destReg, workReg); // x|x|x|R
xPSRL.D (workReg, 8); // ABG
xPSLL.D (workReg, 3); // AB|G5.000
mVUmergeRegs(destReg.Id, workReg.Id, 0x4); // x|x|G|R
xPSRL.D (workReg, 8); // AB
xPSLL.D (workReg, 3); // A|B5.000
mVUmergeRegs(destReg.Id, workReg.Id, 0x2); // x|B|G|R
xPSRL.D (workReg, 8); // A
xPSLL.D (workReg, 7); // A.0000000
mVUmergeRegs(destReg.Id, workReg.Id, 0x1); // A|B|G|R
xPSLL.D (destReg, 24); // can optimize to
xPSRL.D (destReg, 24); // single AND...
}
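For reference, a scalar sketch (not part of the commit) of what the xUPK_V4_5 shift/merge sequence computes: each 5-bit RGB field of the 16-bit source lands in bits 3..7 of its own 32-bit lane and the 1-bit alpha lands in bit 7, which is exactly what the trailing <<24 / >>24 pair (effectively an AND with 0xFF per lane) leaves behind:

#include <cstdint>

// Scalar equivalent of the RGBA5551 (V4-5) unpack above; the name is illustrative.
static void upk_V4_5_scalar(uint32_t dst[4], uint16_t src)
{
    dst[0] = (uint32_t)( src        & 0x1f) << 3;   // R: bits 0-4   -> bits 3-7
    dst[1] = (uint32_t)((src >>  5) & 0x1f) << 3;   // G: bits 5-9   -> bits 3-7
    dst[2] = (uint32_t)((src >> 10) & 0x1f) << 3;   // B: bits 10-14 -> bits 3-7
    dst[3] = (uint32_t)( src >> 15        ) << 7;   // A: bit  15    -> bit  7
}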
void VifUnpackSSE_Base::xUnpack( int upknum ) const
@@ -263,6 +252,7 @@ static void nVifGen(int usn, int mask, int curCycle) {
ucall = (nVifCall)xGetAlignedCallTarget();
vpugen.xUnpack(i);
vpugen.xMovDest();
xRET();
pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );

View File

@@ -38,18 +38,20 @@ public:
protected:
xAddressInfo dstIndirect;
xAddressInfo srcIndirect;
xRegisterSSE workReg;
xRegisterSSE destReg;
public:
VifUnpackSSE_Base();
virtual ~VifUnpackSSE_Base() throw() {}
virtual void xUnpack( int upktype ) const;
virtual bool IsUnmaskedOp() const=0;
virtual void xMovDest() const;
protected:
virtual void doMaskWrite(const xRegisterSSE& regX ) const=0;
virtual void xMovDest(const xRegisterSSE& srcReg) const;
virtual void xShiftR(const xRegisterSSE& regX, int n) const;
virtual void xPMOVXX8(const xRegisterSSE& regX) const;
virtual void xPMOVXX16(const xRegisterSSE& regX) const;

View File

@@ -163,16 +163,24 @@ static void ShiftDisplacementWindow( xAddressInfo& addr, const xRegister32& modR
}
if(addImm) xADD(modReg, addImm);
}
static bool UsesTwoRegs[] =
{
true, true, true, true,
false, false, false, false,
false, false, false, false,
false, false, false, true,
};
void VifUnpackSSE_Dynarec::CompileRoutine() {
const int upkNum = vB.upkType & 0xf;
const int upkNum = v.vif->cmd & 0xf;
const u8& vift = nVifT[upkNum];
const int cycleSize = isFill ? vB.cl : vB.wl;
const int blockSize = isFill ? vB.wl : vB.cl;
const int skipSize = blockSize - cycleSize;
int vNum = vifRegs->num;
vCL = vif->cl;
int vNum = v.vifRegs->num;
vCL = v.vif->cl;
SetMasks(cycleSize);
@@ -183,14 +191,25 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
if (vCL < cycleSize) {
xUnpack(upkNum);
srcIndirect += vift;
xMovDest();
dstIndirect += 16;
srcIndirect += vift;
if( IsUnmaskedOp() ) {
++destReg;
++workReg;
}
vNum--;
if (++vCL == blockSize) vCL = 0;
}
else if (isFill) {
DevCon.WriteLn("filling mode!");
VifUnpackSSE_Dynarec::FillingWrite( *this ).xUnpack(upkNum);
VifUnpackSSE_Dynarec fill( VifUnpackSSE_Dynarec::FillingWrite( *this ) );
fill.xUnpack(upkNum);
fill.xMovDest();
dstIndirect += 16;
vNum--;
if (++vCL == blockSize) vCL = 0;
@@ -200,9 +219,10 @@ void VifUnpackSSE_Dynarec::CompileRoutine() {
vCL = 0;
}
}
if (doMode==2) writeBackRow();
xMOV(ptr32[&vif->cl], vCL);
xMOV(ptr32[&vifRegs->num], vNum);
xMOV(ptr32[&v.vif->cl], vCL);
xMOV(ptr32[&v.vifRegs->num], vNum);
xRET();
}
@@ -227,29 +247,29 @@ static _f void dVifRecLimit(int idx) {
_f void dVifUnpack(int idx, u8 *data, u32 size, bool isFill) {
const nVifStruct& v = nVif[idx];
const u8 upkType = vif->cmd & 0x1f | ((!!vif->usn) << 5);
const int doMask = (upkType>>4) & 1;
const u8 upkType = v.vif->cmd & 0x1f | ((!!v.vif->usn) << 5);
const int doMask = v.vif->cmd & 0x10;
const int cycle_cl = vifRegs->cycle.cl;
const int cycle_wl = vifRegs->cycle.wl;
const int cycle_cl = v.vifRegs->cycle.cl;
const int cycle_wl = v.vifRegs->cycle.wl;
const int cycleSize = isFill ? cycle_cl : cycle_wl;
const int blockSize = isFill ? cycle_wl : cycle_cl;
if (vif->cl >= blockSize) vif->cl = 0;
if (v.vif->cl >= blockSize) v.vif->cl = 0;
_vBlock.upkType = upkType;
_vBlock.num = *(u8*)&vifRegs->num;
_vBlock.mode = *(u8*)&vifRegs->mode;
_vBlock.scl = vif->cl;
_vBlock.num = *(u8*)&v.vifRegs->num;
_vBlock.mode = *(u8*)&v.vifRegs->mode;
_vBlock.scl = v.vif->cl;
_vBlock.cl = cycle_cl;
_vBlock.wl = cycle_wl;
// Zero out the mask parameter if it's unused -- games leave random junk
// values here which cause false recblock cache misses.
_vBlock.mask = doMask ? vifRegs->mask : 0x00;
_vBlock.mask = (doMask || ((_vBlock.mode&3)!=0) ) ? v.vifRegs->mask : 0x00;
if (nVifBlock* b = v.vifBlocks->find(&_vBlock)) {
if( u8* dest = dVifsetVUptr(v, vif->tag.addr) ) {
if( u8* dest = dVifsetVUptr(v, v.vif->tag.addr) ) {
//DevCon.WriteLn("Running Recompiled Block!");
((nVifrecCall)b->startPtr)((uptr)dest, (uptr)data);
}

View File

@@ -65,11 +65,11 @@ public:
u32 d = *((u32*)dataPtr);
const SizeChain<T>& bucket( mBucket[d % hSize] );
for (int i=bucket.Size; i; --i) {
for (int i=bucket.Size-1; i>0; --i) {
// This inline version seems about 1-2% faster in tests of games that average 1
// program per bucket. Games that average more should see a bigger improvement --air
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) ) & 0x7;
if( result == 0x7 ) return &bucket.Chain[i];
int result = _mm_movemask_ps( (cast_m128) _mm_cmpeq_epi32( _mm_load_si128((__m128i*)&bucket.Chain[i]), _mm_load_si128((__m128i*)dataPtr) ) );
if( (result&0x7) == 0x7 ) return &bucket.Chain[i];
// Dynamically generated function version, can't be inlined. :(
//if ((((nVifCall)((void*)nVifMemCmp))(&bucket.Chain[i], dataPtr))==7) return &bucket.Chain[i];
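This compare loop is the hot path the changelog's HashBucket::find() note refers to. A self-contained sketch of the same 12-byte key match, assuming both pointers are 16-byte aligned (as _mm_load_si128 requires); keyMatch3 is an illustrative name, not part of the source:

#include <emmintrin.h>

// Two 16-byte keys "match" when their low three dwords compare equal; the
// fourth dword is ignored via the 0x7 mask, just as in find() above.
static bool keyMatch3(const void* a, const void* b)
{
    __m128i eq = _mm_cmpeq_epi32( _mm_load_si128((const __m128i*)a),
                                  _mm_load_si128((const __m128i*)b) );
    return ( _mm_movemask_ps( _mm_castsi128_ps(eq) ) & 0x7 ) == 0x7;
}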