pcsx2: added a -highpriority switch; other processes generate too much noise and I cannot trust my performance counters without it :P

GSdx: nothing new, just committing a few cleanups and my findings on AA1 before implementing it. I tested a few things with ps2dev on a real machine and got really strange results when not using the standard 0 1 0 1 blending mode, but it doesn't seem harder to implement than a line drawing (which it is), and it only adds a few extra pixels here and there, so it should be fast at least.


git-svn-id: http://pcsx2.googlecode.com/svn/trunk@694 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-03-06 00:53:59 +00:00
parent 3ae6ff0856
commit 620ba58085
7 changed files with 38 additions and 48 deletions

View File

@ -93,6 +93,9 @@ int ParseCommandLine( int tokenCount, TCHAR *const *const tokens )
else if( CmdSwitchIs( "nogui" ) ) { else if( CmdSwitchIs( "nogui" ) ) {
UseGui = false; UseGui = false;
} }
else if( CmdSwitchIs( "highpriority" ) ) {
SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
}
#ifdef PCSX2_DEVBUILD #ifdef PCSX2_DEVBUILD
else if( CmdSwitchIs( "jpg" ) ) { else if( CmdSwitchIs( "jpg" ) ) {
g_TestRun.jpgcapture = 1; g_TestRun.jpgcapture = 1;

View File

@ -711,13 +711,14 @@ void GPUDrawScanlineCodeGenerator::ColorTFX()
// c[0] = c[0].modulate16<1>(r).clamp8(); // c[0] = c[0].modulate16<1>(r).clamp8();
// c[1] = c[1].modulate16<1>(g).clamp8(); // c[1] = c[1].modulate16<1>(g).clamp8();
// c[2] = c[2].modulate16<1>(b).clamp8(); // c[2] = c[2].modulate16<1>(b).clamp8();
if(!m_cpu.has(util::Cpu::tSSE41)) pxor(xmm0, xmm0); pcmpeqd(xmm0, xmm0);
psrlw(xmm0, 8);
modulate16<1>(xmm4, xmmword[&m_env.temp.r]); modulate16<1>(xmm4, xmmword[&m_env.temp.r]);
clamp16(xmm4, xmm0); pminsw(xmm4, xmm0);
modulate16<1>(xmm5, xmmword[&m_env.temp.g]); modulate16<1>(xmm5, xmmword[&m_env.temp.g]);
clamp16(xmm5, xmm0); pminsw(xmm5, xmm0);
modulate16<1>(xmm6, xmmword[&m_env.temp.b]); modulate16<1>(xmm6, xmmword[&m_env.temp.b]);
clamp16(xmm6, xmm0); pminsw(xmm6, xmm0);
break; break;
case 3: // decal (tfx = tme) case 3: // decal (tfx = tme)
break; break;
@ -980,20 +981,6 @@ void GPUDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Oper
paddw(a, b); paddw(a, b);
} }
// Emits instructions that clamp every 16-bit lane of `a` to the 0..255 range,
// leaving the result as 16-bit lanes again (in place, in `a`).
//
// a    - register holding the 16-bit values to clamp; overwritten with the result.
// zero - register used only on the non-SSE4.1 path as the high-byte source for
//        punpcklbw; it must hold all zeros for the widening to be a zero-extension.
void GPUDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& zero)
{
// Pack signed words to bytes with unsigned saturation (< 0 -> 0, > 255 -> 255).
packuswb(a, a);
if(m_cpu.has(util::Cpu::tSSE41))
{
// SSE4.1 available: zero-extend the low eight bytes back to words directly.
pmovzxbw(a, a);
}
else
{
// Fallback: interleave the low eight bytes with `zero` to widen them to words.
punpcklbw(a, zero);
}
}
void GPUDrawScanlineCodeGenerator::alltrue() void GPUDrawScanlineCodeGenerator::alltrue()
{ {
pmovmskb(eax, xmm7); pmovmskb(eax, xmm7);

View File

@ -53,7 +53,6 @@ class GPUDrawScanlineCodeGenerator : public CodeGenerator
template<int shift> void modulate16(const Xmm& a, const Operand& f); template<int shift> void modulate16(const Xmm& a, const Operand& f);
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f); template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f);
void clamp16(const Xmm& a, const Xmm& zero);
void alltrue(); void alltrue();
void blend8(const Xmm& a, const Xmm& b); void blend8(const Xmm& a, const Xmm& b);
void blend(const Xmm& a, const Xmm& b, const Xmm& mask); void blend(const Xmm& a, const Xmm& b, const Xmm& mask);

View File

@ -685,6 +685,32 @@ REG64_(GIFReg, PABE)
UINT32 _PAD2:32; UINT32 _PAD2:32;
REG_END REG_END
/*
AA1 == 1 (for triangles, as tested on the real thing)
C ABE A Ae Ao Aoe
0 0 c c c c
0 1 s c s* c
1 0 d d c c
1 1 d d s* c
2 0 f f ? ?
2 1 f f ? ?
C = ALPHA::C
A = alpha used for blending
Ae = edge alpha used for blending
Ao = alpha to output
Aoe = edge alpha to output
c = coverage
s = source alpha
d = destination alpha
f = fixed alpha (TODO: test with 0x80/2)
* = only if s != 0x80 (s == 0x80 => s == c, but what about s > 0x80? s or 0x80? TODO)
*/
REG64_(GIFReg, PRIM) REG64_(GIFReg, PRIM)
UINT32 PRIM:3; UINT32 PRIM:3;
UINT32 IIP:1; UINT32 IIP:1;

View File

@ -1277,7 +1277,6 @@ public:
GSVector4i rm = m_rxxx; GSVector4i rm = m_rxxx;
GSVector4i gm = m_xgxx; GSVector4i gm = m_xgxx;
GSVector4i bm = m_xxbx; GSVector4i bm = m_xxbx;
// GSVector4i am = m_xxxa;
GSVector4i l, h; GSVector4i l, h;
if(TEXA.AEM) if(TEXA.AEM)
@ -1285,13 +1284,7 @@ public:
for(int i = 0; i < 8; i++, dst += dstpitch) for(int i = 0; i < 8; i++, dst += dstpitch)
{ {
GSVector4i v0 = s[i * 2 + 0]; GSVector4i v0 = s[i * 2 + 0];
/*
l = v0.upl16();
h = v0.uph16();
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero());
*/
l = v0.upl16(v0); l = v0.upl16(v0);
h = v0.uph16(v0); h = v0.uph16(v0);
@ -1299,13 +1292,7 @@ public:
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero()); ((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
GSVector4i v1 = s[i * 2 + 1]; GSVector4i v1 = s[i * 2 + 1];
/*
l = v1.upl16();
h = v1.uph16();
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero());
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero());
*/
l = v1.upl16(v1); l = v1.upl16(v1);
h = v1.uph16(v1); h = v1.uph16(v1);
@ -1318,13 +1305,7 @@ public:
for(int i = 0; i < 8; i++, dst += dstpitch) for(int i = 0; i < 8; i++, dst += dstpitch)
{ {
GSVector4i v0 = s[i * 2 + 0]; GSVector4i v0 = s[i * 2 + 0];
/*
l = v0.upl16();
h = v0.uph16();
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am);
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am);
*/
l = v0.upl16(v0); l = v0.upl16(v0);
h = v0.uph16(v0); h = v0.uph16(v0);
@ -1332,13 +1313,7 @@ public:
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15)); ((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
GSVector4i v1 = s[i * 2 + 1]; GSVector4i v1 = s[i * 2 + 1];
/*
l = v1.upl16();
h = v1.uph16();
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am);
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am);
*/
l = v1.upl16(v1); l = v1.upl16(v1);
h = v1.uph16(v1); h = v1.uph16(v1);

View File

@ -1775,8 +1775,8 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
if(m_env.sel.colclamp == 0) if(m_env.sel.colclamp == 0)
{ {
// c[0] &= m_env.colclamp; // c[0] &= 0x000000ff;
// c[1] &= m_env.colclamp; // c[1] &= 0x000000ff;
pcmpeqd(xmm7, xmm7); pcmpeqd(xmm7, xmm7);
psrlw(xmm7, 8); psrlw(xmm7, 8);
@ -1802,7 +1802,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
if(m_env.sel.fba && m_env.sel.fpsm != 1) if(m_env.sel.fba && m_env.sel.fpsm != 1)
{ {
// fs |= m_env.fba; // fs |= 0x80000000;
pcmpeqd(xmm7, xmm7); pcmpeqd(xmm7, xmm7);
pslld(xmm7, 31); pslld(xmm7, 31);

View File

@ -1063,7 +1063,7 @@ if((len % ((m_env.TRXREG.RRW - m_x) * GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM]
str.Format(_T("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp"), str.Format(_T("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp"),
n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM, n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM,
r.left, r.top, r.right, r.bottom); r.left, r.top, r.right, r.bottom);
m_mem.SaveBMP(str, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.Width(), r.Height()); m_mem.SaveBMP(str, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom);
*/ */
} }
} }