mirror of https://github.com/PCSX2/pcsx2.git
pcsx2: added a -highpriority switch; other processes generate too much noise and I cannot trust my performance counters without it :P
GSdx: nothing new, just committing a few cleanups and my findings on AA1 before implementing it. I tested a few things with ps2dev on a real machine and got really strange results when not using the standard 0 1 0 1 blending mode, but it doesn't seem harder to implement than a line drawing (which it is), and it only adds a few extra pixels here and there, so it should be fast at least. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@694 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
3ae6ff0856
commit
620ba58085
|
@ -93,6 +93,9 @@ int ParseCommandLine( int tokenCount, TCHAR *const *const tokens )
|
||||||
else if( CmdSwitchIs( "nogui" ) ) {
|
else if( CmdSwitchIs( "nogui" ) ) {
|
||||||
UseGui = false;
|
UseGui = false;
|
||||||
}
|
}
|
||||||
|
else if( CmdSwitchIs( "highpriority" ) ) {
|
||||||
|
SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);
|
||||||
|
}
|
||||||
#ifdef PCSX2_DEVBUILD
|
#ifdef PCSX2_DEVBUILD
|
||||||
else if( CmdSwitchIs( "jpg" ) ) {
|
else if( CmdSwitchIs( "jpg" ) ) {
|
||||||
g_TestRun.jpgcapture = 1;
|
g_TestRun.jpgcapture = 1;
|
||||||
|
|
|
@ -711,13 +711,14 @@ void GPUDrawScanlineCodeGenerator::ColorTFX()
|
||||||
// c[0] = c[0].modulate16<1>(r).clamp8();
|
// c[0] = c[0].modulate16<1>(r).clamp8();
|
||||||
// c[1] = c[1].modulate16<1>(g).clamp8();
|
// c[1] = c[1].modulate16<1>(g).clamp8();
|
||||||
// c[2] = c[2].modulate16<1>(b).clamp8();
|
// c[2] = c[2].modulate16<1>(b).clamp8();
|
||||||
if(!m_cpu.has(util::Cpu::tSSE41)) pxor(xmm0, xmm0);
|
pcmpeqd(xmm0, xmm0);
|
||||||
|
psrlw(xmm0, 8);
|
||||||
modulate16<1>(xmm4, xmmword[&m_env.temp.r]);
|
modulate16<1>(xmm4, xmmword[&m_env.temp.r]);
|
||||||
clamp16(xmm4, xmm0);
|
pminsw(xmm4, xmm0);
|
||||||
modulate16<1>(xmm5, xmmword[&m_env.temp.g]);
|
modulate16<1>(xmm5, xmmword[&m_env.temp.g]);
|
||||||
clamp16(xmm5, xmm0);
|
pminsw(xmm5, xmm0);
|
||||||
modulate16<1>(xmm6, xmmword[&m_env.temp.b]);
|
modulate16<1>(xmm6, xmmword[&m_env.temp.b]);
|
||||||
clamp16(xmm6, xmm0);
|
pminsw(xmm6, xmm0);
|
||||||
break;
|
break;
|
||||||
case 3: // decal (tfx = tme)
|
case 3: // decal (tfx = tme)
|
||||||
break;
|
break;
|
||||||
|
@ -980,20 +981,6 @@ void GPUDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Oper
|
||||||
paddw(a, b);
|
paddw(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GPUDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& zero)
|
|
||||||
{
|
|
||||||
packuswb(a, a);
|
|
||||||
|
|
||||||
if(m_cpu.has(util::Cpu::tSSE41))
|
|
||||||
{
|
|
||||||
pmovzxbw(a, a);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
punpcklbw(a, zero);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void GPUDrawScanlineCodeGenerator::alltrue()
|
void GPUDrawScanlineCodeGenerator::alltrue()
|
||||||
{
|
{
|
||||||
pmovmskb(eax, xmm7);
|
pmovmskb(eax, xmm7);
|
||||||
|
|
|
@ -53,7 +53,6 @@ class GPUDrawScanlineCodeGenerator : public CodeGenerator
|
||||||
|
|
||||||
template<int shift> void modulate16(const Xmm& a, const Operand& f);
|
template<int shift> void modulate16(const Xmm& a, const Operand& f);
|
||||||
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f);
|
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Operand& f);
|
||||||
void clamp16(const Xmm& a, const Xmm& zero);
|
|
||||||
void alltrue();
|
void alltrue();
|
||||||
void blend8(const Xmm& a, const Xmm& b);
|
void blend8(const Xmm& a, const Xmm& b);
|
||||||
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
void blend(const Xmm& a, const Xmm& b, const Xmm& mask);
|
||||||
|
|
|
@ -685,6 +685,32 @@ REG64_(GIFReg, PABE)
|
||||||
UINT32 _PAD2:32;
|
UINT32 _PAD2:32;
|
||||||
REG_END
|
REG_END
|
||||||
|
|
||||||
|
/*
|
||||||
|
|
||||||
|
AA1 == 1 (for triangles, as tested on the real thing)
|
||||||
|
|
||||||
|
C ABE A Ae Ao Aoe
|
||||||
|
0 0 c c c c
|
||||||
|
0 1 s c s* c
|
||||||
|
1 0 d d c c
|
||||||
|
1 1 d d s* c
|
||||||
|
2 0 f f ? ?
|
||||||
|
2 1 f f ? ?
|
||||||
|
|
||||||
|
C = ALPHA::C
|
||||||
|
A = alpha used for blending
|
||||||
|
Ae = edge alpha used for blending
|
||||||
|
Ao = alpha to output
|
||||||
|
Aoe = edge alpha to output
|
||||||
|
|
||||||
|
c = coverage
|
||||||
|
s = source alpha
|
||||||
|
d = destination alpha
|
||||||
|
f = fixed alpha (TODO: test with 0x80/2)
|
||||||
|
* = only if s != 0x80 (s == 0x80 => s == c, but what about s > 0x80? s or 0x80? TODO)
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
REG64_(GIFReg, PRIM)
|
REG64_(GIFReg, PRIM)
|
||||||
UINT32 PRIM:3;
|
UINT32 PRIM:3;
|
||||||
UINT32 IIP:1;
|
UINT32 IIP:1;
|
||||||
|
|
|
@ -1277,7 +1277,6 @@ public:
|
||||||
GSVector4i rm = m_rxxx;
|
GSVector4i rm = m_rxxx;
|
||||||
GSVector4i gm = m_xgxx;
|
GSVector4i gm = m_xgxx;
|
||||||
GSVector4i bm = m_xxbx;
|
GSVector4i bm = m_xxbx;
|
||||||
// GSVector4i am = m_xxxa;
|
|
||||||
GSVector4i l, h;
|
GSVector4i l, h;
|
||||||
|
|
||||||
if(TEXA.AEM)
|
if(TEXA.AEM)
|
||||||
|
@ -1285,13 +1284,7 @@ public:
|
||||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
GSVector4i v0 = s[i * 2 + 0];
|
GSVector4i v0 = s[i * 2 + 0];
|
||||||
/*
|
|
||||||
l = v0.upl16();
|
|
||||||
h = v0.uph16();
|
|
||||||
|
|
||||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero());
|
|
||||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero());
|
|
||||||
*/
|
|
||||||
l = v0.upl16(v0);
|
l = v0.upl16(v0);
|
||||||
h = v0.uph16(v0);
|
h = v0.uph16(v0);
|
||||||
|
|
||||||
|
@ -1299,13 +1292,7 @@ public:
|
||||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
|
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
|
||||||
|
|
||||||
GSVector4i v1 = s[i * 2 + 1];
|
GSVector4i v1 = s[i * 2 + 1];
|
||||||
/*
|
|
||||||
l = v1.upl16();
|
|
||||||
h = v1.uph16();
|
|
||||||
|
|
||||||
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am).andnot(l == GSVector4i::zero());
|
|
||||||
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am).andnot(h == GSVector4i::zero());
|
|
||||||
*/
|
|
||||||
l = v1.upl16(v1);
|
l = v1.upl16(v1);
|
||||||
h = v1.uph16(v1);
|
h = v1.uph16(v1);
|
||||||
|
|
||||||
|
@ -1318,13 +1305,7 @@ public:
|
||||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||||
{
|
{
|
||||||
GSVector4i v0 = s[i * 2 + 0];
|
GSVector4i v0 = s[i * 2 + 0];
|
||||||
/*
|
|
||||||
l = v0.upl16();
|
|
||||||
h = v0.uph16();
|
|
||||||
|
|
||||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am);
|
|
||||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am);
|
|
||||||
*/
|
|
||||||
l = v0.upl16(v0);
|
l = v0.upl16(v0);
|
||||||
h = v0.uph16(v0);
|
h = v0.uph16(v0);
|
||||||
|
|
||||||
|
@ -1332,13 +1313,7 @@ public:
|
||||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||||
|
|
||||||
GSVector4i v1 = s[i * 2 + 1];
|
GSVector4i v1 = s[i * 2 + 1];
|
||||||
/*
|
|
||||||
l = v1.upl16();
|
|
||||||
h = v1.uph16();
|
|
||||||
|
|
||||||
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA1.blend(TA0, l < am);
|
|
||||||
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA1.blend(TA0, h < am);
|
|
||||||
*/
|
|
||||||
l = v1.upl16(v1);
|
l = v1.upl16(v1);
|
||||||
h = v1.uph16(v1);
|
h = v1.uph16(v1);
|
||||||
|
|
||||||
|
|
|
@ -1775,8 +1775,8 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
||||||
|
|
||||||
if(m_env.sel.colclamp == 0)
|
if(m_env.sel.colclamp == 0)
|
||||||
{
|
{
|
||||||
// c[0] &= m_env.colclamp;
|
// c[0] &= 0x000000ff;
|
||||||
// c[1] &= m_env.colclamp;
|
// c[1] &= 0x000000ff;
|
||||||
|
|
||||||
pcmpeqd(xmm7, xmm7);
|
pcmpeqd(xmm7, xmm7);
|
||||||
psrlw(xmm7, 8);
|
psrlw(xmm7, 8);
|
||||||
|
@ -1802,7 +1802,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame(int params)
|
||||||
|
|
||||||
if(m_env.sel.fba && m_env.sel.fpsm != 1)
|
if(m_env.sel.fba && m_env.sel.fpsm != 1)
|
||||||
{
|
{
|
||||||
// fs |= m_env.fba;
|
// fs |= 0x80000000;
|
||||||
|
|
||||||
pcmpeqd(xmm7, xmm7);
|
pcmpeqd(xmm7, xmm7);
|
||||||
pslld(xmm7, 31);
|
pslld(xmm7, 31);
|
||||||
|
|
|
@ -1063,7 +1063,7 @@ if((len % ((m_env.TRXREG.RRW - m_x) * GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM]
|
||||||
str.Format(_T("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp"),
|
str.Format(_T("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp"),
|
||||||
n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM,
|
n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM,
|
||||||
r.left, r.top, r.right, r.bottom);
|
r.left, r.top, r.right, r.bottom);
|
||||||
m_mem.SaveBMP(str, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.Width(), r.Height());
|
m_mem.SaveBMP(str, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom);
|
||||||
*/
|
*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue