mirror of https://github.com/PCSX2/pcsx2.git
GSdx: this should fix XP/Wine crashing when extrathreads > 0; it also adds a sprite drawing shortcut, which hopefully won't break anything.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5089 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
4593b6ac52
commit
19be605150
|
@ -884,7 +884,7 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
|
||||
template<bool AEM> static void ExpandBlock16(const uint16* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA) // do not inline, uses too many xmm regs
|
||||
{
|
||||
const GSVector4i* s = (const GSVector4i*)src;
|
||||
|
||||
|
@ -895,44 +895,36 @@ public:
|
|||
GSVector4i bm = m_xxbx;
|
||||
GSVector4i l, h;
|
||||
|
||||
if(TEXA.AEM)
|
||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||
{
|
||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||
GSVector4i v0 = s[i * 2 + 0];
|
||||
|
||||
l = v0.upl16(v0);
|
||||
h = v0.uph16(v0);
|
||||
|
||||
if(AEM)
|
||||
{
|
||||
GSVector4i v0 = s[i * 2 + 0];
|
||||
|
||||
l = v0.upl16(v0);
|
||||
h = v0.uph16(v0);
|
||||
|
||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
|
||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
|
||||
}
|
||||
else
|
||||
{
|
||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
|
||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||
}
|
||||
|
||||
GSVector4i v1 = s[i * 2 + 1];
|
||||
GSVector4i v1 = s[i * 2 + 1];
|
||||
|
||||
l = v1.upl16(v1);
|
||||
h = v1.uph16(v1);
|
||||
l = v1.upl16(v1);
|
||||
h = v1.uph16(v1);
|
||||
|
||||
if(AEM)
|
||||
{
|
||||
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend8(TA1, l.sra16(15)).andnot(l == GSVector4i::zero());
|
||||
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend8(TA1, h.sra16(15)).andnot(h == GSVector4i::zero());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < 8; i++, dst += dstpitch)
|
||||
else
|
||||
{
|
||||
GSVector4i v0 = s[i * 2 + 0];
|
||||
|
||||
l = v0.upl16(v0);
|
||||
h = v0.uph16(v0);
|
||||
|
||||
((GSVector4i*)dst)[0] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
|
||||
((GSVector4i*)dst)[1] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||
|
||||
GSVector4i v1 = s[i * 2 + 1];
|
||||
|
||||
l = v1.upl16(v1);
|
||||
h = v1.uph16(v1);
|
||||
|
||||
((GSVector4i*)dst)[2] = ((l & rm) << 3) | ((l & gm) << 6) | ((l & bm) << 9) | TA0.blend(TA1, l.sra16(15));
|
||||
((GSVector4i*)dst)[3] = ((h & rm) << 3) | ((h & gm) << 6) | ((h & bm) << 9) | TA0.blend(TA1, h.sra16(15));
|
||||
}
|
||||
|
@ -1432,6 +1424,56 @@ public:
|
|||
}
|
||||
}
|
||||
}
|
||||
// Builds one vector of expanded pixels from the 16-bit source lanes in c.
//
// AEM is a compile-time switch selecting how the alpha term is formed.
// c   - source lanes; callers in this file pass v.upl16(v) / v.uph16(v).
// TA0 - alpha vector used where c.sra16(15) does not select TA1.
// TA1 - alpha vector used where c.sra16(15) selects it.
// Returns the OR of the three masked/shifted color terms and the alpha term.
//
// NOTE(review): m_rxxx / m_xgxx / m_xxbx are declared outside this view;
// presumably they isolate the red, green and blue fields - confirm.
template<bool AEM> __forceinline static GSVector4i Expand16to32(const GSVector4i& c, const GSVector4i& TA0, const GSVector4i& TA1)
|
||||
{
|
||||
// Color: each masked field is shifted left by 3, 6 and 9 bits respectively.
// Alpha: with AEM the TA0/TA1 selection uses blend8 and is then and-not'ed
// with (c == zero), which presumably clears alpha for all-zero pixels
// (confirm andnot operand order); without AEM a plain blend is used.
return ((c & m_rxxx) << 3) | ((c & m_xgxx) << 6) | ((c & m_xxbx) << 9) | (AEM ? TA0.blend8(TA1, c.sra16(15)).andnot(c == GSVector4i::zero()) : TA0.blend(TA1, c.sra16(15)));
|
||||
}
|
||||
|
||||
// Reads one 16-bit block from src and writes it expanded to dst.
//
// AEM      - compile-time switch forwarded to the expansion routine.
// src      - source block data.
// dst      - destination buffer; rows are dstpitch bytes apart.
// dstpitch - destination row pitch in bytes.
// TEXA     - register value supplying TA0/TA1 for the alpha term.
//
// Two implementations are present; the preprocessor keeps only the #else one.
template<bool AEM> __forceinline static void ReadAndExpandBlock16(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
// Disabled direct path: swizzle in registers and expand without a temp buffer.
#if 0 // not faster
|
||||
|
||||
const GSVector4i* s = (const GSVector4i*)src;
|
||||
|
||||
// TA0/TA1 are shifted into bits 24..31 before building the vectors.
GSVector4i TA0(TEXA.TA0 << 24);
|
||||
GSVector4i TA1(TEXA.TA1 << 24);
|
||||
|
||||
// Four iterations, each consuming four source vectors and producing two
// destination rows (dst advances by two pitches per iteration).
for(int i = 0; i < 4; i++, dst += dstpitch * 2)
|
||||
{
|
||||
GSVector4i v0 = s[i * 4 + 0];
|
||||
GSVector4i v1 = s[i * 4 + 1];
|
||||
GSVector4i v2 = s[i * 4 + 2];
|
||||
GSVector4i v3 = s[i * 4 + 3];
|
||||
|
||||
// NOTE(review): sw16/sw32 are defined elsewhere; presumably this sequence
// de-swizzles the block layout into row order - confirm.
GSVector4i::sw16(v0, v1, v2, v3);
|
||||
GSVector4i::sw32(v0, v1, v2, v3);
|
||||
GSVector4i::sw16(v0, v2, v1, v3);
|
||||
|
||||
// First output row of this pair: built from v0 and v1.
GSVector4i* d0 = (GSVector4i*)&dst[dstpitch * 0];
|
||||
|
||||
d0[0] = Expand16to32<AEM>(v0.upl16(v0), TA0, TA1);
|
||||
d0[1] = Expand16to32<AEM>(v0.uph16(v0), TA0, TA1);
|
||||
d0[2] = Expand16to32<AEM>(v1.upl16(v1), TA0, TA1);
|
||||
d0[3] = Expand16to32<AEM>(v1.uph16(v1), TA0, TA1);
|
||||
|
||||
// Second output row of this pair: built from v2 and v3.
GSVector4i* d1 = (GSVector4i*)&dst[dstpitch * 1];
|
||||
|
||||
d1[0] = Expand16to32<AEM>(v2.upl16(v2), TA0, TA1);
|
||||
d1[1] = Expand16to32<AEM>(v2.uph16(v2), TA0, TA1);
|
||||
d1[2] = Expand16to32<AEM>(v3.upl16(v3), TA0, TA1);
|
||||
d1[3] = Expand16to32<AEM>(v3.uph16(v3), TA0, TA1);
|
||||
}
|
||||
|
||||
// Active path: read into an aligned temporary, then expand from it.
#else
|
||||
|
||||
// Temporary holding 16 * 8 uint16 values.
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
// The pitch argument is sizeof(block) / 8, i.e. the byte size of one of
// eight equal rows of the temporary.
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16<AEM>(block, dst, dstpitch, TEXA);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
__forceinline static void ReadAndExpandBlock8_32(const uint8* RESTRICT src, uint8* RESTRICT dst, int dstpitch, const uint32* RESTRICT pal)
|
||||
{
|
||||
|
|
|
@ -91,6 +91,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data)
|
|||
sel.fb = m_global.sel.fb;
|
||||
sel.zb = m_global.sel.zb;
|
||||
sel.zoverflow = m_global.sel.zoverflow;
|
||||
sel.notest = m_global.sel.notest;
|
||||
|
||||
m_sp = m_sp_map[sel];
|
||||
}
|
||||
|
@ -272,17 +273,24 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
// Init
|
||||
|
||||
int skip = left & 3;
|
||||
int skip, steps;
|
||||
|
||||
left -= skip;
|
||||
|
||||
int steps = pixels + skip - 4;
|
||||
if(!sel.notest)
|
||||
{
|
||||
skip = left & 3;
|
||||
steps = pixels + skip - 4;
|
||||
left -= skip;
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
}
|
||||
else
|
||||
{
|
||||
skip = 0;
|
||||
steps = pixels - 4;
|
||||
}
|
||||
|
||||
const GSVector2i* fza_base = &m_global.fzbr[top];
|
||||
const GSVector2i* fza_offset = &m_global.fzbc[left >> 2];
|
||||
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
if(sel.prim != GS_SPRITE_CLASS)
|
||||
{
|
||||
if(sel.fwrite && sel.fge)
|
||||
|
@ -1000,27 +1008,30 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
int fzm = 0;
|
||||
|
||||
if(sel.fwrite)
|
||||
if(!sel.notest)
|
||||
{
|
||||
fm |= test;
|
||||
}
|
||||
if(sel.fwrite)
|
||||
{
|
||||
fm |= test;
|
||||
}
|
||||
|
||||
if(sel.zwrite)
|
||||
{
|
||||
zm |= test;
|
||||
}
|
||||
if(sel.zwrite)
|
||||
{
|
||||
zm |= test;
|
||||
}
|
||||
|
||||
if(sel.fwrite && sel.zwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
|
||||
}
|
||||
else if(sel.fwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32().mask();
|
||||
}
|
||||
else if(sel.zwrite)
|
||||
{
|
||||
fzm = ~(zm == GSVector4i::xffffffff()).ps32().mask();
|
||||
if(sel.fwrite && sel.zwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32(zm == GSVector4i::xffffffff()).mask();
|
||||
}
|
||||
else if(sel.fwrite)
|
||||
{
|
||||
fzm = ~(fm == GSVector4i::xffffffff()).ps32().mask();
|
||||
}
|
||||
else if(sel.zwrite)
|
||||
{
|
||||
fzm = ~(zm == GSVector4i::xffffffff()).ps32().mask();
|
||||
}
|
||||
}
|
||||
|
||||
// WriteZBuf
|
||||
|
@ -1030,16 +1041,39 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
if(sel.ztest && sel.zpsm < 2)
|
||||
{
|
||||
zs = zs.blend8(zd, zm);
|
||||
}
|
||||
|
||||
if(fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
||||
if(fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
||||
bool fast = sel.ztest ? sel.zpsm < 2 : sel.zpsm == 0 && sel.notest;
|
||||
|
||||
if(sel.notest)
|
||||
{
|
||||
if(fast)
|
||||
{
|
||||
GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
||||
GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(zs, za, 0, sel.zpsm);
|
||||
WritePixel(zs, za, 1, sel.zpsm);
|
||||
WritePixel(zs, za, 2, sel.zpsm);
|
||||
WritePixel(zs, za, 3, sel.zpsm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm);
|
||||
if(fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm);
|
||||
if(fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm);
|
||||
if(fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm);
|
||||
if(fast)
|
||||
{
|
||||
if(fzm & 0x0f00) GSVector4i::storel((uint8*)m_global.vm + za * 2, zs);
|
||||
if(fzm & 0xf000) GSVector4i::storeh((uint8*)m_global.vm + za * 2 + 16, zs);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0300) WritePixel(zs, za, 0, sel.zpsm);
|
||||
if(fzm & 0x0c00) WritePixel(zs, za, 1, sel.zpsm);
|
||||
if(fzm & 0x3000) WritePixel(zs, za, 2, sel.zpsm);
|
||||
if(fzm & 0xc000) WritePixel(zs, za, 3, sel.zpsm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1197,17 +1231,37 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
fs = fs.blend(fd, fm);
|
||||
}
|
||||
|
||||
if(sel.rfb && sel.fpsm < 2)
|
||||
bool fast = sel.rfb ? sel.fpsm < 2 : sel.fpsm == 0 && sel.notest;
|
||||
|
||||
if(sel.notest)
|
||||
{
|
||||
if(fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
||||
if(fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
||||
if(fast)
|
||||
{
|
||||
GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
||||
GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(fs, fa, 0, sel.fpsm);
|
||||
WritePixel(fs, fa, 1, sel.fpsm);
|
||||
WritePixel(fs, fa, 2, sel.fpsm);
|
||||
WritePixel(fs, fa, 3, sel.fpsm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm);
|
||||
if(fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm);
|
||||
if(fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm);
|
||||
if(fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm);
|
||||
if(fast)
|
||||
{
|
||||
if(fzm & 0x000f) GSVector4i::storel((uint8*)m_global.vm + fa * 2, fs);
|
||||
if(fzm & 0x00f0) GSVector4i::storeh((uint8*)m_global.vm + fa * 2 + 16, fs);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(fzm & 0x0003) WritePixel(fs, fa, 0, sel.fpsm);
|
||||
if(fzm & 0x000c) WritePixel(fs, fa, 1, sel.fpsm);
|
||||
if(fzm & 0x0030) WritePixel(fs, fa, 2, sel.fpsm);
|
||||
if(fzm & 0x00c0) WritePixel(fs, fa, 3, sel.fpsm);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1273,7 +1327,10 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
}
|
||||
}
|
||||
|
||||
test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
if(!sel.notest)
|
||||
{
|
||||
test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -250,31 +250,40 @@ L("exit");
|
|||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
{
|
||||
// int skip = left & 3;
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// int skip = left & 3;
|
||||
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
|
||||
// left -= skip;
|
||||
// int steps = pixels + skip - 4;
|
||||
|
||||
sub(ebx, edx);
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
|
||||
// int steps = pixels + skip - 4;
|
||||
// left -= skip;
|
||||
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
sub(ebx, edx);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
shl(edx, 4);
|
||||
shl(edx, 4);
|
||||
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
|
||||
vpor(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
vpor(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
mov(ebx, edx); // left
|
||||
xor(edx, edx); // skip
|
||||
lea(ecx, ptr[ecx - 4]); // steps
|
||||
}
|
||||
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
|
||||
|
@ -574,14 +583,17 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
|
@ -2309,6 +2321,11 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// fm |= test;
|
||||
// zm |= test;
|
||||
|
||||
|
@ -2355,17 +2372,17 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
return;
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest && m_sel.zpsm < 2;
|
||||
|
||||
vmovdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
|
||||
|
||||
if(fast)
|
||||
if(m_sel.ztest && m_sel.zpsm < 2)
|
||||
{
|
||||
// zs = zs.blend8(zd, zm);
|
||||
|
||||
vpblendvb(xmm1, ptr[&m_local.temp.zd], xmm4);
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
|
@ -2671,7 +2688,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm
|
||||
}
|
||||
|
||||
bool fast = m_sel.rfb && m_sel.fpsm < 2;
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
@ -2684,49 +2701,67 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(fast)
|
||||
if(m_sel.notest)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
|
||||
// vmaskmovps?
|
||||
if(fast)
|
||||
{
|
||||
vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
if(fast)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
vmovq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
vmovhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
// vmaskmovps?
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -250,31 +250,40 @@ L("exit");
|
|||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
{
|
||||
// int skip = left & 3;
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// int skip = left & 3;
|
||||
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
mov(ebx, edx);
|
||||
and(edx, 3);
|
||||
|
||||
// left -= skip;
|
||||
// int steps = pixels + skip - 4;
|
||||
|
||||
sub(ebx, edx);
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
|
||||
// int steps = pixels + skip - 4;
|
||||
// left -= skip;
|
||||
|
||||
lea(ecx, ptr[ecx + edx - 4]);
|
||||
sub(ebx, edx);
|
||||
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
// GSVector4i test = m_test[skip] | m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
shl(edx, 4);
|
||||
shl(edx, 4);
|
||||
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
|
||||
por(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
por(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
mov(ebx, edx); // left
|
||||
xor(edx, edx); // skip
|
||||
lea(ecx, ptr[ecx - 4]); // steps
|
||||
}
|
||||
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
|
||||
|
@ -579,14 +588,17 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
// test = m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
mov(edx, ecx);
|
||||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
|
@ -2422,6 +2434,11 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// fm |= test;
|
||||
// zm |= test;
|
||||
|
||||
|
@ -2469,11 +2486,9 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
return;
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest && m_sel.zpsm < 2;
|
||||
|
||||
movdqa(xmm1, ptr[m_sel.prim != GS_SPRITE_CLASS ? &m_local.temp.zs : &m_local.p.z]);
|
||||
|
||||
if(fast)
|
||||
if(m_sel.ztest && m_sel.zpsm < 2)
|
||||
{
|
||||
// zs = zs.blend8(zd, zm);
|
||||
|
||||
|
@ -2482,6 +2497,8 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
blend8(xmm1, xmm7);
|
||||
}
|
||||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
|
@ -2811,7 +2828,7 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
blend(xmm5, xmm2, xmm3); // TODO: could be skipped in certain cases, depending on fpsm and fm
|
||||
}
|
||||
|
||||
bool fast = m_sel.rfb && m_sel.fpsm < 2;
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
@ -2824,47 +2841,65 @@ void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(fast)
|
||||
if(m_sel.notest)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
movq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
if(fast)
|
||||
{
|
||||
movq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
if(fast)
|
||||
{
|
||||
// if(fzm & 0x0f) GSVector4i::storel(&vm16[addr + 0], fs);
|
||||
// if(fzm & 0xf0) GSVector4i::storeh(&vm16[addr + 8], fs);
|
||||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
test(mask, 0x0f);
|
||||
je("@f");
|
||||
movq(qword[addr * 2 + (size_t)m_local.gd->vm], src);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
test(mask, 0xf0);
|
||||
je("@f");
|
||||
movhps(qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2], src);
|
||||
L("@@");
|
||||
}
|
||||
else
|
||||
{
|
||||
// if(fzm & 0x03) WritePixel(fpsm, &vm16[addr + 0], fs.extract32<0>());
|
||||
// if(fzm & 0x0c) WritePixel(fpsm, &vm16[addr + 2], fs.extract32<1>());
|
||||
// if(fzm & 0x30) WritePixel(fpsm, &vm16[addr + 8], fs.extract32<2>());
|
||||
// if(fzm & 0xc0) WritePixel(fpsm, &vm16[addr + 10], fs.extract32<3>());
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -342,55 +342,55 @@ GSLocalMemory::GSLocalMemory()
|
|||
|
||||
m_psm[PSM_PSMCT24].rtx = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMCT16].rtx = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMCT16S].rtx = &GSLocalMemory::ReadTexture16S;
|
||||
m_psm[PSM_PSMCT16S].rtx = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMT8].rtx = &GSLocalMemory::ReadTexture8;
|
||||
m_psm[PSM_PSMT4].rtx = &GSLocalMemory::ReadTexture4;
|
||||
m_psm[PSM_PSMT8H].rtx = &GSLocalMemory::ReadTexture8H;
|
||||
m_psm[PSM_PSMT4HL].rtx = &GSLocalMemory::ReadTexture4HL;
|
||||
m_psm[PSM_PSMT4HH].rtx = &GSLocalMemory::ReadTexture4HH;
|
||||
m_psm[PSM_PSMZ32].rtx = &GSLocalMemory::ReadTexture32Z;
|
||||
m_psm[PSM_PSMZ24].rtx = &GSLocalMemory::ReadTexture24Z;
|
||||
m_psm[PSM_PSMZ16].rtx = &GSLocalMemory::ReadTexture16Z;
|
||||
m_psm[PSM_PSMZ16S].rtx = &GSLocalMemory::ReadTexture16SZ;
|
||||
m_psm[PSM_PSMZ32].rtx = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[PSM_PSMZ24].rtx = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMZ16].rtx = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMZ16S].rtx = &GSLocalMemory::ReadTexture16;
|
||||
|
||||
m_psm[PSM_PSMCT24].rtxP = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMCT16].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16S;
|
||||
m_psm[PSM_PSMCT16S].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMT8].rtxP = &GSLocalMemory::ReadTexture8P;
|
||||
m_psm[PSM_PSMT4].rtxP = &GSLocalMemory::ReadTexture4P;
|
||||
m_psm[PSM_PSMT8H].rtxP = &GSLocalMemory::ReadTexture8HP;
|
||||
m_psm[PSM_PSMT4HL].rtxP = &GSLocalMemory::ReadTexture4HLP;
|
||||
m_psm[PSM_PSMT4HH].rtxP = &GSLocalMemory::ReadTexture4HHP;
|
||||
m_psm[PSM_PSMZ32].rtxP = &GSLocalMemory::ReadTexture32Z;
|
||||
m_psm[PSM_PSMZ24].rtxP = &GSLocalMemory::ReadTexture24Z;
|
||||
m_psm[PSM_PSMZ16].rtxP = &GSLocalMemory::ReadTexture16Z;
|
||||
m_psm[PSM_PSMZ16S].rtxP = &GSLocalMemory::ReadTexture16SZ;
|
||||
m_psm[PSM_PSMZ32].rtxP = &GSLocalMemory::ReadTexture32;
|
||||
m_psm[PSM_PSMZ24].rtxP = &GSLocalMemory::ReadTexture24;
|
||||
m_psm[PSM_PSMZ16].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
m_psm[PSM_PSMZ16S].rtxP = &GSLocalMemory::ReadTexture16;
|
||||
|
||||
m_psm[PSM_PSMCT24].rtxb = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMCT16].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMCT16S].rtxb = &GSLocalMemory::ReadTextureBlock16S;
|
||||
m_psm[PSM_PSMCT16S].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMT8].rtxb = &GSLocalMemory::ReadTextureBlock8;
|
||||
m_psm[PSM_PSMT4].rtxb = &GSLocalMemory::ReadTextureBlock4;
|
||||
m_psm[PSM_PSMT8H].rtxb = &GSLocalMemory::ReadTextureBlock8H;
|
||||
m_psm[PSM_PSMT4HL].rtxb = &GSLocalMemory::ReadTextureBlock4HL;
|
||||
m_psm[PSM_PSMT4HH].rtxb = &GSLocalMemory::ReadTextureBlock4HH;
|
||||
m_psm[PSM_PSMZ32].rtxb = &GSLocalMemory::ReadTextureBlock32Z;
|
||||
m_psm[PSM_PSMZ24].rtxb = &GSLocalMemory::ReadTextureBlock24Z;
|
||||
m_psm[PSM_PSMZ16].rtxb = &GSLocalMemory::ReadTextureBlock16Z;
|
||||
m_psm[PSM_PSMZ16S].rtxb = &GSLocalMemory::ReadTextureBlock16SZ;
|
||||
m_psm[PSM_PSMZ32].rtxb = &GSLocalMemory::ReadTextureBlock32;
|
||||
m_psm[PSM_PSMZ24].rtxb = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMZ16].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMZ16S].rtxb = &GSLocalMemory::ReadTextureBlock16;
|
||||
|
||||
m_psm[PSM_PSMCT24].rtxbP = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMCT16].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMCT16S].rtxbP = &GSLocalMemory::ReadTextureBlock16S;
|
||||
m_psm[PSM_PSMCT16S].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMT8].rtxbP = &GSLocalMemory::ReadTextureBlock8P;
|
||||
m_psm[PSM_PSMT4].rtxbP = &GSLocalMemory::ReadTextureBlock4P;
|
||||
m_psm[PSM_PSMT8H].rtxbP = &GSLocalMemory::ReadTextureBlock8HP;
|
||||
m_psm[PSM_PSMT4HL].rtxbP = &GSLocalMemory::ReadTextureBlock4HLP;
|
||||
m_psm[PSM_PSMT4HH].rtxbP = &GSLocalMemory::ReadTextureBlock4HHP;
|
||||
m_psm[PSM_PSMZ32].rtxbP = &GSLocalMemory::ReadTextureBlock32Z;
|
||||
m_psm[PSM_PSMZ24].rtxbP = &GSLocalMemory::ReadTextureBlock24Z;
|
||||
m_psm[PSM_PSMZ16].rtxbP = &GSLocalMemory::ReadTextureBlock16Z;
|
||||
m_psm[PSM_PSMZ16S].rtxbP = &GSLocalMemory::ReadTextureBlock16SZ;
|
||||
m_psm[PSM_PSMZ32].rtxbP = &GSLocalMemory::ReadTextureBlock32;
|
||||
m_psm[PSM_PSMZ24].rtxbP = &GSLocalMemory::ReadTextureBlock24;
|
||||
m_psm[PSM_PSMZ16].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
m_psm[PSM_PSMZ16S].rtxbP = &GSLocalMemory::ReadTextureBlock16;
|
||||
|
||||
m_psm[PSM_PSMCT16].bpp = m_psm[PSM_PSMCT16S].bpp = 16;
|
||||
m_psm[PSM_PSMT8].bpp = 8;
|
||||
|
@ -1606,28 +1606,22 @@ void GSLocalMemory::ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
|
||||
void GSLocalMemory::ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock16<true>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
else
|
||||
{
|
||||
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock16<false>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTexture8(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
|
@ -1685,61 +1679,6 @@ void GSLocalMemory::ReadTexture4HH(const GSOffset* RESTRICT o, const GSVector4i&
|
|||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
// Reads the rectangle r into dst by calling ReadBlock32<true> per block.
//
// o        - offset table; not referenced directly here (presumably used by
//            the FOREACH_BLOCK_* macros - confirm against their definition).
// r        - rectangle to read.
// dst      - destination buffer, dstpitch bytes per row.
// TEXA     - not referenced in this body.
//
// NOTE(review): src and the per-block dst are not declared in this body, so
// they presumably come from FOREACH_BLOCK_START - confirm.
void GSLocalMemory::ReadTexture32Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
{
|
||||
ReadBlock32<true>(src, dst, dstpitch);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
// Reads the rectangle r into dst by calling ReadAndExpandBlock24 per block.
//
// o        - offset table; not referenced directly here (presumably used by
//            the FOREACH_BLOCK_* macros - confirm).
// r        - rectangle to read.
// dst      - destination buffer, dstpitch bytes per row.
// TEXA     - its AEM flag selects the template instantiation; the whole
//            value is forwarded to the block routine.
//
// TEXA.AEM is tested once, outside the block loop, so each loop calls a
// single instantiation of the block routine.
void GSLocalMemory::ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<true>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
else
|
||||
{
|
||||
FOREACH_BLOCK_START(r, 8, 8, 32)
|
||||
{
|
||||
ReadAndExpandBlock24<false>(src, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
}
|
||||
|
||||
// Reads the rectangle r into dst: each block is read into a temporary with
// ReadBlock16<true> and then expanded with ExpandBlock16.
//
// o        - offset table; not referenced directly here (presumably used by
//            the FOREACH_BLOCK_* macros - confirm).
// r        - rectangle to read.
// dst      - destination buffer, dstpitch bytes per row.
// TEXA     - forwarded to ExpandBlock16.
void GSLocalMemory::ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
// Temporary for one block: 16 * 8 uint16 values, reused for every block.
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
// Pitch argument is the byte size of one of eight equal rows of the temporary.
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
// Reads the rectangle r into dst: each block is read into a temporary with
// ReadBlock16<true> and then expanded with ExpandBlock16. The body is the
// same sequence of calls as ReadTexture16Z.
//
// o        - offset table; not referenced directly here (presumably used by
//            the FOREACH_BLOCK_* macros - confirm).
// r        - rectangle to read.
// dst      - destination buffer, dstpitch bytes per row.
// TEXA     - forwarded to ExpandBlock16.
void GSLocalMemory::ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
// Temporary for one block: 16 * 8 uint16 values, reused for every block.
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
FOREACH_BLOCK_START(r, 16, 8, 32)
|
||||
{
|
||||
// Pitch argument is the byte size of one of eight equal rows of the temporary.
ReadBlock16<true>(src, (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
FOREACH_BLOCK_END
|
||||
}
|
||||
|
||||
///////////////////
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
|
@ -1765,20 +1704,16 @@ void GSLocalMemory::ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, cons
|
|||
|
||||
void GSLocalMemory::ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
ReadAndExpandBlock16<true>(BlockPtr(bp), dst, dstpitch, TEXA);
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadAndExpandBlock16<false>(BlockPtr(bp), dst, dstpitch, TEXA);
|
||||
}
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
|
@ -1816,45 +1751,6 @@ void GSLocalMemory::ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, con
|
|||
ReadAndExpandBlock4HH_32(BlockPtr(bp), dst, dstpitch, m_clut);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ReadBlock32<true>(BlockPtr(bp), dst, dstpitch);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
ALIGN_STACK(32);
|
||||
|
||||
if(TEXA.AEM)
|
||||
{
|
||||
ReadAndExpandBlock24<true>(BlockPtr(bp), dst, dstpitch, TEXA);
|
||||
}
|
||||
else
|
||||
{
|
||||
ReadAndExpandBlock24<false>(BlockPtr(bp), dst, dstpitch, TEXA);
|
||||
}
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
|
||||
void GSLocalMemory::ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const
|
||||
{
|
||||
__aligned(uint16, 32) block[16 * 8];
|
||||
|
||||
ReadBlock16<true>(BlockPtr(bp), (uint8*)block, sizeof(block) / 8);
|
||||
|
||||
ExpandBlock16(block, dst, dstpitch, TEXA);
|
||||
}
|
||||
|
||||
///////////////////
|
||||
|
||||
void GSLocalMemory::ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA)
|
||||
|
|
|
@ -875,32 +875,22 @@ public:
|
|||
void ReadTexture32(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture24(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16S(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture8(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture4(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture8H(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture4HL(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture4HH(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture32Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture24Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16Z(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
void ReadTexture16SZ(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
|
||||
void ReadTexture(const GSOffset* RESTRICT o, const GSVector4i& r, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA);
|
||||
|
||||
void ReadTextureBlock32(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock24(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16S(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock8(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock4(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock8H(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock4HL(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock4HH(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock32Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock24Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16Z(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
void ReadTextureBlock16SZ(uint32 bp, uint8* dst, int dstpitch, const GIFRegTEXA& TEXA) const;
|
||||
|
||||
// pal ? 8 : 32
|
||||
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
|
||||
#define THREAD_HEIGHT 4
|
||||
|
||||
int GSRasterizerData::s_counter = 0;
|
||||
|
||||
GSRasterizer::GSRasterizer(IDrawScanline* ds, int id, int threads, GSPerfMon* perfmon)
|
||||
: m_ds(ds)
|
||||
, m_id(id)
|
||||
|
@ -124,6 +126,8 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
|
||||
if(data->vertex != NULL && data->vertex_count == 0 || data->index != NULL && data->index_count == 0) return;
|
||||
|
||||
data->start = __rdtsc();
|
||||
|
||||
m_ds->BeginDraw(data);
|
||||
|
||||
const GSVertexSW* vertex = data->vertex;
|
||||
|
@ -140,8 +144,6 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
m_fscissor_x = GSVector4(data->scissor).xzxz();
|
||||
m_fscissor_y = GSVector4(data->scissor).ywyw();
|
||||
|
||||
uint64 start = __rdtsc();
|
||||
|
||||
switch(data->primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
|
@ -206,7 +208,9 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
uint64 ticks = __rdtsc() - start;
|
||||
data->pixels = m_pixels;
|
||||
|
||||
uint64 ticks = __rdtsc() - data->start;
|
||||
|
||||
m_ds->EndDraw(data->frame, ticks, m_pixels);
|
||||
}
|
||||
|
|
|
@ -30,6 +30,8 @@
|
|||
|
||||
__aligned(class, 32) GSRasterizerData : public GSAlignedClass<32>
|
||||
{
|
||||
static int s_counter;
|
||||
|
||||
public:
|
||||
GSVector4i scissor;
|
||||
GSVector4i bbox;
|
||||
|
@ -40,6 +42,9 @@ public:
|
|||
uint32* index;
|
||||
int index_count;
|
||||
uint64 frame;
|
||||
uint64 start;
|
||||
int pixels;
|
||||
int counter;
|
||||
|
||||
GSRasterizerData()
|
||||
: scissor(GSVector4i::zero())
|
||||
|
@ -51,7 +56,10 @@ public:
|
|||
, index(NULL)
|
||||
, index_count(0)
|
||||
, frame(0)
|
||||
, start(0)
|
||||
, pixels(0)
|
||||
{
|
||||
counter = s_counter++;
|
||||
}
|
||||
|
||||
virtual ~GSRasterizerData()
|
||||
|
|
|
@ -357,6 +357,28 @@ void GSRendererSW::Draw()
|
|||
|
||||
if(!GetScanlineGlobalData(sd)) return;
|
||||
|
||||
if(0) if(LOG)
|
||||
{
|
||||
int n = GSUtil::GetVertexCount(PRIM->PRIM);
|
||||
|
||||
for(int i = 0, j = 0; i < m_index.tail; i += n, j++)
|
||||
{
|
||||
for(int k = 0; k < n; k++)
|
||||
{
|
||||
GSVertex* v = &m_vertex.buff[m_index.buff[i + k]];
|
||||
GSVertex* vn = &m_vertex.buff[m_index.buff[i + n - 1]];
|
||||
|
||||
fprintf(s_fp, "%d:%d %f %f %f %f\n",
|
||||
j, k,
|
||||
(float)(v->XYZ.X - context->XYOFFSET.OFX) / 16,
|
||||
(float)(v->XYZ.Y - context->XYOFFSET.OFY) / 16,
|
||||
PRIM->FST ? (float)(v->U) / 16 : v->ST.S / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q),
|
||||
PRIM->FST ? (float)(v->V) / 16 : v->ST.T / (PRIM->PRIM == GS_SPRITE ? vn->RGBAQ.Q : v->RGBAQ.Q)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
@ -404,11 +426,17 @@ void GSRendererSW::Draw()
|
|||
|
||||
//
|
||||
|
||||
if(LOG) {fprintf(s_fp, "queue %05x %d (%d) %05x %d (%d) %05x %d %dx%d | %d %d %d\n",
|
||||
m_context->FRAME.Block(), m_context->FRAME.PSM, gd.sel.fwrite,
|
||||
m_context->ZBUF.Block(), m_context->ZBUF.PSM, gd.sel.zwrite,
|
||||
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
|
||||
PRIM->PRIM, sd->vertex_count, sd->index_count); fflush(s_fp);}
|
||||
if(LOG)
|
||||
{
|
||||
fprintf(s_fp, "[%d] queue %05x %d (%d) %05x %d (%d) %05x %d %dx%d (%d %d %d) | %d %d %d\n",
|
||||
sd->counter,
|
||||
m_context->FRAME.Block(), m_context->FRAME.PSM, gd.sel.fwrite,
|
||||
m_context->ZBUF.Block(), m_context->ZBUF.PSM, gd.sel.zwrite,
|
||||
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH, m_context->TEX0.CSM, m_context->TEX0.CPSM, m_context->TEX0.CSA,
|
||||
PRIM->PRIM, sd->vertex_count, sd->index_count);
|
||||
|
||||
fflush(s_fp);
|
||||
}
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
|
@ -581,7 +609,7 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
|
||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
if(LOG) {fprintf(s_fp, "r %05x %d %d, %d %d %d %d\n", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);}
|
||||
if(LOG) {fprintf(s_fp, "%s %05x %d %d, %d %d %d %d\n", clut ? "rp" : "r", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);}
|
||||
|
||||
if(!m_rl->IsSynced())
|
||||
{
|
||||
|
@ -814,8 +842,6 @@ bool GSRendererSW::CheckSourcePages(SharedData* sd)
|
|||
|
||||
if(m_fzb_pages[*p]) // currently being drawn to? => sync
|
||||
{
|
||||
if(LOG) fprintf(s_fp, "r=8 %05x\n", *p << 5);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -864,7 +890,10 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
}
|
||||
}
|
||||
|
||||
if(context->TEST.ATE)
|
||||
|
@ -1305,6 +1334,23 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
gd.zm |= GSVector4i::xffff0000();
|
||||
}
|
||||
|
||||
if(gd.sel.prim == GS_SPRITE_CLASS && !gd.sel.ftest && !gd.sel.ztest && data->bbox.eq(data->bbox.rintersect(data->scissor)))
|
||||
{
|
||||
gd.sel.notest = 1;
|
||||
|
||||
uint32 ofx = context->XYOFFSET.OFX;
|
||||
|
||||
for(int i = 0, j = m_vertex.tail; i < j; i++)
|
||||
{
|
||||
if((((m_vertex.buff[i].XYZ.X - ofx) + 15) >> 4) & 3) // aligned to 4
|
||||
{
|
||||
gd.sel.notest = 0;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1329,6 +1375,14 @@ GSRendererSW::SharedData::~SharedData()
|
|||
|
||||
if(global.clut) _aligned_free(global.clut);
|
||||
if(global.dimx) _aligned_free(global.dimx);
|
||||
|
||||
if(LOG) {fprintf(s_fp, "[%d] done t=%lld p=%d | %d %d %d | %08x_%08x\n",
|
||||
counter,
|
||||
__rdtsc() - start, pixels,
|
||||
primclass, vertex_count, index_count,
|
||||
global.sel.hi, global.sel.lo
|
||||
);
|
||||
fflush(s_fp);}
|
||||
}
|
||||
|
||||
void GSRendererSW::SharedData::UsePages(const uint32* fb_pages, int fpsm, const uint32* zb_pages, int zpsm)
|
||||
|
@ -1421,7 +1475,7 @@ void GSRendererSW::SharedData::UpdateSource()
|
|||
|
||||
if(m_parent->s_save && m_parent->s_n >= m_parent->s_saven)
|
||||
{
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", m_parent->s_n, frame, i, (int)m_parent->m_context->TEX0.TBP0, (int)m_parent->m_context->TEX0.PSM);
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", m_parent->s_n - 2, frame, i, (int)m_parent->m_context->TEX0.TBP0, (int)m_parent->m_context->TEX0.PSM);
|
||||
|
||||
m_tex[i].t->Save(s);
|
||||
}
|
||||
|
|
|
@ -67,8 +67,9 @@ union GSScanlineSelector
|
|||
|
||||
uint32 edge:1; // 48
|
||||
uint32 tw:3; // 49 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||
uint32 lcm:1; // 50
|
||||
uint32 mmin:2; // 51
|
||||
uint32 lcm:1; // 52
|
||||
uint32 mmin:2; // 53
|
||||
uint32 notest:1; // 54 (no ztest, no atest, no date, no scissor test, and horizontally aligned to 4 pixels)
|
||||
};
|
||||
|
||||
struct
|
||||
|
|
|
@ -315,7 +315,7 @@ void GSSettingsDlg::UpdateControls()
|
|||
EnableWindow(GetDlgItem(m_hWnd, IDC_NATIVERES), hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_FILTER), hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_PALTEX), hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw && GSDevice9::GetMaxDepth(m_lastValidMsaa) < 32);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_LOGZ), dx9 && hw);
|
||||
EnableWindow(GetDlgItem(m_hWnd, IDC_FBA), dx9 && hw);
|
||||
//EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let users set software params regardless of renderer used
|
||||
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw);
|
||||
|
|
|
@ -38,7 +38,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
{
|
||||
mov(edx, dword[esp + _dscan]);
|
||||
|
||||
for(int i = 0; i < 5; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
}
|
||||
|
@ -80,7 +80,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
vpshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vmovdqa(ptr[&m_local.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
|
@ -103,7 +103,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
vmulps(xmm1, xmm0, xmm3);
|
||||
vmovdqa(ptr[&m_local.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
|
@ -139,36 +139,6 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
vmovdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]);
|
||||
vpshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
/*
|
||||
// GSVector4 z = p.zzzz();
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
vbroadcastss(xmm1, ptr[&GSVector4::m_half]);
|
||||
vmulps(xmm1, xmm0);
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
vpslld(xmm1, 1);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpsrld(xmm2, 31);
|
||||
vpand(xmm0, xmm2);
|
||||
|
||||
vpor(xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.p.z = GSVector4i(z);
|
||||
|
||||
vcvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
*/
|
||||
|
||||
vmovdqa(ptr[&m_local.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -210,7 +180,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
vshufps(xmm1, xmm0, xmm0, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
|
@ -272,7 +242,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
|
@ -302,7 +272,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
vshufps(xmm2, xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
{
|
||||
mov(edx, dword[esp + _dscan]);
|
||||
|
||||
for(int i = 0; i < 5; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
movaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
}
|
||||
|
@ -82,7 +82,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
pshufhw(xmm2, xmm2, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
movdqa(ptr[&m_local.d4.f], xmm2);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].f = GSVector4i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
|
@ -107,7 +107,7 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
mulps(xmm1, xmm3);
|
||||
movdqa(ptr[&m_local.d4.z], xmm1);
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// m_local.d[i].z = dz * m_shift[i];
|
||||
|
||||
|
@ -144,36 +144,6 @@ void GSSetupPrimCodeGenerator::Depth()
|
|||
|
||||
movdqa(xmm0, ptr[ecx + offsetof(GSVertexSW, t)]);
|
||||
pshufd(xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
/*
|
||||
// GSVector4 z = p.zzzz();
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
if(m_sel.zoverflow)
|
||||
{
|
||||
// m_local.p.z = (GSVector4i(z * 0.5f) << 1) | (GSVector4i(z) & GSVector4i::x00000001());
|
||||
|
||||
movaps(xmm1, ptr[&GSVector4::m_half]);
|
||||
mulps(xmm1, xmm0);
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
pslld(xmm1, 1);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
psrld(xmm2, 31);
|
||||
pand(xmm0, xmm2);
|
||||
|
||||
por(xmm0, xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
// m_local.p.z = GSVector4i(z);
|
||||
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
}
|
||||
*/
|
||||
|
||||
movdqa(ptr[&m_local.p.z], xmm0);
|
||||
}
|
||||
}
|
||||
|
@ -217,7 +187,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, (uint8)_MM_SHUFFLE(j, j, j, j));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4 v = ds/dt * m_shift[i];
|
||||
|
||||
|
@ -282,7 +252,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i r = GSVector4i(dr * m_shift[i]).ps32();
|
||||
|
||||
|
@ -315,7 +285,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
|
||||
for(int i = 0; i < 4; i++)
|
||||
for(int i = 0; i < (m_sel.notest ? 1 : 4); i++)
|
||||
{
|
||||
// GSVector4i g = GSVector4i(dg * m_shift[i]).ps32();
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ GSState::GSState()
|
|||
, m_frameskip(0)
|
||||
, m_vt(this)
|
||||
, m_q(1.0f)
|
||||
, m_texflush(true)
|
||||
{
|
||||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
|
||||
|
@ -200,6 +201,8 @@ void GSState::Reset()
|
|||
m_vertex.tail = 0;
|
||||
m_vertex.next = 0;
|
||||
m_index.tail = 0;
|
||||
|
||||
m_texflush = true;
|
||||
}
|
||||
|
||||
void GSState::ResetHandlers()
|
||||
|
@ -992,7 +995,7 @@ void GSState::GIFRegHandlerFOGCOL(const GIFReg* RESTRICT r)
|
|||
|
||||
void GSState::GIFRegHandlerTEXFLUSH(const GIFReg* RESTRICT r)
|
||||
{
|
||||
// TRACE(_T("TEXFLUSH\n"));
|
||||
m_texflush = true;
|
||||
}
|
||||
|
||||
template<int i> void GSState::GIFRegHandlerSCISSOR(const GIFReg* RESTRICT r)
|
||||
|
|
|
@ -143,6 +143,7 @@ protected:
|
|||
float m_q;
|
||||
GSVector4 m_scissor;
|
||||
uint32 m_ofxy;
|
||||
bool m_texflush;
|
||||
|
||||
struct
|
||||
{
|
||||
|
|
|
@ -28,10 +28,13 @@ InitializeConditionVariablePtr pInitializeConditionVariable;
|
|||
WakeConditionVariablePtr pWakeConditionVariable;
|
||||
WakeAllConditionVariablePtr pWakeAllConditionVariable;
|
||||
SleepConditionVariableSRWPtr pSleepConditionVariableSRW;
|
||||
InitializeSRWLockPtr pInitializeSRWLock;;
|
||||
InitializeSRWLockPtr pInitializeSRWLock;
|
||||
AcquireSRWLockExclusivePtr pAcquireSRWLockExclusive;
|
||||
TryAcquireSRWLockExclusivePtr pTryAcquireSRWLockExclusive;
|
||||
ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive;
|
||||
AcquireSRWLockSharedPtr pAcquireSRWLockShared;
|
||||
TryAcquireSRWLockSharedPtr pTryAcquireSRWLockShared;
|
||||
ReleaseSRWLockSharedPtr pReleaseSRWLockShared;
|
||||
|
||||
class InitCondVar
|
||||
{
|
||||
|
@ -50,6 +53,9 @@ public:
|
|||
pAcquireSRWLockExclusive = (AcquireSRWLockExclusivePtr)GetProcAddress(m_kernel32, "AcquireSRWLockExclusive");
|
||||
pTryAcquireSRWLockExclusive = (TryAcquireSRWLockExclusivePtr)GetProcAddress(m_kernel32, "TryAcquireSRWLockExclusive");
|
||||
pReleaseSRWLockExclusive = (ReleaseSRWLockExclusivePtr)GetProcAddress(m_kernel32, "ReleaseSRWLockExclusive");
|
||||
pAcquireSRWLockShared = (AcquireSRWLockSharedPtr)GetProcAddress(m_kernel32, "AcquireSRWLockShared");
|
||||
pTryAcquireSRWLockShared = (TryAcquireSRWLockSharedPtr)GetProcAddress(m_kernel32, "TryAcquireSRWLockShared");
|
||||
pReleaseSRWLockShared = (ReleaseSRWLockSharedPtr)GetProcAddress(m_kernel32, "ReleaseSRWLockShared");
|
||||
}
|
||||
|
||||
virtual ~InitCondVar()
|
||||
|
|
|
@ -23,25 +23,54 @@
|
|||
|
||||
#include "GSdx.h"
|
||||
|
||||
// Platform-independent threading interfaces.
//
// The concrete GSThread / GSCritSec / GSEvent / GSCondVarLock / GSCondVar
// classes below derive from these, so that code such as GSJobQueue can hold a
// lock/event through a base pointer and pick the implementation at run time.
//
// Every interface declares a virtual destructor: an implementation that is
// owned through an IGSLock* / IGSEvent* must be destroyed through its most
// derived destructor (releasing the critical section, handle, mutex, ...).
// Without it, deleting through the base pointer is undefined behaviour.

// Entry point executed by a worker thread.
class IGSThread
{
public:
	virtual ~IGSThread() {}

protected:
	// Body of the thread; implemented by the class that owns the thread.
	virtual void ThreadProc() = 0;
};

// Mutual-exclusion primitive.
class IGSLock
{
public:
	virtual ~IGSLock() {}

	// Blocks until the lock is acquired.
	virtual void Lock() = 0;

	// Attempts to acquire the lock without blocking; returns true on success.
	virtual bool TryLock() = 0;

	// Releases a lock previously acquired with Lock() or a successful TryLock().
	virtual void Unlock() = 0;
};

// Signalling primitive that is waited on in combination with an IGSLock.
class IGSEvent
{
public:
	virtual ~IGSEvent() {}

	// Signals the event, waking a waiter.
	virtual void Set() = 0;

	// Waits for the event. "l" is the lock associated with the wait; how it is
	// released and re-acquired around the wait is up to the implementation.
	// Returns true if the wait completed successfully.
	virtual bool Wait(IGSLock* l) = 0;
};
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
typedef void (WINAPI * InitializeConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
|
||||
typedef void (WINAPI * WakeConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
|
||||
typedef void (WINAPI * WakeAllConditionVariablePtr)(CONDITION_VARIABLE* ConditionVariable);
|
||||
typedef void (WINAPI * SleepConditionVariableSRWPtr)(CONDITION_VARIABLE* ConditionVariable, SRWLOCK* SRWLock, DWORD dwMilliseconds, ULONG Flags);
|
||||
typedef BOOL (WINAPI * SleepConditionVariableSRWPtr)(CONDITION_VARIABLE* ConditionVariable, SRWLOCK* SRWLock, DWORD dwMilliseconds, ULONG Flags);
|
||||
typedef void (WINAPI * InitializeSRWLockPtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * AcquireSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef BOOLEAN (WINAPI * TryAcquireSRWLockExclusivePtr)(SRWLOCK* SRWLock);typedef void (WINAPI * ReleaseSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef BOOLEAN (WINAPI * TryAcquireSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * ReleaseSRWLockExclusivePtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * AcquireSRWLockSharedPtr)(SRWLOCK* SRWLock);
|
||||
typedef BOOLEAN (WINAPI * TryAcquireSRWLockSharedPtr)(SRWLOCK* SRWLock);
|
||||
typedef void (WINAPI * ReleaseSRWLockSharedPtr)(SRWLOCK* SRWLock);
|
||||
|
||||
extern InitializeConditionVariablePtr pInitializeConditionVariable;
|
||||
extern WakeConditionVariablePtr pWakeConditionVariable;
|
||||
extern WakeAllConditionVariablePtr pWakeAllConditionVariable;
|
||||
extern SleepConditionVariableSRWPtr pSleepConditionVariableSRW;
|
||||
extern InitializeSRWLockPtr pInitializeSRWLock;;
|
||||
extern InitializeSRWLockPtr pInitializeSRWLock;
|
||||
extern AcquireSRWLockExclusivePtr pAcquireSRWLockExclusive;
|
||||
extern TryAcquireSRWLockExclusivePtr pTryAcquireSRWLockExclusive;extern ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive;
|
||||
extern TryAcquireSRWLockExclusivePtr pTryAcquireSRWLockExclusive;
|
||||
extern ReleaseSRWLockExclusivePtr pReleaseSRWLockExclusive;
|
||||
extern AcquireSRWLockSharedPtr pAcquireSRWLockShared;
|
||||
extern TryAcquireSRWLockSharedPtr pTryAcquireSRWLockShared;
|
||||
extern ReleaseSRWLockSharedPtr pReleaseSRWLockShared;
|
||||
|
||||
class GSThread
|
||||
class GSThread : public IGSThread
|
||||
{
|
||||
DWORD m_ThreadId;
|
||||
HANDLE m_hThread;
|
||||
|
@ -49,8 +78,6 @@ class GSThread
|
|||
static DWORD WINAPI StaticThreadProc(void* lpParam);
|
||||
|
||||
protected:
|
||||
virtual void ThreadProc() = 0;
|
||||
|
||||
void CreateThread();
|
||||
void CloseThread();
|
||||
|
||||
|
@ -59,7 +86,7 @@ public:
|
|||
virtual ~GSThread();
|
||||
};
|
||||
|
||||
class GSCritSec
|
||||
class GSCritSec : public IGSLock
|
||||
{
|
||||
CRITICAL_SECTION m_cs;
|
||||
|
||||
|
@ -67,26 +94,25 @@ public:
|
|||
GSCritSec() {InitializeCriticalSection(&m_cs);}
|
||||
~GSCritSec() {DeleteCriticalSection(&m_cs);}
|
||||
|
||||
void Lock() {EnterCriticalSection(&m_cs);}
|
||||
bool TryLock() {return TryEnterCriticalSection(&m_cs) == TRUE;}
|
||||
void Unlock() {LeaveCriticalSection(&m_cs);}
|
||||
void Lock() {EnterCriticalSection(&m_cs);}
|
||||
bool TryLock() {return TryEnterCriticalSection(&m_cs) == TRUE;}
|
||||
void Unlock() {LeaveCriticalSection(&m_cs);}
|
||||
};
|
||||
|
||||
class GSEvent
|
||||
class GSEvent : public IGSEvent
|
||||
{
|
||||
protected:
|
||||
HANDLE m_hEvent;
|
||||
|
||||
public:
|
||||
GSEvent(bool manual = false, bool initial = false) {m_hEvent = CreateEvent(NULL, manual, initial, NULL);}
|
||||
GSEvent() {m_hEvent = CreateEvent(NULL, FALSE, FALSE, NULL);}
|
||||
~GSEvent() {CloseHandle(m_hEvent);}
|
||||
|
||||
void Set() {SetEvent(m_hEvent);}
|
||||
void Reset() {ResetEvent(m_hEvent);}
|
||||
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
|
||||
// Waits (without timeout) for the event to become signalled.
// If a lock is supplied it is released before blocking and taken again
// before returning. Returns true when the wait ended with WAIT_OBJECT_0.
bool Wait(IGSLock* l)
{
	if(l != NULL)
	{
		l->Unlock();
	}

	const bool signaled = (WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0);

	if(l != NULL)
	{
		l->Lock();
	}

	return signaled;
}
|
||||
};
|
||||
|
||||
class GSCondVarLock
|
||||
class GSCondVarLock : public IGSLock
|
||||
{
|
||||
SRWLOCK m_lock;
|
||||
|
||||
|
@ -94,12 +120,13 @@ public:
|
|||
GSCondVarLock() {pInitializeSRWLock(&m_lock);}
|
||||
|
||||
void Lock() {pAcquireSRWLockExclusive(&m_lock);}
|
||||
bool TryLock() {return pTryAcquireSRWLockExclusive(&m_lock) == TRUE;} void Unlock() {pReleaseSRWLockExclusive(&m_lock);}
|
||||
|
||||
bool TryLock() {return pTryAcquireSRWLockExclusive(&m_lock) == TRUE;}
|
||||
void Unlock() {pReleaseSRWLockExclusive(&m_lock);}
|
||||
|
||||
operator SRWLOCK* () {return &m_lock;}
|
||||
};
|
||||
|
||||
class GSCondVar
|
||||
class GSCondVar : public IGSEvent
|
||||
{
|
||||
CONDITION_VARIABLE m_cv;
|
||||
|
||||
|
@ -107,7 +134,7 @@ public:
|
|||
GSCondVar() {pInitializeConditionVariable(&m_cv);}
|
||||
|
||||
void Set() {pWakeConditionVariable(&m_cv);}
|
||||
void Wait(GSCondVarLock& lock) {pSleepConditionVariableSRW(&m_cv, lock, INFINITE, 0);}
|
||||
// Sleeps on the condition variable (no timeout) using the SRW lock wrapped by "l".
// "l" is cast unchecked to GSCondVarLock, so the caller must pass a GSCondVarLock.
// Returns true when pSleepConditionVariableSRW reports success (non-zero).
bool Wait(IGSLock* l)
{
	GSCondVarLock* cvlock = (GSCondVarLock*)l;

	const BOOL woke = pSleepConditionVariableSRW(&m_cv, *cvlock, INFINITE, 0);

	return woke != 0;
}
|
||||
|
||||
operator CONDITION_VARIABLE* () {return &m_cv;}
|
||||
};
|
||||
|
@ -117,7 +144,7 @@ public:
|
|||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
|
||||
class GSThread
|
||||
class GSThread : public IGSThread
|
||||
{
|
||||
pthread_attr_t m_thread_attr;
|
||||
pthread_t m_thread;
|
||||
|
@ -125,8 +152,6 @@ class GSThread
|
|||
static void* StaticThreadProc(void* param);
|
||||
|
||||
protected:
|
||||
virtual void ThreadProc() = 0;
|
||||
|
||||
void CreateThread();
|
||||
void CloseThread();
|
||||
|
||||
|
@ -135,16 +160,16 @@ public:
|
|||
virtual ~GSThread();
|
||||
};
|
||||
|
||||
class GSCritSec
|
||||
class GSCritSec : public IGSLock
|
||||
{
|
||||
pthread_mutexattr_t m_mutex_attr;
|
||||
pthread_mutex_t m_mutex;
|
||||
|
||||
public:
|
||||
GSCritSec()
|
||||
GSCritSec(bool recursive = true)
|
||||
{
|
||||
pthread_mutexattr_init(&m_mutex_attr);
|
||||
pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_RECURSIVE);
|
||||
pthread_mutexattr_settype(&m_mutex_attr, recursive ? PTHREAD_MUTEX_RECURSIVE : PTHREAD_MUTEX_NORMAL);
|
||||
pthread_mutex_init(&m_mutex, &m_mutex_attr);
|
||||
}
|
||||
|
||||
|
@ -159,7 +184,7 @@ public:
|
|||
void Unlock() {pthread_mutex_unlock(&m_mutex);}
|
||||
};
|
||||
|
||||
class GSEvent
|
||||
class GSEvent : public IGSEvent
|
||||
{
|
||||
protected:
|
||||
sem_t m_sem;
|
||||
|
@ -169,36 +194,18 @@ public:
|
|||
~GSEvent() {sem_destroy(&m_sem);}
|
||||
|
||||
void Set() {sem_post(&m_sem);}
|
||||
bool Wait() {return sem_wait(&m_sem) == 0;}
|
||||
// Blocks on the semaphore until it can be decremented.
// If a lock is supplied it is released before blocking and taken again
// before returning. Returns true when sem_wait() reports success (0).
bool Wait(IGSLock* l)
{
	if(l != NULL)
	{
		l->Unlock();
	}

	const bool acquired = (sem_wait(&m_sem) == 0);

	if(l != NULL)
	{
		l->Lock();
	}

	return acquired;
}
|
||||
};
|
||||
|
||||
// Note except the mutex attribute the code is same as GSCritSec object
|
||||
class GSCondVarLock
|
||||
class GSCondVarLock : public GSCritSec
|
||||
{
|
||||
pthread_mutexattr_t m_mutex_attr;
|
||||
pthread_mutex_t m_mutex;
|
||||
|
||||
public:
|
||||
GSCondVarLock()
|
||||
GSCondVarLock() : GSCritSec(false)
|
||||
{
|
||||
pthread_mutexattr_init(&m_mutex_attr);
|
||||
pthread_mutexattr_settype(&m_mutex_attr, PTHREAD_MUTEX_NORMAL);
|
||||
pthread_mutex_init(&m_mutex, &m_mutex_attr);
|
||||
}
|
||||
virtual ~GSCondVarLock()
|
||||
{
|
||||
pthread_mutex_destroy(&m_mutex);
|
||||
pthread_mutexattr_destroy(&m_mutex_attr);
|
||||
}
|
||||
|
||||
void Lock() {pthread_mutex_lock(&m_mutex);}
|
||||
bool TryLock() {return pthread_mutex_trylock(&m_mutex) == 0;}
|
||||
void Unlock() {pthread_mutex_unlock(&m_mutex);}
|
||||
|
||||
operator pthread_mutex_t* () {return &m_mutex;}
|
||||
};
|
||||
|
||||
class GSCondVar
|
||||
class GSCondVar : public IGSEvent
|
||||
{
|
||||
pthread_cond_t m_cv;
|
||||
pthread_condattr_t m_cv_attr;
|
||||
|
@ -209,6 +216,7 @@ public:
|
|||
pthread_condattr_init(&m_cv_attr);
|
||||
pthread_cond_init(&m_cv, &m_cv_attr);
|
||||
}
|
||||
|
||||
virtual ~GSCondVar()
|
||||
{
|
||||
pthread_condattr_destroy(&m_cv_attr);
|
||||
|
@ -216,7 +224,7 @@ public:
|
|||
}
|
||||
|
||||
void Set() {pthread_cond_signal(&m_cv);}
|
||||
void Wait(GSCondVarLock& lock) {pthread_cond_wait(&m_cv, lock);}
|
||||
// Waits on the condition variable using the mutex wrapped by "l".
// "l" is cast unchecked to GSCondVarLock, so the caller must pass a GSCondVarLock.
// Returns true when pthread_cond_wait() reports success (0).
bool Wait(IGSLock* l)
{
	// The comparison result was previously computed and discarded: the function
	// fell off its end without returning a value, which is undefined behaviour
	// for a function declared to return bool.
	return pthread_cond_wait(&m_cv, *(GSCondVarLock*)l) == 0;
}
|
||||
|
||||
operator pthread_cond_t* () {return &m_cv;}
|
||||
};
|
||||
|
@ -225,32 +233,11 @@ public:
|
|||
|
||||
class GSAutoLock
|
||||
{
|
||||
protected:
|
||||
GSCritSec* m_cs;
|
||||
IGSLock* m_lock;
|
||||
|
||||
public:
|
||||
GSAutoLock(GSCritSec* cs) {m_cs = cs; m_cs->Lock();}
|
||||
~GSAutoLock() {m_cs->Unlock();}
|
||||
};
|
||||
|
||||
class GSEventSpin
|
||||
{
|
||||
protected:
|
||||
volatile long m_sync;
|
||||
volatile bool m_manual;
|
||||
|
||||
public:
|
||||
GSEventSpin(bool manual = false, bool initial = false) {m_sync = initial ? 1 : 0; m_manual = manual;}
|
||||
~GSEventSpin() {}
|
||||
|
||||
void Set() {_interlockedbittestandset(&m_sync, 0);}
|
||||
void Reset() {_interlockedbittestandreset(&m_sync, 0);}
|
||||
bool Wait()
|
||||
{
|
||||
if(m_manual) while(!m_sync) _mm_pause();
|
||||
else while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause();
|
||||
return true;
|
||||
}
|
||||
GSAutoLock(IGSLock* l) {(m_lock = l)->Lock();}
|
||||
~GSAutoLock() {m_lock->Unlock();}
|
||||
};
|
||||
|
||||
template<class T> class GSJobQueue : private GSThread
|
||||
|
@ -259,70 +246,36 @@ protected:
|
|||
queue<T> m_queue;
|
||||
volatile long m_count; // NOTE: it is the safest to have our own counter because m_queue.pop() might decrement its own before the last item runs out of its scope and gets destroyed (implementation dependent)
|
||||
volatile bool m_exit;
|
||||
struct {GSCritSec lock; GSEvent notempty;} m_ev;
|
||||
struct {GSCondVar notempty, empty; GSCondVarLock lock; bool available;} m_cv;
|
||||
IGSEvent* m_notempty;
|
||||
IGSEvent* m_empty;
|
||||
IGSLock* m_lock;
|
||||
|
||||
void ThreadProc()
|
||||
{
|
||||
if(m_cv.available)
|
||||
m_lock->Lock();
|
||||
|
||||
while(true)
|
||||
{
|
||||
m_cv.lock.Lock();
|
||||
|
||||
while(true)
|
||||
while(m_queue.empty())
|
||||
{
|
||||
while(m_queue.empty())
|
||||
{
|
||||
m_cv.notempty.Wait(m_cv.lock);
|
||||
m_notempty->Wait(m_lock);
|
||||
|
||||
if(m_exit) {m_cv.lock.Unlock(); return;}
|
||||
}
|
||||
|
||||
T& item = m_queue.front();
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
|
||||
Process(item);
|
||||
|
||||
m_cv.lock.Lock();
|
||||
|
||||
m_queue.pop();
|
||||
|
||||
m_count--;
|
||||
|
||||
if(m_queue.empty())
|
||||
{
|
||||
m_cv.empty.Set();
|
||||
}
|
||||
if(m_exit) {m_lock->Unlock(); return;}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ev.lock.Lock();
|
||||
|
||||
while(true)
|
||||
T& item = m_queue.front();
|
||||
|
||||
m_lock->Unlock();
|
||||
|
||||
Process(item);
|
||||
|
||||
m_lock->Lock();
|
||||
|
||||
m_queue.pop();
|
||||
|
||||
if(--m_count == 0)
|
||||
{
|
||||
while(m_queue.empty())
|
||||
{
|
||||
m_ev.lock.Unlock();
|
||||
|
||||
m_ev.notempty.Wait();
|
||||
|
||||
if(m_exit) {return;}
|
||||
|
||||
m_ev.lock.Lock();
|
||||
}
|
||||
|
||||
T& item = m_queue.front();
|
||||
|
||||
m_ev.lock.Unlock();
|
||||
|
||||
Process(item);
|
||||
|
||||
m_ev.lock.Lock();
|
||||
|
||||
m_queue.pop();
|
||||
|
||||
m_count--;
|
||||
m_empty->Set();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -332,17 +285,30 @@ public:
|
|||
: m_count(0)
|
||||
, m_exit(false)
|
||||
{
|
||||
m_cv.available = !!theApp.GetConfig("condvar", 1);
|
||||
bool condvar = !!theApp.GetConfig("condvar", 1);
|
||||
|
||||
#ifdef _WINDOWS
|
||||
|
||||
if(pInitializeConditionVariable == NULL)
|
||||
{
|
||||
m_cv.available = false;
|
||||
condvar = false;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
if(condvar)
|
||||
{
|
||||
m_notempty = new GSCondVar();
|
||||
m_empty = new GSCondVar();
|
||||
m_lock = new GSCondVarLock();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_notempty = new GSEvent();
|
||||
m_empty = new GSEvent();
|
||||
m_lock = new GSCritSec();
|
||||
}
|
||||
|
||||
CreateThread();
|
||||
}
|
||||
|
||||
|
@ -350,14 +316,13 @@ public:
|
|||
{
|
||||
m_exit = true;
|
||||
|
||||
if(m_cv.available)
|
||||
{
|
||||
m_cv.notempty.Set();
|
||||
}
|
||||
else
|
||||
{
|
||||
m_ev.notempty.Set();
|
||||
}
|
||||
m_notempty->Set();
|
||||
|
||||
CloseThread();
|
||||
|
||||
delete m_notempty;
|
||||
delete m_empty;
|
||||
delete m_lock;
|
||||
}
|
||||
|
||||
bool IsEmpty() const
|
||||
|
@ -369,51 +334,32 @@ public:
|
|||
|
||||
void Push(const T& item)
|
||||
{
|
||||
if(m_cv.available)
|
||||
{
|
||||
m_cv.lock.Lock();
|
||||
m_lock->Lock();
|
||||
|
||||
m_queue.push(item);
|
||||
m_queue.push(item);
|
||||
|
||||
m_count++;
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
|
||||
m_cv.notempty.Set();
|
||||
}
|
||||
else
|
||||
if(m_count++ == 0)
|
||||
{
|
||||
GSAutoLock l(&m_ev.lock);
|
||||
|
||||
m_queue.push(item);
|
||||
|
||||
m_count++;
|
||||
|
||||
m_ev.notempty.Set();
|
||||
m_notempty->Set();
|
||||
}
|
||||
|
||||
m_lock->Unlock();
|
||||
}
|
||||
|
||||
void Wait()
|
||||
{
|
||||
if(m_cv.available)
|
||||
if(m_count > 0)
|
||||
{
|
||||
if(m_count > 0)
|
||||
m_lock->Lock();
|
||||
|
||||
while(m_count != 0)
|
||||
{
|
||||
m_cv.lock.Lock();
|
||||
|
||||
while(!m_queue.empty())
|
||||
{
|
||||
m_cv.empty.Wait(m_cv.lock);
|
||||
}
|
||||
|
||||
ASSERT(m_count == 0);
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
m_empty->Wait(m_lock);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
while(m_count > 0) _mm_pause();
|
||||
|
||||
ASSERT(m_queue.empty());
|
||||
|
||||
m_lock->Unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue