mirror of https://github.com/PCSX2/pcsx2.git
GSdx: mipmapping fix (ford mustang racing, and probably other games which use small, non-square textures)
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4529 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
15f7b2b6d8
commit
732b038571
|
@ -249,10 +249,15 @@ public:
|
||||||
GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
|
GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
|
||||||
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
|
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
|
||||||
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
|
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
|
||||||
const GSVector4 offset(16, 128, 16, 128);
|
|
||||||
|
|
||||||
if(!rgba)
|
if(!rgba)
|
||||||
ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw();
|
{
|
||||||
|
ys = ys.zyxw();
|
||||||
|
us = us.zyxw();
|
||||||
|
vs = vs.zyxw();
|
||||||
|
}
|
||||||
|
|
||||||
|
const GSVector4 offset(16, 128, 16, 128);
|
||||||
|
|
||||||
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
||||||
{
|
{
|
||||||
|
@ -261,8 +266,8 @@ public:
|
||||||
|
|
||||||
for(int i = 0; i < w; i += 2)
|
for(int i = 0; i < w; i += 2)
|
||||||
{
|
{
|
||||||
GSVector4 c0 = GSVector4(s[i + 0]);
|
GSVector4 c0 = GSVector4::rgba32(s[i + 0]);
|
||||||
GSVector4 c1 = GSVector4(s[i + 1]);
|
GSVector4 c1 = GSVector4::rgba32(s[i + 1]);
|
||||||
GSVector4 c2 = c0 + c1;
|
GSVector4 c2 = c0 + c1;
|
||||||
|
|
||||||
GSVector4 lo = (c0 * ys).hadd(c2 * us);
|
GSVector4 lo = (c0 * ys).hadd(c2 * us);
|
||||||
|
|
|
@ -109,7 +109,7 @@ bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
||||||
case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
|
case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
|
||||||
case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
|
case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
|
||||||
case 6: ASSERT(0); return false; // ffx2 menu
|
case 6: ASSERT(0); return false; // ffx2 menu
|
||||||
case 7: ASSERT(0); return false;
|
case 7: ASSERT(0); return false; // ford mustang racing
|
||||||
default: __assume(0);
|
default: __assume(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -350,7 +350,7 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
||||||
|
|
||||||
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
|
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
|
||||||
{
|
{
|
||||||
GSVector4 color = GSVector4(c) * (1.0f / 255);
|
GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255);
|
||||||
|
|
||||||
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v);
|
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v);
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,7 @@
|
||||||
|
|
||||||
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||||
|
|
||||||
#error TODO: this is still bogus somewhere
|
#error TODO
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::Generate()
|
void GSDrawScanlineCodeGenerator::Generate()
|
||||||
{
|
{
|
||||||
|
@ -38,17 +38,13 @@ void GSDrawScanlineCodeGenerator::Generate()
|
||||||
push(r12);
|
push(r12);
|
||||||
push(r13);
|
push(r13);
|
||||||
|
|
||||||
enter(10 * 16, true);
|
sub(rsp, 8 + 10 * 16);
|
||||||
|
|
||||||
for(int i = 6; i < 16; i++)
|
for(int i = 6; i < 16; i++)
|
||||||
{
|
{
|
||||||
vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
|
vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
|
||||||
}
|
}
|
||||||
|
|
||||||
movsxd(rcx, ecx); // right
|
|
||||||
movsxd(rdx, edx); // left
|
|
||||||
movsxd(r8, r8d); // top
|
|
||||||
|
|
||||||
mov(r10, (size_t)&m_test[0]);
|
mov(r10, (size_t)&m_test[0]);
|
||||||
mov(r11, (size_t)&m_local);
|
mov(r11, (size_t)&m_local);
|
||||||
mov(r12, (size_t)m_local.gd);
|
mov(r12, (size_t)m_local.gd);
|
||||||
|
@ -84,7 +80,14 @@ L("loop");
|
||||||
|
|
||||||
// ebp = za
|
// ebp = za
|
||||||
|
|
||||||
|
if(m_sel.mmin)
|
||||||
|
{
|
||||||
|
SampleTextureLOD();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
SampleTexture();
|
SampleTexture();
|
||||||
|
}
|
||||||
|
|
||||||
// ebp = za
|
// ebp = za
|
||||||
// xmm2 = rb
|
// xmm2 = rb
|
||||||
|
@ -201,7 +204,7 @@ L("exit");
|
||||||
vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
|
vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
|
||||||
}
|
}
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 10 * 16);
|
||||||
|
|
||||||
pop(r13);
|
pop(r13);
|
||||||
pop(r12);
|
pop(r12);
|
||||||
|
@ -237,10 +240,9 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
mov(rax, rcx);
|
mov(rax, rcx);
|
||||||
sar(rax, 63);
|
sar(rax, 63);
|
||||||
and(rax, rcx);
|
and(rax, rcx);
|
||||||
add(rax, 7);
|
|
||||||
shl(rax, 4);
|
shl(rax, 4);
|
||||||
|
|
||||||
vpor(xmm15, ptr[rax + r10]);
|
vpor(xmm15, ptr[rax + r10 + 7 * 16]);
|
||||||
|
|
||||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||||
|
|
||||||
|
@ -256,8 +258,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
{
|
{
|
||||||
// edx = &m_local.d[skip]
|
// edx = &m_local.d[skip]
|
||||||
|
|
||||||
shl(rdx, 3);
|
lea(rdx, ptr[rdx * 8 + r11 + offsetof(GSScanlineLocalData, d)]);
|
||||||
lea(rdx, ptr[rdx + r11 + offsetof(GSScanlineLocalData, d)]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(!m_sel.sprite)
|
if(!m_sel.sprite)
|
||||||
|
@ -325,7 +326,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
|
|
||||||
vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
|
vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||||
|
|
||||||
if(!m_sel.sprite)
|
if(!m_sel.sprite || m_sel.mmin)
|
||||||
{
|
{
|
||||||
vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
|
vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||||
}
|
}
|
||||||
|
@ -338,12 +339,6 @@ void GSDrawScanlineCodeGenerator::Init()
|
||||||
vpsrlw(xmm6, 1);
|
vpsrlw(xmm6, 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_sel.mipmap && !m_sel.lcm)
|
|
||||||
{
|
|
||||||
vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
|
||||||
vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -441,17 +436,11 @@ void GSDrawScanlineCodeGenerator::Step()
|
||||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||||
vpaddd(xmm10, xmm1);
|
vpaddd(xmm10, xmm1);
|
||||||
|
|
||||||
if(!m_sel.sprite)
|
if(!m_sel.sprite || m_sel.mmin)
|
||||||
{
|
{
|
||||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||||
vpaddd(xmm11, xmm1);
|
vpaddd(xmm11, xmm1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(m_sel.mipmap && !m_sel.lcm)
|
|
||||||
{
|
|
||||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
|
||||||
vaddps(xmm12, xmm3);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -510,10 +499,9 @@ void GSDrawScanlineCodeGenerator::Step()
|
||||||
mov(rdx, rcx);
|
mov(rdx, rcx);
|
||||||
sar(rdx, 63);
|
sar(rdx, 63);
|
||||||
and(rdx, rcx);
|
and(rdx, rcx);
|
||||||
add(rdx, 7);
|
|
||||||
shl(rdx, 4);
|
shl(rdx, 4);
|
||||||
|
|
||||||
vmovdqa(xmm15, ptr[rdx + r10]);
|
vmovdqa(xmm15, ptr[rdx + r10 + 7 * 16]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||||
|
@ -589,11 +577,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||||
vpslld(xmm2, 31);
|
vpslld(xmm2, 31);
|
||||||
|
|
||||||
// GSVector4i zso = zs - o;
|
// GSVector4i zso = zs - o;
|
||||||
|
|
||||||
vpsubd(xmm0, xmm2);
|
|
||||||
|
|
||||||
// GSVector4i zdo = zd - o;
|
// GSVector4i zdo = zd - o;
|
||||||
|
|
||||||
|
vpsubd(xmm0, xmm2);
|
||||||
vpsubd(xmm1, xmm2);
|
vpsubd(xmm1, xmm2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -629,11 +615,6 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||||
|
|
||||||
// ebx = tex
|
// ebx = tex
|
||||||
|
|
||||||
if(m_sel.mipmap && !m_sel.lcm)
|
|
||||||
{
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if(!m_sel.fst)
|
if(!m_sel.fst)
|
||||||
{
|
{
|
||||||
vrcpps(xmm0, xmm12);
|
vrcpps(xmm0, xmm12);
|
||||||
|
@ -766,10 +747,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||||
|
|
||||||
ReadTexel(xmm0, xmm0, xmm4, xmm5);
|
ReadTexel(4, 0);
|
||||||
ReadTexel(xmm1, xmm1, xmm4, xmm5);
|
|
||||||
ReadTexel(xmm2, xmm2, xmm4, xmm5);
|
|
||||||
ReadTexel(xmm3, xmm3, xmm4, xmm5);
|
|
||||||
|
|
||||||
// xmm0 = c00
|
// xmm0 = c00
|
||||||
// xmm1 = c01
|
// xmm1 = c01
|
||||||
|
@ -863,7 +841,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||||
|
|
||||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||||
|
|
||||||
ReadTexel(xmm2, xmm3, xmm0, xmm1);
|
ReadTexel(1, 0);
|
||||||
|
|
||||||
// GSVector4i mask = GSVector4i::x00ff();
|
// GSVector4i mask = GSVector4i::x00ff();
|
||||||
|
|
||||||
|
@ -1032,6 +1010,18 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
{
|
{
|
||||||
if(!m_sel.fb)
|
if(!m_sel.fb)
|
||||||
|
@ -1046,6 +1036,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
// gat = gat.modulate16<1>(ga).clamp8();
|
// gat = gat.modulate16<1>(ga).clamp8();
|
||||||
|
|
||||||
modulate16(xmm3, xmm14, 1);
|
modulate16(xmm3, xmm14, 1);
|
||||||
|
|
||||||
clamp16(xmm3, xmm0);
|
clamp16(xmm3, xmm0);
|
||||||
|
|
||||||
// if(!tcc) gat = gat.mix16(ga.srl16(7));
|
// if(!tcc) gat = gat.mix16(ga.srl16(7));
|
||||||
|
@ -1053,6 +1044,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
if(!m_sel.tcc)
|
if(!m_sel.tcc)
|
||||||
{
|
{
|
||||||
vpsrlw(xmm1, xmm14, 7);
|
vpsrlw(xmm1, xmm14, 7);
|
||||||
|
|
||||||
mix16(xmm3, xmm1, xmm0);
|
mix16(xmm3, xmm1, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1065,6 +1057,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
if(!m_sel.tcc)
|
if(!m_sel.tcc)
|
||||||
{
|
{
|
||||||
vpsrlw(xmm1, xmm14, 7);
|
vpsrlw(xmm1, xmm14, 7);
|
||||||
|
|
||||||
mix16(xmm3, xmm1, xmm0);
|
mix16(xmm3, xmm1, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1075,7 +1068,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
|
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
|
||||||
|
|
||||||
vpsrlw(xmm1, xmm14, 7);
|
vpsrlw(xmm1, xmm14, 7);
|
||||||
if(m_sel.tcc) vpaddusb(xmm1, xmm3);
|
|
||||||
|
if(m_sel.tcc)
|
||||||
|
{
|
||||||
|
vpaddusb(xmm1, xmm3);
|
||||||
|
}
|
||||||
|
|
||||||
mix16(xmm3, xmm1, xmm0);
|
mix16(xmm3, xmm1, xmm0);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -1087,6 +1085,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
if(!m_sel.tcc)
|
if(!m_sel.tcc)
|
||||||
{
|
{
|
||||||
vpsrlw(xmm1, xmm14, 7);
|
vpsrlw(xmm1, xmm14, 7);
|
||||||
|
|
||||||
mix16(xmm3, xmm1, xmm0);
|
mix16(xmm3, xmm1, xmm0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1103,6 +1102,8 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: aa1
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::ReadMask()
|
void GSDrawScanlineCodeGenerator::ReadMask()
|
||||||
|
@ -1218,6 +1219,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
||||||
// rbt = rbt.modulate16<1>(rb).clamp8();
|
// rbt = rbt.modulate16<1>(rb).clamp8();
|
||||||
|
|
||||||
modulate16(xmm2, xmm13, 1);
|
modulate16(xmm2, xmm13, 1);
|
||||||
|
|
||||||
clamp16(xmm2, xmm0);
|
clamp16(xmm2, xmm0);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -1229,22 +1231,28 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
||||||
case TFX_HIGHLIGHT:
|
case TFX_HIGHLIGHT:
|
||||||
case TFX_HIGHLIGHT2:
|
case TFX_HIGHLIGHT2:
|
||||||
|
|
||||||
|
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
|
||||||
|
|
||||||
|
vmovdqa(xmm1, xmm3);
|
||||||
|
|
||||||
|
modulate16(xmm3, xmm14, 1);
|
||||||
|
|
||||||
vpshuflw(xmm6, xmm14, _MM_SHUFFLE(3, 3, 1, 1));
|
vpshuflw(xmm6, xmm14, _MM_SHUFFLE(3, 3, 1, 1));
|
||||||
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1));
|
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1));
|
||||||
vpsrlw(xmm6, 7);
|
vpsrlw(xmm6, 7);
|
||||||
|
|
||||||
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
|
|
||||||
|
|
||||||
vmovdqa(xmm1, xmm3);
|
|
||||||
modulate16(xmm3, xmm14, 1);
|
|
||||||
vpaddw(xmm3, xmm6);
|
vpaddw(xmm3, xmm6);
|
||||||
|
|
||||||
clamp16(xmm3, xmm0);
|
clamp16(xmm3, xmm0);
|
||||||
|
|
||||||
mix16(xmm3, xmm1, xmm0);
|
mix16(xmm3, xmm1, xmm0);
|
||||||
|
|
||||||
// rbt = rbt.modulate16<1>(rb).add16(af).clamp8();
|
// rbt = rbt.modulate16<1>(rb).add16(af).clamp8();
|
||||||
|
|
||||||
modulate16(xmm2, xmm13, 1);
|
modulate16(xmm2, xmm13, 1);
|
||||||
|
|
||||||
vpaddw(xmm2, xmm6);
|
vpaddw(xmm2, xmm6);
|
||||||
|
|
||||||
clamp16(xmm2, xmm0);
|
clamp16(xmm2, xmm0);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
@ -1797,25 +1805,22 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2)
|
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||||
{
|
{
|
||||||
ReadTexel(dst, addr, 0);
|
// TODO
|
||||||
ReadTexel(dst, addr, 1);
|
|
||||||
ReadTexel(dst, addr, 2);
|
|
||||||
ReadTexel(dst, addr, 3);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||||
{
|
{
|
||||||
const Address& src = m_sel.tlu ? ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4];
|
const Address& src = m_sel.tlu ? ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4];
|
||||||
|
|
||||||
vpextrd(eax, addr, i);
|
if(i == 0) vmovd(eax, addr);
|
||||||
|
else vpextrd(eax, addr, i);
|
||||||
movsxd(rax, eax);
|
|
||||||
|
|
||||||
if(m_sel.tlu) movzx(rax, byte[rbx + rax]);
|
if(m_sel.tlu) movzx(rax, byte[rbx + rax]);
|
||||||
|
|
||||||
vpinsrd(dst, src, i);
|
if(i == 0) vmovd(dst, src);
|
||||||
|
else vpinsrd(dst, src, i);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
|
@ -19,9 +19,6 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix)
|
|
||||||
// TODO: for edges doing 4 pixels is wasteful (needed memory access * 4)
|
|
||||||
|
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "GSDrawScanlineCodeGenerator.h"
|
#include "GSDrawScanlineCodeGenerator.h"
|
||||||
#include "GSVertexSW.h"
|
#include "GSVertexSW.h"
|
||||||
|
|
|
@ -21,37 +21,3 @@
|
||||||
|
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "GSFunctionMap.h"
|
#include "GSFunctionMap.h"
|
||||||
|
|
||||||
void GSCodeGenerator::enter(uint32 size, bool align)
|
|
||||||
{
|
|
||||||
#ifdef _M_AMD64
|
|
||||||
|
|
||||||
push(r15);
|
|
||||||
mov(r15, rsp);
|
|
||||||
if(size > 0) sub(rsp, size);
|
|
||||||
if(align) and(rsp, 0xfffffffffffffff0);
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
push(ebp);
|
|
||||||
mov(ebp, esp);
|
|
||||||
if(size > 0) sub(esp, size);
|
|
||||||
if(align) and(esp, 0xfffffff0);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void GSCodeGenerator::leave()
|
|
||||||
{
|
|
||||||
#ifdef _M_AMD64
|
|
||||||
|
|
||||||
mov(rsp, r15);
|
|
||||||
pop(r15);
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
mov(esp, ebp);
|
|
||||||
pop(ebp);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
|
@ -161,9 +161,6 @@ class GSCodeGenerator : public Xbyak::CodeGenerator
|
||||||
protected:
|
protected:
|
||||||
Xbyak::util::Cpu m_cpu;
|
Xbyak::util::Cpu m_cpu;
|
||||||
|
|
||||||
void enter(uint32 size, bool align);
|
|
||||||
void leave();
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSCodeGenerator(void* code, size_t maxsize)
|
GSCodeGenerator(void* code, size_t maxsize)
|
||||||
: Xbyak::CodeGenerator(maxsize, code)
|
: Xbyak::CodeGenerator(maxsize, code)
|
||||||
|
|
|
@ -263,7 +263,7 @@ public:
|
||||||
{
|
{
|
||||||
ps_sel.fog = 1;
|
ps_sel.fog = 1;
|
||||||
|
|
||||||
ps_cb.FogColor_AREF = GSVector4(env.FOGCOL.u32[0]) / 255;
|
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(context->TEST.ATE)
|
if(context->TEST.ATE)
|
||||||
|
|
|
@ -384,7 +384,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||||
|
|
||||||
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
|
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
|
||||||
{
|
{
|
||||||
k = (int)m_vt.m_lod.x << 16; // set lod to max
|
k = (int)m_vt.m_lod.x << 16; // set lod to max level
|
||||||
|
|
||||||
gd.sel.lcm = 1; // lod is constant
|
gd.sel.lcm = 1; // lod is constant
|
||||||
gd.sel.mmin = 1; // tri-linear is meaningless
|
gd.sel.mmin = 1; // tri-linear is meaningless
|
||||||
|
@ -432,7 +432,11 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||||
|
|
||||||
static int s_counter = 0;
|
static int s_counter = 0;
|
||||||
|
|
||||||
//t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
|
if(0)
|
||||||
|
//if(context->TEX0.TH > context->TEX0.TW)
|
||||||
|
//if(s_n >= s_saven && s_n < s_saven + 3)
|
||||||
|
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
|
||||||
|
t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
|
||||||
|
|
||||||
for(int i = 1, j = std::min<int>((int)context->TEX1.MXL, 6); i <= j; i++)
|
for(int i = 1, j = std::min<int>((int)context->TEX1.MXL, 6); i <= j; i++)
|
||||||
{
|
{
|
||||||
|
@ -487,7 +491,28 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||||
|
|
||||||
gd.tex[i] = t->m_buff;
|
gd.tex[i] = t->m_buff;
|
||||||
|
|
||||||
// t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
|
if(0)
|
||||||
|
//if(context->TEX0.TH > context->TEX0.TW)
|
||||||
|
//if(s_n >= s_saven && s_n < s_saven + 3)
|
||||||
|
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
|
||||||
|
{
|
||||||
|
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
|
||||||
|
/*
|
||||||
|
GIFRegTEX0 TEX0 = MIP_TEX0;
|
||||||
|
TEX0.TBP0 = context->TEX0.TBP0;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
TEX0.TBP0++;
|
||||||
|
const GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA, r, gd.sel.tw + 3);
|
||||||
|
if(t == NULL) {ASSERT(0); return false;}
|
||||||
|
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, TEX0.TBP0, i));
|
||||||
|
}
|
||||||
|
while(TEX0.TBP0 < 0x3fff);
|
||||||
|
*/
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
s_counter++;
|
s_counter++;
|
||||||
|
@ -701,38 +726,31 @@ void GSRendererSW::VertexKick(bool skip)
|
||||||
{
|
{
|
||||||
const GSDrawingContext* context = m_context;
|
const GSDrawingContext* context = m_context;
|
||||||
|
|
||||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
|
GSVertexSW& dst = m_vl.AddTail();
|
||||||
|
|
||||||
xy = xy.insert16<3>(m_v.FOG.F);
|
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
|
||||||
xy = xy.upl16();
|
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F);
|
||||||
xy -= context->XYOFFSET;
|
|
||||||
|
|
||||||
GSVertexSW v;
|
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||||
|
|
||||||
v.p = GSVector4(xy) * g_pos_scale;
|
|
||||||
|
|
||||||
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
|
|
||||||
|
|
||||||
if(tme)
|
if(tme)
|
||||||
{
|
{
|
||||||
|
GSVector4 t;
|
||||||
|
|
||||||
if(fst)
|
if(fst)
|
||||||
{
|
{
|
||||||
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||||
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
||||||
}
|
}
|
||||||
|
|
||||||
v.t = v.t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
dst.t = t;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSVertexSW& dst = m_vl.AddTail();
|
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
|
||||||
|
|
||||||
dst = v;
|
|
||||||
|
|
||||||
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
|
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ using namespace Xbyak;
|
||||||
|
|
||||||
void GSSetupPrimCodeGenerator::Generate()
|
void GSSetupPrimCodeGenerator::Generate()
|
||||||
{
|
{
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
vmovdqa(ptr[rsp + 0], xmm6);
|
vmovdqa(ptr[rsp + 0], xmm6);
|
||||||
vmovdqa(ptr[rsp + 16], xmm7);
|
vmovdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -55,7 +55,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
||||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
@ -186,16 +186,11 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
|
|
||||||
if(m_sel.fst)
|
if(m_sel.fst)
|
||||||
{
|
{
|
||||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
// m_local.d4.stq = GSVector4i(t * 4.0f);
|
||||||
|
|
||||||
if(m_sel.mipmap && !m_sel.lcm)
|
|
||||||
{
|
|
||||||
vmovhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
|
|
||||||
}
|
|
||||||
|
|
||||||
vcvttps2dq(xmm1, xmm1);
|
vcvttps2dq(xmm1, xmm1);
|
||||||
|
|
||||||
vmovq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -204,7 +199,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
vmovaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
vmovaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||||
{
|
{
|
||||||
// GSVector4 ds = t.xxxx();
|
// GSVector4 ds = t.xxxx();
|
||||||
// GSVector4 dt = t.yyyy();
|
// GSVector4 dt = t.yyyy();
|
||||||
|
@ -218,7 +213,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
|
|
||||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||||
|
|
||||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
if(m_sel.fst)
|
||||||
{
|
{
|
||||||
// m_local.d[i].s/t = GSVector4i(v);
|
// m_local.d[i].s/t = GSVector4i(v);
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ using namespace Xbyak;
|
||||||
|
|
||||||
void GSSetupPrimCodeGenerator::Generate()
|
void GSSetupPrimCodeGenerator::Generate()
|
||||||
{
|
{
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
vmovdqa(ptr[rsp + 0], xmm6);
|
vmovdqa(ptr[rsp + 0], xmm6);
|
||||||
vmovdqa(ptr[rsp + 16], xmm7);
|
vmovdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
||||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
@ -191,16 +191,11 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
|
|
||||||
if(m_sel.fst)
|
if(m_sel.fst)
|
||||||
{
|
{
|
||||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
// m_local.d4.stq = GSVector4i(t * 4.0f);
|
||||||
|
|
||||||
if(m_sel.mipmap && !m_sel.lcm)
|
|
||||||
{
|
|
||||||
movhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
|
|
||||||
}
|
|
||||||
|
|
||||||
cvttps2dq(xmm1, xmm1);
|
cvttps2dq(xmm1, xmm1);
|
||||||
|
|
||||||
movq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -209,7 +204,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
movaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
movaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||||
}
|
}
|
||||||
|
|
||||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||||
{
|
{
|
||||||
// GSVector4 ds = t.xxxx();
|
// GSVector4 ds = t.xxxx();
|
||||||
// GSVector4 dt = t.yyyy();
|
// GSVector4 dt = t.yyyy();
|
||||||
|
@ -225,7 +220,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
||||||
movaps(xmm2, xmm1);
|
movaps(xmm2, xmm1);
|
||||||
mulps(xmm2, Xmm(4 + i));
|
mulps(xmm2, Xmm(4 + i));
|
||||||
|
|
||||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
if(m_sel.fst)
|
||||||
{
|
{
|
||||||
// m_local.d[i].s/t = GSVector4i(v);
|
// m_local.d[i].s/t = GSVector4i(v);
|
||||||
|
|
||||||
|
|
|
@ -322,8 +322,7 @@ void GSSetupPrimCodeGenerator::Color()
|
||||||
{
|
{
|
||||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||||
|
|
||||||
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
|
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
|
||||||
cvttps2dq(xmm0, xmm0);
|
|
||||||
|
|
||||||
// c = c.upl16(c.zwxy());
|
// c = c.upl16(c.zwxy());
|
||||||
|
|
||||||
|
|
|
@ -590,41 +590,41 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
||||||
|
|
||||||
if(m_env.CTXT[i].TEX1.MTBA)
|
if(m_env.CTXT[i].TEX1.MTBA)
|
||||||
{
|
{
|
||||||
|
// NOTE 1: TEX1.MXL must not be automatically set to 3 here.
|
||||||
|
// NOTE 2: Mipmap levels are tightly packed, if (tbw << 6) > (1 << tw) then the left-over space to the right is used. (common for PSM_PSMT4)
|
||||||
|
// NOTE 3: Non-rectangular textures are treated as rectangular when calculating the occupied space (height is extended, not sure about width)
|
||||||
|
|
||||||
|
uint32 bp = TEX0.TBP0;
|
||||||
|
uint32 bw = TEX0.TBW;
|
||||||
|
uint32 w = 1u << TEX0.TW;
|
||||||
|
uint32 h = 1u << TEX0.TH;
|
||||||
uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
|
uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
|
||||||
|
|
||||||
uint32 tbp = TEX0.TBP0;
|
if(h < w) h = w;
|
||||||
uint32 tbw = TEX0.TBW;
|
|
||||||
uint32 th = TEX0.TH;
|
|
||||||
|
|
||||||
if(th >= 3)
|
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||||
{
|
bw = std::max<uint32>(bw >> 1, 1);
|
||||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
w = std::max<uint32>(w >> 1, 1);
|
||||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
h = std::max<uint32>(h >> 1, 1);
|
||||||
th--;
|
|
||||||
|
|
||||||
m_env.CTXT[i].MIPTBP1.TBP1 = tbp;
|
m_env.CTXT[i].MIPTBP1.TBP1 = bp;
|
||||||
m_env.CTXT[i].MIPTBP1.TBW1 = tbw;
|
m_env.CTXT[i].MIPTBP1.TBW1 = bw;
|
||||||
|
|
||||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
bw = std::max<uint32>(bw >> 1, 1);
|
||||||
th--;
|
w = std::max<uint32>(w >> 1, 1);
|
||||||
|
h = std::max<uint32>(h >> 1, 1);
|
||||||
|
|
||||||
m_env.CTXT[i].MIPTBP1.TBP2 = tbp;
|
m_env.CTXT[i].MIPTBP1.TBP2 = bp;
|
||||||
m_env.CTXT[i].MIPTBP1.TBW2 = tbw;
|
m_env.CTXT[i].MIPTBP1.TBW2 = bw;
|
||||||
|
|
||||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
bw = std::max<uint32>(bw >> 1, 1);
|
||||||
th--;
|
w = std::max<uint32>(w >> 1, 1);
|
||||||
|
h = std::max<uint32>(h >> 1, 1);
|
||||||
|
|
||||||
m_env.CTXT[i].MIPTBP1.TBP3 = tbp;
|
m_env.CTXT[i].MIPTBP1.TBP3 = bp;
|
||||||
m_env.CTXT[i].MIPTBP1.TBW3 = tbw;
|
m_env.CTXT[i].MIPTBP1.TBW3 = bw;
|
||||||
|
|
||||||
// NOTE: TEX1.MXL must not be automatically set to 3 here
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ASSERT(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
// printf("MTBA\n");
|
// printf("MTBA\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -29,6 +29,7 @@ const GSVector4 GSVector4::m_one(1.0f);
|
||||||
const GSVector4 GSVector4::m_two(2.0f);
|
const GSVector4 GSVector4::m_two(2.0f);
|
||||||
const GSVector4 GSVector4::m_four(4.0f);
|
const GSVector4 GSVector4::m_four(4.0f);
|
||||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||||
|
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
|
||||||
|
|
||||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||||
{
|
{
|
||||||
|
|
|
@ -2333,6 +2333,7 @@ public:
|
||||||
static const GSVector4 m_two;
|
static const GSVector4 m_two;
|
||||||
static const GSVector4 m_four;
|
static const GSVector4 m_four;
|
||||||
static const GSVector4 m_x4b000000;
|
static const GSVector4 m_x4b000000;
|
||||||
|
static const GSVector4 m_x4f800000;
|
||||||
|
|
||||||
__forceinline GSVector4()
|
__forceinline GSVector4()
|
||||||
{
|
{
|
||||||
|
@ -2385,9 +2386,18 @@ public:
|
||||||
this->m = m;
|
this->m = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline explicit GSVector4(uint32 u32)
|
__forceinline explicit GSVector4(int i)
|
||||||
{
|
{
|
||||||
*this = GSVector4(GSVector4i::load((int)u32).u8to32());
|
GSVector4i v((int)i);
|
||||||
|
|
||||||
|
*this = GSVector4(v);
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline explicit GSVector4(uint32 u)
|
||||||
|
{
|
||||||
|
GSVector4i v((int)u);
|
||||||
|
|
||||||
|
*this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline explicit GSVector4(const GSVector4i& v);
|
__forceinline explicit GSVector4(const GSVector4i& v);
|
||||||
|
@ -2407,11 +2417,6 @@ public:
|
||||||
this->m = m;
|
this->m = m;
|
||||||
}
|
}
|
||||||
|
|
||||||
__forceinline void operator = (uint32 u32)
|
|
||||||
{
|
|
||||||
*this = GSVector4(GSVector4i::load((int)u32).u8to32());
|
|
||||||
}
|
|
||||||
|
|
||||||
__forceinline operator __m128() const
|
__forceinline operator __m128() const
|
||||||
{
|
{
|
||||||
return m;
|
return m;
|
||||||
|
@ -2422,6 +2427,16 @@ public:
|
||||||
return GSVector4i(*this).rgba32();
|
return GSVector4i(*this).rgba32();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector4 rgba32(uint32 rgba)
|
||||||
|
{
|
||||||
|
return GSVector4(GSVector4i::load((int)rgba).u8to32());
|
||||||
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector4 rgba32(uint32 rgba, int shift)
|
||||||
|
{
|
||||||
|
return GSVector4(GSVector4i::load((int)rgba).u8to32() << shift);
|
||||||
|
}
|
||||||
|
|
||||||
__forceinline static GSVector4 cast(const GSVector4i& v);
|
__forceinline static GSVector4 cast(const GSVector4i& v);
|
||||||
|
|
||||||
__forceinline GSVector4 abs() const
|
__forceinline GSVector4 abs() const
|
||||||
|
@ -2840,6 +2855,13 @@ public:
|
||||||
return GSVector4(_mm_load_ss(&f));
|
return GSVector4(_mm_load_ss(&f));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__forceinline static GSVector4 load(uint32 u)
|
||||||
|
{
|
||||||
|
GSVector4i v = GSVector4i::load((int)u);
|
||||||
|
|
||||||
|
return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
|
||||||
|
}
|
||||||
|
|
||||||
template<bool aligned> __forceinline static GSVector4 load(const void* p)
|
template<bool aligned> __forceinline static GSVector4 load(const void* p)
|
||||||
{
|
{
|
||||||
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
|
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
|
||||||
|
|
|
@ -51,7 +51,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
vmovdqa(ptr[rsp + 0], xmm6);
|
vmovdqa(ptr[rsp + 0], xmm6);
|
||||||
vmovdqa(ptr[rsp + 16], xmm7);
|
vmovdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -168,7 +168,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
||||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
@ -200,7 +200,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
vmovdqa(ptr[rsp + 0], xmm6);
|
vmovdqa(ptr[rsp + 0], xmm6);
|
||||||
vmovdqa(ptr[rsp + 16], xmm7);
|
vmovdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -334,7 +334,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
||||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
@ -364,7 +364,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
vmovdqa(ptr[rsp + 0], xmm6);
|
vmovdqa(ptr[rsp + 0], xmm6);
|
||||||
vmovdqa(ptr[rsp + 16], xmm7);
|
vmovdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -488,7 +488,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
||||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
movdqa(ptr[rsp + 0], xmm6);
|
movdqa(ptr[rsp + 0], xmm6);
|
||||||
movdqa(ptr[rsp + 16], xmm7);
|
movdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -172,7 +172,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
||||||
movdqa(xmm6, ptr[rsp + 0]);
|
movdqa(xmm6, ptr[rsp + 0]);
|
||||||
movdqa(xmm7, ptr[rsp + 16]);
|
movdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
@ -204,7 +204,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
movdqa(ptr[rsp + 0], xmm6);
|
movdqa(ptr[rsp + 0], xmm6);
|
||||||
movdqa(ptr[rsp + 16], xmm7);
|
movdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -355,7 +355,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
||||||
movdqa(xmm6, ptr[rsp + 0]);
|
movdqa(xmm6, ptr[rsp + 0]);
|
||||||
movdqa(xmm7, ptr[rsp + 16]);
|
movdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
@ -385,7 +385,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
enter(32, true);
|
sub(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
movdqa(ptr[rsp + 0], xmm6);
|
movdqa(ptr[rsp + 0], xmm6);
|
||||||
movdqa(ptr[rsp + 16], xmm7);
|
movdqa(ptr[rsp + 16], xmm7);
|
||||||
|
@ -535,7 +535,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
||||||
movdqa(xmm6, ptr[rsp + 0]);
|
movdqa(xmm6, ptr[rsp + 0]);
|
||||||
movdqa(xmm7, ptr[rsp + 16]);
|
movdqa(xmm7, ptr[rsp + 16]);
|
||||||
|
|
||||||
leave();
|
add(rsp, 8 + 2 * 16);
|
||||||
|
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue