GSdx: mipmapping fix (ford mustang racing, and probably other games which use small, non-square textures)

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4529 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-04-04 11:05:54 +00:00
parent 15f7b2b6d8
commit 732b038571
17 changed files with 194 additions and 194 deletions

View File

@ -249,10 +249,15 @@ public:
GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
const GSVector4 offset(16, 128, 16, 128);
if (!rgba)
ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw();
if(!rgba)
{
ys = ys.zyxw();
us = us.zyxw();
vs = vs.zyxw();
}
const GSVector4 offset(16, 128, 16, 128);
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
{
@ -261,8 +266,8 @@ public:
for(int i = 0; i < w; i += 2)
{
GSVector4 c0 = GSVector4(s[i + 0]);
GSVector4 c1 = GSVector4(s[i + 1]);
GSVector4 c0 = GSVector4::rgba32(s[i + 0]);
GSVector4 c1 = GSVector4::rgba32(s[i + 1]);
GSVector4 c2 = c0 + c1;
GSVector4 lo = (c0 * ys).hadd(c2 * us);

View File

@ -109,7 +109,7 @@ bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
case 6: ASSERT(0); return false; // ffx2 menu
case 7: ASSERT(0); return false;
case 7: ASSERT(0); return false; // ford mustang racing
default: __assume(0);
}

View File

@ -350,7 +350,7 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
{
GSVector4 color = GSVector4(c) * (1.0f / 255);
GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255);
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v);
}

View File

@ -25,7 +25,7 @@
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
#error TODO: this is still bogus somewhere
#error TODO
void GSDrawScanlineCodeGenerator::Generate()
{
@ -38,17 +38,13 @@ void GSDrawScanlineCodeGenerator::Generate()
push(r12);
push(r13);
enter(10 * 16, true);
sub(rsp, 8 + 10 * 16);
for(int i = 6; i < 16; i++)
{
vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
}
movsxd(rcx, ecx); // right
movsxd(rdx, edx); // left
movsxd(r8, r8d); // top
mov(r10, (size_t)&m_test[0]);
mov(r11, (size_t)&m_local);
mov(r12, (size_t)m_local.gd);
@ -84,7 +80,14 @@ L("loop");
// ebp = za
SampleTexture();
if(m_sel.mmin)
{
SampleTextureLOD();
}
else
{
SampleTexture();
}
// ebp = za
// xmm2 = rb
@ -201,7 +204,7 @@ L("exit");
vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
}
leave();
add(rsp, 8 + 10 * 16);
pop(r13);
pop(r12);
@ -237,10 +240,9 @@ void GSDrawScanlineCodeGenerator::Init()
mov(rax, rcx);
sar(rax, 63);
and(rax, rcx);
add(rax, 7);
shl(rax, 4);
vpor(xmm15, ptr[rax + r10]);
vpor(xmm15, ptr[rax + r10 + 7 * 16]);
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
@ -256,8 +258,7 @@ void GSDrawScanlineCodeGenerator::Init()
{
// edx = &m_local.d[skip]
shl(rdx, 3);
lea(rdx, ptr[rdx + r11 + offsetof(GSScanlineLocalData, d)]);
lea(rdx, ptr[rdx * 8 + r11 + offsetof(GSScanlineLocalData, d)]);
}
if(!m_sel.sprite)
@ -325,7 +326,7 @@ void GSDrawScanlineCodeGenerator::Init()
vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
if(!m_sel.sprite)
if(!m_sel.sprite || m_sel.mmin)
{
vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
}
@ -338,12 +339,6 @@ void GSDrawScanlineCodeGenerator::Init()
vpsrlw(xmm6, 1);
}
}
if(m_sel.mipmap && !m_sel.lcm)
{
vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]);
}
}
else
{
@ -441,17 +436,11 @@ void GSDrawScanlineCodeGenerator::Step()
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
vpaddd(xmm10, xmm1);
if(!m_sel.sprite)
if(!m_sel.sprite || m_sel.mmin)
{
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
vpaddd(xmm11, xmm1);
}
if(m_sel.mipmap && !m_sel.lcm)
{
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
vaddps(xmm12, xmm3);
}
}
else
{
@ -510,10 +499,9 @@ void GSDrawScanlineCodeGenerator::Step()
mov(rdx, rcx);
sar(rdx, 63);
and(rdx, rcx);
add(rdx, 7);
shl(rdx, 4);
vmovdqa(xmm15, ptr[rdx + r10]);
vmovdqa(xmm15, ptr[rdx + r10 + 7 * 16]);
}
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
@ -589,11 +577,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
vpslld(xmm2, 31);
// GSVector4i zso = zs - o;
vpsubd(xmm0, xmm2);
// GSVector4i zdo = zd - o;
vpsubd(xmm0, xmm2);
vpsubd(xmm1, xmm2);
}
@ -629,11 +615,6 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// ebx = tex
if(m_sel.mipmap && !m_sel.lcm)
{
}
if(!m_sel.fst)
{
vrcpps(xmm0, xmm12);
@ -766,10 +747,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
ReadTexel(xmm0, xmm0, xmm4, xmm5);
ReadTexel(xmm1, xmm1, xmm4, xmm5);
ReadTexel(xmm2, xmm2, xmm4, xmm5);
ReadTexel(xmm3, xmm3, xmm4, xmm5);
ReadTexel(4, 0);
// xmm0 = c00
// xmm1 = c01
@ -863,7 +841,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
ReadTexel(xmm2, xmm3, xmm0, xmm1);
ReadTexel(1, 0);
// GSVector4i mask = GSVector4i::x00ff();
@ -1032,6 +1010,18 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
}
}
// Placeholder for the texture sampling path that is selected instead of
// SampleTexture() when m_sel.mmin is set. The body is empty, so no code is
// emitted for that case yet.
// NOTE(review): presumably meant to become the level-of-detail (mipmapped)
// variant of SampleTexture() - confirm against the other code generators.
void GSDrawScanlineCodeGenerator::SampleTextureLOD()
{
// TODO: not implemented
}
// Placeholder, single-register overload: the body is empty, so calling it
// emits nothing and leaves uv untouched.
// NOTE(review): presumably the counterpart of Wrap(uv) for the LOD sampling
// path - confirm once SampleTextureLOD() is implemented.
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
{
// TODO: not implemented
}
// Placeholder, two-register overload: the body is empty, so calling it emits
// nothing and leaves uv0 / uv1 untouched.
// NOTE(review): presumably the counterpart of Wrap(uv0, uv1) for the LOD
// sampling path - confirm once SampleTextureLOD() is implemented.
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
{
// TODO: not implemented
}
void GSDrawScanlineCodeGenerator::AlphaTFX()
{
if(!m_sel.fb)
@ -1046,6 +1036,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
// gat = gat.modulate16<1>(ga).clamp8();
modulate16(xmm3, xmm14, 1);
clamp16(xmm3, xmm0);
// if(!tcc) gat = gat.mix16(ga.srl16(7));
@ -1053,6 +1044,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
if(!m_sel.tcc)
{
vpsrlw(xmm1, xmm14, 7);
mix16(xmm3, xmm1, xmm0);
}
@ -1065,6 +1057,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
if(!m_sel.tcc)
{
vpsrlw(xmm1, xmm14, 7);
mix16(xmm3, xmm1, xmm0);
}
@ -1075,7 +1068,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
vpsrlw(xmm1, xmm14, 7);
if(m_sel.tcc) vpaddusb(xmm1, xmm3);
if(m_sel.tcc)
{
vpaddusb(xmm1, xmm3);
}
mix16(xmm3, xmm1, xmm0);
break;
@ -1087,6 +1085,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
if(!m_sel.tcc)
{
vpsrlw(xmm1, xmm14, 7);
mix16(xmm3, xmm1, xmm0);
}
@ -1103,6 +1102,8 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
break;
}
// TODO: aa1
}
void GSDrawScanlineCodeGenerator::ReadMask()
@ -1218,6 +1219,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
// rbt = rbt.modulate16<1>(rb).clamp8();
modulate16(xmm2, xmm13, 1);
clamp16(xmm2, xmm0);
break;
@ -1229,22 +1231,28 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
case TFX_HIGHLIGHT:
case TFX_HIGHLIGHT2:
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
vmovdqa(xmm1, xmm3);
modulate16(xmm3, xmm14, 1);
vpshuflw(xmm6, xmm14, _MM_SHUFFLE(3, 3, 1, 1));
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1));
vpsrlw(xmm6, 7);
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
vmovdqa(xmm1, xmm3);
modulate16(xmm3, xmm14, 1);
vpaddw(xmm3, xmm6);
clamp16(xmm3, xmm0);
mix16(xmm3, xmm1, xmm0);
// rbt = rbt.modulate16<1>(rb).add16(af).clamp8();
modulate16(xmm2, xmm13, 1);
vpaddw(xmm2, xmm6);
clamp16(xmm2, xmm0);
break;
@ -1797,25 +1805,22 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
}
}
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2)
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
{
ReadTexel(dst, addr, 0);
ReadTexel(dst, addr, 1);
ReadTexel(dst, addr, 2);
ReadTexel(dst, addr, 3);
// TODO
}
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
{
const Address& src = m_sel.tlu ? ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4];
vpextrd(eax, addr, i);
movsxd(rax, eax);
if(i == 0) vmovd(eax, addr);
else vpextrd(eax, addr, i);
if(m_sel.tlu) movzx(rax, byte[rbx + rax]);
vpinsrd(dst, src, i);
if(i == 0) vmovd(dst, src);
else vpinsrd(dst, src, i);
}
#endif

View File

@ -19,9 +19,6 @@
*
*/
// TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix)
// TODO: for edges doing 4 pixels is wasteful (needed memory access * 4)
#include "stdafx.h"
#include "GSDrawScanlineCodeGenerator.h"
#include "GSVertexSW.h"

View File

@ -21,37 +21,3 @@
#include "stdafx.h"
#include "GSFunctionMap.h"
// Emits a function prologue into the generated code.
//
// The frame register (r15 when _M_AMD64 is defined, ebp otherwise) is pushed
// and then loaded with the current stack pointer, so that leave() can restore
// the stack no matter how much it is adjusted afterwards.
//
// size  - number of bytes to subtract from the stack pointer; no sub is
//         emitted when it is 0
// align - when true, the low four bits of the stack pointer are cleared,
//         i.e. it is rounded down to a 16-byte boundary
void GSCodeGenerator::enter(uint32 size, bool align)
{
#ifdef _M_AMD64
// save the old r15 and keep the entry stack pointer in it
push(r15);
mov(r15, rsp);
if(size > 0) sub(rsp, size);
if(align) and(rsp, 0xfffffffffffffff0);
#else
// save the old ebp and keep the entry stack pointer in it
push(ebp);
mov(ebp, esp);
if(size > 0) sub(esp, size);
if(align) and(esp, 0xfffffff0);
#endif
}
// Emits the epilogue matching enter(): the stack pointer is reloaded from the
// frame register (r15 when _M_AMD64 is defined, ebp otherwise), which undoes
// any size/alignment adjustment, and the saved frame register is popped.
void GSCodeGenerator::leave()
{
#ifdef _M_AMD64
mov(rsp, r15);
pop(r15);
#else
mov(esp, ebp);
pop(ebp);
#endif
}

View File

@ -161,9 +161,6 @@ class GSCodeGenerator : public Xbyak::CodeGenerator
protected:
Xbyak::util::Cpu m_cpu;
void enter(uint32 size, bool align);
void leave();
public:
GSCodeGenerator(void* code, size_t maxsize)
: Xbyak::CodeGenerator(maxsize, code)

View File

@ -263,7 +263,7 @@ public:
{
ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4(env.FOGCOL.u32[0]) / 255;
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
}
if(context->TEST.ATE)

View File

@ -384,7 +384,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
{
k = (int)m_vt.m_lod.x << 16; // set lod to max
k = (int)m_vt.m_lod.x << 16; // set lod to max level
gd.sel.lcm = 1; // lod is constant
gd.sel.mmin = 1; // tri-linear is meaningless
@ -432,7 +432,11 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
static int s_counter = 0;
//t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
if(0)
//if(context->TEX0.TH > context->TEX0.TW)
//if(s_n >= s_saven && s_n < s_saven + 3)
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
for(int i = 1, j = std::min<int>((int)context->TEX1.MXL, 6); i <= j; i++)
{
@ -487,7 +491,28 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.tex[i] = t->m_buff;
// t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
if(0)
//if(context->TEX0.TH > context->TEX0.TW)
//if(s_n >= s_saven && s_n < s_saven + 3)
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
{
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
/*
GIFRegTEX0 TEX0 = MIP_TEX0;
TEX0.TBP0 = context->TEX0.TBP0;
do
{
TEX0.TBP0++;
const GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA, r, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return false;}
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, TEX0.TBP0, i));
}
while(TEX0.TBP0 < 0x3fff);
*/
int i = 0;
}
}
s_counter++;
@ -701,38 +726,31 @@ void GSRendererSW::VertexKick(bool skip)
{
const GSDrawingContext* context = m_context;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
GSVertexSW& dst = m_vl.AddTail();
xy = xy.insert16<3>(m_v.FOG.F);
xy = xy.upl16();
xy -= context->XYOFFSET;
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F);
GSVertexSW v;
v.p = GSVector4(xy) * g_pos_scale;
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(tme)
{
GSVector4 t;
if(fst)
{
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
}
else
{
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
}
v.t = v.t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
dst.t = t;
}
GSVertexSW& dst = m_vl.AddTail();
dst = v;
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
int count = 0;

View File

@ -29,7 +29,7 @@ using namespace Xbyak;
void GSSetupPrimCodeGenerator::Generate()
{
enter(32, true);
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
@ -55,7 +55,7 @@ void GSSetupPrimCodeGenerator::Generate()
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}
@ -186,16 +186,11 @@ void GSSetupPrimCodeGenerator::Texture()
if(m_sel.fst)
{
// m_local.d4.st = GSVector4i(t * 4.0f);
if(m_sel.mipmap && !m_sel.lcm)
{
vmovhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
}
// m_local.d4.stq = GSVector4i(t * 4.0f);
vcvttps2dq(xmm1, xmm1);
vmovq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
}
else
{
@ -204,7 +199,7 @@ void GSSetupPrimCodeGenerator::Texture()
vmovaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
}
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
{
// GSVector4 ds = t.xxxx();
// GSVector4 dt = t.yyyy();
@ -218,7 +213,7 @@ void GSSetupPrimCodeGenerator::Texture()
vmulps(xmm2, xmm1, Xmm(4 + i));
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
if(m_sel.fst)
{
// m_local.d[i].s/t = GSVector4i(v);

View File

@ -29,7 +29,7 @@ using namespace Xbyak;
void GSSetupPrimCodeGenerator::Generate()
{
enter(32, true);
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Generate()
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}
@ -191,16 +191,11 @@ void GSSetupPrimCodeGenerator::Texture()
if(m_sel.fst)
{
// m_local.d4.st = GSVector4i(t * 4.0f);
if(m_sel.mipmap && !m_sel.lcm)
{
movhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
}
// m_local.d4.stq = GSVector4i(t * 4.0f);
cvttps2dq(xmm1, xmm1);
movq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
}
else
{
@ -209,7 +204,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
}
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
{
// GSVector4 ds = t.xxxx();
// GSVector4 dt = t.yyyy();
@ -225,7 +220,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(xmm2, xmm1);
mulps(xmm2, Xmm(4 + i));
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
if(m_sel.fst)
{
// m_local.d[i].s/t = GSVector4i(v);

View File

@ -322,8 +322,7 @@ void GSSetupPrimCodeGenerator::Color()
{
// GSVector4i c = GSVector4i(vertices[0].c);
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
cvttps2dq(xmm0, xmm0);
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
// c = c.upl16(c.zwxy());

View File

@ -590,41 +590,41 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
if(m_env.CTXT[i].TEX1.MTBA)
{
// NOTE 1: TEX1.MXL must not be automatically set to 3 here.
// NOTE 2: Mipmap levels are tightly packed, if (tbw << 6) > (1 << tw) then the left-over space to the right is used. (common for PSM_PSMT4)
// NOTE 3: Non-rectangular textures are treated as rectangular when calculating the occupied space (height is extended, not sure about width)
uint32 bp = TEX0.TBP0;
uint32 bw = TEX0.TBW;
uint32 w = 1u << TEX0.TW;
uint32 h = 1u << TEX0.TH;
uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
uint32 tbp = TEX0.TBP0;
uint32 tbw = TEX0.TBW;
uint32 th = TEX0.TH;
if(h < w) h = w;
if(th >= 3)
{
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
tbw = std::max<uint32>(tbw >> 1, 1);
th--;
bp += ((w * h * bpp >> 3) + 255) >> 8;
bw = std::max<uint32>(bw >> 1, 1);
w = std::max<uint32>(w >> 1, 1);
h = std::max<uint32>(h >> 1, 1);
m_env.CTXT[i].MIPTBP1.TBP1 = tbp;
m_env.CTXT[i].MIPTBP1.TBW1 = tbw;
m_env.CTXT[i].MIPTBP1.TBP1 = bp;
m_env.CTXT[i].MIPTBP1.TBW1 = bw;
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
tbw = std::max<uint32>(tbw >> 1, 1);
th--;
bp += ((w * h * bpp >> 3) + 255) >> 8;
bw = std::max<uint32>(bw >> 1, 1);
w = std::max<uint32>(w >> 1, 1);
h = std::max<uint32>(h >> 1, 1);
m_env.CTXT[i].MIPTBP1.TBP2 = tbp;
m_env.CTXT[i].MIPTBP1.TBW2 = tbw;
m_env.CTXT[i].MIPTBP1.TBP2 = bp;
m_env.CTXT[i].MIPTBP1.TBW2 = bw;
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
tbw = std::max<uint32>(tbw >> 1, 1);
th--;
bp += ((w * h * bpp >> 3) + 255) >> 8;
bw = std::max<uint32>(bw >> 1, 1);
w = std::max<uint32>(w >> 1, 1);
h = std::max<uint32>(h >> 1, 1);
m_env.CTXT[i].MIPTBP1.TBP3 = tbp;
m_env.CTXT[i].MIPTBP1.TBW3 = tbw;
// NOTE: TEX1.MXL must not be automatically set to 3 here
}
else
{
ASSERT(0);
}
m_env.CTXT[i].MIPTBP1.TBP3 = bp;
m_env.CTXT[i].MIPTBP1.TBW3 = bw;
// printf("MTBA\n");
}

View File

@ -29,6 +29,7 @@ const GSVector4 GSVector4::m_one(1.0f);
const GSVector4 GSVector4::m_two(2.0f);
const GSVector4 GSVector4::m_four(4.0f);
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
GSVector4i GSVector4i::fit(int arx, int ary) const
{

View File

@ -2333,6 +2333,7 @@ public:
static const GSVector4 m_two;
static const GSVector4 m_four;
static const GSVector4 m_x4b000000;
static const GSVector4 m_x4f800000;
__forceinline GSVector4()
{
@ -2385,9 +2386,18 @@ public:
this->m = m;
}
__forceinline explicit GSVector4(uint32 u32)
__forceinline explicit GSVector4(int i)
{
*this = GSVector4(GSVector4i::load((int)u32).u8to32());
GSVector4i v((int)i);
*this = GSVector4(v);
}
// Builds the vector from an unsigned 32-bit value. The bits are first
// converted as a signed int; wherever the sign bit was set, m_x4f800000
// (the bit pattern of the float 2^32) is added to compensate.
__forceinline explicit GSVector4(uint32 u)
{
	GSVector4i bits((int)u);

	// sra32(31) turns the sign bit into a per-lane mask that selects
	// the correction term only where it is needed
	GSVector4 fixup = m_x4f800000 & GSVector4::cast(bits.sra32(31));

	*this = GSVector4(bits) + fixup;
}
__forceinline explicit GSVector4(const GSVector4i& v);
@ -2407,11 +2417,6 @@ public:
this->m = m;
}
__forceinline void operator = (uint32 u32)
{
*this = GSVector4(GSVector4i::load((int)u32).u8to32());
}
__forceinline operator __m128() const
{
return m;
@ -2422,6 +2427,16 @@ public:
return GSVector4i(*this).rgba32();
}
// Expands a packed 32-bit color into a float vector: the value is loaded
// into an integer vector, widened with u8to32() and then converted to float.
// NOTE(review): presumably one lane per color byte, in r, g, b, a order -
// confirm against GSVector4i::u8to32().
__forceinline static GSVector4 rgba32(uint32 rgba)
{
	GSVector4i channels = GSVector4i::load((int)rgba).u8to32();

	return GSVector4(channels);
}
// Same expansion as rgba32(uint32), except that the widened integer lanes
// are shifted left by `shift` bits before the conversion to float.
__forceinline static GSVector4 rgba32(uint32 rgba, int shift)
{
	GSVector4i channels = GSVector4i::load((int)rgba).u8to32();

	return GSVector4(channels << shift);
}
__forceinline static GSVector4 cast(const GSVector4i& v);
__forceinline GSVector4 abs() const
@ -2840,6 +2855,13 @@ public:
return GSVector4(_mm_load_ss(&f));
}
// Loads an unsigned 32-bit value through GSVector4i::load() and converts it
// to float. The conversion treats the bits as a signed int, so wherever the
// sign bit was set m_x4f800000 (the bit pattern of the float 2^32) is added
// to compensate.
__forceinline static GSVector4 load(uint32 u)
{
	GSVector4i bits = GSVector4i::load((int)u);

	// all-ones where the sign bit was set, zero elsewhere
	GSVector4 fixup = m_x4f800000 & GSVector4::cast(bits.sra32(31));

	return GSVector4(bits) + fixup;
}
template<bool aligned> __forceinline static GSVector4 load(const void* p)
{
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));

View File

@ -51,7 +51,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break;
}
enter(32, true);
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
@ -168,7 +168,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}
@ -200,7 +200,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
break;
}
enter(32, true);
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
@ -334,7 +334,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}
@ -364,7 +364,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break;
}
enter(32, true);
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
@ -488,7 +488,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}

View File

@ -51,7 +51,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
break;
}
enter(32, true);
sub(rsp, 8 + 2 * 16);
movdqa(ptr[rsp + 0], xmm6);
movdqa(ptr[rsp + 16], xmm7);
@ -172,7 +172,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
movdqa(xmm6, ptr[rsp + 0]);
movdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}
@ -204,7 +204,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
break;
}
enter(32, true);
sub(rsp, 8 + 2 * 16);
movdqa(ptr[rsp + 0], xmm6);
movdqa(ptr[rsp + 16], xmm7);
@ -355,7 +355,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
movdqa(xmm6, ptr[rsp + 0]);
movdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}
@ -385,7 +385,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
break;
}
enter(32, true);
sub(rsp, 8 + 2 * 16);
movdqa(ptr[rsp + 0], xmm6);
movdqa(ptr[rsp + 16], xmm7);
@ -535,7 +535,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
movdqa(xmm6, ptr[rsp + 0]);
movdqa(xmm7, ptr[rsp + 16]);
leave();
add(rsp, 8 + 2 * 16);
ret();
}