mirror of https://github.com/PCSX2/pcsx2.git
GSdx: mipmapping fix (ford mustang racing, and probably other games which use small, non-square textures)
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4529 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
15f7b2b6d8
commit
732b038571
|
@ -249,10 +249,15 @@ public:
|
|||
GSVector4 ys(0.257f, 0.504f, 0.098f, 0.0f);
|
||||
GSVector4 us(-0.148f / 2, -0.291f / 2, 0.439f / 2, 0.0f);
|
||||
GSVector4 vs(0.439f / 2, -0.368f / 2, -0.071f / 2, 0.0f);
|
||||
const GSVector4 offset(16, 128, 16, 128);
|
||||
|
||||
if (!rgba)
|
||||
ys = ys.zyxw(), us = us.zyxw(), vs = vs.zyxw();
|
||||
if(!rgba)
|
||||
{
|
||||
ys = ys.zyxw();
|
||||
us = us.zyxw();
|
||||
vs = vs.zyxw();
|
||||
}
|
||||
|
||||
const GSVector4 offset(16, 128, 16, 128);
|
||||
|
||||
for(int j = 0; j < h; j++, dst += dstpitch, src += srcpitch)
|
||||
{
|
||||
|
@ -261,8 +266,8 @@ public:
|
|||
|
||||
for(int i = 0; i < w; i += 2)
|
||||
{
|
||||
GSVector4 c0 = GSVector4(s[i + 0]);
|
||||
GSVector4 c1 = GSVector4(s[i + 1]);
|
||||
GSVector4 c0 = GSVector4::rgba32(s[i + 0]);
|
||||
GSVector4 c1 = GSVector4::rgba32(s[i + 1]);
|
||||
GSVector4 c2 = c0 + c1;
|
||||
|
||||
GSVector4 lo = (c0 * ys).hadd(c2 * us);
|
||||
|
|
|
@ -109,7 +109,7 @@ bool GSClut::WriteTest(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT)
|
|||
case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
|
||||
case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
|
||||
case 6: ASSERT(0); return false; // ffx2 menu
|
||||
case 7: ASSERT(0); return false;
|
||||
case 7: ASSERT(0); return false; // ford mustang racing
|
||||
default: __assume(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -350,7 +350,7 @@ void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
|||
|
||||
void GSDevice11::ClearRenderTarget(GSTexture* t, uint32 c)
|
||||
{
|
||||
GSVector4 color = GSVector4(c) * (1.0f / 255);
|
||||
GSVector4 color = GSVector4::rgba32(c) * (1.0f / 255);
|
||||
|
||||
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, color.v);
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
|
||||
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
#error TODO: this is still bogus somewhere
|
||||
#error TODO
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Generate()
|
||||
{
|
||||
|
@ -38,17 +38,13 @@ void GSDrawScanlineCodeGenerator::Generate()
|
|||
push(r12);
|
||||
push(r13);
|
||||
|
||||
enter(10 * 16, true);
|
||||
sub(rsp, 8 + 10 * 16);
|
||||
|
||||
for(int i = 6; i < 16; i++)
|
||||
{
|
||||
vmovdqa(ptr[rsp + (i - 6) * 16], Xmm(i));
|
||||
}
|
||||
|
||||
movsxd(rcx, ecx); // right
|
||||
movsxd(rdx, edx); // left
|
||||
movsxd(r8, r8d); // top
|
||||
|
||||
mov(r10, (size_t)&m_test[0]);
|
||||
mov(r11, (size_t)&m_local);
|
||||
mov(r12, (size_t)m_local.gd);
|
||||
|
@ -84,7 +80,14 @@ L("loop");
|
|||
|
||||
// ebp = za
|
||||
|
||||
SampleTexture();
|
||||
if(m_sel.mmin)
|
||||
{
|
||||
SampleTextureLOD();
|
||||
}
|
||||
else
|
||||
{
|
||||
SampleTexture();
|
||||
}
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
|
@ -201,7 +204,7 @@ L("exit");
|
|||
vmovdqa(Xmm(i), ptr[rsp + (i - 6) * 16]);
|
||||
}
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 10 * 16);
|
||||
|
||||
pop(r13);
|
||||
pop(r12);
|
||||
|
@ -237,10 +240,9 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
mov(rax, rcx);
|
||||
sar(rax, 63);
|
||||
and(rax, rcx);
|
||||
add(rax, 7);
|
||||
shl(rax, 4);
|
||||
|
||||
vpor(xmm15, ptr[rax + r10]);
|
||||
vpor(xmm15, ptr[rax + r10 + 7 * 16]);
|
||||
|
||||
// GSVector2i* fza_base = &m_local.gd->fzbr[top];
|
||||
|
||||
|
@ -256,8 +258,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
// edx = &m_local.d[skip]
|
||||
|
||||
shl(rdx, 3);
|
||||
lea(rdx, ptr[rdx + r11 + offsetof(GSScanlineLocalData, d)]);
|
||||
lea(rdx, ptr[rdx * 8 + r11 + offsetof(GSScanlineLocalData, d)]);
|
||||
}
|
||||
|
||||
if(!m_sel.sprite)
|
||||
|
@ -325,7 +326,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
vpaddd(xmm10, ptr[rdx + offsetof(GSScanlineLocalData::skip, s)]);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(!m_sel.sprite || m_sel.mmin)
|
||||
{
|
||||
vpaddd(xmm11, ptr[rdx + offsetof(GSScanlineLocalData::skip, t)]);
|
||||
}
|
||||
|
@ -338,12 +339,6 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
vpsrlw(xmm6, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vshufps(xmm12, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm12, ptr[rdx + offsetof(GSScanlineLocalData::skip, q)]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -441,17 +436,11 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
vpaddd(xmm10, xmm1);
|
||||
|
||||
if(!m_sel.sprite)
|
||||
if(!m_sel.sprite || m_sel.mmin)
|
||||
{
|
||||
vpshufd(xmm1, xmm0, _MM_SHUFFLE(1, 1, 1, 1));
|
||||
vpaddd(xmm11, xmm1);
|
||||
}
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vshufps(xmm3, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm12, xmm3);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -510,10 +499,9 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
mov(rdx, rcx);
|
||||
sar(rdx, 63);
|
||||
and(rdx, rcx);
|
||||
add(rdx, 7);
|
||||
shl(rdx, 4);
|
||||
|
||||
vmovdqa(xmm15, ptr[rdx + r10]);
|
||||
vmovdqa(xmm15, ptr[rdx + r10 + 7 * 16]);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
|
@ -589,11 +577,9 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
vpslld(xmm2, 31);
|
||||
|
||||
// GSVector4i zso = zs - o;
|
||||
|
||||
vpsubd(xmm0, xmm2);
|
||||
|
||||
// GSVector4i zdo = zd - o;
|
||||
|
||||
vpsubd(xmm0, xmm2);
|
||||
vpsubd(xmm1, xmm2);
|
||||
}
|
||||
|
||||
|
@ -629,11 +615,6 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// ebx = tex
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
if(!m_sel.fst)
|
||||
{
|
||||
vrcpps(xmm0, xmm12);
|
||||
|
@ -766,10 +747,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(xmm0, xmm0, xmm4, xmm5);
|
||||
ReadTexel(xmm1, xmm1, xmm4, xmm5);
|
||||
ReadTexel(xmm2, xmm2, xmm4, xmm5);
|
||||
ReadTexel(xmm3, xmm3, xmm4, xmm5);
|
||||
ReadTexel(4, 0);
|
||||
|
||||
// xmm0 = c00
|
||||
// xmm1 = c01
|
||||
|
@ -863,7 +841,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(xmm2, xmm3, xmm0, xmm1);
|
||||
ReadTexel(1, 0);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -1032,6 +1010,18 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
||||
{
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
|
||||
{
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
|
||||
{
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
|
@ -1046,6 +1036,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
// gat = gat.modulate16<1>(ga).clamp8();
|
||||
|
||||
modulate16(xmm3, xmm14, 1);
|
||||
|
||||
clamp16(xmm3, xmm0);
|
||||
|
||||
// if(!tcc) gat = gat.mix16(ga.srl16(7));
|
||||
|
@ -1053,6 +1044,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
if(!m_sel.tcc)
|
||||
{
|
||||
vpsrlw(xmm1, xmm14, 7);
|
||||
|
||||
mix16(xmm3, xmm1, xmm0);
|
||||
}
|
||||
|
||||
|
@ -1065,6 +1057,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
if(!m_sel.tcc)
|
||||
{
|
||||
vpsrlw(xmm1, xmm14, 7);
|
||||
|
||||
mix16(xmm3, xmm1, xmm0);
|
||||
}
|
||||
|
||||
|
@ -1075,7 +1068,12 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
// gat = gat.mix16(!tcc ? ga.srl16(7) : gat.addus8(ga.srl16(7)));
|
||||
|
||||
vpsrlw(xmm1, xmm14, 7);
|
||||
if(m_sel.tcc) vpaddusb(xmm1, xmm3);
|
||||
|
||||
if(m_sel.tcc)
|
||||
{
|
||||
vpaddusb(xmm1, xmm3);
|
||||
}
|
||||
|
||||
mix16(xmm3, xmm1, xmm0);
|
||||
|
||||
break;
|
||||
|
@ -1087,6 +1085,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
if(!m_sel.tcc)
|
||||
{
|
||||
vpsrlw(xmm1, xmm14, 7);
|
||||
|
||||
mix16(xmm3, xmm1, xmm0);
|
||||
}
|
||||
|
||||
|
@ -1103,6 +1102,8 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
|
||||
break;
|
||||
}
|
||||
|
||||
// TODO: aa1
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadMask()
|
||||
|
@ -1218,6 +1219,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
// rbt = rbt.modulate16<1>(rb).clamp8();
|
||||
|
||||
modulate16(xmm2, xmm13, 1);
|
||||
|
||||
clamp16(xmm2, xmm0);
|
||||
|
||||
break;
|
||||
|
@ -1229,22 +1231,28 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
case TFX_HIGHLIGHT:
|
||||
case TFX_HIGHLIGHT2:
|
||||
|
||||
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
|
||||
|
||||
vmovdqa(xmm1, xmm3);
|
||||
|
||||
modulate16(xmm3, xmm14, 1);
|
||||
|
||||
vpshuflw(xmm6, xmm14, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
vpshufhw(xmm6, xmm6, _MM_SHUFFLE(3, 3, 1, 1));
|
||||
vpsrlw(xmm6, 7);
|
||||
|
||||
// gat = gat.modulate16<1>(ga).add16(af).clamp8().mix16(gat);
|
||||
|
||||
vmovdqa(xmm1, xmm3);
|
||||
modulate16(xmm3, xmm14, 1);
|
||||
vpaddw(xmm3, xmm6);
|
||||
|
||||
clamp16(xmm3, xmm0);
|
||||
|
||||
mix16(xmm3, xmm1, xmm0);
|
||||
|
||||
// rbt = rbt.modulate16<1>(rb).add16(af).clamp8();
|
||||
|
||||
modulate16(xmm2, xmm13, 1);
|
||||
|
||||
vpaddw(xmm2, xmm6);
|
||||
|
||||
clamp16(xmm2, xmm0);
|
||||
|
||||
break;
|
||||
|
@ -1797,25 +1805,22 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||
{
|
||||
ReadTexel(dst, addr, 0);
|
||||
ReadTexel(dst, addr, 1);
|
||||
ReadTexel(dst, addr, 2);
|
||||
ReadTexel(dst, addr, 3);
|
||||
// TODO
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
{
|
||||
const Address& src = m_sel.tlu ? ptr[r12 + rax * 4 + offsetof(GSScanlineGlobalData, clut)] : ptr[rbx + rax * 4];
|
||||
|
||||
vpextrd(eax, addr, i);
|
||||
|
||||
movsxd(rax, eax);
|
||||
if(i == 0) vmovd(eax, addr);
|
||||
else vpextrd(eax, addr, i);
|
||||
|
||||
if(m_sel.tlu) movzx(rax, byte[rbx + rax]);
|
||||
|
||||
vpinsrd(dst, src, i);
|
||||
if(i == 0) vmovd(dst, src);
|
||||
else vpinsrd(dst, src, i);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -19,9 +19,6 @@
|
|||
*
|
||||
*/
|
||||
|
||||
// TODO: x64 (use the extra regs to avoid spills of zs, zd, uf, vf, rb, ga and keep a few constants in the last two like aref or afix)
|
||||
// TODO: for edges doing 4 pixels is wasteful (needed memory access * 4)
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
|
|
@ -21,37 +21,3 @@
|
|||
|
||||
#include "stdafx.h"
|
||||
#include "GSFunctionMap.h"
|
||||
|
||||
void GSCodeGenerator::enter(uint32 size, bool align)
|
||||
{
|
||||
#ifdef _M_AMD64
|
||||
|
||||
push(r15);
|
||||
mov(r15, rsp);
|
||||
if(size > 0) sub(rsp, size);
|
||||
if(align) and(rsp, 0xfffffffffffffff0);
|
||||
|
||||
#else
|
||||
|
||||
push(ebp);
|
||||
mov(ebp, esp);
|
||||
if(size > 0) sub(esp, size);
|
||||
if(align) and(esp, 0xfffffff0);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSCodeGenerator::leave()
|
||||
{
|
||||
#ifdef _M_AMD64
|
||||
|
||||
mov(rsp, r15);
|
||||
pop(r15);
|
||||
|
||||
#else
|
||||
|
||||
mov(esp, ebp);
|
||||
pop(ebp);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -161,9 +161,6 @@ class GSCodeGenerator : public Xbyak::CodeGenerator
|
|||
protected:
|
||||
Xbyak::util::Cpu m_cpu;
|
||||
|
||||
void enter(uint32 size, bool align);
|
||||
void leave();
|
||||
|
||||
public:
|
||||
GSCodeGenerator(void* code, size_t maxsize)
|
||||
: Xbyak::CodeGenerator(maxsize, code)
|
||||
|
|
|
@ -263,7 +263,7 @@ public:
|
|||
{
|
||||
ps_sel.fog = 1;
|
||||
|
||||
ps_cb.FogColor_AREF = GSVector4(env.FOGCOL.u32[0]) / 255;
|
||||
ps_cb.FogColor_AREF = GSVector4::rgba32(env.FOGCOL.u32[0]) / 255;
|
||||
}
|
||||
|
||||
if(context->TEST.ATE)
|
||||
|
|
|
@ -384,7 +384,7 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
|
||||
{
|
||||
k = (int)m_vt.m_lod.x << 16; // set lod to max
|
||||
k = (int)m_vt.m_lod.x << 16; // set lod to max level
|
||||
|
||||
gd.sel.lcm = 1; // lod is constant
|
||||
gd.sel.mmin = 1; // tri-linear is meaningless
|
||||
|
@ -432,7 +432,11 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
static int s_counter = 0;
|
||||
|
||||
//t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
|
||||
if(0)
|
||||
//if(context->TEX0.TH > context->TEX0.TW)
|
||||
//if(s_n >= s_saven && s_n < s_saven + 3)
|
||||
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
|
||||
t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
|
||||
|
||||
for(int i = 1, j = std::min<int>((int)context->TEX1.MXL, 6); i <= j; i++)
|
||||
{
|
||||
|
@ -487,7 +491,28 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
gd.tex[i] = t->m_buff;
|
||||
|
||||
// t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
|
||||
if(0)
|
||||
//if(context->TEX0.TH > context->TEX0.TW)
|
||||
//if(s_n >= s_saven && s_n < s_saven + 3)
|
||||
//if(context->TEX0.TBP0 >= 0x2b80 && context->TEX0.TBW == 2 && context->TEX0.PSM == PSM_PSMT4)
|
||||
{
|
||||
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
|
||||
/*
|
||||
GIFRegTEX0 TEX0 = MIP_TEX0;
|
||||
TEX0.TBP0 = context->TEX0.TBP0;
|
||||
do
|
||||
{
|
||||
TEX0.TBP0++;
|
||||
const GSTextureCacheSW::Texture* t = m_tc->Lookup(TEX0, env.TEXA, r, gd.sel.tw + 3);
|
||||
if(t == NULL) {ASSERT(0); return false;}
|
||||
t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, TEX0.TBP0, i));
|
||||
}
|
||||
while(TEX0.TBP0 < 0x3fff);
|
||||
*/
|
||||
|
||||
int i = 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
s_counter++;
|
||||
|
@ -701,38 +726,31 @@ void GSRendererSW::VertexKick(bool skip)
|
|||
{
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]);
|
||||
GSVertexSW& dst = m_vl.AddTail();
|
||||
|
||||
xy = xy.insert16<3>(m_v.FOG.F);
|
||||
xy = xy.upl16();
|
||||
xy -= context->XYOFFSET;
|
||||
GSVector4i xy = GSVector4i::load((int)m_v.XYZ.u32[0]).upl16() - context->XYOFFSET;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(m_v.XYZ.Z, 0xffffff00), m_v.FOG.F);
|
||||
|
||||
GSVertexSW v;
|
||||
|
||||
v.p = GSVector4(xy) * g_pos_scale;
|
||||
|
||||
v.c = GSVector4(GSVector4i::load((int)m_v.RGBAQ.u32[0]).u8to32() << 7);
|
||||
dst.p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
GSVector4 t;
|
||||
|
||||
if(fst)
|
||||
{
|
||||
v.t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||
t = GSVector4(((GSVector4i)m_v.UV).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
v.t = GSVector4(m_v.ST.S, m_v.ST.T);
|
||||
v.t *= GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||
t = GSVector4(m_v.ST.S, m_v.ST.T) * GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH);
|
||||
t = t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
||||
}
|
||||
|
||||
v.t = v.t.xyxy(GSVector4::load(m_v.RGBAQ.Q));
|
||||
dst.t = t;
|
||||
}
|
||||
|
||||
GSVertexSW& dst = m_vl.AddTail();
|
||||
|
||||
dst = v;
|
||||
|
||||
dst.p.z = (float)min(m_v.XYZ.Z, 0xffffff00); // max value which can survive the uint32 => float => uint32 conversion
|
||||
dst.c = GSVector4::rgba32(m_v.RGBAQ.u32[0], 7);
|
||||
|
||||
int count = 0;
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ using namespace Xbyak;
|
|||
|
||||
void GSSetupPrimCodeGenerator::Generate()
|
||||
{
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -55,7 +55,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
@ -186,16 +186,11 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
vmovhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
|
||||
}
|
||||
// m_local.d4.stq = GSVector4i(t * 4.0f);
|
||||
|
||||
vcvttps2dq(xmm1, xmm1);
|
||||
|
||||
vmovq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
vmovdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -204,7 +199,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
vmovaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -218,7 +213,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ using namespace Xbyak;
|
|||
|
||||
void GSSetupPrimCodeGenerator::Generate()
|
||||
{
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Generate()
|
|||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
@ -191,16 +191,11 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
{
|
||||
movhps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq.z)], xmm1);
|
||||
}
|
||||
// m_local.d4.stq = GSVector4i(t * 4.0f);
|
||||
|
||||
cvttps2dq(xmm1, xmm1);
|
||||
|
||||
movq(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
movdqa(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -209,7 +204,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(ptr[r8 + offsetof(GSScanlineLocalData, d4.stq)], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -225,7 +220,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
if(m_sel.fst)
|
||||
{
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
|
|
|
@ -322,8 +322,7 @@ void GSSetupPrimCodeGenerator::Color()
|
|||
{
|
||||
// GSVector4i c = GSVector4i(vertices[0].c);
|
||||
|
||||
movaps(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
|
||||
cvttps2dq(xmm0, xmm0);
|
||||
cvttps2dq(xmm0, ptr[ecx + offsetof(GSVertexSW, c)]);
|
||||
|
||||
// c = c.upl16(c.zwxy());
|
||||
|
||||
|
|
|
@ -590,41 +590,41 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* r)
|
|||
|
||||
if(m_env.CTXT[i].TEX1.MTBA)
|
||||
{
|
||||
// NOTE 1: TEX1.MXL must not be automatically set to 3 here.
|
||||
// NOTE 2: Mipmap levels are tightly packed: if (tbw << 6) > (1 << tw), the left-over space to the right is used (common for PSM_PSMT4).
|
||||
// NOTE 3: Non-square textures are treated as square when calculating the occupied space (a height smaller than the width is extended to the width; not sure about the opposite case, width smaller than height)
|
||||
|
||||
uint32 bp = TEX0.TBP0;
|
||||
uint32 bw = TEX0.TBW;
|
||||
uint32 w = 1u << TEX0.TW;
|
||||
uint32 h = 1u << TEX0.TH;
|
||||
uint32 bpp = GSLocalMemory::m_psm[TEX0.PSM].bpp;
|
||||
|
||||
uint32 tbp = TEX0.TBP0;
|
||||
uint32 tbw = TEX0.TBW;
|
||||
uint32 th = TEX0.TH;
|
||||
if(h < w) h = w;
|
||||
|
||||
if(th >= 3)
|
||||
{
|
||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
||||
th--;
|
||||
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||
bw = std::max<uint32>(bw >> 1, 1);
|
||||
w = std::max<uint32>(w >> 1, 1);
|
||||
h = std::max<uint32>(h >> 1, 1);
|
||||
|
||||
m_env.CTXT[i].MIPTBP1.TBP1 = tbp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW1 = tbw;
|
||||
m_env.CTXT[i].MIPTBP1.TBP1 = bp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW1 = bw;
|
||||
|
||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
||||
th--;
|
||||
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||
bw = std::max<uint32>(bw >> 1, 1);
|
||||
w = std::max<uint32>(w >> 1, 1);
|
||||
h = std::max<uint32>(h >> 1, 1);
|
||||
|
||||
m_env.CTXT[i].MIPTBP1.TBP2 = tbp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW2 = tbw;
|
||||
m_env.CTXT[i].MIPTBP1.TBP2 = bp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW2 = bw;
|
||||
|
||||
tbp += (((tbw << 6) * (1 << th) * bpp >> 3) + 255) >> 8;
|
||||
tbw = std::max<uint32>(tbw >> 1, 1);
|
||||
th--;
|
||||
bp += ((w * h * bpp >> 3) + 255) >> 8;
|
||||
bw = std::max<uint32>(bw >> 1, 1);
|
||||
w = std::max<uint32>(w >> 1, 1);
|
||||
h = std::max<uint32>(h >> 1, 1);
|
||||
|
||||
m_env.CTXT[i].MIPTBP1.TBP3 = tbp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW3 = tbw;
|
||||
|
||||
// NOTE: TEX1.MXL must not be automatically set to 3 here
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(0);
|
||||
}
|
||||
m_env.CTXT[i].MIPTBP1.TBP3 = bp;
|
||||
m_env.CTXT[i].MIPTBP1.TBW3 = bw;
|
||||
|
||||
// printf("MTBA\n");
|
||||
}
|
||||
|
|
|
@ -29,6 +29,7 @@ const GSVector4 GSVector4::m_one(1.0f);
|
|||
const GSVector4 GSVector4::m_two(2.0f);
|
||||
const GSVector4 GSVector4::m_four(4.0f);
|
||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
|
||||
|
||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||
{
|
||||
|
|
|
@ -2333,6 +2333,7 @@ public:
|
|||
static const GSVector4 m_two;
|
||||
static const GSVector4 m_four;
|
||||
static const GSVector4 m_x4b000000;
|
||||
static const GSVector4 m_x4f800000;
|
||||
|
||||
__forceinline GSVector4()
|
||||
{
|
||||
|
@ -2385,9 +2386,18 @@ public:
|
|||
this->m = m;
|
||||
}
|
||||
|
||||
__forceinline explicit GSVector4(uint32 u32)
|
||||
__forceinline explicit GSVector4(int i)
|
||||
{
|
||||
*this = GSVector4(GSVector4i::load((int)u32).u8to32());
|
||||
GSVector4i v((int)i);
|
||||
|
||||
*this = GSVector4(v);
|
||||
}
|
||||
|
||||
__forceinline explicit GSVector4(uint32 u)
|
||||
{
|
||||
GSVector4i v((int)u);
|
||||
|
||||
*this = GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
|
||||
}
|
||||
|
||||
__forceinline explicit GSVector4(const GSVector4i& v);
|
||||
|
@ -2407,11 +2417,6 @@ public:
|
|||
this->m = m;
|
||||
}
|
||||
|
||||
__forceinline void operator = (uint32 u32)
|
||||
{
|
||||
*this = GSVector4(GSVector4i::load((int)u32).u8to32());
|
||||
}
|
||||
|
||||
__forceinline operator __m128() const
|
||||
{
|
||||
return m;
|
||||
|
@ -2422,6 +2427,16 @@ public:
|
|||
return GSVector4i(*this).rgba32();
|
||||
}
|
||||
|
||||
__forceinline static GSVector4 rgba32(uint32 rgba)
|
||||
{
|
||||
return GSVector4(GSVector4i::load((int)rgba).u8to32());
|
||||
}
|
||||
|
||||
__forceinline static GSVector4 rgba32(uint32 rgba, int shift)
|
||||
{
|
||||
return GSVector4(GSVector4i::load((int)rgba).u8to32() << shift);
|
||||
}
|
||||
|
||||
__forceinline static GSVector4 cast(const GSVector4i& v);
|
||||
|
||||
__forceinline GSVector4 abs() const
|
||||
|
@ -2840,6 +2855,13 @@ public:
|
|||
return GSVector4(_mm_load_ss(&f));
|
||||
}
|
||||
|
||||
__forceinline static GSVector4 load(uint32 u)
|
||||
{
|
||||
GSVector4i v = GSVector4i::load((int)u);
|
||||
|
||||
return GSVector4(v) + (m_x4f800000 & GSVector4::cast(v.sra32(31)));
|
||||
}
|
||||
|
||||
template<bool aligned> __forceinline static GSVector4 load(const void* p)
|
||||
{
|
||||
return GSVector4(aligned ? _mm_load_ps((const float*)p) : _mm_loadu_ps((const float*)p));
|
||||
|
|
|
@ -51,7 +51,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
break;
|
||||
}
|
||||
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -168,7 +168,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
@ -200,7 +200,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
break;
|
||||
}
|
||||
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -334,7 +334,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
@ -364,7 +364,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
break;
|
||||
}
|
||||
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -488,7 +488,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
break;
|
||||
}
|
||||
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -172,7 +172,7 @@ GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxs
|
|||
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
@ -204,7 +204,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
break;
|
||||
}
|
||||
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -355,7 +355,7 @@ GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t ma
|
|||
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
@ -385,7 +385,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
break;
|
||||
}
|
||||
|
||||
enter(32, true);
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
@ -535,7 +535,7 @@ GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t
|
|||
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
leave();
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue