mirror of https://github.com/PCSX2/pcsx2.git
GSdx: (almost) complete mipmapping support, if the min/mag filter differs then bilinear is used.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4440 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
288176e23d
commit
243a0f4ad1
|
@ -44,6 +44,9 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void SampleTexture();
|
||||
void Wrap(const Xmm& uv0);
|
||||
void Wrap(const Xmm& uv0, const Xmm& uv1);
|
||||
void SampleTextureLOD();
|
||||
void WrapLOD(const Xmm& uv0);
|
||||
void WrapLOD(const Xmm& uv0, const Xmm& uv1);
|
||||
void AlphaTFX();
|
||||
void ReadMask();
|
||||
void TestAlpha();
|
||||
|
@ -66,7 +69,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm);
|
||||
#endif
|
||||
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2);
|
||||
void ReadTexel(int pixels, int mip_offset = 0);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i);
|
||||
|
||||
void modulate16(const Xmm& a, const Operand& f, int shift);
|
||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -242,6 +242,8 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
|
||||
}
|
||||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
||||
{
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
|
@ -321,6 +323,8 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
gd.clut = m_mem.m_clut;
|
||||
|
||||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
|
@ -336,63 +340,53 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
gd.sel.tfx = TFX_DECAL;
|
||||
}
|
||||
|
||||
if(gd.sel.fst == 0)
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, gd.sel.ltf);
|
||||
|
||||
const GSTextureCacheSW::Texture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
|
||||
|
||||
if(t == NULL) {ASSERT(0); return;}
|
||||
|
||||
gd.tex[0] = t->m_buff;
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
|
||||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
||||
GSVertexSW* v = m_vertices;
|
||||
|
||||
if(m_vt.m_eq.q)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
if(v[0].t.z != 1.0f)
|
||||
{
|
||||
GSVector4 w = v[0].t.zzzz().rcpnr();
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
{
|
||||
v[i].t *= w;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i += 2)
|
||||
{
|
||||
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
|
||||
|
||||
v[i + 0].t *= w;
|
||||
v[i + 1].t *= w;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GIFRegTEX0 MIP_TEX0 = context->TEX0;
|
||||
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
|
||||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
gd.sel.mipmap = 1; // TODO: pass mmin here and store mxl to m_global for clamping the lod
|
||||
//gd.sel.ltf = context->TEX1.MMIN & 1; // TODO: mmag != (mmin & 1) && lod <= 0
|
||||
gd.sel.mmin = context->TEX1.MMIN >> 1;
|
||||
gd.sel.lcm = context->TEX1.LCM;
|
||||
|
||||
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16) - 1;
|
||||
int k = context->TEX1.K << 12;
|
||||
|
||||
gd.mxl = GSVector4((float)mxl);
|
||||
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
|
||||
gd.k = GSVector4((float)(0x1000 * context->TEX1.K));
|
||||
gd.k = GSVector4((float)k);
|
||||
|
||||
// the rest is fake, should be removed later
|
||||
|
||||
int level = (int)(m_vt.m_lod.x + 0.5f);
|
||||
|
||||
level = std::min<int>(level, context->TEX1.MXL);
|
||||
level = std::min<int>(level, 6);
|
||||
|
||||
if(level > 0)
|
||||
if(gd.sel.lcm)
|
||||
{
|
||||
// printf("lvl %d\n", level);
|
||||
int lod = std::min<int>(k, mxl);
|
||||
|
||||
switch(level)
|
||||
gd.lod.i = GSVector4i(lod >> 16);
|
||||
gd.lod.f = GSVector4i(lod & 0xffff).xxxxl().xxzz();
|
||||
|
||||
// TODO: lot to optimize when lod is constant
|
||||
}
|
||||
|
||||
GIFRegTEX0 MIP_TEX0 = context->TEX0;
|
||||
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
|
||||
|
||||
GSVector4 tmin = m_vt.m_min.t;
|
||||
GSVector4 tmax = m_vt.m_max.t;
|
||||
|
||||
//static int s_counter = 0;
|
||||
|
||||
//t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
|
||||
|
||||
for(int i = 1, j = std::min<int>((int)context->TEX1.MXL, 6); i <= j; i++)
|
||||
{
|
||||
switch(i)
|
||||
{
|
||||
case 1:
|
||||
MIP_TEX0.TBP0 = context->MIPTBP1.TBP1;
|
||||
|
@ -422,94 +416,84 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
ASSERT(MIP_TEX0.TBP0 != 0 && MIP_TEX0.TBW != 0);
|
||||
|
||||
int tw = (int)MIP_TEX0.TW - level;
|
||||
int th = (int)MIP_TEX0.TH - level;
|
||||
if(MIP_TEX0.TW > 0) MIP_TEX0.TW--;
|
||||
if(MIP_TEX0.TH > 0) MIP_TEX0.TH--;
|
||||
|
||||
switch(context->TEX1.MMIN)
|
||||
{
|
||||
case 2: case 3: // point (min size 1)
|
||||
tw = std::max<int>(tw, 0);
|
||||
th = std::max<int>(th, 0);
|
||||
break;
|
||||
case 4: case 5: // linear (min size 8)
|
||||
tw = std::max<int>(tw, 3);
|
||||
th = std::max<int>(th, 3);
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
MIP_CLAMP.MINU >>= 1;
|
||||
MIP_CLAMP.MINV >>= 1;
|
||||
MIP_CLAMP.MAXU >>= 1;
|
||||
MIP_CLAMP.MAXV >>= 1;
|
||||
|
||||
// scale down the texture coordinates, including vertex trace
|
||||
m_vt.m_min.t *= 0.5f;
|
||||
m_vt.m_max.t *= 0.5f;
|
||||
|
||||
GSVector4 scale = GSVector4(1.0f) / GSVector4(1 << ((int)MIP_TEX0.TW - tw), 1 << ((int)MIP_TEX0.TH - th), 1, 1);
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
|
||||
|
||||
const GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, r, gd.sel.tw + 3);
|
||||
|
||||
if(t == NULL) {ASSERT(0); return;}
|
||||
|
||||
gd.tex[i] = t->m_buff;
|
||||
gd.tex[i + 1] = NULL;
|
||||
|
||||
//t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
|
||||
}
|
||||
|
||||
//s_counter++;
|
||||
|
||||
m_vt.m_min.t = tmin;
|
||||
m_vt.m_max.t = tmax;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: these shortcuts are not compatible with mipmapping, yet
|
||||
|
||||
if(gd.sel.fst == 0)
|
||||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
||||
GSVertexSW* v = m_vertices;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
if(m_vt.m_eq.q)
|
||||
{
|
||||
v[i].t *= scale;
|
||||
}
|
||||
gd.sel.fst = 1;
|
||||
|
||||
m_vt.m_min.t *= scale;
|
||||
m_vt.m_max.t *= scale;
|
||||
|
||||
MIP_TEX0.TW = (uint32)tw;
|
||||
MIP_TEX0.TH = (uint32)th;
|
||||
|
||||
// this shift is done even for repeat modes
|
||||
|
||||
MIP_CLAMP.MINU >>= level;
|
||||
MIP_CLAMP.MAXU >>= level;
|
||||
MIP_CLAMP.MINV >>= level;
|
||||
MIP_CLAMP.MAXV >>= level;
|
||||
/*
|
||||
printf("%d%d%d%d%d L %d K %03x %.2f lod %.2f %.2f q %f %f\n",
|
||||
m_context->TEX1.MXL,
|
||||
m_context->TEX1.MMAG,
|
||||
m_context->TEX1.MMIN,
|
||||
PRIM->FST,
|
||||
m_context->TEX1.LCM,
|
||||
m_context->TEX1.L,
|
||||
m_context->TEX1.K,
|
||||
(float)m_context->TEX1.K / 16,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.x : 0,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.y : 0,
|
||||
1.0f / m_vt.m_min.t.z,
|
||||
1.0f / m_vt.m_max.t.z);
|
||||
*/
|
||||
if(s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
if(v[0].t.z != 1.0f)
|
||||
{
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d_(%d%d%d%d%d %.2f %.2f).bmp",
|
||||
s_n, frame, (int)MIP_TEX0.TBP0, (int)MIP_TEX0.PSM,
|
||||
m_context->TEX1.MXL,
|
||||
m_context->TEX1.MMAG,
|
||||
m_vt.m_filter.mmag,
|
||||
m_context->TEX1.MMIN,
|
||||
m_vt.m_filter.mmin,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.x : 0,
|
||||
m_context->TEX1.MXL > 0 ? m_vt.m_lod.y : 0
|
||||
);
|
||||
GSVector4 w = v[0].t.zzzz().rcpnr();
|
||||
|
||||
m_mem.SaveBMP(s, MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM, 1 << MIP_TEX0.TW, 1 << MIP_TEX0.TH);
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
{
|
||||
v[i].t *= w;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: q is now destroyed, but since q is constant we should be able to pre-calc gd.lod and change LCM to 1
|
||||
}
|
||||
else if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
for(int i = 0, j = m_count; i < j; i += 2)
|
||||
{
|
||||
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
|
||||
|
||||
v[i + 0].t *= w;
|
||||
v[i + 1].t *= w;
|
||||
}
|
||||
|
||||
// TODO: preserve q, or if there is only one sprite then see the comment above
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(gd.sel.ltf)
|
||||
{
|
||||
if(gd.sel.fst)
|
||||
if(gd.sel.ltf && gd.sel.fst)
|
||||
{
|
||||
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
|
||||
|
||||
// TODO: but not when mipmapping is used!!!
|
||||
|
||||
GSVector4 half(0x8000, 0x8000);
|
||||
|
||||
GSVertexSW* v = m_vertices;
|
||||
|
@ -521,68 +505,55 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
}
|
||||
}
|
||||
|
||||
GSVector4i r;
|
||||
uint16 tw = 1u << context->TEX0.TW;
|
||||
uint16 th = 1u << context->TEX0.TH;
|
||||
|
||||
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
|
||||
|
||||
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, r);
|
||||
|
||||
if(!t) {ASSERT(0); return;}
|
||||
|
||||
gd.tex = t->m_buff;
|
||||
gd.clut = m_mem.m_clut;
|
||||
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
uint16 tw = (uint16)(1 << MIP_TEX0.TW);
|
||||
uint16 th = (uint16)(1 << MIP_TEX0.TH);
|
||||
|
||||
switch(MIP_CLAMP.WMS)
|
||||
switch(context->CLAMP.WMS)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.u16[0] = tw - 1;
|
||||
gd.t.max.u16[0] = 0;
|
||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = tw - 1;
|
||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = 0;
|
||||
gd.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.u16[0] = 0;
|
||||
gd.t.max.u16[0] = tw - 1;
|
||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = 0;
|
||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = tw - 1;
|
||||
gd.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
gd.t.min.u16[0] = std::min<int>(MIP_CLAMP.MINU, tw - 1);
|
||||
gd.t.max.u16[0] = std::min<int>(MIP_CLAMP.MAXU, tw - 1);
|
||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = std::min<uint16>(context->CLAMP.MINU, tw - 1);
|
||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = std::min<uint16>(context->CLAMP.MAXU, tw - 1);
|
||||
gd.t.mask.u32[0] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
gd.t.min.u16[0] = MIP_CLAMP.MINU;
|
||||
gd.t.max.u16[0] = MIP_CLAMP.MAXU;
|
||||
gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU;
|
||||
gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU;
|
||||
gd.t.mask.u32[0] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
switch(MIP_CLAMP.WMT)
|
||||
switch(context->CLAMP.WMT)
|
||||
{
|
||||
case CLAMP_REPEAT:
|
||||
gd.t.min.u16[4] = th - 1;
|
||||
gd.t.max.u16[4] = 0;
|
||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = th - 1;
|
||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = 0;
|
||||
gd.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
case CLAMP_CLAMP:
|
||||
gd.t.min.u16[4] = 0;
|
||||
gd.t.max.u16[4] = th - 1;
|
||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = 0;
|
||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = th - 1;
|
||||
gd.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_CLAMP:
|
||||
gd.t.min.u16[4] = std::min<int>(MIP_CLAMP.MINV, th - 1);
|
||||
gd.t.max.u16[4] = std::min<int>(MIP_CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = std::min<uint16>(context->CLAMP.MINV, th - 1);
|
||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = std::min<uint16>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
|
||||
gd.t.mask.u32[2] = 0;
|
||||
break;
|
||||
case CLAMP_REGION_REPEAT:
|
||||
gd.t.min.u16[4] = MIP_CLAMP.MINV;
|
||||
gd.t.max.u16[4] = MIP_CLAMP.MAXV;
|
||||
gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV;
|
||||
gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV;
|
||||
gd.t.mask.u32[2] = 0xffffffff;
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -65,8 +65,8 @@ union GSScanlineSelector
|
|||
uint32 edge:1; // 47
|
||||
|
||||
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
|
||||
uint32 mipmap:1; // 49
|
||||
uint32 lcm:1; // 50
|
||||
uint32 lcm:1; // 49
|
||||
uint32 mmin:2; // 50
|
||||
};
|
||||
|
||||
struct
|
||||
|
@ -113,7 +113,7 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
|
|||
// - if in the future drawing does not have to be synchronized per batch, the rest of GSRasterizerData should be copied here, too (scissor, prim type, vertices)
|
||||
|
||||
void* vm;
|
||||
const void* tex;
|
||||
const void* tex[7];
|
||||
const uint32* clut;
|
||||
const GSVector4i* dimx;
|
||||
|
||||
|
@ -125,12 +125,14 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
|
|||
const GSVector2i* fzbc;
|
||||
|
||||
GSVector4i fm, zm;
|
||||
struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4
|
||||
struct {GSVector4i min, max, minmax, mask, invmask;} t; // [u] x 4 [v] x 4
|
||||
GSVector4i aref;
|
||||
GSVector4i afix;
|
||||
GSVector4i frb, fga;
|
||||
GSVector4 mxl;
|
||||
GSVector4 k; // TEX1.K * 0x10000
|
||||
GSVector4 l; // TEX1.L * -0x10000
|
||||
struct {GSVector4i i, f;} lod; // lcm == 1
|
||||
};
|
||||
|
||||
__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own
|
||||
|
@ -144,5 +146,20 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has
|
|||
|
||||
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
|
||||
|
||||
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov, lod;} temp;
|
||||
struct
|
||||
{
|
||||
GSVector4i z, f;
|
||||
GSVector4i s, t, q;
|
||||
GSVector4i rb, ga;
|
||||
GSVector4i zs, zd;
|
||||
GSVector4i uf, vf;
|
||||
GSVector4i cov;
|
||||
|
||||
// mipmapping
|
||||
struct {GSVector4i i, f;} lod;
|
||||
GSVector4i uv[2];
|
||||
GSVector4i uv_minmax[2];
|
||||
GSVector4i trb, tga;
|
||||
GSVector4i test;
|
||||
} temp;
|
||||
};
|
||||
|
|
|
@ -172,7 +172,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
{
|
||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
if(m_sel.mmin && !m_sel.lcm)
|
||||
{
|
||||
vmovhps(ptr[&m_local.d4.stq.z], xmm1);
|
||||
}
|
||||
|
@ -188,7 +188,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
vmovaps(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mmin && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -202,7 +202,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
|
||||
vmulps(xmm2, xmm1, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
if(m_sel.fst && !(m_sel.mmin && !m_sel.lcm))
|
||||
{
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
|
|
|
@ -178,7 +178,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
{
|
||||
// m_local.d4.st = GSVector4i(t * 4.0f);
|
||||
|
||||
if(m_sel.mipmap && !m_sel.lcm)
|
||||
if(m_sel.mmin && !m_sel.lcm)
|
||||
{
|
||||
movhps(ptr[&m_local.d4.stq.z], xmm1);
|
||||
}
|
||||
|
@ -194,7 +194,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(ptr[&m_local.d4.stq], xmm1);
|
||||
}
|
||||
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
for(int j = 0, k = m_sel.fst && !(m_sel.mmin && !m_sel.lcm) ? 2 : 3; j < k; j++)
|
||||
{
|
||||
// GSVector4 ds = t.xxxx();
|
||||
// GSVector4 dt = t.yyyy();
|
||||
|
@ -210,7 +210,7 @@ void GSSetupPrimCodeGenerator::Texture()
|
|||
movaps(xmm2, xmm1);
|
||||
mulps(xmm2, Xmm(4 + i));
|
||||
|
||||
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
|
||||
if(m_sel.fst && !(m_sel.mmin && !m_sel.lcm))
|
||||
{
|
||||
// m_local.d[i].s/t = GSVector4i(v);
|
||||
|
||||
|
|
|
@ -33,17 +33,17 @@ GSTextureCacheSW::~GSTextureCacheSW()
|
|||
RemoveAll();
|
||||
}
|
||||
|
||||
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
|
||||
const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, uint32 tw0)
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
|
||||
GSTexture* t = NULL;
|
||||
Texture* t = NULL;
|
||||
|
||||
list<GSTexture*>& m = m_map[TEX0.TBP0 >> 5];
|
||||
list<Texture*>& m = m_map[TEX0.TBP0 >> 5];
|
||||
|
||||
for(list<GSTexture*>::iterator i = m.begin(); i != m.end(); i++)
|
||||
for(list<Texture*>::iterator i = m.begin(); i != m.end(); i++)
|
||||
{
|
||||
GSTexture* t2 = *i;
|
||||
Texture* t2 = *i;
|
||||
|
||||
if(((TEX0.u32[0] ^ t2->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t2->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||
{
|
||||
|
@ -55,6 +55,11 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
|
|||
continue;
|
||||
}
|
||||
|
||||
if(tw0 != 0 && t2->m_tw != tw0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
m.splice(m.begin(), m, i);
|
||||
|
||||
t = t2;
|
||||
|
@ -68,7 +73,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
|
|||
{
|
||||
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
||||
t = new GSTexture(m_state, o);
|
||||
t = new Texture(m_state, o, tw0);
|
||||
|
||||
m_textures.insert(t);
|
||||
|
||||
|
@ -98,7 +103,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
|
|||
{
|
||||
m_pages[i] = 0;
|
||||
|
||||
list<GSTexture*>* m = &m_map[i << 5];
|
||||
list<Texture*>* m = &m_map[i << 5];
|
||||
|
||||
unsigned long j;
|
||||
|
||||
|
@ -144,11 +149,11 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r
|
|||
|
||||
if(page < MAX_PAGES)
|
||||
{
|
||||
const list<GSTexture*>& map = m_map[page];
|
||||
const list<Texture*>& map = m_map[page];
|
||||
|
||||
for(list<GSTexture*>::const_iterator i = map.begin(); i != map.end(); i++)
|
||||
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
|
||||
{
|
||||
GSTexture* t = *i;
|
||||
Texture* t = *i;
|
||||
|
||||
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
|
||||
{
|
||||
|
@ -173,17 +178,17 @@ void GSTextureCacheSW::RemoveAll()
|
|||
}
|
||||
}
|
||||
|
||||
void GSTextureCacheSW::RemoveAt(GSTexture* t)
|
||||
void GSTextureCacheSW::RemoveAt(Texture* t)
|
||||
{
|
||||
m_textures.erase(t);
|
||||
|
||||
for(uint32 start = t->m_TEX0.TBP0 >> 5, end = countof(m_map) - 1; start <= end; start++)
|
||||
{
|
||||
list<GSTexture*>& m = m_map[start];
|
||||
list<Texture*>& m = m_map[start];
|
||||
|
||||
for(list<GSTexture*>::iterator i = m.begin(); i != m.end(); )
|
||||
for(list<Texture*>::iterator i = m.begin(); i != m.end(); )
|
||||
{
|
||||
list<GSTexture*>::iterator j = i++;
|
||||
list<Texture*>::iterator j = i++;
|
||||
|
||||
if(*j == t) {m.erase(j); break;}
|
||||
}
|
||||
|
@ -194,11 +199,11 @@ void GSTextureCacheSW::RemoveAt(GSTexture* t)
|
|||
|
||||
void GSTextureCacheSW::IncAge()
|
||||
{
|
||||
for(hash_set<GSTexture*>::iterator i = m_textures.begin(); i != m_textures.end(); )
|
||||
for(hash_set<Texture*>::iterator i = m_textures.begin(); i != m_textures.end(); )
|
||||
{
|
||||
hash_set<GSTexture*>::iterator j = i++;
|
||||
hash_set<Texture*>::iterator j = i++;
|
||||
|
||||
GSTexture* t = *j;
|
||||
Texture* t = *j;
|
||||
|
||||
if(++t->m_age > 30)
|
||||
{
|
||||
|
@ -209,18 +214,18 @@ void GSTextureCacheSW::IncAge()
|
|||
|
||||
//
|
||||
|
||||
GSTextureCacheSW::GSTexture::GSTexture(GSState* state, const GSOffset* offset)
|
||||
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0)
|
||||
: m_state(state)
|
||||
, m_offset(offset)
|
||||
, m_buff(NULL)
|
||||
, m_tw(0)
|
||||
, m_tw(tw0)
|
||||
, m_age(0)
|
||||
, m_complete(false)
|
||||
{
|
||||
memset(m_valid, 0, sizeof(m_valid));
|
||||
}
|
||||
|
||||
GSTextureCacheSW::GSTexture::~GSTexture()
|
||||
GSTextureCacheSW::Texture::~Texture()
|
||||
{
|
||||
if(m_buff)
|
||||
{
|
||||
|
@ -228,7 +233,7 @@ GSTextureCacheSW::GSTexture::~GSTexture()
|
|||
}
|
||||
}
|
||||
|
||||
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
|
||||
bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
|
||||
{
|
||||
if(m_complete)
|
||||
{
|
||||
|
@ -255,7 +260,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
{
|
||||
// FIXME:
|
||||
// - marking a block prevents fetching it again to a different part of the texture
|
||||
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited
|
||||
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited (onimusha 3 intro / sidewalk)
|
||||
|
||||
r = GSVector4i(0, 0, tw, th);
|
||||
}
|
||||
|
@ -269,18 +274,25 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
m_buff = _aligned_malloc(tw * th * sizeof(uint32), 32);
|
||||
uint32 tw0 = std::max<int>(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
|
||||
|
||||
if(m_tw == 0)
|
||||
{
|
||||
m_tw = tw0;
|
||||
}
|
||||
else
|
||||
{
|
||||
ASSERT(m_tw >= tw0);
|
||||
}
|
||||
|
||||
uint32 pitch = (1 << m_tw) << shift;
|
||||
|
||||
m_buff = _aligned_malloc(pitch * th * 4, 32);
|
||||
|
||||
if(m_buff == NULL)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
for(uint32 i = 0, j = tw * th * sizeof(uint8); i < j; i++) ((uint8*)m_buff)[i] = 0xff;
|
||||
#endif
|
||||
|
||||
m_tw = std::max<int>(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
|
||||
}
|
||||
|
||||
GSLocalMemory& mem = m_state->m_mem;
|
||||
|
@ -351,3 +363,46 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
|
|||
|
||||
return true;
|
||||
}
|
||||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
bool GSTextureCacheSW::Texture::Save(const string& fn, bool dds) const
|
||||
{
|
||||
const uint32* RESTRICT clut = m_state->m_mem.m_clut;
|
||||
|
||||
int w = 1 << m_TEX0.TW;
|
||||
int h = 1 << m_TEX0.TH;
|
||||
|
||||
GSTextureSW t(0, w, h);
|
||||
|
||||
GSTexture::GSMap m;
|
||||
|
||||
if(t.Map(m, NULL))
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
|
||||
|
||||
const uint8* RESTRICT src = (uint8*)m_buff;
|
||||
int pitch = 1 << (m_tw + (psm.pal == 0 ? 2 : 0));
|
||||
|
||||
for(int j = 0; j < h; j++, src += pitch, m.bits += m.pitch)
|
||||
{
|
||||
if(psm.pal == 0)
|
||||
{
|
||||
memcpy(m.bits, src, sizeof(uint32) * w);
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i = 0; i < w; i++)
|
||||
{
|
||||
((uint32*)m.bits)[i] = clut[((uint8*)src)[i]];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
t.Unmap();
|
||||
|
||||
return t.Save(fn.c_str());
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
|
@ -26,7 +26,7 @@
|
|||
class GSTextureCacheSW
|
||||
{
|
||||
public:
|
||||
class GSTexture
|
||||
class Texture
|
||||
{
|
||||
public:
|
||||
GSState* m_state;
|
||||
|
@ -39,27 +39,28 @@ public:
|
|||
uint32 m_age;
|
||||
bool m_complete;
|
||||
|
||||
explicit GSTexture(GSState* state, const GSOffset* offset);
|
||||
virtual ~GSTexture();
|
||||
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
|
||||
virtual ~Texture();
|
||||
|
||||
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
||||
bool Save(const string& fn, bool dds = false) const;
|
||||
};
|
||||
|
||||
protected:
|
||||
GSState* m_state;
|
||||
hash_set<GSTexture*> m_textures;
|
||||
list<GSTexture*> m_map[MAX_PAGES];
|
||||
hash_set<Texture*> m_textures;
|
||||
list<Texture*> m_map[MAX_PAGES];
|
||||
uint32 m_pages[16];
|
||||
|
||||
public:
|
||||
GSTextureCacheSW(GSState* state);
|
||||
virtual ~GSTextureCacheSW();
|
||||
|
||||
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
|
||||
const Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, uint32 tw0 = 0);
|
||||
|
||||
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r);
|
||||
|
||||
void RemoveAll();
|
||||
void RemoveAt(GSTexture* t);
|
||||
void RemoveAt(Texture* t);
|
||||
void IncAge();
|
||||
};
|
||||
|
|
|
@ -2500,6 +2500,9 @@ public:
|
|||
{
|
||||
// NOTE: sign bit ignored, safe to pass negative numbers
|
||||
|
||||
// The idea behind this algorithm is to split the float into two parts, log2(m * 2^e) => log2(m) + log2(2^e) => log2(m) + e,
|
||||
// and then approximate the logarithm of the mantissa (it's 1.x when normalized, a nice short range).
|
||||
|
||||
GSVector4 one(1.0f);
|
||||
|
||||
GSVector4i i = GSVector4i::cast(*this);
|
||||
|
|
|
@ -613,9 +613,7 @@
|
|||
<ClCompile Include="GSThread.cpp" />
|
||||
<ClCompile Include="GSUtil.cpp" />
|
||||
<ClCompile Include="GSVector.cpp">
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexList.cpp" />
|
||||
<ClCompile Include="GSVertexSW.cpp" />
|
||||
|
|
Loading…
Reference in New Issue