GSdx: (almost) complete mipmapping support, if the min/mag filter differs then bilinear is used.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4440 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-17 02:55:20 +00:00
parent 288176e23d
commit 243a0f4ad1
11 changed files with 2243 additions and 398 deletions

View File

@ -44,6 +44,9 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void SampleTexture();
void Wrap(const Xmm& uv0);
void Wrap(const Xmm& uv0, const Xmm& uv1);
void SampleTextureLOD();
void WrapLOD(const Xmm& uv0);
void WrapLOD(const Xmm& uv0, const Xmm& uv1);
void AlphaTFX();
void ReadMask();
void TestAlpha();
@ -66,7 +69,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm);
#endif
void ReadTexel(const Xmm& dst, const Xmm& addr, const Xmm& temp1, const Xmm& temp2);
void ReadTexel(int pixels, int mip_offset = 0);
void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i);
void modulate16(const Xmm& a, const Operand& f, int shift);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -242,6 +242,8 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
m_tc->InvalidateVideoMem(m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM), r);
}
#include "GSTextureSW.h"
void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
{
const GSDrawingEnvironment& env = m_env;
@ -321,6 +323,8 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(PRIM->TME)
{
gd.clut = m_mem.m_clut;
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
@ -336,63 +340,53 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.tfx = TFX_DECAL;
}
if(gd.sel.fst == 0)
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, gd.sel.ltf);
const GSTextureCacheSW::Texture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
if(t == NULL) {ASSERT(0); return;}
gd.tex[0] = t->m_buff;
gd.sel.tw = t->m_tw - 3;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
{
// skip per pixel division if q is constant
GSVertexSW* v = m_vertices;
if(m_vt.m_eq.q)
{
gd.sel.fst = 1;
if(v[0].t.z != 1.0f)
{
GSVector4 w = v[0].t.zzzz().rcpnr();
for(int i = 0, j = m_count; i < j; i++)
{
v[i].t *= w;
}
}
}
else if(primclass == GS_SPRITE_CLASS)
{
gd.sel.fst = 1;
for(int i = 0, j = m_count; i < j; i += 2)
{
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
v[i + 0].t *= w;
v[i + 1].t *= w;
}
}
}
GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
{
gd.sel.mipmap = 1; // TODO: pass mmin here and store mxl to m_global for clamping the lod
//gd.sel.ltf = context->TEX1.MMIN & 1; // TODO: mmag != (mmin & 1) && lod <= 0
gd.sel.mmin = context->TEX1.MMIN >> 1;
gd.sel.lcm = context->TEX1.LCM;
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16) - 1;
int k = context->TEX1.K << 12;
gd.mxl = GSVector4((float)mxl);
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
gd.k = GSVector4((float)(0x1000 * context->TEX1.K));
gd.k = GSVector4((float)k);
// the rest is fake, should be removed later
int level = (int)(m_vt.m_lod.x + 0.5f);
level = std::min<int>(level, context->TEX1.MXL);
level = std::min<int>(level, 6);
if(level > 0)
if(gd.sel.lcm)
{
// printf("lvl %d\n", level);
int lod = std::min<int>(k, mxl);
switch(level)
gd.lod.i = GSVector4i(lod >> 16);
gd.lod.f = GSVector4i(lod & 0xffff).xxxxl().xxzz();
// TODO: lot to optimize when lod is constant
}
GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
GSVector4 tmin = m_vt.m_min.t;
GSVector4 tmax = m_vt.m_max.t;
//static int s_counter = 0;
//t->Save(format("c:/temp1/%08d_%05x_0.bmp", s_counter, context->TEX0.TBP0));
for(int i = 1, j = std::min<int>((int)context->TEX1.MXL, 6); i <= j; i++)
{
switch(i)
{
case 1:
MIP_TEX0.TBP0 = context->MIPTBP1.TBP1;
@ -422,94 +416,84 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
__assume(0);
}
ASSERT(MIP_TEX0.TBP0 != 0 && MIP_TEX0.TBW != 0);
int tw = (int)MIP_TEX0.TW - level;
int th = (int)MIP_TEX0.TH - level;
if(MIP_TEX0.TW > 0) MIP_TEX0.TW--;
if(MIP_TEX0.TH > 0) MIP_TEX0.TH--;
switch(context->TEX1.MMIN)
{
case 2: case 3: // point (min size 1)
tw = std::max<int>(tw, 0);
th = std::max<int>(th, 0);
break;
case 4: case 5: // linear (min size 8)
tw = std::max<int>(tw, 3);
th = std::max<int>(th, 3);
break;
default:
__assume(0);
}
MIP_CLAMP.MINU >>= 1;
MIP_CLAMP.MINV >>= 1;
MIP_CLAMP.MAXU >>= 1;
MIP_CLAMP.MAXV >>= 1;
// scale down the texture coordinates, including vertex trace
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
GSVector4 scale = GSVector4(1.0f) / GSVector4(1 << ((int)MIP_TEX0.TW - tw), 1 << ((int)MIP_TEX0.TH - th), 1, 1);
GSVector4i r;
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
const GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, r, gd.sel.tw + 3);
if(t == NULL) {ASSERT(0); return;}
gd.tex[i] = t->m_buff;
gd.tex[i + 1] = NULL;
//t->Save(format("c:/temp1/%08d_%05x_%d.bmp", s_counter, context->TEX0.TBP0, i));
}
//s_counter++;
m_vt.m_min.t = tmin;
m_vt.m_max.t = tmax;
}
else
{
// TODO: these shortcuts are not compatible with mipmapping, yet
if(gd.sel.fst == 0)
{
// skip per pixel division if q is constant
GSVertexSW* v = m_vertices;
for(int i = 0, j = m_count; i < j; i++)
if(m_vt.m_eq.q)
{
v[i].t *= scale;
}
gd.sel.fst = 1;
m_vt.m_min.t *= scale;
m_vt.m_max.t *= scale;
MIP_TEX0.TW = (uint32)tw;
MIP_TEX0.TH = (uint32)th;
// this shift is done even for repeat modes
MIP_CLAMP.MINU >>= level;
MIP_CLAMP.MAXU >>= level;
MIP_CLAMP.MINV >>= level;
MIP_CLAMP.MAXV >>= level;
/*
printf("%d%d%d%d%d L %d K %03x %.2f lod %.2f %.2f q %f %f\n",
m_context->TEX1.MXL,
m_context->TEX1.MMAG,
m_context->TEX1.MMIN,
PRIM->FST,
m_context->TEX1.LCM,
m_context->TEX1.L,
m_context->TEX1.K,
(float)m_context->TEX1.K / 16,
m_context->TEX1.MXL > 0 ? m_vt.m_lod.x : 0,
m_context->TEX1.MXL > 0 ? m_vt.m_lod.y : 0,
1.0f / m_vt.m_min.t.z,
1.0f / m_vt.m_max.t.z);
*/
if(s_dump)
{
uint64 frame = m_perfmon.GetFrame();
string s;
if(s_save && s_n >= s_saven)
if(v[0].t.z != 1.0f)
{
s = format("c:\\temp1\\_%05d_f%lld_tex_%05x_%d_(%d%d%d%d%d %.2f %.2f).bmp",
s_n, frame, (int)MIP_TEX0.TBP0, (int)MIP_TEX0.PSM,
m_context->TEX1.MXL,
m_context->TEX1.MMAG,
m_vt.m_filter.mmag,
m_context->TEX1.MMIN,
m_vt.m_filter.mmin,
m_context->TEX1.MXL > 0 ? m_vt.m_lod.x : 0,
m_context->TEX1.MXL > 0 ? m_vt.m_lod.y : 0
);
GSVector4 w = v[0].t.zzzz().rcpnr();
m_mem.SaveBMP(s, MIP_TEX0.TBP0, MIP_TEX0.TBW, MIP_TEX0.PSM, 1 << MIP_TEX0.TW, 1 << MIP_TEX0.TH);
for(int i = 0, j = m_count; i < j; i++)
{
v[i].t *= w;
}
}
// TODO: q is now destroyed, but since q is constant we should be able to pre-calc gd.lod and change LCM to 1
}
else if(primclass == GS_SPRITE_CLASS)
{
gd.sel.fst = 1;
for(int i = 0, j = m_count; i < j; i += 2)
{
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
v[i + 0].t *= w;
v[i + 1].t *= w;
}
// TODO: preserve q, or if there is only one sprite then see the comment above
}
}
}
if(gd.sel.ltf)
{
if(gd.sel.fst)
if(gd.sel.ltf && gd.sel.fst)
{
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
// TODO: but not when mipmapping is used!!!
GSVector4 half(0x8000, 0x8000);
GSVertexSW* v = m_vertices;
@ -521,68 +505,55 @@ void GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
}
}
GSVector4i r;
uint16 tw = 1u << context->TEX0.TW;
uint16 th = 1u << context->TEX0.TH;
GetTextureMinMax(r, MIP_TEX0, MIP_CLAMP, gd.sel.ltf);
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, r);
if(!t) {ASSERT(0); return;}
gd.tex = t->m_buff;
gd.clut = m_mem.m_clut;
gd.sel.tw = t->m_tw - 3;
uint16 tw = (uint16)(1 << MIP_TEX0.TW);
uint16 th = (uint16)(1 << MIP_TEX0.TH);
switch(MIP_CLAMP.WMS)
switch(context->CLAMP.WMS)
{
case CLAMP_REPEAT:
gd.t.min.u16[0] = tw - 1;
gd.t.max.u16[0] = 0;
gd.t.min.u16[0] = gd.t.minmax.u16[0] = tw - 1;
gd.t.max.u16[0] = gd.t.minmax.u16[2] = 0;
gd.t.mask.u32[0] = 0xffffffff;
break;
case CLAMP_CLAMP:
gd.t.min.u16[0] = 0;
gd.t.max.u16[0] = tw - 1;
gd.t.min.u16[0] = gd.t.minmax.u16[0] = 0;
gd.t.max.u16[0] = gd.t.minmax.u16[2] = tw - 1;
gd.t.mask.u32[0] = 0;
break;
case CLAMP_REGION_CLAMP:
gd.t.min.u16[0] = std::min<int>(MIP_CLAMP.MINU, tw - 1);
gd.t.max.u16[0] = std::min<int>(MIP_CLAMP.MAXU, tw - 1);
gd.t.min.u16[0] = gd.t.minmax.u16[0] = std::min<uint16>(context->CLAMP.MINU, tw - 1);
gd.t.max.u16[0] = gd.t.minmax.u16[2] = std::min<uint16>(context->CLAMP.MAXU, tw - 1);
gd.t.mask.u32[0] = 0;
break;
case CLAMP_REGION_REPEAT:
gd.t.min.u16[0] = MIP_CLAMP.MINU;
gd.t.max.u16[0] = MIP_CLAMP.MAXU;
gd.t.min.u16[0] = gd.t.minmax.u16[0] = context->CLAMP.MINU;
gd.t.max.u16[0] = gd.t.minmax.u16[2] = context->CLAMP.MAXU;
gd.t.mask.u32[0] = 0xffffffff;
break;
default:
__assume(0);
}
switch(MIP_CLAMP.WMT)
switch(context->CLAMP.WMT)
{
case CLAMP_REPEAT:
gd.t.min.u16[4] = th - 1;
gd.t.max.u16[4] = 0;
gd.t.min.u16[4] = gd.t.minmax.u16[1] = th - 1;
gd.t.max.u16[4] = gd.t.minmax.u16[3] = 0;
gd.t.mask.u32[2] = 0xffffffff;
break;
case CLAMP_CLAMP:
gd.t.min.u16[4] = 0;
gd.t.max.u16[4] = th - 1;
gd.t.min.u16[4] = gd.t.minmax.u16[1] = 0;
gd.t.max.u16[4] = gd.t.minmax.u16[3] = th - 1;
gd.t.mask.u32[2] = 0;
break;
case CLAMP_REGION_CLAMP:
gd.t.min.u16[4] = std::min<int>(MIP_CLAMP.MINV, th - 1);
gd.t.max.u16[4] = std::min<int>(MIP_CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
gd.t.min.u16[4] = gd.t.minmax.u16[1] = std::min<uint16>(context->CLAMP.MINV, th - 1);
gd.t.max.u16[4] = gd.t.minmax.u16[3] = std::min<uint16>(context->CLAMP.MAXV, th - 1); // ffx anima summon scene, when the anchor appears (th = 256, maxv > 256)
gd.t.mask.u32[2] = 0;
break;
case CLAMP_REGION_REPEAT:
gd.t.min.u16[4] = MIP_CLAMP.MINV;
gd.t.max.u16[4] = MIP_CLAMP.MAXV;
gd.t.min.u16[4] = gd.t.minmax.u16[1] = context->CLAMP.MINV;
gd.t.max.u16[4] = gd.t.minmax.u16[3] = context->CLAMP.MAXV;
gd.t.mask.u32[2] = 0xffffffff;
break;
default:

View File

@ -65,8 +65,8 @@ union GSScanlineSelector
uint32 edge:1; // 47
uint32 tw:3; // 48 (encodes values between 3 -> 10, texture cache makes sure it is at least 3)
uint32 mipmap:1; // 49
uint32 lcm:1; // 50
uint32 lcm:1; // 49
uint32 mmin:2; // 50
};
struct
@ -113,7 +113,7 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
// - if in the future drawing does not have to be synchronized per batch, the rest of GSRasterizerData should be copied here, too (scissor, prim type, vertices)
void* vm;
const void* tex;
const void* tex[7];
const uint32* clut;
const GSVector4i* dimx;
@ -125,12 +125,14 @@ __aligned(struct, 32) GSScanlineGlobalData // per batch variables, this is like
const GSVector2i* fzbc;
GSVector4i fm, zm;
struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4
struct {GSVector4i min, max, minmax, mask, invmask;} t; // [u] x 4 [v] x 4
GSVector4i aref;
GSVector4i afix;
GSVector4i frb, fga;
GSVector4 mxl;
GSVector4 k; // TEX1.K * 0x10000
GSVector4 l; // TEX1.L * -0x10000
struct {GSVector4i i, f;} lod; // lcm == 1
};
__aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has its own
@ -144,5 +146,20 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has
// these should be stored on stack as normal local variables (no free regs to use, esp cannot be saved to anywhere, and we need an aligned stack)
struct {GSVector4i z, f, s, t, q, rb, ga, zs, zd, uf, vf, cov, lod;} temp;
// Scratch area of GSVector4i values, exposed as the `temp` member.
// NOTE(review): the layout is presumably addressed directly by the generated scanline code, so do not reorder members without checking the code generators.
struct
{
GSVector4i z, f;
GSVector4i s, t, q;
GSVector4i rb, ga;
GSVector4i zs, zd;
GSVector4i uf, vf;
GSVector4i cov;
// mipmapping
// lod.i / lod.f: presumably the integer and fractional parts of the level of detail, mirroring GSScanlineGlobalData::lod (confirm)
struct {GSVector4i i, f;} lod;
// NOTE(review): uv / uv_minmax hold two entries each; likely one per sampled mip level, verify against SampleTextureLOD
GSVector4i uv[2];
GSVector4i uv_minmax[2];
GSVector4i trb, tga;
GSVector4i test;
} temp;
};

View File

@ -172,7 +172,7 @@ void GSSetupPrimCodeGenerator::Texture()
{
// m_local.d4.st = GSVector4i(t * 4.0f);
if(m_sel.mipmap && !m_sel.lcm)
if(m_sel.mmin && !m_sel.lcm)
{
vmovhps(ptr[&m_local.d4.stq.z], xmm1);
}
@ -188,7 +188,7 @@ void GSSetupPrimCodeGenerator::Texture()
vmovaps(ptr[&m_local.d4.stq], xmm1);
}
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
for(int j = 0, k = m_sel.fst && !(m_sel.mmin && !m_sel.lcm) ? 2 : 3; j < k; j++)
{
// GSVector4 ds = t.xxxx();
// GSVector4 dt = t.yyyy();
@ -202,7 +202,7 @@ void GSSetupPrimCodeGenerator::Texture()
vmulps(xmm2, xmm1, Xmm(4 + i));
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
if(m_sel.fst && !(m_sel.mmin && !m_sel.lcm))
{
// m_local.d[i].s/t = GSVector4i(v);

View File

@ -178,7 +178,7 @@ void GSSetupPrimCodeGenerator::Texture()
{
// m_local.d4.st = GSVector4i(t * 4.0f);
if(m_sel.mipmap && !m_sel.lcm)
if(m_sel.mmin && !m_sel.lcm)
{
movhps(ptr[&m_local.d4.stq.z], xmm1);
}
@ -194,7 +194,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(ptr[&m_local.d4.stq], xmm1);
}
for(int j = 0, k = m_sel.fst && !(m_sel.mipmap && !m_sel.lcm) ? 2 : 3; j < k; j++)
for(int j = 0, k = m_sel.fst && !(m_sel.mmin && !m_sel.lcm) ? 2 : 3; j < k; j++)
{
// GSVector4 ds = t.xxxx();
// GSVector4 dt = t.yyyy();
@ -210,7 +210,7 @@ void GSSetupPrimCodeGenerator::Texture()
movaps(xmm2, xmm1);
mulps(xmm2, Xmm(4 + i));
if(m_sel.fst && !(m_sel.mipmap && !m_sel.lcm))
if(m_sel.fst && !(m_sel.mmin && !m_sel.lcm))
{
// m_local.d[i].s/t = GSVector4i(v);

View File

@ -33,17 +33,17 @@ GSTextureCacheSW::~GSTextureCacheSW()
RemoveAll();
}
const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
const GSTextureCacheSW::Texture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, uint32 tw0)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
GSTexture* t = NULL;
Texture* t = NULL;
list<GSTexture*>& m = m_map[TEX0.TBP0 >> 5];
list<Texture*>& m = m_map[TEX0.TBP0 >> 5];
for(list<GSTexture*>::iterator i = m.begin(); i != m.end(); i++)
for(list<Texture*>::iterator i = m.begin(); i != m.end(); i++)
{
GSTexture* t2 = *i;
Texture* t2 = *i;
if(((TEX0.u32[0] ^ t2->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ t2->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
{
@ -55,6 +55,11 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
continue;
}
if(tw0 != 0 && t2->m_tw != tw0)
{
continue;
}
m.splice(m.begin(), m, i);
t = t2;
@ -68,7 +73,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
{
const GSOffset* o = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
t = new GSTexture(m_state, o);
t = new Texture(m_state, o, tw0);
m_textures.insert(t);
@ -98,7 +103,7 @@ const GSTextureCacheSW::GSTexture* GSTextureCacheSW::Lookup(const GIFRegTEX0& TE
{
m_pages[i] = 0;
list<GSTexture*>* m = &m_map[i << 5];
list<Texture*>* m = &m_map[i << 5];
unsigned long j;
@ -144,11 +149,11 @@ void GSTextureCacheSW::InvalidateVideoMem(const GSOffset* o, const GSVector4i& r
if(page < MAX_PAGES)
{
const list<GSTexture*>& map = m_map[page];
const list<Texture*>& map = m_map[page];
for(list<GSTexture*>::const_iterator i = map.begin(); i != map.end(); i++)
for(list<Texture*>::const_iterator i = map.begin(); i != map.end(); i++)
{
GSTexture* t = *i;
Texture* t = *i;
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
{
@ -173,17 +178,17 @@ void GSTextureCacheSW::RemoveAll()
}
}
void GSTextureCacheSW::RemoveAt(GSTexture* t)
void GSTextureCacheSW::RemoveAt(Texture* t)
{
m_textures.erase(t);
for(uint32 start = t->m_TEX0.TBP0 >> 5, end = countof(m_map) - 1; start <= end; start++)
{
list<GSTexture*>& m = m_map[start];
list<Texture*>& m = m_map[start];
for(list<GSTexture*>::iterator i = m.begin(); i != m.end(); )
for(list<Texture*>::iterator i = m.begin(); i != m.end(); )
{
list<GSTexture*>::iterator j = i++;
list<Texture*>::iterator j = i++;
if(*j == t) {m.erase(j); break;}
}
@ -194,11 +199,11 @@ void GSTextureCacheSW::RemoveAt(GSTexture* t)
void GSTextureCacheSW::IncAge()
{
for(hash_set<GSTexture*>::iterator i = m_textures.begin(); i != m_textures.end(); )
for(hash_set<Texture*>::iterator i = m_textures.begin(); i != m_textures.end(); )
{
hash_set<GSTexture*>::iterator j = i++;
hash_set<Texture*>::iterator j = i++;
GSTexture* t = *j;
Texture* t = *j;
if(++t->m_age > 30)
{
@ -209,18 +214,18 @@ void GSTextureCacheSW::IncAge()
//
GSTextureCacheSW::GSTexture::GSTexture(GSState* state, const GSOffset* offset)
GSTextureCacheSW::Texture::Texture(GSState* state, const GSOffset* offset, uint32 tw0)
: m_state(state)
, m_offset(offset)
, m_buff(NULL)
, m_tw(0)
, m_tw(tw0)
, m_age(0)
, m_complete(false)
{
memset(m_valid, 0, sizeof(m_valid));
}
GSTextureCacheSW::GSTexture::~GSTexture()
GSTextureCacheSW::Texture::~Texture()
{
if(m_buff)
{
@ -228,7 +233,7 @@ GSTextureCacheSW::GSTexture::~GSTexture()
}
}
bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
bool GSTextureCacheSW::Texture::Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& rect)
{
if(m_complete)
{
@ -255,7 +260,7 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
{
// FIXME:
// - marking a block prevents fetching it again to a different part of the texture
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited
// - only a real issue for TBW = 1 mipmap levels, where the repeating part is below and often exploited (onimusha 3 intro / sidewalk)
r = GSVector4i(0, 0, tw, th);
}
@ -269,18 +274,25 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
if(m_buff == NULL)
{
m_buff = _aligned_malloc(tw * th * sizeof(uint32), 32);
uint32 tw0 = std::max<int>(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
if(m_tw == 0)
{
m_tw = tw0;
}
else
{
ASSERT(m_tw >= tw0);
}
uint32 pitch = (1 << m_tw) << shift;
m_buff = _aligned_malloc(pitch * th * 4, 32);
if(m_buff == NULL)
{
return false;
}
#ifdef DEBUG
for(uint32 i = 0, j = tw * th * sizeof(uint8); i < j; i++) ((uint8*)m_buff)[i] = 0xff;
#endif
m_tw = std::max<int>(TEX0.TW, 5 - shift); // makes one row 32 bytes at least, matches the smallest block size that is allocated above for m_buff
}
GSLocalMemory& mem = m_state->m_mem;
@ -351,3 +363,46 @@ bool GSTextureCacheSW::GSTexture::Update(const GIFRegTEX0& TEX0, const GIFRegTEX
return true;
}
#include "GSTextureSW.h"
bool GSTextureCacheSW::Texture::Save(const string& fn, bool dds) const
{
const uint32* RESTRICT clut = m_state->m_mem.m_clut;
int w = 1 << m_TEX0.TW;
int h = 1 << m_TEX0.TH;
GSTextureSW t(0, w, h);
GSTexture::GSMap m;
if(t.Map(m, NULL))
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_TEX0.PSM];
const uint8* RESTRICT src = (uint8*)m_buff;
int pitch = 1 << (m_tw + (psm.pal == 0 ? 2 : 0));
for(int j = 0; j < h; j++, src += pitch, m.bits += m.pitch)
{
if(psm.pal == 0)
{
memcpy(m.bits, src, sizeof(uint32) * w);
}
else
{
for(int i = 0; i < w; i++)
{
((uint32*)m.bits)[i] = clut[((uint8*)src)[i]];
}
}
}
t.Unmap();
return t.Save(fn.c_str());
}
return false;
}

View File

@ -26,7 +26,7 @@
class GSTextureCacheSW
{
public:
class GSTexture
class Texture
{
public:
GSState* m_state;
@ -39,27 +39,28 @@ public:
uint32 m_age;
bool m_complete;
explicit GSTexture(GSState* state, const GSOffset* offset);
virtual ~GSTexture();
explicit Texture(GSState* state, const GSOffset* offset, uint32 tw0);
virtual ~Texture();
bool Update(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
bool Save(const string& fn, bool dds = false) const;
};
protected:
GSState* m_state;
hash_set<GSTexture*> m_textures;
list<GSTexture*> m_map[MAX_PAGES];
hash_set<Texture*> m_textures;
list<Texture*> m_map[MAX_PAGES];
uint32 m_pages[16];
public:
GSTextureCacheSW(GSState* state);
virtual ~GSTextureCacheSW();
const GSTexture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
const Texture* Lookup(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, uint32 tw0 = 0);
void InvalidateVideoMem(const GSOffset* o, const GSVector4i& r);
void RemoveAll();
void RemoveAt(GSTexture* t);
void RemoveAt(Texture* t);
void IncAge();
};

View File

@ -2500,6 +2500,9 @@ public:
{
// NOTE: sign bit ignored, safe to pass negative numbers
// The idea behind this algorithm is to split the float into two parts, log2(m * 2^e) => log2(m) + log2(2^e) => log2(m) + e,
// and then approximate the logarithm of the mantissa (it's 1.x when normalized, a nice short range).
GSVector4 one(1.0f);
GSVector4i i = GSVector4i::cast(*this);

View File

@ -613,9 +613,7 @@
<ClCompile Include="GSThread.cpp" />
<ClCompile Include="GSUtil.cpp" />
<ClCompile Include="GSVector.cpp">
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">AssemblyAndSourceCode</AssemblerOutput>
</ClCompile>
<ClCompile Include="GSVertexList.cpp" />
<ClCompile Include="GSVertexSW.cpp" />