GSdx: moved some code around and optimized texture caching a bit; there may be a slight speed-up in hw mode for games that use many textures.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1425 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-06-23 04:12:32 +00:00
parent bc5c78f124
commit b284fae2d5
34 changed files with 1667 additions and 1521 deletions

View File

@ -73,6 +73,11 @@ EXPORT_C GSsetBaseMem(uint8* mem)
// Plugin initialization entry point.
// Returns 0 when GSUtil::CheckSSE() reports success and -1 otherwise.
EXPORT_C_(INT32) GSinit()
{
    return GSUtil::CheckSSE() ? 0 : -1;
}
@ -113,11 +118,6 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer)
#endif
if(!GSUtil::CheckSSE())
{
return -1;
}
switch(renderer)
{
default:

View File

@ -81,6 +81,7 @@ public:
virtual void Flip() {}
virtual void BeginScene() {}
virtual void DrawPrimitive() {};
virtual void EndScene() {}
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}

View File

@ -302,7 +302,7 @@ GSTexture* GSDevice10::Create(int type, int w, int h, int format)
desc.BindFlags = D3D10_BIND_RENDER_TARGET | D3D10_BIND_SHADER_RESOURCE;
break;
case GSTexture::DepthStencil:
desc.BindFlags = D3D10_BIND_DEPTH_STENCIL;
desc.BindFlags = D3D10_BIND_DEPTH_STENCIL;// | D3D10_BIND_SHADER_RESOURCE;
break;
case GSTexture::Texture:
desc.BindFlags = D3D10_BIND_SHADER_RESOURCE;
@ -344,7 +344,7 @@ GSTexture* GSDevice10::CreateRenderTarget(int w, int h, int format)
GSTexture* GSDevice10::CreateDepthStencil(int w, int h, int format)
{
return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT);
return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS
}
GSTexture* GSDevice10::CreateTexture(int w, int h, int format)

View File

@ -426,3 +426,314 @@ void GSRenderer::KeyEvent(GSKeyEventData* e)
}
}
// Computes in r the rectangle (x, y = left/top, z, w = right/bottom, exclusive) of the
// current context's texture that has to be considered for the pending draw.
// The full (1 << TW) x (1 << TH) rectangle is first narrowed by the CLAMP region settings,
// then by the texture coordinate bounds stored in m_vt, and finally aligned outwards to the
// block size of the texture's pixel format and clipped against the full texture rectangle.
// Requires m_vt to have been updated for the current batch.
void GSRenderer::GetTextureMinMax(GSVector4i& r)
{
const GSDrawingContext* context = m_context;
// TW/TH are used as shift counts: the texture is (1 << tw) x (1 << th) texels
int tw = context->TEX0.TW;
int th = context->TEX0.TH;
int w = 1 << tw;
int h = 1 << th;
GSVector4i tr(0, 0, w, h);
int wms = context->CLAMP.WMS;
int wmt = context->CLAMP.WMT;
int minu = (int)context->CLAMP.MINU;
int minv = (int)context->CLAMP.MINV;
int maxu = (int)context->CLAMP.MAXU;
int maxv = (int)context->CLAMP.MAXV;
// vr starts as the whole texture and is only ever shrunk (except for REGION_REPEAT, which replaces it)
GSVector4i vr = tr;
// horizontal limits implied by the wrap mode alone
switch(wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.x < minu) vr.x = minu;
if(vr.z > maxu + 1) vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// in this mode MAXU is used as the left edge and MINU + 1 as the width
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
// vertical limits implied by the wrap mode alone (same scheme as above)
switch(wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.y < minv) vr.y = minv;
if(vr.w > maxv + 1) vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
// Refine with the actual texture coordinate range of the batch.
// NOTE(review): the sum reaches 6 presumably only when both axes are CLAMP_REGION_REPEAT
// (enum value 3 each) - confirm against the CLAMP_* definitions.
if(wms + wmt < 6)
{
// (min.s, min.t, max.s, max.t)
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if(context->TEX1.IsLinear())
{
// widen the range by 0x8000 on each side when linear filtering is on;
// presumably half a texel in the fixed-point scale undone by the shift below - TODO confirm
st += GSVector4(-0x8000, 0x8000).xxyy();
}
GSVector4i uv = GSVector4i(st).sra32(16);
// u and v are only assigned (and only read, via mask) when at least one axis repeats
GSVector4i u, v;
int mask = 0;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
// coordinates wrapped into the texture: keep the low tw / th bits
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
// index of the texture repetition each bound falls into
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
// mask bits are set where the min and max bounds lie in the same repetition;
// NOTE(review): exact lane-to-bit mapping depends on upl32/uph32/mask() - verify in GSVector4i
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
uv = uv.rintersect(tr);
switch(wms)
{
case CLAMP_REPEAT:
// only narrow when the mask says both horizontal bounds share a repetition
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT: // TODO
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
// only narrow when the mask says both vertical bounds share a repetition
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT: // TODO
break;
default:
__assume(0);
}
}
// grow to whole blocks of the texture's pixel format, then clip to the texture
GSVector2i bs = GSLocalMemory::m_psm[context->TEX0.PSM].bs;
r = vr.ralign<GSVector4i::Outside>(bs).rintersect(tr);
}
// Computes the range of alpha values the current batch can produce and stores it in
// m_vt.m_alpha (min, max, valid). The vertex alpha range comes from m_vt.m_min.c / m_vt.m_max.c;
// when texturing with TCC set, it is combined with the alpha range of the texture according
// to the texture function (TFX). The result is cached: nothing is recomputed while
// m_vt.m_alpha.valid is set.
void GSRenderer::GetAlphaMinMax()
{
    if(m_vt.m_alpha.valid)
    {
        return;
    }

    const GSDrawingEnvironment& env = m_env;
    const GSDrawingContext* context = m_context;

    // a.x / a.z end up as the final min / max; a.y / a.w receive the texture alpha min / max below.
    // NOTE(review): lane selection relies on uph32().zzww() - verify against GSVector4i.
    GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();

    if(PRIM->TME && context->TEX0.TCC)
    {
        uint32 bpp = GSLocalMemory::m_psm[context->TEX0.PSM].trbpp;

        if(bpp == 32)
        {
            // alpha comes straight from the texels: any value is possible
            a.y = 0;
            a.w = 0xff;
        }
        else if(bpp == 24)
        {
            // no alpha channel in the texel: TEXA.TA0 is used, or 0 may appear when AEM is set
            a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
            a.w = env.TEXA.TA0;
        }
        else if(bpp == 16)
        {
            // alpha is one of TEXA.TA0 / TEXA.TA1, or 0 may appear when AEM is set
            a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
            a.w = max(env.TEXA.TA0, env.TEXA.TA1);
        }
        else
        {
            // remaining formats: take the range from the CLUT (presumably palettized textures)
            m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
        }

        switch(context->TEX0.TFX)
        {
        case TFX_MODULATE:
            // vertex alpha * texture alpha, scaled down by 7 bits and saturated to 8 bits
            a.x = (a.x * a.y) >> 7;
            a.z = (a.z * a.w) >> 7;
            if(a.x > 0xff) a.x = 0xff;
            if(a.z > 0xff) a.z = 0xff;
            break;
        case TFX_DECAL:
            // texture alpha only
            a.x = a.y;
            a.z = a.w;
            break;
        case TFX_HIGHLIGHT:
            // vertex alpha + texture alpha, saturated to 8 bits
            a.x = a.x + a.y;
            a.z = a.z + a.w;
            if(a.x > 0xff) a.x = 0xff;
            if(a.z > 0xff) a.z = 0xff;
            break;
        case TFX_HIGHLIGHT2:
            // texture alpha only
            a.x = a.y;
            a.z = a.w;
            break;
        default:
            __assume(0);
        }
    }

    m_vt.m_alpha.min = a.x;
    m_vt.m_alpha.max = a.z;
    m_vt.m_alpha.valid = true;
}
// Tries to resolve the alpha test for the whole batch up front, using the alpha range
// provided by GetAlphaMinMax().
// Returns false when the range straddles AREF so that the outcome differs per pixel;
// fm and zm are left untouched in that case.
// Returns true when every pixel passes or every pixel fails. If every pixel fails,
// fm (frame buffer mask) and zm (z mask) are updated according to TEST.AFAIL.
bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm)
{
    const GSDrawingContext* ctx = m_context;

    // true: all pixels pass, false: all pixels fail
    bool allPass = true;

    switch(ctx->TEST.ATST)
    {
    case ATST_ALWAYS:
        break;

    case ATST_NEVER:
        allPass = false;
        break;

    default:
        {
            GetAlphaMinMax();

            const int lo = m_vt.m_alpha.min;
            const int hi = m_vt.m_alpha.max;
            const int ref = ctx->TEST.AREF;

            // In every case below: bail out with false when [lo, hi] straddles the
            // reference value, otherwise record a uniform failure.
            switch(ctx->TEST.ATST)
            {
            case ATST_LESS:
                if(hi >= ref)
                {
                    if(lo < ref) return false;
                    allPass = false;
                }
                break;

            case ATST_LEQUAL:
                if(hi > ref)
                {
                    if(lo <= ref) return false;
                    allPass = false;
                }
                break;

            case ATST_EQUAL:
                if(!(lo == ref && hi == ref))
                {
                    if(lo <= ref && hi >= ref) return false;
                    allPass = false;
                }
                break;

            case ATST_GEQUAL:
                if(lo < ref)
                {
                    if(hi >= ref) return false;
                    allPass = false;
                }
                break;

            case ATST_GREATER:
                if(lo <= ref)
                {
                    if(hi > ref) return false;
                    allPass = false;
                }
                break;

            case ATST_NOTEQUAL:
                if(lo == ref && hi == ref)
                {
                    allPass = false;
                }
                else if(lo <= ref && hi >= ref)
                {
                    return false;
                }
                break;

            default:
                __assume(0);
            }
        }
        break;
    }

    if(allPass)
    {
        return true;
    }

    // Uniform failure: mask out whatever AFAIL says must not be written.
    switch(ctx->TEST.AFAIL)
    {
    case AFAIL_KEEP:
        zm = 0xffffffff;
        fm = 0xffffffff;
        break;

    case AFAIL_FB_ONLY:
        zm = 0xffffffff;
        break;

    case AFAIL_ZB_ONLY:
        fm = 0xffffffff;
        break;

    case AFAIL_RGB_ONLY:
        fm |= 0xff000000;
        zm = 0xffffffff;
        break;

    default:
        __assume(0);
    }

    return true;
}
// Reports whether the pending draw can be treated as opaque, i.e. rendered without blending.
// - PRIM->AA1 set: never opaque.
// - PRIM->ABE clear: always opaque.
// - Otherwise the range of the blend factor selected by ALPHA.C is determined (when ALPHA.A
//   differs from ALPHA.B) and the final decision is delegated to ALPHA.IsOpaque(amin, amax).
bool GSRenderer::IsOpaque()
{
    if(PRIM->AA1)
    {
        return false;
    }

    if(!PRIM->ABE)
    {
        return true;
    }

    const GSDrawingContext* context = m_context;

    // default: nothing is known about the factor, assume the full range
    int amin = 0, amax = 0xff;

    if(context->ALPHA.A != context->ALPHA.B)
    {
        if(context->ALPHA.C == 0)
        {
            // factor is the alpha produced by the batch itself
            GetAlphaMinMax();

            amin = m_vt.m_alpha.min;
            amax = m_vt.m_alpha.max;
        }
        else if(context->ALPHA.C == 1)
        {
            // 24-bit frame formats are treated as having a constant alpha of 0x80;
            // for other formats the full range is kept
            if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24)
            {
                amin = amax = 0x80;
            }
        }
        else if(context->ALPHA.C == 2)
        {
            // factor is the constant ALPHA.FIX
            amin = amax = context->ALPHA.FIX;
        }
    }

    return context->ALPHA.IsOpaque(amin, amax);
}

View File

@ -24,6 +24,7 @@
#include "GSdx.h"
#include "GSWnd.h"
#include "GSState.h"
#include "GSVertexTrace.h"
#include "GSVertexList.h"
#include "GSSettingsDlg.h"
#include "GSCapture.h"
@ -48,6 +49,15 @@ protected:
virtual void ResetDevice() {}
virtual GSTexture* GetOutput(int i) = 0;
GSVertexTrace m_vt;
// following functions need m_vt to be initialized
void GetTextureMinMax(GSVector4i& r);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
bool IsOpaque();
public:
GSWnd m_wnd;
GSDevice* m_dev;
@ -66,11 +76,6 @@ public:
virtual bool MakeSnapshot(const string& path);
virtual void KeyEvent(GSKeyEventData* e);
// Default implementation: reports the whole texture, i.e. the rectangle (0, 0, w, h).
virtual void MinMaxUV(int w, int h, GSVector4i& r)
{
r = GSVector4i(0, 0, w, h);
}
virtual bool CanUpscale()
{
return !m_nativeres;

View File

@ -46,176 +46,6 @@ protected:
__super::Reset();
}
// Computes in r the rectangle of a w x h texture that the queued vertices can sample.
// The texture coordinate range is gathered from m_vertices (only for batches of fewer than
// 100 vertices, otherwise the whole 0..1 range is assumed), combined with the CLAMP wrap
// modes, grown by one texel on each side, aligned outwards to the block size of the
// texture's pixel format and clipped against (0, 0, w, h).
void MinMaxUV(int w, int h, GSVector4i& r)
{
int wms = m_context->CLAMP.WMS;
int wmt = m_context->CLAMP.WMT;
int minu = (int)m_context->CLAMP.MINU;
int minv = (int)m_context->CLAMP.MINV;
int maxu = (int)m_context->CLAMP.MAXU;
int maxv = (int)m_context->CLAMP.MAXV;
GSVector4i vr = GSVector4i(0, 0, w, h);
// candidate rectangles, indexed below by wrap mode: wm[0] is used for CLAMP_REPEAT,
// wm[wms] / wm[wmt] for CLAMP_CLAMP and CLAMP_REGION_CLAMP
// (presumably enum values 1 and 2 - confirm against the CLAMP_* definitions)
GSVector4i wm[3];
// wm is only filled here; the REGION_REPEAT cases further down never read it
if(wms + wmt < 6)
{
// normalized (min.u, min.v, max.u, max.v)
GSVector4 mm;
if(m_count < 100)
{
Vertex* v = m_vertices;
// NOTE: these two vectors shadow the integer minv / maxv declared above
GSVector4 minv(+1e10f);
GSVector4 maxv(-1e10f);
int i = 0;
if(PRIM->FST)
{
// four vertices per iteration, then the remainder one by one
for(int j = m_count - 3; i < j; i += 4)
{
GSVector4 v0 = v[i + 0].vf[0];
GSVector4 v1 = v[i + 1].vf[0];
GSVector4 v2 = v[i + 2].vf[0];
GSVector4 v3 = v[i + 3].vf[0];
minv = minv.minv((v0.minv(v1)).minv(v2.minv(v3)));
maxv = maxv.maxv((v0.maxv(v1)).maxv(v2.maxv(v3)));
}
for(int j = m_count; i < j; i++)
{
GSVector4 v0 = v[i + 0].vf[0];
minv = minv.minv(v0);
maxv = maxv.maxv(v0);
}
// scale by the reciprocal of (16 << TW, 16 << TH) to normalize the coordinates
mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).xyxy().rcpnr();
}
else
{
/*
for(int j = m_count - 3; i < j; i += 4)
{
GSVector4 v0 = GSVector4(v[i + 0].m128[0]) / GSVector4(v[i + 0].GetQ());
GSVector4 v1 = GSVector4(v[i + 1].m128[0]) / GSVector4(v[i + 1].GetQ());
GSVector4 v2 = GSVector4(v[i + 2].m128[0]) / GSVector4(v[i + 2].GetQ());
GSVector4 v3 = GSVector4(v[i + 3].m128[0]) / GSVector4(v[i + 3].GetQ());
minv = minv.minv((v0.minv(v1)).minv(v2.minv(v3)));
maxv = maxv.maxv((v0.maxv(v1)).maxv(v2.maxv(v3)));
}
for(int j = m_count; i < j; i++)
{
GSVector4 v0 = GSVector4(v[i + 0].m128[0]) / GSVector4(v[i + 0].GetQ());;
minv = minv.minv(v0);
maxv = maxv.maxv(v0);
}
mm = minv.xyxy(maxv);
*/
// just can't beat the compiler generated scalar sse code with packed div or rcp
mm.x = mm.y = +1e10;
mm.z = mm.w = -1e10;
for(int j = m_count; i < j; i++)
{
// divide the texture coordinates by Q; this local w shadows the width parameter
float w = 1.0f / v[i].GetQ();
float x = v[i].t.x * w;
if(x < mm.x) mm.x = x;
if(x > mm.z) mm.z = x;
float y = v[i].t.y * w;
if(y < mm.y) mm.y = y;
if(y > mm.w) mm.w = y;
}
}
}
else
{
// large batch: do not scan, assume the whole texture is referenced
mm = GSVector4(0.0f, 0.0f, 1.0f, 1.0f);
}
GSVector4 v0 = GSVector4(vr);
// (w, h, w, h)
GSVector4 v1 = v0.zwzw();
GSVector4 mmf = mm.floor();
// lanes where min and max have the same integer part
GSVector4 mask = mmf.xyxy() == mmf.zwzw();
// repeat: fractional part scaled to texels where the mask is set, whole texture otherwise
wm[0] = GSVector4i(v0.blend8((mm - mmf) * v1, mask));
mm *= v1;
// clamp: texel range saturated to the texture size
wm[1] = GSVector4i(mm.sat(GSVector4::zero(), v1));
// region clamp: texel range saturated to (minu, minv, maxu, maxv)
wm[2] = GSVector4i(mm.sat(GSVector4(minu, minv, maxu, maxv)));
}
GSVector4i v;
switch(wms)
{
case CLAMP_REPEAT:
v = wm[0];
if(v.x == 0 && v.z != w) v.z = w; // FIXME
vr.x = v.x;
vr.z = v.z;
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
v = wm[wms];
if(v.x > v.z) v.x = v.z;
vr.x = v.x;
vr.z = v.z;
break;
case CLAMP_REGION_REPEAT:
// MAXU is used as the left edge and MINU + 1 as the width
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
v = wm[0];
if(v.y == 0 && v.w != h) v.w = h; // FIXME
vr.y = v.y;
vr.w = v.w;
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
v = wm[wmt];
if(v.y > v.w) v.y = v.w;
vr.y = v.y;
vr.w = v.w;
break;
case CLAMP_REGION_REPEAT:
// MAXV is used as the top edge and MINV + 1 as the height
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
r = vr + GSVector4i(-1, -1, 1, 1); // one more pixel because of bilinear filtering
// grow to whole blocks of the texture's pixel format, then clip to the texture
GSVector2i bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs;
r = r.ralign<GSVector4i::Outside>(bs).rintersect(GSVector4i(0, 0, w, h));
}
void VSync(int field)
{
__super::VSync(field);
@ -285,10 +115,9 @@ protected:
void Draw()
{
if(IsBadFrame(m_skip))
{
return;
}
if(IsBadFrame(m_skip)) return;
m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM), PRIM, m_context);
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
@ -311,7 +140,13 @@ protected:
if(PRIM->TME)
{
tex = m_tc->GetTexture();
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
GSVector4i r;
GetTextureMinMax(r);
tex = m_tc->GetTexture(r);
if(!tex) return;
}
@ -366,7 +201,37 @@ protected:
return;
}
Draw(prim, rt->m_texture, ds->m_texture, tex);
// skip alpha test if possible
GIFRegTEST TEST = context->TEST;
GIFRegFRAME FRAME = context->FRAME;
GIFRegZBUF ZBUF = context->ZBUF;
uint32 fm = context->FRAME.FBMSK;
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS)
{
if(TryAlphaTest(fm, zm))
{
context->TEST.ATE = 0;
}
}
context->FRAME.FBMSK = fm;
context->ZBUF.ZMSK = zm != 0;
//
Draw(GSUtil::GetPrimClass(prim), rt->m_texture, ds->m_texture, tex);
//
context->TEST = TEST;
context->FRAME = FRAME;
context->ZBUF = ZBUF;
//
OverrideOutput();
@ -396,7 +261,7 @@ protected:
m_tc->InvalidateTextures(context->FRAME, context->ZBUF);
}
virtual void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) = 0;
virtual void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) = 0;
virtual bool OverrideInput(int& prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* t)
{

View File

@ -169,44 +169,26 @@ void GSRendererHW10::VertexKick(bool skip)
}
}
void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex)
void GSRendererHW10::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
/*
if(s_dump)
{
TRACE(_T("\n"));
TRACE(_T("PRIM = %d, ZMSK = %d, ZTE = %d, ZTST = %d, ATE = %d, ATST = %d, AFAIL = %d, AREF = %02x\n"),
PRIM->PRIM, context->ZBUF.ZMSK,
context->TEST.ZTE, context->TEST.ZTST,
context->TEST.ATE, context->TEST.ATST, context->TEST.AFAIL, context->TEST.AREF);
for(int i = 0; i < m_count; i++)
{
TRACE(_T("[%d] %3.0f %3.0f %3.0f %3.0f\n"), i, (float)m_vertices[i].p.x / 16, (float)m_vertices[i].p.y / 16, (float)m_vertices[i].p.z, (float)m_vertices[i].a);
}
}
*/
D3D10_PRIMITIVE_TOPOLOGY topology;
int prims = 0;
switch(prim)
switch(primclass)
{
case GS_POINTLIST:
case GS_POINT_CLASS:
topology = D3D10_PRIMITIVE_TOPOLOGY_POINTLIST;
prims = m_count;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
topology = D3D10_PRIMITIVE_TOPOLOGY_LINELIST;
prims = m_count / 2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLE_CLASS:
topology = D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
prims = m_count / 3;
break;
@ -227,16 +209,16 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
// om
GSTextureFX10::OMDepthStencilSelector om_dssel;
GSTextureFX::OMDepthStencilSelector om_dssel;
om_dssel.zte = context->TEST.ZTE;
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
om_dssel.date = context->FRAME.PSM != PSM_PSMCT24 ? context->TEST.DATE : 0;
GSTextureFX10::OMBlendSelector om_bsel;
GSTextureFX::OMBlendSelector om_bsel;
om_bsel.abe = PRIM->ABE || (prim == 1 || prim == 2) && PRIM->AA1;
om_bsel.abe = !IsOpaque();
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
@ -250,34 +232,38 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
// vs
GSTextureFX10::VSSelector vs_sel;
GSTextureFX::VSSelector vs_sel;
vs_sel.bppz = 0;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.prim = prim;
vs_sel.prim = primclass;
if(om_dssel.zte && om_dssel.ztst > 0 && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(WrapZ(0xffffff))
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
vs_sel.bppz = 1;
om_dssel.ztst = 1;
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(WrapZ(0xffff))
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff);
vs_sel.bppz = 2;
om_dssel.ztst = 1;
}
}
}
GSTextureFX10::VSConstantBuffer vs_cb;
GSTextureFX::VSConstantBuffer vs_cb;
float sx = 2.0f * rt->m_scale.x / (rt->GetWidth() * 16);
float sy = 2.0f * rt->m_scale.y / (rt->GetHeight() * 16);
@ -298,14 +284,14 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
// gs
GSTextureFX10::GSSelector gs_sel;
GSTextureFX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = GSUtil::GetPrimClass(prim);
gs_sel.prim = primclass;
// ps
GSTextureFX10::PSSelector ps_sel;
GSTextureFX::PSSelector ps_sel;
ps_sel.fst = PRIM->FST;
ps_sel.wms = context->CLAMP.WMS;
@ -322,21 +308,21 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
ps_sel.ltf = m_filter == 2 ? context->TEX1.IsLinear() : m_filter;
GSTextureFX10::PSSamplerSelector ps_ssel;
GSTextureFX::PSSamplerSelector ps_ssel;
ps_ssel.tau = 0;
ps_ssel.tav = 0;
ps_ssel.ltf = ps_sel.ltf;
GSTextureFX10::PSConstantBuffer ps_cb;
GSTextureFX::PSConstantBuffer ps_cb;
ps_cb.FogColor_AREF = GSVector4((int)env.FOGCOL.FCR, (int)env.FOGCOL.FCG, (int)env.FOGCOL.FCB, (int)context->TEST.AREF) / 255;
if(context->TEST.ATST == 2 || context->TEST.ATST == 5)
if(ps_sel.atst == 2 || ps_sel.atst == 5)
{
ps_cb.FogColor_AREF.a -= 0.9f / 255;
}
else if(context->TEST.ATST == 3 || context->TEST.ATST == 6)
else if(ps_sel.atst == 3 || ps_sel.atst == 6)
{
ps_cb.FogColor_AREF.a += 0.9f / 255;
}
@ -424,7 +410,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
if(context->TEST.DoFirstPass())
{
m_tfx.Draw();
m_dev->DrawPrimitive();
}
if(context->TEST.DoSecondPass())
@ -462,28 +448,13 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
m_tfx.UpdateOM(om_dssel, om_bsel, bf);
m_tfx.Draw();
m_dev->DrawPrimitive();
}
}
m_dev->EndScene();
}
// Returns true when no queued vertex has a z value <= maxz (also true for an empty batch).
// The scan stops at the first vertex that is within range, so for ordinary batches
// it ends after a single comparison.
bool GSRendererHW10::WrapZ(uint32 maxz)
{
    const int count = m_count;

    int idx = 0;

    while(idx < count && !(m_vertices[idx].p.z <= maxz))
    {
        idx++;
    }

    return idx >= count;
}
void GSRendererHW10::SetupDATE(GSTexture* rt, GSTexture* ds)
{
if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL) & 0x8000)

View File

@ -28,12 +28,10 @@
class GSRendererHW10 : public GSRendererHW<GSVertexHW10>
{
bool WrapZ(uint32 maxz);
protected:
GSTextureFX10 m_tfx;
void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex);
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex);
struct
{

View File

@ -169,44 +169,26 @@ void GSRendererHW11::VertexKick(bool skip)
}
}
void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex)
void GSRendererHW11::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
/*
if(s_dump)
{
TRACE(_T("\n"));
TRACE(_T("PRIM = %d, ZMSK = %d, ZTE = %d, ZTST = %d, ATE = %d, ATST = %d, AFAIL = %d, AREF = %02x\n"),
PRIM->PRIM, context->ZBUF.ZMSK,
context->TEST.ZTE, context->TEST.ZTST,
context->TEST.ATE, context->TEST.ATST, context->TEST.AFAIL, context->TEST.AREF);
for(int i = 0; i < m_count; i++)
{
TRACE(_T("[%d] %3.0f %3.0f %3.0f %3.0f\n"), i, (float)m_vertices[i].p.x / 16, (float)m_vertices[i].p.y / 16, (float)m_vertices[i].p.z, (float)m_vertices[i].a);
}
}
*/
D3D11_PRIMITIVE_TOPOLOGY topology;
int prims = 0;
switch(prim)
switch(primclass)
{
case GS_POINTLIST:
case GS_POINT_CLASS:
topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
prims = m_count;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
prims = m_count / 2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_TRIANGLE_CLASS:
topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
prims = m_count / 3;
break;
@ -227,16 +209,16 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
// om
GSTextureFX11::OMDepthStencilSelector om_dssel;
GSTextureFX::OMDepthStencilSelector om_dssel;
om_dssel.zte = context->TEST.ZTE;
om_dssel.ztst = context->TEST.ZTST;
om_dssel.zwe = !context->ZBUF.ZMSK;
om_dssel.date = context->FRAME.PSM != PSM_PSMCT24 ? context->TEST.DATE : 0;
GSTextureFX11::OMBlendSelector om_bsel;
GSTextureFX::OMBlendSelector om_bsel;
om_bsel.abe = PRIM->ABE || (prim == 1 || prim == 2) && PRIM->AA1;
om_bsel.abe = !IsOpaque();
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
@ -250,34 +232,38 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
// vs
GSTextureFX11::VSSelector vs_sel;
GSTextureFX::VSSelector vs_sel;
vs_sel.bppz = 0;
vs_sel.tme = PRIM->TME;
vs_sel.fst = PRIM->FST;
vs_sel.prim = prim;
vs_sel.prim = primclass;
if(om_dssel.zte && om_dssel.ztst > 0 && om_dssel.zwe)
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(WrapZ(0xffffff))
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
vs_sel.bppz = 1;
om_dssel.ztst = 1;
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(WrapZ(0xffff))
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff);
vs_sel.bppz = 2;
om_dssel.ztst = 1;
}
}
}
GSTextureFX11::VSConstantBuffer vs_cb;
GSTextureFX::VSConstantBuffer vs_cb;
float sx = 2.0f * rt->m_scale.x / (rt->GetWidth() * 16);
float sy = 2.0f * rt->m_scale.y / (rt->GetHeight() * 16);
@ -298,14 +284,14 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
// gs
GSTextureFX11::GSSelector gs_sel;
GSTextureFX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = GSUtil::GetPrimClass(prim);
gs_sel.prim = primclass;
// ps
GSTextureFX11::PSSelector ps_sel;
GSTextureFX::PSSelector ps_sel;
ps_sel.fst = PRIM->FST;
ps_sel.wms = context->CLAMP.WMS;
@ -322,21 +308,21 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
ps_sel.ltf = m_filter == 2 ? context->TEX1.IsLinear() : m_filter;
GSTextureFX11::PSSamplerSelector ps_ssel;
GSTextureFX::PSSamplerSelector ps_ssel;
ps_ssel.tau = 0;
ps_ssel.tav = 0;
ps_ssel.ltf = ps_sel.ltf;
GSTextureFX11::PSConstantBuffer ps_cb;
GSTextureFX::PSConstantBuffer ps_cb;
ps_cb.FogColor_AREF = GSVector4((int)env.FOGCOL.FCR, (int)env.FOGCOL.FCG, (int)env.FOGCOL.FCB, (int)context->TEST.AREF) / 255;
if(context->TEST.ATST == 2 || context->TEST.ATST == 5)
if(ps_sel.atst == 2 || ps_sel.atst == 5)
{
ps_cb.FogColor_AREF.a -= 0.9f / 255;
}
else if(context->TEST.ATST == 3 || context->TEST.ATST == 6)
else if(ps_sel.atst == 3 || ps_sel.atst == 6)
{
ps_cb.FogColor_AREF.a += 0.9f / 255;
}
@ -424,7 +410,7 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
if(context->TEST.DoFirstPass())
{
m_tfx.Draw();
m_dev->DrawPrimitive();
}
if(context->TEST.DoSecondPass())
@ -462,28 +448,13 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
m_tfx.UpdateOM(om_dssel, om_bsel, bf);
m_tfx.Draw();
m_dev->DrawPrimitive();
}
}
m_dev->EndScene();
}
// Returns true when no queued vertex has a z value <= maxz (also true for an empty batch).
// The scan stops at the first vertex that is within range, so for ordinary batches
// it ends after a single comparison.
bool GSRendererHW11::WrapZ(uint32 maxz)
{
    const int count = m_count;

    int idx = 0;

    while(idx < count && !(m_vertices[idx].p.z <= maxz))
    {
        idx++;
    }

    return idx >= count;
}
void GSRendererHW11::SetupDATE(GSTexture* rt, GSTexture* ds)
{
if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL) & 0x8000)

View File

@ -28,12 +28,10 @@
class GSRendererHW11 : public GSRendererHW<GSVertexHW11>
{
bool WrapZ(uint32 maxz);
protected:
GSTextureFX11 m_tfx;
void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex);
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex);
struct
{

View File

@ -171,7 +171,7 @@ void GSRendererHW9::VertexKick(bool skip)
}
}
void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex)
void GSRendererHW9::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex)
{
GSDrawingEnvironment& env = m_env;
GSDrawingContext* context = m_context;
@ -179,21 +179,18 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
D3DPRIMITIVETYPE topology;
int prims = 0;
switch(prim)
switch(primclass)
{
case GS_POINTLIST:
case GS_POINT_CLASS:
topology = D3DPT_POINTLIST;
prims = m_count;
break;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_LINE_CLASS:
topology = D3DPT_LINELIST;
prims = m_count / 2;
break;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
case GS_SPRITE:
case GS_TRIANGLE_CLASS:
case GS_SPRITE_CLASS:
topology = D3DPT_TRIANGLELIST;
prims = m_count / 3;
break;
@ -216,7 +213,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
// om
GSTextureFX9::OMDepthStencilSelector om_dssel;
GSTextureFX::OMDepthStencilSelector om_dssel;
om_dssel.zte = context->TEST.ZTE;
om_dssel.ztst = context->TEST.ZTST;
@ -224,9 +221,9 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
om_dssel.date = context->FRAME.PSM != PSM_PSMCT24 ? context->TEST.DATE : 0;
om_dssel.fba = m_fba.enabled ? context->FBA.FBA : 0;
GSTextureFX9::OMBlendSelector om_bsel;
GSTextureFX::OMBlendSelector om_bsel;
om_bsel.abe = PRIM->ABE || (prim == 1 || prim == 2) && PRIM->AA1;
om_bsel.abe = !IsOpaque();
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
om_bsel.c = context->ALPHA.C;
@ -240,7 +237,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
// vs
GSTextureFX9::VSSelector vs_sel;
GSTextureFX::VSSelector vs_sel;
vs_sel.bppz = 0;
vs_sel.tme = PRIM->TME;
@ -251,23 +248,27 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(WrapZ(0xffffff))
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt.m_min.p.z > 0xffffff);
vs_sel.bppz = 1;
om_dssel.ztst = 1;
}
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(WrapZ(0xffff))
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt.m_min.p.z > 0xffff);
vs_sel.bppz = 2;
om_dssel.ztst = 1;
}
}
}
GSTextureFX9::VSConstantBuffer vs_cb;
GSTextureFX::VSConstantBuffer vs_cb;
float sx = 2.0f * rt->m_scale.x / (rt->GetWidth() * 16);
float sy = 2.0f * rt->m_scale.y / (rt->GetHeight() * 16);
@ -286,7 +287,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
// ps
GSTextureFX9::PSSelector ps_sel;
GSTextureFX::PSSelector ps_sel;
ps_sel.fst = PRIM->FST;
ps_sel.wms = context->CLAMP.WMS;
@ -302,7 +303,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_sel.rt = tex && tex->m_rendered;
ps_sel.ltf = m_filter == 2 ? context->TEX1.IsLinear() : m_filter;
GSTextureFX9::PSSamplerSelector ps_ssel;
GSTextureFX::PSSamplerSelector ps_ssel;
ps_ssel.tau = 0;
ps_ssel.tav = 0;
@ -312,11 +313,11 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_cb.FogColor_AREF = GSVector4((int)env.FOGCOL.FCR, (int)env.FOGCOL.FCG, (int)env.FOGCOL.FCB, (int)context->TEST.AREF) / 255;
if(context->TEST.ATST == 2 || context->TEST.ATST == 5)
if(ps_sel.atst == 2 || ps_sel.atst == 5)
{
ps_cb.FogColor_AREF.a -= 0.9f / 255;
}
else if(context->TEST.ATST == 3 || context->TEST.ATST == 6)
else if(ps_sel.atst == 3 || ps_sel.atst == 6)
{
ps_cb.FogColor_AREF.a += 0.9f / 255;
}
@ -403,7 +404,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
if(context->TEST.DoFirstPass())
{
m_tfx.Draw();
m_dev->DrawPrimitive();
}
if(context->TEST.DoSecondPass())
@ -441,7 +442,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
m_tfx.UpdateOM(om_dssel, om_bsel, bf);
m_tfx.Draw();
m_dev->DrawPrimitive();
}
}
@ -450,21 +451,6 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
if(om_dssel.fba) UpdateFBA(rt);
}
bool GSRendererHW9::WrapZ(float maxz)
{
// should only run once if z values are in the z buffer range
for(int i = 0, j = m_count; i < j; i++)
{
if(m_vertices[i].p.z <= maxz)
{
return false;
}
}
return true;
}
void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds)
{
if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL) & 0x8000)

View File

@ -28,13 +28,11 @@
class GSRendererHW9 : public GSRendererHW<GSVertexHW9>
{
bool WrapZ(float maxz);
protected:
GSTextureFX9 m_tfx;
bool m_logz;
void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex);
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex);
struct
{

View File

@ -139,7 +139,7 @@ void GSRendererSW::Draw()
{
GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM);
m_vtrace.Update(m_vertices, m_count, primclass, PRIM->IIP, PRIM->TME, m_context->TEX0.TFX, m_context->TEX0.TCC);
m_vt.Update(m_vertices, m_count, primclass, PRIM, m_context);
if(m_dump)
{
@ -206,7 +206,7 @@ void GSRendererSW::Draw()
m_perfmon.Put(GSPerfMon::Prim, stats.prims);
m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels);
GSVector4i r = GSVector4i(m_vtrace.m_min.p.xyxy(m_vtrace.m_max.p)).rintersect(data.scissor);
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
GIFRegBITBLTBUF BITBLTBUF;
@ -266,261 +266,6 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
m_tc->InvalidateVideoMem(BITBLTBUF, r);
}
// Computes in r the rectangle (x, y = left/top, z, w = right/bottom, exclusive) of a w x h
// texture that has to be considered for the pending draw.
// The full rectangle is first narrowed by the CLAMP region settings and, when fst is non-zero,
// by the texture coordinate bounds stored in m_vtrace; the result is clipped to (0, 0, w, h).
void GSRendererSW::GetTextureMinMax(int w, int h, GSVector4i& r, uint32 fst)
{
const GSDrawingContext* context = m_context;
int wms = context->CLAMP.WMS;
int wmt = context->CLAMP.WMT;
int minu = (int)context->CLAMP.MINU;
int minv = (int)context->CLAMP.MINV;
int maxu = (int)context->CLAMP.MAXU;
int maxv = (int)context->CLAMP.MAXV;
// starts as the whole texture and is only ever shrunk (except for REGION_REPEAT, which replaces it)
GSVector4i vr(0, 0, w, h);
// horizontal limits implied by the wrap mode alone
switch(wms)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.x < minu) vr.x = minu;
if(vr.z > maxu + 1) vr.z = maxu + 1;
break;
case CLAMP_REGION_REPEAT:
// in this mode MAXU is used as the left edge and MINU + 1 as the width
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
}
// vertical limits implied by the wrap mode alone (same scheme as above)
switch(wmt)
{
case CLAMP_REPEAT:
break;
case CLAMP_CLAMP:
break;
case CLAMP_REGION_CLAMP:
if(vr.y < minv) vr.y = minv;
if(vr.w > maxv + 1) vr.w = maxv + 1;
break;
case CLAMP_REGION_REPEAT:
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
}
// the coordinate-based refinement is only done when the caller passes a non-zero fst
if(fst)
{
// (min.s, min.t, max.s, max.t) shifted right by 16;
// presumably converts a fixed-point value to whole texels - TODO confirm against the vertex trace
GSVector4i uv = GSVector4i(m_vtrace.m_min.t.xyxy(m_vtrace.m_max.t)).sra32(16);
// u and v are only assigned (and only read, via mask) when at least one axis repeats
GSVector4i u, v;
int mask = 0;
if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT)
{
int tw = context->TEX0.TW;
int th = context->TEX0.TH;
// coordinates wrapped into the texture: keep the low tw / th bits
u = uv & GSVector4i::xffffffff().srl32(32 - tw);
v = uv & GSVector4i::xffffffff().srl32(32 - th);
// index of the texture repetition each bound falls into
GSVector4i uu = uv.sra32(tw);
GSVector4i vv = uv.sra32(th);
// mask bits are set where the min and max bounds lie in the same repetition;
// NOTE(review): exact lane-to-bit mapping depends on upl32/uph32/mask() - verify in GSVector4i
mask = (uu.upl32(vv) == uu.uph32(vv)).mask();
}
switch(wms)
{
case CLAMP_REPEAT:
// only narrow when the mask says both horizontal bounds share a repetition
if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.x < uv.x) vr.x = uv.x;
if(vr.z > uv.z + 1) vr.z = uv.z + 1;
break;
case CLAMP_REGION_REPEAT: // TODO
break;
default:
__assume(0);
}
switch(wmt)
{
case CLAMP_REPEAT:
// only narrow when the mask says both vertical bounds share a repetition
if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;}
break;
case CLAMP_CLAMP:
case CLAMP_REGION_CLAMP:
if(vr.y < uv.y) vr.y = uv.y;
if(vr.w > uv.w + 1) vr.w = uv.w + 1;
break;
case CLAMP_REGION_REPEAT: // TODO
break;
default:
__assume(0);
}
}
// clip to the texture
r = vr.rintersect(GSVector4i(0, 0, w, h));
}
// Computes the range of alpha values the current batch can produce and stores it in
// m_vtrace.m_alpha (min, max, valid). The vertex alpha range comes from m_vtrace.m_min.c /
// m_vtrace.m_max.c; when texturing with TCC set, it is combined with the alpha range of the
// texture according to the texture function (TFX). The result is cached: nothing is
// recomputed while m_vtrace.m_alpha.valid is set.
void GSRendererSW::GetAlphaMinMax()
{
    if(m_vtrace.m_alpha.valid)
    {
        return;
    }

    const GSDrawingEnvironment& env = m_env;
    const GSDrawingContext* context = m_context;

    // a.x / a.z end up as the final min / max; a.y / a.w receive the texture alpha min / max below.
    // NOTE(review): lane selection relies on wwww(); the >> 7 presumably rescales the traced
    // color value to the 8-bit range - verify against the vertex trace.
    GSVector4i a = GSVector4i(m_vtrace.m_min.c.wwww(m_vtrace.m_max.c)) >> 7;

    if(PRIM->TME && context->TEX0.TCC)
    {
        uint32 bpp = GSLocalMemory::m_psm[context->TEX0.PSM].trbpp;

        if(bpp == 32)
        {
            // alpha comes straight from the texels: any value is possible
            a.y = 0;
            a.w = 0xff;
        }
        else if(bpp == 24)
        {
            // no alpha channel in the texel: TEXA.TA0 is used, or 0 may appear when AEM is set
            a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0;
            a.w = env.TEXA.TA0;
        }
        else if(bpp == 16)
        {
            // alpha is one of TEXA.TA0 / TEXA.TA1, or 0 may appear when AEM is set
            a.y = env.TEXA.AEM ? 0 : min(env.TEXA.TA0, env.TEXA.TA1);
            a.w = max(env.TEXA.TA0, env.TEXA.TA1);
        }
        else
        {
            // remaining formats: take the range from the CLUT (presumably palettized textures)
            m_mem.m_clut.GetAlphaMinMax32(a.y, a.w);
        }

        switch(context->TEX0.TFX)
        {
        case TFX_MODULATE:
            // vertex alpha * texture alpha, scaled down by 7 bits and saturated to 8 bits
            a.x = (a.x * a.y) >> 7;
            a.z = (a.z * a.w) >> 7;
            if(a.x > 0xff) a.x = 0xff;
            if(a.z > 0xff) a.z = 0xff;
            break;
        case TFX_DECAL:
            // texture alpha only
            a.x = a.y;
            a.z = a.w;
            break;
        case TFX_HIGHLIGHT:
            // vertex alpha + texture alpha, saturated to 8 bits
            a.x = a.x + a.y;
            a.z = a.z + a.w;
            if(a.x > 0xff) a.x = 0xff;
            if(a.z > 0xff) a.z = 0xff;
            break;
        case TFX_HIGHLIGHT2:
            // texture alpha only
            a.x = a.y;
            a.z = a.w;
            break;
        default:
            __assume(0);
        }
    }

    m_vtrace.m_alpha.min = a.x;
    m_vtrace.m_alpha.max = a.z;
    m_vtrace.m_alpha.valid = true;
}
// Tries to resolve the alpha test for the whole draw up front.
//
// Using the alpha range from GetAlphaMinMax(), decides whether the configured test
// (TEST.ATST against TEST.AREF) passes for every possible alpha or fails for every
// possible alpha.
//
// Returns false when the outcome depends on the actual alpha value (fm / zm are left
// untouched). Returns true otherwise; if the test always fails, fm / zm are updated
// according to TEST.AFAIL.
//
// NOTE(review): fm / zm look like frame / z buffer write masks where set bits block the
// write - confirm against the caller.
bool GSRendererSW::TryAlphaTest(uint32& fm, uint32& zm)
{
	const GSDrawingContext* ctx = m_context;

	bool failed = false;

	if(ctx->TEST.ATST == ATST_NEVER)
	{
		failed = true;
	}
	else if(ctx->TEST.ATST != ATST_ALWAYS)
	{
		GetAlphaMinMax();

		const int lo = m_vtrace.m_alpha.min;
		const int hi = m_vtrace.m_alpha.max;
		const int ref = ctx->TEST.AREF;

		bool all_pass = false; // test succeeds for every alpha in [lo, hi]
		bool all_fail = false; // test fails for every alpha in [lo, hi]

		// ATST_NEVER and ATST_ALWAYS were handled above and cannot reach this switch

		switch(ctx->TEST.ATST)
		{
		case ATST_LESS:
			all_pass = hi < ref;
			all_fail = lo >= ref;
			break;
		case ATST_LEQUAL:
			all_pass = hi <= ref;
			all_fail = lo > ref;
			break;
		case ATST_EQUAL:
			all_pass = lo == ref && hi == ref;
			all_fail = lo > ref || hi < ref;
			break;
		case ATST_GEQUAL:
			all_pass = lo >= ref;
			all_fail = hi < ref;
			break;
		case ATST_GREATER:
			all_pass = lo > ref;
			all_fail = hi <= ref;
			break;
		case ATST_NOTEQUAL:
			all_pass = lo > ref || hi < ref;
			all_fail = lo == ref && hi == ref;
			break;
		default:
			__assume(0);
		}

		if(!all_pass && !all_fail)
		{
			// outcome varies per pixel, the test has to be done for real
			return false;
		}

		failed = !all_pass;
	}

	if(failed)
	{
		switch(ctx->TEST.AFAIL)
		{
		case AFAIL_KEEP:
			fm = zm = 0xffffffff;
			break;
		case AFAIL_FB_ONLY:
			zm = 0xffffffff;
			break;
		case AFAIL_ZB_ONLY:
			fm = 0xffffffff;
			break;
		case AFAIL_RGB_ONLY:
			fm |= 0xff000000;
			zm = 0xffffffff;
			break;
		default:
			__assume(0);
		}
	}

	return true;
}
void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
{
const GSDrawingEnvironment& env = m_env;
@ -574,7 +319,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
{
p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM);
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.m_eq.rgba != 15)
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
{
p.sel.iip = PRIM->IIP;
}
@ -589,15 +334,12 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.sel.wms = context->CLAMP.WMS;
p.sel.wmt = context->CLAMP.WMT;
if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc)
{
if(m_vtrace.m_eq.rgba == 15 && (m_vtrace.m_min.c == GSVector4(128.0f * 128.0f)).alltrue())
if(/*p.sel.iip == 0 &&*/ p.sel.tfx == TFX_MODULATE && p.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
{
// modulate does not do anything when vertex color is 0x80
p.sel.tfx = TFX_DECAL;
}
}
if(p.sel.fst == 0)
{
@ -605,7 +347,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
GSVertexSW* v = m_vertices;
if(m_vtrace.m_eq.q)
if(m_vt.m_eq.q)
{
p.sel.fst = 1;
@ -617,91 +359,42 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
{
v[i].t *= w;
}
m_vtrace.m_min.t *= w;
m_vtrace.m_max.t *= w;
}
}
else if(primclass == GS_SPRITE_CLASS)
{
p.sel.fst = 1;
GSVector4 tmin = GSVector4(FLT_MAX);
GSVector4 tmax = GSVector4(-FLT_MAX);
for(int i = 0, j = m_count; i < j; i += 2)
{
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
GSVector4 v0 = v[i + 0].t * w;
GSVector4 v1 = v[i + 1].t * w;
v[i + 0].t = v0;
v[i + 1].t = v1;
tmin = tmin.minv(v0).minv(v1);
tmax = tmax.maxv(v0).maxv(v1);
}
m_vtrace.m_max.t = tmax;
m_vtrace.m_min.t = tmin;
v[i + 0].t *= w;
v[i + 1].t *= w;
}
}
}
if(p.sel.fst)
{
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
if(p.sel.ltf)
{
GSVector4 half(0x8000, 0x8000);
if(p.sel.fst)
{
// if q is constant we can do the half pel shift for bilinear sampling on the vertices
GSVertexSW* v = m_vertices;
for(int i = 0, j = m_count; i < j; i++)
{
v[i].t -= half;
}
m_vtrace.m_min.t -= half;
m_vtrace.m_max.t += half;
}
}
/*
else
{
GSVector4 tmin = GSVector4(FLT_MAX);
GSVector4 tmax = GSVector4(-FLT_MAX);
GSVertexSW* v = m_vertices;
for(int i = 0, j = m_count; i < j; i++)
{
GSVector4 v0 = v[i].t * v[i].t.zzzz().rcpnr();
tmin = tmin.minv(v0);
tmax = tmax.maxv(v0);
}
if(p.sel.ltf)
{
GSVector4 half(0x8000, 0x8000);
tmin -= half;
tmax += half;
}
m_vtrace.min.t = tmin;
m_vtrace.max.t = tmax;
}
*/
int w = 1 << context->TEX0.TW;
int h = 1 << context->TEX0.TH;
GSVector4i r;
GetTextureMinMax(w, h, r, p.sel.fst);
GetTextureMinMax(r);
const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r);
@ -720,31 +413,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
p.sel.datm = context->TEST.DATM;
}
int amin = 0, amax = 0xff;
if(PRIM->ABE && context->ALPHA.A != context->ALPHA.B && !PRIM->AA1)
{
if(context->ALPHA.C == 0)
{
GetAlphaMinMax();
amin = m_vtrace.m_alpha.min;
amax = m_vtrace.m_alpha.max;
}
else if(context->ALPHA.C == 1)
{
if(p.sel.fpsm == 1)
{
amin = amax = 0x80;
}
}
else if(context->ALPHA.C == 1)
{
amin = amax = context->ALPHA.FIX;
}
}
if(PRIM->ABE && !context->ALPHA.IsOpaque(amin, amax) || PRIM->AA1)
if(!IsOpaque())
{
p.sel.abe = PRIM->ABE;
p.sel.ababcd = context->ALPHA.u32[0];
@ -785,7 +454,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass)
{
p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM);
p.sel.ztst = ztest ? context->TEST.ZTST : 1;
p.sel.zoverflow = GSVector4i(m_vtrace.m_max.p).z == 0x80000000;
p.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
}
}

View File

@ -30,7 +30,6 @@ class GSRendererSW : public GSRendererT<GSVertexSW>
protected:
GSRasterizerList m_rl;
GSTextureCacheSW* m_tc;
GSVertexTrace m_vtrace;
GSTexture* m_texture[2];
bool m_reset;
@ -42,9 +41,6 @@ protected:
void Draw();
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
void GetTextureMinMax(int w, int h, GSVector4i& r, uint32 fst);
void GetAlphaMinMax();
bool TryAlphaTest(uint32& fm, uint32& zm);
void GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass);
public:

View File

@ -169,7 +169,21 @@ GSTexture10::operator ID3D10ShaderResourceView*()
{
if(!m_srv && m_dev && m_texture)
{
m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv);
D3D10_SHADER_RESOURCE_VIEW_DESC* desc = NULL;
if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
{
desc = new D3D10_SHADER_RESOURCE_VIEW_DESC();
memset(desc, 0, sizeof(*desc));
desc->Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS;
desc->ViewDimension = D3D10_SRV_DIMENSION_TEXTURE2D;
desc->Texture2D.MostDetailedMip = 0;
desc->Texture2D.MipLevels = 1;
}
m_dev->CreateShaderResourceView(m_texture, desc, &m_srv);
delete desc;
}
return m_srv;
@ -191,7 +205,19 @@ GSTexture10::operator ID3D10DepthStencilView*()
{
if(!m_dsv && m_dev && m_texture)
{
m_dev->CreateDepthStencilView(m_texture, NULL, &m_dsv);
D3D10_DEPTH_STENCIL_VIEW_DESC* desc = NULL;
if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS)
{
desc = new D3D10_DEPTH_STENCIL_VIEW_DESC();
memset(desc, 0, sizeof(*desc));
desc->Format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT;
desc->ViewDimension = D3D10_DSV_DIMENSION_TEXTURE2D;
}
m_dev->CreateDepthStencilView(m_texture, desc, &m_dsv);
delete desc;
}
return m_dsv;

View File

@ -198,7 +198,7 @@ GSTextureCache::GSDepthStencil* GSTextureCache::GetDepthStencil(const GIFRegTEX0
return ds;
}
GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture(const GSVector4i& r)
{
const GIFRegTEX0& TEX0 = m_renderer->m_context->TEX0;
const GIFRegCLAMP& CLAMP = m_renderer->m_context->CLAMP;
@ -207,61 +207,6 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
const uint32* clut = m_renderer->m_mem.m_clut;
if(psm.pal > 0)
{
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
/*
POSITION pos = m_tex.GetHeadPosition();
while(pos)
{
POSITION cur = pos;
GSSurface* s = m_tex.GetNext(pos);
if(s->m_TEX0.TBP0 == TEX0.CBP)
{
m_tex.RemoveAt(cur);
delete s;
}
}
pos = m_rt.GetHeadPosition();
while(pos)
{
POSITION cur = pos;
GSSurface* s = m_rt.GetNext(pos);
if(s->m_TEX0.TBP0 == TEX0.CBP)
{
m_rt.RemoveAt(cur);
delete s;
}
}
pos = m_ds.GetHeadPosition();
while(pos)
{
POSITION cur = pos;
GSSurface* s = m_ds.GetNext(pos);
if(s->m_TEX0.TBP0 == TEX0.CBP)
{
m_ds.RemoveAt(cur);
delete s;
}
}
*/
}
GSCachedTexture* t = NULL;
for(list<GSCachedTexture*>::iterator i = m_tex.begin(); i != m_tex.end(); i++)
@ -378,7 +323,7 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
}
}
t->Update();
t->Update(r);
m_tex_used = true;
@ -757,7 +702,7 @@ GSTextureCache::GSCachedTexture::~GSCachedTexture()
_aligned_free(m_clut);
}
void GSTextureCache::GSCachedTexture::Update()
void GSTextureCache::GSCachedTexture::Update(const GSVector4i& rect)
{
__super::Update();
@ -766,7 +711,7 @@ void GSTextureCache::GSCachedTexture::Update()
return;
}
GSVector4i r;
GSVector4i r = rect;
if(!GetDirtyRect(r))
{
@ -799,16 +744,16 @@ void GSTextureCache::GSCachedTexture::Update()
m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.width() * r.height() * 4);
}
bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr)
bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& r)
{
int w = 1 << m_TEX0.TW;
int h = 1 << m_TEX0.TH;
GSVector4i r(0, 0, w, h);
GSVector4i tr(0, 0, w, h);
for(list<GSDirtyRect>::iterator i = m_dirty.begin(); i != m_dirty.end(); i++)
{
const GSVector4i& dirty = i->GetDirtyRect(m_TEX0).rintersect(r);
const GSVector4i& dirty = i->GetDirtyRect(m_TEX0).rintersect(tr);
if(!m_valid.rintersect(dirty).rempty())
{
@ -837,8 +782,6 @@ bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr)
m_dirty.clear();
m_renderer->MinMaxUV(w, h, r);
if(GSUtil::IsRectInRect(r, m_valid))
{
return false;
@ -867,7 +810,5 @@ bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr)
return false;
}
rr = r;
return true;
}

View File

@ -85,7 +85,7 @@ public:
explicit GSCachedTexture(GSRenderer* renderer);
virtual ~GSCachedTexture();
void Update();
void Update(const GSVector4i& rect);
virtual bool Create() = 0;
virtual bool Create(GSRenderTarget* rt) = 0;
@ -130,7 +130,7 @@ public:
GSRenderTarget* GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb = false);
GSDepthStencil* GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h);
GSCachedTexture* GetTexture();
GSCachedTexture* GetTexture(const GSVector4i& r);
void InvalidateTextures(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF);
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);

View File

@ -142,6 +142,8 @@ bool GSTextureCache10::GSCachedTextureHW10::Create()
bool GSTextureCache10::GSCachedTextureHW10::Create(GSRenderTarget* rt)
{
m_rendered = true;
// TODO: clean up this mess
rt->Update();
@ -151,8 +153,6 @@ bool GSTextureCache10::GSCachedTextureHW10::Create(GSRenderTarget* rt)
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
m_rendered = true;
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
int tp = (int)m_TEX0.TW << 6;
@ -301,7 +301,92 @@ bool GSTextureCache10::GSCachedTextureHW10::Create(GSDepthStencil* ds)
{
m_rendered = true;
// TODO
return false;
/*
// TODO: clean up this mess
ds->Update();
// m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
m_TEX0 = m_renderer->m_context->TEX0;
m_TEXA = m_renderer->m_env.TEXA;
int tw = 1 << m_TEX0.TW;
int th = 1 << m_TEX0.TH;
int tp = (int)m_TEX0.TW << 6;
// do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows)
int w = (int)(ds->m_texture->m_scale.x * tw);
int h = (int)(ds->m_texture->m_scale.y * th);
GSVector2i dssize = ds->m_texture->GetSize();
// pitch conversion
if(ds->m_TEX0.TBW != m_TEX0.TBW) // && rt->m_TEX0.PSM == m_TEX0.PSM
{
ASSERT(0);
}
else if(tw < tp)
{
}
// width/height conversion
GSVector2 scale = ds->m_texture->m_scale;
GSVector4 dst(0, 0, w, h);
if(w > dssize.x)
{
scale.x = (float)dssize.x / tw;
dst.z = (float)dssize.x * scale.x / ds->m_texture->m_scale.x;
w = dssize.x;
}
if(h > dssize.y)
{
scale.y = (float)dssize.y / th;
dst.w = (float)dssize.y * scale.y / ds->m_texture->m_scale.y;
h = dssize.y;
}
m_texture = m_renderer->m_dev->CreateRenderTarget(w, h);
GSVector4 src(0, 0, w, h);
src.z /= ds->m_texture->GetWidth();
src.w /= ds->m_texture->GetHeight();
m_renderer->m_dev->StretchRect(ds->m_texture, src, m_texture, dst, 7);
m_texture->m_scale = scale;
switch(m_TEX0.PSM)
{
case PSM_PSMCT32:
m_bpp = 0;
break;
case PSM_PSMCT24:
m_bpp = 1;
break;
case PSM_PSMCT16:
case PSM_PSMCT16S:
m_bpp = 2;
break;
case PSM_PSMT8H:
m_bpp = 3;
m_palette = m_renderer->m_dev->CreateTexture(256, 1);
m_initpalette = true;
break;
case PSM_PSMT4HL:
case PSM_PSMT4HH:
ASSERT(0); // TODO
break;
}
return true;
*/
}

View File

@ -0,0 +1,23 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSTextureFX.h"

243
plugins/GSdx/GSTextureFX.h Normal file
View File

@ -0,0 +1,243 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSVector.h"
// Collection of the constant buffer layouts and the bit-packed selector keys that
// describe a pipeline configuration. Every selector exposes its packed bits through
// "key" and converts to uint32 with only the declared bits kept, so it can be used
// directly as a lookup key.
class GSTextureFX
{
public:

	#pragma pack(push, 1)

	// Vertex stage constants. Update() treats the struct as three GSVector4i sized
	// chunks (NOTE(review): assumes GSVector4 is 16 bytes and GSVector2 is 8 bytes, so
	// that _pad rounds the struct up to 48 bytes - confirm against GSVector.h).
	__declspec(align(16)) struct VSConstantBuffer
	{
		GSVector4 VertexScale;
		GSVector4 VertexOffset;
		GSVector2 TextureScale;
		float _pad[2];

		// standard constructor syntax (the "struct Name()" spelling is only accepted by MSVC)
		VSConstantBuffer()
		{
			memset(this, 0, sizeof(*this));
		}

		// Copies *cb over this object if any chunk differs.
		// Returns true when something changed, false when both were already equal.
		__forceinline bool Update(const VSConstantBuffer* cb)
		{
			GSVector4i* a = (GSVector4i*)this;
			GSVector4i* b = (GSVector4i*)cb;

			GSVector4i b0 = b[0];
			GSVector4i b1 = b[1];
			GSVector4i b2 = b[2];

			if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
			{
				a[0] = b0;
				a[1] = b1;
				a[2] = b2;

				return true;
			}

			return false;
		}
	};

	// Vertex stage selector, 7 bits.
	struct VSSelector
	{
		union
		{
			struct
			{
				uint32 bppz:2;
				uint32 tme:1;
				uint32 fst:1;
				uint32 logz:1;
				uint32 prim:2;
			};

			uint32 key;
		};

		// 0x7f keeps exactly the 7 declared bits
		operator uint32() const {return key & 0x7f;}

		VSSelector() : key(0) {}
	};

	// Pixel stage constants. Update() treats the struct as six GSVector4i sized chunks,
	// one per member.
	__declspec(align(16)) struct PSConstantBuffer
	{
		GSVector4 FogColor_AREF;
		GSVector4 HalfTexel;
		GSVector4 WH_TA;
		GSVector4 MinMax;
		GSVector4 MinMaxF;
		GSVector4i MskFix;

		// standard constructor syntax (the "struct Name()" spelling is only accepted by MSVC)
		PSConstantBuffer()
		{
			memset(this, 0, sizeof(*this));
		}

		// Copies *cb over this object if any chunk differs.
		// Returns true when something changed, false when both were already equal.
		__forceinline bool Update(const PSConstantBuffer* cb)
		{
			GSVector4i* a = (GSVector4i*)this;
			GSVector4i* b = (GSVector4i*)cb;

			GSVector4i b0 = b[0];
			GSVector4i b1 = b[1];
			GSVector4i b2 = b[2];
			GSVector4i b3 = b[3];
			GSVector4i b4 = b[4];
			GSVector4i b5 = b[5];

			if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
			{
				a[0] = b0;
				a[1] = b1;
				a[2] = b2;
				a[3] = b3;
				a[4] = b4;
				a[5] = b5;

				return true;
			}

			return false;
		}
	};

	// Geometry stage selector, 3 bits.
	struct GSSelector
	{
		union
		{
			struct
			{
				uint32 iip:1;
				uint32 prim:2;
			};

			uint32 key;
		};

		// 0x7 keeps exactly the 3 declared bits
		operator uint32() const {return key & 0x7;}

		GSSelector() : key(0) {}
	};

	// Pixel stage selector, 23 bits.
	struct PSSelector
	{
		union
		{
			struct
			{
				uint32 fst:1;
				uint32 wms:2;
				uint32 wmt:2;
				uint32 bpp:3;
				uint32 aem:1;
				uint32 tfx:3;
				uint32 tcc:1;
				uint32 ate:1;
				uint32 atst:3;
				uint32 fog:1;
				uint32 clr1:1;
				uint32 fba:1;
				uint32 aout:1;
				uint32 rt:1;
				uint32 ltf:1;
			};

			uint32 key;
		};

		// 0x7fffff keeps exactly the 23 declared bits
		operator uint32() const {return key & 0x7fffff;}

		PSSelector() : key(0) {}
	};

	// Sampler selector, 3 bits.
	struct PSSamplerSelector
	{
		union
		{
			struct
			{
				uint32 tau:1;
				uint32 tav:1;
				uint32 ltf:1;
			};

			uint32 key;
		};

		// 0x7 keeps exactly the 3 declared bits
		operator uint32() const {return key & 0x7;}

		PSSamplerSelector() : key(0) {}
	};

	// Depth-stencil state selector, 6 bits.
	struct OMDepthStencilSelector
	{
		union
		{
			struct
			{
				uint32 zte:1;
				uint32 ztst:2;
				uint32 zwe:1;
				uint32 date:1;
				uint32 fba:1;
			};

			uint32 key;
		};

		// 0x3f keeps exactly the 6 declared bits
		operator uint32() const {return key & 0x3f;}

		OMDepthStencilSelector() : key(0) {}
	};

	// Blend state selector, 13 bits.
	struct OMBlendSelector
	{
		union
		{
			struct
			{
				uint32 abe:1;
				uint32 a:2;
				uint32 b:2;
				uint32 c:2;
				uint32 d:2;
				uint32 wr:1;
				uint32 wg:1;
				uint32 wb:1;
				uint32 wa:1;
			};

			uint32 key;
		};

		// 0x1fff keeps exactly the 13 declared bits
		operator uint32() const {return key & 0x1fff;}

		OMBlendSelector() : key(0) {}
	};

	#pragma pack(pop)
};

View File

@ -36,10 +36,6 @@ bool GSTextureFX10::Create(GSDevice10* dev)
VSSelector sel;
sel.bppz = 0;
sel.tme = 0;
sel.fst = 0;
VSConstantBuffer cb;
SetupVS(sel, &cb); // creates layout
@ -122,11 +118,11 @@ bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
D3D10_INPUT_ELEMENT_DESC layout[] =
{
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 8, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 12, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 20, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D10_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D10_INPUT_PER_VERTEX_DATA, 0},
};
@ -515,8 +511,3 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
m_dev->OMSetBlendState((*j).second, bf);
}
void GSTextureFX10::Draw()
{
m_dev->DrawPrimitive();
}

View File

@ -21,188 +21,11 @@
#pragma once
#include "GSTextureFX.h"
#include "GSDevice10.h"
class GSTextureFX10
class GSTextureFX10 : public GSTextureFX
{
public:
#pragma pack(push, 1)
__declspec(align(16)) struct VSConstantBuffer
{
GSVector4 VertexScale;
GSVector4 VertexOffset;
GSVector2 TextureScale;
float _pad[2];
struct VSConstantBuffer() {memset(this, 0, sizeof(*this));}
__forceinline bool Update(const VSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
GSVector4i b0 = b[0];
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
{
a[0] = b0;
a[1] = b1;
a[2] = b2;
return true;
}
return false;
}
};
union VSSelector
{
struct
{
uint32 bppz:2;
uint32 tme:1;
uint32 fst:1;
uint32 prim:3;
};
uint32 key;
operator uint32() {return key & 0x7f;}
};
__declspec(align(16)) struct PSConstantBuffer
{
GSVector4 FogColor_AREF;
GSVector4 HalfTexel;
GSVector4 WH_TA;
GSVector4 MinMax;
GSVector4 MinMaxF;
GSVector4i MskFix;
struct PSConstantBuffer() {memset(this, 0, sizeof(*this));}
__forceinline bool Update(const PSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
GSVector4i b0 = b[0];
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
GSVector4i b3 = b[3];
GSVector4i b4 = b[4];
GSVector4i b5 = b[5];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
{
a[0] = b0;
a[1] = b1;
a[2] = b2;
a[3] = b3;
a[4] = b4;
a[5] = b5;
return true;
}
return false;
}
};
union GSSelector
{
struct
{
uint32 iip:1;
uint32 prim:2;
};
uint32 key;
operator uint32() {return key & 0x7;}
};
union PSSelector
{
struct
{
uint32 fst:1;
uint32 wms:2;
uint32 wmt:2;
uint32 bpp:3;
uint32 aem:1;
uint32 tfx:3;
uint32 tcc:1;
uint32 ate:1;
uint32 atst:3;
uint32 fog:1;
uint32 clr1:1;
uint32 fba:1;
uint32 aout:1;
uint32 ltf:1;
};
uint32 key;
operator uint32() {return key & 0x3fffff;}
};
union PSSamplerSelector
{
struct
{
uint32 tau:1;
uint32 tav:1;
uint32 ltf:1;
};
uint32 key;
operator uint32() {return key & 0x7;}
};
union OMDepthStencilSelector
{
struct
{
uint32 zte:1;
uint32 ztst:2;
uint32 zwe:1;
uint32 date:1;
};
uint32 key;
operator uint32() {return key & 0x1f;}
};
union OMBlendSelector
{
struct
{
uint32 abe:1;
uint32 a:2;
uint32 b:2;
uint32 c:2;
uint32 d:2;
uint32 wr:1;
uint32 wg:1;
uint32 wb:1;
uint32 wa:1;
};
uint32 key;
operator uint32() {return key & 0x1fff;}
};
#pragma pack(pop)
private:
GSDevice10* m_dev;
CComPtr<ID3D10InputLayout> m_il;
hash_map<uint32, CComPtr<ID3D10VertexShader> > m_vs;
@ -215,11 +38,6 @@ private:
hash_map<uint32, CComPtr<ID3D10DepthStencilState> > m_om_dss;
hash_map<uint32, CComPtr<ID3D10BlendState> > m_om_bs;
CComPtr<ID3D10Buffer> m_vb, m_vb_old;
int m_vb_max;
int m_vb_start;
int m_vb_count;
VSConstantBuffer m_vs_cb_cache;
PSConstantBuffer m_ps_cb_cache;
@ -236,5 +54,4 @@ public:
void SetupRS(int w, int h, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf);
void Draw();
};

View File

@ -36,10 +36,6 @@ bool GSTextureFX11::Create(GSDevice11* dev)
VSSelector sel;
sel.bppz = 0;
sel.tme = 0;
sel.fst = 0;
VSConstantBuffer cb;
SetupVS(sel, &cb); // creates layout
@ -122,11 +118,11 @@ bool GSTextureFX11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
D3D11_INPUT_ELEMENT_DESC layout[] =
{
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
};
@ -519,8 +515,3 @@ void GSTextureFX11::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
m_dev->OMSetBlendState((*j).second, bf);
}
void GSTextureFX11::Draw()
{
m_dev->DrawPrimitive();
}

View File

@ -21,188 +21,11 @@
#pragma once
#include "GSTextureFX.h"
#include "GSDevice11.h"
class GSTextureFX11
class GSTextureFX11 : public GSTextureFX
{
public:
#pragma pack(push, 1)
__declspec(align(16)) struct VSConstantBuffer
{
GSVector4 VertexScale;
GSVector4 VertexOffset;
GSVector2 TextureScale;
float _pad[2];
struct VSConstantBuffer() {memset(this, 0, sizeof(*this));}
__forceinline bool Update(const VSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
GSVector4i b0 = b[0];
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
{
a[0] = b0;
a[1] = b1;
a[2] = b2;
return true;
}
return false;
}
};
union VSSelector
{
struct
{
uint32 bppz:2;
uint32 tme:1;
uint32 fst:1;
uint32 prim:3;
};
uint32 key;
operator uint32() {return key & 0x7f;}
};
__declspec(align(16)) struct PSConstantBuffer
{
GSVector4 FogColor_AREF;
GSVector4 HalfTexel;
GSVector4 WH_TA;
GSVector4 MinMax;
GSVector4 MinMaxF;
GSVector4i MskFix;
struct PSConstantBuffer() {memset(this, 0, sizeof(*this));}
__forceinline bool Update(const PSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
GSVector4i b0 = b[0];
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
GSVector4i b3 = b[3];
GSVector4i b4 = b[4];
GSVector4i b5 = b[5];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
{
a[0] = b0;
a[1] = b1;
a[2] = b2;
a[3] = b3;
a[4] = b4;
a[5] = b5;
return true;
}
return false;
}
};
union GSSelector
{
struct
{
uint32 iip:1;
uint32 prim:2;
};
uint32 key;
operator uint32() {return key & 0x7;}
};
union PSSelector
{
struct
{
uint32 fst:1;
uint32 wms:2;
uint32 wmt:2;
uint32 bpp:3;
uint32 aem:1;
uint32 tfx:3;
uint32 tcc:1;
uint32 ate:1;
uint32 atst:3;
uint32 fog:1;
uint32 clr1:1;
uint32 fba:1;
uint32 aout:1;
uint32 ltf:1;
};
uint32 key;
operator uint32() {return key & 0x3fffff;}
};
union PSSamplerSelector
{
struct
{
uint32 tau:1;
uint32 tav:1;
uint32 ltf:1;
};
uint32 key;
operator uint32() {return key & 0x7;}
};
union OMDepthStencilSelector
{
struct
{
uint32 zte:1;
uint32 ztst:2;
uint32 zwe:1;
uint32 date:1;
};
uint32 key;
operator uint32() {return key & 0x1f;}
};
union OMBlendSelector
{
struct
{
uint32 abe:1;
uint32 a:2;
uint32 b:2;
uint32 c:2;
uint32 d:2;
uint32 wr:1;
uint32 wg:1;
uint32 wb:1;
uint32 wa:1;
};
uint32 key;
operator uint32() {return key & 0x1fff;}
};
#pragma pack(pop)
private:
GSDevice11* m_dev;
CComPtr<ID3D11InputLayout> m_il;
hash_map<uint32, CComPtr<ID3D11VertexShader> > m_vs;
@ -215,11 +38,6 @@ private:
hash_map<uint32, CComPtr<ID3D11DepthStencilState> > m_om_dss;
hash_map<uint32, CComPtr<ID3D11BlendState> > m_om_bs;
CComPtr<ID3D11Buffer> m_vb, m_vb_old;
int m_vb_max;
int m_vb_start;
int m_vb_count;
VSConstantBuffer m_vs_cb_cache;
PSConstantBuffer m_ps_cb_cache;
@ -236,5 +54,4 @@ public:
void SetupRS(int w, int h, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf);
void Draw();
};

View File

@ -34,11 +34,6 @@ bool GSTextureFX9::Create(GSDevice9* dev)
VSSelector sel;
sel.bppz = 0;
sel.tme = 0;
sel.fst = 0;
sel.logz = 0;
VSConstantBuffer cb;
SetupVS(sel, &cb); // creates layout
@ -455,8 +450,3 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel,
m_dev->OMSetBlendState((*j).second, 0x010101 * bf);
}
void GSTextureFX9::Draw()
{
m_dev->DrawPrimitive();
}

View File

@ -21,123 +21,11 @@
#pragma once
#include "GSTextureFX.h"
#include "GSDevice9.h"
class GSTextureFX9
class GSTextureFX9 : public GSTextureFX
{
public:
#pragma pack(push, 1)
struct VSConstantBuffer
{
GSVector4 VertexScale;
GSVector4 VertexOffset;
GSVector2 TextureScale;
float _pad[2];
};
union VSSelector
{
struct
{
uint32 bppz:2;
uint32 tme:1;
uint32 fst:1;
uint32 logz:1;
};
uint32 key;
operator uint32() {return key & 0x1f;}
};
struct PSConstantBuffer
{
GSVector4 FogColor_AREF;
GSVector4 HalfTexel;
GSVector4 WH_TA;
GSVector4 MinMax;
GSVector4 MinMaxF;
GSVector4i MskFix;
};
union PSSelector
{
struct
{
uint32 fst:1;
uint32 wms:2;
uint32 wmt:2;
uint32 bpp:3;
uint32 aem:1;
uint32 tfx:3;
uint32 tcc:1;
uint32 ate:1;
uint32 atst:3;
uint32 fog:1;
uint32 clr1:1;
uint32 rt:1;
uint32 ltf:1;
};
uint32 key;
operator uint32() {return key & 0x1fffff;}
};
union PSSamplerSelector
{
struct
{
uint32 tau:1;
uint32 tav:1;
uint32 ltf:1;
};
uint32 key;
operator uint32() {return key & 0x7;}
};
union OMDepthStencilSelector
{
struct
{
uint32 zte:1;
uint32 ztst:2;
uint32 zwe:1;
uint32 date:1;
uint32 fba:1;
};
uint32 key;
operator uint32() {return key & 0x3f;}
};
union OMBlendSelector
{
struct
{
uint32 abe:1;
uint32 a:2;
uint32 b:2;
uint32 c:2;
uint32 d:2;
uint32 wr:1;
uint32 wg:1;
uint32 wb:1;
uint32 wa:1;
};
uint32 key;
operator uint32() {return key & 0x1fff;}
};
#pragma pack(pop)
private:
GSDevice9* m_dev;
CComPtr<IDirect3DVertexDeclaration9> m_il;
hash_map<uint32, CComPtr<IDirect3DVertexShader9> > m_vs;
@ -162,5 +50,4 @@ public:
void SetupRS(int w, int h, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf);
void Draw();
};

View File

@ -35,8 +35,8 @@ __declspec(align(16)) struct GSVertex
struct
{
GIFRegST ST;
GIFRegXYZ XYZ;
GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ;
GIFRegFOG FOG;
};

View File

@ -21,6 +21,7 @@
#pragma once
#include "GS.h"
#include "GSVector.h"
#pragma pack(push, 1)
@ -55,14 +56,14 @@ __declspec(align(16)) union GSVertexHW10
union
{
struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p;
GIFRegXYZ XYZ;
union {struct {uint8 r, g, b, a; float q;}; uint32 c0;};
GIFRegRGBAQ RGBAQ;
};
union
{
union {struct {uint8 r, g, b, a; float q;}; uint32 c0;};
GIFRegRGBAQ RGBAQ;
struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p;
GIFRegXYZ XYZ;
};
union

View File

@ -23,145 +23,3 @@
#include "stdafx.h"
#include "GSVertexSW.h"
using namespace Xbyak;
// Emits (through Xbyak) a routine that scans an array of GSVertexSW and stores the
// per-component minimum and maximum of the vertex fields into two GSVertexSW outputs.
//
// key    selects which variant is generated (bit layout decoded below)
// ptr    buffer the code is generated into (handed to CodeGenerator)
// maxsize size of that buffer (handed to CodeGenerator)
//
// Register use in the generated code:
//   xmm0       scratch (first holds FLT_MAX for seeding, then the loaded vertex field)
//   xmm1       -FLT_MAX, only used for seeding
//   xmm2/xmm3  running min/max of the field at offset 0  (c), only when "color" is set
//   xmm4/xmm5  running min/max of the field at offset 16 (p), always
//   xmm6/xmm7  running min/max of the field at offset 32 (t), only when "tme" is set
//   edx        current vertex pointer, ecx remaining vertex count
GSVertexTrace::GSVertexTraceCodeGenerator::GSVertexTraceCodeGenerator(uint32 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
{
// the emitted code uses 32-bit registers and esp-relative arguments only
#if _M_AMD64
#error TODO
#endif
const int params = 0;
// key layout: bits 0-1 primitive class, bit 2 iip, bit 3 tme, bit 4 color
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 color = (key >> 4) & 1;
// n = number of vertices per primitive, the loop below is unrolled n times
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// esp-relative offsets of the four arguments
// NOTE(review): assumes stack-passed arguments with the return address at [esp],
// in the order (v, count, min, max) - confirm against the function pointer type used by the caller
const int _v = params + 4;
const int _count = params + 8;
const int _min = params + 12;
const int _max = params + 16;
//
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
// xmm0 = FLT_MAX in all lanes, xmm1 = -FLT_MAX in all lanes
movss(xmm0, xmmword[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
movss(xmm1, xmmword[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
movaps(xmm2, xmm0);
movaps(xmm3, xmm1);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
for(int j = 0; j < n; j++)
{
// without iip only the last vertex of each primitive contributes its color
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]);
minps(xmm2, xmm0);
maxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]);
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
}
// advance by one primitive and repeat while vertices remain (count > 0 after the subtraction)
add(edx, n * sizeof(GSVertexSW));
sub(ecx, n);
jg("loop");
// }
// store the accumulated ranges; fields that were not tracked are left untouched in the outputs
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
movaps(xmmword[eax], xmm2);
movaps(xmmword[edx], xmm3);
}
movaps(xmmword[eax + 16], xmm4);
movaps(xmmword[edx + 16], xmm5);
if(tme)
{
movaps(xmmword[eax + 32], xmm6);
movaps(xmmword[edx + 32], xmm7);
}
ret();
}

View File

@ -214,81 +214,3 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f)
return v0;
}
#include "GSFunctionMap.h"
#include "xbyak/xbyak.h"
// Tracks the value range of a batch of software renderer vertices.
// Update() runs a generated routine (picked from m_map by a small key) that fills
// m_min / m_max, then derives m_eq and invalidates the cached alpha range.
__declspec(align(16)) class GSVertexTrace
{
// code generator that emits one min/max routine per key
class GSVertexTraceCodeGenerator : public Xbyak::CodeGenerator
{
public:
GSVertexTraceCodeGenerator(uint32 key, void* ptr, size_t maxsize);
};
// signature of the generated routine: scans count vertices at v and writes the ranges to min / max
typedef void (*VertexTracePtr)(const GSVertexSW* v, int count, GSVertexSW& min, GSVertexSW& max);
// map from key to generated routine, creates the generator for a key on request
class GSVertexTraceMap : public GSCodeGeneratorFunctionMap<GSVertexTraceCodeGenerator, uint32, VertexTracePtr>
{
public:
GSVertexTraceMap() : GSCodeGeneratorFunctionMap("VertexTrace") {}
GSVertexTraceCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize) {return new GSVertexTraceCodeGenerator(key, ptr, maxsize);}
} m_map;
public:
// per-component minimum and maximum over the traced vertices
GSVertexSW m_min, m_max;
struct {int min, max; bool valid;} m_alpha; // source alpha range after tfx
// equality flags built in Update(): bits 0-3 from p, bits 4-7 from t, bits 8-11 from c
// NOTE(review): presumably mask() yields one bit per lane, so a set bit means min == max
// for that component - confirm against GSVector4
union
{
uint32 value;
struct {uint32 x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1, r:1, g:1, b:1, a:1;};
struct {uint32 xyzf:4, stq:4, rgba:4;};
} m_eq;
// Recomputes m_min / m_max / m_eq for count vertices at v and marks m_alpha as stale.
// The key is primclass in bits 0-1, iip in bit 2, tme in bit 3, and bit 4 set unless
// the draw is textured with TFX_DECAL and tcc.
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 tfx, uint32 tcc)
{
uint32 key = primclass | (iip << 2) | (tme << 3);
if(!(tme && tfx == TFX_DECAL && tcc))
{
key |= 1 << 4;
}
m_map[key](v, count, m_min, m_max);
m_eq.value = (m_min.p == m_max.p).mask() | ((m_min.t == m_max.t).mask() << 4) | ((m_min.c == m_max.c).mask() << 8);
m_alpha.valid = false;
}
// plain C++ variant of Update, kept disabled
/*
void Update(const GSVertexSW* v, int count)
{
GSVertexSW min, max;
min.c = v[0].c;
max.c = v[0].c;
min.t = v[0].t;
max.t = v[0].t;
min.p = v[0].p;
max.p = v[0].p;
for(int i = 1; i < count; i++)
{
min.c = min.c.minv(v[i].c);
max.c = max.c.maxv(v[i].c);
min.p = min.p.minv(v[i].p);
max.p = max.p.maxv(v[i].p);
min.t = min.t.minv(v[i].t);
max.t = max.t.maxv(v[i].t);
}
m_min = min;
m_max = max;
m_eq.value = (min.p == max.p).mask() | ((min.t == max.t).mask() << 4) | ((min.c == max.c).mask() << 8);
m_alpha.valid = false;
}
*/
};

View File

@ -0,0 +1,670 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "stdafx.h"
#include "GSVertexTrace.h"
void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC))
{
key |= 1 << 5;
}
m_map_sw[key](v, count, m_min, m_max);
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
}
void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC))
{
key |= 1 << 5;
}
m_map_hw9[key](v, count, m_min, m_max);
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
m_min.p = (m_min.p - o) * s;
m_max.p = (m_max.p - o) * s;
if(PRIM->TME)
{
if(PRIM->FST)
{
s = GSVector4(1 << (16 - 4), 1).xxyy();
}
else
{
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
}
m_min.t *= s;
m_max.t *= s;
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
}
void GSVertexTrace::Update(const GSVertexHW10* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context)
{
uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4);
if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC))
{
key |= 1 << 5;
}
m_map_hw10[key](v, count, m_min, m_max);
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);
m_min.p = (m_min.p - o) * s;
m_max.p = (m_max.p - o) * s;
if(PRIM->TME)
{
if(PRIM->FST)
{
s = GSVector4(1 << (16 - 4), 1).xxyy();
}
else
{
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
}
m_min.t *= s;
m_max.t *= s;
}
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
}
using namespace Xbyak;
// Emits the 32-bit x86 min/max scanning routine for software-renderer vertices
// (GSVertexSW). The routine reads its four arguments (v, count, min, max) from
// the stack and writes the accumulated extrema to *min and *max.
//
// key layout (decoded below): bits 0-1 primclass, bit 2 iip, bit 3 tme,
// bit 4 fst, bit 5 color. Parts that the key disables are not emitted at all.
//
// Register usage in the emitted code:
//   xmm0       scratch (current vertex component)
//   xmm1       -FLT_MAX during setup, later reused as the texcoord divisor
//   xmm2/xmm3  color min/max        (only when color is set)
//   xmm4/xmm5  position min/max
//   xmm6/xmm7  texcoord min/max     (only when tme is set)
//   edx        current vertex pointer, ecx remaining vertex count
GSVertexTrace::CGSW::CGSW(uint32 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
{
#if _M_AMD64
#error TODO
#endif
// Nothing is pushed by the emitted code, so the first argument is at esp + 4.
const int params = 0;
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of vertices consumed per loop iteration (one primitive).
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// Stack offsets of the four arguments, in declaration order.
const int _v = params + 4;
const int _count = params + 8;
const int _min = params + 12;
const int _max = params + 16;
//
// The emitted code loads these constants by absolute address, hence static storage.
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
// xmm0 = FLT_MAX in all lanes, xmm1 = -FLT_MAX in all lanes
movss(xmm0, xmmword[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
movss(xmm1, xmmword[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
movaps(xmm2, xmm0);
movaps(xmm3, xmm1);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
// NOTE(review): the count is only tested at the bottom of the loop, so the body
// runs at least once; callers presumably never pass count <= 0 - confirm.
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites use one divisor for both vertices: lane 2 of the second vertex's t
// (presumably q), broadcast to all lanes of xmm1.
movaps(xmm1, xmmword[edx + 1 * sizeof(GSVertexSW) + 32]);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
// Without iip only the last vertex of each primitive contributes its color.
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]);
minps(xmm2, xmm0);
maxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]);
if(!fst)
{
if(primclass != GS_SPRITE_CLASS)
{
// Non-sprite primitives divide by each vertex's own lane 2.
movaps(xmm1, xmm0);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
// Divide all lanes by the divisor, then put the undivided divisor back
// into the upper two lanes so only lanes 0 and 1 carry the quotient.
divps(xmm0, xmm1);
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
}
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
}
// Advance to the next primitive and loop while vertices remain.
add(edx, n * sizeof(GSVertexSW));
sub(ecx, n);
jg("loop");
// }
// eax = &min, edx = &max (edx is free now that the scan is finished)
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// Color extrema are truncated to integers and shifted right by 7 before
// being stored (presumably the float colors are scaled by 128 - confirm).
cvttps2dq(xmm2, xmm2);
psrld(xmm2, 7);
movaps(xmmword[eax], xmm2);
cvttps2dq(xmm3, xmm3);
psrld(xmm3, 7);
movaps(xmmword[edx], xmm3);
}
// Position extrema go to offset 16, texcoord extrema to offset 32 of each output.
movaps(xmmword[eax + 16], xmm4);
movaps(xmmword[edx + 16], xmm5);
if(tme)
{
movaps(xmmword[eax + 32], xmm6);
movaps(xmmword[edx + 32], xmm7);
}
ret();
}
// Emits the 32-bit x86 min/max scanning routine for Direct3D 9 vertices
// (GSVertexHW9). The routine reads its four arguments (v, count, min, max) from
// the stack and writes the accumulated extrema to *min and *max.
//
// key layout (decoded below): bits 0-1 primclass, bit 2 iip, bit 3 tme,
// bit 4 fst, bit 5 color. Parts that the key disables are not emitted at all.
//
// Register usage in the emitted code:
//   xmm0       scratch (current vertex component)
//   xmm1       -FLT_MAX during setup, later reused as the texcoord divisor (p.wwww)
//   xmm2/xmm3  packed byte color min/max  (only when color is set)
//   xmm4/xmm5  position min/max
//   xmm6/xmm7  texcoord min/max           (only when tme is set)
//   edx        current vertex pointer, ecx remaining vertex count
GSVertexTrace::CGHW9::CGHW9(uint32 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
{
#if _M_AMD64
#error TODO
#endif
// Nothing is pushed by the emitted code, so the first argument is at esp + 4.
const int params = 0;
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of vertices consumed per loop iteration (one primitive);
// sprites take six vertices here.
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
case GS_SPRITE_CLASS:
n = 6;
break;
}
// Stack offsets of the four arguments, in declaration order.
const int _v = params + 4;
const int _count = params + 8;
const int _min = params + 12;
const int _max = params + 16;
//
// The emitted code loads these constants by absolute address, hence static storage.
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
// xmm0 = FLT_MAX in all lanes, xmm1 = -FLT_MAX in all lanes
movss(xmm0, xmmword[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
movss(xmm1, xmmword[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
pcmpeqd(xmm2, xmm2);
pxor(xmm3, xmm3);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
// NOTE(review): the count is only tested at the bottom of the loop, so the body
// runs at least once; callers presumably never pass count <= 0 - confirm.
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites use one divisor for all six vertices: p.w of the last vertex,
// broadcast to all lanes of xmm1.
movaps(xmm1, xmmword[edx + 5 * sizeof(GSVertexHW9) + 16]);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
for(int j = 0; j < n; j++)
{
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9) + 16]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
if(tme && !fst && primclass != GS_SPRITE_CLASS)
{
// Non-sprite primitives divide by each vertex's own p.w.
movaps(xmm1, xmm0);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
// The first 16 bytes of the vertex serve both the color and the texcoord path,
// so they are loaded once if either path needs them.
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9)]);
}
// Without iip only the last vertex of each primitive contributes its color.
if(color && (iip || j == n - 1))
{
// min.c = min.c.min_u8(v[i + j].c);
// max.c = max.c.max_u8(v[i + j].c);
// (byte-wise over the whole register; only dword 2 is extracted at the end)
pminub(xmm2, xmm0);
pmaxub(xmm3, xmm0);
}
if(tme)
{
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
if(!fst)
{
// t /= p.wwww();
divps(xmm0, xmm1);
}
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
}
// Advance to the next primitive and loop while vertices remain.
add(edx, n * sizeof(GSVertexHW9));
sub(ecx, n);
jg("loop");
// }
// eax = &min, edx = &max (edx is free now that the scan is finished)
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
if(m_cpu.has(util::Cpu::tSSE41))
{
// SSE4.1: broadcast dword 2, then zero-extend its four bytes to four dwords.
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm2, xmm2);
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm3, xmm3);
}
else
{
// Fallback: widen the high 8 bytes to words, then the low 4 of those
// words (the bytes of dword 2) to dwords, interleaving with zero.
pxor(xmm0, xmm0);
punpckhbw(xmm2, xmm0);
punpcklwd(xmm2, xmm0);
punpckhbw(xmm3, xmm0);
punpcklwd(xmm3, xmm0);
}
movaps(xmmword[eax], xmm2);
movaps(xmmword[edx], xmm3);
}
// m_min.p = pmin;
// m_max.p = pmax;
movaps(xmmword[eax + 16], xmm4);
movaps(xmmword[edx + 16], xmm5);
if(tme)
{
// m_min.t = tmin.xyww(pmin);
// m_max.t = tmax.xyww(pmax);
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
movaps(xmmword[eax + 32], xmm6);
movaps(xmmword[edx + 32], xmm7);
}
ret();
}
// Emits the 32-bit x86 min/max scanning routine for Direct3D 10 vertices
// (GSVertexHW10). The routine reads its four arguments (v, count, min, max) from
// the stack and writes the accumulated extrema to *min and *max.
//
// key layout (decoded below): bits 0-1 primclass, bit 2 iip, bit 3 tme,
// bit 4 fst, bit 5 color. Parts that the key disables are not emitted at all.
//
// Register usage in the emitted code:
//   xmm0       scratch (current vertex component)
//   xmm1       -FLT_MAX during setup, later scratch (divisor / unpacked position)
//   xmm2/xmm3  packed byte color min/max  (only when color is set)
//   xmm4/xmm5  position min/max (as floats converted from the integer position)
//   xmm6/xmm7  texcoord min/max           (only when tme is set)
//   edx        current vertex pointer, ecx remaining vertex count
GSVertexTrace::CGHW10::CGHW10(uint32 key, void* ptr, size_t maxsize)
: CodeGenerator(maxsize, ptr)
{
#if _M_AMD64
#error TODO
#endif
// Nothing is pushed by the emitted code, so the first argument is at esp + 4.
const int params = 0;
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of vertices consumed per loop iteration (one primitive).
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// Stack offsets of the four arguments, in declaration order.
const int _v = params + 4;
const int _count = params + 8;
const int _min = params + 12;
const int _max = params + 16;
//
// The emitted code loads these constants by absolute address, hence static storage.
static const float fmin = -FLT_MAX;
static const float fmax = FLT_MAX;
// xmm0 = FLT_MAX in all lanes, xmm1 = -FLT_MAX in all lanes
movss(xmm0, xmmword[&fmax]);
shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0));
movss(xmm1, xmmword[&fmin]);
shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
pcmpeqd(xmm2, xmm2);
pxor(xmm3, xmm3);
}
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
movaps(xmm4, xmm0);
movaps(xmm5, xmm1);
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm0);
movaps(xmm7, xmm1);
}
// for(int i = 0; i < count; i += step) {
mov(edx, dword[esp + _v]);
mov(ecx, dword[esp + _count]);
align(16);
// NOTE(review): the count is only tested at the bottom of the loop, so the body
// runs at least once; callers presumably never pass count <= 0 - confirm.
L("loop");
for(int j = 0; j < n; j++)
{
// The first 16 bytes of the vertex serve both the color and the texcoord path,
// so they are loaded once if either path needs them.
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW10)]);
}
// Without iip only the last vertex of each primitive contributes its color.
if(color && (iip || j == n - 1))
{
// byte-wise min/max over the whole register; only dword 2 is extracted at the end
pminub(xmm2, xmm0);
pmaxub(xmm3, xmm0);
}
if(tme)
{
if(!fst)
{
// keep an unshuffled copy so lane 3 (the divisor) can be broadcast below
movaps(xmm1, xmm0);
}
shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
if(!fst)
{
// divide the texture coordinates by lane 3 of the loaded vertex data
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
divps(xmm0, xmm1);
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
}
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
// Position is stored as integers at offset 16: the low dword is split into two
// 16-bit values (zero-extended into lanes 0 and 1 of xmm1).
movdqa(xmm0, xmmword[edx + j * sizeof(GSVertexHW10) + 16]);
if(m_cpu.has(util::Cpu::tSSE41))
{
pmovzxwd(xmm1, xmm0);
}
else
{
// Fallback zero-extension: duplicate each word, then shift the copies down.
movdqa(xmm1, xmm0);
punpcklwd(xmm1, xmm1);
psrld(xmm1, 16);
}
// Halve every dword of the raw data, then merge: xmm1 = (x, y, dword0 >> 1, dword1 >> 1).
// Lane 3 therefore carries the second dword (presumably z) shifted right by one;
// the HW10 Update() multiplies z by 2 afterwards.
psrld(xmm0, 1);
punpcklqdq(xmm1, xmm0);
cvtdq2ps(xmm1, xmm1);
minps(xmm4, xmm1);
maxps(xmm5, xmm1);
}
// Advance to the next primitive and loop while vertices remain.
add(edx, n * sizeof(GSVertexHW10));
sub(ecx, n);
jg("loop");
// }
// eax = &min, edx = &max (edx is free now that the scan is finished)
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
if(m_cpu.has(util::Cpu::tSSE41))
{
// SSE4.1: broadcast dword 2, then zero-extend its four bytes to four dwords.
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm2, xmm2);
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm3, xmm3);
}
else
{
// Fallback: widen the high 8 bytes to words, then the low 4 of those
// words (the bytes of dword 2) to dwords, interleaving with zero.
pxor(xmm0, xmm0);
punpckhbw(xmm2, xmm0);
punpcklwd(xmm2, xmm0);
punpckhbw(xmm3, xmm0);
punpcklwd(xmm3, xmm0);
}
movaps(xmmword[eax], xmm2);
movaps(xmmword[edx], xmm3);
}
// m_min.p = pmin.xyww();
// m_max.p = pmax.xyww();
// (drops lane 2, which only held the halved packed x/y dword)
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
movaps(xmmword[eax + 16], xmm4);
movaps(xmmword[edx + 16], xmm5);
if(tme)
{
// m_min.t = tmin;
// m_max.t = tmax;
movaps(xmmword[eax + 32], xmm6);
movaps(xmmword[edx + 32], xmm7);
}
ret();
}

View File

@ -0,0 +1,99 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#pragma once
#include "GSDrawingContext.h"
#include "GSVertexSW.h"
#include "GSVertexHW.h"
#include "GSFunctionMap.h"
#include "xbyak/xbyak.h"
#include "xbyak/xbyak_util.h"
// Tracks the component-wise minimum and maximum of a batch of vertices for the
// software, Direct3D 9 and Direct3D 10 vertex formats. The scan is performed by
// run-time generated routines, one generator/map pair per vertex format, looked
// up by a key that the Update() overloads build from the draw state.
__declspec(align(16)) class GSVertexTrace
{
// Result layout shared by all generated routines: color at offset 0,
// position at offset 16, texture coordinates at offset 32.
struct Vertex {GSVector4i c; GSVector4 p, t;};
struct VertexAlpha {int min, max; bool valid;};
// Signature of a generated routine; v points to the format-specific vertex array.
typedef void (*VertexTracePtr)(const void* v, int count, Vertex& min, Vertex& max);
// Code generator for GSVertexSW input.
class CGSW : public Xbyak::CodeGenerator
{
public:
CGSW(uint32 key, void* ptr, size_t maxsize);
};
// Key -> routine map for GSVertexSW, registered under the name "VertexTraceSW".
class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap<CGSW, uint32, VertexTracePtr>
{
public:
GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {}
CGSW* Create(uint32 key, void* ptr, size_t maxsize) {return new CGSW(key, ptr, maxsize);}
};
// Code generator for GSVertexHW9 input; m_cpu is queried for SSE4.1 support.
class CGHW9 : public Xbyak::CodeGenerator
{
Xbyak::util::Cpu m_cpu;
public:
CGHW9(uint32 key, void* ptr, size_t maxsize);
};
// Key -> routine map for GSVertexHW9, registered under the name "VertexTraceHW9".
class GSVertexTraceMapHW9 : public GSCodeGeneratorFunctionMap<CGHW9, uint32, VertexTracePtr>
{
public:
GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {}
CGHW9* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW9(key, ptr, maxsize);}
};
// Code generator for GSVertexHW10 input; m_cpu is queried for SSE4.1 support.
class CGHW10 : public Xbyak::CodeGenerator
{
Xbyak::util::Cpu m_cpu;
public:
CGHW10(uint32 key, void* ptr, size_t maxsize);
};
// Key -> routine map for GSVertexHW10, registered under the name "VertexTraceHW10".
class GSVertexTraceMapHW10 : public GSCodeGeneratorFunctionMap<CGHW10, uint32, VertexTracePtr>
{
public:
GSVertexTraceMapHW10() : GSCodeGeneratorFunctionMap("VertexTraceHW10") {}
CGHW10* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW10(key, ptr, maxsize);}
};
// One routine cache per vertex format.
GSVertexTraceMapSW m_map_sw;
GSVertexTraceMapHW9 m_map_hw9;
GSVertexTraceMapHW10 m_map_hw10;
public:
// Extrema written by the most recent Update() call.
Vertex m_min, m_max; // t.xy * 0x10000
VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it
// Per-component "min == max" flags filled in by Update(): the color compare
// mask occupies bits 0-15 (4 bits per channel), the position mask bits 16-19
// and the texture coordinate mask bits 20-23.
union
{
uint32 value;
struct {uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1;};
struct {uint32 rgba:16, xyzf:4, stq:4;};
} m_eq;
// Scan count vertices starting at v and refresh m_min, m_max and m_eq;
// each overload also marks m_alpha as invalid.
void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
void Update(const GSVertexHW10* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context);
};

View File

@ -1440,6 +1440,10 @@
/>
</FileConfiguration>
</File>
<File
RelativePath=".\GSTextureFX.cpp"
>
</File>
<File
RelativePath=".\GSTextureFX10.cpp"
>
@ -1496,6 +1500,10 @@
RelativePath=".\GSVertexSW.cpp"
>
</File>
<File
RelativePath=".\GSVertexTrace.cpp"
>
</File>
<File
RelativePath=".\GSWnd.cpp"
>
@ -1878,6 +1886,10 @@
RelativePath=".\GSTextureCacheSW.h"
>
</File>
<File
RelativePath=".\GSTextureFX.h"
>
</File>
<File
RelativePath=".\GSTextureFX10.h"
>
@ -1926,6 +1938,10 @@
RelativePath=".\GSVertexSW.h"
>
</File>
<File
RelativePath=".\GSVertexTrace.h"
>
</File>
<File
RelativePath=".\GSWnd.h"
>

View File

@ -345,7 +345,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
input.z = input.z & 0xffff;
}
if(VS_PRIM == 6) // sprite
if(VS_PRIM == 3) // sprite
{
//input.p.xy = (input.p.xy + 15) & ~15; // HACK
}