From b284fae2d5ef13d244491ece5c5ab17928ec75ca Mon Sep 17 00:00:00 2001 From: gabest11 Date: Tue, 23 Jun 2009 04:12:32 +0000 Subject: [PATCH] GSdx: moved around some code and optimized texture caching a bit, there may be a slight speed-up in hw mode for those games that use many textures. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1425 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GS.cpp | 10 +- plugins/GSdx/GSDevice.h | 1 + plugins/GSdx/GSDevice10.cpp | 4 +- plugins/GSdx/GSRenderer.cpp | 311 ++++++++++++++ plugins/GSdx/GSRenderer.h | 15 +- plugins/GSdx/GSRendererHW.h | 219 ++-------- plugins/GSdx/GSRendererHW10.cpp | 83 ++-- plugins/GSdx/GSRendererHW10.h | 4 +- plugins/GSdx/GSRendererHW11.cpp | 83 ++-- plugins/GSdx/GSRendererHW11.h | 4 +- plugins/GSdx/GSRendererHW9.cpp | 60 +-- plugins/GSdx/GSRendererHW9.h | 4 +- plugins/GSdx/GSRendererSW.cpp | 363 +--------------- plugins/GSdx/GSRendererSW.h | 4 - plugins/GSdx/GSTexture10.cpp | 30 +- plugins/GSdx/GSTextureCache.cpp | 73 +--- plugins/GSdx/GSTextureCache.h | 4 +- plugins/GSdx/GSTextureCache10.cpp | 93 ++++- plugins/GSdx/GSTextureFX.cpp | 23 + plugins/GSdx/GSTextureFX.h | 243 +++++++++++ plugins/GSdx/GSTextureFX10.cpp | 17 +- plugins/GSdx/GSTextureFX10.h | 187 +-------- plugins/GSdx/GSTextureFX11.cpp | 17 +- plugins/GSdx/GSTextureFX11.h | 187 +-------- plugins/GSdx/GSTextureFX9.cpp | 10 - plugins/GSdx/GSTextureFX9.h | 117 +----- plugins/GSdx/GSVertex.h | 2 +- plugins/GSdx/GSVertexHW.h | 13 +- plugins/GSdx/GSVertexSW.cpp | 142 ------- plugins/GSdx/GSVertexSW.h | 78 ---- plugins/GSdx/GSVertexTrace.cpp | 670 ++++++++++++++++++++++++++++++ plugins/GSdx/GSVertexTrace.h | 99 +++++ plugins/GSdx/GSdx_vs2008.vcproj | 16 + plugins/GSdx/res/tfx.fx | 2 +- 34 files changed, 1667 insertions(+), 1521 deletions(-) create mode 100644 plugins/GSdx/GSTextureFX.cpp create mode 100644 plugins/GSdx/GSTextureFX.h create mode 100644 plugins/GSdx/GSVertexTrace.cpp create mode 100644 plugins/GSdx/GSVertexTrace.h diff --git 
a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 430bb23407..67aeab825d 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -73,6 +73,11 @@ EXPORT_C GSsetBaseMem(uint8* mem) EXPORT_C_(INT32) GSinit() { + if(!GSUtil::CheckSSE()) + { + return -1; + } + return 0; } @@ -113,11 +118,6 @@ static INT32 GSopen(void* dsp, char* title, int mt, int renderer) #endif - if(!GSUtil::CheckSSE()) - { - return -1; - } - switch(renderer) { default: diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index 59799e814d..a5d36775a3 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -81,6 +81,7 @@ public: virtual void Flip() {} virtual void BeginScene() {} + virtual void DrawPrimitive() {}; virtual void EndScene() {} virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {} diff --git a/plugins/GSdx/GSDevice10.cpp b/plugins/GSdx/GSDevice10.cpp index 7f3fbf2791..6d7d2b73ec 100644 --- a/plugins/GSdx/GSDevice10.cpp +++ b/plugins/GSdx/GSDevice10.cpp @@ -302,7 +302,7 @@ GSTexture* GSDevice10::Create(int type, int w, int h, int format) desc.BindFlags = D3D10_BIND_RENDER_TARGET | D3D10_BIND_SHADER_RESOURCE; break; case GSTexture::DepthStencil: - desc.BindFlags = D3D10_BIND_DEPTH_STENCIL; + desc.BindFlags = D3D10_BIND_DEPTH_STENCIL;// | D3D10_BIND_SHADER_RESOURCE; break; case GSTexture::Texture: desc.BindFlags = D3D10_BIND_SHADER_RESOURCE; @@ -344,7 +344,7 @@ GSTexture* GSDevice10::CreateRenderTarget(int w, int h, int format) GSTexture* GSDevice10::CreateDepthStencil(int w, int h, int format) { - return __super::CreateDepthStencil(w, h, format ? format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + return __super::CreateDepthStencil(w, h, format ? 
format : DXGI_FORMAT_D32_FLOAT_S8X24_UINT); // DXGI_FORMAT_R32G8X24_TYPELESS } GSTexture* GSDevice10::CreateTexture(int w, int h, int format) diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index 8af6720dfc..046483fba3 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -426,3 +426,314 @@ void GSRenderer::KeyEvent(GSKeyEventData* e) } } +void GSRenderer::GetTextureMinMax(GSVector4i& r) +{ + const GSDrawingContext* context = m_context; + + int tw = context->TEX0.TW; + int th = context->TEX0.TH; + + int w = 1 << tw; + int h = 1 << th; + + GSVector4i tr(0, 0, w, h); + + int wms = context->CLAMP.WMS; + int wmt = context->CLAMP.WMT; + + int minu = (int)context->CLAMP.MINU; + int minv = (int)context->CLAMP.MINV; + int maxu = (int)context->CLAMP.MAXU; + int maxv = (int)context->CLAMP.MAXV; + + GSVector4i vr = tr; + + switch(wms) + { + case CLAMP_REPEAT: + break; + case CLAMP_CLAMP: + break; + case CLAMP_REGION_CLAMP: + if(vr.x < minu) vr.x = minu; + if(vr.z > maxu + 1) vr.z = maxu + 1; + break; + case CLAMP_REGION_REPEAT: + vr.x = maxu; + vr.z = vr.x + (minu + 1); + break; + default: + __assume(0); + } + + switch(wmt) + { + case CLAMP_REPEAT: + break; + case CLAMP_CLAMP: + break; + case CLAMP_REGION_CLAMP: + if(vr.y < minv) vr.y = minv; + if(vr.w > maxv + 1) vr.w = maxv + 1; + break; + case CLAMP_REGION_REPEAT: + vr.y = maxv; + vr.w = vr.y + (minv + 1); + break; + default: + __assume(0); + } + + if(wms + wmt < 6) + { + GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t); + + if(context->TEX1.IsLinear()) + { + st += GSVector4(-0x8000, 0x8000).xxyy(); + } + + GSVector4i uv = GSVector4i(st).sra32(16); + + GSVector4i u, v; + + int mask = 0; + + if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT) + { + u = uv & GSVector4i::xffffffff().srl32(32 - tw); + v = uv & GSVector4i::xffffffff().srl32(32 - th); + + GSVector4i uu = uv.sra32(tw); + GSVector4i vv = uv.sra32(th); + + mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); + } + + uv = 
uv.rintersect(tr); + + switch(wms) + { + case CLAMP_REPEAT: + if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;} + break; + case CLAMP_CLAMP: + case CLAMP_REGION_CLAMP: + if(vr.x < uv.x) vr.x = uv.x; + if(vr.z > uv.z + 1) vr.z = uv.z + 1; + break; + case CLAMP_REGION_REPEAT: // TODO + break; + default: + __assume(0); + } + + switch(wmt) + { + case CLAMP_REPEAT: + if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;} + break; + case CLAMP_CLAMP: + case CLAMP_REGION_CLAMP: + if(vr.y < uv.y) vr.y = uv.y; + if(vr.w > uv.w + 1) vr.w = uv.w + 1; + break; + case CLAMP_REGION_REPEAT: // TODO + break; + default: + __assume(0); + } + } + + GSVector2i bs = GSLocalMemory::m_psm[context->TEX0.PSM].bs; + + r = vr.ralign(bs).rintersect(tr); +} + +void GSRenderer::GetAlphaMinMax() +{ + if(m_vt.m_alpha.valid) + { + return; + } + + const GSDrawingEnvironment& env = m_env; + const GSDrawingContext* context = m_context; + + GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww(); + + if(PRIM->TME && context->TEX0.TCC) + { + uint32 bpp = GSLocalMemory::m_psm[context->TEX0.PSM].trbpp; + uint32 cbpp = GSLocalMemory::m_psm[context->TEX0.CPSM].trbpp; + uint32 pal = GSLocalMemory::m_psm[context->TEX0.PSM].pal; + + if(bpp == 32) + { + a.y = 0; + a.w = 0xff; + } + else if(bpp == 24) + { + a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0; + a.w = env.TEXA.TA0; + } + else if(bpp == 16) + { + a.y = env.TEXA.AEM ? 
0 : min(env.TEXA.TA0, env.TEXA.TA1); + a.w = max(env.TEXA.TA0, env.TEXA.TA1); + } + else + { + m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); + } + + switch(context->TEX0.TFX) + { + case TFX_MODULATE: + a.x = (a.x * a.y) >> 7; + a.z = (a.z * a.w) >> 7; + if(a.x > 0xff) a.x = 0xff; + if(a.z > 0xff) a.z = 0xff; + break; + case TFX_DECAL: + a.x = a.y; + a.z = a.w; + break; + case TFX_HIGHLIGHT: + a.x = a.x + a.y; + a.z = a.z + a.w; + if(a.x > 0xff) a.x = 0xff; + if(a.z > 0xff) a.z = 0xff; + break; + case TFX_HIGHLIGHT2: + a.x = a.y; + a.z = a.w; + break; + default: + __assume(0); + } + } + + m_vt.m_alpha.min = a.x; + m_vt.m_alpha.max = a.z; + m_vt.m_alpha.valid = true; +} + +bool GSRenderer::TryAlphaTest(uint32& fm, uint32& zm) +{ + const GSDrawingContext* context = m_context; + + bool pass = true; + + if(context->TEST.ATST == ATST_NEVER) + { + pass = false; + } + else if(context->TEST.ATST != ATST_ALWAYS) + { + GetAlphaMinMax(); + + int amin = m_vt.m_alpha.min; + int amax = m_vt.m_alpha.max; + + int aref = context->TEST.AREF; + + switch(context->TEST.ATST) + { + case ATST_NEVER: + pass = false; + break; + case ATST_ALWAYS: + pass = true; + break; + case ATST_LESS: + if(amax < aref) pass = true; + else if(amin >= aref) pass = false; + else return false; + break; + case ATST_LEQUAL: + if(amax <= aref) pass = true; + else if(amin > aref) pass = false; + else return false; + break; + case ATST_EQUAL: + if(amin == aref && amax == aref) pass = true; + else if(amin > aref || amax < aref) pass = false; + else return false; + break; + case ATST_GEQUAL: + if(amin >= aref) pass = true; + else if(amax < aref) pass = false; + else return false; + break; + case ATST_GREATER: + if(amin > aref) pass = true; + else if(amax <= aref) pass = false; + else return false; + break; + case ATST_NOTEQUAL: + if(amin == aref && amax == aref) pass = false; + else if(amin > aref || amax < aref) pass = true; + else return false; + break; + default: + __assume(0); + } + } + + if(!pass) + { + 
switch(context->TEST.AFAIL) + { + case AFAIL_KEEP: fm = zm = 0xffffffff; break; + case AFAIL_FB_ONLY: zm = 0xffffffff; break; + case AFAIL_ZB_ONLY: fm = 0xffffffff; break; + case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break; + default: __assume(0); + } + } + + return true; +} + +bool GSRenderer::IsOpaque() +{ + if(PRIM->AA1) + { + return false; + } + + if(!PRIM->ABE) + { + return true; + } + + const GSDrawingContext* context = m_context; + + int amin = 0, amax = 0xff; + + if(context->ALPHA.A != context->ALPHA.B) + { + if(context->ALPHA.C == 0) + { + GetAlphaMinMax(); + + amin = m_vt.m_alpha.min; + amax = m_vt.m_alpha.max; + } + else if(context->ALPHA.C == 1) + { + if(context->FRAME.PSM == PSM_PSMCT24 || context->FRAME.PSM == PSM_PSMZ24) + { + amin = amax = 0x80; + } + } + else if(context->ALPHA.C == 2) /* was a duplicate "C == 1" test, which left this FIX branch unreachable */ + { + amin = amax = context->ALPHA.FIX; + } + } + + return context->ALPHA.IsOpaque(amin, amax); +} diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index 97ad72df2c..21f1651d4b 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -24,6 +24,7 @@ #include "GSdx.h" #include "GSWnd.h" #include "GSState.h" +#include "GSVertexTrace.h" #include "GSVertexList.h" #include "GSSettingsDlg.h" #include "GSCapture.h" @@ -48,6 +49,15 @@ protected: virtual void ResetDevice() {} virtual GSTexture* GetOutput(int i) = 0; + GSVertexTrace m_vt; + + // following functions need m_vt to be initialized + + void GetTextureMinMax(GSVector4i& r); + void GetAlphaMinMax(); + bool TryAlphaTest(uint32& fm, uint32& zm); + bool IsOpaque(); + public: GSWnd m_wnd; GSDevice* m_dev; @@ -66,11 +76,6 @@ public: virtual bool MakeSnapshot(const string& path); virtual void KeyEvent(GSKeyEventData* e); - virtual void MinMaxUV(int w, int h, GSVector4i& r) - { - r = GSVector4i(0, 0, w, h); - } - virtual bool CanUpscale() { return !m_nativeres; diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index af3bfefa6c..67a16e449b 100644 --- 
a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -46,176 +46,6 @@ protected: __super::Reset(); } - void MinMaxUV(int w, int h, GSVector4i& r) - { - int wms = m_context->CLAMP.WMS; - int wmt = m_context->CLAMP.WMT; - - int minu = (int)m_context->CLAMP.MINU; - int minv = (int)m_context->CLAMP.MINV; - int maxu = (int)m_context->CLAMP.MAXU; - int maxv = (int)m_context->CLAMP.MAXV; - - GSVector4i vr = GSVector4i(0, 0, w, h); - - GSVector4i wm[3]; - - if(wms + wmt < 6) - { - GSVector4 mm; - - if(m_count < 100) - { - Vertex* v = m_vertices; - - GSVector4 minv(+1e10f); - GSVector4 maxv(-1e10f); - - int i = 0; - - if(PRIM->FST) - { - for(int j = m_count - 3; i < j; i += 4) - { - GSVector4 v0 = v[i + 0].vf[0]; - GSVector4 v1 = v[i + 1].vf[0]; - GSVector4 v2 = v[i + 2].vf[0]; - GSVector4 v3 = v[i + 3].vf[0]; - - minv = minv.minv((v0.minv(v1)).minv(v2.minv(v3))); - maxv = maxv.maxv((v0.maxv(v1)).maxv(v2.maxv(v3))); - } - - for(int j = m_count; i < j; i++) - { - GSVector4 v0 = v[i + 0].vf[0]; - - minv = minv.minv(v0); - maxv = maxv.maxv(v0); - } - - mm = minv.xyxy(maxv) * GSVector4(16 << m_context->TEX0.TW, 16 << m_context->TEX0.TH).xyxy().rcpnr(); - } - else - { - /* - for(int j = m_count - 3; i < j; i += 4) - { - GSVector4 v0 = GSVector4(v[i + 0].m128[0]) / GSVector4(v[i + 0].GetQ()); - GSVector4 v1 = GSVector4(v[i + 1].m128[0]) / GSVector4(v[i + 1].GetQ()); - GSVector4 v2 = GSVector4(v[i + 2].m128[0]) / GSVector4(v[i + 2].GetQ()); - GSVector4 v3 = GSVector4(v[i + 3].m128[0]) / GSVector4(v[i + 3].GetQ()); - - minv = minv.minv((v0.minv(v1)).minv(v2.minv(v3))); - maxv = maxv.maxv((v0.maxv(v1)).maxv(v2.maxv(v3))); - } - - for(int j = m_count; i < j; i++) - { - GSVector4 v0 = GSVector4(v[i + 0].m128[0]) / GSVector4(v[i + 0].GetQ());; - - minv = minv.minv(v0); - maxv = maxv.maxv(v0); - } - - mm = minv.xyxy(maxv); - */ - - // just can't beat the compiler generated scalar sse code with packed div or rcp - - mm.x = mm.y = +1e10; - mm.z = mm.w = -1e10; - - for(int j 
= m_count; i < j; i++) - { - float w = 1.0f / v[i].GetQ(); - - float x = v[i].t.x * w; - - if(x < mm.x) mm.x = x; - if(x > mm.z) mm.z = x; - - float y = v[i].t.y * w; - - if(y < mm.y) mm.y = y; - if(y > mm.w) mm.w = y; - } - } - } - else - { - mm = GSVector4(0.0f, 0.0f, 1.0f, 1.0f); - } - - GSVector4 v0 = GSVector4(vr); - GSVector4 v1 = v0.zwzw(); - - GSVector4 mmf = mm.floor(); - GSVector4 mask = mmf.xyxy() == mmf.zwzw(); - - wm[0] = GSVector4i(v0.blend8((mm - mmf) * v1, mask)); - - mm *= v1; - - wm[1] = GSVector4i(mm.sat(GSVector4::zero(), v1)); - wm[2] = GSVector4i(mm.sat(GSVector4(minu, minv, maxu, maxv))); - } - - GSVector4i v; - - switch(wms) - { - case CLAMP_REPEAT: - v = wm[0]; - if(v.x == 0 && v.z != w) v.z = w; // FIXME - vr.x = v.x; - vr.z = v.z; - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - v = wm[wms]; - if(v.x > v.z) v.x = v.z; - vr.x = v.x; - vr.z = v.z; - break; - case CLAMP_REGION_REPEAT: - vr.x = maxu; - vr.z = vr.x + (minu + 1); - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - v = wm[0]; - if(v.y == 0 && v.w != h) v.w = h; // FIXME - vr.y = v.y; - vr.w = v.w; - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - v = wm[wmt]; - if(v.y > v.w) v.y = v.w; - vr.y = v.y; - vr.w = v.w; - break; - case CLAMP_REGION_REPEAT: - vr.y = maxv; - vr.w = vr.y + (minv + 1); - break; - default: - __assume(0); - } - - r = vr + GSVector4i(-1, -1, 1, 1); // one more pixel because of bilinear filtering - - GSVector2i bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs; - - r = r.ralign(bs).rintersect(GSVector4i(0, 0, w, h)); - } - void VSync(int field) { __super::VSync(field); @@ -285,10 +115,9 @@ protected: void Draw() { - if(IsBadFrame(m_skip)) - { - return; - } + if(IsBadFrame(m_skip)) return; + + m_vt.Update(m_vertices, m_count, GSUtil::GetPrimClass(PRIM->PRIM), PRIM, m_context); GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; @@ -311,7 +140,13 @@ protected: if(PRIM->TME) { - tex = 
m_tc->GetTexture(); + m_mem.m_clut.Read32(context->TEX0, env.TEXA); + + GSVector4i r; + + GetTextureMinMax(r); + + tex = m_tc->GetTexture(r); if(!tex) return; } @@ -366,7 +201,37 @@ protected: return; } - Draw(prim, rt->m_texture, ds->m_texture, tex); + // skip alpha test if possible + + GIFRegTEST TEST = context->TEST; + GIFRegFRAME FRAME = context->FRAME; + GIFRegZBUF ZBUF = context->ZBUF; + + uint32 fm = context->FRAME.FBMSK; + uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0; + + if(context->TEST.ATE && context->TEST.ATST != ATST_ALWAYS) + { + if(TryAlphaTest(fm, zm)) + { + context->TEST.ATE = 0; + } + } + + context->FRAME.FBMSK = fm; + context->ZBUF.ZMSK = zm != 0; + + // + + Draw(GSUtil::GetPrimClass(prim), rt->m_texture, ds->m_texture, tex); + + // + + context->TEST = TEST; + context->FRAME = FRAME; + context->ZBUF = ZBUF; + + // OverrideOutput(); @@ -396,7 +261,7 @@ protected: m_tc->InvalidateTextures(context->FRAME, context->ZBUF); } - virtual void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) = 0; + virtual void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) = 0; virtual bool OverrideInput(int& prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* t) { diff --git a/plugins/GSdx/GSRendererHW10.cpp b/plugins/GSdx/GSRendererHW10.cpp index 39a66640e3..b39afae7c0 100644 --- a/plugins/GSdx/GSRendererHW10.cpp +++ b/plugins/GSdx/GSRendererHW10.cpp @@ -169,44 +169,26 @@ void GSRendererHW10::VertexKick(bool skip) } } -void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) +void GSRendererHW10::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; -/* - if(s_dump) - { - TRACE(_T("\n")); - TRACE(_T("PRIM = %d, ZMSK = %d, ZTE = %d, ZTST = %d, ATE = %d, ATST = %d, AFAIL = %d, 
AREF = %02x\n"), - PRIM->PRIM, context->ZBUF.ZMSK, - context->TEST.ZTE, context->TEST.ZTST, - context->TEST.ATE, context->TEST.ATST, context->TEST.AFAIL, context->TEST.AREF); - - for(int i = 0; i < m_count; i++) - { - TRACE(_T("[%d] %3.0f %3.0f %3.0f %3.0f\n"), i, (float)m_vertices[i].p.x / 16, (float)m_vertices[i].p.y / 16, (float)m_vertices[i].p.z, (float)m_vertices[i].a); - } - } -*/ D3D10_PRIMITIVE_TOPOLOGY topology; int prims = 0; - switch(prim) + switch(primclass) { - case GS_POINTLIST: + case GS_POINT_CLASS: topology = D3D10_PRIMITIVE_TOPOLOGY_POINTLIST; prims = m_count; break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: topology = D3D10_PRIMITIVE_TOPOLOGY_LINELIST; prims = m_count / 2; break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: + case GS_TRIANGLE_CLASS: topology = D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST; prims = m_count / 3; break; @@ -227,16 +209,16 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache // om - GSTextureFX10::OMDepthStencilSelector om_dssel; + GSTextureFX::OMDepthStencilSelector om_dssel; om_dssel.zte = context->TEST.ZTE; om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; om_dssel.date = context->FRAME.PSM != PSM_PSMCT24 ? 
context->TEST.DATE : 0; - GSTextureFX10::OMBlendSelector om_bsel; + GSTextureFX::OMBlendSelector om_bsel; - om_bsel.abe = PRIM->ABE || (prim == 1 || prim == 2) && PRIM->AA1; + om_bsel.abe = !IsOpaque(); om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; @@ -250,34 +232,38 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache // vs - GSTextureFX10::VSSelector vs_sel; + GSTextureFX::VSSelector vs_sel; vs_sel.bppz = 0; vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; - vs_sel.prim = prim; + vs_sel.prim = primclass; if(om_dssel.zte && om_dssel.ztst > 0 && om_dssel.zwe) { if(context->ZBUF.PSM == PSM_PSMZ24) { - if(WrapZ(0xffffff)) + if(m_vt.m_max.p.z > 0xffffff) { + ASSERT(m_vt.m_min.p.z > 0xffffff); + vs_sel.bppz = 1; om_dssel.ztst = 1; } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { - if(WrapZ(0xffff)) + if(m_vt.m_max.p.z > 0xffff) { + ASSERT(m_vt.m_min.p.z > 0xffff); + vs_sel.bppz = 2; om_dssel.ztst = 1; } } } - GSTextureFX10::VSConstantBuffer vs_cb; + GSTextureFX::VSConstantBuffer vs_cb; float sx = 2.0f * rt->m_scale.x / (rt->GetWidth() * 16); float sy = 2.0f * rt->m_scale.y / (rt->GetHeight() * 16); @@ -298,14 +284,14 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache // gs - GSTextureFX10::GSSelector gs_sel; + GSTextureFX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; - gs_sel.prim = GSUtil::GetPrimClass(prim); + gs_sel.prim = primclass; // ps - GSTextureFX10::PSSelector ps_sel; + GSTextureFX::PSSelector ps_sel; ps_sel.fst = PRIM->FST; ps_sel.wms = context->CLAMP.WMS; @@ -322,21 +308,21 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; ps_sel.ltf = m_filter == 2 ? 
context->TEX1.IsLinear() : m_filter; - GSTextureFX10::PSSamplerSelector ps_ssel; + GSTextureFX::PSSamplerSelector ps_ssel; ps_ssel.tau = 0; ps_ssel.tav = 0; ps_ssel.ltf = ps_sel.ltf; - GSTextureFX10::PSConstantBuffer ps_cb; + GSTextureFX::PSConstantBuffer ps_cb; ps_cb.FogColor_AREF = GSVector4((int)env.FOGCOL.FCR, (int)env.FOGCOL.FCG, (int)env.FOGCOL.FCB, (int)context->TEST.AREF) / 255; - if(context->TEST.ATST == 2 || context->TEST.ATST == 5) + if(ps_sel.atst == 2 || ps_sel.atst == 5) { ps_cb.FogColor_AREF.a -= 0.9f / 255; } - else if(context->TEST.ATST == 3 || context->TEST.ATST == 6) + else if(ps_sel.atst == 3 || ps_sel.atst == 6) { ps_cb.FogColor_AREF.a += 0.9f / 255; } @@ -424,7 +410,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache if(context->TEST.DoFirstPass()) { - m_tfx.Draw(); + m_dev->DrawPrimitive(); } if(context->TEST.DoSecondPass()) @@ -462,28 +448,13 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache m_tfx.UpdateOM(om_dssel, om_bsel, bf); - m_tfx.Draw(); + m_dev->DrawPrimitive(); } } m_dev->EndScene(); } -bool GSRendererHW10::WrapZ(uint32 maxz) -{ - // should only run once if z values are in the z buffer range - - for(int i = 0, j = m_count; i < j; i++) - { - if(m_vertices[i].p.z <= maxz) - { - return false; - } - } - - return true; -} - void GSRendererHW10::SetupDATE(GSTexture* rt, GSTexture* ds) { if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL) & 0x8000) diff --git a/plugins/GSdx/GSRendererHW10.h b/plugins/GSdx/GSRendererHW10.h index 51cd9648b4..0568758624 100644 --- a/plugins/GSdx/GSRendererHW10.h +++ b/plugins/GSdx/GSRendererHW10.h @@ -28,12 +28,10 @@ class GSRendererHW10 : public GSRendererHW { - bool WrapZ(uint32 maxz); - protected: GSTextureFX10 m_tfx; - void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex); + void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex); struct 
{ diff --git a/plugins/GSdx/GSRendererHW11.cpp b/plugins/GSdx/GSRendererHW11.cpp index ec7f11447b..ac47dad0e7 100644 --- a/plugins/GSdx/GSRendererHW11.cpp +++ b/plugins/GSdx/GSRendererHW11.cpp @@ -169,44 +169,26 @@ void GSRendererHW11::VertexKick(bool skip) } } -void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) +void GSRendererHW11::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; -/* - if(s_dump) - { - TRACE(_T("\n")); - TRACE(_T("PRIM = %d, ZMSK = %d, ZTE = %d, ZTST = %d, ATE = %d, ATST = %d, AFAIL = %d, AREF = %02x\n"), - PRIM->PRIM, context->ZBUF.ZMSK, - context->TEST.ZTE, context->TEST.ZTST, - context->TEST.ATE, context->TEST.ATST, context->TEST.AFAIL, context->TEST.AREF); - - for(int i = 0; i < m_count; i++) - { - TRACE(_T("[%d] %3.0f %3.0f %3.0f %3.0f\n"), i, (float)m_vertices[i].p.x / 16, (float)m_vertices[i].p.y / 16, (float)m_vertices[i].p.z, (float)m_vertices[i].a); - } - } -*/ D3D11_PRIMITIVE_TOPOLOGY topology; int prims = 0; - switch(prim) + switch(primclass) { - case GS_POINTLIST: + case GS_POINT_CLASS: topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST; prims = m_count; break; - case GS_LINELIST: - case GS_LINESTRIP: - case GS_SPRITE: + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST; prims = m_count / 2; break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: + case GS_TRIANGLE_CLASS: topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; prims = m_count / 3; break; @@ -227,16 +209,16 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache // om - GSTextureFX11::OMDepthStencilSelector om_dssel; + GSTextureFX::OMDepthStencilSelector om_dssel; om_dssel.zte = context->TEST.ZTE; om_dssel.ztst = context->TEST.ZTST; om_dssel.zwe = !context->ZBUF.ZMSK; om_dssel.date = context->FRAME.PSM != 
PSM_PSMCT24 ? context->TEST.DATE : 0; - GSTextureFX11::OMBlendSelector om_bsel; + GSTextureFX::OMBlendSelector om_bsel; - om_bsel.abe = PRIM->ABE || (prim == 1 || prim == 2) && PRIM->AA1; + om_bsel.abe = !IsOpaque(); om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; @@ -250,34 +232,38 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache // vs - GSTextureFX11::VSSelector vs_sel; + GSTextureFX::VSSelector vs_sel; vs_sel.bppz = 0; vs_sel.tme = PRIM->TME; vs_sel.fst = PRIM->FST; - vs_sel.prim = prim; + vs_sel.prim = primclass; if(om_dssel.zte && om_dssel.ztst > 0 && om_dssel.zwe) { if(context->ZBUF.PSM == PSM_PSMZ24) { - if(WrapZ(0xffffff)) + if(m_vt.m_max.p.z > 0xffffff) { + ASSERT(m_vt.m_min.p.z > 0xffffff); + vs_sel.bppz = 1; om_dssel.ztst = 1; } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { - if(WrapZ(0xffff)) + if(m_vt.m_max.p.z > 0xffff) { + ASSERT(m_vt.m_min.p.z > 0xffff); + vs_sel.bppz = 2; om_dssel.ztst = 1; } } } - GSTextureFX11::VSConstantBuffer vs_cb; + GSTextureFX::VSConstantBuffer vs_cb; float sx = 2.0f * rt->m_scale.x / (rt->GetWidth() * 16); float sy = 2.0f * rt->m_scale.y / (rt->GetHeight() * 16); @@ -298,14 +284,14 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache // gs - GSTextureFX11::GSSelector gs_sel; + GSTextureFX::GSSelector gs_sel; gs_sel.iip = PRIM->IIP; - gs_sel.prim = GSUtil::GetPrimClass(prim); + gs_sel.prim = primclass; // ps - GSTextureFX11::PSSelector ps_sel; + GSTextureFX::PSSelector ps_sel; ps_sel.fst = PRIM->FST; ps_sel.wms = context->CLAMP.WMS; @@ -322,21 +308,21 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; ps_sel.ltf = m_filter == 2 ? 
context->TEX1.IsLinear() : m_filter; - GSTextureFX11::PSSamplerSelector ps_ssel; + GSTextureFX::PSSamplerSelector ps_ssel; ps_ssel.tau = 0; ps_ssel.tav = 0; ps_ssel.ltf = ps_sel.ltf; - GSTextureFX11::PSConstantBuffer ps_cb; + GSTextureFX::PSConstantBuffer ps_cb; ps_cb.FogColor_AREF = GSVector4((int)env.FOGCOL.FCR, (int)env.FOGCOL.FCG, (int)env.FOGCOL.FCB, (int)context->TEST.AREF) / 255; - if(context->TEST.ATST == 2 || context->TEST.ATST == 5) + if(ps_sel.atst == 2 || ps_sel.atst == 5) { ps_cb.FogColor_AREF.a -= 0.9f / 255; } - else if(context->TEST.ATST == 3 || context->TEST.ATST == 6) + else if(ps_sel.atst == 3 || ps_sel.atst == 6) { ps_cb.FogColor_AREF.a += 0.9f / 255; } @@ -424,7 +410,7 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache if(context->TEST.DoFirstPass()) { - m_tfx.Draw(); + m_dev->DrawPrimitive(); } if(context->TEST.DoSecondPass()) @@ -462,28 +448,13 @@ void GSRendererHW11::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache m_tfx.UpdateOM(om_dssel, om_bsel, bf); - m_tfx.Draw(); + m_dev->DrawPrimitive(); } } m_dev->EndScene(); } -bool GSRendererHW11::WrapZ(uint32 maxz) -{ - // should only run once if z values are in the z buffer range - - for(int i = 0, j = m_count; i < j; i++) - { - if(m_vertices[i].p.z <= maxz) - { - return false; - } - } - - return true; -} - void GSRendererHW11::SetupDATE(GSTexture* rt, GSTexture* ds) { if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL) & 0x8000) diff --git a/plugins/GSdx/GSRendererHW11.h b/plugins/GSdx/GSRendererHW11.h index f3763a7dd1..fab4c76911 100644 --- a/plugins/GSdx/GSRendererHW11.h +++ b/plugins/GSdx/GSRendererHW11.h @@ -28,12 +28,10 @@ class GSRendererHW11 : public GSRendererHW { - bool WrapZ(uint32 maxz); - protected: GSTextureFX11 m_tfx; - void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex); + void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex); struct 
{ diff --git a/plugins/GSdx/GSRendererHW9.cpp b/plugins/GSdx/GSRendererHW9.cpp index 778bb9ef12..cfc0d99cdc 100644 --- a/plugins/GSdx/GSRendererHW9.cpp +++ b/plugins/GSdx/GSRendererHW9.cpp @@ -171,7 +171,7 @@ void GSRendererHW9::VertexKick(bool skip) } } -void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) +void GSRendererHW9::Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex) { GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; @@ -179,21 +179,18 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: D3DPRIMITIVETYPE topology; int prims = 0; - switch(prim) + switch(primclass) { - case GS_POINTLIST: + case GS_POINT_CLASS: topology = D3DPT_POINTLIST; prims = m_count; break; - case GS_LINELIST: - case GS_LINESTRIP: + case GS_LINE_CLASS: topology = D3DPT_LINELIST; prims = m_count / 2; break; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - case GS_SPRITE: + case GS_TRIANGLE_CLASS: + case GS_SPRITE_CLASS: topology = D3DPT_TRIANGLELIST; prims = m_count / 3; break; @@ -216,7 +213,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: // om - GSTextureFX9::OMDepthStencilSelector om_dssel; + GSTextureFX::OMDepthStencilSelector om_dssel; om_dssel.zte = context->TEST.ZTE; om_dssel.ztst = context->TEST.ZTST; @@ -224,9 +221,9 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: om_dssel.date = context->FRAME.PSM != PSM_PSMCT24 ? context->TEST.DATE : 0; om_dssel.fba = m_fba.enabled ? 
context->FBA.FBA : 0; - GSTextureFX9::OMBlendSelector om_bsel; + GSTextureFX::OMBlendSelector om_bsel; - om_bsel.abe = PRIM->ABE || (prim == 1 || prim == 2) && PRIM->AA1; + om_bsel.abe = !IsOpaque(); om_bsel.a = context->ALPHA.A; om_bsel.b = context->ALPHA.B; om_bsel.c = context->ALPHA.C; @@ -240,7 +237,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: // vs - GSTextureFX9::VSSelector vs_sel; + GSTextureFX::VSSelector vs_sel; vs_sel.bppz = 0; vs_sel.tme = PRIM->TME; @@ -251,23 +248,27 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: { if(context->ZBUF.PSM == PSM_PSMZ24) { - if(WrapZ(0xffffff)) + if(m_vt.m_max.p.z > 0xffffff) { + ASSERT(m_vt.m_min.p.z > 0xffffff); + vs_sel.bppz = 1; om_dssel.ztst = 1; } } else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S) { - if(WrapZ(0xffff)) + if(m_vt.m_max.p.z > 0xffff) { + ASSERT(m_vt.m_min.p.z > 0xffff); + vs_sel.bppz = 2; om_dssel.ztst = 1; } } } - GSTextureFX9::VSConstantBuffer vs_cb; + GSTextureFX::VSConstantBuffer vs_cb; float sx = 2.0f * rt->m_scale.x / (rt->GetWidth() * 16); float sy = 2.0f * rt->m_scale.y / (rt->GetHeight() * 16); @@ -286,7 +287,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: // ps - GSTextureFX9::PSSelector ps_sel; + GSTextureFX::PSSelector ps_sel; ps_sel.fst = PRIM->FST; ps_sel.wms = context->CLAMP.WMS; @@ -302,7 +303,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: ps_sel.rt = tex && tex->m_rendered; ps_sel.ltf = m_filter == 2 ? 
context->TEX1.IsLinear() : m_filter; - GSTextureFX9::PSSamplerSelector ps_ssel; + GSTextureFX::PSSamplerSelector ps_ssel; ps_ssel.tau = 0; ps_ssel.tav = 0; @@ -312,11 +313,11 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: ps_cb.FogColor_AREF = GSVector4((int)env.FOGCOL.FCR, (int)env.FOGCOL.FCG, (int)env.FOGCOL.FCB, (int)context->TEST.AREF) / 255; - if(context->TEST.ATST == 2 || context->TEST.ATST == 5) + if(ps_sel.atst == 2 || ps_sel.atst == 5) { ps_cb.FogColor_AREF.a -= 0.9f / 255; } - else if(context->TEST.ATST == 3 || context->TEST.ATST == 6) + else if(ps_sel.atst == 3 || ps_sel.atst == 6) { ps_cb.FogColor_AREF.a += 0.9f / 255; } @@ -403,7 +404,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: if(context->TEST.DoFirstPass()) { - m_tfx.Draw(); + m_dev->DrawPrimitive(); } if(context->TEST.DoSecondPass()) @@ -441,7 +442,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: m_tfx.UpdateOM(om_dssel, om_bsel, bf); - m_tfx.Draw(); + m_dev->DrawPrimitive(); } } @@ -450,21 +451,6 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: if(om_dssel.fba) UpdateFBA(rt); } -bool GSRendererHW9::WrapZ(float maxz) -{ - // should only run once if z values are in the z buffer range - - for(int i = 0, j = m_count; i < j; i++) - { - if(m_vertices[i].p.z <= maxz) - { - return false; - } - } - - return true; -} - void GSRendererHW9::SetupDATE(GSTexture* rt, GSTexture* ds) { if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL) & 0x8000) diff --git a/plugins/GSdx/GSRendererHW9.h b/plugins/GSdx/GSRendererHW9.h index 9b8ff4221a..896fe892a5 100644 --- a/plugins/GSdx/GSRendererHW9.h +++ b/plugins/GSdx/GSRendererHW9.h @@ -28,13 +28,11 @@ class GSRendererHW9 : public GSRendererHW { - bool WrapZ(float maxz); - protected: GSTextureFX9 m_tfx; bool m_logz; - void Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex); + 
void Draw(GS_PRIM_CLASS primclass, GSTexture* rt, GSTexture* ds, GSTextureCache::GSCachedTexture* tex); struct { diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index d9e0d732b6..9828f8757e 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -139,7 +139,7 @@ void GSRendererSW::Draw() { GS_PRIM_CLASS primclass = GSUtil::GetPrimClass(PRIM->PRIM); - m_vtrace.Update(m_vertices, m_count, primclass, PRIM->IIP, PRIM->TME, m_context->TEX0.TFX, m_context->TEX0.TCC); + m_vt.Update(m_vertices, m_count, primclass, PRIM, m_context); if(m_dump) { @@ -206,7 +206,7 @@ void GSRendererSW::Draw() m_perfmon.Put(GSPerfMon::Prim, stats.prims); m_perfmon.Put(GSPerfMon::Fillrate, stats.pixels); - GSVector4i r = GSVector4i(m_vtrace.m_min.p.xyxy(m_vtrace.m_max.p)).rintersect(data.scissor); + GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor); GIFRegBITBLTBUF BITBLTBUF; @@ -266,261 +266,6 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS m_tc->InvalidateVideoMem(BITBLTBUF, r); } -void GSRendererSW::GetTextureMinMax(int w, int h, GSVector4i& r, uint32 fst) -{ - const GSDrawingContext* context = m_context; - - int wms = context->CLAMP.WMS; - int wmt = context->CLAMP.WMT; - - int minu = (int)context->CLAMP.MINU; - int minv = (int)context->CLAMP.MINV; - int maxu = (int)context->CLAMP.MAXU; - int maxv = (int)context->CLAMP.MAXV; - - GSVector4i vr(0, 0, w, h); - - switch(wms) - { - case CLAMP_REPEAT: - break; - case CLAMP_CLAMP: - break; - case CLAMP_REGION_CLAMP: - if(vr.x < minu) vr.x = minu; - if(vr.z > maxu + 1) vr.z = maxu + 1; - break; - case CLAMP_REGION_REPEAT: - vr.x = maxu; - vr.z = vr.x + (minu + 1); - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - break; - case CLAMP_CLAMP: - break; - case CLAMP_REGION_CLAMP: - if(vr.y < minv) vr.y = minv; - if(vr.w > maxv + 1) vr.w = maxv + 1; - break; - case CLAMP_REGION_REPEAT: - vr.y = maxv; - 
vr.w = vr.y + (minv + 1); - break; - default: - __assume(0); - } - - if(fst) - { - GSVector4i uv = GSVector4i(m_vtrace.m_min.t.xyxy(m_vtrace.m_max.t)).sra32(16); - - GSVector4i u, v; - - int mask = 0; - - if(wms == CLAMP_REPEAT || wmt == CLAMP_REPEAT) - { - int tw = context->TEX0.TW; - int th = context->TEX0.TH; - - u = uv & GSVector4i::xffffffff().srl32(32 - tw); - v = uv & GSVector4i::xffffffff().srl32(32 - th); - - GSVector4i uu = uv.sra32(tw); - GSVector4i vv = uv.sra32(th); - - mask = (uu.upl32(vv) == uu.uph32(vv)).mask(); - } - - switch(wms) - { - case CLAMP_REPEAT: - if(mask & 0x000f) {if(vr.x < u.x) vr.x = u.x; if(vr.z > u.z + 1) vr.z = u.z + 1;} - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - if(vr.x < uv.x) vr.x = uv.x; - if(vr.z > uv.z + 1) vr.z = uv.z + 1; - break; - case CLAMP_REGION_REPEAT: // TODO - break; - default: - __assume(0); - } - - switch(wmt) - { - case CLAMP_REPEAT: - if(mask & 0xf000) {if(vr.y < v.y) vr.y = v.y; if(vr.w > v.w + 1) vr.w = v.w + 1;} - break; - case CLAMP_CLAMP: - case CLAMP_REGION_CLAMP: - if(vr.y < uv.y) vr.y = uv.y; - if(vr.w > uv.w + 1) vr.w = uv.w + 1; - break; - case CLAMP_REGION_REPEAT: // TODO - break; - default: - __assume(0); - } - } - - r = vr.rintersect(GSVector4i(0, 0, w, h)); -} - -void GSRendererSW::GetAlphaMinMax() -{ - if(m_vtrace.m_alpha.valid) - { - return; - } - - const GSDrawingEnvironment& env = m_env; - const GSDrawingContext* context = m_context; - - GSVector4i a = GSVector4i(m_vtrace.m_min.c.wwww(m_vtrace.m_max.c)) >> 7; - - if(PRIM->TME && context->TEX0.TCC) - { - uint32 bpp = GSLocalMemory::m_psm[context->TEX0.PSM].trbpp; - uint32 cbpp = GSLocalMemory::m_psm[context->TEX0.CPSM].trbpp; - uint32 pal = GSLocalMemory::m_psm[context->TEX0.PSM].pal; - - if(bpp == 32) - { - a.y = 0; - a.w = 0xff; - } - else if(bpp == 24) - { - a.y = env.TEXA.AEM ? 0 : env.TEXA.TA0; - a.w = env.TEXA.TA0; - } - else if(bpp == 16) - { - a.y = env.TEXA.AEM ? 
0 : min(env.TEXA.TA0, env.TEXA.TA1); - a.w = max(env.TEXA.TA0, env.TEXA.TA1); - } - else - { - m_mem.m_clut.GetAlphaMinMax32(a.y, a.w); - } - - switch(context->TEX0.TFX) - { - case TFX_MODULATE: - a.x = (a.x * a.y) >> 7; - a.z = (a.z * a.w) >> 7; - if(a.x > 0xff) a.x = 0xff; - if(a.z > 0xff) a.z = 0xff; - break; - case TFX_DECAL: - a.x = a.y; - a.z = a.w; - break; - case TFX_HIGHLIGHT: - a.x = a.x + a.y; - a.z = a.z + a.w; - if(a.x > 0xff) a.x = 0xff; - if(a.z > 0xff) a.z = 0xff; - break; - case TFX_HIGHLIGHT2: - a.x = a.y; - a.z = a.w; - break; - default: - __assume(0); - } - } - - m_vtrace.m_alpha.min = a.x; - m_vtrace.m_alpha.max = a.z; - m_vtrace.m_alpha.valid = true; -} - -bool GSRendererSW::TryAlphaTest(uint32& fm, uint32& zm) -{ - const GSDrawingContext* context = m_context; - - bool pass = true; - - if(context->TEST.ATST == ATST_NEVER) - { - pass = false; - } - else if(context->TEST.ATST != ATST_ALWAYS) - { - GetAlphaMinMax(); - - int amin = m_vtrace.m_alpha.min; - int amax = m_vtrace.m_alpha.max; - - int aref = context->TEST.AREF; - - switch(context->TEST.ATST) - { - case ATST_NEVER: - pass = false; - break; - case ATST_ALWAYS: - pass = true; - break; - case ATST_LESS: - if(amax < aref) pass = true; - else if(amin >= aref) pass = false; - else return false; - break; - case ATST_LEQUAL: - if(amax <= aref) pass = true; - else if(amin > aref) pass = false; - else return false; - break; - case ATST_EQUAL: - if(amin == aref && amax == aref) pass = true; - else if(amin > aref || amax < aref) pass = false; - else return false; - break; - case ATST_GEQUAL: - if(amin >= aref) pass = true; - else if(amax < aref) pass = false; - else return false; - break; - case ATST_GREATER: - if(amin > aref) pass = true; - else if(amax <= aref) pass = false; - else return false; - break; - case ATST_NOTEQUAL: - if(amin == aref && amax == aref) pass = false; - else if(amin > aref || amax < aref) pass = true; - else return false; - break; - default: - __assume(0); - } - } - - 
if(!pass) - { - switch(context->TEST.AFAIL) - { - case AFAIL_KEEP: fm = zm = 0xffffffff; break; - case AFAIL_FB_ONLY: zm = 0xffffffff; break; - case AFAIL_ZB_ONLY: fm = 0xffffffff; break; - case AFAIL_RGB_ONLY: fm |= 0xff000000; zm = 0xffffffff; break; - default: __assume(0); - } - } - - return true; -} - void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) { const GSDrawingEnvironment& env = m_env; @@ -574,7 +319,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) { p.sel.fpsm = GSUtil::EncodePSM(context->FRAME.PSM); - if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vtrace.m_eq.rgba != 15) + if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff) { p.sel.iip = PRIM->IIP; } @@ -589,14 +334,11 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) p.sel.wms = context->CLAMP.WMS; p.sel.wmt = context->CLAMP.WMT; - if(p.sel.iip == 0 && p.sel.tfx == TFX_MODULATE && p.sel.tcc) + if(/*p.sel.iip == 0 &&*/ p.sel.tfx == TFX_MODULATE && p.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128))) { - if(m_vtrace.m_eq.rgba == 15 && (m_vtrace.m_min.c == GSVector4(128.0f * 128.0f)).alltrue()) - { - // modulate does not do anything when vertex color is 0x80 + // modulate does not do anything when vertex color is 0x80 - p.sel.tfx = TFX_DECAL; - } + p.sel.tfx = TFX_DECAL; } if(p.sel.fst == 0) @@ -605,7 +347,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) GSVertexSW* v = m_vertices; - if(m_vtrace.m_eq.q) + if(m_vt.m_eq.q) { p.sel.fst = 1; @@ -617,44 +359,29 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) { v[i].t *= w; } - - m_vtrace.m_min.t *= w; - m_vtrace.m_max.t *= w; } } else if(primclass == GS_SPRITE_CLASS) { p.sel.fst = 1; - GSVector4 tmin = GSVector4(FLT_MAX); - GSVector4 tmax = GSVector4(-FLT_MAX); - for(int i = 0, j = m_count; i < j; i 
+= 2) { GSVector4 w = v[i + 1].t.zzzz().rcpnr(); - GSVector4 v0 = v[i + 0].t * w; - GSVector4 v1 = v[i + 1].t * w; - - v[i + 0].t = v0; - v[i + 1].t = v1; - - tmin = tmin.minv(v0).minv(v1); - tmax = tmax.maxv(v0).maxv(v1); + v[i + 0].t *= w; + v[i + 1].t *= w; } - - m_vtrace.m_max.t = tmax; - m_vtrace.m_min.t = tmin; } } - if(p.sel.fst) + if(p.sel.ltf) { - // if q is constant we can do the half pel shift for bilinear sampling on the vertices + GSVector4 half(0x8000, 0x8000); - if(p.sel.ltf) + if(p.sel.fst) { - GSVector4 half(0x8000, 0x8000); + // if q is constant we can do the half pel shift for bilinear sampling on the vertices GSVertexSW* v = m_vertices; @@ -662,46 +389,12 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) { v[i].t -= half; } - - m_vtrace.m_min.t -= half; - m_vtrace.m_max.t += half; } } - /* - else - { - GSVector4 tmin = GSVector4(FLT_MAX); - GSVector4 tmax = GSVector4(-FLT_MAX); - - GSVertexSW* v = m_vertices; - - for(int i = 0, j = m_count; i < j; i++) - { - GSVector4 v0 = v[i].t * v[i].t.zzzz().rcpnr(); - - tmin = tmin.minv(v0); - tmax = tmax.maxv(v0); - } - - if(p.sel.ltf) - { - GSVector4 half(0x8000, 0x8000); - - tmin -= half; - tmax += half; - } - - m_vtrace.min.t = tmin; - m_vtrace.max.t = tmax; - } - */ - - int w = 1 << context->TEX0.TW; - int h = 1 << context->TEX0.TH; GSVector4i r; - GetTextureMinMax(w, h, r, p.sel.fst); + GetTextureMinMax(r); const GSTextureCacheSW::GSTexture* t = m_tc->Lookup(context->TEX0, env.TEXA, r); @@ -720,31 +413,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) p.sel.datm = context->TEST.DATM; } - int amin = 0, amax = 0xff; - - if(PRIM->ABE && context->ALPHA.A != context->ALPHA.B && !PRIM->AA1) - { - if(context->ALPHA.C == 0) - { - GetAlphaMinMax(); - - amin = m_vtrace.m_alpha.min; - amax = m_vtrace.m_alpha.max; - } - else if(context->ALPHA.C == 1) - { - if(p.sel.fpsm == 1) - { - amin = amax = 0x80; - } - } - else if(context->ALPHA.C == 1) - 
{ - amin = amax = context->ALPHA.FIX; - } - } - - if(PRIM->ABE && !context->ALPHA.IsOpaque(amin, amax) || PRIM->AA1) + if(!IsOpaque()) { p.sel.abe = PRIM->ABE; p.sel.ababcd = context->ALPHA.u32[0]; @@ -785,7 +454,7 @@ void GSRendererSW::GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass) { p.sel.zpsm = GSUtil::EncodePSM(context->ZBUF.PSM); p.sel.ztst = ztest ? context->TEST.ZTST : 1; - p.sel.zoverflow = GSVector4i(m_vtrace.m_max.p).z == 0x80000000; + p.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000; } } diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index 0be46daee9..542e53e6b3 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -30,7 +30,6 @@ class GSRendererSW : public GSRendererT protected: GSRasterizerList m_rl; GSTextureCacheSW* m_tc; - GSVertexTrace m_vtrace; GSTexture* m_texture[2]; bool m_reset; @@ -42,9 +41,6 @@ protected: void Draw(); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); - void GetTextureMinMax(int w, int h, GSVector4i& r, uint32 fst); - void GetAlphaMinMax(); - bool TryAlphaTest(uint32& fm, uint32& zm); void GetScanlineParam(GSScanlineParam& p, GS_PRIM_CLASS primclass); public: diff --git a/plugins/GSdx/GSTexture10.cpp b/plugins/GSdx/GSTexture10.cpp index 421e650ec6..1062861db8 100644 --- a/plugins/GSdx/GSTexture10.cpp +++ b/plugins/GSdx/GSTexture10.cpp @@ -169,7 +169,21 @@ GSTexture10::operator ID3D10ShaderResourceView*() { if(!m_srv && m_dev && m_texture) { - m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv); + D3D10_SHADER_RESOURCE_VIEW_DESC* desc = NULL; + + if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) + { + desc = new D3D10_SHADER_RESOURCE_VIEW_DESC(); + memset(desc, 0, sizeof(*desc)); + desc->Format = DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + desc->ViewDimension = D3D10_SRV_DIMENSION_TEXTURE2D; + desc->Texture2D.MostDetailedMip = 0; + desc->Texture2D.MipLevels = 1; + } + + m_dev->CreateShaderResourceView(m_texture, desc, 
&m_srv); + + delete desc; } return m_srv; @@ -191,7 +205,19 @@ GSTexture10::operator ID3D10DepthStencilView*() { if(!m_dsv && m_dev && m_texture) { - m_dev->CreateDepthStencilView(m_texture, NULL, &m_dsv); + D3D10_DEPTH_STENCIL_VIEW_DESC* desc = NULL; + + if(m_desc.Format == DXGI_FORMAT_R32G8X24_TYPELESS) + { + desc = new D3D10_DEPTH_STENCIL_VIEW_DESC(); + memset(desc, 0, sizeof(*desc)); + desc->Format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + desc->ViewDimension = D3D10_DSV_DIMENSION_TEXTURE2D; + } + + m_dev->CreateDepthStencilView(m_texture, desc, &m_dsv); + + delete desc; } return m_dsv; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index df2a17ac10..28ae26542c 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -198,7 +198,7 @@ GSTextureCache::GSDepthStencil* GSTextureCache::GetDepthStencil(const GIFRegTEX0 return ds; } -GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture() +GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture(const GSVector4i& r) { const GIFRegTEX0& TEX0 = m_renderer->m_context->TEX0; const GIFRegCLAMP& CLAMP = m_renderer->m_context->CLAMP; @@ -207,61 +207,6 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture() const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; const uint32* clut = m_renderer->m_mem.m_clut; - if(psm.pal > 0) - { - m_renderer->m_mem.m_clut.Read32(TEX0, TEXA); - - /* - POSITION pos = m_tex.GetHeadPosition(); - - while(pos) - { - POSITION cur = pos; - - GSSurface* s = m_tex.GetNext(pos); - - if(s->m_TEX0.TBP0 == TEX0.CBP) - { - m_tex.RemoveAt(cur); - - delete s; - } - } - - pos = m_rt.GetHeadPosition(); - - while(pos) - { - POSITION cur = pos; - - GSSurface* s = m_rt.GetNext(pos); - - if(s->m_TEX0.TBP0 == TEX0.CBP) - { - m_rt.RemoveAt(cur); - - delete s; - } - } - - pos = m_ds.GetHeadPosition(); - - while(pos) - { - POSITION cur = pos; - - GSSurface* s = m_ds.GetNext(pos); - - if(s->m_TEX0.TBP0 == TEX0.CBP) - { - 
m_ds.RemoveAt(cur); - - delete s; - } - } - */ - } - GSCachedTexture* t = NULL; for(list::iterator i = m_tex.begin(); i != m_tex.end(); i++) @@ -378,7 +323,7 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture() } } - t->Update(); + t->Update(r); m_tex_used = true; @@ -757,7 +702,7 @@ GSTextureCache::GSCachedTexture::~GSCachedTexture() _aligned_free(m_clut); } -void GSTextureCache::GSCachedTexture::Update() +void GSTextureCache::GSCachedTexture::Update(const GSVector4i& rect) { __super::Update(); @@ -766,7 +711,7 @@ void GSTextureCache::GSCachedTexture::Update() return; } - GSVector4i r; + GSVector4i r = rect; if(!GetDirtyRect(r)) { @@ -799,16 +744,16 @@ void GSTextureCache::GSCachedTexture::Update() m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.width() * r.height() * 4); } -bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr) +bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& r) { int w = 1 << m_TEX0.TW; int h = 1 << m_TEX0.TH; - GSVector4i r(0, 0, w, h); + GSVector4i tr(0, 0, w, h); for(list::iterator i = m_dirty.begin(); i != m_dirty.end(); i++) { - const GSVector4i& dirty = i->GetDirtyRect(m_TEX0).rintersect(r); + const GSVector4i& dirty = i->GetDirtyRect(m_TEX0).rintersect(tr); if(!m_valid.rintersect(dirty).rempty()) { @@ -837,8 +782,6 @@ bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr) m_dirty.clear(); - m_renderer->MinMaxUV(w, h, r); - if(GSUtil::IsRectInRect(r, m_valid)) { return false; @@ -867,7 +810,5 @@ bool GSTextureCache::GSCachedTexture::GetDirtyRect(GSVector4i& rr) return false; } - rr = r; - return true; } \ No newline at end of file diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index ad17e46eb3..300d648bfd 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -85,7 +85,7 @@ public: explicit GSCachedTexture(GSRenderer* renderer); virtual ~GSCachedTexture(); - void Update(); + void Update(const GSVector4i& rect); virtual bool Create() 
= 0; virtual bool Create(GSRenderTarget* rt) = 0; @@ -130,7 +130,7 @@ public: GSRenderTarget* GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb = false); GSDepthStencil* GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h); - GSCachedTexture* GetTexture(); + GSCachedTexture* GetTexture(const GSVector4i& r); void InvalidateTextures(const GIFRegFRAME& FRAME, const GIFRegZBUF& ZBUF); void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r); diff --git a/plugins/GSdx/GSTextureCache10.cpp b/plugins/GSdx/GSTextureCache10.cpp index 2ec9f37db2..0c20e24c24 100644 --- a/plugins/GSdx/GSTextureCache10.cpp +++ b/plugins/GSdx/GSTextureCache10.cpp @@ -142,6 +142,8 @@ bool GSTextureCache10::GSCachedTextureHW10::Create() bool GSTextureCache10::GSCachedTextureHW10::Create(GSRenderTarget* rt) { + m_rendered = true; + // TODO: clean up this mess rt->Update(); @@ -151,8 +153,6 @@ bool GSTextureCache10::GSCachedTextureHW10::Create(GSRenderTarget* rt) m_TEX0 = m_renderer->m_context->TEX0; m_TEXA = m_renderer->m_env.TEXA; - m_rendered = true; - int tw = 1 << m_TEX0.TW; int th = 1 << m_TEX0.TH; int tp = (int)m_TEX0.TW << 6; @@ -301,7 +301,92 @@ bool GSTextureCache10::GSCachedTextureHW10::Create(GSDepthStencil* ds) { m_rendered = true; - // TODO - return false; +/* + // TODO: clean up this mess + + ds->Update(); + + // m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1); + + m_TEX0 = m_renderer->m_context->TEX0; + m_TEXA = m_renderer->m_env.TEXA; + + int tw = 1 << m_TEX0.TW; + int th = 1 << m_TEX0.TH; + int tp = (int)m_TEX0.TW << 6; + + // do not round here!!! 
if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) + + int w = (int)(ds->m_texture->m_scale.x * tw); + int h = (int)(ds->m_texture->m_scale.y * th); + + GSVector2i dssize = ds->m_texture->GetSize(); + + // pitch conversion + + if(ds->m_TEX0.TBW != m_TEX0.TBW) // && rt->m_TEX0.PSM == m_TEX0.PSM + { + ASSERT(0); + } + else if(tw < tp) + { + } + + // width/height conversion + + GSVector2 scale = ds->m_texture->m_scale; + + GSVector4 dst(0, 0, w, h); + + if(w > dssize.x) + { + scale.x = (float)dssize.x / tw; + dst.z = (float)dssize.x * scale.x / ds->m_texture->m_scale.x; + w = dssize.x; + } + + if(h > dssize.y) + { + scale.y = (float)dssize.y / th; + dst.w = (float)dssize.y * scale.y / ds->m_texture->m_scale.y; + h = dssize.y; + } + + m_texture = m_renderer->m_dev->CreateRenderTarget(w, h); + + GSVector4 src(0, 0, w, h); + + src.z /= ds->m_texture->GetWidth(); + src.w /= ds->m_texture->GetHeight(); + + m_renderer->m_dev->StretchRect(ds->m_texture, src, m_texture, dst, 7); + + m_texture->m_scale = scale; + + switch(m_TEX0.PSM) + { + case PSM_PSMCT32: + m_bpp = 0; + break; + case PSM_PSMCT24: + m_bpp = 1; + break; + case PSM_PSMCT16: + case PSM_PSMCT16S: + m_bpp = 2; + break; + case PSM_PSMT8H: + m_bpp = 3; + m_palette = m_renderer->m_dev->CreateTexture(256, 1); + m_initpalette = true; + break; + case PSM_PSMT4HL: + case PSM_PSMT4HH: + ASSERT(0); // TODO + break; + } + + return true; +*/ } diff --git a/plugins/GSdx/GSTextureFX.cpp b/plugins/GSdx/GSTextureFX.cpp new file mode 100644 index 0000000000..16a86f2475 --- /dev/null +++ b/plugins/GSdx/GSTextureFX.cpp @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2007-2009 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. 
+ * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#include "stdafx.h" +#include "GSTextureFX.h" diff --git a/plugins/GSdx/GSTextureFX.h b/plugins/GSdx/GSTextureFX.h new file mode 100644 index 0000000000..ed338a59b7 --- /dev/null +++ b/plugins/GSdx/GSTextureFX.h @@ -0,0 +1,243 @@ +/* + * Copyright (C) 2007-2009 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSVector.h" + +class GSTextureFX +{ +public: + #pragma pack(push, 1) + + __declspec(align(16)) struct VSConstantBuffer + { + GSVector4 VertexScale; + GSVector4 VertexOffset; + GSVector2 TextureScale; + float _pad[2]; + + struct VSConstantBuffer() + { + memset(this, 0, sizeof(*this)); + } + + __forceinline bool Update(const VSConstantBuffer* cb) + { + GSVector4i* a = (GSVector4i*)this; + GSVector4i* b = (GSVector4i*)cb; + + GSVector4i b0 = b[0]; + GSVector4i b1 = b[1]; + GSVector4i b2 = b[2]; + + if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue()) + { + a[0] = b0; + a[1] = b1; + a[2] = b2; + + return true; + } + + return false; + } + }; + + struct VSSelector + { + union + { + struct + { + uint32 bppz:2; + uint32 tme:1; + uint32 fst:1; + uint32 logz:1; + uint32 prim:2; + }; + + uint32 key; + }; + + operator uint32() {return key & 0x7f;} + + VSSelector() : key(0) {} + }; + + __declspec(align(16)) struct PSConstantBuffer + { + GSVector4 FogColor_AREF; + GSVector4 HalfTexel; + GSVector4 WH_TA; + GSVector4 MinMax; + GSVector4 MinMaxF; + GSVector4i MskFix; + + struct PSConstantBuffer() + { + memset(this, 0, sizeof(*this)); + } + + __forceinline bool Update(const PSConstantBuffer* cb) + { + GSVector4i* a = (GSVector4i*)this; + GSVector4i* b = (GSVector4i*)cb; + + GSVector4i b0 = b[0]; + GSVector4i b1 = b[1]; + GSVector4i b2 = b[2]; + GSVector4i b3 = b[3]; + GSVector4i b4 = b[4]; + GSVector4i b5 = b[5]; + + if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) + { + a[0] = b0; + a[1] = b1; + a[2] = b2; + a[3] = b3; + a[4] = b4; + a[5] = b5; + + return true; + } + + return false; + } + }; + + struct GSSelector + { + union + { + struct + { + uint32 iip:1; + uint32 prim:2; + }; + + uint32 key; + }; + + operator uint32() {return key & 0x7;} + + GSSelector() : key(0) {} + }; + + struct PSSelector + { + union + { + struct + { + uint32 fst:1; + 
uint32 wms:2; + uint32 wmt:2; + uint32 bpp:3; + uint32 aem:1; + uint32 tfx:3; + uint32 tcc:1; + uint32 ate:1; + uint32 atst:3; + uint32 fog:1; + uint32 clr1:1; + uint32 fba:1; + uint32 aout:1; + uint32 rt:1; + uint32 ltf:1; + }; + + uint32 key; + }; + + operator uint32() {return key & 0x7fffff;} + + PSSelector() : key(0) {} + }; + + struct PSSamplerSelector + { + union + { + struct + { + uint32 tau:1; + uint32 tav:1; + uint32 ltf:1; + }; + + uint32 key; + }; + + operator uint32() {return key & 0x7;} + + PSSamplerSelector() : key(0) {} + }; + + struct OMDepthStencilSelector + { + union + { + struct + { + uint32 zte:1; + uint32 ztst:2; + uint32 zwe:1; + uint32 date:1; + uint32 fba:1; + }; + + uint32 key; + }; + + operator uint32() {return key & 0x3f;} + + OMDepthStencilSelector() : key(0) {} + }; + + struct OMBlendSelector + { + union + { + struct + { + uint32 abe:1; + uint32 a:2; + uint32 b:2; + uint32 c:2; + uint32 d:2; + uint32 wr:1; + uint32 wg:1; + uint32 wb:1; + uint32 wa:1; + }; + + uint32 key; + }; + + operator uint32() {return key & 0x1fff;} + + OMBlendSelector() : key(0) {} + }; + + #pragma pack(pop) +}; diff --git a/plugins/GSdx/GSTextureFX10.cpp b/plugins/GSdx/GSTextureFX10.cpp index 3005ca5918..0a34979799 100644 --- a/plugins/GSdx/GSTextureFX10.cpp +++ b/plugins/GSdx/GSTextureFX10.cpp @@ -36,10 +36,6 @@ bool GSTextureFX10::Create(GSDevice10* dev) VSSelector sel; - sel.bppz = 0; - sel.tme = 0; - sel.fst = 0; - VSConstantBuffer cb; SetupVS(sel, &cb); // creates layout @@ -122,11 +118,11 @@ bool GSTextureFX10::SetupVS(VSSelector sel, const VSConstantBuffer* cb) D3D10_INPUT_ELEMENT_DESC layout[] = { - {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 8, D3D10_INPUT_PER_VERTEX_DATA, 0}, - {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 12, D3D10_INPUT_PER_VERTEX_DATA, 0}, {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 20, D3D10_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, 
DXGI_FORMAT_R8G8B8A8_UNORM, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0}, + {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D10_INPUT_PER_VERTEX_DATA, 0}, {"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D10_INPUT_PER_VERTEX_DATA, 0}, }; @@ -515,8 +511,3 @@ void GSTextureFX10::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, m_dev->OMSetBlendState((*j).second, bf); } - -void GSTextureFX10::Draw() -{ - m_dev->DrawPrimitive(); -} diff --git a/plugins/GSdx/GSTextureFX10.h b/plugins/GSdx/GSTextureFX10.h index 285d930538..9e3c18a5ee 100644 --- a/plugins/GSdx/GSTextureFX10.h +++ b/plugins/GSdx/GSTextureFX10.h @@ -21,188 +21,11 @@ #pragma once +#include "GSTextureFX.h" #include "GSDevice10.h" -class GSTextureFX10 +class GSTextureFX10 : public GSTextureFX { -public: - #pragma pack(push, 1) - - __declspec(align(16)) struct VSConstantBuffer - { - GSVector4 VertexScale; - GSVector4 VertexOffset; - GSVector2 TextureScale; - float _pad[2]; - - struct VSConstantBuffer() {memset(this, 0, sizeof(*this));} - - __forceinline bool Update(const VSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - GSVector4i b0 = b[0]; - GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue()) - { - a[0] = b0; - a[1] = b1; - a[2] = b2; - - return true; - } - - return false; - } - }; - - union VSSelector - { - struct - { - uint32 bppz:2; - uint32 tme:1; - uint32 fst:1; - uint32 prim:3; - }; - - uint32 key; - - operator uint32() {return key & 0x7f;} - }; - - __declspec(align(16)) struct PSConstantBuffer - { - GSVector4 FogColor_AREF; - GSVector4 HalfTexel; - GSVector4 WH_TA; - GSVector4 MinMax; - GSVector4 MinMaxF; - GSVector4i MskFix; - - struct 
PSConstantBuffer() {memset(this, 0, sizeof(*this));} - - __forceinline bool Update(const PSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - GSVector4i b0 = b[0]; - GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - GSVector4i b3 = b[3]; - GSVector4i b4 = b[4]; - GSVector4i b5 = b[5]; - - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) - { - a[0] = b0; - a[1] = b1; - a[2] = b2; - a[3] = b3; - a[4] = b4; - a[5] = b5; - - return true; - } - - return false; - } - }; - - union GSSelector - { - struct - { - uint32 iip:1; - uint32 prim:2; - }; - - uint32 key; - - operator uint32() {return key & 0x7;} - }; - - union PSSelector - { - struct - { - uint32 fst:1; - uint32 wms:2; - uint32 wmt:2; - uint32 bpp:3; - uint32 aem:1; - uint32 tfx:3; - uint32 tcc:1; - uint32 ate:1; - uint32 atst:3; - uint32 fog:1; - uint32 clr1:1; - uint32 fba:1; - uint32 aout:1; - uint32 ltf:1; - }; - - uint32 key; - - operator uint32() {return key & 0x3fffff;} - }; - - union PSSamplerSelector - { - struct - { - uint32 tau:1; - uint32 tav:1; - uint32 ltf:1; - }; - - uint32 key; - - operator uint32() {return key & 0x7;} - }; - - union OMDepthStencilSelector - { - struct - { - uint32 zte:1; - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1f;} - }; - - union OMBlendSelector - { - struct - { - uint32 abe:1; - uint32 a:2; - uint32 b:2; - uint32 c:2; - uint32 d:2; - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1fff;} - }; - - #pragma pack(pop) - -private: GSDevice10* m_dev; CComPtr m_il; hash_map > m_vs; @@ -215,11 +38,6 @@ private: hash_map > m_om_dss; hash_map > m_om_bs; - CComPtr m_vb, m_vb_old; - int m_vb_max; - int m_vb_start; - int m_vb_count; - VSConstantBuffer m_vs_cb_cache; PSConstantBuffer m_ps_cb_cache; @@ -236,5 +54,4 @@ public: void SetupRS(int w, int h, const 
GSVector4i& scissor); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, GSTexture* rt, GSTexture* ds); void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf); - void Draw(); }; diff --git a/plugins/GSdx/GSTextureFX11.cpp b/plugins/GSdx/GSTextureFX11.cpp index 4fe3de6a28..d3c1205888 100644 --- a/plugins/GSdx/GSTextureFX11.cpp +++ b/plugins/GSdx/GSTextureFX11.cpp @@ -36,10 +36,6 @@ bool GSTextureFX11::Create(GSDevice11* dev) VSSelector sel; - sel.bppz = 0; - sel.tme = 0; - sel.fst = 0; - VSConstantBuffer cb; SetupVS(sel, &cb); // creates layout @@ -122,11 +118,11 @@ bool GSTextureFX11::SetupVS(VSSelector sel, const VSConstantBuffer* cb) D3D11_INPUT_ELEMENT_DESC layout[] = { - {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0}, - {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0}, + {"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0}, {"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0}, }; @@ -519,8 +515,3 @@ void GSTextureFX11::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, m_dev->OMSetBlendState((*j).second, bf); } - -void GSTextureFX11::Draw() -{ - m_dev->DrawPrimitive(); -} diff --git a/plugins/GSdx/GSTextureFX11.h b/plugins/GSdx/GSTextureFX11.h index e64b9349ab..53a4f2f337 100644 --- a/plugins/GSdx/GSTextureFX11.h +++ b/plugins/GSdx/GSTextureFX11.h @@ -21,188 +21,11 @@ #pragma once +#include "GSTextureFX.h" #include 
"GSDevice11.h" -class GSTextureFX11 +class GSTextureFX11 : public GSTextureFX { -public: - #pragma pack(push, 1) - - __declspec(align(16)) struct VSConstantBuffer - { - GSVector4 VertexScale; - GSVector4 VertexOffset; - GSVector2 TextureScale; - float _pad[2]; - - struct VSConstantBuffer() {memset(this, 0, sizeof(*this));} - - __forceinline bool Update(const VSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - GSVector4i b0 = b[0]; - GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue()) - { - a[0] = b0; - a[1] = b1; - a[2] = b2; - - return true; - } - - return false; - } - }; - - union VSSelector - { - struct - { - uint32 bppz:2; - uint32 tme:1; - uint32 fst:1; - uint32 prim:3; - }; - - uint32 key; - - operator uint32() {return key & 0x7f;} - }; - - __declspec(align(16)) struct PSConstantBuffer - { - GSVector4 FogColor_AREF; - GSVector4 HalfTexel; - GSVector4 WH_TA; - GSVector4 MinMax; - GSVector4 MinMaxF; - GSVector4i MskFix; - - struct PSConstantBuffer() {memset(this, 0, sizeof(*this));} - - __forceinline bool Update(const PSConstantBuffer* cb) - { - GSVector4i* a = (GSVector4i*)this; - GSVector4i* b = (GSVector4i*)cb; - - GSVector4i b0 = b[0]; - GSVector4i b1 = b[1]; - GSVector4i b2 = b[2]; - GSVector4i b3 = b[3]; - GSVector4i b4 = b[4]; - GSVector4i b5 = b[5]; - - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) - { - a[0] = b0; - a[1] = b1; - a[2] = b2; - a[3] = b3; - a[4] = b4; - a[5] = b5; - - return true; - } - - return false; - } - }; - - union GSSelector - { - struct - { - uint32 iip:1; - uint32 prim:2; - }; - - uint32 key; - - operator uint32() {return key & 0x7;} - }; - - union PSSelector - { - struct - { - uint32 fst:1; - uint32 wms:2; - uint32 wmt:2; - uint32 bpp:3; - uint32 aem:1; - uint32 tfx:3; - uint32 tcc:1; - uint32 ate:1; - uint32 atst:3; - uint32 fog:1; - uint32 clr1:1; - uint32 fba:1; 
- uint32 aout:1; - uint32 ltf:1; - }; - - uint32 key; - - operator uint32() {return key & 0x3fffff;} - }; - - union PSSamplerSelector - { - struct - { - uint32 tau:1; - uint32 tav:1; - uint32 ltf:1; - }; - - uint32 key; - - operator uint32() {return key & 0x7;} - }; - - union OMDepthStencilSelector - { - struct - { - uint32 zte:1; - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1f;} - }; - - union OMBlendSelector - { - struct - { - uint32 abe:1; - uint32 a:2; - uint32 b:2; - uint32 c:2; - uint32 d:2; - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1fff;} - }; - - #pragma pack(pop) - -private: GSDevice11* m_dev; CComPtr m_il; hash_map > m_vs; @@ -215,11 +38,6 @@ private: hash_map > m_om_dss; hash_map > m_om_bs; - CComPtr m_vb, m_vb_old; - int m_vb_max; - int m_vb_start; - int m_vb_count; - VSConstantBuffer m_vs_cb_cache; PSConstantBuffer m_ps_cb_cache; @@ -236,5 +54,4 @@ public: void SetupRS(int w, int h, const GSVector4i& scissor); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, GSTexture* rt, GSTexture* ds); void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf); - void Draw(); }; diff --git a/plugins/GSdx/GSTextureFX9.cpp b/plugins/GSdx/GSTextureFX9.cpp index ac64eed885..5327f2d93f 100644 --- a/plugins/GSdx/GSTextureFX9.cpp +++ b/plugins/GSdx/GSTextureFX9.cpp @@ -34,11 +34,6 @@ bool GSTextureFX9::Create(GSDevice9* dev) VSSelector sel; - sel.bppz = 0; - sel.tme = 0; - sel.fst = 0; - sel.logz = 0; - VSConstantBuffer cb; SetupVS(sel, &cb); // creates layout @@ -455,8 +450,3 @@ void GSTextureFX9::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, m_dev->OMSetBlendState((*j).second, 0x010101 * bf); } - -void GSTextureFX9::Draw() -{ - m_dev->DrawPrimitive(); -} diff --git a/plugins/GSdx/GSTextureFX9.h b/plugins/GSdx/GSTextureFX9.h index b47c227a7b..50f187a3bf 100644 --- 
a/plugins/GSdx/GSTextureFX9.h +++ b/plugins/GSdx/GSTextureFX9.h @@ -21,123 +21,11 @@ #pragma once +#include "GSTextureFX.h" #include "GSDevice9.h" -class GSTextureFX9 +class GSTextureFX9 : public GSTextureFX { -public: - #pragma pack(push, 1) - - struct VSConstantBuffer - { - GSVector4 VertexScale; - GSVector4 VertexOffset; - GSVector2 TextureScale; - float _pad[2]; - }; - - union VSSelector - { - struct - { - uint32 bppz:2; - uint32 tme:1; - uint32 fst:1; - uint32 logz:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1f;} - }; - - struct PSConstantBuffer - { - GSVector4 FogColor_AREF; - GSVector4 HalfTexel; - GSVector4 WH_TA; - GSVector4 MinMax; - GSVector4 MinMaxF; - GSVector4i MskFix; - }; - - union PSSelector - { - struct - { - uint32 fst:1; - uint32 wms:2; - uint32 wmt:2; - uint32 bpp:3; - uint32 aem:1; - uint32 tfx:3; - uint32 tcc:1; - uint32 ate:1; - uint32 atst:3; - uint32 fog:1; - uint32 clr1:1; - uint32 rt:1; - uint32 ltf:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1fffff;} - }; - - union PSSamplerSelector - { - struct - { - uint32 tau:1; - uint32 tav:1; - uint32 ltf:1; - }; - - uint32 key; - - operator uint32() {return key & 0x7;} - }; - - union OMDepthStencilSelector - { - struct - { - uint32 zte:1; - uint32 ztst:2; - uint32 zwe:1; - uint32 date:1; - uint32 fba:1; - }; - - uint32 key; - - operator uint32() {return key & 0x3f;} - }; - - union OMBlendSelector - { - struct - { - uint32 abe:1; - uint32 a:2; - uint32 b:2; - uint32 c:2; - uint32 d:2; - uint32 wr:1; - uint32 wg:1; - uint32 wb:1; - uint32 wa:1; - }; - - uint32 key; - - operator uint32() {return key & 0x1fff;} - }; - - #pragma pack(pop) - -private: GSDevice9* m_dev; CComPtr m_il; hash_map > m_vs; @@ -162,5 +50,4 @@ public: void SetupRS(int w, int h, const GSVector4i& scissor); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf, GSTexture* rt, GSTexture* ds); void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf); - void 
Draw(); }; diff --git a/plugins/GSdx/GSVertex.h b/plugins/GSdx/GSVertex.h index ee3bdc9874..9634a61aef 100644 --- a/plugins/GSdx/GSVertex.h +++ b/plugins/GSdx/GSVertex.h @@ -35,8 +35,8 @@ __declspec(align(16)) struct GSVertex struct { GIFRegST ST; - GIFRegXYZ XYZ; GIFRegRGBAQ RGBAQ; + GIFRegXYZ XYZ; GIFRegFOG FOG; }; diff --git a/plugins/GSdx/GSVertexHW.h b/plugins/GSdx/GSVertexHW.h index 76a84ed74d..9948a76b1d 100644 --- a/plugins/GSdx/GSVertexHW.h +++ b/plugins/GSdx/GSVertexHW.h @@ -21,6 +21,7 @@ #pragma once +#include "GS.h" #include "GSVector.h" #pragma pack(push, 1) @@ -53,17 +54,17 @@ __declspec(align(16)) union GSVertexHW10 GIFRegST ST; }; - union - { - struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p; - GIFRegXYZ XYZ; - }; - union { union {struct {uint8 r, g, b, a; float q;}; uint32 c0;}; GIFRegRGBAQ RGBAQ; }; + + union + { + struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p; + GIFRegXYZ XYZ; + }; union { diff --git a/plugins/GSdx/GSVertexSW.cpp b/plugins/GSdx/GSVertexSW.cpp index 43702ed972..0706fa00f8 100644 --- a/plugins/GSdx/GSVertexSW.cpp +++ b/plugins/GSdx/GSVertexSW.cpp @@ -23,145 +23,3 @@ #include "stdafx.h" #include "GSVertexSW.h" - -using namespace Xbyak; - -GSVertexTrace::GSVertexTraceCodeGenerator::GSVertexTraceCodeGenerator(uint32 key, void* ptr, size_t maxsize) - : CodeGenerator(maxsize, ptr) -{ - #if _M_AMD64 - #error TODO - #endif - - const int params = 0; - - uint32 primclass = (key >> 0) & 3; - uint32 iip = (key >> 2) & 1; - uint32 tme = (key >> 3) & 1; - uint32 color = (key >> 4) & 1; - - int n = 1; - - switch(primclass) - { - case GS_POINT_CLASS: - n = 1; - break; - case GS_LINE_CLASS: - case GS_SPRITE_CLASS: - n = 2; - break; - case GS_TRIANGLE_CLASS: - n = 3; - break; - } - - const int _v = params + 4; - const int _count = params + 8; - const int _min = params + 12; - const int _max = params + 16; - - // - - static const float fmin = -FLT_MAX; - static const float fmax = FLT_MAX; - - movss(xmm0, 
xmmword[&fmax]); - shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); - - movss(xmm1, xmmword[&fmin]); - shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); - - if(color) - { - // min.c = FLT_MAX; - // max.c = -FLT_MAX; - - movaps(xmm2, xmm0); - movaps(xmm3, xmm1); - } - - // min.p = FLT_MAX; - // max.p = -FLT_MAX; - - movaps(xmm4, xmm0); - movaps(xmm5, xmm1); - - if(tme) - { - // min.t = FLT_MAX; - // max.t = -FLT_MAX; - - movaps(xmm6, xmm0); - movaps(xmm7, xmm1); - } - - // for(int i = 0; i < count; i += step) { - - mov(edx, dword[esp + _v]); - mov(ecx, dword[esp + _count]); - - align(16); - -L("loop"); - - for(int j = 0; j < n; j++) - { - if(color && (iip || j == n - 1)) - { - // min.c = min.c.minv(v[i + j].c); - // max.c = max.c.maxv(v[i + j].c); - - movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]); - - minps(xmm2, xmm0); - maxps(xmm3, xmm0); - } - - // min.p = min.p.minv(v[i + j].p); - // max.p = max.p.maxv(v[i + j].p); - - movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]); - - minps(xmm4, xmm0); - maxps(xmm5, xmm0); - - if(tme) - { - // min.t = min.t.minv(v[i + j].t); - // max.t = max.t.maxv(v[i + j].t); - - movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]); - - minps(xmm6, xmm0); - maxps(xmm7, xmm0); - } - } - - add(edx, n * sizeof(GSVertexSW)); - sub(ecx, n); - - jg("loop"); - - // } - - mov(eax, dword[esp + _min]); - mov(edx, dword[esp + _max]); - - if(color) - { - movaps(xmmword[eax], xmm2); - movaps(xmmword[edx], xmm3); - } - - movaps(xmmword[eax + 16], xmm4); - movaps(xmmword[edx + 16], xmm5); - - if(tme) - { - movaps(xmmword[eax + 32], xmm6); - movaps(xmmword[edx + 32], xmm7); - } - - ret(); -} diff --git a/plugins/GSdx/GSVertexSW.h b/plugins/GSdx/GSVertexSW.h index 3f51aa756a..e3e181d0ef 100644 --- a/plugins/GSdx/GSVertexSW.h +++ b/plugins/GSdx/GSVertexSW.h @@ -214,81 +214,3 @@ __forceinline GSVertexSW operator / (const GSVertexSW& v, float f) return v0; } -#include "GSFunctionMap.h" -#include "xbyak/xbyak.h" - -__declspec(align(16)) class 
GSVertexTrace -{ - class GSVertexTraceCodeGenerator : public Xbyak::CodeGenerator - { - public: - GSVertexTraceCodeGenerator(uint32 key, void* ptr, size_t maxsize); - }; - - typedef void (*VertexTracePtr)(const GSVertexSW* v, int count, GSVertexSW& min, GSVertexSW& max); - - class GSVertexTraceMap : public GSCodeGeneratorFunctionMap - { - public: - GSVertexTraceMap() : GSCodeGeneratorFunctionMap("VertexTrace") {} - GSVertexTraceCodeGenerator* Create(uint32 key, void* ptr, size_t maxsize) {return new GSVertexTraceCodeGenerator(key, ptr, maxsize);} - } m_map; - -public: - GSVertexSW m_min, m_max; - - struct {int min, max; bool valid;} m_alpha; // source alpha range after tfx - - union - { - uint32 value; - struct {uint32 x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1, r:1, g:1, b:1, a:1;}; - struct {uint32 xyzf:4, stq:4, rgba:4;}; - } m_eq; - - void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 tfx, uint32 tcc) - { - uint32 key = primclass | (iip << 2) | (tme << 3); - - if(!(tme && tfx == TFX_DECAL && tcc)) - { - key |= 1 << 4; - } - - m_map[key](v, count, m_min, m_max); - - m_eq.value = (m_min.p == m_max.p).mask() | ((m_min.t == m_max.t).mask() << 4) | ((m_min.c == m_max.c).mask() << 8); - - m_alpha.valid = false; - } -/* - void Update(const GSVertexSW* v, int count) - { - GSVertexSW min, max; - - min.c = v[0].c; - max.c = v[0].c; - min.t = v[0].t; - max.t = v[0].t; - min.p = v[0].p; - max.p = v[0].p; - - for(int i = 1; i < count; i++) - { - min.c = min.c.minv(v[i].c); - max.c = max.c.maxv(v[i].c); - min.p = min.p.minv(v[i].p); - max.p = max.p.maxv(v[i].p); - min.t = min.t.minv(v[i].t); - max.t = max.t.maxv(v[i].t); - } - - m_min = min; - m_max = max; - - m_eq.value = (min.p == max.p).mask() | ((min.t == max.t).mask() << 4) | ((min.c == max.c).mask() << 8); - - m_alpha.valid = false; - } -*/ -}; diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp new file mode 100644 index 0000000000..2f58e9a555 
--- /dev/null +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -0,0 +1,670 @@ +/* + * Copyright (C) 2007-2009 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "stdafx.h" +#include "GSVertexTrace.h" + +void GSVertexTrace::Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context) +{ + uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4); + + if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC)) + { + key |= 1 << 5; + } + + m_map_sw[key](v, count, m_min, m_max); + + m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); + + m_alpha.valid = false; +} + +void GSVertexTrace::Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context) +{ + uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4); + + if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC)) + { + key |= 1 << 5; + } + + m_map_hw9[key](v, count, m_min, m_max); + + GSVector4 o(context->XYOFFSET); + GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f); + + m_min.p = (m_min.p - o) * s; + m_max.p = 
(m_max.p - o) * s; + + if(PRIM->TME) + { + if(PRIM->FST) + { + s = GSVector4(1 << (16 - 4), 1).xxyy(); + } + else + { + s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1); + } + + m_min.t *= s; + m_max.t *= s; + } + + m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); + + m_alpha.valid = false; +} + +void GSVertexTrace::Update(const GSVertexHW10* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context) +{ + uint32 key = primclass | (PRIM->IIP << 2) | (PRIM->TME << 3) | (PRIM->FST << 4); + + if(!(PRIM->TME && context->TEX0.TFX == TFX_DECAL && context->TEX0.TCC)) + { + key |= 1 << 5; + } + + m_map_hw10[key](v, count, m_min, m_max); + + GSVector4 o(context->XYOFFSET); + GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f); + + m_min.p = (m_min.p - o) * s; + m_max.p = (m_max.p - o) * s; + + if(PRIM->TME) + { + if(PRIM->FST) + { + s = GSVector4(1 << (16 - 4), 1).xxyy(); + } + else + { + s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1); + } + + m_min.t *= s; + m_max.t *= s; + } + + m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20); + + m_alpha.valid = false; +} + +using namespace Xbyak; + +GSVertexTrace::CGSW::CGSW(uint32 key, void* ptr, size_t maxsize) + : CodeGenerator(maxsize, ptr) +{ + #if _M_AMD64 + #error TODO + #endif + + const int params = 0; + + uint32 primclass = (key >> 0) & 3; + uint32 iip = (key >> 2) & 1; + uint32 tme = (key >> 3) & 1; + uint32 fst = (key >> 4) & 1; + uint32 color = (key >> 5) & 1; + + int n = 1; + + switch(primclass) + { + case GS_POINT_CLASS: + n = 1; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + n = 2; + break; + case GS_TRIANGLE_CLASS: + n = 3; + break; + } + + const int _v = params + 4; + const int _count = params + 8; + const int _min = params + 12; + const int _max = params + 16; + + // + + static const 
float fmin = -FLT_MAX; + static const float fmax = FLT_MAX; + + movss(xmm0, xmmword[&fmax]); + shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + movss(xmm1, xmmword[&fmin]); + shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); + + if(color) + { + // min.c = FLT_MAX; + // max.c = -FLT_MAX; + + movaps(xmm2, xmm0); + movaps(xmm3, xmm1); + } + + // min.p = FLT_MAX; + // max.p = -FLT_MAX; + + movaps(xmm4, xmm0); + movaps(xmm5, xmm1); + + if(tme) + { + // min.t = FLT_MAX; + // max.t = -FLT_MAX; + + movaps(xmm6, xmm0); + movaps(xmm7, xmm1); + } + + // for(int i = 0; i < count; i += step) { + + mov(edx, dword[esp + _v]); + mov(ecx, dword[esp + _count]); + + align(16); + +L("loop"); + + if(tme && !fst && primclass == GS_SPRITE_CLASS) + { + movaps(xmm1, xmmword[edx + 1 * sizeof(GSVertexSW) + 32]); + shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); + } + + for(int j = 0; j < n; j++) + { + if(color && (iip || j == n - 1)) + { + // min.c = min.c.minv(v[i + j].c); + // max.c = max.c.maxv(v[i + j].c); + + movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW)]); + + minps(xmm2, xmm0); + maxps(xmm3, xmm0); + } + + // min.p = min.p.minv(v[i + j].p); + // max.p = max.p.maxv(v[i + j].p); + + movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 16]); + + minps(xmm4, xmm0); + maxps(xmm5, xmm0); + + if(tme) + { + // min.t = min.t.minv(v[i + j].t); + // max.t = max.t.maxv(v[i + j].t); + + movaps(xmm0, xmmword[edx + j * sizeof(GSVertexSW) + 32]); + + if(!fst) + { + if(primclass != GS_SPRITE_CLASS) + { + movaps(xmm1, xmm0); + shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2)); + } + + divps(xmm0, xmm1); + shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0)); + } + + minps(xmm6, xmm0); + maxps(xmm7, xmm0); + } + } + + add(edx, n * sizeof(GSVertexSW)); + sub(ecx, n); + + jg("loop"); + + // } + + mov(eax, dword[esp + _min]); + mov(edx, dword[esp + _max]); + + if(color) + { + cvttps2dq(xmm2, xmm2); + psrld(xmm2, 7); + movaps(xmmword[eax], xmm2); + + cvttps2dq(xmm3, xmm3); + psrld(xmm3, 7); + movaps(xmmword[edx], xmm3); + } + 
+ movaps(xmmword[eax + 16], xmm4); + movaps(xmmword[edx + 16], xmm5); + + if(tme) + { + movaps(xmmword[eax + 32], xmm6); + movaps(xmmword[edx + 32], xmm7); + } + + ret(); +} + +GSVertexTrace::CGHW9::CGHW9(uint32 key, void* ptr, size_t maxsize) + : CodeGenerator(maxsize, ptr) +{ + #if _M_AMD64 + #error TODO + #endif + + const int params = 0; + + uint32 primclass = (key >> 0) & 3; + uint32 iip = (key >> 2) & 1; + uint32 tme = (key >> 3) & 1; + uint32 fst = (key >> 4) & 1; + uint32 color = (key >> 5) & 1; + + int n = 1; + + switch(primclass) + { + case GS_POINT_CLASS: + n = 1; + break; + case GS_LINE_CLASS: + n = 2; + break; + case GS_TRIANGLE_CLASS: + n = 3; + break; + case GS_SPRITE_CLASS: + n = 6; + break; + } + + const int _v = params + 4; + const int _count = params + 8; + const int _min = params + 12; + const int _max = params + 16; + + // + + static const float fmin = -FLT_MAX; + static const float fmax = FLT_MAX; + + movss(xmm0, xmmword[&fmax]); + shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + movss(xmm1, xmmword[&fmin]); + shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); + + if(color) + { + // min.c = 0xffffffff; + // max.c = 0; + + pcmpeqd(xmm2, xmm2); + pxor(xmm3, xmm3); + } + + // min.p = FLT_MAX; + // max.p = -FLT_MAX; + + movaps(xmm4, xmm0); + movaps(xmm5, xmm1); + + if(tme) + { + // min.t = FLT_MAX; + // max.t = -FLT_MAX; + + movaps(xmm6, xmm0); + movaps(xmm7, xmm1); + } + + // for(int i = 0; i < count; i += step) { + + mov(edx, dword[esp + _v]); + mov(ecx, dword[esp + _count]); + + align(16); + +L("loop"); + + if(tme && !fst && primclass == GS_SPRITE_CLASS) + { + movaps(xmm1, xmmword[edx + 5 * sizeof(GSVertexHW9) + 16]); + shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); + } + + for(int j = 0; j < n; j++) + { + // min.p = min.p.minv(v[i + j].p); + // max.p = max.p.maxv(v[i + j].p); + + movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9) + 16]); + + minps(xmm4, xmm0); + maxps(xmm5, xmm0); + + if(tme && !fst && primclass != GS_SPRITE_CLASS) + { + movaps(xmm1, 
xmm0); + shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); + } + + if(color && (iip || j == n - 1) || tme) + { + movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW9)]); + } + + if(color && (iip || j == n - 1)) + { + // min.c = min.c.min_u8(v[i + j].c); + // max.c = max.c.max_u8(v[i + j].c); + + pminub(xmm2, xmm0); + pmaxub(xmm3, xmm0); + } + + if(tme) + { + shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral + + if(!fst) + { + // t /= p.wwww(); + + divps(xmm0, xmm1); + } + + // min.t = min.t.minv(v[i + j].t); + // max.t = max.t.maxv(v[i + j].t); + + minps(xmm6, xmm0); + maxps(xmm7, xmm0); + } + } + + add(edx, n * sizeof(GSVertexHW9)); + sub(ecx, n); + + jg("loop"); + + // } + + mov(eax, dword[esp + _min]); + mov(edx, dword[esp + _max]); + + if(color) + { + // m_min.c = cmin.zzzz().u8to32(); + // m_max.c = cmax.zzzz().u8to32(); + + if(m_cpu.has(util::Cpu::tSSE41)) + { + pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); + pmovzxbd(xmm2, xmm2); + + pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); + pmovzxbd(xmm3, xmm3); + } + else + { + pxor(xmm0, xmm0); + + punpckhbw(xmm2, xmm0); + punpcklwd(xmm2, xmm0); + + punpckhbw(xmm3, xmm0); + punpcklwd(xmm3, xmm0); + } + + movaps(xmmword[eax], xmm2); + movaps(xmmword[edx], xmm3); + } + + // m_min.p = pmin; + // m_max.p = pmax; + + movaps(xmmword[eax + 16], xmm4); + movaps(xmmword[edx + 16], xmm5); + + if(tme) + { + // m_min.t = tmin.xyww(pmin); + // m_max.t = tmax.xyww(pmax); + + shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); + shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); + + movaps(xmmword[eax + 32], xmm6); + movaps(xmmword[edx + 32], xmm7); + } + + ret(); +} + +GSVertexTrace::CGHW10::CGHW10(uint32 key, void* ptr, size_t maxsize) + : CodeGenerator(maxsize, ptr) +{ + #if _M_AMD64 + #error TODO + #endif + + const int params = 0; + + uint32 primclass = (key >> 0) & 3; + uint32 iip = (key >> 2) & 1; + uint32 tme = (key >> 3) & 1; + uint32 fst = (key >> 4) & 1; + uint32 color = (key >> 5) & 1; + + int n = 1; + + 
switch(primclass) + { + case GS_POINT_CLASS: + n = 1; + break; + case GS_LINE_CLASS: + case GS_SPRITE_CLASS: + n = 2; + break; + case GS_TRIANGLE_CLASS: + n = 3; + break; + } + + const int _v = params + 4; + const int _count = params + 8; + const int _min = params + 12; + const int _max = params + 16; + + // + + static const float fmin = -FLT_MAX; + static const float fmax = FLT_MAX; + + movss(xmm0, xmmword[&fmax]); + shufps(xmm0, xmm0, _MM_SHUFFLE(0, 0, 0, 0)); + + movss(xmm1, xmmword[&fmin]); + shufps(xmm1, xmm1, _MM_SHUFFLE(0, 0, 0, 0)); + + if(color) + { + // min.c = 0xffffffff; + // max.c = 0; + + pcmpeqd(xmm2, xmm2); + pxor(xmm3, xmm3); + } + + // min.p = FLT_MAX; + // max.p = -FLT_MAX; + + movaps(xmm4, xmm0); + movaps(xmm5, xmm1); + + if(tme) + { + // min.t = FLT_MAX; + // max.t = -FLT_MAX; + + movaps(xmm6, xmm0); + movaps(xmm7, xmm1); + } + + // for(int i = 0; i < count; i += step) { + + mov(edx, dword[esp + _v]); + mov(ecx, dword[esp + _count]); + + align(16); + +L("loop"); + + for(int j = 0; j < n; j++) + { + if(color && (iip || j == n - 1) || tme) + { + movaps(xmm0, xmmword[edx + j * sizeof(GSVertexHW10)]); + } + + if(color && (iip || j == n - 1)) + { + pminub(xmm2, xmm0); + pmaxub(xmm3, xmm0); + } + + if(tme) + { + if(!fst) + { + movaps(xmm1, xmm0); + } + + shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral + + if(!fst) + { + shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3)); + divps(xmm0, xmm1); + shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q + } + + minps(xmm6, xmm0); + maxps(xmm7, xmm0); + } + + movdqa(xmm0, xmmword[edx + j * sizeof(GSVertexHW10) + 16]); + + if(m_cpu.has(util::Cpu::tSSE41)) + { + pmovzxwd(xmm1, xmm0); + } + else + { + movdqa(xmm1, xmm0); + punpcklwd(xmm1, xmm1); + psrld(xmm1, 16); + } + + psrld(xmm0, 1); + punpcklqdq(xmm1, xmm0); + cvtdq2ps(xmm1, xmm1); + + minps(xmm4, xmm1); + maxps(xmm5, xmm1); + } + + add(edx, n * sizeof(GSVertexHW10)); + sub(ecx, n); + + jg("loop"); + + // } + + 
mov(eax, dword[esp + _min]); + mov(edx, dword[esp + _max]); + + if(color) + { + // m_min.c = cmin.zzzz().u8to32(); + // m_max.c = cmax.zzzz().u8to32(); + + if(m_cpu.has(util::Cpu::tSSE41)) + { + pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2)); + pmovzxbd(xmm2, xmm2); + + pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2)); + pmovzxbd(xmm3, xmm3); + } + else + { + pxor(xmm0, xmm0); + + punpckhbw(xmm2, xmm0); + punpcklwd(xmm2, xmm0); + + punpckhbw(xmm3, xmm0); + punpcklwd(xmm3, xmm0); + } + + movaps(xmmword[eax], xmm2); + movaps(xmmword[edx], xmm3); + } + + // m_min.p = pmin.xyww(); + // m_max.p = pmax.xyww(); + + shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0)); + shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0)); + + movaps(xmmword[eax + 16], xmm4); + movaps(xmmword[edx + 16], xmm5); + + if(tme) + { + // m_min.t = tmin; + // m_max.t = tmax; + + movaps(xmmword[eax + 32], xmm6); + movaps(xmmword[edx + 32], xmm7); + } + + ret(); +} \ No newline at end of file diff --git a/plugins/GSdx/GSVertexTrace.h b/plugins/GSdx/GSVertexTrace.h new file mode 100644 index 0000000000..802513fe2a --- /dev/null +++ b/plugins/GSdx/GSVertexTrace.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2007-2009 Gabest + * http://www.gabest.org + * + * This Program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This Program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * http://www.gnu.org/copyleft/gpl.html + * + */ + +#pragma once + +#include "GSDrawingContext.h" +#include "GSVertexSW.h" +#include "GSVertexHW.h" +#include "GSFunctionMap.h" +#include "xbyak/xbyak.h" +#include "xbyak/xbyak_util.h" + +__declspec(align(16)) class GSVertexTrace +{ + struct Vertex {GSVector4i c; GSVector4 p, t;}; + struct VertexAlpha {int min, max; bool valid;}; + + typedef void (*VertexTracePtr)(const void* v, int count, Vertex& min, Vertex& max); + + class CGSW : public Xbyak::CodeGenerator + { + public: + CGSW(uint32 key, void* ptr, size_t maxsize); + }; + + class GSVertexTraceMapSW : public GSCodeGeneratorFunctionMap + { + public: + GSVertexTraceMapSW() : GSCodeGeneratorFunctionMap("VertexTraceSW") {} + CGSW* Create(uint32 key, void* ptr, size_t maxsize) {return new CGSW(key, ptr, maxsize);} + }; + + class CGHW9 : public Xbyak::CodeGenerator + { + Xbyak::util::Cpu m_cpu; + + public: + CGHW9(uint32 key, void* ptr, size_t maxsize); + }; + + class GSVertexTraceMapHW9 : public GSCodeGeneratorFunctionMap + { + public: + GSVertexTraceMapHW9() : GSCodeGeneratorFunctionMap("VertexTraceHW9") {} + CGHW9* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW9(key, ptr, maxsize);} + }; + + class CGHW10 : public Xbyak::CodeGenerator + { + Xbyak::util::Cpu m_cpu; + + public: + CGHW10(uint32 key, void* ptr, size_t maxsize); + }; + + class GSVertexTraceMapHW10 : public GSCodeGeneratorFunctionMap + { + public: + GSVertexTraceMapHW10() : GSCodeGeneratorFunctionMap("VertexTraceHW10") {} + CGHW10* Create(uint32 key, void* ptr, size_t maxsize) {return new CGHW10(key, ptr, maxsize);} + }; + + GSVertexTraceMapSW m_map_sw; + GSVertexTraceMapHW9 m_map_hw9; + GSVertexTraceMapHW10 m_map_hw10; + +public: + Vertex m_min, m_max; // t.xy * 0x10000 + VertexAlpha m_alpha; // source alpha range after tfx, GSRenderer::GetAlphaMinMax() updates it + + union + { + uint32 value; + struct {uint32 r:4, g:4, b:4, a:4, x:1, y:1, z:1, f:1, s:1, t:1, q:1, _pad:1;}; + struct 
{uint32 rgba:16, xyzf:4, stq:4;}; + } m_eq; + + void Update(const GSVertexSW* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context); + void Update(const GSVertexHW9* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context); + void Update(const GSVertexHW10* v, int count, GS_PRIM_CLASS primclass, const GIFRegPRIM* PRIM, const GSDrawingContext* context); +}; diff --git a/plugins/GSdx/GSdx_vs2008.vcproj b/plugins/GSdx/GSdx_vs2008.vcproj index aa3a060e89..7addc2fb08 100644 --- a/plugins/GSdx/GSdx_vs2008.vcproj +++ b/plugins/GSdx/GSdx_vs2008.vcproj @@ -1440,6 +1440,10 @@ /> + + @@ -1496,6 +1500,10 @@ RelativePath=".\GSVertexSW.cpp" > + + @@ -1878,6 +1886,10 @@ RelativePath=".\GSTextureCacheSW.h" > + + @@ -1926,6 +1938,10 @@ RelativePath=".\GSVertexSW.h" > + + diff --git a/plugins/GSdx/res/tfx.fx b/plugins/GSdx/res/tfx.fx index ebd24e290b..84ebbe190e 100644 --- a/plugins/GSdx/res/tfx.fx +++ b/plugins/GSdx/res/tfx.fx @@ -345,7 +345,7 @@ VS_OUTPUT vs_main(VS_INPUT input) input.z = input.z & 0xffff; } - if(VS_PRIM == 6) // sprite + if(VS_PRIM == 3) // sprite { //input.p.xy = (input.p.xy + 15) & ~15; // HACK }