From 4ed303566147c243a5da5fbf407d58798c9a6751 Mon Sep 17 00:00:00 2001 From: gabest11 Date: Sat, 6 Jun 2009 17:53:34 +0000 Subject: [PATCH] GSdx: pixel shaders were reorganized, things might be broken :P git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1341 96395faa-99c1-11dd-bbfe-3dabce05a288 --- plugins/GSdx/GSRenderer.cpp | 3 +- plugins/GSdx/GSRenderer.h | 7 +- plugins/GSdx/GSRendererHW.h | 12 +- plugins/GSdx/GSRendererHW10.cpp | 7 +- plugins/GSdx/GSRendererHW9.cpp | 11 +- plugins/GSdx/GSTextureCache.cpp | 19 +- plugins/GSdx/GSTextureFX10.cpp | 5 - plugins/GSdx/GSTextureFX10.h | 9 +- plugins/GSdx/GSTextureFX9.cpp | 22 +- plugins/GSdx/GSTextureFX9.h | 9 +- plugins/GSdx/res/tfx10.fx | 384 ++++++++++++++++---------------- plugins/GSdx/res/tfx9.fx | 295 ++++++++++++++---------- 12 files changed, 414 insertions(+), 369 deletions(-) diff --git a/plugins/GSdx/GSRenderer.cpp b/plugins/GSdx/GSRenderer.cpp index 1391efcd21..0096c85607 100644 --- a/plugins/GSdx/GSRenderer.cpp +++ b/plugins/GSdx/GSRenderer.cpp @@ -22,11 +22,10 @@ #include "StdAfx.h" #include "GSRenderer.h" -GSRenderer::GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr) +GSRenderer::GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev) : GSState(base, mt, irq) , m_dev(dev) , m_shader(0) - , m_psrr(psrr) { m_interlace = theApp.GetConfig("interlace", 0); m_aspectratio = theApp.GetConfig("aspectratio", 1); diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index 3e9bdd2308..97ad72df2c 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -51,7 +51,6 @@ protected: public: GSWnd m_wnd; GSDevice* m_dev; - bool m_psrr; int s_n; bool s_dump; @@ -59,7 +58,7 @@ public: bool s_savez; public: - GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr = true); + GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev); virtual ~GSRenderer(); virtual bool Create(const string& title); @@ -216,8 +215,8 @@ protected: virtual void 
Draw() = 0; public: - GSRendererT(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr = true) - : GSRenderer(base, mt, irq, dev, psrr) + GSRendererT(uint8* base, bool mt, void (*irq)(), GSDevice* dev) + : GSRenderer(base, mt, irq, dev) , m_count(0) , m_maxcount(0) , m_vertices(NULL) diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index 8155aaffca..aea77663f3 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -179,8 +179,8 @@ protected: vr.z = v.z; break; case CLAMP_REGION_REPEAT: - if(m_psrr) {vr.x = maxu; vr.z = vr.x + (minu + 1);} - //else {vr.x = 0; vr.z = w;} + vr.x = maxu; + vr.z = vr.x + (minu + 1); break; default: __assume(0); @@ -202,8 +202,8 @@ protected: vr.w = v.w; break; case CLAMP_REGION_REPEAT: - if(m_psrr) {vr.y = maxv; vr.w = vr.y + (minv + 1);} - //else {r.y = 0; r.w = w;} + vr.y = maxv; + vr.w = vr.y + (minv + 1); break; default: __assume(0); @@ -691,8 +691,8 @@ protected: } public: - GSRendererHW(uint8* base, bool mt, void (*irq)(), GSDevice* dev, GSTextureCache* tc, bool psrr) - : GSRendererT(base, mt, irq, dev, psrr) + GSRendererHW(uint8* base, bool mt, void (*irq)(), GSDevice* dev, GSTextureCache* tc) + : GSRendererT(base, mt, irq, dev) , m_tc(tc) , m_width(1024) , m_height(1024) diff --git a/plugins/GSdx/GSRendererHW10.cpp b/plugins/GSdx/GSRendererHW10.cpp index 26a3ddee7c..69bf582df4 100644 --- a/plugins/GSdx/GSRendererHW10.cpp +++ b/plugins/GSdx/GSRendererHW10.cpp @@ -25,7 +25,7 @@ #include "resource.h" GSRendererHW10::GSRendererHW10(uint8* base, bool mt, void (*irq)()) - : GSRendererHW(base, mt, irq, new GSDevice10(), new GSTextureCache10(this), true) + : GSRendererHW(base, mt, irq, new GSDevice10(), new GSTextureCache10(this)) { InitVertexKick(); } @@ -361,7 +361,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache ps_ssel.tau = 0; break; case 2: - ps_cb.MINU = ((float)(int)context->CLAMP.MINU + 0.5f) / (1 << context->TEX0.TW); + ps_cb.MINU = 
((float)(int)context->CLAMP.MINU) / (1 << context->TEX0.TW); ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW); ps_ssel.tau = 0; break; @@ -383,7 +383,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache ps_ssel.tav = 0; break; case 2: - ps_cb.MINV = ((float)(int)context->CLAMP.MINV + 0.5f) / (1 << context->TEX0.TH); + ps_cb.MINV = ((float)(int)context->CLAMP.MINV) / (1 << context->TEX0.TH); ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH); ps_ssel.tav = 0; break; @@ -401,6 +401,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache ps_cb.WH = GSVector2(w, h); ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h); + ps_cb.HalfTexel = GSVector4(-0.5f / w, -0.5f / h, +0.5f / w, +0.5f / h); } else { diff --git a/plugins/GSdx/GSRendererHW9.cpp b/plugins/GSdx/GSRendererHW9.cpp index 27be32f1d2..3b90ca47a8 100644 --- a/plugins/GSdx/GSRendererHW9.cpp +++ b/plugins/GSdx/GSRendererHW9.cpp @@ -25,7 +25,7 @@ #include "resource.h" GSRendererHW9::GSRendererHW9(uint8* base, bool mt, void (*irq)()) - : GSRendererHW(base, mt, irq, new GSDevice9(), new GSTextureCache9(this), true) + : GSRendererHW(base, mt, irq, new GSDevice9(), new GSTextureCache9(this)) { m_fba.enabled = !!theApp.GetConfig("fba", 1); m_logz = !!theApp.GetConfig("logz", 0); @@ -335,7 +335,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: ps_ssel.tau = 0; break; case 2: - ps_cb.MINU = ((float)(int)context->CLAMP.MINU + 0.5f) / (1 << context->TEX0.TW); + ps_cb.MINU = ((float)(int)context->CLAMP.MINU) / (1 << context->TEX0.TW); ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW); ps_ssel.tau = 0; break; @@ -357,7 +357,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: ps_ssel.tav = 0; break; case 2: - ps_cb.MINV = ((float)(int)context->CLAMP.MINV + 0.5f) / (1 << context->TEX0.TH); + ps_cb.MINV = ((float)(int)context->CLAMP.MINV) 
/ (1 << context->TEX0.TH); ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH); ps_ssel.tav = 0; break; @@ -375,6 +375,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: ps_cb.WH = GSVector2(w, h); ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h); + ps_cb.HalfTexel = GSVector4(-0.5f / w, -0.5f / h, +0.5f / w, +0.5f / h); } else { @@ -393,7 +394,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: m_tfx.SetupOM(om_dssel, om_bsel, bf, rt, ds); m_tfx.SetupIA(m_vertices, m_count, topology); m_tfx.SetupVS(vs_sel, &vs_cb); - m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL, m_psrr); + m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL); m_tfx.SetupRS(w, h, scissor); // draw @@ -411,7 +412,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache: ps_sel.atst = iatst[ps_sel.atst]; - m_tfx.UpdatePS(ps_sel, &ps_cb, ps_ssel, m_psrr); + m_tfx.UpdatePS(ps_sel, &ps_cb, ps_ssel); bool z = om_dssel.zwe; bool r = om_bsel.wr; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 27b789ee0f..43a6587019 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -266,7 +266,6 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture() { if(TEX0.PSM == t->m_TEX0.PSM && TEX0.TBW == t->m_TEX0.TBW && TEX0.TW == t->m_TEX0.TW && TEX0.TH == t->m_TEX0.TH - && (m_renderer->m_psrr || (CLAMP.WMS != 3 && t->m_CLAMP.WMS != 3 && CLAMP.WMT != 3 && t->m_CLAMP.WMT != 3 || CLAMP.u64 == t->m_CLAMP.u64)) && (pal == 0 || TEX0.CPSM == t->m_TEX0.CPSM && GSVector4i::compare(t->m_clut, clut, pal * sizeof(clut[0])))) { m_tex.splice(m_tex.begin(), m_tex, i); @@ -705,14 +704,7 @@ void GSTextureCache::GSCachedTexture::Update() { // in dx9 managed textures can be written directly, less copying is faster, but still not as fast as dx10's UpdateResource - 
if(m_renderer->m_psrr) - { - m_renderer->m_mem.ReadTextureNPNC(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP); - } - else - { - m_renderer->m_mem.ReadTextureNP(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP); - } + m_renderer->m_mem.ReadTextureNPNC(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP); m_texture->Unmap(); } @@ -722,14 +714,7 @@ void GSTextureCache::GSCachedTexture::Update() pitch = ((r.width() + 3) & ~3) * 4; - if(m_renderer->m_psrr) - { - m_renderer->m_mem.ReadTextureNPNC(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP); - } - else - { - m_renderer->m_mem.ReadTextureNP(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP); - } + m_renderer->m_mem.ReadTextureNPNC(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP); m_texture->Update(r, buff, pitch); } diff --git a/plugins/GSdx/GSTextureFX10.cpp b/plugins/GSdx/GSTextureFX10.cpp index 48cf096cf7..0dec170dcc 100644 --- a/plugins/GSdx/GSTextureFX10.cpp +++ b/plugins/GSdx/GSTextureFX10.cpp @@ -268,11 +268,6 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl if(sel.tfx != 4) { - if(sel.bpp >= 3 || sel.wms >= 3 || sel.wmt >= 3) - { - ssel.min = ssel.mag = 0; - } - hash_map >::const_iterator i = m_ps_ss.find(ssel); if(i != m_ps_ss.end()) diff --git a/plugins/GSdx/GSTextureFX10.h b/plugins/GSdx/GSTextureFX10.h index f87306972d..cc8653ca7e 100644 --- a/plugins/GSdx/GSTextureFX10.h +++ b/plugins/GSdx/GSTextureFX10.h @@ -79,12 +79,12 @@ public: { GSVector4 FogColor; float MINU; - float MAXU; float MINV; + float MAXU; float MAXV; uint32 UMSK; - uint32 UFIX; uint32 VMSK; + uint32 UFIX; uint32 VFIX; float TA0; float TA1; @@ -92,6 +92,7 @@ public: float _pad[1]; GSVector2 WH; GSVector2 rWrH; + 
GSVector4 HalfTexel; struct PSConstantBuffer() {memset(this, 0, sizeof(*this));} @@ -105,14 +106,16 @@ public: GSVector4i b2 = b[2]; GSVector4i b3 = b[3]; GSVector4i b4 = b[4]; + GSVector4i b5 = b[5]; - if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4)).alltrue()) + if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) { a[0] = b0; a[1] = b1; a[2] = b2; a[3] = b3; a[4] = b4; + a[5] = b5; return true; } diff --git a/plugins/GSdx/GSTextureFX9.cpp b/plugins/GSdx/GSTextureFX9.cpp index 5a28c42b3e..7abf2a550c 100644 --- a/plugins/GSdx/GSTextureFX9.cpp +++ b/plugins/GSdx/GSTextureFX9.cpp @@ -144,11 +144,11 @@ bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb) return true; } -bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal, bool psrr) +bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal) { m_dev->PSSetShaderResources(tex, pal); - if(tex && psrr && (sel.wms == 3 || sel.wmt == 3)) + if(tex && (sel.wms == 3 || sel.wmt == 3)) { if(sel.wms == 3) { @@ -167,21 +167,15 @@ bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSampler } } - UpdatePS(sel, cb, ssel, psrr); + UpdatePS(sel, cb, ssel); return true; } -void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, bool psrr) +void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) { HRESULT hr; - if(!psrr) - { - if(sel.wms == 3) sel.wms = 0; - if(sel.wmt == 3) sel.wmt = 0; - } - hash_map >::const_iterator i = m_ps.find(sel); if(i == m_ps.end()) @@ -233,10 +227,10 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample if(sel.tfx != 4) { - if(sel.bpp >= 3 || sel.wms >= 3 || sel.wmt >= 3) - { - ssel.min = ssel.mag = 0; - } + bool b = sel.bpp < 3 && sel.wms < 
3 && sel.wmt < 3; + + ssel.min = b; + ssel.mag = b; hash_map::const_iterator i = m_ps_ss.find(ssel); diff --git a/plugins/GSdx/GSTextureFX9.h b/plugins/GSdx/GSTextureFX9.h index 65bb7bbfbf..54fbb258ca 100644 --- a/plugins/GSdx/GSTextureFX9.h +++ b/plugins/GSdx/GSTextureFX9.h @@ -55,12 +55,12 @@ public: { GSVector4 FogColor; float MINU; - float MAXU; float MINV; + float MAXU; float MAXV; uint32 UMSK; - uint32 UFIX; uint32 VMSK; + uint32 UFIX; uint32 VFIX; float TA0; float TA1; @@ -68,6 +68,7 @@ public: float _pad[1]; GSVector2 WH; GSVector2 rWrH; + GSVector4 HalfTexel; }; union PSSelector @@ -166,8 +167,8 @@ public: bool SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim); bool SetupVS(VSSelector sel, const VSConstantBuffer* cb); - bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal, bool psrr); - void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, bool psrr); + bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal); + void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel); void SetupRS(int w, int h, const GSVector4i& scissor); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf, GSTexture* rt, GSTexture* ds); void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf); diff --git a/plugins/GSdx/res/tfx10.fx b/plugins/GSdx/res/tfx10.fx index 78d1c4403d..dd172431d8 100644 --- a/plugins/GSdx/res/tfx10.fx +++ b/plugins/GSdx/res/tfx10.fx @@ -155,28 +155,25 @@ void gs_main(line VS_OUTPUT input[2], inout TriangleStream stream) #endif -Texture2D Texture; -Texture2D Palette; +Texture2D Texture; +Texture2D Palette; SamplerState TextureSampler; SamplerState PaletteSampler; cbuffer cb1 { float4 FogColor; - float MINU; - float MAXU; - float MINV; - float MAXV; - uint UMSK; - uint UFIX; - uint VMSK; - uint VFIX; + float2 MINUV; + float2 MAXUV; + uint2 
UVMSK; + uint2 UVFIX; float TA0; float TA1; float AREF; float _pad; float2 WH; float2 rWrH; + float4 HalfTexel; }; struct PS_INPUT @@ -194,8 +191,8 @@ struct PS_OUTPUT #ifndef FST #define FST 0 -#define WMS 3 -#define WMT 3 +#define WMS 0 +#define WMT 0 #define BPP 0 #define AEM 0 #define TFX 0 @@ -225,191 +222,185 @@ float4 Extract16(uint i) return f; } -int repeatu(int tc) +int2 wrapu(float2 f, int2 i) { - return WMS == 3 ? ((tc & UMSK) | UFIX) : tc; -} - -int repeatv(int tc) -{ - return WMT == 3 ? ((tc & VMSK) | VFIX) : tc; -} - -float4 sample(float2 tc) -{ - float4 t; - - // if(WMS >= 2 || WMT >= 2) - if(WMS >= 3 || WMT >= 3) + if(WMS == 0) { - int4 itc = tc.xyxy * WH.xyxy; - - float4 tc01; - - tc01.x = repeatu(itc.x); - tc01.y = repeatv(itc.y); - tc01.z = repeatu(itc.z + 1); - tc01.w = repeatv(itc.w + 1); - - tc01 *= rWrH.xyxy; + i = frac(f) * WH.xx; + } + else if(WMS == 1) + { + i = saturate(f) * WH.xx; + } + else if(WMS == 2) + { + i = clamp(f, MINUV.xx, MAXUV.xx) * WH.xx; + } + else if(WMS == 3) + { + i = (i & UVMSK.xx) | UVFIX.xx; + } + + return i; +} - float4 t00 = Texture.Sample(TextureSampler, tc01.xy); - float4 t01 = Texture.Sample(TextureSampler, tc01.zy); - float4 t10 = Texture.Sample(TextureSampler, tc01.xw); - float4 t11 = Texture.Sample(TextureSampler, tc01.zw); +int2 wrapv(float2 f, int2 i) +{ + if(WMT == 0) + { + i = frac(f) * WH.yy; + } + else if(WMT == 1) + { + i = saturate(f) * WH.yy; + } + else if(WMT == 2) + { + i = clamp(f, MINUV.yy, MAXUV.yy) * WH.yy; + } + else if(WMT == 3) + { + i = (i & UVMSK.yy) | UVFIX.yy; + } + + return i; +} - float2 dd = frac(tc * WH); +int4 wrapuv(float4 f, int4 i) +{ + if(WMT == 0) + { + i = frac(f) * WH.xyxy; + } + else if(WMT == 1) + { + i = saturate(f) * WH.xyxy; + } + else if(WMT == 2) + { + i = clamp(f, MINUV.xyxy, MAXUV.xyxy) * WH.xyxy; + } + else if(WMT == 3) + { + i = (i & UVMSK.xyxy) | UVFIX.xyxy; + } - t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); + return i; +} + +int4 wrap(float4 uv, 
int4 iuv) +{ + if(WMS == WMT) + { + iuv = wrapuv(uv, iuv); } else + { + iuv.xz = wrapu(uv.xz, iuv.xz); + iuv.yw = wrapv(uv.yw, iuv.yw); + } + + return iuv; +} + +float4 sample(float2 tc, float w) +{ + if(FST == 0) + { + tc /= w; + } + + float4 t; +/* + if(BPP < 3 && WMS < 2 && WMT < 2) { t = Texture.Sample(TextureSampler, tc); } +*/ + if(BPP < 3 && WMS < 3 && WMT < 3) + { + if(WMS == 2 && WMT == 2) tc = clamp(tc, MINUV.xy, MAXUV.xy); + else if(WMS == 2) tc.x = clamp(tc.x, MINUV.x, MAXUV.x); + else if(WMT == 2) tc.y = clamp(tc.y, MINUV.y, MAXUV.y); + + t = Texture.Sample(TextureSampler, tc); + } + else + { + float4 uv = tc.xyxy + HalfTexel; + float4 uv2 = uv * WH.xyxy; + float2 dd = frac(uv2.xy); + + int4 iuv = wrap(uv, uv2); + + float4 t00, t01, t10, t11; + + if(BPP == 3) // 8HP + 32-bit palette + { + float4 a; + + a.x = Texture.Load(int3(iuv.xy, 0)).a; + a.y = Texture.Load(int3(iuv.zy, 0)).a; + a.z = Texture.Load(int3(iuv.xw, 0)).a; + a.w = Texture.Load(int3(iuv.zw, 0)).a; + + t00 = Palette.Load(a.x); + t01 = Palette.Load(a.y); + t10 = Palette.Load(a.z); + t11 = Palette.Load(a.w); + } + else if(BPP == 4) // 8HP + 16-bit palette + { + // TODO: yuck, just pre-convert the palette to 32-bit + } + else if(BPP == 5) // 16P + { + float4 r; + + r.x = Texture.Load(int3(iuv.xy, 0)).r; + r.y = Texture.Load(int3(iuv.zy, 0)).r; + r.z = Texture.Load(int3(iuv.xw, 0)).r; + r.w = Texture.Load(int3(iuv.zw, 0)).r; + + uint4 i = r * 65535; + + t00 = Extract16(i.x); + t01 = Extract16(i.y); + t10 = Extract16(i.z); + t11 = Extract16(i.w); + } + else + { + t00 = Texture.Load(int3(iuv.xy, 0)); + t01 = Texture.Load(int3(iuv.zy, 0)); + t10 = Texture.Load(int3(iuv.xw, 0)); + t11 = Texture.Load(int3(iuv.zw, 0)); + } + + t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); + } + + if(BPP == 1) // 24 + { + t.a = AEM == 0 || any(t.rgb) ? 
TA0 : 0; + } + else if(BPP == 2 || BPP == 5) // 16 || 16P + { + if(BPP == 5) + { + t = Normalize16(t); + } + + // a bit incompatible with up-scaling because the 1 bit alpha is interpolated + + t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; + } return t; } -float4 sample8hp(float2 tc) +float4 tfx(float4 t, float4 c) { - float4 tc01; - - // if(WMS >= 2 || WMT >= 2) - if(WMS >= 3 || WMT >= 3) - { - int4 itc = tc.xyxy * WH.xyxy; - - tc01.x = repeatu(itc.x); - tc01.y = repeatv(itc.y); - tc01.z = repeatu(itc.z + 1); - tc01.w = repeatv(itc.w + 1); - - tc01 *= rWrH.xyxy; - } - else - { - tc01.x = tc.x; - tc01.y = tc.y; - tc01.z = tc.x + rWrH.x; - tc01.w = tc.y + rWrH.y; - } - - float4 t; - - t.x = Texture.Sample(TextureSampler, tc01.xy).a; - t.y = Texture.Sample(TextureSampler, tc01.zy).a; - t.z = Texture.Sample(TextureSampler, tc01.xw).a; - t.w = Texture.Sample(TextureSampler, tc01.zw).a; - - float4 t00 = Palette.Sample(PaletteSampler, t.x); - float4 t01 = Palette.Sample(PaletteSampler, t.y); - float4 t10 = Palette.Sample(PaletteSampler, t.z); - float4 t11 = Palette.Sample(PaletteSampler, t.w); - - float2 dd = frac(tc * WH); - - return lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); -} - -float4 sample16p(float2 tc) -{ - float4 t; - - float4 tc01; - - // if(WMS >= 2 || WMT >= 2) - if(WMS >= 3 || WMT >= 3) - { - int4 itc = tc.xyxy * WH.xyxy; - - tc01.x = repeatu(itc.x); - tc01.y = repeatv(itc.y); - tc01.z = repeatu(itc.z + 1); - tc01.w = repeatv(itc.w + 1); - - tc01 *= rWrH.xyxy; - } - else - { - tc01.x = tc.x; - tc01.y = tc.y; - tc01.z = tc.x + rWrH.x; - tc01.w = tc.y + rWrH.y; - } - - t.x = Texture.Sample(TextureSampler, tc01.xy).r; - t.y = Texture.Sample(TextureSampler, tc01.zy).r; - t.z = Texture.Sample(TextureSampler, tc01.xw).r; - t.w = Texture.Sample(TextureSampler, tc01.zw).r; - - uint4 i = t * 65535; - - float4 t00 = Extract16(i.x); - float4 t01 = Extract16(i.y); - float4 t10 = Extract16(i.z); - float4 t11 = Extract16(i.w); - - float2 dd = 
frac(tc * WH); - - return Normalize16(lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y)); -} - -PS_OUTPUT ps_main(PS_INPUT input) -{ - float2 tc = input.t.xy; - - if(FST == 0) - { - tc /= input.t.w; - } - - tc -= rWrH / 2; - - if(WMS == 2) - { - tc.x = clamp(tc.x, MINU, MAXU); - } - - if(WMT == 2) - { - tc.y = clamp(tc.y, MINV, MAXV); - } - - float4 t; - - if(BPP == 0) // 32 - { - t = sample(tc); - } - else if(BPP == 1) // 24 - { - t = sample(tc); - - t.a = AEM == 0 || any(t.rgb) ? TA0 : 0; - } - else if(BPP == 2) // 16 - { - t = sample(tc); - - t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated - } - else if(BPP == 3) // 8HP / 32-bit palette - { - t = sample8hp(tc); - } - else if(BPP == 4) // 8HP / 16-bit palette - { - // TODO: yuck, just pre-convert the palette to 32-bit - } - else if(BPP == 5) // 16P - { - t = sample16p(tc); - - t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated - } - - float4 c = input.c; - if(TFX == 0) { if(TCC == 0) @@ -435,7 +426,7 @@ PS_OUTPUT ps_main(PS_INPUT input) else if(TFX == 2) { c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a; - + if(TCC == 1) { c.a += t.a; @@ -444,15 +435,18 @@ PS_OUTPUT ps_main(PS_INPUT input) else if(TFX == 3) { c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a; - + if(TCC == 1) { c.a = t.a; } } + + return saturate(c); +} - c = saturate(c); - +void atst(float4 c) +{ if(ATE == 1) { if(ATST == 0) @@ -476,12 +470,28 @@ PS_OUTPUT ps_main(PS_INPUT input) clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much } } +} +float4 fog(float4 c, float f) +{ if(FOG == 1) { - c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z); + c.rgb = lerp(FogColor.rgb, c.rgb, f); } + return c; +} + +PS_OUTPUT ps_main(PS_INPUT input) +{ + float4 t = sample(input.t.xy, input.t.w); + + float4 c = tfx(t, input.c); + + atst(c); + + c = fog(c, input.t.z); + if(CLR1 == 1) // needed for 
Cd * (As/Ad/F + 1) blending modes { c.rgb = 1; diff --git a/plugins/GSdx/res/tfx9.fx b/plugins/GSdx/res/tfx9.fx index b17a07f822..ebe822ffee 100644 --- a/plugins/GSdx/res/tfx9.fx +++ b/plugins/GSdx/res/tfx9.fx @@ -71,22 +71,19 @@ VS_OUTPUT vs_main(VS_INPUT input) return output; } -float4 ps_params[5]; +float4 ps_params[6]; #define FogColor ps_params[0].bgra -#define MINU ps_params[1].x -#define MAXU ps_params[1].y -#define MINV ps_params[1].z -#define MAXV ps_params[1].w -#define UMSK ps_params[2].x -#define UFIX ps_params[2].y -#define VMSK ps_params[2].z -#define VFIX ps_params[2].w +#define MINUV ps_params[1].xy +#define MAXUV ps_params[1].zw +#define UVMSK ps_params[2].xy +#define UVFIX ps_params[2].zw #define TA0 ps_params[3].x #define TA1 ps_params[3].y #define AREF ps_params[3].z #define WH ps_params[4].xy #define rWrH ps_params[4].zw +#define HalfTexel ps_params[5] struct PS_INPUT { @@ -114,129 +111,170 @@ sampler1D Palette : register(s1); sampler1D UMSKFIX : register(s2); sampler1D VMSKFIX : register(s3); -float repeatu(float tc) +float2 wrapu(float2 f) { - return WMS == 3 ? tex1D(UMSKFIX, tc*rWrH.x)* WH.x : tc; -} - -float repeatv(float tc) -{ - return WMT == 3 ? 
tex1D(VMSKFIX, tc*rWrH.y)* WH.y : tc; -} - -float4 sample(float2 tc) -{ - float4 t; - - // if(WMS >= 2 || WMT >= 2) - if(WMS >= 3 || WMT >= 3) + if(WMS == 0) { - tc -= rWrH / 2; - - int4 itc = tc.xyxy * WH.xyxy; - - float4 tc01; - - tc01.x = repeatu(itc.x); - tc01.y = repeatv(itc.y); - tc01.z = repeatu(itc.z + 1); - tc01.w = repeatv(itc.w + 1); + f = frac(f); + } + else if(WMS == 1) + { + f = saturate(f); + } + else if(WMS == 2) + { + f = clamp(f, MINUV.xx, MAXUV.xx); + } + else if(WMS == 3) + { + f.x = tex1D(UMSKFIX, f.x); + f.y = tex1D(UMSKFIX, f.y); + } - tc01 *= rWrH.xyxy; + return f; +} - float4 t00 = tex2D(Texture, tc01.xy); - float4 t01 = tex2D(Texture, tc01.zy); - float4 t10 = tex2D(Texture, tc01.xw); - float4 t11 = tex2D(Texture, tc01.zw); +float2 wrapv(float2 f) // V axis: the wrap mode is selected by WMT, not WMS +{ + if(WMT == 0) + { + f = frac(f); + } + else if(WMT == 1) + { + f = saturate(f); + } + else if(WMT == 2) + { + f = clamp(f, MINUV.yy, MAXUV.yy); + } + else if(WMT == 3) + { + f.x = tex1D(VMSKFIX, f.x); + f.y = tex1D(VMSKFIX, f.y); + } - float2 dd = frac(tc * WH); + return f; +} - t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); +float4 wrapuv(float4 f) +{ + if(WMS == 0) + { + f = frac(f); + } + else if(WMS == 1) + { + f = saturate(f); + } + else if(WMS == 2) + { + f = clamp(f, MINUV.xyxy, MAXUV.xyxy); + } + else if(WMS == 3) + { + f.x = tex1D(UMSKFIX, f.x); + f.y = tex1D(VMSKFIX, f.y); + f.z = tex1D(UMSKFIX, f.z); + f.w = tex1D(VMSKFIX, f.w); + } + + return f; +} + +float4 wrap(float4 uv) +{ + if(WMS == WMT) + { + uv = wrapuv(uv); } else + { + uv.xz = wrapu(uv.xz); + uv.yw = wrapv(uv.yw); + } + + return uv; +} + +float4 sample(float2 tc, float w) +{ + if(FST == 0) + { + tc /= w; + } + + float4 t; +/* + if(BPP < 3 && WMS < 2 && WMT < 2) { t = tex2D(Texture, tc); } +*/ + if(BPP < 3 && WMS < 3 && WMT < 3) + { + if(WMS == 2 && WMT == 2) tc = clamp(tc, MINUV.xy, MAXUV.xy); + else if(WMS == 2) tc.x = clamp(tc.x, MINUV.x, MAXUV.x); + else if(WMT == 2) tc.y = clamp(tc.y, MINUV.y, MAXUV.y); + 
+ t = tex2D(Texture, tc); + } + else + { + float4 uv = tc.xyxy + HalfTexel; + float2 dd = frac(uv.xy * WH); + + uv = wrap(uv); + + float4 t00, t01, t10, t11; + + if(BPP == 3) // 8HP ln + { + float4 a; + + a.x = tex2D(Texture, uv.xy).a; + a.y = tex2D(Texture, uv.zy).a; + a.z = tex2D(Texture, uv.xw).a; + a.w = tex2D(Texture, uv.zw).a; + + if(RT == 1) a *= 0.5; + + t00 = tex1D(Palette, a.x); + t01 = tex1D(Palette, a.y); + t10 = tex1D(Palette, a.z); + t11 = tex1D(Palette, a.w); + } + else + { + t00 = tex2D(Texture, uv.xy); + t01 = tex2D(Texture, uv.zy); + t10 = tex2D(Texture, uv.xw); + t11 = tex2D(Texture, uv.zw); + } + + t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); + } + + if(BPP == 0) // 32 + { + if(RT == 1) t.a *= 0.5; + } + else if(BPP == 1) // 24 + { + t.a = AEM == 0 || any(t.rgb) ? TA0 : 0; + } + else if(BPP == 2) // 16 + { + // a bit incompatible with up-scaling because the 1 bit alpha is interpolated + + t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; + } return t; } -float4 sample8hp(float2 tc) +float4 tfx(float4 t, float4 c) { - tc -= rWrH / 2; - - float4 tc01; - - tc01.x = tc.x; - tc01.y = tc.y; - tc01.z = tc.x + rWrH.x; - tc01.w = tc.y + rWrH.y; - - float4 t; - - t.x = tex2D(Texture, tc01.xy).a; - t.y = tex2D(Texture, tc01.zy).a; - t.z = tex2D(Texture, tc01.xw).a; - t.w = tex2D(Texture, tc01.zw).a; - - if(RT == 1) t *= 0.5; - - float4 t00 = tex1D(Palette, t.x); - float4 t01 = tex1D(Palette, t.y); - float4 t10 = tex1D(Palette, t.z); - float4 t11 = tex1D(Palette, t.w); - - float2 dd = frac(tc * WH); - - return lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); -} - -float4 ps_main(PS_INPUT input) : COLOR -{ - float2 tc = input.t.xy; - - if(FST == 0) - { - tc /= input.t.w; - } - - if(WMS == 2) - { - tc.x = clamp(tc.x, MINU, MAXU); - } - - if(WMT == 2) - { - tc.y = clamp(tc.y, MINV, MAXV); - } - - float4 t; - - if(BPP == 0) // 32 - { - t = sample(tc); - - if(RT == 1) t.a *= 0.5; - } - else if(BPP == 1) // 24 - { - t = sample(tc); 
- - t.a = AEM == 0 || any(t.rgb) ? TA0 : 0; - } - else if(BPP == 2) // 16 - { - t = sample(tc); - - t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated - } - else if(BPP == 3) // 8HP ln - { - t = sample8hp(tc); - } - - float4 c = input.c; - if(TFX == 0) { if(TCC == 0) @@ -277,9 +315,12 @@ float4 ps_main(PS_INPUT input) : COLOR c.a = t.a; } } + + return saturate(c); +} - c = saturate(c); - +void atst(float4 c) +{ if(ATE == 1) { if(ATST == 0) @@ -303,12 +344,28 @@ float4 ps_main(PS_INPUT input) : COLOR clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much } } +} +float4 fog(float4 c, float f) +{ if(FOG == 1) { - c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z); + c.rgb = lerp(FogColor.rgb, c.rgb, f); } + return c; +} + +float4 ps_main(PS_INPUT input) : COLOR +{ + float4 t = sample(input.t.xy, input.t.w); + + float4 c = tfx(t, input.c); + + atst(c); + + c = fog(c, input.t.z); + if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes { c.rgb = 1;