GSdx: pixel shaders were reorganized, things might be broken :P

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1341 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-06-06 17:53:34 +00:00
parent 95ff8e3a98
commit 4ed3035661
12 changed files with 414 additions and 369 deletions

View File

@ -22,11 +22,10 @@
#include "StdAfx.h" #include "StdAfx.h"
#include "GSRenderer.h" #include "GSRenderer.h"
GSRenderer::GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr) GSRenderer::GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev)
: GSState(base, mt, irq) : GSState(base, mt, irq)
, m_dev(dev) , m_dev(dev)
, m_shader(0) , m_shader(0)
, m_psrr(psrr)
{ {
m_interlace = theApp.GetConfig("interlace", 0); m_interlace = theApp.GetConfig("interlace", 0);
m_aspectratio = theApp.GetConfig("aspectratio", 1); m_aspectratio = theApp.GetConfig("aspectratio", 1);

View File

@ -51,7 +51,6 @@ protected:
public: public:
GSWnd m_wnd; GSWnd m_wnd;
GSDevice* m_dev; GSDevice* m_dev;
bool m_psrr;
int s_n; int s_n;
bool s_dump; bool s_dump;
@ -59,7 +58,7 @@ public:
bool s_savez; bool s_savez;
public: public:
GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr = true); GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev);
virtual ~GSRenderer(); virtual ~GSRenderer();
virtual bool Create(const string& title); virtual bool Create(const string& title);
@ -216,8 +215,8 @@ protected:
virtual void Draw() = 0; virtual void Draw() = 0;
public: public:
GSRendererT(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr = true) GSRendererT(uint8* base, bool mt, void (*irq)(), GSDevice* dev)
: GSRenderer(base, mt, irq, dev, psrr) : GSRenderer(base, mt, irq, dev)
, m_count(0) , m_count(0)
, m_maxcount(0) , m_maxcount(0)
, m_vertices(NULL) , m_vertices(NULL)

View File

@ -179,8 +179,8 @@ protected:
vr.z = v.z; vr.z = v.z;
break; break;
case CLAMP_REGION_REPEAT: case CLAMP_REGION_REPEAT:
if(m_psrr) {vr.x = maxu; vr.z = vr.x + (minu + 1);} vr.x = maxu;
//else {vr.x = 0; vr.z = w;} vr.z = vr.x + (minu + 1);
break; break;
default: default:
__assume(0); __assume(0);
@ -202,8 +202,8 @@ protected:
vr.w = v.w; vr.w = v.w;
break; break;
case CLAMP_REGION_REPEAT: case CLAMP_REGION_REPEAT:
if(m_psrr) {vr.y = maxv; vr.w = vr.y + (minv + 1);} vr.y = maxv;
//else {r.y = 0; r.w = w;} vr.w = vr.y + (minv + 1);
break; break;
default: default:
__assume(0); __assume(0);
@ -691,8 +691,8 @@ protected:
} }
public: public:
GSRendererHW(uint8* base, bool mt, void (*irq)(), GSDevice* dev, GSTextureCache* tc, bool psrr) GSRendererHW(uint8* base, bool mt, void (*irq)(), GSDevice* dev, GSTextureCache* tc)
: GSRendererT<Vertex>(base, mt, irq, dev, psrr) : GSRendererT<Vertex>(base, mt, irq, dev)
, m_tc(tc) , m_tc(tc)
, m_width(1024) , m_width(1024)
, m_height(1024) , m_height(1024)

View File

@ -25,7 +25,7 @@
#include "resource.h" #include "resource.h"
GSRendererHW10::GSRendererHW10(uint8* base, bool mt, void (*irq)()) GSRendererHW10::GSRendererHW10(uint8* base, bool mt, void (*irq)())
: GSRendererHW<GSVertexHW10>(base, mt, irq, new GSDevice10(), new GSTextureCache10(this), true) : GSRendererHW<GSVertexHW10>(base, mt, irq, new GSDevice10(), new GSTextureCache10(this))
{ {
InitVertexKick<GSRendererHW10>(); InitVertexKick<GSRendererHW10>();
} }
@ -361,7 +361,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_ssel.tau = 0; ps_ssel.tau = 0;
break; break;
case 2: case 2:
ps_cb.MINU = ((float)(int)context->CLAMP.MINU + 0.5f) / (1 << context->TEX0.TW); ps_cb.MINU = ((float)(int)context->CLAMP.MINU) / (1 << context->TEX0.TW);
ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW); ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW);
ps_ssel.tau = 0; ps_ssel.tau = 0;
break; break;
@ -383,7 +383,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_ssel.tav = 0; ps_ssel.tav = 0;
break; break;
case 2: case 2:
ps_cb.MINV = ((float)(int)context->CLAMP.MINV + 0.5f) / (1 << context->TEX0.TH); ps_cb.MINV = ((float)(int)context->CLAMP.MINV) / (1 << context->TEX0.TH);
ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH); ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH);
ps_ssel.tav = 0; ps_ssel.tav = 0;
break; break;
@ -401,6 +401,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_cb.WH = GSVector2(w, h); ps_cb.WH = GSVector2(w, h);
ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h); ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h);
ps_cb.HalfTexel = GSVector4(-0.5f / w, -0.5f / h, +0.5f / w, +0.5f / h);
} }
else else
{ {

View File

@ -25,7 +25,7 @@
#include "resource.h" #include "resource.h"
GSRendererHW9::GSRendererHW9(uint8* base, bool mt, void (*irq)()) GSRendererHW9::GSRendererHW9(uint8* base, bool mt, void (*irq)())
: GSRendererHW<GSVertexHW9>(base, mt, irq, new GSDevice9(), new GSTextureCache9(this), true) : GSRendererHW<GSVertexHW9>(base, mt, irq, new GSDevice9(), new GSTextureCache9(this))
{ {
m_fba.enabled = !!theApp.GetConfig("fba", 1); m_fba.enabled = !!theApp.GetConfig("fba", 1);
m_logz = !!theApp.GetConfig("logz", 0); m_logz = !!theApp.GetConfig("logz", 0);
@ -335,7 +335,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_ssel.tau = 0; ps_ssel.tau = 0;
break; break;
case 2: case 2:
ps_cb.MINU = ((float)(int)context->CLAMP.MINU + 0.5f) / (1 << context->TEX0.TW); ps_cb.MINU = ((float)(int)context->CLAMP.MINU) / (1 << context->TEX0.TW);
ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW); ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW);
ps_ssel.tau = 0; ps_ssel.tau = 0;
break; break;
@ -357,7 +357,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_ssel.tav = 0; ps_ssel.tav = 0;
break; break;
case 2: case 2:
ps_cb.MINV = ((float)(int)context->CLAMP.MINV + 0.5f) / (1 << context->TEX0.TH); ps_cb.MINV = ((float)(int)context->CLAMP.MINV) / (1 << context->TEX0.TH);
ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH); ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH);
ps_ssel.tav = 0; ps_ssel.tav = 0;
break; break;
@ -375,6 +375,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_cb.WH = GSVector2(w, h); ps_cb.WH = GSVector2(w, h);
ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h); ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h);
ps_cb.HalfTexel = GSVector4(-0.5f / w, -0.5f / h, +0.5f / w, +0.5f / h);
} }
else else
{ {
@ -393,7 +394,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
m_tfx.SetupOM(om_dssel, om_bsel, bf, rt, ds); m_tfx.SetupOM(om_dssel, om_bsel, bf, rt, ds);
m_tfx.SetupIA(m_vertices, m_count, topology); m_tfx.SetupIA(m_vertices, m_count, topology);
m_tfx.SetupVS(vs_sel, &vs_cb); m_tfx.SetupVS(vs_sel, &vs_cb);
m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL, m_psrr); m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL);
m_tfx.SetupRS(w, h, scissor); m_tfx.SetupRS(w, h, scissor);
// draw // draw
@ -411,7 +412,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_sel.atst = iatst[ps_sel.atst]; ps_sel.atst = iatst[ps_sel.atst];
m_tfx.UpdatePS(ps_sel, &ps_cb, ps_ssel, m_psrr); m_tfx.UpdatePS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe; bool z = om_dssel.zwe;
bool r = om_bsel.wr; bool r = om_bsel.wr;

View File

@ -266,7 +266,6 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
{ {
if(TEX0.PSM == t->m_TEX0.PSM && TEX0.TBW == t->m_TEX0.TBW if(TEX0.PSM == t->m_TEX0.PSM && TEX0.TBW == t->m_TEX0.TBW
&& TEX0.TW == t->m_TEX0.TW && TEX0.TH == t->m_TEX0.TH && TEX0.TW == t->m_TEX0.TW && TEX0.TH == t->m_TEX0.TH
&& (m_renderer->m_psrr || (CLAMP.WMS != 3 && t->m_CLAMP.WMS != 3 && CLAMP.WMT != 3 && t->m_CLAMP.WMT != 3 || CLAMP.u64 == t->m_CLAMP.u64))
&& (pal == 0 || TEX0.CPSM == t->m_TEX0.CPSM && GSVector4i::compare(t->m_clut, clut, pal * sizeof(clut[0])))) && (pal == 0 || TEX0.CPSM == t->m_TEX0.CPSM && GSVector4i::compare(t->m_clut, clut, pal * sizeof(clut[0]))))
{ {
m_tex.splice(m_tex.begin(), m_tex, i); m_tex.splice(m_tex.begin(), m_tex, i);
@ -705,14 +704,7 @@ void GSTextureCache::GSCachedTexture::Update()
{ {
// in dx9 managed textures can be written directly, less copying is faster, but still not as fast as dx10's UpdateResource // in dx9 managed textures can be written directly, less copying is faster, but still not as fast as dx10's UpdateResource
if(m_renderer->m_psrr) m_renderer->m_mem.ReadTextureNPNC(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
{
m_renderer->m_mem.ReadTextureNPNC(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
else
{
m_renderer->m_mem.ReadTextureNP(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
m_texture->Unmap(); m_texture->Unmap();
} }
@ -722,14 +714,7 @@ void GSTextureCache::GSCachedTexture::Update()
pitch = ((r.width() + 3) & ~3) * 4; pitch = ((r.width() + 3) & ~3) * 4;
if(m_renderer->m_psrr) m_renderer->m_mem.ReadTextureNPNC(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
{
m_renderer->m_mem.ReadTextureNPNC(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
else
{
m_renderer->m_mem.ReadTextureNP(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
m_texture->Update(r, buff, pitch); m_texture->Update(r, buff, pitch);
} }

View File

@ -268,11 +268,6 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
if(sel.tfx != 4) if(sel.tfx != 4)
{ {
if(sel.bpp >= 3 || sel.wms >= 3 || sel.wmt >= 3)
{
ssel.min = ssel.mag = 0;
}
hash_map<uint32, CComPtr<ID3D10SamplerState> >::const_iterator i = m_ps_ss.find(ssel); hash_map<uint32, CComPtr<ID3D10SamplerState> >::const_iterator i = m_ps_ss.find(ssel);
if(i != m_ps_ss.end()) if(i != m_ps_ss.end())

View File

@ -79,12 +79,12 @@ public:
{ {
GSVector4 FogColor; GSVector4 FogColor;
float MINU; float MINU;
float MAXU;
float MINV; float MINV;
float MAXU;
float MAXV; float MAXV;
uint32 UMSK; uint32 UMSK;
uint32 UFIX;
uint32 VMSK; uint32 VMSK;
uint32 UFIX;
uint32 VFIX; uint32 VFIX;
float TA0; float TA0;
float TA1; float TA1;
@ -92,6 +92,7 @@ public:
float _pad[1]; float _pad[1];
GSVector2 WH; GSVector2 WH;
GSVector2 rWrH; GSVector2 rWrH;
GSVector4 HalfTexel;
struct PSConstantBuffer() {memset(this, 0, sizeof(*this));} struct PSConstantBuffer() {memset(this, 0, sizeof(*this));}
@ -105,14 +106,16 @@ public:
GSVector4i b2 = b[2]; GSVector4i b2 = b[2];
GSVector4i b3 = b[3]; GSVector4i b3 = b[3];
GSVector4i b4 = b[4]; GSVector4i b4 = b[4];
GSVector4i b5 = b[5];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4)).alltrue()) if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
{ {
a[0] = b0; a[0] = b0;
a[1] = b1; a[1] = b1;
a[2] = b2; a[2] = b2;
a[3] = b3; a[3] = b3;
a[4] = b4; a[4] = b4;
a[5] = b5;
return true; return true;
} }

View File

@ -144,11 +144,11 @@ bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
return true; return true;
} }
bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal, bool psrr) bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal)
{ {
m_dev->PSSetShaderResources(tex, pal); m_dev->PSSetShaderResources(tex, pal);
if(tex && psrr && (sel.wms == 3 || sel.wmt == 3)) if(tex && (sel.wms == 3 || sel.wmt == 3))
{ {
if(sel.wms == 3) if(sel.wms == 3)
{ {
@ -167,21 +167,15 @@ bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSampler
} }
} }
UpdatePS(sel, cb, ssel, psrr); UpdatePS(sel, cb, ssel);
return true; return true;
} }
void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, bool psrr) void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{ {
HRESULT hr; HRESULT hr;
if(!psrr)
{
if(sel.wms == 3) sel.wms = 0;
if(sel.wmt == 3) sel.wmt = 0;
}
hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel); hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel);
if(i == m_ps.end()) if(i == m_ps.end())
@ -233,10 +227,10 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
if(sel.tfx != 4) if(sel.tfx != 4)
{ {
if(sel.bpp >= 3 || sel.wms >= 3 || sel.wmt >= 3) bool b = sel.bpp < 3 && sel.wms < 3 && sel.wmt < 3;
{
ssel.min = ssel.mag = 0; ssel.min = b;
} ssel.mag = b;
hash_map<uint32, Direct3DSamplerState9* >::const_iterator i = m_ps_ss.find(ssel); hash_map<uint32, Direct3DSamplerState9* >::const_iterator i = m_ps_ss.find(ssel);

View File

@ -55,12 +55,12 @@ public:
{ {
GSVector4 FogColor; GSVector4 FogColor;
float MINU; float MINU;
float MAXU;
float MINV; float MINV;
float MAXU;
float MAXV; float MAXV;
uint32 UMSK; uint32 UMSK;
uint32 UFIX;
uint32 VMSK; uint32 VMSK;
uint32 UFIX;
uint32 VFIX; uint32 VFIX;
float TA0; float TA0;
float TA1; float TA1;
@ -68,6 +68,7 @@ public:
float _pad[1]; float _pad[1];
GSVector2 WH; GSVector2 WH;
GSVector2 rWrH; GSVector2 rWrH;
GSVector4 HalfTexel;
}; };
union PSSelector union PSSelector
@ -166,8 +167,8 @@ public:
bool SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim); bool SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim);
bool SetupVS(VSSelector sel, const VSConstantBuffer* cb); bool SetupVS(VSSelector sel, const VSConstantBuffer* cb);
bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal, bool psrr); bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal);
void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, bool psrr); void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupRS(int w, int h, const GSVector4i& scissor); void SetupRS(int w, int h, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf, GSTexture* rt, GSTexture* ds); void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf); void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf);

View File

@ -155,28 +155,25 @@ void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
#endif #endif
Texture2D Texture; Texture2D<float4> Texture;
Texture2D Palette; Texture2D<float> Palette;
SamplerState TextureSampler; SamplerState TextureSampler;
SamplerState PaletteSampler; SamplerState PaletteSampler;
cbuffer cb1 cbuffer cb1
{ {
float4 FogColor; float4 FogColor;
float MINU; float2 MINUV;
float MAXU; float2 MAXUV;
float MINV; uint2 UVMSK;
float MAXV; uint2 UVFIX;
uint UMSK;
uint UFIX;
uint VMSK;
uint VFIX;
float TA0; float TA0;
float TA1; float TA1;
float AREF; float AREF;
float _pad; float _pad;
float2 WH; float2 WH;
float2 rWrH; float2 rWrH;
float4 HalfTexel;
}; };
struct PS_INPUT struct PS_INPUT
@ -194,8 +191,8 @@ struct PS_OUTPUT
#ifndef FST #ifndef FST
#define FST 0 #define FST 0
#define WMS 3 #define WMS 0
#define WMT 3 #define WMT 0
#define BPP 0 #define BPP 0
#define AEM 0 #define AEM 0
#define TFX 0 #define TFX 0
@ -225,191 +222,185 @@ float4 Extract16(uint i)
return f; return f;
} }
int repeatu(int tc) int2 wrapu(float2 f, int2 i)
{ {
return WMS == 3 ? ((tc & UMSK) | UFIX) : tc; if(WMS == 0)
}
int repeatv(int tc)
{
return WMT == 3 ? ((tc & VMSK) | VFIX) : tc;
}
float4 sample(float2 tc)
{
float4 t;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
{ {
int4 itc = tc.xyxy * WH.xyxy; i = frac(f) * WH.xx;
}
else if(WMS == 1)
{
i = saturate(f) * WH.xx;
}
else if(WMS == 2)
{
i = clamp(f, MINUV.xx, MAXUV.xx) * WH.xx;
}
else if(WMS == 3)
{
i = (i & UVMSK.xx) | UVFIX.xx;
}
float4 tc01; return i;
}
tc01.x = repeatu(itc.x); int2 wrapv(float2 f, int2 i)
tc01.y = repeatv(itc.y); {
tc01.z = repeatu(itc.z + 1); if(WMT == 0)
tc01.w = repeatv(itc.w + 1); {
i = frac(f) * WH.yy;
}
else if(WMT == 1)
{
i = saturate(f) * WH.yy;
}
else if(WMT == 2)
{
i = clamp(f, MINUV.yy, MAXUV.yy) * WH.yy;
}
else if(WMT == 3)
{
i = (i & UVMSK.yy) | UVFIX.yy;
}
tc01 *= rWrH.xyxy; return i;
}
float4 t00 = Texture.Sample(TextureSampler, tc01.xy); int4 wrapuv(float4 f, int4 i)
float4 t01 = Texture.Sample(TextureSampler, tc01.zy); {
float4 t10 = Texture.Sample(TextureSampler, tc01.xw); if(WMT == 0)
float4 t11 = Texture.Sample(TextureSampler, tc01.zw); {
i = frac(f) * WH.xyxy;
}
else if(WMT == 1)
{
i = saturate(f) * WH.xyxy;
}
else if(WMT == 2)
{
i = clamp(f, MINUV.xyxy, MAXUV.xyxy) * WH.xyxy;
}
else if(WMT == 3)
{
i = (i & UVMSK.xyxy) | UVFIX.xyxy;
}
float2 dd = frac(tc * WH); return i;
}
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); int4 wrap(float4 uv, int4 iuv)
{
if(WMS == WMT)
{
iuv = wrapuv(uv, iuv);
} }
else else
{
iuv.xz = wrapu(uv.xz, iuv.xz);
iuv.yw = wrapv(uv.yw, iuv.yw);
}
return iuv;
}
float4 sample(float2 tc, float w)
{
if(FST == 0)
{
tc /= w;
}
float4 t;
/*
if(BPP < 3 && WMS < 2 && WMT < 2)
{ {
t = Texture.Sample(TextureSampler, tc); t = Texture.Sample(TextureSampler, tc);
} }
*/
if(BPP < 3 && WMS < 3 && WMT < 3)
{
if(WMS == 2 && WMT == 2) tc = clamp(tc, MINUV.xy, MAXUV.xy);
else if(WMS == 2) tc.x = clamp(tc.x, MINUV.x, MAXUV.x);
else if(WMT == 2) tc.y = clamp(tc.y, MINUV.y, MAXUV.y);
t = Texture.Sample(TextureSampler, tc);
}
else
{
float4 uv = tc.xyxy + HalfTexel;
float4 uv2 = uv * WH.xyxy;
float2 dd = frac(uv2.xy);
int4 iuv = wrap(uv, uv2);
float4 t00, t01, t10, t11;
if(BPP == 3) // 8HP + 32-bit palette
{
float4 a;
a.x = Texture.Load(int3(iuv.xy, 0)).a;
a.y = Texture.Load(int3(iuv.zy, 0)).a;
a.z = Texture.Load(int3(iuv.xw, 0)).a;
a.w = Texture.Load(int3(iuv.zw, 0)).a;
t00 = Palette.Load(a.x);
t01 = Palette.Load(a.y);
t10 = Palette.Load(a.z);
t11 = Palette.Load(a.w);
}
else if(BPP == 4) // 8HP + 16-bit palette
{
// TODO: yuck, just pre-convert the palette to 32-bit
}
else if(BPP == 5) // 16P
{
float4 r;
r.x = Texture.Load(int3(iuv.xy, 0)).r;
r.y = Texture.Load(int3(iuv.zy, 0)).r;
r.z = Texture.Load(int3(iuv.xw, 0)).r;
r.w = Texture.Load(int3(iuv.zw, 0)).r;
uint4 i = r * 65535;
t00 = Extract16(i.x);
t01 = Extract16(i.y);
t10 = Extract16(i.z);
t11 = Extract16(i.w);
}
else
{
t00 = Texture.Load(int3(iuv.xy, 0));
t01 = Texture.Load(int3(iuv.zy, 0));
t10 = Texture.Load(int3(iuv.xw, 0));
t11 = Texture.Load(int3(iuv.zw, 0));
}
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
if(BPP == 1) // 24
{
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2 || BPP == 5) // 16 || 16P
{
if(BPP == 5)
{
t = Normalize16(t);
}
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0;
}
return t; return t;
} }
float4 sample8hp(float2 tc) float4 tfx(float4 t, float4 c)
{ {
float4 tc01;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
{
int4 itc = tc.xyxy * WH.xyxy;
tc01.x = repeatu(itc.x);
tc01.y = repeatv(itc.y);
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
tc01 *= rWrH.xyxy;
}
else
{
tc01.x = tc.x;
tc01.y = tc.y;
tc01.z = tc.x + rWrH.x;
tc01.w = tc.y + rWrH.y;
}
float4 t;
t.x = Texture.Sample(TextureSampler, tc01.xy).a;
t.y = Texture.Sample(TextureSampler, tc01.zy).a;
t.z = Texture.Sample(TextureSampler, tc01.xw).a;
t.w = Texture.Sample(TextureSampler, tc01.zw).a;
float4 t00 = Palette.Sample(PaletteSampler, t.x);
float4 t01 = Palette.Sample(PaletteSampler, t.y);
float4 t10 = Palette.Sample(PaletteSampler, t.z);
float4 t11 = Palette.Sample(PaletteSampler, t.w);
float2 dd = frac(tc * WH);
return lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
float4 sample16p(float2 tc)
{
float4 t;
float4 tc01;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
{
int4 itc = tc.xyxy * WH.xyxy;
tc01.x = repeatu(itc.x);
tc01.y = repeatv(itc.y);
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
tc01 *= rWrH.xyxy;
}
else
{
tc01.x = tc.x;
tc01.y = tc.y;
tc01.z = tc.x + rWrH.x;
tc01.w = tc.y + rWrH.y;
}
t.x = Texture.Sample(TextureSampler, tc01.xy).r;
t.y = Texture.Sample(TextureSampler, tc01.zy).r;
t.z = Texture.Sample(TextureSampler, tc01.xw).r;
t.w = Texture.Sample(TextureSampler, tc01.zw).r;
uint4 i = t * 65535;
float4 t00 = Extract16(i.x);
float4 t01 = Extract16(i.y);
float4 t10 = Extract16(i.z);
float4 t11 = Extract16(i.w);
float2 dd = frac(tc * WH);
return Normalize16(lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y));
}
PS_OUTPUT ps_main(PS_INPUT input)
{
float2 tc = input.t.xy;
if(FST == 0)
{
tc /= input.t.w;
}
tc -= rWrH / 2;
if(WMS == 2)
{
tc.x = clamp(tc.x, MINU, MAXU);
}
if(WMT == 2)
{
tc.y = clamp(tc.y, MINV, MAXV);
}
float4 t;
if(BPP == 0) // 32
{
t = sample(tc);
}
else if(BPP == 1) // 24
{
t = sample(tc);
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2) // 16
{
t = sample(tc);
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
}
else if(BPP == 3) // 8HP / 32-bit palette
{
t = sample8hp(tc);
}
else if(BPP == 4) // 8HP / 16-bit palette
{
// TODO: yuck, just pre-convert the palette to 32-bit
}
else if(BPP == 5) // 16P
{
t = sample16p(tc);
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
}
float4 c = input.c;
if(TFX == 0) if(TFX == 0)
{ {
if(TCC == 0) if(TCC == 0)
@ -451,8 +442,11 @@ PS_OUTPUT ps_main(PS_INPUT input)
} }
} }
c = saturate(c); return saturate(c);
}
void atst(float4 c)
{
if(ATE == 1) if(ATE == 1)
{ {
if(ATST == 0) if(ATST == 0)
@ -476,12 +470,28 @@ PS_OUTPUT ps_main(PS_INPUT input)
clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much
} }
} }
}
float4 fog(float4 c, float f)
{
if(FOG == 1) if(FOG == 1)
{ {
c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z); c.rgb = lerp(FogColor.rgb, c.rgb, f);
} }
return c;
}
PS_OUTPUT ps_main(PS_INPUT input)
{
float4 t = sample(input.t.xy, input.t.w);
float4 c = tfx(t, input.c);
atst(c);
c = fog(c, input.t.z);
if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
{ {
c.rgb = 1; c.rgb = 1;

View File

@ -71,22 +71,19 @@ VS_OUTPUT vs_main(VS_INPUT input)
return output; return output;
} }
float4 ps_params[5]; float4 ps_params[6];
#define FogColor ps_params[0].bgra #define FogColor ps_params[0].bgra
#define MINU ps_params[1].x #define MINUV ps_params[1].xy
#define MAXU ps_params[1].y #define MAXUV ps_params[1].zw
#define MINV ps_params[1].z #define UVMSK ps_params[2].xy
#define MAXV ps_params[1].w #define UVFIX ps_params[2].zw
#define UMSK ps_params[2].x
#define UFIX ps_params[2].y
#define VMSK ps_params[2].z
#define VFIX ps_params[2].w
#define TA0 ps_params[3].x #define TA0 ps_params[3].x
#define TA1 ps_params[3].y #define TA1 ps_params[3].y
#define AREF ps_params[3].z #define AREF ps_params[3].z
#define WH ps_params[4].xy #define WH ps_params[4].xy
#define rWrH ps_params[4].zw #define rWrH ps_params[4].zw
#define HalfTexel ps_params[5]
struct PS_INPUT struct PS_INPUT
{ {
@ -114,129 +111,170 @@ sampler1D Palette : register(s1);
sampler1D UMSKFIX : register(s2); sampler1D UMSKFIX : register(s2);
sampler1D VMSKFIX : register(s3); sampler1D VMSKFIX : register(s3);
float repeatu(float tc) float2 wrapu(float2 f)
{ {
return WMS == 3 ? tex1D(UMSKFIX, tc*rWrH.x)* WH.x : tc; if(WMS == 0)
}
float repeatv(float tc)
{
return WMT == 3 ? tex1D(VMSKFIX, tc*rWrH.y)* WH.y : tc;
}
float4 sample(float2 tc)
{
float4 t;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
{ {
tc -= rWrH / 2; f = frac(f);
}
else if(WMS == 1)
{
f = saturate(f);
}
else if(WMS == 2)
{
f = clamp(f, MINUV.xx, MAXUV.xx);
}
else if(WMS == 3)
{
f.x = tex1D(UMSKFIX, f.x);
f.y = tex1D(UMSKFIX, f.y);
}
int4 itc = tc.xyxy * WH.xyxy; return f;
}
float4 tc01; float2 wrapv(float2 f)
{
if(WMS == 0)
{
f = frac(f);
}
else if(WMS == 1)
{
f = saturate(f);
}
else if(WMS == 2)
{
f = clamp(f, MINUV.yy, MAXUV.yy);
}
else if(WMS == 3)
{
f.x = tex1D(VMSKFIX, f.x);
f.y = tex1D(VMSKFIX, f.y);
}
tc01.x = repeatu(itc.x); return f;
tc01.y = repeatv(itc.y); }
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
tc01 *= rWrH.xyxy; float4 wrapuv(float4 f)
{
if(WMS == 0)
{
f = frac(f);
}
else if(WMS == 1)
{
f = saturate(f);
}
else if(WMS == 2)
{
f = clamp(f, MINUV.xyxy, MAXUV.xyxy);
}
else if(WMS == 3)
{
f.x = tex1D(UMSKFIX, f.x);
f.y = tex1D(VMSKFIX, f.y);
f.z = tex1D(UMSKFIX, f.z);
f.w = tex1D(VMSKFIX, f.w);
}
float4 t00 = tex2D(Texture, tc01.xy); return f;
float4 t01 = tex2D(Texture, tc01.zy); }
float4 t10 = tex2D(Texture, tc01.xw);
float4 t11 = tex2D(Texture, tc01.zw);
float2 dd = frac(tc * WH); float4 wrap(float4 uv)
{
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y); if(WMS == WMT)
{
uv = wrapuv(uv);
} }
else else
{
uv.xz = wrapu(uv.xz);
uv.yw = wrapv(uv.yw);
}
return uv;
}
float4 sample(float2 tc, float w)
{
if(FST == 0)
{
tc /= w;
}
float4 t;
/*
if(BPP < 3 && WMS < 2 && WMT < 2)
{ {
t = tex2D(Texture, tc); t = tex2D(Texture, tc);
} }
*/
if(BPP < 3 && WMS < 3 && WMT < 3)
{
if(WMS == 2 && WMT == 2) tc = clamp(tc, MINUV.xy, MAXUV.xy);
else if(WMS == 2) tc.x = clamp(tc.x, MINUV.x, MAXUV.x);
else if(WMT == 2) tc.y = clamp(tc.y, MINUV.y, MAXUV.y);
t = tex2D(Texture, tc);
}
else
{
float4 uv = tc.xyxy + HalfTexel;
float2 dd = frac(uv.xy * WH);
uv = wrap(uv);
float4 t00, t01, t10, t11;
if(BPP == 3) // 8HP ln
{
float4 a;
a.x = tex2D(Texture, uv.xy).a;
a.y = tex2D(Texture, uv.zy).a;
a.z = tex2D(Texture, uv.xw).a;
a.w = tex2D(Texture, uv.zw).a;
if(RT == 1) a *= 0.5;
t00 = tex1D(Palette, a.x);
t01 = tex1D(Palette, a.y);
t10 = tex1D(Palette, a.z);
t11 = tex1D(Palette, a.w);
}
else
{
t00 = tex2D(Texture, uv.xy);
t01 = tex2D(Texture, uv.zy);
t10 = tex2D(Texture, uv.xw);
t11 = tex2D(Texture, uv.zw);
}
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
if(BPP == 0) // 32
{
if(RT == 1) t.a *= 0.5;
}
else if(BPP == 1) // 24
{
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2) // 16
{
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0;
}
return t; return t;
} }
float4 sample8hp(float2 tc) float4 tfx(float4 t, float4 c)
{ {
tc -= rWrH / 2;
float4 tc01;
tc01.x = tc.x;
tc01.y = tc.y;
tc01.z = tc.x + rWrH.x;
tc01.w = tc.y + rWrH.y;
float4 t;
t.x = tex2D(Texture, tc01.xy).a;
t.y = tex2D(Texture, tc01.zy).a;
t.z = tex2D(Texture, tc01.xw).a;
t.w = tex2D(Texture, tc01.zw).a;
if(RT == 1) t *= 0.5;
float4 t00 = tex1D(Palette, t.x);
float4 t01 = tex1D(Palette, t.y);
float4 t10 = tex1D(Palette, t.z);
float4 t11 = tex1D(Palette, t.w);
float2 dd = frac(tc * WH);
return lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
float4 ps_main(PS_INPUT input) : COLOR
{
float2 tc = input.t.xy;
if(FST == 0)
{
tc /= input.t.w;
}
if(WMS == 2)
{
tc.x = clamp(tc.x, MINU, MAXU);
}
if(WMT == 2)
{
tc.y = clamp(tc.y, MINV, MAXV);
}
float4 t;
if(BPP == 0) // 32
{
t = sample(tc);
if(RT == 1) t.a *= 0.5;
}
else if(BPP == 1) // 24
{
t = sample(tc);
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2) // 16
{
t = sample(tc);
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
}
else if(BPP == 3) // 8HP ln
{
t = sample8hp(tc);
}
float4 c = input.c;
if(TFX == 0) if(TFX == 0)
{ {
if(TCC == 0) if(TCC == 0)
@ -278,8 +316,11 @@ float4 ps_main(PS_INPUT input) : COLOR
} }
} }
c = saturate(c); return saturate(c);
}
void atst(float4 c)
{
if(ATE == 1) if(ATE == 1)
{ {
if(ATST == 0) if(ATST == 0)
@ -303,12 +344,28 @@ float4 ps_main(PS_INPUT input) : COLOR
clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much
} }
} }
}
float4 fog(float4 c, float f)
{
if(FOG == 1) if(FOG == 1)
{ {
c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z); c.rgb = lerp(FogColor.rgb, c.rgb, f);
} }
return c;
}
float4 ps_main(PS_INPUT input) : COLOR
{
float4 t = sample(input.t.xy, input.t.w);
float4 c = tfx(t, input.c);
atst(c);
c = fog(c, input.t.z);
if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
{ {
c.rgb = 1; c.rgb = 1;