GSdx: pixel shaders were reorganized, things might be broken :P

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1341 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-06-06 17:53:34 +00:00
parent 95ff8e3a98
commit 4ed3035661
12 changed files with 414 additions and 369 deletions

View File

@ -22,11 +22,10 @@
#include "StdAfx.h"
#include "GSRenderer.h"
GSRenderer::GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr)
GSRenderer::GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev)
: GSState(base, mt, irq)
, m_dev(dev)
, m_shader(0)
, m_psrr(psrr)
{
m_interlace = theApp.GetConfig("interlace", 0);
m_aspectratio = theApp.GetConfig("aspectratio", 1);

View File

@ -51,7 +51,6 @@ protected:
public:
GSWnd m_wnd;
GSDevice* m_dev;
bool m_psrr;
int s_n;
bool s_dump;
@ -59,7 +58,7 @@ public:
bool s_savez;
public:
GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr = true);
GSRenderer(uint8* base, bool mt, void (*irq)(), GSDevice* dev);
virtual ~GSRenderer();
virtual bool Create(const string& title);
@ -216,8 +215,8 @@ protected:
virtual void Draw() = 0;
public:
GSRendererT(uint8* base, bool mt, void (*irq)(), GSDevice* dev, bool psrr = true)
: GSRenderer(base, mt, irq, dev, psrr)
GSRendererT(uint8* base, bool mt, void (*irq)(), GSDevice* dev)
: GSRenderer(base, mt, irq, dev)
, m_count(0)
, m_maxcount(0)
, m_vertices(NULL)

View File

@ -179,8 +179,8 @@ protected:
vr.z = v.z;
break;
case CLAMP_REGION_REPEAT:
if(m_psrr) {vr.x = maxu; vr.z = vr.x + (minu + 1);}
//else {vr.x = 0; vr.z = w;}
vr.x = maxu;
vr.z = vr.x + (minu + 1);
break;
default:
__assume(0);
@ -202,8 +202,8 @@ protected:
vr.w = v.w;
break;
case CLAMP_REGION_REPEAT:
if(m_psrr) {vr.y = maxv; vr.w = vr.y + (minv + 1);}
//else {r.y = 0; r.w = w;}
vr.y = maxv;
vr.w = vr.y + (minv + 1);
break;
default:
__assume(0);
@ -691,8 +691,8 @@ protected:
}
public:
GSRendererHW(uint8* base, bool mt, void (*irq)(), GSDevice* dev, GSTextureCache* tc, bool psrr)
: GSRendererT<Vertex>(base, mt, irq, dev, psrr)
GSRendererHW(uint8* base, bool mt, void (*irq)(), GSDevice* dev, GSTextureCache* tc)
: GSRendererT<Vertex>(base, mt, irq, dev)
, m_tc(tc)
, m_width(1024)
, m_height(1024)

View File

@ -25,7 +25,7 @@
#include "resource.h"
GSRendererHW10::GSRendererHW10(uint8* base, bool mt, void (*irq)())
: GSRendererHW<GSVertexHW10>(base, mt, irq, new GSDevice10(), new GSTextureCache10(this), true)
: GSRendererHW<GSVertexHW10>(base, mt, irq, new GSDevice10(), new GSTextureCache10(this))
{
InitVertexKick<GSRendererHW10>();
}
@ -361,7 +361,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_ssel.tau = 0;
break;
case 2:
ps_cb.MINU = ((float)(int)context->CLAMP.MINU + 0.5f) / (1 << context->TEX0.TW);
ps_cb.MINU = ((float)(int)context->CLAMP.MINU) / (1 << context->TEX0.TW);
ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW);
ps_ssel.tau = 0;
break;
@ -383,7 +383,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_ssel.tav = 0;
break;
case 2:
ps_cb.MINV = ((float)(int)context->CLAMP.MINV + 0.5f) / (1 << context->TEX0.TH);
ps_cb.MINV = ((float)(int)context->CLAMP.MINV) / (1 << context->TEX0.TH);
ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH);
ps_ssel.tav = 0;
break;
@ -401,6 +401,7 @@ void GSRendererHW10::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache
ps_cb.WH = GSVector2(w, h);
ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h);
ps_cb.HalfTexel = GSVector4(-0.5f / w, -0.5f / h, +0.5f / w, +0.5f / h);
}
else
{

View File

@ -25,7 +25,7 @@
#include "resource.h"
GSRendererHW9::GSRendererHW9(uint8* base, bool mt, void (*irq)())
: GSRendererHW<GSVertexHW9>(base, mt, irq, new GSDevice9(), new GSTextureCache9(this), true)
: GSRendererHW<GSVertexHW9>(base, mt, irq, new GSDevice9(), new GSTextureCache9(this))
{
m_fba.enabled = !!theApp.GetConfig("fba", 1);
m_logz = !!theApp.GetConfig("logz", 0);
@ -335,7 +335,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_ssel.tau = 0;
break;
case 2:
ps_cb.MINU = ((float)(int)context->CLAMP.MINU + 0.5f) / (1 << context->TEX0.TW);
ps_cb.MINU = ((float)(int)context->CLAMP.MINU) / (1 << context->TEX0.TW);
ps_cb.MAXU = ((float)(int)context->CLAMP.MAXU) / (1 << context->TEX0.TW);
ps_ssel.tau = 0;
break;
@ -357,7 +357,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_ssel.tav = 0;
break;
case 2:
ps_cb.MINV = ((float)(int)context->CLAMP.MINV + 0.5f) / (1 << context->TEX0.TH);
ps_cb.MINV = ((float)(int)context->CLAMP.MINV) / (1 << context->TEX0.TH);
ps_cb.MAXV = ((float)(int)context->CLAMP.MAXV) / (1 << context->TEX0.TH);
ps_ssel.tav = 0;
break;
@ -375,6 +375,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_cb.WH = GSVector2(w, h);
ps_cb.rWrH = GSVector2(1.0f / w, 1.0f / h);
ps_cb.HalfTexel = GSVector4(-0.5f / w, -0.5f / h, +0.5f / w, +0.5f / h);
}
else
{
@ -393,7 +394,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
m_tfx.SetupOM(om_dssel, om_bsel, bf, rt, ds);
m_tfx.SetupIA(m_vertices, m_count, topology);
m_tfx.SetupVS(vs_sel, &vs_cb);
m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL, m_psrr);
m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex ? tex->m_texture : NULL, tex ? tex->m_palette : NULL);
m_tfx.SetupRS(w, h, scissor);
// draw
@ -411,7 +412,7 @@ void GSRendererHW9::Draw(int prim, GSTexture* rt, GSTexture* ds, GSTextureCache:
ps_sel.atst = iatst[ps_sel.atst];
m_tfx.UpdatePS(ps_sel, &ps_cb, ps_ssel, m_psrr);
m_tfx.UpdatePS(ps_sel, &ps_cb, ps_ssel);
bool z = om_dssel.zwe;
bool r = om_bsel.wr;

View File

@ -266,7 +266,6 @@ GSTextureCache::GSCachedTexture* GSTextureCache::GetTexture()
{
if(TEX0.PSM == t->m_TEX0.PSM && TEX0.TBW == t->m_TEX0.TBW
&& TEX0.TW == t->m_TEX0.TW && TEX0.TH == t->m_TEX0.TH
&& (m_renderer->m_psrr || (CLAMP.WMS != 3 && t->m_CLAMP.WMS != 3 && CLAMP.WMT != 3 && t->m_CLAMP.WMT != 3 || CLAMP.u64 == t->m_CLAMP.u64))
&& (pal == 0 || TEX0.CPSM == t->m_TEX0.CPSM && GSVector4i::compare(t->m_clut, clut, pal * sizeof(clut[0]))))
{
m_tex.splice(m_tex.begin(), m_tex, i);
@ -705,14 +704,7 @@ void GSTextureCache::GSCachedTexture::Update()
{
// in dx9 managed textures can be written directly, less copying is faster, but still not as fast as dx10's UpdateResource
if(m_renderer->m_psrr)
{
m_renderer->m_mem.ReadTextureNPNC(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
else
{
m_renderer->m_mem.ReadTextureNP(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
m_renderer->m_mem.ReadTextureNPNC(r, bits, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
m_texture->Unmap();
}
@ -722,14 +714,7 @@ void GSTextureCache::GSCachedTexture::Update()
pitch = ((r.width() + 3) & ~3) * 4;
if(m_renderer->m_psrr)
{
m_renderer->m_mem.ReadTextureNPNC(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
else
{
m_renderer->m_mem.ReadTextureNP(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
}
m_renderer->m_mem.ReadTextureNPNC(r, buff, pitch, m_renderer->m_context->TEX0, m_renderer->m_env.TEXA, m_renderer->m_context->CLAMP);
m_texture->Update(r, buff, pitch);
}

View File

@ -268,11 +268,6 @@ void GSTextureFX10::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSampl
if(sel.tfx != 4)
{
if(sel.bpp >= 3 || sel.wms >= 3 || sel.wmt >= 3)
{
ssel.min = ssel.mag = 0;
}
hash_map<uint32, CComPtr<ID3D10SamplerState> >::const_iterator i = m_ps_ss.find(ssel);
if(i != m_ps_ss.end())

View File

@ -79,12 +79,12 @@ public:
{
GSVector4 FogColor;
float MINU;
float MAXU;
float MINV;
float MAXU;
float MAXV;
uint32 UMSK;
uint32 UFIX;
uint32 VMSK;
uint32 UFIX;
uint32 VFIX;
float TA0;
float TA1;
@ -92,6 +92,7 @@ public:
float _pad[1];
GSVector2 WH;
GSVector2 rWrH;
GSVector4 HalfTexel;
struct PSConstantBuffer() {memset(this, 0, sizeof(*this));}
@ -105,14 +106,16 @@ public:
GSVector4i b2 = b[2];
GSVector4i b3 = b[3];
GSVector4i b4 = b[4];
GSVector4i b5 = b[5];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4)).alltrue())
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue())
{
a[0] = b0;
a[1] = b1;
a[2] = b2;
a[3] = b3;
a[4] = b4;
a[5] = b5;
return true;
}

View File

@ -144,11 +144,11 @@ bool GSTextureFX9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
return true;
}
bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal, bool psrr)
bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal)
{
m_dev->PSSetShaderResources(tex, pal);
if(tex && psrr && (sel.wms == 3 || sel.wmt == 3))
if(tex && (sel.wms == 3 || sel.wmt == 3))
{
if(sel.wms == 3)
{
@ -167,21 +167,15 @@ bool GSTextureFX9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSampler
}
}
UpdatePS(sel, cb, ssel, psrr);
UpdatePS(sel, cb, ssel);
return true;
}
void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, bool psrr)
void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{
HRESULT hr;
if(!psrr)
{
if(sel.wms == 3) sel.wms = 0;
if(sel.wmt == 3) sel.wmt = 0;
}
hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel);
if(i == m_ps.end())
@ -233,10 +227,10 @@ void GSTextureFX9::UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSample
if(sel.tfx != 4)
{
if(sel.bpp >= 3 || sel.wms >= 3 || sel.wmt >= 3)
{
ssel.min = ssel.mag = 0;
}
bool b = sel.bpp < 3 && sel.wms < 3 && sel.wmt < 3;
ssel.min = b;
ssel.mag = b;
hash_map<uint32, Direct3DSamplerState9* >::const_iterator i = m_ps_ss.find(ssel);

View File

@ -55,12 +55,12 @@ public:
{
GSVector4 FogColor;
float MINU;
float MAXU;
float MINV;
float MAXU;
float MAXV;
uint32 UMSK;
uint32 UFIX;
uint32 VMSK;
uint32 UFIX;
uint32 VFIX;
float TA0;
float TA1;
@ -68,6 +68,7 @@ public:
float _pad[1];
GSVector2 WH;
GSVector2 rWrH;
GSVector4 HalfTexel;
};
union PSSelector
@ -166,8 +167,8 @@ public:
bool SetupIA(const GSVertexHW9* vertices, int count, D3DPRIMITIVETYPE prim);
bool SetupVS(VSSelector sel, const VSConstantBuffer* cb);
bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal, bool psrr);
void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, bool psrr);
bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, GSTexture* tex, GSTexture* pal);
void UpdatePS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupRS(int w, int h, const GSVector4i& scissor);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf, GSTexture* rt, GSTexture* ds);
void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, uint8 bf);

View File

@ -155,28 +155,25 @@ void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
#endif
Texture2D Texture;
Texture2D Palette;
Texture2D<float4> Texture;
Texture2D<float> Palette;
SamplerState TextureSampler;
SamplerState PaletteSampler;
cbuffer cb1
{
float4 FogColor;
float MINU;
float MAXU;
float MINV;
float MAXV;
uint UMSK;
uint UFIX;
uint VMSK;
uint VFIX;
float2 MINUV;
float2 MAXUV;
uint2 UVMSK;
uint2 UVFIX;
float TA0;
float TA1;
float AREF;
float _pad;
float2 WH;
float2 rWrH;
float4 HalfTexel;
};
struct PS_INPUT
@ -194,8 +191,8 @@ struct PS_OUTPUT
#ifndef FST
#define FST 0
#define WMS 3
#define WMT 3
#define WMS 0
#define WMT 0
#define BPP 0
#define AEM 0
#define TFX 0
@ -225,191 +222,185 @@ float4 Extract16(uint i)
return f;
}
int repeatu(int tc)
int2 wrapu(float2 f, int2 i)
{
return WMS == 3 ? ((tc & UMSK) | UFIX) : tc;
}
int repeatv(int tc)
{
return WMT == 3 ? ((tc & VMSK) | VFIX) : tc;
}
float4 sample(float2 tc)
{
float4 t;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
if(WMS == 0)
{
int4 itc = tc.xyxy * WH.xyxy;
float4 tc01;
tc01.x = repeatu(itc.x);
tc01.y = repeatv(itc.y);
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
tc01 *= rWrH.xyxy;
i = frac(f) * WH.xx;
}
else if(WMS == 1)
{
i = saturate(f) * WH.xx;
}
else if(WMS == 2)
{
i = clamp(f, MINUV.xx, MAXUV.xx) * WH.xx;
}
else if(WMS == 3)
{
i = (i & UVMSK.xx) | UVFIX.xx;
}
return i;
}
float4 t00 = Texture.Sample(TextureSampler, tc01.xy);
float4 t01 = Texture.Sample(TextureSampler, tc01.zy);
float4 t10 = Texture.Sample(TextureSampler, tc01.xw);
float4 t11 = Texture.Sample(TextureSampler, tc01.zw);
// Applies the vertical wrap mode, selected by the WMT macro, to a pair of v coordinates.
// f: v coordinates before scaling by WH.y.
// i: the matching integer coordinates; only read when WMT == 3, overwritten otherwise.
// Returns the integer v coordinates to fetch from.
// If WMT is none of 0..3, i is returned unchanged.
int2 wrapv(float2 f, int2 i)
{
if(WMT == 0)
{
// keep only the fractional part of f, then scale by WH.y
i = frac(f) * WH.yy;
}
else if(WMT == 1)
{
// limit f to [0, 1], then scale by WH.y
i = saturate(f) * WH.yy;
}
else if(WMT == 2)
{
// limit f to [MINUV.y, MAXUV.y], then scale by WH.y
i = clamp(f, MINUV.yy, MAXUV.yy) * WH.yy;
}
else if(WMT == 3)
{
// mask the integer coordinate with UVMSK.y and OR in UVFIX.y
i = (i & UVMSK.yy) | UVFIX.yy;
}
return i;
}
float2 dd = frac(tc * WH);
int4 wrapuv(float4 f, int4 i)
{
if(WMT == 0)
{
i = frac(f) * WH.xyxy;
}
else if(WMT == 1)
{
i = saturate(f) * WH.xyxy;
}
else if(WMT == 2)
{
i = clamp(f, MINUV.xyxy, MAXUV.xyxy) * WH.xyxy;
}
else if(WMT == 3)
{
i = (i & UVMSK.xyxy) | UVFIX.xyxy;
}
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
return i;
}
// Wraps four coordinates packed as (u, v, u, v) and returns them as integers.
// uv: the float coordinates; iuv: their integer counterparts (both are forwarded to the helpers).
// When both axes share a mode (WMS == WMT) one wrapuv call handles all four components;
// otherwise the u components (xz) go through wrapu and the v components (yw) through wrapv.
int4 wrap(float4 uv, int4 iuv)
{
if(WMS == WMT)
{
iuv = wrapuv(uv, iuv);
}
else
{
iuv.xz = wrapu(uv.xz, iuv.xz);
iuv.yw = wrapv(uv.yw, iuv.yw);
}
return iuv;
}
// Fetches the texel colour for texture coordinate tc.
// tc: texture coordinate; divided by w first when FST == 0.
// w: divisor applied to tc when FST == 0.
// Returns the sampled colour, with alpha rewritten for BPP 1, 2 and 5 (see the end of the function).
float4 sample(float2 tc, float w)
{
if(FST == 0)
{
tc /= w;
}
float4 t;
/*
if(BPP < 3 && WMS < 2 && WMT < 2)
{
t = Texture.Sample(TextureSampler, tc);
}
*/
if(BPP < 3 && WMS < 3 && WMT < 3)
{
// BPP 0..2 with wrap modes 0..2: a single Sample call through TextureSampler;
// mode 2 only needs tc limited to [MINUV, MAXUV] on the affected axis first
if(WMS == 2 && WMT == 2) tc = clamp(tc, MINUV.xy, MAXUV.xy);
else if(WMS == 2) tc.x = clamp(tc.x, MINUV.x, MAXUV.x);
else if(WMT == 2) tc.y = clamp(tc.y, MINUV.y, MAXUV.y);
t = Texture.Sample(TextureSampler, tc);
}
else
{
// otherwise: four Load calls blended manually with lerp
// uv.xy and uv.zw are tc offset by HalfTexel.xy and HalfTexel.zw respectively
float4 uv = tc.xyxy + HalfTexel;
float4 uv2 = uv * WH.xyxy;
// blend weights: fractional part of the first scaled position
float2 dd = frac(uv2.xy);
// uv2 is converted to int4 implicitly for wrap's second parameter
int4 iuv = wrap(uv, uv2);
float4 t00, t01, t10, t11;
if(BPP == 3) // 8HP + 32-bit palette
{
float4 a;
a.x = Texture.Load(int3(iuv.xy, 0)).a;
a.y = Texture.Load(int3(iuv.zy, 0)).a;
a.z = Texture.Load(int3(iuv.xw, 0)).a;
a.w = Texture.Load(int3(iuv.zw, 0)).a;
// NOTE(review): the alpha values read above are passed directly as the Palette.Load location - confirm this addresses the intended palette entry
t00 = Palette.Load(a.x);
t01 = Palette.Load(a.y);
t10 = Palette.Load(a.z);
t11 = Palette.Load(a.w);
}
else if(BPP == 4) // 8HP + 16-bit palette
{
// TODO: yuck, just pre-convert the palette to 32-bit
// NOTE(review): this branch assigns nothing to t00..t11, which are still blended below
}
else if(BPP == 5) // 16P
{
float4 r;
r.x = Texture.Load(int3(iuv.xy, 0)).r;
r.y = Texture.Load(int3(iuv.zy, 0)).r;
r.z = Texture.Load(int3(iuv.xw, 0)).r;
r.w = Texture.Load(int3(iuv.zw, 0)).r;
// scale the red channel by 65535 and convert to integers before passing them to Extract16
uint4 i = r * 65535;
t00 = Extract16(i.x);
t01 = Extract16(i.y);
t10 = Extract16(i.z);
t11 = Extract16(i.w);
}
else
{
t00 = Texture.Load(int3(iuv.xy, 0));
t01 = Texture.Load(int3(iuv.zy, 0));
t10 = Texture.Load(int3(iuv.xw, 0));
t11 = Texture.Load(int3(iuv.zw, 0));
}
// blend along x with dd.x, then along y with dd.y
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
if(BPP == 1) // 24
{
// alpha is TA0, or 0 when AEM != 0 and rgb is all zero
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2 || BPP == 5) // 16 || 16P
{
if(BPP == 5)
{
t = Normalize16(t);
}
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
// alpha >= 0.5 selects TA1; otherwise the same TA0 / 0 rule as BPP == 1
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0;
}
return t;
}
float4 sample8hp(float2 tc)
float4 tfx(float4 t, float4 c)
{
float4 tc01;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
{
int4 itc = tc.xyxy * WH.xyxy;
tc01.x = repeatu(itc.x);
tc01.y = repeatv(itc.y);
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
tc01 *= rWrH.xyxy;
}
else
{
tc01.x = tc.x;
tc01.y = tc.y;
tc01.z = tc.x + rWrH.x;
tc01.w = tc.y + rWrH.y;
}
float4 t;
t.x = Texture.Sample(TextureSampler, tc01.xy).a;
t.y = Texture.Sample(TextureSampler, tc01.zy).a;
t.z = Texture.Sample(TextureSampler, tc01.xw).a;
t.w = Texture.Sample(TextureSampler, tc01.zw).a;
float4 t00 = Palette.Sample(PaletteSampler, t.x);
float4 t01 = Palette.Sample(PaletteSampler, t.y);
float4 t10 = Palette.Sample(PaletteSampler, t.z);
float4 t11 = Palette.Sample(PaletteSampler, t.w);
float2 dd = frac(tc * WH);
return lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
float4 sample16p(float2 tc)
{
float4 t;
float4 tc01;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
{
int4 itc = tc.xyxy * WH.xyxy;
tc01.x = repeatu(itc.x);
tc01.y = repeatv(itc.y);
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
tc01 *= rWrH.xyxy;
}
else
{
tc01.x = tc.x;
tc01.y = tc.y;
tc01.z = tc.x + rWrH.x;
tc01.w = tc.y + rWrH.y;
}
t.x = Texture.Sample(TextureSampler, tc01.xy).r;
t.y = Texture.Sample(TextureSampler, tc01.zy).r;
t.z = Texture.Sample(TextureSampler, tc01.xw).r;
t.w = Texture.Sample(TextureSampler, tc01.zw).r;
uint4 i = t * 65535;
float4 t00 = Extract16(i.x);
float4 t01 = Extract16(i.y);
float4 t10 = Extract16(i.z);
float4 t11 = Extract16(i.w);
float2 dd = frac(tc * WH);
return Normalize16(lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y));
}
PS_OUTPUT ps_main(PS_INPUT input)
{
float2 tc = input.t.xy;
if(FST == 0)
{
tc /= input.t.w;
}
tc -= rWrH / 2;
if(WMS == 2)
{
tc.x = clamp(tc.x, MINU, MAXU);
}
if(WMT == 2)
{
tc.y = clamp(tc.y, MINV, MAXV);
}
float4 t;
if(BPP == 0) // 32
{
t = sample(tc);
}
else if(BPP == 1) // 24
{
t = sample(tc);
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2) // 16
{
t = sample(tc);
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
}
else if(BPP == 3) // 8HP / 32-bit palette
{
t = sample8hp(tc);
}
else if(BPP == 4) // 8HP / 16-bit palette
{
// TODO: yuck, just pre-convert the palette to 32-bit
}
else if(BPP == 5) // 16P
{
t = sample16p(tc);
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
}
float4 c = input.c;
if(TFX == 0)
{
if(TCC == 0)
@ -435,7 +426,7 @@ PS_OUTPUT ps_main(PS_INPUT input)
else if(TFX == 2)
{
c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
if(TCC == 1)
{
c.a += t.a;
@ -444,15 +435,18 @@ PS_OUTPUT ps_main(PS_INPUT input)
else if(TFX == 3)
{
c.rgb = c.rgb * t.rgb * 255.0f / 128 + c.a;
if(TCC == 1)
{
c.a = t.a;
}
}
return saturate(c);
}
c = saturate(c);
void atst(float4 c)
{
if(ATE == 1)
{
if(ATST == 0)
@ -476,12 +470,28 @@ PS_OUTPUT ps_main(PS_INPUT input)
clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much
}
}
}
float4 fog(float4 c, float f)
{
if(FOG == 1)
{
c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z);
c.rgb = lerp(FogColor.rgb, c.rgb, f);
}
return c;
}
PS_OUTPUT ps_main(PS_INPUT input)
{
float4 t = sample(input.t.xy, input.t.w);
float4 c = tfx(t, input.c);
atst(c);
c = fog(c, input.t.z);
if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
{
c.rgb = 1;

View File

@ -71,22 +71,19 @@ VS_OUTPUT vs_main(VS_INPUT input)
return output;
}
float4 ps_params[5];
float4 ps_params[6];
#define FogColor ps_params[0].bgra
#define MINU ps_params[1].x
#define MAXU ps_params[1].y
#define MINV ps_params[1].z
#define MAXV ps_params[1].w
#define UMSK ps_params[2].x
#define UFIX ps_params[2].y
#define VMSK ps_params[2].z
#define VFIX ps_params[2].w
#define MINUV ps_params[1].xy
#define MAXUV ps_params[1].zw
#define UVMSK ps_params[2].xy
#define UVFIX ps_params[2].zw
#define TA0 ps_params[3].x
#define TA1 ps_params[3].y
#define AREF ps_params[3].z
#define WH ps_params[4].xy
#define rWrH ps_params[4].zw
#define HalfTexel ps_params[5]
struct PS_INPUT
{
@ -114,129 +111,170 @@ sampler1D Palette : register(s1);
sampler1D UMSKFIX : register(s2);
sampler1D VMSKFIX : register(s3);
float repeatu(float tc)
float2 wrapu(float2 f)
{
return WMS == 3 ? tex1D(UMSKFIX, tc*rWrH.x)* WH.x : tc;
}
float repeatv(float tc)
{
return WMT == 3 ? tex1D(VMSKFIX, tc*rWrH.y)* WH.y : tc;
}
float4 sample(float2 tc)
{
float4 t;
// if(WMS >= 2 || WMT >= 2)
if(WMS >= 3 || WMT >= 3)
if(WMS == 0)
{
tc -= rWrH / 2;
int4 itc = tc.xyxy * WH.xyxy;
float4 tc01;
tc01.x = repeatu(itc.x);
tc01.y = repeatv(itc.y);
tc01.z = repeatu(itc.z + 1);
tc01.w = repeatv(itc.w + 1);
f = frac(f);
}
else if(WMS == 1)
{
f = saturate(f);
}
else if(WMS == 2)
{
f = clamp(f, MINUV.xx, MAXUV.xx);
}
else if(WMS == 3)
{
f.x = tex1D(UMSKFIX, f.x);
f.y = tex1D(UMSKFIX, f.y);
}
tc01 *= rWrH.xyxy;
return f;
}
float4 t00 = tex2D(Texture, tc01.xy);
float4 t01 = tex2D(Texture, tc01.zy);
float4 t10 = tex2D(Texture, tc01.xw);
float4 t11 = tex2D(Texture, tc01.zw);
// Applies the vertical wrap mode to a pair of v coordinates and returns the result.
//
// The mode is taken from the WMT macro. wrap() calls this function only when
// WMS != WMT, so testing WMS here (as the previous version did) applied the
// horizontal mode to the vertical axis.
//
// f: two v coordinates.
// If WMT is none of 0..3, f is returned unchanged.
float2 wrapv(float2 f)
{
	if(WMT == 0)
	{
		// keep only the fractional part
		f = frac(f);
	}
	else if(WMT == 1)
	{
		// limit to [0, 1]
		f = saturate(f);
	}
	else if(WMT == 2)
	{
		// limit to [MINUV.y, MAXUV.y]
		f = clamp(f, MINUV.yy, MAXUV.yy);
	}
	else if(WMT == 3)
	{
		// remap each coordinate through the VMSKFIX 1D lookup sampler
		f.x = tex1D(VMSKFIX, f.x);
		f.y = tex1D(VMSKFIX, f.y);
	}

	return f;
}
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
// Wraps four coordinates packed as (u, v, u, v) using a single mode for both axes.
// The mode is read from WMS only; wrap() calls this function when WMS == WMT.
// If WMS is none of 0..3, f is returned unchanged.
float4 wrapuv(float4 f)
{
if(WMS == 0)
{
// keep only the fractional part
f = frac(f);
}
else if(WMS == 1)
{
// limit to [0, 1]
f = saturate(f);
}
else if(WMS == 2)
{
// limit u to [MINUV.x, MAXUV.x] and v to [MINUV.y, MAXUV.y]
f = clamp(f, MINUV.xyxy, MAXUV.xyxy);
}
else if(WMS == 3)
{
// remap u components (x, z) through the UMSKFIX sampler and v components (y, w) through VMSKFIX
f.x = tex1D(UMSKFIX, f.x);
f.y = tex1D(VMSKFIX, f.y);
f.z = tex1D(UMSKFIX, f.z);
f.w = tex1D(VMSKFIX, f.w);
}
return f;
}
// Wraps four coordinates packed as (u, v, u, v).
// When both axes share a mode (WMS == WMT) one wrapuv call handles all four components;
// otherwise the u components (xz) go through wrapu and the v components (yw) through wrapv.
float4 wrap(float4 uv)
{
if(WMS == WMT)
{
uv = wrapuv(uv);
}
else
{
uv.xz = wrapu(uv.xz);
uv.yw = wrapv(uv.yw);
}
return uv;
}
// Fetches the texel colour for texture coordinate tc.
// tc: texture coordinate; divided by w first when FST == 0.
// w: divisor applied to tc when FST == 0.
// Returns the sampled colour, with alpha adjusted per BPP (see the end of the function).
float4 sample(float2 tc, float w)
{
if(FST == 0)
{
tc /= w;
}
float4 t;
/*
if(BPP < 3 && WMS < 2 && WMT < 2)
{
t = tex2D(Texture, tc);
}
*/
if(BPP < 3 && WMS < 3 && WMT < 3)
{
// BPP 0..2 with wrap modes 0..2: a single tex2D fetch;
// mode 2 only needs tc limited to [MINUV, MAXUV] on the affected axis first
if(WMS == 2 && WMT == 2) tc = clamp(tc, MINUV.xy, MAXUV.xy);
else if(WMS == 2) tc.x = clamp(tc.x, MINUV.x, MAXUV.x);
else if(WMT == 2) tc.y = clamp(tc.y, MINUV.y, MAXUV.y);
t = tex2D(Texture, tc);
}
else
{
// otherwise: four fetches blended manually with lerp
// uv.xy and uv.zw are tc offset by HalfTexel.xy and HalfTexel.zw respectively
float4 uv = tc.xyxy + HalfTexel;
// blend weights are taken before wrapping
float2 dd = frac(uv.xy * WH);
uv = wrap(uv);
float4 t00, t01, t10, t11;
if(BPP == 3) // 8HP ln
{
float4 a;
a.x = tex2D(Texture, uv.xy).a;
a.y = tex2D(Texture, uv.zy).a;
a.z = tex2D(Texture, uv.xw).a;
a.w = tex2D(Texture, uv.zw).a;
// when RT == 1 the fetched alpha values are halved before the palette lookup
if(RT == 1) a *= 0.5;
// each alpha value is used as the coordinate into the 1D Palette sampler
t00 = tex1D(Palette, a.x);
t01 = tex1D(Palette, a.y);
t10 = tex1D(Palette, a.z);
t11 = tex1D(Palette, a.w);
}
else
{
t00 = tex2D(Texture, uv.xy);
t01 = tex2D(Texture, uv.zy);
t10 = tex2D(Texture, uv.xw);
t11 = tex2D(Texture, uv.zw);
}
// blend along x with dd.x, then along y with dd.y
t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
if(BPP == 0) // 32
{
if(RT == 1) t.a *= 0.5;
}
else if(BPP == 1) // 24
{
// alpha is TA0, or 0 when AEM != 0 and rgb is all zero
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2) // 16
{
// a bit incompatible with up-scaling because the 1 bit alpha is interpolated
// alpha >= 0.5 selects TA1; otherwise the same TA0 / 0 rule as BPP == 1
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0;
}
return t;
}
float4 sample8hp(float2 tc)
float4 tfx(float4 t, float4 c)
{
tc -= rWrH / 2;
float4 tc01;
tc01.x = tc.x;
tc01.y = tc.y;
tc01.z = tc.x + rWrH.x;
tc01.w = tc.y + rWrH.y;
float4 t;
t.x = tex2D(Texture, tc01.xy).a;
t.y = tex2D(Texture, tc01.zy).a;
t.z = tex2D(Texture, tc01.xw).a;
t.w = tex2D(Texture, tc01.zw).a;
if(RT == 1) t *= 0.5;
float4 t00 = tex1D(Palette, t.x);
float4 t01 = tex1D(Palette, t.y);
float4 t10 = tex1D(Palette, t.z);
float4 t11 = tex1D(Palette, t.w);
float2 dd = frac(tc * WH);
return lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
}
float4 ps_main(PS_INPUT input) : COLOR
{
float2 tc = input.t.xy;
if(FST == 0)
{
tc /= input.t.w;
}
if(WMS == 2)
{
tc.x = clamp(tc.x, MINU, MAXU);
}
if(WMT == 2)
{
tc.y = clamp(tc.y, MINV, MAXV);
}
float4 t;
if(BPP == 0) // 32
{
t = sample(tc);
if(RT == 1) t.a *= 0.5;
}
else if(BPP == 1) // 24
{
t = sample(tc);
t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
}
else if(BPP == 2) // 16
{
t = sample(tc);
t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
}
else if(BPP == 3) // 8HP ln
{
t = sample8hp(tc);
}
float4 c = input.c;
if(TFX == 0)
{
if(TCC == 0)
@ -277,9 +315,12 @@ float4 ps_main(PS_INPUT input) : COLOR
c.a = t.a;
}
}
return saturate(c);
}
c = saturate(c);
void atst(float4 c)
{
if(ATE == 1)
{
if(ATST == 0)
@ -303,12 +344,28 @@ float4 ps_main(PS_INPUT input) : COLOR
clip(abs(c.a - AREF) - 0.4f / 255); // FIXME: 0.5f is too much
}
}
}
float4 fog(float4 c, float f)
{
if(FOG == 1)
{
c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z);
c.rgb = lerp(FogColor.rgb, c.rgb, f);
}
return c;
}
float4 ps_main(PS_INPUT input) : COLOR
{
float4 t = sample(input.t.xy, input.t.w);
float4 c = tfx(t, input.c);
atst(c);
c = fog(c, input.t.z);
if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
{
c.rgb = 1;