GSdx: Direct3D port of HPO special/V2

Adds a port of the new Half-pixel Offset options for the Direct3D
Hardware renderers.
This commit is contained in:
FlatOutPS2 2017-02-17 10:59:21 +01:00 committed by Gregory Hainaut
parent 2d591182c4
commit 975441dfe9
15 changed files with 307 additions and 243 deletions

View File

@ -145,7 +145,7 @@ public: // TODO
hash_map<uint32, GSVertexShader11 > m_vs; hash_map<uint32, GSVertexShader11 > m_vs;
CComPtr<ID3D11Buffer> m_vs_cb; CComPtr<ID3D11Buffer> m_vs_cb;
hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs; hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs;
hash_map<uint32, CComPtr<ID3D11PixelShader> > m_ps; hash_map<uint64, CComPtr<ID3D11PixelShader> > m_ps;
CComPtr<ID3D11Buffer> m_ps_cb; CComPtr<ID3D11Buffer> m_ps_cb;
hash_map<uint32, CComPtr<ID3D11SamplerState> > m_ps_ss; hash_map<uint32, CComPtr<ID3D11SamplerState> > m_ps_ss;
CComPtr<ID3D11SamplerState> m_palette_ss; CComPtr<ID3D11SamplerState> m_palette_ss;

View File

@ -173,7 +173,7 @@ public: // TODO
// Shaders... // Shaders...
hash_map<uint32, GSVertexShader9 > m_vs; hash_map<uint32, GSVertexShader9 > m_vs;
hash_map<uint32, CComPtr<IDirect3DPixelShader9> > m_ps; hash_map<uint64, CComPtr<IDirect3DPixelShader9> > m_ps;
hash_map<uint32, Direct3DSamplerState9* > m_ps_ss; hash_map<uint32, Direct3DSamplerState9* > m_ps_ss;
hash_map<uint32, Direct3DDepthStencilState9* > m_om_dss; hash_map<uint32, Direct3DDepthStencilState9* > m_om_dss;
hash_map<uint32, Direct3DBlendState9* > m_om_bs; hash_map<uint32, Direct3DBlendState9* > m_om_bs;

View File

@ -34,13 +34,13 @@ public:
{ {
GSVector4 VertexScale; GSVector4 VertexScale;
GSVector4 VertexOffset; GSVector4 VertexOffset;
GSVector4 TextureScale; GSVector4 Texture_Scale_Offset;
struct VSConstantBuffer() struct VSConstantBuffer()
{ {
VertexScale = GSVector4::zero(); VertexScale = GSVector4::zero();
VertexOffset = GSVector4::zero(); VertexOffset = GSVector4::zero();
TextureScale = GSVector4::zero(); Texture_Scale_Offset = GSVector4::zero();
} }
__forceinline bool Update(const VSConstantBuffer* cb) __forceinline bool Update(const VSConstantBuffer* cb)
@ -48,15 +48,12 @@ public:
GSVector4i* a = (GSVector4i*)this; GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb; GSVector4i* b = (GSVector4i*)cb;
GSVector4i b0 = b[0]; if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue())
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
if(!((a[0] == b0) & (a[1] == b1) & (a[2] == b2)).alltrue())
{ {
a[0] = b0; a[0] = b[0];
a[1] = b1; a[1] = b[1];
a[2] = b2; a[2] = b[2];
a[3] = b[3];
return true; return true;
} }
@ -112,21 +109,14 @@ public:
GSVector4i* a = (GSVector4i*)this; GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb; GSVector4i* b = (GSVector4i*)cb;
GSVector4i b0 = b[0]; if(!((a[0] == b[0]) /*& (a[1] == b1)*/ & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue()) // if WH matches HalfTexel does too
GSVector4i b1 = b[1];
GSVector4i b2 = b[2];
GSVector4i b3 = b[3];
GSVector4i b4 = b[4];
GSVector4i b5 = b[5];
if(!((a[0] == b0) /*& (a[1] == b1)*/ & (a[2] == b2) & (a[3] == b3) & (a[4] == b4) & (a[5] == b5)).alltrue()) // if WH matches HalfTexel does too
{ {
a[0] = b0; a[0] = b[0];
a[1] = b1; a[1] = b[1];
a[2] = b2; a[2] = b[2];
a[3] = b3; a[3] = b[3];
a[4] = b4; a[4] = b[4];
a[5] = b5; a[5] = b[5];
return true; return true;
} }
@ -162,7 +152,7 @@ public:
uint32 fst:1; uint32 fst:1;
uint32 wms:2; uint32 wms:2;
uint32 wmt:2; uint32 wmt:2;
uint32 fmt:3; uint32 fmt:4;
uint32 aem:1; uint32 aem:1;
uint32 tfx:3; uint32 tfx:3;
uint32 tcc:1; uint32 tcc:1;
@ -180,12 +170,14 @@ public:
uint32 point_sampler:1; uint32 point_sampler:1;
uint32 shuffle:1; uint32 shuffle:1;
uint32 read_ba:1; uint32 read_ba:1;
uint32 _free:32;
}; };
uint32 key; uint64 key;
}; };
operator uint32() {return key & 0xfffffff;} operator uint64() {return key;}
PSSelector() : key(0) {} PSSelector() : key(0) {}
}; };

View File

@ -597,7 +597,7 @@ void GSRendererCS::Draw()
CComPtr<ID3D11PixelShader> ps[2] = {m_ps0, NULL}; CComPtr<ID3D11PixelShader> ps[2] = {m_ps0, NULL};
hash_map<uint32, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps1.find(ps_sel); hash_map<uint64, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps1.find(ps_sel);
if(i != m_ps1.end()) if(i != m_ps1.end())
{ {

View File

@ -111,7 +111,7 @@ class GSRendererCS : public GSRenderer
CComPtr<ID3D11Buffer> m_vs_cb; CComPtr<ID3D11Buffer> m_vs_cb;
hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs; hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs;
CComPtr<ID3D11PixelShader> m_ps0; CComPtr<ID3D11PixelShader> m_ps0;
hash_map<uint32, CComPtr<ID3D11PixelShader> > m_ps1; hash_map<uint64, CComPtr<ID3D11PixelShader> > m_ps1;
CComPtr<ID3D11Buffer> m_ps_cb; CComPtr<ID3D11Buffer> m_ps_cb;
void Write(GSOffset* off, const GSVector4i& r); void Write(GSOffset* off, const GSVector4i& r);

View File

@ -34,10 +34,12 @@ GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter)
UserHacks_AlphaHack = theApp.GetConfigB("UserHacks_AlphaHack"); UserHacks_AlphaHack = theApp.GetConfigB("UserHacks_AlphaHack");
UserHacks_AlphaStencil = theApp.GetConfigB("UserHacks_AlphaStencil"); UserHacks_AlphaStencil = theApp.GetConfigB("UserHacks_AlphaStencil");
UserHacks_TCOffset = theApp.GetConfigI("UserHacks_TCOffset"); UserHacks_TCOffset = theApp.GetConfigI("UserHacks_TCOffset");
UserHacks_HPO = theApp.GetConfigI("UserHacks_HalfPixelOffset");
} else { } else {
UserHacks_AlphaHack = false; UserHacks_AlphaHack = false;
UserHacks_AlphaStencil = false; UserHacks_AlphaStencil = false;
UserHacks_TCOffset = 0; UserHacks_TCOffset = 0;
UserHacks_HPO = 0;
} }
UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f; UserHacks_TCO_x = (UserHacks_TCOffset & 0xFFFF) / -1000.0f;
@ -58,39 +60,39 @@ void GSRendererDX::EmulateAtst(const int pass, const GSTextureCache::Source* tex
switch (atst) { switch (atst) {
case ATST_LESS: case ATST_LESS:
if (tex && tex->m_spritehack_t) { if (tex && tex->m_spritehack_t) {
ps_sel.atst = 0; m_ps_sel.atst = 0;
} }
else { else {
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f; ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f;
ps_sel.atst = 1; m_ps_sel.atst = 1;
} }
break; break;
case ATST_LEQUAL: case ATST_LEQUAL:
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f + 1.0f; ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f + 1.0f;
ps_sel.atst = 1; m_ps_sel.atst = 1;
break; break;
case ATST_GEQUAL: case ATST_GEQUAL:
// Maybe a -1 trick multiplication factor could be used to merge with ATST_LEQUAL case // Maybe a -1 trick multiplication factor could be used to merge with ATST_LEQUAL case
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f; ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f;
ps_sel.atst = 2; m_ps_sel.atst = 2;
break; break;
case ATST_GREATER: case ATST_GREATER:
// Maybe a -1 trick multiplication factor could be used to merge with ATST_LESS case // Maybe a -1 trick multiplication factor could be used to merge with ATST_LESS case
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f + 1.0f; ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f + 1.0f;
ps_sel.atst = 2; m_ps_sel.atst = 2;
break; break;
case ATST_EQUAL: case ATST_EQUAL:
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF; ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF;
ps_sel.atst = 3; m_ps_sel.atst = 3;
break; break;
case ATST_NOTEQUAL: case ATST_NOTEQUAL:
ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF; ps_cb.FogColor_AREF.a = (float)m_context->TEST.AREF;
ps_sel.atst = 4; m_ps_sel.atst = 4;
break; break;
case ATST_NEVER: case ATST_NEVER:
case ATST_ALWAYS: case ATST_ALWAYS:
default: default:
ps_sel.atst = 0; m_ps_sel.atst = 0;
break; break;
} }
} }
@ -145,6 +147,141 @@ void GSRendererDX::EmulateZbuffer()
} }
} }
// Configures the pixel-shader selector (m_ps_sel), the sampler selector
// (m_ps_ssel) and the texture-related fields of the pixel/vertex constant
// buffers (ps_cb / vs_cb) for sampling `tex` in the current draw.
//
// `tex` is dereferenced unconditionally; the caller must pass a non-null source.
void GSRendererDX::EmulateTextureSampler(const GSTextureCache::Source* tex)
{
	const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
	const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;

	const uint8 wms = m_context->CLAMP.WMS;
	const uint8 wmt = m_context->CLAMP.WMT;
	// Bit 1 set on either axis means wrap mode 2 or 3, which is handled in the shader.
	bool complex_wms_wmt = !!((wms | wmt) & 2);

	bool bilinear = m_vt.IsLinear();
	bool shader_emulated_sampler = tex->m_palette || cpsm.fmt != 0 || complex_wms_wmt;

	// 1 and 0 are equivalent
	m_ps_sel.wms = (wms & 2) ? wms : 0;
	m_ps_sel.wmt = (wmt & 2) ? wmt : 0;

	int w = tex->m_texture->GetWidth();
	int h = tex->m_texture->GetHeight();

	int tw = (int)(1 << m_context->TEX0.TW);
	int th = (int)(1 << m_context->TEX0.TH);

	// xy: size derived from TEX0.TW/TH, zw: size of the backing texture.
	GSVector4 WH(tw, th, w, h);

	// The clamp rectangle must be refreshed BEFORE the format handling below,
	// because MinF_TA is derived from ps_cb.MskFix there. Doing it afterwards
	// made MinF_TA use the values left over from the previous draw.
	if (complex_wms_wmt) {
		ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
		ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy();

		// Refresh the MinF half of MinF_TA for every texture kind; the
		// branches below only rewrite it for targets. The TA half (zw) is
		// left untouched here.
		const GSVector4 min_f = (GSVector4(ps_cb.MskFix) + 0.5f) / WH.xyxy();
		ps_cb.MinF_TA.x = min_f.x;
		ps_cb.MinF_TA.y = min_f.y;
	}

	// Performance note:
	// 1/ Don't set 0 as it is the default value
	// 2/ Only keep aem when it is useful (avoid useless shader permutation)
	if (m_ps_sel.shuffle) {
		// Force a 32 bits access (normally shuffle is done on 16 bits)
		// m_ps_sel.tex_fmt = 0; // removed as an optimization
		m_ps_sel.aem = m_env.TEXA.AEM;
		ASSERT(tex->m_target);

		// Shuffle is a 16 bits format, so aem is always required
		GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
		ps_cb.MinF_TA = (GSVector4(ps_cb.MskFix) + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));

		// NOTE(review): no-op here, `bilinear` was initialised from the same
		// call above. Kept as-is.
		bilinear &= m_vt.IsLinear();

		GSVector4 half_offset = RealignTargetTextureCoordinate(tex);
		vs_cb.Texture_Scale_Offset.z = half_offset.x;
		vs_cb.Texture_Scale_Offset.w = half_offset.y;
	} else if (tex->m_target) {
		// Use an old target. AEM and index aren't resolved it must be done
		// on the GPU

		// Select the 32/24/16 bits color (AEM)
		m_ps_sel.fmt = cpsm.fmt;
		m_ps_sel.aem = m_env.TEXA.AEM;

		// Don't upload AEM if format is 32 bits
		if (cpsm.fmt) {
			GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
			ps_cb.MinF_TA = (GSVector4(ps_cb.MskFix) + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
		}

		// Select the index format
		if (tex->m_palette) {
			// FIXME Potentially improve fmt field in GSLocalMemory
			if (m_context->TEX0.PSM == PSM_PSMT4HL)
				m_ps_sel.fmt |= 1 << 2;
			else if (m_context->TEX0.PSM == PSM_PSMT4HH)
				m_ps_sel.fmt |= 2 << 2;
			else
				m_ps_sel.fmt |= 3 << 2;

			// Alpha channel of the RT is reinterpreted as an index. Star
			// Ocean 3 uses it to emulate a stencil buffer. It is a very
			// bad idea to force bilinear filtering on it.
			// NOTE(review): as above, this statement currently has no effect.
			bilinear &= m_vt.IsLinear();
		}

		GSVector4 half_offset = RealignTargetTextureCoordinate(tex);
		vs_cb.Texture_Scale_Offset.z = half_offset.x;
		vs_cb.Texture_Scale_Offset.w = half_offset.y;
	}
	else if (tex->m_palette) {
		// Use a standard 8 bits texture. AEM is already done on the CLUT
		// Therefore you only need to set the index
		// m_ps_sel.aem = 0; // removed as an optimization

		// Note 4 bits indexes are converted to 8 bits
		m_ps_sel.fmt = 3 << 2;
	}
	else {
		// Standard texture. Both index and AEM expansion were already done by the CPU.
		// m_ps_sel.tex_fmt = 0; // removed as an optimization
		// m_ps_sel.aem = 0; // removed as an optimization
	}

	if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) {
		// Micro optimization that reduces GPU load (removes 5 instructions on the FS program)
		m_ps_sel.tfx = TFX_DECAL;
	}
	else {
		m_ps_sel.tfx = m_context->TEX0.TFX;
	}

	m_ps_sel.tcc = m_context->TEX0.TCC;

	// Bilinear filtering is done either in the shader (ltf) or by the
	// hardware sampler (m_ps_ssel.ltf below), never both.
	m_ps_sel.ltf = bilinear && shader_emulated_sampler;
	m_ps_sel.rt = tex->m_target;
	m_ps_sel.spritehack = tex->m_spritehack_t;
	m_ps_sel.point_sampler = !(bilinear && !shader_emulated_sampler);

	if (PRIM->FST)
	{
		// Only the scale half (xy) is written here; the offset half (zw) is
		// written by the target branches above.
		GSVector4 TextureScale = GSVector4(0.0625f) / WH.xyxy();
		vs_cb.Texture_Scale_Offset.x = TextureScale.x;
		vs_cb.Texture_Scale_Offset.y = TextureScale.y;
		//Maybe better?
		//scale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
		m_ps_sel.fst = 1;
	}

	ps_cb.WH = WH;
	ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();

	// TC Offset Hack
	m_ps_sel.tcoffsethack = !!UserHacks_TCOffset;
	ps_cb.TC_OffsetHack = GSVector4(UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy();

	m_ps_ssel.tau = (m_context->CLAMP.WMS + 3) >> 1;
	m_ps_ssel.tav = (m_context->CLAMP.WMT + 3) >> 1;
	m_ps_ssel.ltf = bilinear && !shader_emulated_sampler;
}
void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{ {
const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize(); const GSVector2i& rtsize = ds ? ds->GetSize() : rt->GetSize();
@ -155,6 +292,10 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
bool ate_first_pass = m_context->TEST.DoFirstPass(); bool ate_first_pass = m_context->TEST.DoFirstPass();
bool ate_second_pass = m_context->TEST.DoSecondPass(); bool ate_second_pass = m_context->TEST.DoSecondPass();
m_gs_sel.key = 0;
vs_cb.Texture_Scale_Offset = GSVector4(0.0f);
GSTexture* rtcopy = NULL; GSTexture* rtcopy = NULL;
ASSERT(m_dev != NULL); ASSERT(m_dev != NULL);
@ -257,8 +398,6 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0; vs_sel.logz = dev->HasDepth32() ? 0 : m_logz ? 1 : 0;
vs_sel.rtcopy = !!rtcopy; vs_sel.rtcopy = !!rtcopy;
GSDeviceDX::VSConstantBuffer vs_cb;
float sx = 2.0f * rtscale.x / (rtsize.x << 4); float sx = 2.0f * rtscale.x / (rtsize.x << 4);
float sy = 2.0f * rtscale.y / (rtsize.y << 4); float sy = 2.0f * rtscale.y / (rtsize.y << 4);
float ox = (float)(int)m_context->XYOFFSET.OFX; float ox = (float)(int)m_context->XYOFFSET.OFX;
@ -272,7 +411,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
//The resulting shifted output aligns better with common blending / corona / blurring effects, //The resulting shifted output aligns better with common blending / corona / blurring effects,
//but introduces a few bad pixels on the edges. //but introduces a few bad pixels on the edges.
if(rt && rt->LikelyOffset) if(rt && rt->LikelyOffset && UserHacks_HPO == 1)
{ {
// DX9 has pixelcenter set to 0.0, so give it some value here // DX9 has pixelcenter set to 0.0, so give it some value here
@ -294,14 +433,14 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
// ps // ps
ps_sel.key = 0; m_ps_sel.key = 0;
ps_ssel.key = 0; m_ps_ssel.key = 0;
// Gregory: code is not yet ready so let's only enable it when // Gregory: code is not yet ready so let's only enable it when
// CRC is below the FULL level // CRC is below the FULL level
if (m_texture_shuffle && (m_crc_hack_level < CRCHackLevel::Full)) { if (m_texture_shuffle && (m_crc_hack_level < CRCHackLevel::Full)) {
ps_sel.shuffle = 1; m_ps_sel.shuffle = 1;
ps_sel.fmt = 0; m_ps_sel.fmt = 0;
const GIFRegXYOFFSET& o = m_context->XYOFFSET; const GIFRegXYOFFSET& o = m_context->XYOFFSET;
GSVertex* v = &m_vertex.buff[0]; GSVertex* v = &m_vertex.buff[0];
@ -312,9 +451,9 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
bool write_ba = (pos > 112 && pos < 136); bool write_ba = (pos > 112 && pos < 136);
// Read texture is 8 to 16 pixels (same as above) // Read texture is 8 to 16 pixels (same as above)
int tex_pos = v[0].U & 0xFF; int tex_pos = v[0].U & 0xFF;
ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144); m_ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144);
GL_INS("Color shuffle %s => %s", ps_sel.read_ba ? "BA" : "RG", write_ba ? "BA" : "RG"); GL_INS("Color shuffle %s => %s", m_ps_sel.read_ba ? "BA" : "RG", write_ba ? "BA" : "RG");
// Convert the vertex info to a 32 bits color format equivalent // Convert the vertex info to a 32 bits color format equivalent
for (size_t i = 0; i < count; i += 2) { for (size_t i = 0; i < count; i += 2) {
@ -323,7 +462,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
else else
v[i + 1].XYZ.X += 128u; v[i + 1].XYZ.X += 128u;
if (ps_sel.read_ba) if (m_ps_sel.read_ba)
v[i].U -= 128u; v[i].U -= 128u;
else else
v[i + 1].U += 128u; v[i + 1].U += 128u;
@ -390,24 +529,24 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
} }
else else
{ {
ps_sel.date = 1 + m_context->TEST.DATM; m_ps_sel.date = 1 + m_context->TEST.DATM;
} }
} }
if(m_env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) if(m_env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{ {
ps_sel.colclip = 1; m_ps_sel.colclip = 1;
} }
ps_sel.clr1 = om_bsel.IsCLR1(); m_ps_sel.clr1 = om_bsel.IsCLR1();
ps_sel.fba = m_context->FBA.FBA; m_ps_sel.fba = m_context->FBA.FBA;
ps_sel.aout = m_context->FRAME.PSM == PSM_PSMCT16 || m_context->FRAME.PSM == PSM_PSMCT16S || (m_context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; m_ps_sel.aout = m_context->FRAME.PSM == PSM_PSMCT16 || m_context->FRAME.PSM == PSM_PSMCT16S || (m_context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
ps_sel.aout &= !ps_sel.shuffle; m_ps_sel.aout &= !m_ps_sel.shuffle;
if(UserHacks_AlphaHack) ps_sel.aout = 1; if(UserHacks_AlphaHack) m_ps_sel.aout = 1;
if(PRIM->FGE) if(PRIM->FGE)
{ {
ps_sel.fog = 1; m_ps_sel.fog = 1;
ps_cb.FogColor_AREF = GSVector4::rgba32(m_env.FOGCOL.u32[0]) / 255; ps_cb.FogColor_AREF = GSVector4::rgba32(m_env.FOGCOL.u32[0]) / 255;
} }
@ -457,12 +596,12 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
if (!m_context->FBA.FBA) if (!m_context->FBA.FBA)
{ {
if (m_context->TEST.DATM == 0) if (m_context->TEST.DATM == 0)
ps_sel.atst = 2; // >= m_ps_sel.atst = 2; // >=
else { else {
if (tex && tex->m_spritehack_t) if (tex && tex->m_spritehack_t)
ps_sel.atst = 0; // < m_ps_sel.atst = 0; // <
else else
ps_sel.atst = 1; // < m_ps_sel.atst = 1; // <
} }
ps_cb.FogColor_AREF.a = (float)0x80; ps_cb.FogColor_AREF.a = (float)0x80;
} }
@ -472,67 +611,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
if(tex) if(tex)
{ {
const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; EmulateTextureSampler(tex);
const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
// The texture cache will handle various format conversion internally for non-target texture
// After the conversion the texture will be RGBA8 (aka 32 bits) hence the 0 below
int gpu_tex_fmt = (tex->m_target) ? cpsm.fmt : 0;
bool bilinear = m_vt.IsLinear();
bool simple_sample = !tex->m_palette && gpu_tex_fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2;
ps_sel.wms = m_context->CLAMP.WMS;
ps_sel.wmt = m_context->CLAMP.WMT;
if (ps_sel.shuffle) {
ps_sel.fmt = 0;
} else {
ps_sel.fmt = tex->m_palette ? gpu_tex_fmt | 4 : gpu_tex_fmt;
}
ps_sel.aem = m_env.TEXA.AEM;
ps_sel.tfx = m_context->TEX0.TFX;
ps_sel.tcc = m_context->TEX0.TCC;
ps_sel.ltf = bilinear && !simple_sample;
ps_sel.rt = tex->m_target;
ps_sel.spritehack = tex->m_spritehack_t;
ps_sel.point_sampler = !(bilinear && simple_sample);
int w = tex->m_texture->GetWidth();
int h = tex->m_texture->GetHeight();
int tw = (int)(1 << m_context->TEX0.TW);
int th = (int)(1 << m_context->TEX0.TH);
GSVector4 WH(tw, th, w, h);
if(PRIM->FST)
{
vs_cb.TextureScale = GSVector4(1.0f / 16) / WH.xyxy();
//Maybe better?
//vs_cb.TextureScale = GSVector4(1.0f / 16) * GSVector4(tex->m_texture->GetScale()).xyxy() / WH.zwzw();
ps_sel.fst = 1;
}
ps_cb.WH = WH;
ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);
// TC Offset Hack
ps_sel.tcoffsethack = !!UserHacks_TCOffset;
ps_cb.TC_OffsetHack = GSVector4(UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy();
GSVector4 clamp(ps_cb.MskFix);
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ps_cb.MinMax = clamp / WH.xyxy();
ps_cb.MinF_TA = (clamp + 0.5f).xyxy(ta) / WH.xyxy(GSVector4(255, 255));
ps_ssel.tau = (m_context->CLAMP.WMS + 3) >> 1;
ps_ssel.tav = (m_context->CLAMP.WMT + 3) >> 1;
ps_ssel.ltf = bilinear && simple_sample;
} }
else else
{ {
ps_sel.tfx = 4; m_ps_sel.tfx = 4;
} }
// rs // rs
@ -551,7 +634,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
dev->SetupOM(om_dssel, om_bsel, afix); dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupVS(vs_sel, &vs_cb); dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel); dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel); dev->SetupPS(m_ps_sel, &ps_cb, m_ps_ssel);
// draw // draw
@ -562,13 +645,13 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
if (m_env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) if (m_env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{ {
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel); GSDeviceDX::PSSelector ps_selneg(m_ps_sel);
om_bselneg.negative = 1; om_bselneg.negative = 1;
ps_selneg.colclip = 2; ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix); dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); dev->SetupPS(ps_selneg, &ps_cb, m_ps_ssel);
dev->DrawIndexedPrimitive(); dev->DrawIndexedPrimitive();
dev->SetupOM(om_dssel, om_bsel, afix); dev->SetupOM(om_dssel, om_bsel, afix);
@ -590,7 +673,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
EmulateAtst(2, tex); EmulateAtst(2, tex);
} }
dev->SetupPS(ps_sel, &ps_cb, ps_ssel); dev->SetupPS(m_ps_sel, &ps_cb, m_ps_ssel);
bool z = om_dssel.zwe; bool z = om_dssel.zwe;
bool r = om_bsel.wr; bool r = om_bsel.wr;
@ -633,13 +716,13 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
if (m_env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST) if (m_env.COLCLAMP.CLAMP == 0 && /* hack */ !tex && PRIM->PRIM != GS_POINTLIST)
{ {
GSDeviceDX::OMBlendSelector om_bselneg(om_bsel); GSDeviceDX::OMBlendSelector om_bselneg(om_bsel);
GSDeviceDX::PSSelector ps_selneg(ps_sel); GSDeviceDX::PSSelector ps_selneg(m_ps_sel);
om_bselneg.negative = 1; om_bselneg.negative = 1;
ps_selneg.colclip = 2; ps_selneg.colclip = 2;
dev->SetupOM(om_dssel, om_bselneg, afix); dev->SetupOM(om_dssel, om_bselneg, afix);
dev->SetupPS(ps_selneg, &ps_cb, ps_ssel); dev->SetupPS(ps_selneg, &ps_cb, m_ps_ssel);
dev->DrawIndexedPrimitive(); dev->DrawIndexedPrimitive();
} }

View File

@ -36,12 +36,14 @@ class GSRendererDX : public GSRendererHW
protected: protected:
void EmulateAtst(const int pass, const GSTextureCache::Source* tex); void EmulateAtst(const int pass, const GSTextureCache::Source* tex);
void EmulateZbuffer(); void EmulateZbuffer();
void EmulateTextureSampler(const GSTextureCache::Source* tex);
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex); virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
virtual void SetupIA() = 0; virtual void SetupIA() = 0;
virtual void UpdateFBA(GSTexture* rt) {} virtual void UpdateFBA(GSTexture* rt) {}
unsigned int UserHacks_TCOffset; unsigned int UserHacks_TCOffset;
float UserHacks_TCO_x, UserHacks_TCO_y; float UserHacks_TCO_x, UserHacks_TCO_y;
int UserHacks_HPO;
bool DATE; bool DATE;
@ -53,9 +55,11 @@ protected:
GSDeviceDX::OMDepthStencilSelector om_dssel; GSDeviceDX::OMDepthStencilSelector om_dssel;
GSDeviceDX::OMBlendSelector om_bsel; GSDeviceDX::OMBlendSelector om_bsel;
GSDeviceDX::PSSelector ps_sel; GSDeviceDX::PSSelector m_ps_sel;
GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSSamplerSelector m_ps_ssel;
GSDeviceDX::PSConstantBuffer ps_cb; GSDeviceDX::PSConstantBuffer ps_cb;
GSDeviceDX::VSConstantBuffer vs_cb;
public: public:
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0)); GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0));

View File

@ -40,10 +40,12 @@ GSRendererHW::GSRendererHW(GSTextureCache* tc)
m_userhacks_align_sprite_X = theApp.GetConfigB("UserHacks_align_sprite_X"); m_userhacks_align_sprite_X = theApp.GetConfigB("UserHacks_align_sprite_X");
m_userhacks_round_sprite_offset = theApp.GetConfigI("UserHacks_round_sprite_offset"); m_userhacks_round_sprite_offset = theApp.GetConfigI("UserHacks_round_sprite_offset");
m_userhacks_disable_gs_mem_clear = theApp.GetConfigB("UserHacks_DisableGsMemClear"); m_userhacks_disable_gs_mem_clear = theApp.GetConfigB("UserHacks_DisableGsMemClear");
m_userHacks_HPO = theApp.GetConfigI("UserHacks_HalfPixelOffset");
} else { } else {
m_userhacks_align_sprite_X = false; m_userhacks_align_sprite_X = false;
m_userhacks_round_sprite_offset = 0; m_userhacks_round_sprite_offset = 0;
m_userhacks_disable_gs_mem_clear = false; m_userhacks_disable_gs_mem_clear = false;
m_userHacks_HPO = 0;
} }
if (!m_upscale_multiplier) { //Custom Resolution if (!m_upscale_multiplier) { //Custom Resolution
@ -265,6 +267,65 @@ GSTexture* GSRendererHW::GetFeedbackOutput()
return t; return t;
} }
// Computes the texture-coordinate offset to apply when sampling from a render
// target with the half-pixel offset hack in modes 2 or 3.
//
// Returns a zero vector when the hack mode is <= 1 or the upscale multiplier
// is 1. Otherwise only x/y of the returned vector are filled in; z/w stay 0.
GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex)
{
	const bool hack_active = m_userHacks_HPO > 1 && GetUpscaleMultiplier() != 1;
	if (!hack_active)
		return GSVector4(0.0f);

	GSVertex* first_vertex = &m_vertex.buff[0];
	const GSVector2& scale = tex->m_texture->GetScale();
	const bool linear = m_vt.IsRealLinear();
	const int t_position = first_vertex[0].U;

	GSVector4 half_offset(0.0f);

	// FIXME Let's start with something wrong same mess on X and Y
	// FIXME Maybe it will be enough to check linear
	if (PRIM->FST) {
		// Mode 3 applies the raw offset; the other mode reduces it by the
		// texture scale. The -0.5 vertex-position case is the same in both.
		const bool raw_offset = (m_userHacks_HPO == 3);

		if (!linear && t_position == 8) {
			if (raw_offset) {
				half_offset.x = 8;
				half_offset.y = 8;
			} else {
				half_offset.x = 8 - 8 / scale.x;
				half_offset.y = 8 - 8 / scale.y;
			}
		} else if (linear && t_position == 16) {
			if (raw_offset) {
				half_offset.x = 16;
				half_offset.y = 16;
			} else {
				half_offset.x = 16 - 16 / scale.x;
				half_offset.y = 16 - 16 / scale.y;
			}
		} else if (m_vt.m_min.p.x == -0.5f) {
			half_offset.x = 8;
			half_offset.y = 8;
		}

		GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)",
			half_offset.x, half_offset.y, t_position, linear, scale.x);
	} else if (m_vt.m_eq.q) {
		const float tw = (float)(1 << m_context->TEX0.TW);
		const float th = (float)(1 << m_context->TEX0.TH);
		const float q = first_vertex[0].RGBAQ.Q;

		// Tales of Abyss
		half_offset.x = 0.5f * q / tw;
		half_offset.y = 0.5f * q / th;

		GL_INS("ST offset detected %f,%f (linear %d, scale %f)",
			half_offset.x, half_offset.y, linear, scale.x);
	}

	return half_offset;
}
void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r) void GSRendererHW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
{ {
// printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM); // printf("[%d] InvalidateVideoMem %d,%d - %d,%d %05x (%d)\n", (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP, (int)BITBLTBUF.DPSM);

View File

@ -151,6 +151,7 @@ protected:
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0; virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
int m_userhacks_round_sprite_offset; int m_userhacks_round_sprite_offset;
int m_userHacks_HPO;
bool m_channel_shuffle; bool m_channel_shuffle;
@ -165,6 +166,7 @@ public:
int GetUpscaleMultiplier(); int GetUpscaleMultiplier();
GSVector2i GetCustomResolution(); GSVector2i GetCustomResolution();
void SetScaling(); void SetScaling();
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
void Reset(); void Reset();
void VSync(int field); void VSync(int field);

View File

@ -753,65 +753,6 @@ void GSRendererOGL::EmulateBlending(bool DATE_GL42)
} }
} }
// Computes a texture-coordinate offset for the half-pixel offset hack and
// stores it in vs_cb.TextureOffset.
// When UserHacks_HPO <= 1 or the upscale multiplier is 1 the function returns
// early and vs_cb.TextureOffset is left untouched.
void GSRendererOGL::RealignTargetTextureCoordinate(const GSTextureCache::Source* tex)
{
if (UserHacks_HPO <= 1 || GetUpscaleMultiplier() == 1) return;
// Only the first vertex of the batch is inspected.
GSVertex* v = &m_vertex.buff[0];
const GSVector2& scale = tex->m_texture->GetScale();
bool linear = m_vt.IsRealLinear();
int t_position = v[0].U;
GSVector4 half_offset(0.0f);
// FIXME Let's start with something wrong same mess on X and Y
// FIXME Maybe it will be enough to check linear
if (PRIM->FST) {
// Mode 3 uses the raw 8/16 offsets; the other mode reduces them by the
// texture scale.
if (UserHacks_HPO == 3) {
if (!linear && t_position == 8) {
half_offset.x = 8;
half_offset.y = 8;
} else if (linear && t_position == 16) {
half_offset.x = 16;
half_offset.y = 16;
} else if (m_vt.m_min.p.x == -0.5f) {
half_offset.x = 8;
half_offset.y = 8;
}
} else {
if (!linear && t_position == 8) {
half_offset.x = 8 - 8 / scale.x;
half_offset.y = 8 - 8 / scale.y;
} else if (linear && t_position == 16) {
half_offset.x = 16 - 16 / scale.x;
half_offset.y = 16 - 16 / scale.y;
} else if (m_vt.m_min.p.x == -0.5f) {
half_offset.x = 8;
half_offset.y = 8;
}
}
GL_INS("offset detected %f,%f t_pos %d (linear %d, scale %f)",
half_offset.x, half_offset.y, t_position, linear, scale.x);
} else if (m_vt.m_eq.q) {
// Non-FST path, taken only when m_vt.m_eq.q is set: the offset is half a
// texel (from TEX0.TW/TH) scaled by the first vertex's Q.
float tw = (float)(1 << m_context->TEX0.TW);
float th = (float)(1 << m_context->TEX0.TH);
float q = v[0].RGBAQ.Q;
// Tales of Abyss
half_offset.x = 0.5f * q / tw;
half_offset.y = 0.5f * q / th;
GL_INS("ST offset detected %f,%f (linear %d, scale %f)",
half_offset.x, half_offset.y, linear, scale.x);
}
// Written even when no case matched above, in which case it is all zeros.
vs_cb.TextureOffset = GSVector4(half_offset);
}
void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex) void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
{ {
GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; GSDeviceOGL* dev = (GSDeviceOGL*)m_dev;
@ -883,7 +824,7 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
// The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea. // The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea.
bilinear &= m_vt.IsLinear(); bilinear &= m_vt.IsLinear();
RealignTargetTextureCoordinate(tex); vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex);
} else if (tex->m_target) { } else if (tex->m_target) {
// Use an old target. AEM and index aren't resolved it must be done // Use an old target. AEM and index aren't resolved it must be done
@ -933,7 +874,7 @@ void GSRendererOGL::EmulateTextureSampler(const GSTextureCache::Source* tex)
bilinear &= m_vt.IsLinear(); bilinear &= m_vt.IsLinear();
} }
RealignTargetTextureCoordinate(tex); vs_cb.TextureOffset = RealignTargetTextureCoordinate(tex);
} else if (tex->m_palette) { } else if (tex->m_palette) {
// Use a standard 8 bits texture. AEM is already done on the CLUT // Use a standard 8 bits texture. AEM is already done on the CLUT

View File

@ -77,7 +77,6 @@ class GSRendererOGL final : public GSRendererHW
inline void ResetStates(); inline void ResetStates();
inline void Lines2Sprites(); inline void Lines2Sprites();
inline void SetupIA(const float& sx, const float& sy); inline void SetupIA(const float& sx, const float& sy);
inline void RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);
inline void EmulateTextureShuffleAndFbmask(); inline void EmulateTextureShuffleAndFbmask();
inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex); inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex);
inline void EmulateBlending(bool DATE_GL42); inline void EmulateBlending(bool DATE_GL42);

View File

@ -688,12 +688,8 @@ void GSHacksDlg::OnInit()
CheckDlgButton(m_hWnd, IDC_AUTO_FLUSH, theApp.GetConfigB("UserHacks_AutoFlush")); CheckDlgButton(m_hWnd, IDC_AUTO_FLUSH, theApp.GetConfigB("UserHacks_AutoFlush"));
CheckDlgButton(m_hWnd, IDC_UNSCALE_POINT_LINE, theApp.GetConfigB("UserHacks_unscale_point_line")); CheckDlgButton(m_hWnd, IDC_UNSCALE_POINT_LINE, theApp.GetConfigB("UserHacks_unscale_point_line"));
CheckDlgButton(m_hWnd, IDC_MEMORY_WRAPPING, theApp.GetConfigB("wrap_gs_mem")); CheckDlgButton(m_hWnd, IDC_MEMORY_WRAPPING, theApp.GetConfigB("wrap_gs_mem"));
std::vector<GSSetting> hpo_combobox = theApp.m_gs_offset_hack;
if (!ogl) ComboBoxInit(IDC_OFFSETHACK, theApp.m_gs_offset_hack, theApp.GetConfigI("UserHacks_HalfPixelOffset"));
{
hpo_combobox.erase(hpo_combobox.begin() + 2, hpo_combobox.begin() + 4);
}
ComboBoxInit(IDC_OFFSETHACK, hpo_combobox,theApp.GetConfigI("UserHacks_HalfPixelOffset"));
ComboBoxInit(IDC_ROUND_SPRITE, theApp.m_gs_hack, theApp.GetConfigI("UserHacks_round_sprite_offset")); ComboBoxInit(IDC_ROUND_SPRITE, theApp.m_gs_hack, theApp.GetConfigI("UserHacks_round_sprite_offset"));
ComboBoxInit(IDC_SPRITEHACK, theApp.m_gs_hack, theApp.GetConfigI("UserHacks_SpriteHack")); ComboBoxInit(IDC_SPRITEHACK, theApp.m_gs_hack, theApp.GetConfigI("UserHacks_SpriteHack"));
@ -720,6 +716,8 @@ void GSHacksDlg::OnInit()
EnableWindow(GetDlgItem(m_hWnd, IDC_ROUND_SPRITE), !native); EnableWindow(GetDlgItem(m_hWnd, IDC_ROUND_SPRITE), !native);
EnableWindow(GetDlgItem(m_hWnd, IDC_SPRITEHACK_TEXT), !native); EnableWindow(GetDlgItem(m_hWnd, IDC_SPRITEHACK_TEXT), !native);
EnableWindow(GetDlgItem(m_hWnd, IDC_ROUND_SPRITE_TEXT), !native); EnableWindow(GetDlgItem(m_hWnd, IDC_ROUND_SPRITE_TEXT), !native);
EnableWindow(GetDlgItem(m_hWnd, IDC_OFFSETHACK_TEXT), !native);
EnableWindow(GetDlgItem(m_hWnd, IDC_OFFSETHACK), !native);
AddTooltip(IDC_SKIPDRAWHACKEDIT); AddTooltip(IDC_SKIPDRAWHACKEDIT);
AddTooltip(IDC_SKIPDRAWHACK); AddTooltip(IDC_SKIPDRAWHACK);

View File

@ -178,11 +178,11 @@ void GSDevice11::SetupGS(GSSelector sel)
void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
{ {
hash_map<uint32, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps.find(sel); hash_map<uint64, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps.find(sel);
if(i == m_ps.end()) if(i == m_ps.end())
{ {
string str[20]; string str[21];
str[0] = format("%d", sel.fst); str[0] = format("%d", sel.fst);
str[1] = format("%d", sel.wms); str[1] = format("%d", sel.wms);
@ -204,6 +204,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
str[17] = format("%d", sel.point_sampler); str[17] = format("%d", sel.point_sampler);
str[18] = format("%d", sel.shuffle); str[18] = format("%d", sel.shuffle);
str[19] = format("%d", sel.read_ba); str[19] = format("%d", sel.read_ba);
str[20] = format("%d", sel.fmt >> 2);
D3D_SHADER_MACRO macro[] = D3D_SHADER_MACRO macro[] =
{ {
@ -227,6 +228,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
{"PS_POINT_SAMPLER", str[17].c_str()}, {"PS_POINT_SAMPLER", str[17].c_str()},
{"PS_SHUFFLE", str[18].c_str() }, {"PS_SHUFFLE", str[18].c_str() },
{"PS_READ_BA", str[19].c_str() }, {"PS_READ_BA", str[19].c_str() },
{"PS_PAL_FMT", str[20].c_str() },
{NULL, NULL}, {NULL, NULL},
}; };

View File

@ -133,11 +133,11 @@ void GSDevice9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSel
} }
} }
hash_map<uint32, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel); hash_map<uint64, CComPtr<IDirect3DPixelShader9> >::const_iterator i = m_ps.find(sel);
if(i == m_ps.end()) if(i == m_ps.end())
{ {
string str[17]; string str[18];
str[0] = format("%d", sel.fst); str[0] = format("%d", sel.fst);
str[1] = format("%d", sel.wms); str[1] = format("%d", sel.wms);
@ -156,6 +156,7 @@ void GSDevice9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSel
str[14] = format("%d", sel.spritehack); str[14] = format("%d", sel.spritehack);
str[15] = format("%d", sel.tcoffsethack); str[15] = format("%d", sel.tcoffsethack);
str[16] = format("%d", sel.point_sampler); str[16] = format("%d", sel.point_sampler);
str[17] = format("%d", sel.fmt >> 2);
D3D_SHADER_MACRO macro[] = D3D_SHADER_MACRO macro[] =
{ {
@ -176,6 +177,7 @@ void GSDevice9::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSel
{"PS_SPRITEHACK", str[14].c_str()}, {"PS_SPRITEHACK", str[14].c_str()},
{"PS_TCOFFSETHACK", str[15].c_str()}, {"PS_TCOFFSETHACK", str[15].c_str()},
{"PS_POINT_SAMPLER", str[16].c_str()}, {"PS_POINT_SAMPLER", str[16].c_str()},
{"PS_PAL_FMT", str[17].c_str()},
{NULL, NULL}, {NULL, NULL},
}; };

View File

@ -2,7 +2,6 @@
#define FMT_32 0 #define FMT_32 0
#define FMT_24 1 #define FMT_24 1
#define FMT_16 2 #define FMT_16 2
#define FMT_PAL 4 /* flag bit */
// And I say this as an ATI user. // And I say this as an ATI user.
#define ATI_SUCKS 1 #define ATI_SUCKS 1
@ -41,6 +40,7 @@
#define PS_POINT_SAMPLER 0 #define PS_POINT_SAMPLER 0
#define PS_SHUFFLE 0 #define PS_SHUFFLE 0
#define PS_READ_BA 0 #define PS_READ_BA 0
#define PS_PAL_FMT 0
#endif #endif
struct VS_INPUT struct VS_INPUT
@ -91,7 +91,7 @@ cbuffer cb0
{ {
float4 VertexScale; float4 VertexScale;
float4 VertexOffset; float4 VertexOffset;
float2 TextureScale; float4 Texture_Scale_Offset;
}; };
cbuffer cb1 cbuffer cb1
@ -155,6 +155,7 @@ float4 sample_rt(float2 uv)
#define PS_LTF 0 #define PS_LTF 0
#define PS_COLCLIP 0 #define PS_COLCLIP 0
#define PS_DATE 0 #define PS_DATE 0
#define PS_PAL_FMT 0
#endif #endif
struct VS_INPUT struct VS_INPUT
@ -194,7 +195,7 @@ float4 vs_params[3];
#define VertexScale vs_params[0] #define VertexScale vs_params[0]
#define VertexOffset vs_params[1] #define VertexOffset vs_params[1]
#define TextureScale vs_params[2].xy #define Texture_Scale_Offset vs_params[2]
float4 ps_params[7]; float4 ps_params[7];
@ -225,7 +226,9 @@ float4 sample_rt(float2 uv)
#endif #endif
float4 wrapuv(float4 uv) #define PS_AEM_FMT (PS_FMT & 3)
float4 clamp_wrap_uv(float4 uv)
{ {
if(PS_WMS == PS_WMT) if(PS_WMS == PS_WMT)
{ {
@ -311,24 +314,6 @@ float4 wrapuv(float4 uv)
return uv; return uv;
} }
// Clamps the texture coordinate on every axis whose wrap-mode selector
// (PS_WMS for x, PS_WMT for y) equals 2. The lower bound is taken from MinF
// and the upper bound from MinMax.zw; an axis whose selector is not 2 is
// returned unchanged.
float2 clampuv(float2 uv)
{
	// clamp() works component-wise, so treating each axis on its own yields
	// the same result as one two-component clamp when both selectors are 2.
	if(PS_WMS == 2)
	{
		uv.x = clamp(uv.x, MinF.x, MinMax.z);
	}

	if(PS_WMT == 2)
	{
		uv.y = clamp(uv.y, MinF.y, MinMax.w);
	}

	return uv;
}
float4x4 sample_4c(float4 uv) float4x4 sample_4c(float4 uv)
{ {
float4x4 c; float4x4 c;
@ -381,16 +366,10 @@ float4 sample(float2 st, float q)
float4x4 c; float4x4 c;
float2 dd; float2 dd;
/* if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
if(!PS_LTF && PS_FMT <= FMT_16 && PS_WMS < 2 && PS_WMT < 2)
{ {
c[0] = sample_c(st); c[0] = sample_c(st);
} }
*/
if (!PS_LTF && PS_FMT <= FMT_16 && PS_WMS < 3 && PS_WMT < 3)
{
c[0] = sample_c(clampuv(st));
}
else else
{ {
float4 uv; float4 uv;
@ -405,32 +384,29 @@ float4 sample(float2 st, float q)
uv = st.xyxy; uv = st.xyxy;
} }
uv = wrapuv(uv); uv = clamp_wrap_uv(uv);
if(PS_FMT & FMT_PAL) #if PS_PAL_FMT != 0
{
c = sample_4p(sample_4a(uv)); c = sample_4p(sample_4a(uv));
} #else
else
{
c = sample_4c(uv); c = sample_4c(uv);
} #endif
} }
[unroll] [unroll]
for (uint i = 0; i < 4; i++) for (uint i = 0; i < 4; i++)
{ {
if((PS_FMT & ~FMT_PAL) == FMT_32) if(PS_AEM_FMT == FMT_32)
{ {
#if SHADER_MODEL <= 0x300 #if SHADER_MODEL <= 0x300
if(PS_RT) c[i].a *= 128.0f / 255; if(PS_RT) c[i].a *= 128.0f / 255;
#endif #endif
} }
else if((PS_FMT & ~FMT_PAL) == FMT_24) else if(PS_AEM_FMT == FMT_24)
{ {
c[i].a = !PS_AEM || any(c[i].rgb) ? TA.x : 0; c[i].a = !PS_AEM || any(c[i].rgb) ? TA.x : 0;
} }
else if((PS_FMT & ~FMT_PAL) == FMT_16) else if(PS_AEM_FMT == FMT_16)
{ {
c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(c[i].rgb) ? TA.x : 0; c[i].a = c[i].a >= 0.5 ? TA.y : !PS_AEM || any(c[i].rgb) ? TA.x : 0;
} }
@ -631,12 +607,14 @@ VS_OUTPUT vs_main(VS_INPUT input)
{ {
if(VS_FST) if(VS_FST)
{ {
output.t.xy = input.uv * TextureScale; float2 uv = input.uv - Texture_Scale_Offset.zw;
output.t.w = 1.0f;
output.t.xy = uv * Texture_Scale_Offset.xy;
output.t.zw = uv;
} }
else else
{ {
output.t.xy = input.st; output.t.xy = input.st - Texture_Scale_Offset.zw;
output.t.w = input.q; output.t.w = input.q;
} }
} }
@ -808,14 +786,16 @@ VS_OUTPUT vs_main(VS_INPUT input)
if(VS_TME) if(VS_TME)
{ {
float2 t = input.t - Texture_Scale_Offset.zw;
if(VS_FST) if(VS_FST)
{ {
output.t.xy = input.t * TextureScale;
output.t.w = 1.0f; output.t.xy = t * Texture_Scale_Offset.xy;
output.t.zw = t;
} }
else else
{ {
output.t.xy = input.t; output.t.xy = t;
output.t.w = input.p.w; output.t.w = input.p.w;
} }
} }