Merge pull request #1306 from PCSX2/gsdx-direct-depth-sample

Gsdx direct depth sample
This commit is contained in:
Gregory Hainaut 2016-04-26 16:46:22 +02:00
commit c7f0a85d41
8 changed files with 352 additions and 32 deletions

View File

@ -777,6 +777,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
+ format("#define PS_WMT %d\n", sel.wmt)
+ format("#define PS_TEX_FMT %d\n", sel.tex_fmt)
+ format("#define PS_DFMT %d\n", sel.dfmt)
+ format("#define PS_DEPTH_FMT %d\n", sel.depth_fmt)
+ format("#define PS_AEM %d\n", sel.aem)
+ format("#define PS_TFX %d\n", sel.tfx)
+ format("#define PS_TCC %d\n", sel.tcc)

View File

@ -247,6 +247,7 @@ class GSDeviceOGL final : public GSDevice
// Format
uint32 tex_fmt:4;
uint32 dfmt:2;
uint32 depth_fmt:2;
// Alpha extension/Correction
uint32 aem:1;
uint32 fba:1;
@ -270,7 +271,7 @@ class GSDeviceOGL final : public GSDevice
uint32 write_rg:1;
uint32 fbmask:1;
uint32 _free1:2;
//uint32 _free1:0;
// *** Word 2
// Blend and Colclip

View File

@ -415,7 +415,7 @@ void GSRendererHW::Draw()
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
tex = tex_psm.depth ? m_tc->LookupDepthSource(context->TEX0, env.TEXA, r) : m_tc->LookupSource(context->TEX0, env.TEXA, r);
if(!tex) {
GL_POP();

View File

@ -873,16 +873,21 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
if (tex)
{
const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
// Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth.
//const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM];
const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM];
const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm;
bool bilinear = m_filter == 2 ? m_vt.IsLinear() : m_filter != 0;
bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2;
bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2 && !psm.depth;
// Don't force extra filtering on sprite (it creates various upscaling issue)
bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear());
ps_sel.wms = m_context->CLAMP.WMS;
ps_sel.wmt = m_context->CLAMP.WMT;
// Depth + bilinear filtering isn't done yet (And I'm not sure we need it anyway but a game will prove me wrong)
ASSERT(!(psm.depth && m_vt.IsLinear()));
// Performance note:
// 1/ Don't set 0 as it is the default value
// 2/ Only keep aem when it is useful (avoid useless shader permutation)
@ -892,6 +897,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
ps_sel.aem = m_env.TEXA.AEM;
ASSERT(tex->m_target);
// Requires a float conversion if the texture is a depth texture; otherwise integral scaling is used
if (psm.depth) {
ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : 1;
}
// Shuffle is a 16 bits format, so aem is always required
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ta /= 255.0f;
@ -899,8 +909,8 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
ps_cb.TA_Af.x = ta.x;
ps_cb.TA_Af.y = ta.y;
// FIXME: it is likely a bad idea to do the bilinear interpolation here
// bilinear &= m_vt.IsLinear();
// The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea.
bilinear &= m_vt.IsLinear();
} else if (tex->m_target) {
// Use an old target. AEM and index aren't resolved it must be done
@ -935,6 +945,15 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
bilinear &= m_vt.IsLinear();
}
// Depth format
if (psm.depth) {
// Requires a float conversion if the texture is a depth texture; otherwise integral scaling is used
ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 :
(psm.bpp == 16) ? 2 : 1;
// Don't force interpolation on depth format
bilinear &= m_vt.IsLinear();
}
} else if (tex->m_palette) {
// Use a standard 8 bits texture. AEM is already done on the CLUT
// Therefore you only need to set the index

View File

@ -85,6 +85,75 @@ void GSTextureCache::RemoveAll()
}
}
// Look up a texture source for a depth-format TEX0 by sharing the texture of
// an existing target instead of uploading data from local memory.
//
// Returns NULL when CanConvertDepth() is false. Otherwise the depth targets
// are searched first and the render targets second; only targets that were
// used in the current frame (m_age == 0, m_used set) with an empty dirty list
// and whose address/format share bits with TEX0 are accepted. On a hit, a new
// Source that merely points at the target's texture is returned. On a miss,
// the request is forwarded to LookupSource().
GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
{
	if (!CanConvertDepth())
		return NULL;

	// Paletted formats need the CLUT to be loaded before sampling
	const bool paletted = GSLocalMemory::m_psm[TEX0.PSM].pal > 0;
	if (paletted)
		m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);

	const uint32 bp = TEX0.TBP0;
	const uint32 psm = TEX0.PSM;

	// Check only the current frame, I guess it is only used as a postprocessing effect.
	// Depth targets are tried first, then the render targets (Silent Hill 4).
	const int search_order[] = { DepthStencil, RenderTarget };

	Target* dst = NULL;
	for (int i = 0; i < 2 && dst == NULL; i++) {
		for (auto t : m_dst[search_order[i]]) {
			if (t->m_age || !t->m_used || !t->m_dirty.empty())
				continue;

			if (!GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
				continue;

			ASSERT(GSLocalMemory::m_psm[t->m_TEX0.PSM].depth);
			dst = t;
			break;
		}
	}

	if (dst == NULL) {
		GL_CACHE("TC depth: ERROR miss (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM);
		// Possible ? In this case we could call LookupSource
		// Or just put a basic texture
		// src->m_texture = m_renderer->m_dev->CreateTexture(tw, th);
		// In all cases rendering will be broken
		//
		// Note: it might be worth checking the previous frame
		// Note: otherwise return NULL and skip the draw
		//ASSERT(0);
		return LookupSource(TEX0, TEXA, r);
	}

	GL_CACHE("TC depth: dst %s hit: %d (0x%x, F:0x%x)", to_string(dst->m_type),
		dst->m_texture ? dst->m_texture->GetID() : 0,
		TEX0.TBP0, TEX0.PSM);

	// Create a shared texture source: a dummy container whose texture is a
	// plain copy of the target's texture pointer.
	Source* src = new Source(m_renderer, TEX0, TEXA, m_temp, true);
	src->m_texture = dst->m_texture;
	src->m_shared_texture = true;
	src->m_target = true; // So renderer can check if a conversion is required
	src->m_32_bits_fmt = dst->m_32_bits_fmt;

	// Insert the texture in the hash set to keep track of it. But don't bother with
	// the texture cache list. It means that a new Source is created every time we need it.
	// If it is too expensive, one could cut the memory allocation in the Source
	// constructor for this use case.
	m_src.m_surfaces.insert(src);

	return src;
}
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
@ -207,6 +276,14 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
}
}
// Pure depth texture format will be fetched by LookupDepthSource.
// However guess what, some games (GoW) read the depth as a standard
// color format (instead of a depth format). All pixels are scrambled
// (because color and depth don't have same location). They don't care
// pixel will be several draw calls later.
//
// Sigh... They don't help us.
if (dst == NULL && CanConvertDepth()) {
// Let's try a trick to avoid to use wrongly a depth buffer
// Unfortunately, I don't have any Arc the Lad testcase
@ -217,8 +294,15 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM))
{
dst = t;
break;
GL_INS("TC: Warning depth format read as color format. Pixels will be scrambled");
//dst = t;
//break;
// Let's fetch a depth format texture. Rationale: it avoids the texture allocation and the
// rescaling done by the current function.
GIFRegTEX0 depth_TEX0;
depth_TEX0.u32[0] = TEX0.u32[0] | (0x30u << 20u);
depth_TEX0.u32[1] = TEX0.u32[1];
return LookupDepthSource(depth_TEX0, TEXA, r);
}
}
}
@ -314,14 +398,16 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
dst = CreateTarget(TEX0, w, h, type);
dst->m_32_bits_fmt = t->m_32_bits_fmt;
int shader;
bool fmt_16_bits = (GSLocalMemory::m_psm[TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 16);
if (type == DepthStencil) {
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
int shader = ShaderConvert_RGBA8_TO_FLOAT32 + GSLocalMemory::m_psm[TEX0.PSM].fmt;
m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false);
GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x was F:0x%x)", w, h, bp, TEX0.PSM, t->m_TEX0.PSM);
shader = (fmt_16_bits) ? ShaderConvert_RGB5A1_TO_FLOAT16 : ShaderConvert_RGBA8_TO_FLOAT32 + GSLocalMemory::m_psm[TEX0.PSM].fmt;
} else {
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM);
m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, ShaderConvert_FLOAT32_TO_RGBA8, false);
GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x was F:0x%x)", w, h, bp, TEX0.PSM, t->m_TEX0.PSM);
shader = (fmt_16_bits) ? ShaderConvert_FLOAT16_TO_RGB5A1 : ShaderConvert_FLOAT32_TO_RGBA8;
}
m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false);
break;
}
@ -944,8 +1030,12 @@ void GSTextureCache::IncAge()
Source* s = *j;
if(++s->m_age > maxage)
{
if(s->m_shared_texture) {
// Shared textures are temporary: they are only added to the hash set, not to the
// texture cache list, therefore you can't use RemoveAt
m_src.m_surfaces.erase(s);
delete s;
} else if(++s->m_age > maxage) {
m_src.RemoveAt(s);
}
}
@ -1339,7 +1429,7 @@ void GSTextureCache::PrintMemoryUsage()
uint32 dss = 0;
for(hash_set<Source*>::iterator i = m_src.m_surfaces.begin(); i != m_src.m_surfaces.end(); i++) {
Source* s = *i;
if (s) {
if (s && !s->m_shared_texture) {
if (s->m_target)
tex_rt += s->m_texture->GetMemUsage();
else
@ -1370,13 +1460,17 @@ GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp)
, m_age(0)
, m_temp(temp)
, m_32_bits_fmt(false)
, m_shared_texture(false)
{
m_TEX0.TBP0 = 0x3fff;
}
// Hand the surface's texture back to the device for recycling, unless the
// texture is only borrowed from another surface.
GSTextureCache::Surface::~Surface()
{
	// Shared textures are pointer copies, so this surface made no allocation
	// of its own. Recycling must therefore happen only behind this guard:
	// an unconditional Recycle() here would also recycle borrowed textures
	// and would recycle owned textures a second time.
	if (!m_shared_texture)
		m_renderer->m_dev->Recycle(m_texture);
}
void GSTextureCache::Surface::Update()
@ -1386,7 +1480,7 @@ void GSTextureCache::Surface::Update()
// GSTextureCache::Source
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp)
GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container)
: Surface(r, temp)
, m_palette(NULL)
, m_initpalette(true)
@ -1398,20 +1492,32 @@ GSTextureCache::Source::Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFR
m_TEX0 = TEX0;
m_TEXA = TEXA;
memset(m_valid, 0, sizeof(m_valid));
if (dummy_container) {
// A dummy container only contains an m_texture that is a pointer to another source's texture.
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32);
m_write.rect = NULL;
m_write.count = 0;
memset(m_clut, 0, 256*sizeof(uint32));
m_clut = NULL;
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
m_write.count = 0;
m_repeating = false;
m_repeating = m_TEX0.IsRepeating();
} else {
memset(m_valid, 0, sizeof(m_valid));
if(m_repeating)
{
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
m_clut = (uint32*)_aligned_malloc(256 * sizeof(uint32), 32);
memset(m_clut, 0, 256*sizeof(uint32));
m_write.rect = (GSVector4i*)_aligned_malloc(3 * sizeof(GSVector4i), 32);
m_write.count = 0;
m_repeating = m_TEX0.IsRepeating();
if(m_repeating)
{
m_p2t = r->m_mem.GetPage2TileMap(m_TEX0);
}
}
}

View File

@ -41,6 +41,7 @@ public:
int m_age;
uint8* m_temp;
bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture
bool m_shared_texture;
public:
Surface(GSRenderer* r, uint8* temp);
@ -68,7 +69,7 @@ public:
vector<GSVector2i>* m_p2t;
public:
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp);
Source(GSRenderer* r, const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, uint8* temp, bool dummy_container = false);
virtual ~Source();
virtual void Update(const GSVector4i& rect);
@ -141,6 +142,8 @@ public:
void RemovePartial();
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used);
Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h);

View File

@ -175,6 +175,94 @@ mat4 sample_4p(vec4 u)
return c;
}
//////////////////////////////////////////////////////////////////////
// Depth sampling
//////////////////////////////////////////////////////////////////////
// Fetch a single unfiltered texel (mip level 0) from TextureSampler at the
// integer texel coordinate uv.
vec4 fetch_c(ivec2 uv)
{
    vec4 texel = texelFetch(TextureSampler, uv, 0);
    return texel;
}
// Apply the PS_WMS / PS_WMT addressing modes 2 and 3 to an integer texture
// coordinate used for depth sampling.
//
// Mode 2 clamps the coordinate between MskFix.xy and MskFix.zw, mode 3
// computes (uv & MskFix.xy) | MskFix.zw. Any other mode leaves the
// coordinate unchanged. The horizontal (x) axis is driven by PS_WMS and the
// vertical (y) axis by PS_WMT.
ivec2 clamp_wrap_uv_depth(ivec2 uv)
{
    ivec2 uv_out = uv;

    // Keep the full precision: MskFix is shifted left by 4 so the clamp/wrap
    // is done before the caller applies the ScalingFactor and the 1/16 coeff.
    ivec4 mask = ivec4(MskFix) << 4;

#if PS_WMS == PS_WMT

    // Same mode on both axes: handle x and y in a single vector operation
#if PS_WMS == 2
    uv_out = clamp(uv, mask.xy, mask.zw);
#elif PS_WMS == 3
    uv_out = (uv & mask.xy) | mask.zw;
#endif

#else // PS_WMS != PS_WMT

    // Different mode per axis: each component must be handled as a scalar.
    // Passing the whole ivec2 to clamp() here would yield an ivec2 that
    // cannot be assigned to a single component.
#if PS_WMS == 2
    uv_out.x = clamp(uv.x, mask.x, mask.z);
#elif PS_WMS == 3
    uv_out.x = (uv.x & mask.x) | mask.z;
#endif

#if PS_WMT == 2
    uv_out.y = clamp(uv.y, mask.y, mask.w);
#elif PS_WMT == 3
    uv_out.y = (uv.y & mask.y) | mask.w;
#endif

#endif

    return uv_out;
}
// Sample the bound texture at an integral coordinate and return it as a color
// whose channels range from 0 to 255.
//
// st is truncated to integers, clamped/wrapped by clamp_wrap_uv_depth(), then
// multiplied by ScalingFactor.xy and 1/16 to get the texel to fetch. How the
// fetched texel becomes a color depends on PS_DEPTH_FMT (1, 2 or 3). Alpha is
// then rewritten according to PS_AEM_FMT / PS_AEM using TA.
vec4 sample_depth(vec2 st)
{
    // Clamp/wrap first, then apply the scaling factor and the 1/16 coeff
    ivec2 uv_wrapped = clamp_wrap_uv_depth(ivec2(st));
    vec2 uv_f = vec2(uv_wrapped) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f);
    ivec2 uv = ivec2(uv_f);

    vec4 t;
#if PS_DEPTH_FMT == 1
    // Based on ps_main11 of convert
    // Convert a GL_FLOAT32 depth texture into a RGBA color texture
    const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));
    const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);

    float depth = fetch_c(uv).r;
    vec4 res = fract(vec4(depth) * bitSh);

    t = (res - res.xxyz * bitMsk) * 256.0f;

#elif PS_DEPTH_FMT == 2
    // Based on ps_main12 of convert
    // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture
    const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));
    const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);

    float depth = fetch_c(uv).r;
    uvec4 color = uvec4(vec4(depth) * bitSh) & bitMsk;

    t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);

#elif PS_DEPTH_FMT == 3
    // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture
    t = fetch_c(uv) * 255.0f;

#endif

    // Warning: t ranges from 0 to 255
#if (PS_AEM_FMT == FMT_24)
    if ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) {
        t.a = 255.0f * TA.x;
    } else {
        t.a = 0.0f;
    }
#elif (PS_AEM_FMT == FMT_16)
    if (t.a >= 128.0f) {
        t.a = 255.0f * TA.y;
    } else if ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) {
        t.a = 255.0f * TA.x;
    } else {
        t.a = 0.0f;
    }
#endif

    return t;
}
//////////////////////////////////////////////////////////////////////
vec4 sample_color(vec2 st)
{
#if (PS_TCOFFSETHACK == 1)
@ -328,10 +416,17 @@ vec4 ps_color()
{
//FIXME: maybe we can set gl_Position.w = q in VS
#if (PS_FST == 0)
vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w));
vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);
#else
// Note xy are normalized coordinate
vec4 T = sample_color(PSin.t_int.xy);
vec2 st = PSin.t_int.xy;
#endif
#if (PS_DEPTH_FMT > 0)
// Integral coordinate
vec4 T = sample_depth(PSin.t_int.zw);
#else
vec4 T = sample_color(st);
#endif
#if PS_IIP == 1

View File

@ -1019,6 +1019,94 @@ static const char* const tfx_fs_all_glsl =
" return c;\n"
"}\n"
"\n"
"//////////////////////////////////////////////////////////////////////\n"
"// Depth sampling\n"
"//////////////////////////////////////////////////////////////////////\n"
"vec4 fetch_c(ivec2 uv)\n"
"{\n"
" return texelFetch(TextureSampler, ivec2(uv), 0);\n"
"}\n"
"\n"
// Embedded GLSL: clamp_wrap_uv_depth(). In the PS_WMS != PS_WMT branch each
// axis is clamped as a scalar (uv.x / uv.y); passing the whole ivec2 to
// clamp() would not compile when assigned to a single component.
"ivec2 clamp_wrap_uv_depth(ivec2 uv)\n"
"{\n"
" ivec2 uv_out = uv;\n"
"\n"
" // Keep the full precision\n"
" // It allow to multiply the ScalingFactor before the 1/16 coeff\n"
" ivec4 mask = ivec4(MskFix) << 4;\n"
"\n"
"#if PS_WMS == PS_WMT\n"
"\n"
"#if PS_WMS == 2\n"
" uv_out = clamp(uv, mask.xy, mask.zw);\n"
"#elif PS_WMS == 3\n"
" uv_out = (uv & mask.xy) | mask.zw;\n"
"#endif\n"
"\n"
"#else // PS_WMS != PS_WMT\n"
"\n"
"#if PS_WMS == 2\n"
" uv_out.x = clamp(uv.x, mask.x, mask.z);\n"
"#elif PS_WMS == 3\n"
" uv_out.x = (uv.x & mask.x) | mask.z;\n"
"#endif\n"
"\n"
"#if PS_WMT == 2\n"
" uv_out.y = clamp(uv.y, mask.y, mask.w);\n"
"#elif PS_WMT == 3\n"
" uv_out.y = (uv.y & mask.y) | mask.w;\n"
"#endif\n"
"\n"
"#endif\n"
"\n"
" return uv_out;\n"
"}\n"
"\n"
"vec4 sample_depth(vec2 st)\n"
"{\n"
" vec2 uv_f = vec2(clamp_wrap_uv_depth(ivec2(st))) * vec2(ScalingFactor.xy) * vec2(1.0f/16.0f);\n"
" ivec2 uv = ivec2(uv_f);\n"
"\n"
" vec4 t;\n"
"#if PS_DEPTH_FMT == 1\n"
" // Based on ps_main11 of convert\n"
"\n"
" // Convert a GL_FLOAT32 depth texture into a RGBA color texture\n"
" const vec4 bitSh = vec4(exp2(24.0f), exp2(16.0f), exp2(8.0f), exp2(0.0f));\n"
" const vec4 bitMsk = vec4(0.0, 1.0/256.0, 1.0/256.0, 1.0/256.0);\n"
"\n"
" vec4 res = fract(vec4(fetch_c(uv).r) * bitSh);\n"
"\n"
" t = (res - res.xxyz * bitMsk) * 256.0f;\n"
"\n"
"#elif PS_DEPTH_FMT == 2\n"
" // Based on ps_main12 of convert\n"
"\n"
" // Convert a GL_FLOAT32 (only 16 lsb) depth into a RGB5A1 color texture\n"
" const vec4 bitSh = vec4(exp2(32.0f), exp2(27.0f), exp2(22.0f), exp2(17.0f));\n"
" const uvec4 bitMsk = uvec4(0x1F, 0x1F, 0x1F, 0x1);\n"
" uvec4 color = uvec4(vec4(fetch_c(uv).r) * bitSh) & bitMsk;\n"
"\n"
" t = vec4(color) * vec4(8.0f, 8.0f, 8.0f, 128.0f);\n"
"\n"
"#elif PS_DEPTH_FMT == 3\n"
" // Convert a RGBA/RGB5A1 color texture into a RGBA/RGB5A1 color texture\n"
" t = fetch_c(uv) * 255.0f;\n"
"\n"
"#endif\n"
"\n"
" // warning t ranges from 0 to 255\n"
"#if (PS_AEM_FMT == FMT_24)\n"
" t.a = ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n"
"#elif (PS_AEM_FMT == FMT_16)\n"
" t.a = t.a >= 128.0f ? 255.0f * TA.y : ( (PS_AEM == 0) || any(bvec3(t.rgb)) ) ? 255.0f * TA.x : 0.0f;\n"
"#endif\n"
"\n"
"\n"
" return t;\n"
"}\n"
"//////////////////////////////////////////////////////////////////////\n"
"\n"
"vec4 sample_color(vec2 st)\n"
"{\n"
"#if (PS_TCOFFSETHACK == 1)\n"
@ -1172,10 +1260,17 @@ static const char* const tfx_fs_all_glsl =
"{\n"
" //FIXME: maybe we can set gl_Position.w = q in VS\n"
"#if (PS_FST == 0)\n"
" vec4 T = sample_color(PSin.t_float.xy / vec2(PSin.t_float.w));\n"
" vec2 st = PSin.t_float.xy / vec2(PSin.t_float.w);\n"
"#else\n"
" // Note xy are normalized coordinate\n"
" vec4 T = sample_color(PSin.t_int.xy);\n"
" vec2 st = PSin.t_int.xy;\n"
"#endif\n"
"\n"
"#if (PS_DEPTH_FMT > 0)\n"
" // Integral coordinate\n"
" vec4 T = sample_depth(PSin.t_int.zw);\n"
"#else\n"
" vec4 T = sample_color(st);\n"
"#endif\n"
"\n"
"#if PS_IIP == 1\n"