diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 0fef4ec759..618d8f2a6b 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -651,8 +651,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) std::string macro = format("#define PS_FST %d\n", sel.fst) + format("#define PS_WMS %d\n", sel.wms) + format("#define PS_WMT %d\n", sel.wmt) - + format("#define PS_FMT %d\n", sel.fmt) - + format("#define PS_IFMT %d\n", sel.ifmt) + + format("#define PS_TEX_FMT %d\n", sel.tex_fmt) + format("#define PS_DFMT %d\n", sel.dfmt) + format("#define PS_AEM %d\n", sel.aem) + format("#define PS_TFX %d\n", sel.tfx) @@ -812,30 +811,27 @@ void GSDeviceOGL::SelfShaderTest() PRINT_TEST("Tfx/Tcc"); // Test: Texture Sampling - for (int fmt = 0; fmt < 8; fmt++) { + for (int fmt = 0; fmt < 16; fmt++) { if ((fmt & 3) == 3) continue; for (int ltf = 0; ltf < 2; ltf++) { for (int aem = 0; aem < 2; aem++) { - for (int ifmt = 0; ifmt < 3; ifmt++) { - for (int wms = 1; wms < 4; wms++) { - for (int wmt = 1; wmt < 4; wmt++) { - PSSelector sel; - sel.atst = 1; - sel.tfx = 1; - sel.tcc = 1; - sel.fst = 1; + for (int wms = 1; wms < 4; wms++) { + for (int wmt = 1; wmt < 4; wmt++) { + PSSelector sel; + sel.atst = 1; + sel.tfx = 1; + sel.tcc = 1; + sel.fst = 1; - sel.ltf = ltf; - sel.aem = aem; - sel.fmt = fmt; - sel.ifmt = ifmt; - sel.wms = wms; - sel.wmt = wmt; - std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm", - ltf, aem, fmt, ifmt, wms, wmt); - RUN_TEST; - } + sel.ltf = ltf; + sel.aem = aem; + sel.tex_fmt = fmt; + sel.wms = wms; + sel.wmt = wmt; + std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d.glsl.asm", + ltf, aem, fmt, wms, wmt); + RUN_TEST; } } } diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 04bd033905..d170650dfa 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -250,8 +250,7 @@ class GSDeviceOGL : public GSDevice { // *** Word 1 // Format - uint32 fmt:3; - uint32 ifmt:2; + uint32 tex_fmt:4; uint32 dfmt:2; // Alpha extension/Correction uint32 aem:1; @@ -276,7 +275,7 @@ class GSDeviceOGL : public GSDevice uint32 write_rg:1; uint32 fbmask:1; - uint32 _free1:1; + uint32 _free1:2; // *** Word 2 // Blend and Colclip diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 4efa2eb041..355aff723b 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -396,6 +396,7 @@ void GSRendererHW::Draw() return; } + // FIXME: Could be removed on openGL if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0) { m_mem.m_clut.Read32(context->TEX0, env.TEXA); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 506498075f..bb3a8c7b0d 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -802,26 +802,62 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.wms = m_context->CLAMP.WMS; ps_sel.wmt = m_context->CLAMP.WMT; + // Performance note: + // 1/ Don't set 0 as it is the default value + // 2/ Only keep aem when it is useful (avoid useless shader permutation) if (ps_sel.shuffle) { - ps_sel.fmt = 0; - } else if (tex->m_palette) { - ps_sel.fmt = cpsm.fmt | 4; - ps_sel.ifmt = !tex->m_target ? 0 - : (m_context->TEX0.PSM == PSM_PSMT4HL) ? 2 - : (m_context->TEX0.PSM == PSM_PSMT4HH) ? 1 - : 0; + // Force a 32 bits access (normally shuffle is done on 16 bits) + // ps_sel.tex_fmt = 0; // removed as an optimization + ps_sel.aem = m_env.TEXA.AEM; + ASSERT(tex->m_target); - // In standard mode palette is only used when alpha channel of the RT is - // reinterpreted as an index. Star Ocean 3 uses it to emulate a stencil buffer. - // It is a very bad idea to force bilinear filtering on it. - if (tex->m_target) + GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); + ps_cb.MinF_TA = ta.xyxy() / 255.0f; + + // FIXME: it is likely a bad idea to do the bilinear interpolation here + // bilinear &= m_vt.IsLinear(); + + } else if (tex->m_target) { + // Use an old target. AEM and index aren't resolved it must be done + // on the GPU + + // Select the 32/24/16 bits color (AEM) + ps_sel.tex_fmt = cpsm.fmt; + ps_sel.aem = m_env.TEXA.AEM; + + GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); + ps_cb.MinF_TA = ta.xyxy() / 255.0f; + + // Select the index format + if (tex->m_palette) { + // FIXME Potentially improve fmt field in GSLocalMemory + if (m_context->TEX0.PSM == PSM_PSMT4HL) + ps_sel.tex_fmt |= 1 << 2; + else if (m_context->TEX0.PSM == PSM_PSMT4HH) + ps_sel.tex_fmt |= 2 << 2; + else + ps_sel.tex_fmt |= 3 << 2; + + // Alpha channel of the RT is reinterpreted as an index. Star + // Ocean 3 uses it to emulate a stencil buffer. It is a very + // bad idea to force bilinear filtering on it. bilinear &= m_vt.IsLinear(); + } + + } else if (tex->m_palette) { + // Use a standard 8 bits texture. AEM is already done on the CLUT + // Therefore you only need to set the index + // ps_sel.tex_fmt = 0; // removed as an optimization + // ps_sel.aem = 0; // removed as an optimization + + // Note 4 bits indexes are converted to 8 bits + ps_sel.tex_fmt = 3 << 2; - //GL_INS("Use palette with format %d and index format %d", ps_sel.fmt, ps_sel.ifmt); } else { - ps_sel.fmt = cpsm.fmt; + // Standard texture. Both index and AEM expansion were already done by the CPU. + // ps_sel.tex_fmt = 0; // removed as an optimization + // ps_sel.aem = 0; // removed as an optimization } - ps_sel.aem = m_env.TEXA.AEM; if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { // Micro optimization that reduces GPU load (removes 5 instructions on the FS program) @@ -856,8 +892,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.tcoffsethack = !!UserHacks_TCOffset; ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy(); - GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); - ps_cb.MinF_TA = ta.xyxy() / WH.xyxy(GSVector4(255, 255)); // Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP); diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index e8144130d2..c31c6213cd 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -22,9 +22,13 @@ #include "stdafx.h" #include "GSTextureCache.h" +bool s_IS_OPENGL = false; + GSTextureCache::GSTextureCache(GSRenderer* r) : m_renderer(r) { + s_IS_OPENGL = (theApp.GetConfig("Renderer", 12) == 12); + m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); @@ -72,12 +76,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; //const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm; - GIFRegTEXA plainTEXA; + // Until DX is fixed + if (s_IS_OPENGL) { + if(psm.pal > 0) + m_renderer->m_mem.m_clut.Read32(TEX0, TEXA); + } else { + GIFRegTEXA plainTEXA; - plainTEXA.AEM = 1; - plainTEXA.TA0 = 0; - plainTEXA.TA1 = 0x80; - m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA); + plainTEXA.AEM = 1; + plainTEXA.TA0 = 0; + plainTEXA.TA1 = 0x80; + m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA); + } const uint32* clut = m_renderer->m_mem.m_clut; @@ -85,26 +95,27 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con list& m = m_src.m_map[TEX0.TBP0 >> 5]; + for(list::iterator i = m.begin(); i != m.end(); i++) { Source* s = *i; - if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH - { + if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH continue; - } - // Special check for palette texture (psm.pal > 0) - // - // if m_paltex is enabled - // 1/ s->m_palette must always be defined - // 2/ Clut is useless (will be uploaded again at the end of the function) - // - // if m_paltex is disabled - // 1/ Clut must match if m_palette is NULL - if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0]))) - { - continue; + // Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check + if (!s->m_target) { + // We request a palette texture (psm.pal). If the texture was + // converted by the CPU (s->m_palette == NULL), we need to ensure + // palette content is the same. + // Note: content of the palette will be uploaded at the end of the function + if (psm.pal > 0 && s->m_palette == NULL && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0]))) + continue; + + // We request a 24/16 bit RGBA texture. Alpha expansion was done by + // the CPU. We need to check that TEXA is identical + if (psm.pal == 0 && psm.fmt > 0 && s->m_TEXA.u64 != TEXA.u64) + continue; } m.splice(m.begin(), m, i); @@ -147,7 +158,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { - if (!IsOpenGL() && (psm == PSM_PSMT8)) { + if (!s_IS_OPENGL && (psm == PSM_PSMT8)) { // OpenGL can convert the texture directly in the GPU. Not sure we want to keep this // code for DX. It fixes effect but it is slow (MGS3) @@ -324,7 +335,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int // // From a performance point of view, it might cost a little on big upscaling // but normally few RT are miss so it must remain reasonable. - if (IsOpenGL()) { + if (s_IS_OPENGL) { switch (type) { case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break; @@ -863,7 +874,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // TODO: clean up this mess int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY; - bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL(); + bool is_8bits = TEX0.PSM == PSM_PSMT8 && s_IS_OPENGL; if (is_8bits) { GL_INS("Reading RT as a packed-indexed 8 bits format"); @@ -1417,9 +1428,14 @@ void GSTextureCache::Source::Flush(uint32 count) GIFRegTEXA plainTEXA; - plainTEXA.AEM = 1; - plainTEXA.TA0 = 0; - plainTEXA.TA1 = 0x80; + // Until DX is fixed + if (s_IS_OPENGL) { + plainTEXA = m_TEXA; + } else { + plainTEXA.AEM = 1; + plainTEXA.TA0 = 0; + plainTEXA.TA1 = 0x80; + } if(m_palette) { diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 98dc674f42..0c4cea8111 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -129,7 +129,6 @@ protected: #endif virtual bool CanConvertDepth() { return m_can_convert_depth; } - virtual bool IsOpenGL() { return false; } public: GSTextureCache(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCacheOGL.h b/plugins/GSdx/GSTextureCacheOGL.h index 4e241d4be4..f3f1216ae0 100644 --- a/plugins/GSdx/GSTextureCacheOGL.h +++ b/plugins/GSdx/GSTextureCacheOGL.h @@ -32,8 +32,6 @@ protected: void Read(Target* t, const GSVector4i& r); - virtual bool IsOpenGL() { return true; } - public: GSTextureCacheOGL(GSRenderer* r); }; diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 754ffbf9b0..67c71fba46 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -6,7 +6,9 @@ #define FMT_32 0 #define FMT_24 1 #define FMT_16 2 -#define FMT_PAL 4 /* flag bit */ + +#define PS_PAL_FMT (PS_TEX_FMT >> 2) +#define PS_AEM_FMT (PS_TEX_FMT & 3) // APITRACE_DEBUG enables forced pixel output to easily detect // the fragment computed by primitive @@ -162,14 +164,14 @@ vec4 sample_4_index(vec4 uv) uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value -#if PS_IFMT == 1 - // 4HH - return vec4(i >> 4u) / 255.0f; - -#elif PS_IFMT == 2 - // 4HL +#if PS_PAL_FMT == 1 + // 4HL return vec4(i & 0xFu) / 255.0f; +#elif PS_PAL_FMT == 2 + // 4HH + return vec4(i >> 4u) / 255.0f; + #else // Most of texture will hit this code so keep normalized float value @@ -207,7 +209,7 @@ vec4 sample_color(vec2 st, float q) vec2 dd; // FIXME I'm not sure this condition is useful (I think code will be optimized) -#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2) +#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2) // No software LTF and pure 32 bits RGBA texure without special texture wrapping c[0] = sample_c(st); #ifdef TEX_COORD_DEBUG @@ -229,14 +231,12 @@ vec4 sample_color(vec2 st, float q) uv = clamp_wrap_uv(uv); - if((PS_FMT & FMT_PAL) != 0) - { - c = sample_4p(sample_4_index(uv)); - } - else - { - c = sample_4c(uv); - } +#if PS_PAL_FMT != 0 + c = sample_4p(sample_4_index(uv)); +#else + c = sample_4c(uv); +#endif + #ifdef TEX_COORD_DEBUG c[0].rg = uv.xy; c[1].rg = uv.xy; @@ -246,18 +246,17 @@ vec4 sample_color(vec2 st, float q) #endif - // PERF: see the impact of the exansion before/after the interpolation - for (int i = 0; i < 4; i++) - { - // PERF note: using dot product reduces by 1 the number of instruction - // but I'm not sure it is equivalent neither faster. + // PERF note: using dot product reduces by 1 the number of instruction + // but I'm not sure it is equivalent neither faster. + for (int i = 0; i < 4; i++) + { //float sum = dot(c[i].rgb, vec3(1.0f)); -#if ((PS_FMT & ~FMT_PAL) == FMT_24) - c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; - //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; -#elif ((PS_FMT & ~FMT_PAL) == FMT_16) - c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; - //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; +#if (PS_AEM_FMT == FMT_24) + c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; + //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; +#elif (PS_AEM_FMT == FMT_16) + c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f; + //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f; #endif } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index fc9ecc217b..ccffb134a8 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -910,7 +910,9 @@ static const char* tfx_fs_all_glsl = "#define FMT_32 0\n" "#define FMT_24 1\n" "#define FMT_16 2\n" - "#define FMT_PAL 4 /* flag bit */\n" + "\n" + "#define PS_PAL_FMT (PS_TEX_FMT >> 2)\n" + "#define PS_AEM_FMT (PS_TEX_FMT & 3)\n" "\n" "// APITRACE_DEBUG enables forced pixel output to easily detect\n" "// the fragment computed by primitive\n" @@ -1066,14 +1068,14 @@ static const char* tfx_fs_all_glsl = "\n" " uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n" "\n" - "#if PS_IFMT == 1\n" - " // 4HH\n" - " return vec4(i >> 4u) / 255.0f;\n" - "\n" - "#elif PS_IFMT == 2\n" - " // 4HL\n" + "#if PS_PAL_FMT == 1\n" + " // 4HL\n" " return vec4(i & 0xFu) / 255.0f;\n" "\n" + "#elif PS_PAL_FMT == 2\n" + " // 4HH\n" + " return vec4(i >> 4u) / 255.0f;\n" + "\n" "#else\n" " // Most of texture will hit this code so keep normalized float value\n" "\n" @@ -1111,7 +1113,7 @@ static const char* tfx_fs_all_glsl = " vec2 dd;\n" "\n" " // FIXME I'm not sure this condition is useful (I think code will be optimized)\n" - "#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2)\n" + "#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)\n" " // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n" " c[0] = sample_c(st);\n" "#ifdef TEX_COORD_DEBUG\n" @@ -1133,14 +1135,12 @@ static const char* tfx_fs_all_glsl = "\n" " uv = clamp_wrap_uv(uv);\n" "\n" - " if((PS_FMT & FMT_PAL) != 0)\n" - " {\n" - " c = sample_4p(sample_4_index(uv));\n" - " }\n" - " else\n" - " {\n" - " c = sample_4c(uv);\n" - " }\n" + "#if PS_PAL_FMT != 0\n" + " c = sample_4p(sample_4_index(uv));\n" + "#else\n" + " c = sample_4c(uv);\n" + "#endif\n" + "\n" "#ifdef TEX_COORD_DEBUG\n" " c[0].rg = uv.xy;\n" " c[1].rg = uv.xy;\n" @@ -1150,18 +1150,17 @@ static const char* tfx_fs_all_glsl = "\n" "#endif\n" "\n" - " // PERF: see the impact of the exansion before/after the interpolation\n" - " for (int i = 0; i < 4; i++)\n" - " {\n" - " // PERF note: using dot product reduces by 1 the number of instruction\n" - " // but I'm not sure it is equivalent neither faster.\n" + " // PERF note: using dot product reduces by 1 the number of instruction\n" + " // but I'm not sure it is equivalent neither faster.\n" + " for (int i = 0; i < 4; i++)\n" + " {\n" " //float sum = dot(c[i].rgb, vec3(1.0f));\n" - "#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n" - " c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" - " //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" - "#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n" - " c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" - " //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" + "#if (PS_AEM_FMT == FMT_24)\n" + " c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" + " //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" + "#elif (PS_AEM_FMT == FMT_16)\n" + " c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n" + " //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n" "#endif\n" " }\n" "\n"