mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #763 from PCSX2/gsdx-texture-format
Gsdx improves texture format setup
This commit is contained in:
commit
0a7eed686c
|
@ -651,8 +651,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
|||
std::string macro = format("#define PS_FST %d\n", sel.fst)
|
||||
+ format("#define PS_WMS %d\n", sel.wms)
|
||||
+ format("#define PS_WMT %d\n", sel.wmt)
|
||||
+ format("#define PS_FMT %d\n", sel.fmt)
|
||||
+ format("#define PS_IFMT %d\n", sel.ifmt)
|
||||
+ format("#define PS_TEX_FMT %d\n", sel.tex_fmt)
|
||||
+ format("#define PS_DFMT %d\n", sel.dfmt)
|
||||
+ format("#define PS_AEM %d\n", sel.aem)
|
||||
+ format("#define PS_TFX %d\n", sel.tfx)
|
||||
|
@ -812,30 +811,27 @@ void GSDeviceOGL::SelfShaderTest()
|
|||
PRINT_TEST("Tfx/Tcc");
|
||||
|
||||
// Test: Texture Sampling
|
||||
for (int fmt = 0; fmt < 8; fmt++) {
|
||||
for (int fmt = 0; fmt < 16; fmt++) {
|
||||
if ((fmt & 3) == 3) continue;
|
||||
|
||||
for (int ltf = 0; ltf < 2; ltf++) {
|
||||
for (int aem = 0; aem < 2; aem++) {
|
||||
for (int ifmt = 0; ifmt < 3; ifmt++) {
|
||||
for (int wms = 1; wms < 4; wms++) {
|
||||
for (int wmt = 1; wmt < 4; wmt++) {
|
||||
PSSelector sel;
|
||||
sel.atst = 1;
|
||||
sel.tfx = 1;
|
||||
sel.tcc = 1;
|
||||
sel.fst = 1;
|
||||
for (int wms = 1; wms < 4; wms++) {
|
||||
for (int wmt = 1; wmt < 4; wmt++) {
|
||||
PSSelector sel;
|
||||
sel.atst = 1;
|
||||
sel.tfx = 1;
|
||||
sel.tcc = 1;
|
||||
sel.fst = 1;
|
||||
|
||||
sel.ltf = ltf;
|
||||
sel.aem = aem;
|
||||
sel.fmt = fmt;
|
||||
sel.ifmt = ifmt;
|
||||
sel.wms = wms;
|
||||
sel.wmt = wmt;
|
||||
std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm",
|
||||
ltf, aem, fmt, ifmt, wms, wmt);
|
||||
RUN_TEST;
|
||||
}
|
||||
sel.ltf = ltf;
|
||||
sel.aem = aem;
|
||||
sel.tex_fmt = fmt;
|
||||
sel.wms = wms;
|
||||
sel.wmt = wmt;
|
||||
std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d.glsl.asm",
|
||||
ltf, aem, fmt, wms, wmt);
|
||||
RUN_TEST;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -250,8 +250,7 @@ class GSDeviceOGL : public GSDevice
|
|||
{
|
||||
// *** Word 1
|
||||
// Format
|
||||
uint32 fmt:3;
|
||||
uint32 ifmt:2;
|
||||
uint32 tex_fmt:4;
|
||||
uint32 dfmt:2;
|
||||
// Alpha extension/Correction
|
||||
uint32 aem:1;
|
||||
|
@ -276,7 +275,7 @@ class GSDeviceOGL : public GSDevice
|
|||
uint32 write_rg:1;
|
||||
uint32 fbmask:1;
|
||||
|
||||
uint32 _free1:1;
|
||||
uint32 _free1:2;
|
||||
|
||||
// *** Word 2
|
||||
// Blend and Colclip
|
||||
|
|
|
@ -396,6 +396,7 @@ void GSRendererHW::Draw()
|
|||
return;
|
||||
}
|
||||
|
||||
// FIXME: Could be removed on openGL
|
||||
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
|
|
|
@ -802,26 +802,62 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
ps_sel.wms = m_context->CLAMP.WMS;
|
||||
ps_sel.wmt = m_context->CLAMP.WMT;
|
||||
|
||||
// Performance note:
|
||||
// 1/ Don't set 0 as it is the default value
|
||||
// 2/ Only keep aem when it is useful (avoid useless shader permutation)
|
||||
if (ps_sel.shuffle) {
|
||||
ps_sel.fmt = 0;
|
||||
} else if (tex->m_palette) {
|
||||
ps_sel.fmt = cpsm.fmt | 4;
|
||||
ps_sel.ifmt = !tex->m_target ? 0
|
||||
: (m_context->TEX0.PSM == PSM_PSMT4HL) ? 2
|
||||
: (m_context->TEX0.PSM == PSM_PSMT4HH) ? 1
|
||||
: 0;
|
||||
// Force a 32 bits access (normally shuffle is done on 16 bits)
|
||||
// ps_sel.tex_fmt = 0; // removed as an optimization
|
||||
ps_sel.aem = m_env.TEXA.AEM;
|
||||
ASSERT(tex->m_target);
|
||||
|
||||
// In standard mode palette is only used when alpha channel of the RT is
|
||||
// reinterpreted as an index. Star Ocean 3 uses it to emulate a stencil buffer.
|
||||
// It is a very bad idea to force bilinear filtering on it.
|
||||
if (tex->m_target)
|
||||
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
|
||||
ps_cb.MinF_TA = ta.xyxy() / 255.0f;
|
||||
|
||||
// FIXME: it is likely a bad idea to do the bilinear interpolation here
|
||||
// bilinear &= m_vt.IsLinear();
|
||||
|
||||
} else if (tex->m_target) {
|
||||
// Use an old target. AEM and index aren't resolved, so it must be done
|
||||
// on the GPU
|
||||
|
||||
// Select the 32/24/16 bits color (AEM)
|
||||
ps_sel.tex_fmt = cpsm.fmt;
|
||||
ps_sel.aem = m_env.TEXA.AEM;
|
||||
|
||||
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
|
||||
ps_cb.MinF_TA = ta.xyxy() / 255.0f;
|
||||
|
||||
// Select the index format
|
||||
if (tex->m_palette) {
|
||||
// FIXME Potentially improve fmt field in GSLocalMemory
|
||||
if (m_context->TEX0.PSM == PSM_PSMT4HL)
|
||||
ps_sel.tex_fmt |= 1 << 2;
|
||||
else if (m_context->TEX0.PSM == PSM_PSMT4HH)
|
||||
ps_sel.tex_fmt |= 2 << 2;
|
||||
else
|
||||
ps_sel.tex_fmt |= 3 << 2;
|
||||
|
||||
// Alpha channel of the RT is reinterpreted as an index. Star
|
||||
// Ocean 3 uses it to emulate a stencil buffer. It is a very
|
||||
// bad idea to force bilinear filtering on it.
|
||||
bilinear &= m_vt.IsLinear();
|
||||
}
|
||||
|
||||
} else if (tex->m_palette) {
|
||||
// Use a standard 8 bits texture. AEM is already done on the CLUT
|
||||
// Therefore you only need to set the index
|
||||
// ps_sel.tex_fmt = 0; // removed as an optimization
|
||||
// ps_sel.aem = 0; // removed as an optimization
|
||||
|
||||
// Note 4 bits indexes are converted to 8 bits
|
||||
ps_sel.tex_fmt = 3 << 2;
|
||||
|
||||
//GL_INS("Use palette with format %d and index format %d", ps_sel.fmt, ps_sel.ifmt);
|
||||
} else {
|
||||
ps_sel.fmt = cpsm.fmt;
|
||||
// Standard texture. Both index and AEM expansion were already done by the CPU.
|
||||
// ps_sel.tex_fmt = 0; // removed as an optimization
|
||||
// ps_sel.aem = 0; // removed as an optimization
|
||||
}
|
||||
ps_sel.aem = m_env.TEXA.AEM;
|
||||
|
||||
if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) {
|
||||
// Micro optimization that reduces GPU load (removes 5 instructions on the FS program)
|
||||
|
@ -856,8 +892,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
ps_sel.tcoffsethack = !!UserHacks_TCOffset;
|
||||
ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy();
|
||||
|
||||
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
|
||||
ps_cb.MinF_TA = ta.xyxy() / WH.xyxy(GSVector4(255, 255));
|
||||
|
||||
// Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader
|
||||
ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP);
|
||||
|
|
|
@ -22,9 +22,13 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSTextureCache.h"
|
||||
|
||||
bool s_IS_OPENGL = false;
|
||||
|
||||
GSTextureCache::GSTextureCache(GSRenderer* r)
|
||||
: m_renderer(r)
|
||||
{
|
||||
s_IS_OPENGL = (theApp.GetConfig("Renderer", 12) == 12);
|
||||
|
||||
m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0;
|
||||
UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
|
||||
|
||||
|
@ -72,12 +76,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
||||
|
||||
GIFRegTEXA plainTEXA;
|
||||
// Until DX is fixed
|
||||
if (s_IS_OPENGL) {
|
||||
if(psm.pal > 0)
|
||||
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
|
||||
} else {
|
||||
GIFRegTEXA plainTEXA;
|
||||
|
||||
plainTEXA.AEM = 1;
|
||||
plainTEXA.TA0 = 0;
|
||||
plainTEXA.TA1 = 0x80;
|
||||
m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA);
|
||||
plainTEXA.AEM = 1;
|
||||
plainTEXA.TA0 = 0;
|
||||
plainTEXA.TA1 = 0x80;
|
||||
m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA);
|
||||
}
|
||||
|
||||
const uint32* clut = m_renderer->m_mem.m_clut;
|
||||
|
||||
|
@ -85,26 +95,27 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
|
||||
list<Source*>& m = m_src.m_map[TEX0.TBP0 >> 5];
|
||||
|
||||
|
||||
for(list<Source*>::iterator i = m.begin(); i != m.end(); i++)
|
||||
{
|
||||
Source* s = *i;
|
||||
|
||||
if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||
{
|
||||
if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||
continue;
|
||||
}
|
||||
|
||||
// Special check for palette texture (psm.pal > 0)
|
||||
//
|
||||
// if m_paltex is enabled
|
||||
// 1/ s->m_palette must always be defined
|
||||
// 2/ Clut is useless (will be uploaded again at the end of the function)
|
||||
//
|
||||
// if m_paltex is disabled
|
||||
// 1/ Clut must match if m_palette is NULL
|
||||
if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
|
||||
{
|
||||
continue;
|
||||
// Targets are converted (AEM & palette) on the fly by the GPU. They don't need extra checks
|
||||
if (!s->m_target) {
|
||||
// We request a palette texture (psm.pal). If the texture was
|
||||
// converted by the CPU (s->m_palette == NULL), we need to ensure
|
||||
// palette content is the same.
|
||||
// Note: content of the palette will be uploaded at the end of the function
|
||||
if (psm.pal > 0 && s->m_palette == NULL && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
|
||||
continue;
|
||||
|
||||
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
|
||||
// the CPU. We need to check that TEXA is identical
|
||||
if (psm.pal == 0 && psm.fmt > 0 && s->m_TEXA.u64 != TEXA.u64)
|
||||
continue;
|
||||
}
|
||||
|
||||
m.splice(m.begin(), m, i);
|
||||
|
@ -147,7 +158,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
|||
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
|
||||
|
||||
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) {
|
||||
if (!IsOpenGL() && (psm == PSM_PSMT8)) {
|
||||
if (!s_IS_OPENGL && (psm == PSM_PSMT8)) {
|
||||
// OpenGL can convert the texture directly in the GPU. Not sure we want to keep this
|
||||
// code for DX. It fixes effect but it is slow (MGS3)
|
||||
|
||||
|
@ -324,7 +335,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
|||
//
|
||||
// From a performance point of view, it might cost a little on big upscaling
|
||||
// but normally few RTs are missed, so it must remain reasonable.
|
||||
if (IsOpenGL()) {
|
||||
if (s_IS_OPENGL) {
|
||||
switch (type) {
|
||||
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
|
||||
case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break;
|
||||
|
@ -863,7 +874,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
|||
// TODO: clean up this mess
|
||||
|
||||
int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY;
|
||||
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL();
|
||||
bool is_8bits = TEX0.PSM == PSM_PSMT8 && s_IS_OPENGL;
|
||||
|
||||
if (is_8bits) {
|
||||
GL_INS("Reading RT as a packed-indexed 8 bits format");
|
||||
|
@ -1417,9 +1428,14 @@ void GSTextureCache::Source::Flush(uint32 count)
|
|||
|
||||
GIFRegTEXA plainTEXA;
|
||||
|
||||
plainTEXA.AEM = 1;
|
||||
plainTEXA.TA0 = 0;
|
||||
plainTEXA.TA1 = 0x80;
|
||||
// Until DX is fixed
|
||||
if (s_IS_OPENGL) {
|
||||
plainTEXA = m_TEXA;
|
||||
} else {
|
||||
plainTEXA.AEM = 1;
|
||||
plainTEXA.TA0 = 0;
|
||||
plainTEXA.TA1 = 0x80;
|
||||
}
|
||||
|
||||
if(m_palette)
|
||||
{
|
||||
|
|
|
@ -129,7 +129,6 @@ protected:
|
|||
#endif
|
||||
|
||||
virtual bool CanConvertDepth() { return m_can_convert_depth; }
|
||||
virtual bool IsOpenGL() { return false; }
|
||||
|
||||
public:
|
||||
GSTextureCache(GSRenderer* r);
|
||||
|
|
|
@ -32,8 +32,6 @@ protected:
|
|||
|
||||
void Read(Target* t, const GSVector4i& r);
|
||||
|
||||
virtual bool IsOpenGL() { return true; }
|
||||
|
||||
public:
|
||||
GSTextureCacheOGL(GSRenderer* r);
|
||||
};
|
||||
|
|
|
@ -6,7 +6,9 @@
|
|||
#define FMT_32 0
|
||||
#define FMT_24 1
|
||||
#define FMT_16 2
|
||||
#define FMT_PAL 4 /* flag bit */
|
||||
|
||||
#define PS_PAL_FMT (PS_TEX_FMT >> 2)
|
||||
#define PS_AEM_FMT (PS_TEX_FMT & 3)
|
||||
|
||||
// APITRACE_DEBUG enables forced pixel output to easily detect
|
||||
// the fragment computed by primitive
|
||||
|
@ -162,14 +164,14 @@ vec4 sample_4_index(vec4 uv)
|
|||
|
||||
uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value
|
||||
|
||||
#if PS_IFMT == 1
|
||||
// 4HH
|
||||
return vec4(i >> 4u) / 255.0f;
|
||||
|
||||
#elif PS_IFMT == 2
|
||||
// 4HL
|
||||
#if PS_PAL_FMT == 1
|
||||
// 4HL
|
||||
return vec4(i & 0xFu) / 255.0f;
|
||||
|
||||
#elif PS_PAL_FMT == 2
|
||||
// 4HH
|
||||
return vec4(i >> 4u) / 255.0f;
|
||||
|
||||
#else
|
||||
// Most of texture will hit this code so keep normalized float value
|
||||
|
||||
|
@ -207,7 +209,7 @@ vec4 sample_color(vec2 st, float q)
|
|||
vec2 dd;
|
||||
|
||||
// FIXME I'm not sure this condition is useful (I think code will be optimized)
|
||||
#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2)
|
||||
#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
|
||||
// No software LTF and pure 32 bits RGBA texture without special texture wrapping
|
||||
c[0] = sample_c(st);
|
||||
#ifdef TEX_COORD_DEBUG
|
||||
|
@ -229,14 +231,12 @@ vec4 sample_color(vec2 st, float q)
|
|||
|
||||
uv = clamp_wrap_uv(uv);
|
||||
|
||||
if((PS_FMT & FMT_PAL) != 0)
|
||||
{
|
||||
c = sample_4p(sample_4_index(uv));
|
||||
}
|
||||
else
|
||||
{
|
||||
c = sample_4c(uv);
|
||||
}
|
||||
#if PS_PAL_FMT != 0
|
||||
c = sample_4p(sample_4_index(uv));
|
||||
#else
|
||||
c = sample_4c(uv);
|
||||
#endif
|
||||
|
||||
#ifdef TEX_COORD_DEBUG
|
||||
c[0].rg = uv.xy;
|
||||
c[1].rg = uv.xy;
|
||||
|
@ -246,18 +246,17 @@ vec4 sample_color(vec2 st, float q)
|
|||
|
||||
#endif
|
||||
|
||||
// PERF: see the impact of the expansion before/after the interpolation
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
// PERF note: using dot product reduces by 1 the number of instruction
|
||||
// but I'm not sure it is equivalent neither faster.
|
||||
// PERF note: using dot product reduces by 1 the number of instruction
|
||||
// but I'm not sure it is equivalent neither faster.
|
||||
for (int i = 0; i < 4; i++)
|
||||
{
|
||||
//float sum = dot(c[i].rgb, vec3(1.0f));
|
||||
#if ((PS_FMT & ~FMT_PAL) == FMT_24)
|
||||
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||
//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||
#elif ((PS_FMT & ~FMT_PAL) == FMT_16)
|
||||
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||
//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||
#if (PS_AEM_FMT == FMT_24)
|
||||
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||
//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||
#elif (PS_AEM_FMT == FMT_16)
|
||||
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||
//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -910,7 +910,9 @@ static const char* tfx_fs_all_glsl =
|
|||
"#define FMT_32 0\n"
|
||||
"#define FMT_24 1\n"
|
||||
"#define FMT_16 2\n"
|
||||
"#define FMT_PAL 4 /* flag bit */\n"
|
||||
"\n"
|
||||
"#define PS_PAL_FMT (PS_TEX_FMT >> 2)\n"
|
||||
"#define PS_AEM_FMT (PS_TEX_FMT & 3)\n"
|
||||
"\n"
|
||||
"// APITRACE_DEBUG enables forced pixel output to easily detect\n"
|
||||
"// the fragment computed by primitive\n"
|
||||
|
@ -1066,14 +1068,14 @@ static const char* tfx_fs_all_glsl =
|
|||
"\n"
|
||||
" uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n"
|
||||
"\n"
|
||||
"#if PS_IFMT == 1\n"
|
||||
" // 4HH\n"
|
||||
" return vec4(i >> 4u) / 255.0f;\n"
|
||||
"\n"
|
||||
"#elif PS_IFMT == 2\n"
|
||||
" // 4HL\n"
|
||||
"#if PS_PAL_FMT == 1\n"
|
||||
" // 4HL\n"
|
||||
" return vec4(i & 0xFu) / 255.0f;\n"
|
||||
"\n"
|
||||
"#elif PS_PAL_FMT == 2\n"
|
||||
" // 4HH\n"
|
||||
" return vec4(i >> 4u) / 255.0f;\n"
|
||||
"\n"
|
||||
"#else\n"
|
||||
" // Most of texture will hit this code so keep normalized float value\n"
|
||||
"\n"
|
||||
|
@ -1111,7 +1113,7 @@ static const char* tfx_fs_all_glsl =
|
|||
" vec2 dd;\n"
|
||||
"\n"
|
||||
" // FIXME I'm not sure this condition is useful (I think code will be optimized)\n"
|
||||
"#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2)\n"
|
||||
"#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)\n"
|
||||
" // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n"
|
||||
" c[0] = sample_c(st);\n"
|
||||
"#ifdef TEX_COORD_DEBUG\n"
|
||||
|
@ -1133,14 +1135,12 @@ static const char* tfx_fs_all_glsl =
|
|||
"\n"
|
||||
" uv = clamp_wrap_uv(uv);\n"
|
||||
"\n"
|
||||
" if((PS_FMT & FMT_PAL) != 0)\n"
|
||||
" {\n"
|
||||
" c = sample_4p(sample_4_index(uv));\n"
|
||||
" }\n"
|
||||
" else\n"
|
||||
" {\n"
|
||||
" c = sample_4c(uv);\n"
|
||||
" }\n"
|
||||
"#if PS_PAL_FMT != 0\n"
|
||||
" c = sample_4p(sample_4_index(uv));\n"
|
||||
"#else\n"
|
||||
" c = sample_4c(uv);\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
"#ifdef TEX_COORD_DEBUG\n"
|
||||
" c[0].rg = uv.xy;\n"
|
||||
" c[1].rg = uv.xy;\n"
|
||||
|
@ -1150,18 +1150,17 @@ static const char* tfx_fs_all_glsl =
|
|||
"\n"
|
||||
"#endif\n"
|
||||
"\n"
|
||||
" // PERF: see the impact of the exansion before/after the interpolation\n"
|
||||
" for (int i = 0; i < 4; i++)\n"
|
||||
" {\n"
|
||||
" // PERF note: using dot product reduces by 1 the number of instruction\n"
|
||||
" // but I'm not sure it is equivalent neither faster.\n"
|
||||
" // PERF note: using dot product reduces by 1 the number of instruction\n"
|
||||
" // but I'm not sure it is equivalent neither faster.\n"
|
||||
" for (int i = 0; i < 4; i++)\n"
|
||||
" {\n"
|
||||
" //float sum = dot(c[i].rgb, vec3(1.0f));\n"
|
||||
"#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n"
|
||||
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||
"#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n"
|
||||
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||
"#if (PS_AEM_FMT == FMT_24)\n"
|
||||
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||
"#elif (PS_AEM_FMT == FMT_16)\n"
|
||||
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||
"#endif\n"
|
||||
" }\n"
|
||||
"\n"
|
||||
|
|
Loading…
Reference in New Issue