mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #763 from PCSX2/gsdx-texture-format
Gsdx improves texture format setup
This commit is contained in:
commit
0a7eed686c
|
@ -651,8 +651,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||||
std::string macro = format("#define PS_FST %d\n", sel.fst)
|
std::string macro = format("#define PS_FST %d\n", sel.fst)
|
||||||
+ format("#define PS_WMS %d\n", sel.wms)
|
+ format("#define PS_WMS %d\n", sel.wms)
|
||||||
+ format("#define PS_WMT %d\n", sel.wmt)
|
+ format("#define PS_WMT %d\n", sel.wmt)
|
||||||
+ format("#define PS_FMT %d\n", sel.fmt)
|
+ format("#define PS_TEX_FMT %d\n", sel.tex_fmt)
|
||||||
+ format("#define PS_IFMT %d\n", sel.ifmt)
|
|
||||||
+ format("#define PS_DFMT %d\n", sel.dfmt)
|
+ format("#define PS_DFMT %d\n", sel.dfmt)
|
||||||
+ format("#define PS_AEM %d\n", sel.aem)
|
+ format("#define PS_AEM %d\n", sel.aem)
|
||||||
+ format("#define PS_TFX %d\n", sel.tfx)
|
+ format("#define PS_TFX %d\n", sel.tfx)
|
||||||
|
@ -812,12 +811,11 @@ void GSDeviceOGL::SelfShaderTest()
|
||||||
PRINT_TEST("Tfx/Tcc");
|
PRINT_TEST("Tfx/Tcc");
|
||||||
|
|
||||||
// Test: Texture Sampling
|
// Test: Texture Sampling
|
||||||
for (int fmt = 0; fmt < 8; fmt++) {
|
for (int fmt = 0; fmt < 16; fmt++) {
|
||||||
if ((fmt & 3) == 3) continue;
|
if ((fmt & 3) == 3) continue;
|
||||||
|
|
||||||
for (int ltf = 0; ltf < 2; ltf++) {
|
for (int ltf = 0; ltf < 2; ltf++) {
|
||||||
for (int aem = 0; aem < 2; aem++) {
|
for (int aem = 0; aem < 2; aem++) {
|
||||||
for (int ifmt = 0; ifmt < 3; ifmt++) {
|
|
||||||
for (int wms = 1; wms < 4; wms++) {
|
for (int wms = 1; wms < 4; wms++) {
|
||||||
for (int wmt = 1; wmt < 4; wmt++) {
|
for (int wmt = 1; wmt < 4; wmt++) {
|
||||||
PSSelector sel;
|
PSSelector sel;
|
||||||
|
@ -828,19 +826,17 @@ void GSDeviceOGL::SelfShaderTest()
|
||||||
|
|
||||||
sel.ltf = ltf;
|
sel.ltf = ltf;
|
||||||
sel.aem = aem;
|
sel.aem = aem;
|
||||||
sel.fmt = fmt;
|
sel.tex_fmt = fmt;
|
||||||
sel.ifmt = ifmt;
|
|
||||||
sel.wms = wms;
|
sel.wms = wms;
|
||||||
sel.wmt = wmt;
|
sel.wmt = wmt;
|
||||||
std::string file = format("Shader_Ltf_%d__Aem_%d__Fmt_%d__Ifmt_%d__Wms_%d__Wmt_%d.glsl.asm",
|
std::string file = format("Shader_Ltf_%d__Aem_%d__TFmt_%d__Wms_%d__Wmt_%d.glsl.asm",
|
||||||
ltf, aem, fmt, ifmt, wms, wmt);
|
ltf, aem, fmt, wms, wmt);
|
||||||
RUN_TEST;
|
RUN_TEST;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
PRINT_TEST("Texture Sampling");
|
PRINT_TEST("Texture Sampling");
|
||||||
|
|
||||||
fprintf(stderr, "\nTotal %d\n", all);
|
fprintf(stderr, "\nTotal %d\n", all);
|
||||||
|
|
|
@ -250,8 +250,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
{
|
{
|
||||||
// *** Word 1
|
// *** Word 1
|
||||||
// Format
|
// Format
|
||||||
uint32 fmt:3;
|
uint32 tex_fmt:4;
|
||||||
uint32 ifmt:2;
|
|
||||||
uint32 dfmt:2;
|
uint32 dfmt:2;
|
||||||
// Alpha extension/Correction
|
// Alpha extension/Correction
|
||||||
uint32 aem:1;
|
uint32 aem:1;
|
||||||
|
@ -276,7 +275,7 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 write_rg:1;
|
uint32 write_rg:1;
|
||||||
uint32 fbmask:1;
|
uint32 fbmask:1;
|
||||||
|
|
||||||
uint32 _free1:1;
|
uint32 _free1:2;
|
||||||
|
|
||||||
// *** Word 2
|
// *** Word 2
|
||||||
// Blend and Colclip
|
// Blend and Colclip
|
||||||
|
|
|
@ -396,6 +396,7 @@ void GSRendererHW::Draw()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: Could be removed on openGL
|
||||||
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||||
{
|
{
|
||||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||||
|
|
|
@ -802,26 +802,62 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
ps_sel.wms = m_context->CLAMP.WMS;
|
ps_sel.wms = m_context->CLAMP.WMS;
|
||||||
ps_sel.wmt = m_context->CLAMP.WMT;
|
ps_sel.wmt = m_context->CLAMP.WMT;
|
||||||
|
|
||||||
|
// Performance note:
|
||||||
|
// 1/ Don't set 0 as it is the default value
|
||||||
|
// 2/ Only keep aem when it is useful (avoid useless shader permutation)
|
||||||
if (ps_sel.shuffle) {
|
if (ps_sel.shuffle) {
|
||||||
ps_sel.fmt = 0;
|
// Force a 32 bits access (normally shuffle is done on 16 bits)
|
||||||
} else if (tex->m_palette) {
|
// ps_sel.tex_fmt = 0; // removed as an optimization
|
||||||
ps_sel.fmt = cpsm.fmt | 4;
|
|
||||||
ps_sel.ifmt = !tex->m_target ? 0
|
|
||||||
: (m_context->TEX0.PSM == PSM_PSMT4HL) ? 2
|
|
||||||
: (m_context->TEX0.PSM == PSM_PSMT4HH) ? 1
|
|
||||||
: 0;
|
|
||||||
|
|
||||||
// In standard mode palette is only used when alpha channel of the RT is
|
|
||||||
// reinterpreted as an index. Star Ocean 3 uses it to emulate a stencil buffer.
|
|
||||||
// It is a very bad idea to force bilinear filtering on it.
|
|
||||||
if (tex->m_target)
|
|
||||||
bilinear &= m_vt.IsLinear();
|
|
||||||
|
|
||||||
//GL_INS("Use palette with format %d and index format %d", ps_sel.fmt, ps_sel.ifmt);
|
|
||||||
} else {
|
|
||||||
ps_sel.fmt = cpsm.fmt;
|
|
||||||
}
|
|
||||||
ps_sel.aem = m_env.TEXA.AEM;
|
ps_sel.aem = m_env.TEXA.AEM;
|
||||||
|
ASSERT(tex->m_target);
|
||||||
|
|
||||||
|
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
|
||||||
|
ps_cb.MinF_TA = ta.xyxy() / 255.0f;
|
||||||
|
|
||||||
|
// FIXME: it is likely a bad idea to do the bilinear interpolation here
|
||||||
|
// bilinear &= m_vt.IsLinear();
|
||||||
|
|
||||||
|
} else if (tex->m_target) {
|
||||||
|
// Use an old target. AEM and index aren't resolved, so it must be done
|
||||||
|
// on the GPU
|
||||||
|
|
||||||
|
// Select the 32/24/16 bits color (AEM)
|
||||||
|
ps_sel.tex_fmt = cpsm.fmt;
|
||||||
|
ps_sel.aem = m_env.TEXA.AEM;
|
||||||
|
|
||||||
|
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
|
||||||
|
ps_cb.MinF_TA = ta.xyxy() / 255.0f;
|
||||||
|
|
||||||
|
// Select the index format
|
||||||
|
if (tex->m_palette) {
|
||||||
|
// FIXME Potentially improve fmt field in GSLocalMemory
|
||||||
|
if (m_context->TEX0.PSM == PSM_PSMT4HL)
|
||||||
|
ps_sel.tex_fmt |= 1 << 2;
|
||||||
|
else if (m_context->TEX0.PSM == PSM_PSMT4HH)
|
||||||
|
ps_sel.tex_fmt |= 2 << 2;
|
||||||
|
else
|
||||||
|
ps_sel.tex_fmt |= 3 << 2;
|
||||||
|
|
||||||
|
// Alpha channel of the RT is reinterpreted as an index. Star
|
||||||
|
// Ocean 3 uses it to emulate a stencil buffer. It is a very
|
||||||
|
// bad idea to force bilinear filtering on it.
|
||||||
|
bilinear &= m_vt.IsLinear();
|
||||||
|
}
|
||||||
|
|
||||||
|
} else if (tex->m_palette) {
|
||||||
|
// Use a standard 8 bits texture. AEM is already done on the CLUT
|
||||||
|
// Therefore you only need to set the index
|
||||||
|
// ps_sel.tex_fmt = 0; // removed as an optimization
|
||||||
|
// ps_sel.aem = 0; // removed as an optimization
|
||||||
|
|
||||||
|
// Note 4 bits indexes are converted to 8 bits
|
||||||
|
ps_sel.tex_fmt = 3 << 2;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// Standard texture. Both index and AEM expansion were already done by the CPU.
|
||||||
|
// ps_sel.tex_fmt = 0; // removed as an optimization
|
||||||
|
// ps_sel.aem = 0; // removed as an optimization
|
||||||
|
}
|
||||||
|
|
||||||
if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) {
|
if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) {
|
||||||
// Micro optimization that reduces GPU load (removes 5 instructions on the FS program)
|
// Micro optimization that reduces GPU load (removes 5 instructions on the FS program)
|
||||||
|
@ -856,8 +892,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
ps_sel.tcoffsethack = !!UserHacks_TCOffset;
|
ps_sel.tcoffsethack = !!UserHacks_TCOffset;
|
||||||
ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy();
|
ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y).xyxy() / WH.xyxy();
|
||||||
|
|
||||||
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
|
|
||||||
ps_cb.MinF_TA = ta.xyxy() / WH.xyxy(GSVector4(255, 255));
|
|
||||||
|
|
||||||
// Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader
|
// Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader
|
||||||
ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP);
|
ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP);
|
||||||
|
|
|
@ -22,9 +22,13 @@
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "GSTextureCache.h"
|
#include "GSTextureCache.h"
|
||||||
|
|
||||||
|
bool s_IS_OPENGL = false;
|
||||||
|
|
||||||
GSTextureCache::GSTextureCache(GSRenderer* r)
|
GSTextureCache::GSTextureCache(GSRenderer* r)
|
||||||
: m_renderer(r)
|
: m_renderer(r)
|
||||||
{
|
{
|
||||||
|
s_IS_OPENGL = (theApp.GetConfig("Renderer", 12) == 12);
|
||||||
|
|
||||||
m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0;
|
m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0;
|
||||||
UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
|
UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0);
|
||||||
|
|
||||||
|
@ -72,12 +76,18 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||||
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
||||||
|
|
||||||
|
// Until DX is fixed
|
||||||
|
if (s_IS_OPENGL) {
|
||||||
|
if(psm.pal > 0)
|
||||||
|
m_renderer->m_mem.m_clut.Read32(TEX0, TEXA);
|
||||||
|
} else {
|
||||||
GIFRegTEXA plainTEXA;
|
GIFRegTEXA plainTEXA;
|
||||||
|
|
||||||
plainTEXA.AEM = 1;
|
plainTEXA.AEM = 1;
|
||||||
plainTEXA.TA0 = 0;
|
plainTEXA.TA0 = 0;
|
||||||
plainTEXA.TA1 = 0x80;
|
plainTEXA.TA1 = 0x80;
|
||||||
m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA);
|
m_renderer->m_mem.m_clut.Read32(TEX0, plainTEXA);
|
||||||
|
}
|
||||||
|
|
||||||
const uint32* clut = m_renderer->m_mem.m_clut;
|
const uint32* clut = m_renderer->m_mem.m_clut;
|
||||||
|
|
||||||
|
@ -85,25 +95,26 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
|
|
||||||
list<Source*>& m = m_src.m_map[TEX0.TBP0 >> 5];
|
list<Source*>& m = m_src.m_map[TEX0.TBP0 >> 5];
|
||||||
|
|
||||||
|
|
||||||
for(list<Source*>::iterator i = m.begin(); i != m.end(); i++)
|
for(list<Source*>::iterator i = m.begin(); i != m.end(); i++)
|
||||||
{
|
{
|
||||||
Source* s = *i;
|
Source* s = *i;
|
||||||
|
|
||||||
if(((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
if (((TEX0.u32[0] ^ s->m_TEX0.u32[0]) | ((TEX0.u32[1] ^ s->m_TEX0.u32[1]) & 3)) != 0) // TBP0 TBW PSM TW TH
|
||||||
{
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
// Special check for palette texture (psm.pal > 0)
|
// Targets are converted (AEM & palette) on the fly by the GPU. They don't need an extra check
|
||||||
//
|
if (!s->m_target) {
|
||||||
// if m_paltex is enabled
|
// We request a palette texture (psm.pal). If the texture was
|
||||||
// 1/ s->m_palette must always be defined
|
// converted by the CPU (s->m_palette == NULL), we need to ensure
|
||||||
// 2/ Clut is useless (will be uploaded again at the end of the function)
|
// palette content is the same.
|
||||||
//
|
// Note: content of the palette will be uploaded at the end of the function
|
||||||
// if m_paltex is disabled
|
if (psm.pal > 0 && s->m_palette == NULL && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
|
||||||
// 1/ Clut must match if m_palette is NULL
|
continue;
|
||||||
if(s->m_palette == NULL && psm.pal > 0 && !GSVector4i::compare64(clut, s->m_clut, psm.pal * sizeof(clut[0])))
|
|
||||||
{
|
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
|
||||||
|
// the CPU. We need to check that TEXA is identical
|
||||||
|
if (psm.pal == 0 && psm.fmt > 0 && s->m_TEXA.u64 != TEXA.u64)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,7 +158,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
|
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
|
||||||
|
|
||||||
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) {
|
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) {
|
||||||
if (!IsOpenGL() && (psm == PSM_PSMT8)) {
|
if (!s_IS_OPENGL && (psm == PSM_PSMT8)) {
|
||||||
// OpenGL can convert the texture directly in the GPU. Not sure we want to keep this
|
// OpenGL can convert the texture directly in the GPU. Not sure we want to keep this
|
||||||
// code for DX. It fixes effect but it is slow (MGS3)
|
// code for DX. It fixes effect but it is slow (MGS3)
|
||||||
|
|
||||||
|
@ -324,7 +335,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
||||||
//
|
//
|
||||||
// From a performance point of view, it might cost a little on big upscaling
|
// From a performance point of view, it might cost a little on big upscaling
|
||||||
// but normally few RTs are missed, so it must remain reasonable.
|
// but normally few RTs are missed, so it must remain reasonable.
|
||||||
if (IsOpenGL()) {
|
if (s_IS_OPENGL) {
|
||||||
switch (type) {
|
switch (type) {
|
||||||
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
|
case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break;
|
||||||
case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break;
|
case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break;
|
||||||
|
@ -863,7 +874,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
||||||
// TODO: clean up this mess
|
// TODO: clean up this mess
|
||||||
|
|
||||||
int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY;
|
int shader = dst->m_type != RenderTarget ? ShaderConvert_FLOAT32_TO_RGBA8 : ShaderConvert_COPY;
|
||||||
bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL();
|
bool is_8bits = TEX0.PSM == PSM_PSMT8 && s_IS_OPENGL;
|
||||||
|
|
||||||
if (is_8bits) {
|
if (is_8bits) {
|
||||||
GL_INS("Reading RT as a packed-indexed 8 bits format");
|
GL_INS("Reading RT as a packed-indexed 8 bits format");
|
||||||
|
@ -1417,9 +1428,14 @@ void GSTextureCache::Source::Flush(uint32 count)
|
||||||
|
|
||||||
GIFRegTEXA plainTEXA;
|
GIFRegTEXA plainTEXA;
|
||||||
|
|
||||||
|
// Until DX is fixed
|
||||||
|
if (s_IS_OPENGL) {
|
||||||
|
plainTEXA = m_TEXA;
|
||||||
|
} else {
|
||||||
plainTEXA.AEM = 1;
|
plainTEXA.AEM = 1;
|
||||||
plainTEXA.TA0 = 0;
|
plainTEXA.TA0 = 0;
|
||||||
plainTEXA.TA1 = 0x80;
|
plainTEXA.TA1 = 0x80;
|
||||||
|
}
|
||||||
|
|
||||||
if(m_palette)
|
if(m_palette)
|
||||||
{
|
{
|
||||||
|
|
|
@ -129,7 +129,6 @@ protected:
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
virtual bool CanConvertDepth() { return m_can_convert_depth; }
|
virtual bool CanConvertDepth() { return m_can_convert_depth; }
|
||||||
virtual bool IsOpenGL() { return false; }
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSTextureCache(GSRenderer* r);
|
GSTextureCache(GSRenderer* r);
|
||||||
|
|
|
@ -32,8 +32,6 @@ protected:
|
||||||
|
|
||||||
void Read(Target* t, const GSVector4i& r);
|
void Read(Target* t, const GSVector4i& r);
|
||||||
|
|
||||||
virtual bool IsOpenGL() { return true; }
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
GSTextureCacheOGL(GSRenderer* r);
|
GSTextureCacheOGL(GSRenderer* r);
|
||||||
};
|
};
|
||||||
|
|
|
@ -6,7 +6,9 @@
|
||||||
#define FMT_32 0
|
#define FMT_32 0
|
||||||
#define FMT_24 1
|
#define FMT_24 1
|
||||||
#define FMT_16 2
|
#define FMT_16 2
|
||||||
#define FMT_PAL 4 /* flag bit */
|
|
||||||
|
#define PS_PAL_FMT (PS_TEX_FMT >> 2)
|
||||||
|
#define PS_AEM_FMT (PS_TEX_FMT & 3)
|
||||||
|
|
||||||
// APITRACE_DEBUG enables forced pixel output to easily detect
|
// APITRACE_DEBUG enables forced pixel output to easily detect
|
||||||
// the fragment computed by primitive
|
// the fragment computed by primitive
|
||||||
|
@ -162,14 +164,14 @@ vec4 sample_4_index(vec4 uv)
|
||||||
|
|
||||||
uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value
|
uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value
|
||||||
|
|
||||||
#if PS_IFMT == 1
|
#if PS_PAL_FMT == 1
|
||||||
// 4HH
|
|
||||||
return vec4(i >> 4u) / 255.0f;
|
|
||||||
|
|
||||||
#elif PS_IFMT == 2
|
|
||||||
// 4HL
|
// 4HL
|
||||||
return vec4(i & 0xFu) / 255.0f;
|
return vec4(i & 0xFu) / 255.0f;
|
||||||
|
|
||||||
|
#elif PS_PAL_FMT == 2
|
||||||
|
// 4HH
|
||||||
|
return vec4(i >> 4u) / 255.0f;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// Most of texture will hit this code so keep normalized float value
|
// Most of texture will hit this code so keep normalized float value
|
||||||
|
|
||||||
|
@ -207,7 +209,7 @@ vec4 sample_color(vec2 st, float q)
|
||||||
vec2 dd;
|
vec2 dd;
|
||||||
|
|
||||||
// FIXME I'm not sure this condition is useful (I think code will be optimized)
|
// FIXME I'm not sure this condition is useful (I think code will be optimized)
|
||||||
#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2)
|
#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)
|
||||||
// No software LTF and pure 32 bits RGBA texure without special texture wrapping
|
// No software LTF and pure 32 bits RGBA texure without special texture wrapping
|
||||||
c[0] = sample_c(st);
|
c[0] = sample_c(st);
|
||||||
#ifdef TEX_COORD_DEBUG
|
#ifdef TEX_COORD_DEBUG
|
||||||
|
@ -229,14 +231,12 @@ vec4 sample_color(vec2 st, float q)
|
||||||
|
|
||||||
uv = clamp_wrap_uv(uv);
|
uv = clamp_wrap_uv(uv);
|
||||||
|
|
||||||
if((PS_FMT & FMT_PAL) != 0)
|
#if PS_PAL_FMT != 0
|
||||||
{
|
|
||||||
c = sample_4p(sample_4_index(uv));
|
c = sample_4p(sample_4_index(uv));
|
||||||
}
|
#else
|
||||||
else
|
|
||||||
{
|
|
||||||
c = sample_4c(uv);
|
c = sample_4c(uv);
|
||||||
}
|
#endif
|
||||||
|
|
||||||
#ifdef TEX_COORD_DEBUG
|
#ifdef TEX_COORD_DEBUG
|
||||||
c[0].rg = uv.xy;
|
c[0].rg = uv.xy;
|
||||||
c[1].rg = uv.xy;
|
c[1].rg = uv.xy;
|
||||||
|
@ -246,16 +246,15 @@ vec4 sample_color(vec2 st, float q)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// PERF: see the impact of the exansion before/after the interpolation
|
|
||||||
for (int i = 0; i < 4; i++)
|
|
||||||
{
|
|
||||||
// PERF note: using dot product reduces by 1 the number of instruction
|
// PERF note: using dot product reduces by 1 the number of instruction
|
||||||
// but I'm not sure it is equivalent neither faster.
|
// but I'm not sure it is equivalent neither faster.
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
{
|
||||||
//float sum = dot(c[i].rgb, vec3(1.0f));
|
//float sum = dot(c[i].rgb, vec3(1.0f));
|
||||||
#if ((PS_FMT & ~FMT_PAL) == FMT_24)
|
#if (PS_AEM_FMT == FMT_24)
|
||||||
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||||
//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
//c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||||
#elif ((PS_FMT & ~FMT_PAL) == FMT_16)
|
#elif (PS_AEM_FMT == FMT_16)
|
||||||
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;
|
||||||
//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
//c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -910,7 +910,9 @@ static const char* tfx_fs_all_glsl =
|
||||||
"#define FMT_32 0\n"
|
"#define FMT_32 0\n"
|
||||||
"#define FMT_24 1\n"
|
"#define FMT_24 1\n"
|
||||||
"#define FMT_16 2\n"
|
"#define FMT_16 2\n"
|
||||||
"#define FMT_PAL 4 /* flag bit */\n"
|
"\n"
|
||||||
|
"#define PS_PAL_FMT (PS_TEX_FMT >> 2)\n"
|
||||||
|
"#define PS_AEM_FMT (PS_TEX_FMT & 3)\n"
|
||||||
"\n"
|
"\n"
|
||||||
"// APITRACE_DEBUG enables forced pixel output to easily detect\n"
|
"// APITRACE_DEBUG enables forced pixel output to easily detect\n"
|
||||||
"// the fragment computed by primitive\n"
|
"// the fragment computed by primitive\n"
|
||||||
|
@ -1066,14 +1068,14 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
" uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n"
|
" uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value\n"
|
||||||
"\n"
|
"\n"
|
||||||
"#if PS_IFMT == 1\n"
|
"#if PS_PAL_FMT == 1\n"
|
||||||
" // 4HH\n"
|
|
||||||
" return vec4(i >> 4u) / 255.0f;\n"
|
|
||||||
"\n"
|
|
||||||
"#elif PS_IFMT == 2\n"
|
|
||||||
" // 4HL\n"
|
" // 4HL\n"
|
||||||
" return vec4(i & 0xFu) / 255.0f;\n"
|
" return vec4(i & 0xFu) / 255.0f;\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"#elif PS_PAL_FMT == 2\n"
|
||||||
|
" // 4HH\n"
|
||||||
|
" return vec4(i >> 4u) / 255.0f;\n"
|
||||||
|
"\n"
|
||||||
"#else\n"
|
"#else\n"
|
||||||
" // Most of texture will hit this code so keep normalized float value\n"
|
" // Most of texture will hit this code so keep normalized float value\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
@ -1111,7 +1113,7 @@ static const char* tfx_fs_all_glsl =
|
||||||
" vec2 dd;\n"
|
" vec2 dd;\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // FIXME I'm not sure this condition is useful (I think code will be optimized)\n"
|
" // FIXME I'm not sure this condition is useful (I think code will be optimized)\n"
|
||||||
"#if (PS_LTF == 0 && PS_FMT == FMT_32 && PS_WMS < 2 && PS_WMT < 2)\n"
|
"#if (PS_LTF == 0 && PS_AEM_FMT == FMT_32 && PS_PAL_FMT == 0 && PS_WMS < 2 && PS_WMT < 2)\n"
|
||||||
" // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n"
|
" // No software LTF and pure 32 bits RGBA texure without special texture wrapping\n"
|
||||||
" c[0] = sample_c(st);\n"
|
" c[0] = sample_c(st);\n"
|
||||||
"#ifdef TEX_COORD_DEBUG\n"
|
"#ifdef TEX_COORD_DEBUG\n"
|
||||||
|
@ -1133,14 +1135,12 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
" uv = clamp_wrap_uv(uv);\n"
|
" uv = clamp_wrap_uv(uv);\n"
|
||||||
"\n"
|
"\n"
|
||||||
" if((PS_FMT & FMT_PAL) != 0)\n"
|
"#if PS_PAL_FMT != 0\n"
|
||||||
" {\n"
|
|
||||||
" c = sample_4p(sample_4_index(uv));\n"
|
" c = sample_4p(sample_4_index(uv));\n"
|
||||||
" }\n"
|
"#else\n"
|
||||||
" else\n"
|
|
||||||
" {\n"
|
|
||||||
" c = sample_4c(uv);\n"
|
" c = sample_4c(uv);\n"
|
||||||
" }\n"
|
"#endif\n"
|
||||||
|
"\n"
|
||||||
"#ifdef TEX_COORD_DEBUG\n"
|
"#ifdef TEX_COORD_DEBUG\n"
|
||||||
" c[0].rg = uv.xy;\n"
|
" c[0].rg = uv.xy;\n"
|
||||||
" c[1].rg = uv.xy;\n"
|
" c[1].rg = uv.xy;\n"
|
||||||
|
@ -1150,16 +1150,15 @@ static const char* tfx_fs_all_glsl =
|
||||||
"\n"
|
"\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
" // PERF: see the impact of the exansion before/after the interpolation\n"
|
|
||||||
" for (int i = 0; i < 4; i++)\n"
|
|
||||||
" {\n"
|
|
||||||
" // PERF note: using dot product reduces by 1 the number of instruction\n"
|
" // PERF note: using dot product reduces by 1 the number of instruction\n"
|
||||||
" // but I'm not sure it is equivalent neither faster.\n"
|
" // but I'm not sure it is equivalent neither faster.\n"
|
||||||
|
" for (int i = 0; i < 4; i++)\n"
|
||||||
|
" {\n"
|
||||||
" //float sum = dot(c[i].rgb, vec3(1.0f));\n"
|
" //float sum = dot(c[i].rgb, vec3(1.0f));\n"
|
||||||
"#if ((PS_FMT & ~FMT_PAL) == FMT_24)\n"
|
"#if (PS_AEM_FMT == FMT_24)\n"
|
||||||
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
" c[i].a = ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||||
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
" //c[i].a = ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||||
"#elif ((PS_FMT & ~FMT_PAL) == FMT_16)\n"
|
"#elif (PS_AEM_FMT == FMT_16)\n"
|
||||||
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
" c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || any(bvec3(c[i].rgb)) ) ? TA.x : 0.0f;\n"
|
||||||
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
" //c[i].a = c[i].a >= 0.5 ? TA.y : ( (PS_AEM == 0) || (sum > 0.0f) ) ? TA.x : 0.0f;\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
|
|
Loading…
Reference in New Issue