mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: emulate texture shuffle
The GS doesn't support texture shuffle/swizzle, so it is emulated in a complex way. The idea is to read/write the 32-bit color format as a 16-bit format. This way, RG (the 16 least-significant bits) or BA (the 16 most-significant bits) can be read or written with a square texture that targets pixels 1-8 or pixels 8-16. However, the shuffle is limited: for example, you can copy the green channel to either the alpha channel or another green channel. Note: partial masking of a channel is not yet implemented. V2: improve logging. V3: better support of the green channel in the shader. V4: improve detection of the destination (issue due to rounding).
This commit is contained in:
parent
2d812deb84
commit
58ce7d4bb8
|
@ -647,6 +647,8 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel)
|
||||||
//+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler)
|
//+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler)
|
||||||
+ format("#define PS_BLEND %d\n", sel.blend)
|
+ format("#define PS_BLEND %d\n", sel.blend)
|
||||||
+ format("#define PS_IIP %d\n", sel.iip)
|
+ format("#define PS_IIP %d\n", sel.iip)
|
||||||
|
+ format("#define PS_SHUFFLE %d\n", sel.shuffle)
|
||||||
|
+ format("#define PS_READ_BA %d\n", sel.read_ba)
|
||||||
;
|
;
|
||||||
|
|
||||||
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro);
|
||||||
|
|
|
@ -322,8 +322,10 @@ class GSDeviceOGL : public GSDevice
|
||||||
uint32 wmt:2;
|
uint32 wmt:2;
|
||||||
uint32 ltf:1;
|
uint32 ltf:1;
|
||||||
uint32 ifmt:2;
|
uint32 ifmt:2;
|
||||||
|
uint32 shuffle:1;
|
||||||
|
uint32 read_ba:1;
|
||||||
|
|
||||||
uint32 _free1:2;
|
//uint32 _free1:0;
|
||||||
|
|
||||||
// Word 2
|
// Word 2
|
||||||
uint32 blend:8;
|
uint32 blend:8;
|
||||||
|
|
|
@ -247,9 +247,116 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
GSDeviceOGL::OMColorMaskSelector om_csel;
|
GSDeviceOGL::OMColorMaskSelector om_csel;
|
||||||
GSDeviceOGL::OMDepthStencilSelector om_dssel;
|
GSDeviceOGL::OMDepthStencilSelector om_dssel;
|
||||||
|
|
||||||
// Format of the output
|
if ((context->FRAME.PSM & 0x2) && (context->TEX0.PSM & 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) {
|
||||||
|
ps_sel.shuffle = 1;
|
||||||
|
ps_sel.dfmt = 0;
|
||||||
|
|
||||||
|
const GIFRegXYOFFSET& o = m_context->XYOFFSET;
|
||||||
|
GSVertex* v = &m_vertex.buff[0];
|
||||||
|
size_t count = m_vertex.next;
|
||||||
|
|
||||||
|
// vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors
|
||||||
|
int pos = (v[0].XYZ.X - o.OFX) & 0xFF;
|
||||||
|
bool write_ba = (pos > 112 && pos < 136);
|
||||||
|
// Read texture is 8 to 16 pixels (same as above)
|
||||||
|
int tex_pos = v[0].U & 0xFF;
|
||||||
|
ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144);
|
||||||
|
|
||||||
|
//GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U);
|
||||||
|
|
||||||
|
// Convert the vertex info to a 32 bits color format equivalent
|
||||||
|
for(size_t i = 0; i < count; i += 2) {
|
||||||
|
if (write_ba)
|
||||||
|
v[i].XYZ.X -= 128u;
|
||||||
|
else
|
||||||
|
v[i+1].XYZ.X += 128u;
|
||||||
|
|
||||||
|
if (ps_sel.read_ba)
|
||||||
|
v[i].U -= 128u;
|
||||||
|
else
|
||||||
|
v[i+1].U += 128u;
|
||||||
|
|
||||||
|
// Height is too big (2x).
|
||||||
|
int tex_offset = v[i].V & 0xF;
|
||||||
|
GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset);
|
||||||
|
|
||||||
|
GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i+1].XYZ.Y, v[i+1].V);
|
||||||
|
tmp = GSVector4i(tmp - offset).srl32(1) + offset;
|
||||||
|
|
||||||
|
v[i].XYZ.Y = tmp.x;
|
||||||
|
v[i].V = tmp.y;
|
||||||
|
v[i+1].XYZ.Y = tmp.z;
|
||||||
|
v[i+1].V = tmp.w;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Please bang my head against the wall!
|
||||||
|
// 1/ Reduce the frame mask to a 16 bit format
|
||||||
|
const uint32& m = context->FRAME.FBMSK;
|
||||||
|
uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000);
|
||||||
|
om_csel.wrgba = 0;
|
||||||
|
|
||||||
|
// 2 Select the new mask (Please someone put SSE here)
|
||||||
|
if ((fbmask & 0xFF) == 0) {
|
||||||
|
if (write_ba) {
|
||||||
|
GL_INS("Color shuffle %s => B", ps_sel.read_ba ? "B" : "R");
|
||||||
|
om_csel.wb = 1;
|
||||||
|
} else {
|
||||||
|
GL_INS("Color shuffle %s => R", ps_sel.read_ba ? "B" : "R");
|
||||||
|
om_csel.wr = 1;
|
||||||
|
}
|
||||||
|
} else if ((fbmask & 0xFF) != 0xFF) {
|
||||||
|
GL_INS("ERROR: not supported RG mask:%x", fbmask & 0xFF);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
fbmask >>= 8;
|
||||||
|
if ((fbmask & 0xFF) == 0) {
|
||||||
|
if (write_ba) {
|
||||||
|
GL_INS("Color shuffle %s => A", ps_sel.read_ba ? "A" : "G");
|
||||||
|
om_csel.wa = 1;
|
||||||
|
} else {
|
||||||
|
GL_INS("Color shuffle %s => G", ps_sel.read_ba ? "A" : "G");
|
||||||
|
om_csel.wg = 1;
|
||||||
|
}
|
||||||
|
} else if ((fbmask & 0xFF) != 0xFF) {
|
||||||
|
GL_INS("ERROR: not supported BA mask:%x", fbmask & 0xFF);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||||
|
|
||||||
|
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
||||||
|
|
||||||
|
{
|
||||||
|
#ifdef ENABLE_OGL_DEBUG
|
||||||
|
uint8 r_mask = (context->FRAME.FBMSK >> 0) & 0xFF;
|
||||||
|
uint8 g_mask = (context->FRAME.FBMSK >> 8) & 0xFF;
|
||||||
|
uint8 b_mask = (context->FRAME.FBMSK >> 16) & 0xFF;
|
||||||
|
uint8 a_mask = (context->FRAME.FBMSK >> 24) & 0xFF;
|
||||||
|
uint8 bits = (GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 16 : 32;
|
||||||
|
if (r_mask != 0 && r_mask != 0xFF) {
|
||||||
|
GL_INS("ERROR: not supported r_mask:%x on %d bits format", r_mask, bits);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
if (g_mask != 0 && g_mask != 0xFF) {
|
||||||
|
GL_INS("ERROR: not supported g_mask:%x on %d bits format", g_mask, bits);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
if (b_mask != 0 && b_mask != 0xFF) {
|
||||||
|
GL_INS("ERROR: not supported b_mask:%x on %d bits format", b_mask, bits);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
if (a_mask != 0 && a_mask != 0xFF) {
|
||||||
|
GL_INS("ERROR: not supported a_mask:%x on %d bits format", a_mask, bits);
|
||||||
|
ASSERT(0);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format of the output
|
||||||
|
|
||||||
GIFRegALPHA ALPHA = context->ALPHA;
|
GIFRegALPHA ALPHA = context->ALPHA;
|
||||||
float afix = (float)context->ALPHA.FIX / 0x80;
|
float afix = (float)context->ALPHA.FIX / 0x80;
|
||||||
|
|
||||||
|
@ -285,7 +392,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask();
|
|
||||||
if (ps_sel.dfmt == 1) {
|
if (ps_sel.dfmt == 1) {
|
||||||
if (ALPHA.C == 1) {
|
if (ALPHA.C == 1) {
|
||||||
// 24 bits no alpha channel so use 1.0f fix factor as equivalent
|
// 24 bits no alpha channel so use 1.0f fix factor as equivalent
|
||||||
|
@ -471,7 +577,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
}
|
}
|
||||||
|
|
||||||
ps_sel.fba = context->FBA.FBA;
|
ps_sel.fba = context->FBA.FBA;
|
||||||
|
// TODO deprecat this stuff
|
||||||
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0;
|
||||||
|
ps_sel.aout &= !ps_sel.shuffle;
|
||||||
|
|
||||||
if (UserHacks_AlphaHack) ps_sel.aout = 1;
|
if (UserHacks_AlphaHack) ps_sel.aout = 1;
|
||||||
|
|
||||||
|
@ -524,7 +632,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
||||||
|
|
||||||
ps_sel.wms = context->CLAMP.WMS;
|
ps_sel.wms = context->CLAMP.WMS;
|
||||||
ps_sel.wmt = context->CLAMP.WMT;
|
ps_sel.wmt = context->CLAMP.WMT;
|
||||||
if (tex->m_palette) {
|
|
||||||
|
if (ps_sel.shuffle) {
|
||||||
|
ps_sel.fmt = 0;
|
||||||
|
} else if (tex->m_palette) {
|
||||||
ps_sel.fmt = cpsm.fmt | 4;
|
ps_sel.fmt = cpsm.fmt | 4;
|
||||||
ps_sel.ifmt = !tex->m_target ? 0
|
ps_sel.ifmt = !tex->m_target ? 0
|
||||||
: (context->TEX0.PSM == PSM_PSMT4HL) ? 2
|
: (context->TEX0.PSM == PSM_PSMT4HL) ? 2
|
||||||
|
|
|
@ -223,7 +223,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int
|
||||||
|
|
||||||
dst = t;
|
dst = t;
|
||||||
|
|
||||||
#ifdef ENABLE_OGL_DEBUG
|
#if 0
|
||||||
// Likely the root cause of tons and tons of bug
|
// Likely the root cause of tons and tons of bug
|
||||||
if (dst->m_TEX0.PSM != TEX0.PSM) {
|
if (dst->m_TEX0.PSM != TEX0.PSM) {
|
||||||
GL_INS("TC: ERROR: use a target with format 0x%x as 0x%x without any conversion", dst->m_TEX0.PSM, TEX0.PSM);
|
GL_INS("TC: ERROR: use a target with format 0x%x as 0x%x without any conversion", dst->m_TEX0.PSM, TEX0.PSM);
|
||||||
|
|
|
@ -708,6 +708,32 @@ void ps_main()
|
||||||
#endif
|
#endif
|
||||||
#if (APITRACE_DEBUG & 8) == 8
|
#if (APITRACE_DEBUG & 8) == 8
|
||||||
c.a = 0.5f;
|
c.a = 0.5f;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if PS_SHUFFLE
|
||||||
|
uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);
|
||||||
|
uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);
|
||||||
|
|
||||||
|
// Write RB part. Mask will take care of the correct destination
|
||||||
|
#if PS_READ_BA
|
||||||
|
c.rb = c.bb;
|
||||||
|
#else
|
||||||
|
c.rb = c.rr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Write GA part. Mask will take care of the correct destination
|
||||||
|
#if PS_READ_BA
|
||||||
|
if (bool(denorm_c.a & 0x80u))
|
||||||
|
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
|
||||||
|
else
|
||||||
|
c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
|
||||||
|
#else
|
||||||
|
if (bool(denorm_c.g & 0x80u))
|
||||||
|
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);
|
||||||
|
else
|
||||||
|
c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Must be done before alpha correction
|
// Must be done before alpha correction
|
||||||
|
|
|
@ -1489,6 +1489,32 @@ static const char* tfx_fs_all_glsl =
|
||||||
" c.a = 0.5f;\n"
|
" c.a = 0.5f;\n"
|
||||||
"#endif\n"
|
"#endif\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
"#if PS_SHUFFLE\n"
|
||||||
|
" uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n"
|
||||||
|
" uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n"
|
||||||
|
"\n"
|
||||||
|
" // Write RB part. Mask will take care of the correct destination\n"
|
||||||
|
"#if PS_READ_BA\n"
|
||||||
|
" c.rb = c.bb;\n"
|
||||||
|
"#else\n"
|
||||||
|
" c.rb = c.rr;\n"
|
||||||
|
"#endif\n"
|
||||||
|
"\n"
|
||||||
|
" // Write GA part. Mask will take care of the correct destination\n"
|
||||||
|
"#if PS_READ_BA\n"
|
||||||
|
" if (bool(denorm_c.a & 0x80u))\n"
|
||||||
|
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
|
||||||
|
" else\n"
|
||||||
|
" c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
|
||||||
|
"#else\n"
|
||||||
|
" if (bool(denorm_c.g & 0x80u))\n"
|
||||||
|
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n"
|
||||||
|
" else\n"
|
||||||
|
" c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n"
|
||||||
|
"#endif\n"
|
||||||
|
"\n"
|
||||||
|
"#endif\n"
|
||||||
|
"\n"
|
||||||
" // Must be done before alpha correction\n"
|
" // Must be done before alpha correction\n"
|
||||||
" float alpha = c.a * 255.0f / 128.0f;\n"
|
" float alpha = c.a * 255.0f / 128.0f;\n"
|
||||||
"\n"
|
"\n"
|
||||||
|
|
Loading…
Reference in New Issue