gsdx-tc: GPU-accelerate 8-bit texture conversion

Only native resolution is supported currently
This commit is contained in:
Gregory Hainaut 2015-06-27 11:24:16 +02:00
parent a8bcc760b4
commit d29e375f72
4 changed files with 147 additions and 7 deletions

View File

@ -498,7 +498,7 @@ class GSDeviceOGL : public GSDevice
struct { struct {
GLuint vs; // program object GLuint vs; // program object
GLuint ps[14]; // program object GLuint ps[15]; // program object
GLuint ln; // sampler object GLuint ln; // sampler object
GLuint pt; // sampler object GLuint pt; // sampler object
GSDepthStencilOGL* dss; GSDepthStencilOGL* dss;

View File

@ -139,7 +139,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM;
if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) {
if (psm == PSM_PSMT8) { if (!IsOpenGL() && (psm == PSM_PSMT8)) {
// OpenGL can convert the texture directly in the GPU. Not sure we want to keep this
// code for DX. It fixes effect but it is slow (MGS3)
// It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will // It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will
// be slow but // be slow but
// 1/ it just works :) // 1/ it just works :)
@ -808,9 +811,18 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
{ {
// TODO: clean up this mess // TODO: clean up this mess
// Shader 11 convert depth to color
// Shader 14 convert 32 bits color to 8 bits color
int shader = dst->m_type != RenderTarget ? 11 : 0;
if (TEX0.PSM == PSM_PSMT8) {
GL_INS("Reading RT as a packed-indexed 8 bits format");
shader = 14; // ask a conversion to 8 bits format
}
#ifdef ENABLE_OGL_DEBUG #ifdef ENABLE_OGL_DEBUG
if (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4) { if (TEX0.PSM == PSM_PSMT4) {
GL_INS("ERROR: Reading RT as a packed-indexed (0x%x) format is not supported", TEX0.PSM); GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported");
} }
#endif #endif
@ -902,6 +914,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// Try to extract a texture bigger than the RT. Current solution is to rescale the size // Try to extract a texture bigger than the RT. Current solution is to rescale the size
// of the texture to fit in the RT. In my opinion, it would be better to increase the size of // of the texture to fit in the RT. In my opinion, it would be better to increase the size of
// the RT // the RT
// TODO investigate this code is correct (maybe linked to custom resolution?)
if(w > dstsize.x) if(w > dstsize.x)
{ {
scale.x = (float)dstsize.x / tw; scale.x = (float)dstsize.x / tw;
@ -920,6 +933,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
GSTexture* sTex = src->m_texture ? src->m_texture : dst->m_texture; GSTexture* sTex = src->m_texture ? src->m_texture : dst->m_texture;
GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h, false); GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h, false);
// GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format // GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format
// However it is different here. We want to reuse a Render Target as a texture. // However it is different here. We want to reuse a Render Target as a texture.
// Because the texture is already on the GPU, CPU can't convert it. // Because the texture is already on the GPU, CPU can't convert it.
@ -952,14 +966,12 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
// copy. Likely a speed boost and memory usage reduction. // copy. Likely a speed boost and memory usage reduction.
bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24); bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24);
int shader = dst->m_type != RenderTarget ? 11 : 0;
if(!src->m_texture) if(!src->m_texture)
{ {
src->m_texture = dTex; src->m_texture = dTex;
} }
if((sRect == dRect).alltrue() && !shader) if ((sRect == dRect).alltrue() && !shader)
{ {
if (half_right) { if (half_right) {
// You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT // You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT

View File

@ -196,6 +196,70 @@ void ps_main13()
} }
#endif #endif
#ifdef ps_main14
void ps_main14()
{
    // Repack a 32 bits RGBA texture as an 8 bits packed (indexed) texture.
    //
    // One input column is 8x2 RGBA pixels:
    //   line 0: 8 RGBA
    //   line 1: 8 RGBA
    // It becomes one output column of 16x4 index pixels:
    //   line 0: 8 R | 8 B
    //   line 1: 8 R | 8 B
    //   line 2: 8 G | 8 A
    //   line 3: 8 G | 8 A

    ivec2 frag = ivec2(gl_FragCoord.xy);

    // Output coordinate modulo 16 on both axes
    uvec2 cell = uvec2(gl_FragCoord.xy) & uvec2(15u);

    // Base coordinate in the source texture: drop the bits that only select
    // a position inside the output column, then halve.
    ivec2 base = (frag & ~ivec2(15, 3)) >> 1u;

    int src_y      = base.y | (frag.y & 1);
    int src_x_near = base.x | (frag.x & 7);
    // Same line, but shifted by 4 pixels (wrapping inside the 8 pixels group)
    int src_x_half = base.x | ((frag.x + 4) & 7);

    vec4 texel_near = texelFetch(TextureSampler, ivec2(src_x_near, src_y), 0);
    vec4 texel_half = texelFetch(TextureSampler, ivec2(src_x_half, src_y), 0);

    // Possible speed-up: games most likely only extract a single channel
    // (blue). Assuming so would make the left/right test below unnecessary.

    bool even_column = (cell.y & 4u) == 0u; // column 0 and 2 (otherwise 1 and 3)
    bool first_lines = (cell.y & 3u) < 2u;  // first 2 lines of the column
    bool left_half   = cell.x < 8u;         // first 8 pixels of the line

    // Columns 0/2 read the unshifted texel on their first 2 lines and the
    // shifted one on their last 2 lines. Columns 1/3 do the opposite.
    vec4 texel = (even_column == first_lines) ? texel_near : texel_half;

    // First 2 lines hold R | B, last 2 lines hold G | A
    vec2 channels = first_lines ? texel.rb : texel.ga;

    float c = left_half ? channels.x : channels.y;

    SV_Target0 = vec4(c);
}
#endif
#ifdef ps_main7 #ifdef ps_main7
void ps_main7() void ps_main7()
{ {

View File

@ -221,6 +221,70 @@ static const char* convert_glsl =
"}\n" "}\n"
"#endif\n" "#endif\n"
"\n" "\n"
"#ifdef ps_main14\n"
"void ps_main14()\n"
"{\n"
" // Convert a RGBA texture into a 8 bits packed texture\n"
" // Input column: 8x2 RGBA pixels\n"
" // 0: 8 RGBA\n"
" // 1: 8 RGBA\n"
" // Output column: 16x4 Index pixels\n"
" // 0: 8 R | 8 B\n"
" // 1: 8 R | 8 B\n"
" // 2: 8 G | 8 A\n"
" // 3: 8 G | 8 A\n"
"\n"
" float c;\n"
"\n"
" uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n"
" ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u);\n"
"\n"
" int ty = tb.y | (int(gl_FragCoord.y) & 1);\n"
" int txN = tb.x | (int(gl_FragCoord.x) & 7);\n"
" int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n"
"\n"
" vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n"
" vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n"
"\n"
" // Potential speed optimization. There is a high probability that\n"
" // game only want to extract a single channel (blue). It will allow\n"
" // to remove the sel.x condition check\n"
"\n"
" if ((sel.y & 4u) == 0u) {\n"
" // Column 0 and 2\n"
" if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n"
" if (sel.x < 8u)\n"
" c = cN.r;\n"
" else\n"
" c = cN.b;\n"
" } else {\n"
" if (sel.x < 8u)\n"
" c = cH.g;\n"
" else\n"
" c = cH.a;\n"
" }\n"
" } else {\n"
" // Column 1 and 3\n"
" if ((sel.y & 3u) < 2u) {\n"
" // first 2 lines of the col\n"
" if (sel.x < 8u)\n"
" c = cH.r;\n"
" else\n"
" c = cH.b;\n"
" } else {\n"
" if (sel.x < 8u)\n"
" c = cN.g;\n"
" else\n"
" c = cN.a;\n"
" }\n"
" }\n"
"\n"
"\n"
" SV_Target0 = vec4(c);\n"
"}\n"
"#endif\n"
"\n"
"#ifdef ps_main7\n" "#ifdef ps_main7\n"
"void ps_main7()\n" "void ps_main7()\n"
"{\n" "{\n"