diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index f7ff874015..752dddacd6 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -606,6 +606,160 @@ bool GSRendererOGL::EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_G return require_barrier; } +void GSRendererOGL::EmulateTextureSampler(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::PSSamplerSelector ps_ssel, const GSTextureCache::Source* tex) +{ + GSDeviceOGL* dev = (GSDeviceOGL*)m_dev; + + // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. + //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; + const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; + const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; + bool bilinear = m_filter == 2 ? m_vt.IsLinear() : m_filter != 0; + bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2 && !psm.depth; + // Don't force extra filtering on sprite (it creates various upscaling issue) + bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear()); + + ps_sel.wms = m_context->CLAMP.WMS; + ps_sel.wmt = m_context->CLAMP.WMT; + + // Depth + bilinear filtering isn't done yet (And I'm not sure we need it anyway but a game will prove me wrong) + // So of course, GTA set the linear mode, but sampling is done at texel center so it is equivalent to nearest sampling + ASSERT(!(psm.depth && m_vt.IsLinear())); + + // Performance note: + // 1/ Don't set 0 as it is the default value + // 2/ Only keep aem when it is useful (avoid useless shader permutation) + if (ps_sel.shuffle) { + // Force a 32 bits access (normally shuffle is done on 16 bits) + // ps_sel.tex_fmt = 0; // removed as an optimization + ps_sel.aem = m_env.TEXA.AEM; + ASSERT(tex->m_target); + + // Require a float conversion if the texure is a depth otherwise uses Integral scaling + if (psm.depth) { + ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : 1; + } + + // Shuffle is a 16 bits format, so aem is always required + GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); + ta /= 255.0f; + // FIXME rely on compiler for the optimization + ps_cb.TA_Af.x = ta.x; + ps_cb.TA_Af.y = ta.y; + + // The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea. + bilinear &= m_vt.IsLinear(); + + } else if (tex->m_target) { + // Use an old target. AEM and index aren't resolved it must be done + // on the GPU + + // Select the 32/24/16 bits color (AEM) + ps_sel.tex_fmt = cpsm.fmt; + ps_sel.aem = m_env.TEXA.AEM; + + // Don't upload AEM if format is 32 bits + if (cpsm.fmt) { + GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); + ta /= 255.0f; + // FIXME rely on compiler for the optimization + ps_cb.TA_Af.x = ta.x; + ps_cb.TA_Af.y = ta.y; + } + + // Select the index format + if (tex->m_palette) { + // FIXME Potentially improve fmt field in GSLocalMemory + if (m_context->TEX0.PSM == PSM_PSMT4HL) + ps_sel.tex_fmt |= 1 << 2; + else if (m_context->TEX0.PSM == PSM_PSMT4HH) + ps_sel.tex_fmt |= 2 << 2; + else + ps_sel.tex_fmt |= 3 << 2; + + // Alpha channel of the RT is reinterpreted as an index. Star + // Ocean 3 uses it to emulate a stencil buffer. It is a very + // bad idea to force bilinear filtering on it. + bilinear &= m_vt.IsLinear(); + } + + // Depth format + if (tex->m_texture->GetType() == GSTexture::DepthStencil) { + // Require a float conversion if the texure is a depth format + ps_sel.depth_fmt = (psm.bpp == 16) ? 2 : 1; + + // Don't force interpolation on depth format + bilinear &= m_vt.IsLinear(); + } else if (psm.depth) { + // Use Integral scaling + ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : + + // Don't force interpolation on depth format + bilinear &= m_vt.IsLinear(); + } + + } else if (tex->m_palette) { + // Use a standard 8 bits texture. AEM is already done on the CLUT + // Therefore you only need to set the index + // ps_sel.aem = 0; // removed as an optimization + + // Note 4 bits indexes are converted to 8 bits + ps_sel.tex_fmt = 3 << 2; + + } else { + // Standard texture. Both index and AEM expansion were already done by the CPU. + // ps_sel.tex_fmt = 0; // removed as an optimization + // ps_sel.aem = 0; // removed as an optimization + } + + if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { + // Micro optimization that reduces GPU load (removes 5 instructions on the FS program) + ps_sel.tfx = TFX_DECAL; + } else { + ps_sel.tfx = m_context->TEX0.TFX; + } + + ps_sel.tcc = m_context->TEX0.TCC; + + ps_sel.ltf = bilinear && !simple_sample; + + int w = tex->m_texture->GetWidth(); + int h = tex->m_texture->GetHeight(); + + int tw = (int)(1 << m_context->TEX0.TW); + int th = (int)(1 << m_context->TEX0.TH); + + GSVector4 WH(tw, th, w, h); + + ps_sel.fst = !!PRIM->FST; + + ps_cb.WH = WH; + ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); + if ((m_context->CLAMP.WMS | m_context->CLAMP.WMT) > 1) { + ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); + ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy(); + } + + // TC Offset Hack + ps_sel.tcoffsethack = !!UserHacks_TCOffset; + ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y) / WH.xyxy(); + + + // Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader + ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP); + ps_ssel.tav = (m_context->CLAMP.WMT != CLAMP_CLAMP); + ps_ssel.ltf = bilinear && simple_sample; + ps_ssel.aniso = simple_sample; + + // Setup Texture ressources + dev->SetupSampler(ps_ssel); + dev->PSSetShaderResources(tex->m_texture, tex->m_palette); + + if (tex->m_spritehack_t && (ps_sel.atst == 2)) { + ps_sel.atst = 1; + } +} + GSRendererOGL::PRIM_OVERLAP GSRendererOGL::PrimitiveOverlap() { // Either 1 triangle or 1 line or 3 POINTs @@ -1082,165 +1236,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // By default don't use texture ps_sel.tfx = 4; - bool spritehack = false; int atst = ps_sel.atst; - if (tex) - { - // Warning fetch the texture PSM format rather than the context format. The latter could have been corrected in the texture cache for depth. - //const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[m_context->TEX0.PSM]; - const GSLocalMemory::psm_t &psm = GSLocalMemory::m_psm[tex->m_TEX0.PSM]; - const GSLocalMemory::psm_t &cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[m_context->TEX0.CPSM] : psm; - bool bilinear = m_filter == 2 ? m_vt.IsLinear() : m_filter != 0; - bool simple_sample = !tex->m_palette && cpsm.fmt == 0 && m_context->CLAMP.WMS < 2 && m_context->CLAMP.WMT < 2 && !psm.depth; - // Don't force extra filtering on sprite (it creates various upscaling issue) - bilinear &= !((m_vt.m_primclass == GS_SPRITE_CLASS) && m_userhacks_round_sprite_offset && !m_vt.IsLinear()); - - ps_sel.wms = m_context->CLAMP.WMS; - ps_sel.wmt = m_context->CLAMP.WMT; - - // Depth + bilinear filtering isn't done yet (And I'm not sure we need it anyway but a game will prove me wrong) - // So of course, GTA set the linear mode, but sampling is done at texel center so it is equivalent to nearest sampling - ASSERT(!(psm.depth && m_vt.IsLinear())); - - // Performance note: - // 1/ Don't set 0 as it is the default value - // 2/ Only keep aem when it is useful (avoid useless shader permutation) - if (ps_sel.shuffle) { - // Force a 32 bits access (normally shuffle is done on 16 bits) - // ps_sel.tex_fmt = 0; // removed as an optimization - ps_sel.aem = m_env.TEXA.AEM; - ASSERT(tex->m_target); - - // Require a float conversion if the texure is a depth otherwise uses Integral scaling - if (psm.depth) { - ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : 1; - } - - // Shuffle is a 16 bits format, so aem is always required - GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); - ta /= 255.0f; - // FIXME rely on compiler for the optimization - ps_cb.TA_Af.x = ta.x; - ps_cb.TA_Af.y = ta.y; - - // The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea. - bilinear &= m_vt.IsLinear(); - - } else if (tex->m_target) { - // Use an old target. AEM and index aren't resolved it must be done - // on the GPU - - // Select the 32/24/16 bits color (AEM) - ps_sel.tex_fmt = cpsm.fmt; - ps_sel.aem = m_env.TEXA.AEM; - - // Don't upload AEM if format is 32 bits - if (cpsm.fmt) { - GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff()); - ta /= 255.0f; - // FIXME rely on compiler for the optimization - ps_cb.TA_Af.x = ta.x; - ps_cb.TA_Af.y = ta.y; - } - - // Select the index format - if (tex->m_palette) { - // FIXME Potentially improve fmt field in GSLocalMemory - if (m_context->TEX0.PSM == PSM_PSMT4HL) - ps_sel.tex_fmt |= 1 << 2; - else if (m_context->TEX0.PSM == PSM_PSMT4HH) - ps_sel.tex_fmt |= 2 << 2; - else - ps_sel.tex_fmt |= 3 << 2; - - // Alpha channel of the RT is reinterpreted as an index. Star - // Ocean 3 uses it to emulate a stencil buffer. It is a very - // bad idea to force bilinear filtering on it. - bilinear &= m_vt.IsLinear(); - } - - // Depth format - if (tex->m_texture->GetType() == GSTexture::DepthStencil) { - // Require a float conversion if the texure is a depth format - ps_sel.depth_fmt = (psm.bpp == 16) ? 2 : 1; - - // Don't force interpolation on depth format - bilinear &= m_vt.IsLinear(); - } else if (psm.depth) { - // Use Integral scaling - ps_sel.depth_fmt = (tex->m_texture->GetType() != GSTexture::DepthStencil) ? 3 : - - // Don't force interpolation on depth format - bilinear &= m_vt.IsLinear(); - } - - } else if (tex->m_palette) { - // Use a standard 8 bits texture. AEM is already done on the CLUT - // Therefore you only need to set the index - // ps_sel.aem = 0; // removed as an optimization - - // Note 4 bits indexes are converted to 8 bits - ps_sel.tex_fmt = 3 << 2; - - } else { - // Standard texture. Both index and AEM expansion were already done by the CPU. - // ps_sel.tex_fmt = 0; // removed as an optimization - // ps_sel.aem = 0; // removed as an optimization - } - - if (m_context->TEX0.TFX == TFX_MODULATE && m_vt.m_eq.rgba == 0xFFFF && m_vt.m_min.c.eq(GSVector4i(128))) { - // Micro optimization that reduces GPU load (removes 5 instructions on the FS program) - ps_sel.tfx = TFX_DECAL; - } else { - ps_sel.tfx = m_context->TEX0.TFX; - } - - ps_sel.tcc = m_context->TEX0.TCC; - - ps_sel.ltf = bilinear && !simple_sample; - spritehack = tex->m_spritehack_t; - - int w = tex->m_texture->GetWidth(); - int h = tex->m_texture->GetHeight(); - - int tw = (int)(1 << m_context->TEX0.TW); - int th = (int)(1 << m_context->TEX0.TH); - - GSVector4 WH(tw, th, w, h); - - ps_sel.fst = !!PRIM->FST; - - ps_cb.WH = WH; - ps_cb.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw(); - if ((m_context->CLAMP.WMS | m_context->CLAMP.WMT) > 1) { - ps_cb.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV); - ps_cb.MinMax = GSVector4(ps_cb.MskFix) / WH.xyxy(); - } - - // TC Offset Hack - ps_sel.tcoffsethack = !!UserHacks_TCOffset; - ps_cb.TC_OH_TS = GSVector4(1/16.0f, 1/16.0f, UserHacks_TCO_x, UserHacks_TCO_y) / WH.xyxy(); - - - // Only enable clamping in CLAMP mode. REGION_CLAMP will be done manually in the shader - ps_ssel.tau = (m_context->CLAMP.WMS != CLAMP_CLAMP); - ps_ssel.tav = (m_context->CLAMP.WMT != CLAMP_CLAMP); - ps_ssel.ltf = bilinear && simple_sample; - ps_ssel.aniso = simple_sample; - - // Setup Texture ressources - dev->SetupSampler(ps_ssel); - dev->PSSetShaderResources(tex->m_texture, tex->m_palette); - - if (spritehack && (ps_sel.atst == 2)) { - ps_sel.atst = 1; - } - } else { -#ifdef ENABLE_OGL_DEBUG - // Unattach texture to avoid noise in debugger - dev->PSSetShaderResources(NULL, NULL); -#endif + if (tex) { + EmulateTextureSampler(ps_sel, ps_ssel, tex); } // Always bind the RT. This way special effect can use it. dev->PSSetShaderResource(3, rt); @@ -1325,7 +1324,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour static const uint32 iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; ps_sel.atst = iatst[atst]; - if (spritehack && (ps_sel.atst == 2)) { + if (tex && tex->m_spritehack_t && (ps_sel.atst == 2)) { ps_sel.atst = 1; } diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index 2810a5c054..51c53709f5 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -67,6 +67,7 @@ class GSRendererOGL final : public GSRendererHW inline bool EmulateTextureShuffleAndFbmask(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::OMColorMaskSelector& om_csel); inline void EmulateChannelShuffle(GSDeviceOGL::PSSelector& ps_sel, GSTexture** rt, const GSTextureCache::Source* tex); inline bool EmulateBlending(GSDeviceOGL::PSSelector& ps_sel, bool DATE_GL42); + inline void EmulateTextureSampler(GSDeviceOGL::PSSelector& ps_sel, GSDeviceOGL::PSSamplerSelector ps_ssel, const GSTextureCache::Source* tex); public: GSRendererOGL();