From 35081f922a08d5069e9e1f4c66a2b9f140f5ef97 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 5 Jun 2015 22:37:34 +0200 Subject: [PATCH 01/50] gsdx: GS kinds of support draw without framebuffer Gow uses 24 bits buffer, so only color is updated but blending is configured as Cd so it is a NOP In this case, we don't lookup the target in the texture cache. It reduces the complexity to handle depth which can be located at same address as RT Note: please test DX renderer --- plugins/GSdx/GS.h | 1 + plugins/GSdx/GSDeviceOGL.cpp | 8 +++----- plugins/GSdx/GSRendererDX.cpp | 6 +++--- plugins/GSdx/GSRendererDX9.cpp | 3 +++ plugins/GSdx/GSRendererHW.cpp | 24 ++++++++++++++++-------- plugins/GSdx/GSRendererOGL.cpp | 10 +++++----- 6 files changed, 31 insertions(+), 21 deletions(-) diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index ec414a3ac3..52611fff18 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h @@ -523,6 +523,7 @@ REG_END2 // opaque => output will be Cs/As __forceinline bool IsOpaque() const {return ((A == B || (C == 2 && FIX == 0)) && D == 0) || (A == 0 && B == D && C == 2 && FIX == 0x80);} __forceinline bool IsOpaque(int amin, int amax) const {return ((A == B || amax == 0) && D == 0) || (A == 0 && B == D && amin == 0x80 && amax == 0x80);} + __forceinline bool IsCd() { return (A == B) && (D == 1);} REG_END2 REG64_(GIFReg, BITBLTBUF) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 752ec9adec..eb8ada7304 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -996,7 +996,6 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver { GL_PUSH("DATE First Pass"); - GSTexture* t = NULL; // sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows, persona4 shadows BeginScene(); @@ -1016,7 +1015,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver OMSetDepthStencilState(m_date.dss, 1); 
OMSetBlendState(m_date.bs, 0); // normally ok without any RT if GL_ARB_framebuffer_no_attachments is supported (minus driver bug) - OMSetRenderTargets(t, ds, &GLState::scissor); + OMSetRenderTargets(NULL, ds, &GLState::scissor); OMSetColorMaskState(); // TODO: likely useless // ia @@ -1035,9 +1034,7 @@ void GSDeviceOGL::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* ver PSSetSamplerState(m_convert.pt); } - OMSetWriteBuffer(GL_NONE); DrawPrimitive(); - OMSetWriteBuffer(); EndScene(); @@ -1179,9 +1176,10 @@ void GSDeviceOGL::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVecto if (rt == NULL || !RT->IsBackbuffer()) { OMSetFBO(m_fbo); if (rt) { + OMSetWriteBuffer(); OMAttachRt(RT); } else { - // Note: NULL rt is only used in DATE so far. + OMSetWriteBuffer(GL_NONE); OMAttachRt(); } diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index 103fb50928..755606c919 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -47,8 +47,8 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; - const GSVector2i& rtsize = rt->GetSize(); - const GSVector2& rtscale = rt->GetScale(); + const GSVector2i& rtsize = ds->GetSize(); + const GSVector2& rtscale = ds->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; @@ -199,7 +199,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
- if(rt->LikelyOffset) + if(rt && rt->LikelyOffset) { // DX9 has pixelcenter set to 0.0, so give it some value here diff --git a/plugins/GSdx/GSRendererDX9.cpp b/plugins/GSdx/GSRendererDX9.cpp index e8c16a1040..fa078e7646 100644 --- a/plugins/GSdx/GSRendererDX9.cpp +++ b/plugins/GSdx/GSRendererDX9.cpp @@ -231,6 +231,9 @@ void GSRendererDX9::SetupIA() void GSRendererDX9::UpdateFBA(GSTexture* rt) { + if (!rt) + return; + GSDevice9* dev = (GSDevice9*)m_dev; dev->BeginScene(); diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 99f1cd0db8..2876b24baa 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -334,12 +334,17 @@ void GSRendererHW::Draw() GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; + // It is allowed to use the depth and rt at the same location. However at least 1 must + // be disabled. GoW uses a Cd blending on a 24 bits buffer (no alpha) + const bool no_rt = context->ALPHA.IsCd() && PRIM->ABE && (context->FRAME.PSM == 1); + GIFRegTEX0 TEX0; TEX0.TBP0 = context->FRAME.Block(); TEX0.TBW = context->FRAME.FBW; TEX0.PSM = context->FRAME.PSM; - GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); + + GSTextureCache::Target* rt = no_rt ? 
NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); TEX0.TBP0 = context->ZBUF.Block(); TEX0.TBW = context->FRAME.FBW; @@ -347,7 +352,7 @@ void GSRendererHW::Draw() GSTextureCache::Target* ds = m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::DepthStencil, context->DepthWrite()); - if(!rt || !ds) + if((!rt && !no_rt) || !ds) { GL_POP(); ASSERT(0); @@ -429,7 +434,8 @@ void GSRendererHW::Draw() { s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); - rt->m_texture->Save(root_hw+s); + if (rt) + rt->m_texture->Save(root_hw+s); } if(s_savez && s_n >= s_saven) @@ -447,7 +453,7 @@ void GSRendererHW::Draw() #endif } - if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt->m_texture, ds->m_texture, tex)) + if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(NULL, ds->m_texture, tex)) { s_n += 1; // keep counter sync GL_POP(); @@ -514,7 +520,7 @@ void GSRendererHW::Draw() // - DrawPrims(rt->m_texture, ds->m_texture, tex); + DrawPrims(rt ? 
rt->m_texture : NULL, ds->m_texture, tex); // @@ -526,7 +532,7 @@ void GSRendererHW::Draw() GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in)); - if(fm != 0xffffffff) + if(fm != 0xffffffff && rt) { rt->m_valid = rt->m_valid.runion(r); @@ -557,7 +563,8 @@ void GSRendererHW::Draw() { s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, context->FRAME.Block(), context->FRAME.PSM); - rt->m_texture->Save(root_hw+s); + if (rt) + rt->m_texture->Save(root_hw+s); } if(s_savez && s_n >= s_saven) @@ -580,7 +587,8 @@ void GSRendererHW::Draw() #ifdef DISABLE_HW_TEXTURE_CACHE - m_tc->Read(rt, r); + if (rt) + m_tc->Read(rt, r); #endif diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index d1ea0a6e19..0489ac4065 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -217,13 +217,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour { GL_PUSH("GL Draw from %d in %d (Depth %d)", tex && tex->m_texture ? tex->m_texture->GetID() : 0, - rt->GetID(), ds->GetID()); + rt ? rt->GetID() : -1, ds->GetID()); GSDrawingEnvironment& env = m_env; GSDrawingContext* context = m_context; - const GSVector2i& rtsize = rt->GetSize(); - const GSVector2& rtscale = rt->GetScale(); + const GSVector2i& rtsize = ds->GetSize(); + const GSVector2& rtscale = ds->GetScale(); bool DATE = m_context->TEST.DATE && context->FRAME.PSM != PSM_PSMCT24; bool DATE_GL42 = false; @@ -425,7 +425,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour //The resulting shifted output aligns better with common blending / corona / blurring effects, //but introduces a few bad pixels on the edges. 
- if (rt->LikelyOffset) + if (rt && rt->LikelyOffset) { ox2 *= rt->OffsetHack_modx; oy2 *= rt->OffsetHack_mody; @@ -621,7 +621,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour bool all_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ) && (m_accurate_blend > 1); bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw; - if (sw_blending && om_bsel.abe) { + if (sw_blending && om_bsel.abe && rt) { GL_INS("!!! SW blending effect used (0x%x from sel %d) !!!", bogus_blend, blend_sel); // select a shader that support blending From 49516cbbcad2a52b78e15fea009ac35c4cba875d Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 5 Jun 2015 23:37:06 +0200 Subject: [PATCH 02/50] gsdx-tc: allow to invalidate the depth/target if it is used as target/depth It is required when a game sometimes uses an address as a RT and sometimes as a depth --- plugins/GSdx/GSRendererHW.cpp | 4 ++++ plugins/GSdx/GSTextureCache.cpp | 37 +++++++++++++++++++++++++++++++-- plugins/GSdx/GSTextureCache.h | 1 + 3 files changed, 40 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 2876b24baa..1f4b78b6a8 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -537,6 +537,8 @@ void GSRendererHW::Draw() rt->m_valid = rt->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.fb, r, false); + + m_tc->InvalidateVideoMemType(GSTextureCache::DepthStencil, context->FRAME.Block()); } if(zm != 0xffffffff) @@ -544,6 +546,8 @@ void GSRendererHW::Draw() ds->m_valid = ds->m_valid.runion(r); m_tc->InvalidateVideoMem(context->offset.zb, r, false); + + m_tc->InvalidateVideoMemType(GSTextureCache::RenderTarget, context->ZBUF.Block()); } // diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 2a0fc01e68..06549004c2 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -152,7 
+152,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { #ifdef ENABLE_OGL_DEBUG if (dst) { - GL_CACHE("TC: dst hit (%s): %d (0x%x)", half_right ? "half" : "full", + GL_CACHE("TC: dst %s hit (%s): %d (0x%x)", dst->m_type ? "Depth" : "Color", half_right ? "half" : "full", dst->m_texture ? dst->m_texture->GetID() : 0, TEX0.TBP0); } else { @@ -165,6 +165,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { return NULL; } + #ifdef ENABLE_OGL_DEBUG } else { GL_CACHE("TC: src hit: %d (0x%x)", @@ -258,6 +259,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int hh *= 2; } + // Gregory: I'm sure this sillyness is related to the usage of a 32bits + // buffer as a 16 bits format. In this case the height of the buffer is + // multiplyed by 2 (Hence a scissor bigger than the RT) + // This vp2 fix doesn't work most of the time if(hh < 512 && m_renderer->m_context->SCISSOR.SCAY1 == 511) // vp2 @@ -334,6 +339,30 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int return dst; } +// Goal: Depth And Target at the same address is not possible. On GS it is +// the same memory but not on the Dx/GL. Therefore a write to the Depth/Target +// must invalidate the Target/Depth respectively +void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp) +{ + for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) + { + Target* t = *i; + + if(bp == t->m_TEX0.TBP0) + { + GL_CACHE("TC: InvalidateVideoMemType: Remove Target(T%d) %d (0x%x)", type, + t->m_texture ? 
t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); + + m_dst[type].erase(i); + delete t; + + break; + } + } + +} + // Goal: invalidate data sent to the GPU when the source (GS memory) is modified // Called each time you want to write to the GS memory void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, bool target) @@ -465,7 +494,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b else { m_dst[type].erase(j); - GL_CACHE("TC: Remove Target(%d) %d (0x%x)", type, + GL_CACHE("TC: Remove Target(T%d) %d (0x%x)", type, t->m_texture ? t->m_texture->GetID() : 0, t->m_TEX0.TBP0); delete t; @@ -776,6 +805,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con GSVector4 dRect(0, 0, w, h); + // Try to extract a texture bigger than the RT. Current solution is to rescale the size + // of the texture to fit in the RT. In my opinion, it would be better to increase the size of + // the RT if(w > dstsize.x) { scale.x = (float)dstsize.x / tw; @@ -825,6 +857,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } else { + // Different size or not the same format sRect.z /= sTex->GetWidth(); sRect.w /= sTex->GetHeight(); diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 04f71b4aaa..0879c63a64 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -135,6 +135,7 @@ public: Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used); Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h); + void InvalidateVideoMemType(int type, uint32 bp); void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true); void InvalidateLocalMem(GSOffset* off, const GSVector4i& r); From b59a347ad7dcb2141f57a7f016de51a4186448ba Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 5 Jun 2015 23:50:38 +0200 Subject: [PATCH 03/50] gsdx-tc: allow to use depth buffer as input texture --- 
plugins/GSdx/GSTextureCache.cpp | 68 +++++++++++++++++---------------- 1 file changed, 36 insertions(+), 32 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 06549004c2..7abd759916 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -121,28 +121,41 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // (Simply not doing this code at all makes a lot of previsouly missing stuff show (but breaks pretty much everything // else.) - //for(int type = 0; type < 2 && dst == NULL; type++) - for(int type = 0; type < 1 && dst == NULL; type++) // Only look for render target, no depth stencil + for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) { - for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) - { + Target* t = *i; + + if(t->m_used && t->m_dirty.empty()) { + if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { + dst = t; + + break; + + } else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) { + // Detect half of the render target (fix snow engine game) + // Target Page (8KB) have always a width of 64 pixels + // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 + half_right = true; + dst = t; + + break; + } + + } + } + + if (dst == NULL) { + // Let's try a trick to avoid to use wrongly a depth buffer + // Unfortunately, I don't have any Arc the Lad testcase + // + // 1/ Check only current frame, I guess it is only used as a postprocessing effect + for(list::iterator i = m_dst[DepthStencil].begin(); i != m_dst[DepthStencil].end(); i++) { Target* t = *i; - if(t->m_used && t->m_dirty.empty()) { - if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - dst = t; - - break; - - } else if ((t->m_TEX0.TBW >= 16) && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0 + t->m_TEX0.TBW * 0x10, t->m_TEX0.PSM)) 
{ - // Detect half of the render target (fix snow engine game) - // Target Page (8KB) have always a width of 64 pixels - // Half of the Target is TBW/2 pages * 8KB / (1 block * 256B) = 0x10 - half_right = true; - dst = t; - - break; - } + if(!t->m_age && t->m_used && t->m_dirty.empty() && GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) + { + dst = t; + break; } } } @@ -712,17 +725,6 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con src->m_target = true; - if(dst->m_type != RenderTarget) - { - GL_CACHE("TC: Remove dst because not a RT %d (0x%x)", - dst->m_texture ? dst->m_texture->GetID() : 0, - dst->m_TEX0.TBP0); - - // TODO - delete src; - return NULL; - } - dst->Update(); GSTexture* tmp = NULL; @@ -838,12 +840,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con linear = false; } + int shader = dst->m_type != RenderTarget ? 11 : 0; + if(!src->m_texture) { src->m_texture = dTex; } - if((sRect == dRect).alltrue()) + if((sRect == dRect).alltrue() && !shader) { if (half_right) { // You typically hit this code in snow engine game. 
Dstsize is the size of of Dx/GL RT @@ -865,7 +869,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con sRect.x = sRect.z/2.0f; } - m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, 0, linear); + m_renderer->m_dev->StretchRect(sTex, sRect, dTex, dRect, shader, linear); } if(dTex != src->m_texture) From 74d84aafb5a8924f6ace955cf1c4b9dd88e2b074 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 5 Jun 2015 23:51:13 +0200 Subject: [PATCH 04/50] gsdx-tc: allow to search the depth in the old target list Yes it is possible on PS2 world --- plugins/GSdx/GSTextureCache.cpp | 46 +++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 7abd759916..85b51c26eb 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -234,22 +234,42 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } } - if(dst == NULL) - { - GL_CACHE("TC: Lookup Target(T%d) %dx%d, miss (0x%x)", type, w, h, bp); - - dst = CreateTarget(TEX0, w, h, type); - - if(dst == NULL) - { - return NULL; - } - } - else - { + if (dst) { GL_CACHE("TC: Lookup Target(T%d) %dx%d, hit: %d (0x%x)", type, w, h, dst->m_texture->GetID(), bp); dst->Update(); + } else { + + if (type == DepthStencil) { + // Depth stencil can be an older RT but only check recent RT to avoid to pick + // some bad data. 
+ for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) + { + Target* t = *i; + + if(!t->m_age && bp == t->m_TEX0.TBP0) + { + GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x)", w, h, bp); + // Convert the RenderTarget into a Depth Buffer + dst = CreateTarget(TEX0, w, h, type); + GSVector4 sRect(0, 0, 1.0, 1.0); + GSVector4 dRect(0, 0, w, h); + m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 12, false); + } + } + } + + if(dst == NULL) + { + GL_CACHE("TC: Lookup Target(T%d) %dx%d, miss (0x%x)", type, w, h, bp); + + dst = CreateTarget(TEX0, w, h, type); + + if(dst == NULL) + { + return NULL; + } + } } if(m_renderer->CanUpscale()) From 76160505f7a6f7739b9f3e6b465b6c144a78c3eb Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 7 Jun 2015 18:42:30 +0200 Subject: [PATCH 05/50] gsdx-tc: add an option to avoid blow up dx :p texture_cache_depth = 1 (default for openGL) --- plugins/GSdx/GSTextureCache.cpp | 9 +++++++-- plugins/GSdx/GSTextureCache.h | 3 +++ plugins/GSdx/GSTextureCache11.h | 2 ++ plugins/GSdx/GSTextureCache9.h | 2 ++ 4 files changed, 14 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 85b51c26eb..5ce6f6bef7 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -30,6 +30,8 @@ GSTextureCache::GSTextureCache(GSRenderer* r) UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); m_paltex = !!theApp.GetConfig("paltex", 0); + m_can_convert_depth = theApp.GetConfig("texture_cache_depth", 1); + m_temp = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); } @@ -144,7 +146,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con } } - if (dst == NULL) { + if (dst == NULL && CanConvertDepth()) { // Let's try a trick to avoid to use wrongly a depth buffer // Unfortunately, I don't have any Arc the Lad 
testcase // @@ -240,7 +242,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst->Update(); } else { - if (type == DepthStencil) { + if (type == DepthStencil && CanConvertDepth()) { // Depth stencil can be an older RT but only check recent RT to avoid to pick // some bad data. for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) @@ -377,6 +379,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int // must invalidate the Target/Depth respectively void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp) { + if (!CanConvertDepth()) + return; + for(list::iterator i = m_dst[type].begin(); i != m_dst[type].end(); i++) { Target* t = *i; diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 0879c63a64..39a64368b6 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -110,6 +110,7 @@ protected: bool m_paltex; int m_spritehack; uint8* m_temp; + bool m_can_convert_depth; virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false); virtual Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type); @@ -122,6 +123,8 @@ protected: virtual void Read(Target* t, const GSVector4i& r) = 0; #endif + virtual bool CanConvertDepth() { return m_can_convert_depth; } + public: GSTextureCache(GSRenderer* r); virtual ~GSTextureCache(); diff --git a/plugins/GSdx/GSTextureCache11.h b/plugins/GSdx/GSTextureCache11.h index 9e0522d784..d110dbe156 100644 --- a/plugins/GSdx/GSTextureCache11.h +++ b/plugins/GSdx/GSTextureCache11.h @@ -31,6 +31,8 @@ protected: void Read(Target* t, const GSVector4i& r); + virtual bool CanConvertDepth() { return false; } + public: GSTextureCache11(GSRenderer* r); }; diff --git a/plugins/GSdx/GSTextureCache9.h b/plugins/GSdx/GSTextureCache9.h index 67a0d89200..1fbf701860 100644 --- a/plugins/GSdx/GSTextureCache9.h +++ 
b/plugins/GSdx/GSTextureCache9.h @@ -31,6 +31,8 @@ protected: void Read(Target* t, const GSVector4i& r); + virtual bool CanConvertDepth() { return false; } + public: GSTextureCache9(GSRenderer* r); }; From 2d812deb845bac828595d3d6a3e4b64c6009ff64 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Mon, 8 Jun 2015 09:44:47 +0200 Subject: [PATCH 06/50] gsdx-tc: Depth and Target are interchangeable on the GS !!! Extend the texture cache to search Color target in Depth target --- plugins/GSdx/GSTextureCache.cpp | 51 ++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 5ce6f6bef7..c924af9252 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -240,38 +240,43 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int GL_CACHE("TC: Lookup Target(T%d) %dx%d, hit: %d (0x%x)", type, w, h, dst->m_texture->GetID(), bp); dst->Update(); - } else { + } else if (CanConvertDepth()) { - if (type == DepthStencil && CanConvertDepth()) { - // Depth stencil can be an older RT but only check recent RT to avoid to pick - // some bad data. - for(list::iterator i = m_dst[RenderTarget].begin(); i != m_dst[RenderTarget].end(); i++) + int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil; + GSVector4 sRect(0, 0, 1.0, 1.0); + GSVector4 dRect(0, 0, w, h); + + // Depth stencil/RT can be an older RT/DS but only check recent RT/DS to avoid to pick + // some bad data. 
+ for(list::iterator i = m_dst[rev_type].begin(); i != m_dst[rev_type].end(); i++) + { + Target* t = *i; + + if(!t->m_age && bp == t->m_TEX0.TBP0) { - Target* t = *i; - - if(!t->m_age && bp == t->m_TEX0.TBP0) - { + dst = CreateTarget(TEX0, w, h, type); + if (type == DepthStencil) { GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x)", w, h, bp); - // Convert the RenderTarget into a Depth Buffer - dst = CreateTarget(TEX0, w, h, type); - GSVector4 sRect(0, 0, 1.0, 1.0); - GSVector4 dRect(0, 0, w, h); m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 12, false); + } else { + GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x)", w, h, bp); + m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 11, false); } + + break; } } + } + + if(dst == NULL) + { + GL_CACHE("TC: Lookup Target(T%d) %dx%d, miss (0x%x)", type, w, h, bp); + + dst = CreateTarget(TEX0, w, h, type); if(dst == NULL) - { - GL_CACHE("TC: Lookup Target(T%d) %dx%d, miss (0x%x)", type, w, h, bp); - - dst = CreateTarget(TEX0, w, h, type); - - if(dst == NULL) - { - return NULL; - } - } + return NULL; } if(m_renderer->CanUpscale()) From 58ce7d4bb8e57755ce764a04f546f513501e452e Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 6 Jun 2015 13:56:08 +0200 Subject: [PATCH 07/50] gsdx-ogl: emulate texture shuffle GS doesn't support texture shuffle/swizzle so it is emulated in a complex way. The idea is to read/write the 32-bit color format as a 16-bit format. This way, RG (16 lsb bits) or BA (16 msb bits) can be read or written with square texture that targets pixels 1-8 or pixels 8-16. However shuffle is limited. For example you can copy the green channel to either the alpha channel or another green channel. 
Note: Partial masking of channel is not yet implemented V2: improve logging V3: better support of green channel in shader V4: improve detection of destination (issue due to rounding) --- plugins/GSdx/GSDeviceOGL.cpp | 2 + plugins/GSdx/GSDeviceOGL.h | 4 +- plugins/GSdx/GSRendererOGL.cpp | 117 +++++++++++++++++++++++++++++- plugins/GSdx/GSTextureCache.cpp | 2 +- plugins/GSdx/res/glsl/tfx_fs.glsl | 26 +++++++ plugins/GSdx/res/glsl_source.h | 26 +++++++ 6 files changed, 172 insertions(+), 5 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index eb8ada7304..4bfd25f9d0 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -647,6 +647,8 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) //+ format("#define PS_POINT_SAMPLER %d\n", sel.point_sampler) + format("#define PS_BLEND %d\n", sel.blend) + format("#define PS_IIP %d\n", sel.iip) + + format("#define PS_SHUFFLE %d\n", sel.shuffle) + + format("#define PS_READ_BA %d\n", sel.read_ba) ; return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index d10f553e35..00d9d9b367 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -322,8 +322,10 @@ class GSDeviceOGL : public GSDevice uint32 wmt:2; uint32 ltf:1; uint32 ifmt:2; + uint32 shuffle:1; + uint32 read_ba:1; - uint32 _free1:2; + //uint32 _free1:0; // Word 2 uint32 blend:8; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 0489ac4065..b247459c7e 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -247,8 +247,115 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSDeviceOGL::OMColorMaskSelector om_csel; GSDeviceOGL::OMDepthStencilSelector om_dssel; + if ((context->FRAME.PSM & 0x2) && (context->TEX0.PSM & 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { + ps_sel.shuffle = 1; + ps_sel.dfmt = 0; + + 
const GIFRegXYOFFSET& o = m_context->XYOFFSET; + GSVertex* v = &m_vertex.buff[0]; + size_t count = m_vertex.next; + + // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors + int pos = (v[0].XYZ.X - o.OFX) & 0xFF; + bool write_ba = (pos > 112 && pos < 136); + // Read texture is 8 to 16 pixels (same as above) + int tex_pos = v[0].U & 0xFF; + ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144); + + //GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U); + + // Convert the vertex info to a 32 bits color format equivalent + for(size_t i = 0; i < count; i += 2) { + if (write_ba) + v[i].XYZ.X -= 128u; + else + v[i+1].XYZ.X += 128u; + + if (ps_sel.read_ba) + v[i].U -= 128u; + else + v[i+1].U += 128u; + + // Height is too big (2x). + int tex_offset = v[i].V & 0xF; + GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); + + GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i+1].XYZ.Y, v[i+1].V); + tmp = GSVector4i(tmp - offset).srl32(1) + offset; + + v[i].XYZ.Y = tmp.x; + v[i].V = tmp.y; + v[i+1].XYZ.Y = tmp.z; + v[i+1].V = tmp.w; + } + + // Please bang my head against the wall! + // 1/ Reduce the frame mask to a 16 bit format + const uint32& m = context->FRAME.FBMSK; + uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000); + om_csel.wrgba = 0; + + // 2 Select the new mask (Please someone put SSE here) + if ((fbmask & 0xFF) == 0) { + if (write_ba) { + GL_INS("Color shuffle %s => B", ps_sel.read_ba ? "B" : "R"); + om_csel.wb = 1; + } else { + GL_INS("Color shuffle %s => R", ps_sel.read_ba ? "B" : "R"); + om_csel.wr = 1; + } + } else if ((fbmask & 0xFF) != 0xFF) { + GL_INS("ERROR: not supported RG mask:%x", fbmask & 0xFF); + ASSERT(0); + } + + fbmask >>= 8; + if ((fbmask & 0xFF) == 0) { + if (write_ba) { + GL_INS("Color shuffle %s => A", ps_sel.read_ba ? "A" : "G"); + om_csel.wa = 1; + } else { + GL_INS("Color shuffle %s => G", ps_sel.read_ba ? 
"A" : "G"); + om_csel.wg = 1; + } + } else if ((fbmask & 0xFF) != 0xFF) { + GL_INS("ERROR: not supported BA mask:%x", fbmask & 0xFF); + ASSERT(0); + } + + } else { + ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; + + om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + + { +#ifdef ENABLE_OGL_DEBUG + uint8 r_mask = (context->FRAME.FBMSK >> 0) & 0xFF; + uint8 g_mask = (context->FRAME.FBMSK >> 8) & 0xFF; + uint8 b_mask = (context->FRAME.FBMSK >> 16) & 0xFF; + uint8 a_mask = (context->FRAME.FBMSK >> 24) & 0xFF; + uint8 bits = (GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 16 : 32; + if (r_mask != 0 && r_mask != 0xFF) { + GL_INS("ERROR: not supported r_mask:%x on %d bits format", r_mask, bits); + ASSERT(0); + } + if (g_mask != 0 && g_mask != 0xFF) { + GL_INS("ERROR: not supported g_mask:%x on %d bits format", g_mask, bits); + ASSERT(0); + } + if (b_mask != 0 && b_mask != 0xFF) { + GL_INS("ERROR: not supported b_mask:%x on %d bits format", b_mask, bits); + ASSERT(0); + } + if (a_mask != 0 && a_mask != 0xFF) { + GL_INS("ERROR: not supported a_mask:%x on %d bits format", a_mask, bits); + ASSERT(0); + } +#endif + } + } + // Format of the output - ps_sel.dfmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; GIFRegALPHA ALPHA = context->ALPHA; float afix = (float)context->ALPHA.FIX / 0x80; @@ -285,7 +392,6 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } } - om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); if (ps_sel.dfmt == 1) { if (ALPHA.C == 1) { // 24 bits no alpha channel so use 1.0f fix factor as equivalent @@ -471,7 +577,9 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour } ps_sel.fba = context->FBA.FBA; + // TODO deprecat this stuff ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 
1 : 0; + ps_sel.aout &= !ps_sel.shuffle; if (UserHacks_AlphaHack) ps_sel.aout = 1; @@ -524,7 +632,10 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour ps_sel.wms = context->CLAMP.WMS; ps_sel.wmt = context->CLAMP.WMT; - if (tex->m_palette) { + + if (ps_sel.shuffle) { + ps_sel.fmt = 0; + } else if (tex->m_palette) { ps_sel.fmt = cpsm.fmt | 4; ps_sel.ifmt = !tex->m_target ? 0 : (context->TEX0.PSM == PSM_PSMT4HL) ? 2 diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index c924af9252..5f9f626d3f 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -223,7 +223,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = t; -#ifdef ENABLE_OGL_DEBUG +#if 0 // Likely the root cause of tons and tons of bug if (dst->m_TEX0.PSM != TEX0.PSM) { GL_INS("TC: ERROR: use a target with format 0x%x as 0x%x without any conversion", dst->m_TEX0.PSM, TEX0.PSM); diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 32979ee128..c37a529537 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -708,6 +708,32 @@ void ps_main() #endif #if (APITRACE_DEBUG & 8) == 8 c.a = 0.5f; +#endif + +#if PS_SHUFFLE + uvec4 denorm_c = uvec4(c * 255.0f + 0.5f); + uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f); + + // Write RB part. Mask will take care of the correct destination +#if PS_READ_BA + c.rb = c.bb; +#else + c.rb = c.rr; +#endif + + // Write GA part. 
Mask will take care of the correct destination +#if PS_READ_BA + if (bool(denorm_c.a & 0x80u)) + c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f); + else + c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f); +#else + if (bool(denorm_c.g & 0x80u)) + c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f); + else + c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f); +#endif + #endif // Must be done before alpha correction diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index f7ca8e663a..5971a36c7a 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1489,6 +1489,32 @@ static const char* tfx_fs_all_glsl = " c.a = 0.5f;\n" "#endif\n" "\n" + "#if PS_SHUFFLE\n" + " uvec4 denorm_c = uvec4(c * 255.0f + 0.5f);\n" + " uvec2 denorm_TA = uvec2(vec2(TA.xy) * 255.0f + 0.5f);\n" + "\n" + " // Write RB part. Mask will take care of the correct destination\n" + "#if PS_READ_BA\n" + " c.rb = c.bb;\n" + "#else\n" + " c.rb = c.rr;\n" + "#endif\n" + "\n" + " // Write GA part. 
Mask will take care of the correct destination\n" + "#if PS_READ_BA\n" + " if (bool(denorm_c.a & 0x80u))\n" + " c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n" + " else\n" + " c.ga = vec2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n" + "#else\n" + " if (bool(denorm_c.g & 0x80u))\n" + " c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f);\n" + " else\n" + " c.ga = vec2(float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f);\n" + "#endif\n" + "\n" + "#endif\n" + "\n" " // Must be done before alpha correction\n" " float alpha = c.a * 255.0f / 128.0f;\n" "\n" From 4bc8bfc23e56c5ea5651db92b965abc0c3c95834 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 10 Jun 2015 00:17:26 +0100 Subject: [PATCH 08/50] GSdx-DX: Rough port of texture switching from OGL --- plugins/GSdx/GSDeviceDX.h | 2 + plugins/GSdx/GSRendererDX.cpp | 88 +++++++++++++++++++++++++++++++++- plugins/GSdx/GSTextureFX11.cpp | 6 ++- plugins/GSdx/res/tfx.fx | 29 +++++++++++ 4 files changed, 122 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSDeviceDX.h b/plugins/GSdx/GSDeviceDX.h index e49a4d2e31..be2cacc9fe 100644 --- a/plugins/GSdx/GSDeviceDX.h +++ b/plugins/GSdx/GSDeviceDX.h @@ -179,6 +179,8 @@ public: uint32 spritehack:1; uint32 tcoffsethack:1; uint32 point_sampler:1; + uint32 shuffle:1; + uint32 read_ba:1; }; uint32 key; diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index 755606c919..cf0eda3ae5 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -225,6 +225,83 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; + if ((context->FRAME.PSM == 0x2) && (context->TEX0.PSM & 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { + ps_sel.shuffle = 1; + ps_sel.fmt = 0; + + const GIFRegXYOFFSET& o = m_context->XYOFFSET; + GSVertex* v = &m_vertex.buff[0]; + size_t 
count = m_vertex.next; + + // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors + bool write_ba = (((v[0].XYZ.X - o.OFX) & 0xF0) == 128); + // Read texture is 8 to 16 pixels (same as above) + ps_sel.read_ba = ((v[0].U & 0xF0) == 128); + + GL_INS("Color shuffle %s => %s", ps_sel.read_ba ? "BA" : "RG", write_ba ? "BA" : "RG"); + + // Convert the vertex info to a 32 bits color format equivalent + for (size_t i = 0; i < count; i += 2) { + if (write_ba) + v[i].XYZ.X -= 128u; + else + v[i + 1].XYZ.X += 128u; + + if (ps_sel.read_ba) + v[i].U -= 128u; + else + v[i + 1].U += 128u; + + // Height is too big (2x). + int tex_offset = v[i].V & 0xF; + GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); + + GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V); + tmp = ((tmp - offset) >> 1) + offset; + + v[i].XYZ.Y = tmp.x; + v[i].V = tmp.y; + v[i + 1].XYZ.Y = tmp.z; + v[i + 1].V = tmp.w; + } + + // Please bang my head against the wall! + // 1/ Reduce the frame mask to a 16 bit format + const uint32& m = context->FRAME.FBMSK; + uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000); + om_bsel.wrgba = 0; + + // 2 Select the new mask (Please someone put SSE here) + if ((fbmask & 0xFF) == 0) { + if (write_ba) + om_bsel.wb = 1; + else + om_bsel.wr = 1; + } + else if ((fbmask & 0xFF) != 0xFF) { + fprintf(stderr, "Please fix me! wb %d wr %d\n", om_bsel.wb, om_bsel.wr); + //ASSERT(0); + } + + fbmask >>= 8; + if ((fbmask & 0xFF) == 0) { + if (write_ba) + om_bsel.wa = 1; + else + om_bsel.wg = 1; + } + else if ((fbmask & 0xFF) != 0xFF) { + fprintf(stderr, "Please fix me! 
wa %d wg %d\n", om_bsel.wa, om_bsel.wg); + //ASSERT(0); + } + + } + else { + //ps_sel.fmt = GSLocalMemory::m_psm[context->FRAME.PSM].fmt; + + om_bsel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); + } + if(DATE) { if(dev->HasStencil()) @@ -245,7 +322,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc ps_sel.clr1 = om_bsel.IsCLR1(); ps_sel.fba = context->FBA.FBA; ps_sel.aout = context->FRAME.PSM == PSM_PSMCT16 || context->FRAME.PSM == PSM_PSMCT16S || (context->FRAME.FBMSK & 0xff000000) == 0x7f000000 ? 1 : 0; - + ps_sel.aout &= !ps_sel.shuffle; if(UserHacks_AlphaHack) ps_sel.aout = 1; if(PRIM->FGE) @@ -292,7 +369,14 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc ps_sel.wms = context->CLAMP.WMS; ps_sel.wmt = context->CLAMP.WMT; - ps_sel.fmt = tex->m_palette? cpsm.fmt | 4 : cpsm.fmt; + if (ps_sel.shuffle) { + ps_sel.fmt = 0; + + } + else + { + ps_sel.fmt = tex->m_palette ? cpsm.fmt | 4 : cpsm.fmt; + } ps_sel.aem = env.TEXA.AEM; ps_sel.tfx = context->TEX0.TFX; ps_sel.tcc = context->TEX0.TCC; diff --git a/plugins/GSdx/GSTextureFX11.cpp b/plugins/GSdx/GSTextureFX11.cpp index f1719a7c2c..e45aa01062 100644 --- a/plugins/GSdx/GSTextureFX11.cpp +++ b/plugins/GSdx/GSTextureFX11.cpp @@ -178,7 +178,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe if(i == m_ps.end()) { - string str[18]; + string str[20]; str[0] = format("%d", sel.fst); str[1] = format("%d", sel.wms); @@ -198,6 +198,8 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe str[15] = format("%d", sel.spritehack); str[16] = format("%d", sel.tcoffsethack); str[17] = format("%d", sel.point_sampler); + str[18] = format("%d", sel.shuffle); + str[19] = format("%d", sel.read_ba); D3D11_SHADER_MACRO macro[] = { @@ -219,6 +221,8 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe {"PS_SPRITEHACK", str[15].c_str()}, 
{"PS_TCOFFSETHACK", str[16].c_str()}, {"PS_POINT_SAMPLER", str[17].c_str()}, + {"PS_SHUFFLE", str[18].c_str() }, + {"PS_READ_BA", str[19].c_str() }, {NULL, NULL}, }; diff --git a/plugins/GSdx/res/tfx.fx b/plugins/GSdx/res/tfx.fx index ef48db6f37..c868c7cdf5 100644 --- a/plugins/GSdx/res/tfx.fx +++ b/plugins/GSdx/res/tfx.fx @@ -39,6 +39,8 @@ #define PS_SPRITEHACK 0 #define PS_TCOFFSETHACK 0 #define PS_POINT_SAMPLER 0 +#define PS_SHUFFLE 0 +#define PS_READ_BA 0 #endif struct VS_INPUT @@ -712,6 +714,33 @@ PS_OUTPUT ps_main(PS_INPUT input) PS_OUTPUT output; +#if PS_SHUFFLE + int4 denorm_c = int4(c * 255.0f + 0.5f); + int2 denorm_TA = int2(int2(TA.xy) * 255.0f + 0.5f); + + // Mask will take care of the correct destination +#if PS_READ_BA + c.rb = c.bb; +#else + c.rb = c.rr; +#endif + +#if PS_READ_BA + if (denorm_c.a & 0x80) + c.ga = int2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f); + else + c.ga = int2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f); +#else + if (denorm_c.g & 0x80) + c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f; + else + c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f; +#endif + //Probably not right :/ + c.g = c.b; + +#endif + output.c1 = c.a * 2; // used for alpha blending if(PS_AOUT) // 16 bit output From c925b1d1351921a9703530f7e387c61917197af9 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 10 Jun 2015 10:07:40 +0200 Subject: [PATCH 09/50] gsdx-tc: support GS mem to depth buffer transfer Please test SMT games (issue #572) --- plugins/GSdx/GSTextureCache.cpp | 103 +++++++++++++++++++------------- plugins/GSdx/GSTextureCache.h | 3 +- 2 files changed, 62 insertions(+), 44 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 5f9f626d3f..b760380611 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -974,7 +974,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con 
GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type) { - Target* t = new Target(m_renderer, TEX0, m_temp); + Target* t = new Target(m_renderer, TEX0, m_temp, m_can_convert_depth); // FIXME: initial data should be unswizzled from local mem in Update() if dirty @@ -1262,10 +1262,11 @@ void GSTextureCache::Source::Flush(uint32 count) // GSTextureCache::Target -GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp) +GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported) : Surface(r, temp) , m_type(-1) , m_used(false) + , m_depth_supported(depth_supported) { m_TEX0 = TEX0; @@ -1277,59 +1278,75 @@ void GSTextureCache::Target::Update() Surface::Update(); // FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :) + // GH: it must be doable + // 1/ rescale the new t to the good size + // 2/ copy each rectangle (rescale the rectangle) (use CopyRect or multiple vertex) + // Alternate + // 1/ uses multiple vertex rectangle GSVector4i r = m_dirty.GetDirtyRectAndClear(m_TEX0, m_texture->GetSize()); - if(r.rempty()) return; + if (r.rempty()) return; + int w = r.width(); + int h = r.height(); + + GIFRegTEXA TEXA; + + TEXA.AEM = 1; + TEXA.TA0 = 0; + TEXA.TA1 = 0x80; + + GSTexture* t = m_renderer->m_dev->CreateTexture(w, h); + if (t == NULL) return; + + // No handling please + if ((m_type == DepthStencil) && !m_depth_supported) { + // do the most likely thing a direct write would do, clear it + GL_INS("ERROR: Update DepthStencil dummy"); + + if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0) + m_renderer->m_dev->ClearDepth(m_texture, 0); + + return; + } + + const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); + + GSTexture::GSMap m; + + if(t->Map(m)) + { + m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA); + + t->Unmap(); + } + else + { + int pitch = ((w + 3) 
& ~3) * 4; + + m_renderer->m_mem.ReadTexture(off, r, m_temp, pitch, TEXA); + + t->Update(r.rsize(), m_temp, pitch); + } + + // m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); + + // Copy the new GS memory content into the destination texture. if(m_type == RenderTarget) { - int w = r.width(); - int h = r.height(); + GL_INS("ERROR: Update RenderTarget"); - if(GSTexture* t = m_renderer->m_dev->CreateTexture(w, h)) - { - const GSOffset* off = m_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM); - - GIFRegTEXA TEXA; - - TEXA.AEM = 1; - TEXA.TA0 = 0; - TEXA.TA1 = 0x80; - - GSTexture::GSMap m; - - if(t->Map(m)) - { - m_renderer->m_mem.ReadTexture(off, r, m.bits, m.pitch, TEXA); - - t->Unmap(); - } - else - { - int pitch = ((w + 3) & ~3) * 4; - - m_renderer->m_mem.ReadTexture(off, r, m_temp, pitch, TEXA); - - t->Update(r.rsize(), m_temp, pitch); - } - - // m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4); - - m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); - - m_renderer->m_dev->Recycle(t); - } + m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy()); } else if(m_type == DepthStencil) { - // do the most likely thing a direct write would do, clear it + GL_INS("ERROR: Update DepthStencil"); - if((m_renderer->m_game.flags & CRC::ZWriteMustNotClear) == 0) - { - m_renderer->m_dev->ClearDepth(m_texture, 0); - } + m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), 12); } + + m_renderer->m_dev->Recycle(t); } // GSTextureCache::SourceMap diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 39a64368b6..0aa778d85c 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -80,9 +80,10 @@ public: bool m_used; GSDirtyRectList m_dirty; GSVector4i m_valid; + bool m_depth_supported; public: - Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp); + 
Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported); virtual void Update(); }; From 2dc4e2a04ccd44cb93d63bb8d67a99e8fd7f3305 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 10 Jun 2015 10:59:20 +0100 Subject: [PATCH 10/50] GSdx: Gave accurate blend option an extra state, can now do super accurate blending from the GUI --- plugins/GSdx/GSdx.rc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/GSdx/GSdx.rc b/plugins/GSdx/GSdx.rc index c504169f32..a40d2d797c 100644 --- a/plugins/GSdx/GSdx.rc +++ b/plugins/GSdx/GSdx.rc @@ -268,7 +268,7 @@ BEGIN CONTROL "Allow 8-Bit Textures",IDC_PALTEX,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,243,82,10 CONTROL "Texture Filtering",IDC_FILTER,"Button",BS_AUTO3STATE | WS_TABSTOP,10,227,67,10 CONTROL "Enable Shade Boost",IDC_SHADEBOOST,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,186,79,10 - CONTROL "Accurate Blend",IDC_ACCURATE_BLEND,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,259,67,10 + CONTROL "Accurate Blend",IDC_ACCURATE_BLEND,"Button",BS_AUTO3STATE | WS_TABSTOP,10,259,67,10 CONTROL "Accurate Date",IDC_ACCURATE_DATE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,259,67,10 CONTROL "Accurate color clipping",IDC_ACCURATE_COLCLIP,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,275,87,10 PUSHBUTTON "Settings...",IDC_SHADEBUTTON,92,183,75,14 From de189b67a23d5c5e0108cf2011831c555260339c Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 10 Jun 2015 11:32:00 +0100 Subject: [PATCH 11/50] GSdx: Add option to toggle Texture Cache Depth support. - Forced off for DX currently to save fallover. 
--- plugins/GSdx/GSSettingsDlg.cpp | 6 +++++- plugins/GSdx/GSTextureCache.cpp | 4 ++-- plugins/GSdx/GSdx.rc | 3 ++- plugins/GSdx/GSdx_vs2013.vcxproj | 5 ++++- plugins/GSdx/GSdx_vs2013.vcxproj.filters | 13 +++++++++---- plugins/GSdx/resource.h | 4 +++- 6 files changed, 25 insertions(+), 10 deletions(-) diff --git a/plugins/GSdx/GSSettingsDlg.cpp b/plugins/GSdx/GSSettingsDlg.cpp index 9d101b15cb..84ecb71264 100644 --- a/plugins/GSdx/GSSettingsDlg.cpp +++ b/plugins/GSdx/GSSettingsDlg.cpp @@ -197,6 +197,7 @@ void GSSettingsDlg::OnInit() CheckDlgButton(m_hWnd, IDC_ACCURATE_BLEND, theApp.GetConfig("accurate_blend", 1)); CheckDlgButton(m_hWnd, IDC_ACCURATE_DATE, theApp.GetConfig("accurate_date", 0)); CheckDlgButton(m_hWnd, IDC_ACCURATE_COLCLIP, theApp.GetConfig("accurate_colclip", 0)); + CheckDlgButton(m_hWnd, IDC_OLGDEPTH, theApp.GetConfig("texture_cache_depth", 0)); // Shade Boost CheckDlgButton(m_hWnd, IDC_SHADEBOOST, theApp.GetConfig("ShadeBoost", 0)); @@ -334,7 +335,8 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("accurate_blend", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_BLEND)); theApp.SetConfig("accurate_date", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_DATE)); theApp.SetConfig("accurate_colclip", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_COLCLIP)); - + theApp.SetConfig("texture_cache_depth", (int)IsDlgButtonChecked(m_hWnd, IDC_OLGDEPTH)); + // Shade Boost theApp.SetConfig("ShadeBoost", (int)IsDlgButtonChecked(m_hWnd, IDC_SHADEBOOST)); @@ -438,6 +440,8 @@ void GSSettingsDlg::UpdateControls() EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND), ogl && hw); EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_DATE), ogl && hw); EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_COLCLIP), ogl && hw); + EnableWindow(GetDlgItem(m_hWnd, IDC_OLGDEPTH), ogl && hw); + //EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let uers set software params regardless of renderer used //EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw); 
//EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS), sw); diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index b760380611..aa588250e3 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -30,8 +30,8 @@ GSTextureCache::GSTextureCache(GSRenderer* r) UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); m_paltex = !!theApp.GetConfig("paltex", 0); - m_can_convert_depth = theApp.GetConfig("texture_cache_depth", 1); - + m_can_convert_depth = theApp.GetConfig("Renderer", 12) == 12 ? theApp.GetConfig("texture_cache_depth", 1) : 0; + m_temp = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); } diff --git a/plugins/GSdx/GSdx.rc b/plugins/GSdx/GSdx.rc index a40d2d797c..565f45df5d 100644 --- a/plugins/GSdx/GSdx.rc +++ b/plugins/GSdx/GSdx.rc @@ -270,7 +270,7 @@ BEGIN CONTROL "Enable Shade Boost",IDC_SHADEBOOST,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,186,79,10 CONTROL "Accurate Blend",IDC_ACCURATE_BLEND,"Button",BS_AUTO3STATE | WS_TABSTOP,10,259,67,10 CONTROL "Accurate Date",IDC_ACCURATE_DATE,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,259,67,10 - CONTROL "Accurate color clipping",IDC_ACCURATE_COLCLIP,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,275,87,10 + CONTROL "Accurate color clip",IDC_ACCURATE_COLCLIP,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,275,74,10 PUSHBUTTON "Settings...",IDC_SHADEBUTTON,92,183,75,14 CONTROL "Enable HW Hacks",IDC_HACKS_ENABLED,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,10,291,71,10 PUSHBUTTON "Configure...",IDC_HACKSBUTTON,92,288,75,14 @@ -282,6 +282,7 @@ BEGIN COMBOBOX IDC_AFCOMBO,93,304,35,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "OpenCL Device:",IDC_STATIC,6,86,52,8 COMBOBOX IDC_OPENCL_DEVICE,70,84,111,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP + CONTROL "HW OGL Depth",IDC_OLGDEPTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,273,72,13 END diff --git a/plugins/GSdx/GSdx_vs2013.vcxproj 
b/plugins/GSdx/GSdx_vs2013.vcxproj index 6d96bb38fb..5a19d0e464 100644 --- a/plugins/GSdx/GSdx_vs2013.vcxproj +++ b/plugins/GSdx/GSdx_vs2013.vcxproj @@ -2078,6 +2078,9 @@ + + + @@ -2086,4 +2089,4 @@ - + \ No newline at end of file diff --git a/plugins/GSdx/GSdx_vs2013.vcxproj.filters b/plugins/GSdx/GSdx_vs2013.vcxproj.filters index 4fcf5dd8e9..6e34f020dd 100644 --- a/plugins/GSdx/GSdx_vs2013.vcxproj.filters +++ b/plugins/GSdx/GSdx_vs2013.vcxproj.filters @@ -720,9 +720,6 @@ Resource Files - - - Resource Files Shaders @@ -755,10 +752,18 @@ Shaders + + Resource Files + Resource Files - + + + Resource Files + + + \ No newline at end of file diff --git a/plugins/GSdx/resource.h b/plugins/GSdx/resource.h index 977c06fbec..6e3efef2cf 100644 --- a/plugins/GSdx/resource.h +++ b/plugins/GSdx/resource.h @@ -79,6 +79,8 @@ #define IDC_ROUND_SPRITE 2095 #define IDC_ALIGN_SPRITE 2096 #define IDC_CRC_LEVEL 2097 +#define IDC_CHECK1 2098 +#define IDC_OLGDEPTH 2099 #define IDC_COLORSPACE 3000 #define IDR_CONVERT_FX 10000 #define IDR_TFX_FX 10001 @@ -99,7 +101,7 @@ #ifndef APSTUDIO_READONLY_SYMBOLS #define _APS_NEXT_RESOURCE_VALUE 10013 #define _APS_NEXT_COMMAND_VALUE 32771 -#define _APS_NEXT_CONTROL_VALUE 2099 +#define _APS_NEXT_CONTROL_VALUE 2100 #define _APS_NEXT_SYMED_VALUE 5000 #endif #endif From 7ee3dbd615ab8eefb97784e9b9a93e08499d0191 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 10 Jun 2015 15:37:56 +0200 Subject: [PATCH 12/50] gsdx-tc: use the virtual function to check the state This way it will be disabled for dx --- plugins/GSdx/GSTextureCache.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index aa588250e3..f026358499 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -974,7 +974,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0,
int w, int h, int type) { - Target* t = new Target(m_renderer, TEX0, m_temp, m_can_convert_depth); + Target* t = new Target(m_renderer, TEX0, m_temp, CanConvertDepth()); // FIXME: initial data should be unswizzled from local mem in Update() if dirty From af09d7e063786406b093f96f0b8de0875e78d579 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 10 Jun 2015 19:09:24 +0100 Subject: [PATCH 13/50] GSdx-DX: Modified shader slightly as some bits were incorrect. --- plugins/GSdx/res/tfx.fx | 55 ++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/plugins/GSdx/res/tfx.fx b/plugins/GSdx/res/tfx.fx index c868c7cdf5..518e884a6e 100644 --- a/plugins/GSdx/res/tfx.fx +++ b/plugins/GSdx/res/tfx.fx @@ -714,32 +714,37 @@ PS_OUTPUT ps_main(PS_INPUT input) PS_OUTPUT output; -#if PS_SHUFFLE - int4 denorm_c = int4(c * 255.0f + 0.5f); - int2 denorm_TA = int2(int2(TA.xy) * 255.0f + 0.5f); + if (PS_SHUFFLE){ + uint4 denorm_c = uint4(c * 255.0f + 0.5f); + uint2 denorm_TA = uint2(float2(TA.xy) * 255.0f + 0.5f); - // Mask will take care of the correct destination -#if PS_READ_BA - c.rb = c.bb; -#else - c.rb = c.rr; -#endif - -#if PS_READ_BA - if (denorm_c.a & 0x80) - c.ga = int2(float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f); - else - c.ga = int2(float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f); -#else - if (denorm_c.g & 0x80) - c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f; - else - c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f; -#endif - //Probably not right :/ - c.g = c.b; - -#endif + // Mask will take care of the correct destination + if (PS_READ_BA){ + c.rb = c.bb; + } + else { + c.rb = c.rr; + } + c.g = c.a; + if (PS_READ_BA){ + if (denorm_c.a & 0x80) + c.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f; + else + c.a = float((denorm_c.a & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f; + + //c.g = c.a; + } + else { + if (denorm_c.g & 0x80) + c.a = 
float((denorm_c.g & 0x7Fu) | (denorm_TA.y & 0x80u)) / 255.0f; + else + c.a = float((denorm_c.g & 0x7Fu) | (denorm_TA.x & 0x80u)) / 255.0f; + + //c.g = c.a; + } + //Probably not right :/ + //c.g = c.b; + } output.c1 = c.a * 2; // used for alpha blending From 955fc3aa35e53eaa51b2068612e7ec1ab9e5be55 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Wed, 10 Jun 2015 19:10:10 +0100 Subject: [PATCH 14/50] GSdx-DX: Mirror round value change from OGL --- plugins/GSdx/GSRendererDX.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index cf0eda3ae5..96d8ee9b8a 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -234,9 +234,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc size_t count = m_vertex.next; // vertex position is 8 to 16 pixels, therefore it is the 16-31 bits of the colors - bool write_ba = (((v[0].XYZ.X - o.OFX) & 0xF0) == 128); + int pos = (v[0].XYZ.X - o.OFX) & 0xFF; + bool write_ba = (pos > 112 && pos < 136); // Read texture is 8 to 16 pixels (same as above) - ps_sel.read_ba = ((v[0].U & 0xF0) == 128); + int tex_pos = v[0].U & 0xFF; + ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144); GL_INS("Color shuffle %s => %s", ps_sel.read_ba ? "BA" : "RG", write_ba ? 
"BA" : "RG"); @@ -257,7 +259,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i + 1].XYZ.Y, v[i + 1].V); - tmp = ((tmp - offset) >> 1) + offset; + tmp = GSVector4i(tmp - offset).srl32(1) + offset; v[i].XYZ.Y = tmp.x; v[i].V = tmp.y; From 6b5a3dedd9077f252cced889aa45038b7a18f7b3 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 13 Jun 2015 08:32:11 +0200 Subject: [PATCH 15/50] gsdx-debug: add the format in texture cache logging --- plugins/GSdx/GSTextureCache.cpp | 20 ++++++++++---------- plugins/GSdx/GSTextureCache.h | 4 ++++ 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index f026358499..78b37c154f 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -167,11 +167,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con { #ifdef ENABLE_OGL_DEBUG if (dst) { - GL_CACHE("TC: dst %s hit (%s): %d (0x%x)", dst->m_type ? "Depth" : "Color", half_right ? "half" : "full", + GL_CACHE("TC: dst %s hit (%s): %d (0x%x, F:0x%x)", to_string(dst->m_type), half_right ? "half" : "full", dst->m_texture ? 
dst->m_texture->GetID() : 0, - TEX0.TBP0); + TEX0.TBP0, TEX0.PSM); } else { - GL_CACHE("TC: src miss (0x%x)", TEX0.TBP0); + GL_CACHE("TC: src miss (0x%x, F:0x%x)", TEX0.TBP0, TEX0.PSM); } #endif src = CreateSource(TEX0, TEXA, dst, half_right); @@ -237,7 +237,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } if (dst) { - GL_CACHE("TC: Lookup Target(T%d) %dx%d, hit: %d (0x%x)", type, w, h, dst->m_texture->GetID(), bp); + GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, F:0x%x)", to_string(type), w, h, dst->m_texture->GetID(), bp, TEX0.PSM); dst->Update(); } else if (CanConvertDepth()) { @@ -257,10 +257,10 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int { dst = CreateTarget(TEX0, w, h, type); if (type == DepthStencil) { - GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x)", w, h, bp); + GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 12, false); } else { - GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x)", w, h, bp); + GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 11, false); } @@ -271,7 +271,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int if(dst == NULL) { - GL_CACHE("TC: Lookup Target(T%d) %dx%d, miss (0x%x)", type, w, h, bp); + GL_CACHE("TC: Lookup Target(%s) %dx%d, miss (0x%x, F:0x%x)", to_string(type), w, h, bp, TEX0.PSM); dst = CreateTarget(TEX0, w, h, type); @@ -393,7 +393,7 @@ void GSTextureCache::InvalidateVideoMemType(int type, uint32 bp) if(bp == t->m_TEX0.TBP0) { - GL_CACHE("TC: InvalidateVideoMemType: Remove Target(T%d) %d (0x%x)", type, + GL_CACHE("TC: InvalidateVideoMemType: Remove Target(%s) %d (0x%x)", to_string(type), t->m_texture ? 
t->m_texture->GetID() : 0, t->m_TEX0.TBP0); @@ -537,7 +537,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b else { m_dst[type].erase(j); - GL_CACHE("TC: Remove Target(T%d) %d (0x%x)", type, + GL_CACHE("TC: Remove Target(%s) %d (0x%x)", to_string(type), t->m_texture ? t->m_texture->GetID() : 0, t->m_TEX0.TBP0); delete t; @@ -717,7 +717,7 @@ void GSTextureCache::IncAge() if(++t->m_age > maxage) { m_dst[type].erase(j); - GL_CACHE("TC: Remove Target(T%d): %d (0x%x) due to age", type, + GL_CACHE("TC: Remove Target(%s): %d (0x%x) due to age", to_string(type), t->m_texture ? t->m_texture->GetID() : 0, t->m_TEX0.TBP0); diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 0aa778d85c..5cbb8d5681 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -145,4 +145,8 @@ public: void IncAge(); bool UserHacks_HalfPixelOffset; + + const char* to_string(int type) { + return (type == DepthStencil) ? "Depth" : "Color"; + } }; From b62a2d6a3c09c21b5e1c12a0924602b2ad39f304 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 13 Jun 2015 08:40:32 +0200 Subject: [PATCH 16/50] gsdx-debug: dump texture in 32 bits when a shuffling effect is detected Otherwise it is unreadable --- plugins/GSdx/GSRendererSW.cpp | 36 +++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index e3c28ba0e0..23474ab176 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -516,6 +516,7 @@ void GSRendererSW::Draw() Sync(2); uint64 frame = m_perfmon.GetFrame(); + bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)); string s; @@ -529,18 +530,34 @@ void GSRendererSW::Draw() if(s_savet && s_n >= s_saven && PRIM->TME) { - s = format("%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0,
(int)m_context->TEX0.PSM); + if (texture_shuffle) { + // Dump the texture in 32 bits format. It helps to debug texture shuffle effect + s = format("%05d_f%lld_tex_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0); + + m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); + } else { + s = format("%05d_f%lld_tex_%05x_%d.bmp", s_n, frame, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM); + + m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); + } - m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); } s_n++; if(s_save && s_n >= s_saven) { - s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + if (texture_shuffle) { + // Dump the RT in 32 bits format. It helps to debug texture shuffle effect + s = format("%05d_f%lld_rt0_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); + + m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); + } else { + s = format("%05d_f%lld_rt0_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); + + m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + } } if(s_savez && s_n >= s_saven) @@ -558,9 +575,16 @@ void GSRendererSW::Draw() if(s_save && s_n >= s_saven) { - s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); + if (texture_shuffle) { + // Dump the RT in 32 bits format. 
It helps to debug texture shuffle effect + s = format("%05d_f%lld_rt1_%05x_32bits.bmp", s_n, frame, m_context->FRAME.Block()); - m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, 0, GetFrameRect().width(), 512); + } else { + s = format("%05d_f%lld_rt1_%05x_%d.bmp", s_n, frame, m_context->FRAME.Block(), m_context->FRAME.PSM); + + m_mem.SaveBMP(root_sw+s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512); + } } if(s_savez && s_n >= s_saven) From 05c72980fc41d1c9a06554b50b6a3ebfdb039706 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 13 Jun 2015 10:04:36 +0200 Subject: [PATCH 17/50] gsdx: avoid to detect PSMT8H as 16 bits --- plugins/GSdx/GSRendererDX.cpp | 2 +- plugins/GSdx/GSRendererOGL.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index 96d8ee9b8a..d3559ad6d8 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -225,7 +225,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; - if ((context->FRAME.PSM == 0x2) && (context->TEX0.PSM & 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { + if ((context->FRAME.PSM == 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { ps_sel.shuffle = 1; ps_sel.fmt = 0; diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index b247459c7e..3a2a411075 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -247,7 +247,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSDeviceOGL::OMColorMaskSelector om_csel; GSDeviceOGL::OMDepthStencilSelector om_dssel; - if ((context->FRAME.PSM & 0x2) && 
(context->TEX0.PSM & 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { + if ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { ps_sel.shuffle = 1; ps_sel.dfmt = 0; From 42e911c78f2aa1802f73bcc0e61f22df120416b7 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 13 Jun 2015 10:05:33 +0200 Subject: [PATCH 18/50] gsdx-ogl: mask alpha channel in depth conversion Might not work if depth is recast as an RT (we lose the alpha channel) --- plugins/GSdx/GSDeviceOGL.cpp | 2 +- plugins/GSdx/GSDeviceOGL.h | 2 +- plugins/GSdx/GSTextureCache.cpp | 4 +++- plugins/GSdx/res/glsl/convert.glsl | 13 +++++++++++++ plugins/GSdx/res/glsl_source.h | 13 +++++++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 4bfd25f9d0..756118b1eb 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -740,7 +740,7 @@ void GSDeviceOGL::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture return; } - bool draw_in_depth = (ps == m_convert.ps[12]); + bool draw_in_depth = (ps == m_convert.ps[12] || ps == m_convert.ps[13]); // Performance optimization.
It might be faster to use a framebuffer blit for standard case // instead to emulate it with shader diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 00d9d9b367..14c9fb582d 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -494,7 +494,7 @@ class GSDeviceOGL : public GSDevice struct { GLuint vs; // program object - GLuint ps[13]; // program object + GLuint ps[14]; // program object GLuint ln; // sampler object GLuint pt; // sampler object GSDepthStencilOGL* dss; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 78b37c154f..9b3a4e5dad 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -258,7 +258,8 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = CreateTarget(TEX0, w, h, type); if (type == DepthStencil) { GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); - m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 12, false); + int shader = (TEX0.PSM & 1) ? 13 : 12; + m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, shader, false); } else { GL_CACHE("TC: Lookup Target(Color) %dx%d, hit Depth (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); m_renderer->m_dev->StretchRect(t->m_texture, sRect, dst->m_texture, dRect, 11, false); @@ -1343,6 +1344,7 @@ void GSTextureCache::Target::Update() { GL_INS("ERROR: Update DepthStencil"); + // FIXME linear or not? 
m_renderer->m_dev->StretchRect(t, m_texture, GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy(), 12); } diff --git a/plugins/GSdx/res/glsl/convert.glsl b/plugins/GSdx/res/glsl/convert.glsl index 3a7fa21f17..b8baf5cbe4 100644 --- a/plugins/GSdx/res/glsl/convert.glsl +++ b/plugins/GSdx/res/glsl/convert.glsl @@ -183,6 +183,19 @@ void ps_main12() } #endif +#ifdef ps_main13 +out float gl_FragDepth; +void ps_main13() +{ + // Same as above but without the alpha channel + + // Convert a RRGBA texture into a float depth texture + // FIXME: I'm afraid of the accuracy + const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0); + gl_FragDepth = dot(sample_c(), bitSh); +} +#endif + #ifdef ps_main7 void ps_main7() { diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 5971a36c7a..3fa091dc87 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -208,6 +208,19 @@ static const char* convert_glsl = "}\n" "#endif\n" "\n" + "#ifdef ps_main13\n" + "out float gl_FragDepth;\n" + "void ps_main13()\n" + "{\n" + " // Same as above but without the alpha channel\n" + "\n" + " // Convert a RRGBA texture into a float depth texture\n" + " // FIXME: I'm afraid of the accuracy\n" + " const vec4 bitSh = vec4(1.0/(256.0*256.0*256.0), 1.0/(256.0*256.0), 1.0/256.0, 0.0) * vec4(255.0/256.0);\n" + " gl_FragDepth = dot(sample_c(), bitSh);\n" + "}\n" + "#endif\n" + "\n" "#ifdef ps_main7\n" "void ps_main7()\n" "{\n" From 33c9e9da0aedd8ce82c0a216ee9c2ff517f0c99b Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 14 Jun 2015 19:42:11 +0200 Subject: [PATCH 19/50] gsdx: new generic OI hack To clear a (W, H) RT/Depth, GS can only send a (W , H/2) primitive with a Z (or RT) pointer in the middle of the buffer Top-half will be cleared by the color (or the depth). Bottom-half will be cleared by the depth (or the color). Code isn't enabled yet but it was tested with success on GoW2. 
--- plugins/GSdx/GSRendererHW.cpp | 64 ++++++++++++++++++++++++++++++++--- plugins/GSdx/GSRendererHW.h | 1 + 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 1f4b78b6a8..d84bc6ba5d 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -345,6 +345,7 @@ void GSRendererHW::Draw() TEX0.PSM = context->FRAME.PSM; GSTextureCache::Target* rt = no_rt ? NULL : m_tc->LookupTarget(TEX0, m_width, m_height, GSTextureCache::RenderTarget, true); + GSTexture* rt_tex = rt ? rt->m_texture : NULL; TEX0.TBP0 = context->ZBUF.Block(); TEX0.TBW = context->FRAME.FBW; @@ -453,7 +454,7 @@ void GSRendererHW::Draw() #endif } - if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(NULL, ds->m_texture, tex)) + if(m_hacks.m_oi && !(this->*m_hacks.m_oi)(rt_tex, ds->m_texture, tex)) { s_n += 1; // keep counter sync GL_POP(); @@ -520,7 +521,7 @@ void GSRendererHW::Draw() // - DrawPrims(rt ? rt->m_texture : NULL, ds->m_texture, tex); + DrawPrims(rt_tex, ds->m_texture, tex); // @@ -644,12 +645,67 @@ void GSRendererHW::Hacks::SetGameCRC(const CRC::Game& game) m_oo = m_oo_map[hash]; m_cu = m_cu_map[hash]; - if(game.flags & CRC::PointListPalette) - { + if (game.flags & CRC::PointListPalette) { ASSERT(m_oi == NULL); m_oi = &GSRendererHW::OI_PointListPalette; } +#if 0 + // FIXME: Enable this code in the future. I think it could replace + // most of the "old" OI hack. So far code was tested on GoW2 & SimpsonsGame with + // success + if (m_oi == NULL) { + m_oi = &GSRendererHW::OI_DoubleHalfClear; + } +#endif +} + +bool GSRendererHW::OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t) +{ + if (m_vt.m_primclass == GS_SPRITE_CLASS && !PRIM->TME && !m_context->ZBUF.ZMSK && (m_context->FRAME.FBW >= 7)) { + GSVertex* v = &m_vertex.buff[0]; + + //GL_INS("OI_DoubleHalfClear: psm:%x. 
Z:%d R:%d G:%d B:%d A:%d", m_context->FRAME.PSM, + // v[1].XYZ.Z, v[1].RGBAQ.R, v[1].RGBAQ.G, v[1].RGBAQ.B, v[1].RGBAQ.A); + + // Check it is a clear on the first primitive only + if (v[1].XYZ.Z || v[1].RGBAQ.R || v[1].RGBAQ.G || v[1].RGBAQ.B || v[1].RGBAQ.A) { + return true; + } + // Only 32 bits format is supported otherwise it is complicated + if (m_context->FRAME.PSM & 2) + return true; + + // FIXME might need some rounding + // In 32 bits pages are 64x32 pixels. In theory, it must be somethings + // like FBW * 64 pixels * ratio / 32 pixels / 2 = FBW * ratio + // It is hard to predict the ratio, so I round it to 1. And I use + // <= comparison below. + uint32 h_pages = m_context->FRAME.FBW; + + uint32 base; + uint32 half; + if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) { + base = m_context->ZBUF.ZBP; + half = m_context->FRAME.FBP; + } else { + base = m_context->FRAME.FBP; + half = m_context->ZBUF.ZBP; + } + + if (half <= (base + h_pages * m_context->FRAME.FBW)) { + //GL_INS("OI_DoubleHalfClear: base %x half %x. h_pages %d fbw %d", base, half, h_pages, m_context->FRAME.FBW); + if (m_context->FRAME.FBP > m_context->ZBUF.ZBP) { + m_dev->ClearDepth(ds, 0); + } else { + m_dev->ClearRenderTarget(rt, 0); + } + // Don't return false, it will break the rendering. I guess that it misses texture + // invalidation + //return false; + } + } + return true; } // OI (others input?/implementation?) 
hacks replace current draw call diff --git a/plugins/GSdx/GSRendererHW.h b/plugins/GSdx/GSRendererHW.h index b5a73e68e1..fdf43e9df5 100644 --- a/plugins/GSdx/GSRendererHW.h +++ b/plugins/GSdx/GSRendererHW.h @@ -46,6 +46,7 @@ private: typedef void (GSRendererHW::*OO_Ptr)(); typedef bool (GSRendererHW::*CU_Ptr)(); + bool OI_DoubleHalfClear(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); bool OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); bool OI_FFX(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); bool OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t); From 3b127f663bc899235fcefb47a7cbd2c8e1fd6534 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 17 Jun 2015 20:02:03 +0200 Subject: [PATCH 20/50] gsdx-tc: trace the texture format to detect texture shuffling It fixes games that uses 16 bits RT (like snow engine games) --- plugins/GSdx/GSRenderer.h | 1 + plugins/GSdx/GSRendererDX.cpp | 2 +- plugins/GSdx/GSRendererHW.cpp | 7 +++++++ plugins/GSdx/GSRendererOGL.cpp | 2 +- plugins/GSdx/GSRendererSW.cpp | 4 +++- plugins/GSdx/GSTextureCache.cpp | 12 +++++------- plugins/GSdx/GSTextureCache.h | 1 + 7 files changed, 19 insertions(+), 10 deletions(-) diff --git a/plugins/GSdx/GSRenderer.h b/plugins/GSdx/GSRenderer.h index ad4eb22e64..64f82e3d37 100644 --- a/plugins/GSdx/GSRenderer.h +++ b/plugins/GSdx/GSRenderer.h @@ -50,6 +50,7 @@ protected: bool m_shaderfx; bool m_fxaa; bool m_shadeboost; + bool m_texture_shuffle; virtual GSTexture* GetOutput(int i) = 0; diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index d3559ad6d8..a1d976289a 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -225,7 +225,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; - if ((context->FRAME.PSM == 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == 
GS_SPRITE_CLASS)) { + if (m_texture_shuffle) { ps_sel.shuffle = 1; ps_sel.fmt = 0; diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index d84bc6ba5d..6242005bb7 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -361,6 +361,7 @@ void GSRendererHW::Draw() } GSTextureCache::Source* tex = NULL; + m_texture_shuffle = false; if(PRIM->TME) { @@ -395,6 +396,12 @@ void GSRendererHW::Draw() { m_mem.m_clut.Read32(context->TEX0, env.TEXA); } + + if (rt) { + rt->m_32_bits_fmt |= tex->m_32_bits_fmt; + } + // Both input and output are 16 bits but texture was initially 32 bits! + m_texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt); } if(s_dump) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 3a2a411075..0e3b82f070 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -247,7 +247,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GSDeviceOGL::OMColorMaskSelector om_csel; GSDeviceOGL::OMDepthStencilSelector om_dssel; - if ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)) { + if (m_texture_shuffle) { ps_sel.shuffle = 1; ps_sel.dfmt = 0; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 23474ab176..38cab98da7 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -516,6 +516,8 @@ void GSRendererSW::Draw() Sync(2); uint64 frame = m_perfmon.GetFrame(); + // Dump the texture in 32 bits format. 
It helps to debug texture shuffle effect + // It will breaks the few games that really uses 16 bits RT bool texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS)); string s; @@ -531,7 +533,7 @@ void GSRendererSW::Draw() if(s_savet && s_n >= s_saven && PRIM->TME) { if (texture_shuffle) { - // Dump the texture in 32 bits format. It helps to debug texture shuffle effect + // Dump the RT in 32 bits format. It helps to debug texture shuffle effect s = format("%05d_f%lld_tex_%05x_32bits.bmp", s_n, frame, (int)m_context->TEX0.TBP0); m_mem.SaveBMP(root_sw+s, m_context->TEX0.TBP0, m_context->TEX0.TBW, 0, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH); diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 9b3a4e5dad..6985d0a581 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -223,13 +223,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = t; -#if 0 - // Likely the root cause of tons and tons of bug - if (dst->m_TEX0.PSM != TEX0.PSM) { - GL_INS("TC: ERROR: use a target with format 0x%x as 0x%x without any conversion", dst->m_TEX0.PSM, TEX0.PSM); - } -#endif - + dst->m_32_bits_fmt |= !(TEX0.PSM & 2); dst->m_TEX0 = TEX0; break; @@ -256,6 +250,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int if(!t->m_age && bp == t->m_TEX0.TBP0) { dst = CreateTarget(TEX0, w, h, type); + if (type == DepthStencil) { GL_CACHE("TC: Lookup Target(Depth) %dx%d, hit Color (0x%x, F:0x%x)", w, h, bp, TEX0.PSM); int shader = (TEX0.PSM & 1) ? 
13 : 12; @@ -753,6 +748,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (dst) { // TODO: clean up this mess + src->m_32_bits_fmt = dst->m_32_bits_fmt; src->m_target = true; @@ -1011,6 +1007,7 @@ GSTextureCache::Surface::Surface(GSRenderer* r, uint8* temp) , m_texture(NULL) , m_age(0) , m_temp(temp) + , m_32_bits_fmt(false) { m_TEX0.TBP0 = 0x3fff; } @@ -1270,6 +1267,7 @@ GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* tem , m_depth_supported(depth_supported) { m_TEX0 = TEX0; + m_32_bits_fmt |= !(TEX0.PSM & 2); m_valid = GSVector4i::zero(); } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 5cbb8d5681..dc679bf345 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -40,6 +40,7 @@ public: GIFRegTEXA m_TEXA; int m_age; uint8* m_temp; + bool m_32_bits_fmt; // Allow to detect the casting of 32 bits as 16 bits texture public: Surface(GSRenderer* r, uint8* temp); From 82818dab3c542582066786fb515df661d46b4798 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 17 Jun 2015 22:55:17 +0200 Subject: [PATCH 21/50] gsdx-ogl: make some room in AlphaCoefficient variable The idea will be to use the remaining int to store the FB mask --- plugins/GSdx/GSRendererOGL.cpp | 2 +- plugins/GSdx/res/glsl/tfx_fs.glsl | 39 ++++++++++++++++--------------- plugins/GSdx/res/glsl_source.h | 39 ++++++++++++++++--------------- 3 files changed, 41 insertions(+), 39 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 0e3b82f070..4df591ab53 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -742,7 +742,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // Require the fix alpha vlaue if (ALPHA.C == 2) { - ps_cb.AlphaCoeff = GSVector4(afix); + ps_cb.AlphaCoeff.a = afix; } // No need to flush for every primitive diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl 
b/plugins/GSdx/res/glsl/tfx_fs.glsl index c37a529537..2243389a87 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -65,7 +65,8 @@ layout(std140, binding = 21) uniform cb21 vec2 MinF; vec2 TA; uvec4 MskFix; - vec4 Af; + vec3 FbMask; + float Af; vec4 HalfTexel; vec4 MinMax; vec4 TC_OffsetHack; @@ -428,7 +429,7 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 6 // 6 => *0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F - c.rgb = Cs * (Af.x + 1.0f) - Cd * Af.x; + c.rgb = Cs * (Af + 1.0f) - Cd * Af; #elif PS_BLEND == 7 // 7 => *0200: (Cs - 0)*As + Cs ==> Cs*(As + 1) @@ -440,7 +441,7 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 9 // 9 => *0220: (Cs - 0)*F + Cs ==> Cs*(F + 1) - c.rgb = Cs * (Af.x + 1.0f); + c.rgb = Cs * (Af + 1.0f); #elif PS_BLEND == 10 // 10 => *1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As @@ -452,7 +453,7 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 12 // 12 => *1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F - c.rgb = Cd * (Af.x + 1.0f) - Cs * Af.x; + c.rgb = Cd * (Af + 1.0f) - Cs * Af; #elif PS_BLEND == 13 // 13 => 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As) @@ -472,11 +473,11 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 17 // 17 => 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F) - c.rgb = Cs * Af.x + Cd * (1.0f - Af.x); + c.rgb = Cs * Af + Cd * (1.0f - Af); #elif PS_BLEND == 18 // 18 => 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F - c.rgb = Cs * Af.x - Cd * Af.x; + c.rgb = Cs * Af - Cd * Af; #elif PS_BLEND == 19 // 19 => 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd @@ -496,11 +497,11 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 23 // 23 => 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd - c.rgb = Cs * Af.x + Cd; + c.rgb = Cs * Af + Cd; #elif PS_BLEND == 24 // 24 => 0222: (Cs - 0)*F + 0 ==> Cs*F - c.rgb = Cs * Af.x; + c.rgb = Cs * Af; #elif PS_BLEND == 25 // 25 => 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As) @@ -520,11 +521,11 @@ void ps_blend(inout vec4 c, 
in float As) #elif PS_BLEND == 29 // 29 => 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F) - c.rgb = Cd * Af.x + Cs * (1.0f - Af.x); + c.rgb = Cd * Af + Cs * (1.0f - Af); #elif PS_BLEND == 30 // 30 => 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F - c.rgb = Cd * Af.x - Cs * Af.x; + c.rgb = Cd * Af - Cs * Af; #elif PS_BLEND == 31 // 31 => 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As @@ -552,15 +553,15 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 35 // 35 => 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F - c.rgb = Cs + Cd * Af.x; + c.rgb = Cs + Cd * Af; #elif PS_BLEND == 57 // C_CLR | 57 => #1221: (Cd - 0)*F + Cd ==> Cd*(1 + F) - c.rgb = Cd * (1.0f + Af.x); + c.rgb = Cd * (1.0f + Af); #elif PS_BLEND == 36 // 36 => 1222: (Cd - 0)*F + 0 ==> Cd*F - c.rgb = Cd * Af.x; + c.rgb = Cd * Af; #elif PS_BLEND == 37 // 37 => 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As) @@ -588,15 +589,15 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 43 // 43 => 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F) - c.rgb = Cs * (1.0f - Af.x); + c.rgb = Cs * (1.0f - Af); #elif PS_BLEND == 44 // 44 => 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F - c.rgb = Cd - Cs * Af.x; + c.rgb = Cd - Cs * Af; #elif PS_BLEND == 45 // 45 => 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F - c.rgb = - Cs * Af.x; + c.rgb = - Cs * Af; #elif PS_BLEND == 46 // 46 => 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As @@ -624,15 +625,15 @@ void ps_blend(inout vec4 c, in float As) #elif PS_BLEND == 52 // 52 => 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F - c.rgb = Cs - Cd * Af.x; + c.rgb = Cs - Cd * Af; #elif PS_BLEND == 53 // 53 => 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F) - c.rgb = Cd * (1.0f - Af.x); + c.rgb = Cd * (1.0f - Af); #elif PS_BLEND == 54 // 54 => 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F - c.rgb = - Cd * Af.x; + c.rgb = - Cd * Af; #endif diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 3fa091dc87..adea49b471 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -857,7 +857,8 @@ static const char* tfx_fs_all_glsl = " vec2 
MinF;\n" " vec2 TA;\n" " uvec4 MskFix;\n" - " vec4 Af;\n" + " vec3 FbMask;\n" + " float Af;\n" " vec4 HalfTexel;\n" " vec4 MinMax;\n" " vec4 TC_OffsetHack;\n" @@ -1220,7 +1221,7 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 6\n" " // 6 => *0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F\n" - " c.rgb = Cs * (Af.x + 1.0f) - Cd * Af.x;\n" + " c.rgb = Cs * (Af + 1.0f) - Cd * Af;\n" "\n" "#elif PS_BLEND == 7\n" " // 7 => *0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)\n" @@ -1232,7 +1233,7 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 9\n" " // 9 => *0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)\n" - " c.rgb = Cs * (Af.x + 1.0f);\n" + " c.rgb = Cs * (Af + 1.0f);\n" "\n" "#elif PS_BLEND == 10\n" " // 10 => *1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As\n" @@ -1244,7 +1245,7 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 12\n" " // 12 => *1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F\n" - " c.rgb = Cd * (Af.x + 1.0f) - Cs * Af.x;\n" + " c.rgb = Cd * (Af + 1.0f) - Cs * Af;\n" "\n" "#elif PS_BLEND == 13\n" " // 13 => 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)\n" @@ -1264,11 +1265,11 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 17\n" " // 17 => 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)\n" - " c.rgb = Cs * Af.x + Cd * (1.0f - Af.x);\n" + " c.rgb = Cs * Af + Cd * (1.0f - Af);\n" "\n" "#elif PS_BLEND == 18\n" " // 18 => 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F\n" - " c.rgb = Cs * Af.x - Cd * Af.x;\n" + " c.rgb = Cs * Af - Cd * Af;\n" "\n" "#elif PS_BLEND == 19\n" " // 19 => 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd\n" @@ -1288,11 +1289,11 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 23\n" " // 23 => 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd\n" - " c.rgb = Cs * Af.x + Cd;\n" + " c.rgb = Cs * Af + Cd;\n" "\n" "#elif PS_BLEND == 24\n" " // 24 => 0222: (Cs - 0)*F + 0 ==> Cs*F\n" - " c.rgb = Cs * Af.x;\n" + " c.rgb = Cs * Af;\n" "\n" "#elif PS_BLEND == 25\n" " // 25 => 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - 
As)\n" @@ -1312,11 +1313,11 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 29\n" " // 29 => 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F)\n" - " c.rgb = Cd * Af.x + Cs * (1.0f - Af.x);\n" + " c.rgb = Cd * Af + Cs * (1.0f - Af);\n" "\n" "#elif PS_BLEND == 30\n" " // 30 => 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F\n" - " c.rgb = Cd * Af.x - Cs * Af.x;\n" + " c.rgb = Cd * Af - Cs * Af;\n" "\n" "#elif PS_BLEND == 31\n" " // 31 => 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As\n" @@ -1344,15 +1345,15 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 35\n" " // 35 => 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F\n" - " c.rgb = Cs + Cd * Af.x;\n" + " c.rgb = Cs + Cd * Af;\n" "\n" "#elif PS_BLEND == 57\n" " // C_CLR | 57 => #1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)\n" - " c.rgb = Cd * (1.0f + Af.x);\n" + " c.rgb = Cd * (1.0f + Af);\n" "\n" "#elif PS_BLEND == 36\n" " // 36 => 1222: (Cd - 0)*F + 0 ==> Cd*F\n" - " c.rgb = Cd * Af.x;\n" + " c.rgb = Cd * Af;\n" "\n" "#elif PS_BLEND == 37\n" " // 37 => 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)\n" @@ -1380,15 +1381,15 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 43\n" " // 43 => 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)\n" - " c.rgb = Cs * (1.0f - Af.x);\n" + " c.rgb = Cs * (1.0f - Af);\n" "\n" "#elif PS_BLEND == 44\n" " // 44 => 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F\n" - " c.rgb = Cd - Cs * Af.x;\n" + " c.rgb = Cd - Cs * Af;\n" "\n" "#elif PS_BLEND == 45\n" " // 45 => 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F\n" - " c.rgb = - Cs * Af.x;\n" + " c.rgb = - Cs * Af;\n" "\n" "#elif PS_BLEND == 46\n" " // 46 => 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As\n" @@ -1416,15 +1417,15 @@ static const char* tfx_fs_all_glsl = "\n" "#elif PS_BLEND == 52\n" " // 52 => 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F\n" - " c.rgb = Cs - Cd * Af.x;\n" + " c.rgb = Cs - Cd * Af;\n" "\n" "#elif PS_BLEND == 53\n" " // 53 => 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)\n" - " c.rgb = Cd * (1.0f - Af.x);\n" + " c.rgb = Cd * (1.0f - Af);\n" "\n" "#elif PS_BLEND == 54\n" " // 
54 => 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F\n" - " c.rgb = - Cd * Af.x;\n" + " c.rgb = - Cd * Af;\n" "\n" "#endif\n" "\n" From 87f54ae0ffa517e41374e970a8966a282726f31e Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 20 Jun 2015 16:19:02 +0200 Subject: [PATCH 22/50] gsdx: report error on unsupported case in texture cache It seems to impact lots of games that still have issues (VP2, MTG3, PoP) The PSMT32 format is read as PSMT8. I think we need to convert it as PSMT8H (i.e. unpack it to have only an alpha channel) --- plugins/GSdx/GSTextureCache.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 6985d0a581..6dc1c8f63d 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -183,9 +183,9 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con #ifdef ENABLE_OGL_DEBUG } else { - GL_CACHE("TC: src hit: %d (0x%x)", + GL_CACHE("TC: src hit: %d (0x%x F:0x%x)", src->m_texture ? src->m_texture->GetID() : 0, - TEX0.TBP0); + TEX0.TBP0, TEX0.PSM); #endif } @@ -748,8 +748,14 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (dst) { // TODO: clean up this mess - src->m_32_bits_fmt = dst->m_32_bits_fmt; +#ifdef ENABLE_OGL_DEBUG + if (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4) { + GL_INS("ERROR: Reading RT as a packed-indexed (0x%x) format is not supported", TEX0.PSM); + } +#endif + + src->m_32_bits_fmt = dst->m_32_bits_fmt; src->m_target = true; dst->Update(); From 839003467e4193e66e74857816fa910a536f7088 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sun, 21 Jun 2015 08:47:45 +0200 Subject: [PATCH 23/50] gsdx-ogl: add support of partial frame buffer masking It might help to fix a bit the color on a couple of games accurate_fbmask = 1 Code uses GL4.5 extensions. So far it seems the effect is only used a couple of times and often in non-overlapping primitives.
Speed impact will likely remain small --- plugins/GSdx/GSDeviceOGL.cpp | 1 + plugins/GSdx/GSDeviceOGL.h | 20 ++++++----- plugins/GSdx/GSLinuxDialog.cpp | 4 ++- plugins/GSdx/GSRendererOGL.cpp | 60 ++++++++++++++++++++----------- plugins/GSdx/GSRendererOGL.h | 1 + plugins/GSdx/GSSetting.cpp | 7 ++-- plugins/GSdx/GSSetting.h | 1 + plugins/GSdx/res/glsl/tfx_fs.glsl | 20 +++++++++-- plugins/GSdx/res/glsl_source.h | 20 +++++++++-- 9 files changed, 98 insertions(+), 36 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index 756118b1eb..c89a81b1f9 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -649,6 +649,7 @@ GLuint GSDeviceOGL::CompilePS(PSSelector sel) + format("#define PS_IIP %d\n", sel.iip) + format("#define PS_SHUFFLE %d\n", sel.shuffle) + format("#define PS_READ_BA %d\n", sel.read_ba) + + format("#define PS_FBMASK %d\n", sel.fbmask) ; return m_shader->Compile("tfx.glsl", "ps_main", GL_FRAGMENT_SHADER, tfx_fs_all_glsl, macro); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index 14c9fb582d..af1d37f625 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -254,6 +254,7 @@ class GSDeviceOGL : public GSDevice GSVector4 WH; GSVector4 MinF_TA; GSVector4i MskFix; + GSVector4i FbMask; GSVector4 AlphaCoeff; GSVector4 HalfTexel; @@ -263,13 +264,14 @@ class GSDeviceOGL : public GSDevice PSConstantBuffer() { FogColor_AREF = GSVector4::zero(); - HalfTexel = GSVector4::zero(); - WH = GSVector4::zero(); - MinMax = GSVector4::zero(); - MinF_TA = GSVector4::zero(); - MskFix = GSVector4i::zero(); - AlphaCoeff = GSVector4::zero(); + HalfTexel = GSVector4::zero(); + WH = GSVector4::zero(); + MinMax = GSVector4::zero(); + MinF_TA = GSVector4::zero(); + MskFix = GSVector4i::zero(); + AlphaCoeff = GSVector4::zero(); TC_OffsetHack = GSVector4::zero(); + FbMask = GSVector4i::zero(); } __forceinline bool Update(const PSConstantBuffer* cb) @@ -279,7 +281,7 @@ class GSDeviceOGL : 
public GSDevice // if WH matches both HalfTexel and TC_OffsetHack do too // MinMax depends on WH and MskFix so no need to check it too - if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4])).alltrue()) + if(!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5])).alltrue()) { // Note previous check uses SSE already, a plain copy will be faster than any memcpy a[0] = b[0]; @@ -287,6 +289,7 @@ class GSDeviceOGL : public GSDevice a[2] = b[2]; a[3] = b[3]; a[4] = b[4]; + a[5] = b[5]; return true; } @@ -330,8 +333,9 @@ class GSDeviceOGL : public GSDevice // Word 2 uint32 blend:8; uint32 dfmt:2; + uint32 fbmask:1; - uint32 _free2:22; + uint32 _free2:21; }; uint64 key; diff --git a/plugins/GSdx/GSLinuxDialog.cpp b/plugins/GSdx/GSLinuxDialog.cpp index 13a9e26ae3..40b4d671b1 100644 --- a/plugins/GSdx/GSLinuxDialog.cpp +++ b/plugins/GSdx/GSLinuxDialog.cpp @@ -295,6 +295,7 @@ void populate_hw_table(GtkWidget* hw_table) GtkWidget* acc_blend_check = CreateCheckBox("Accurate Blend", "accurate_blend", true); GtkWidget* acc_date_check = CreateCheckBox("Accurate Date", "accurate_date", false); GtkWidget* acc_cclip_check = CreateCheckBox("Accurate Color Clipping", "accurate_colclip", false); + GtkWidget* acc_fbmsk_check = CreateCheckBox("Accurate FrameBuffer Mask", "accurate_fbmask", false); GtkWidget* MT_nvidia_check = CreateCheckBox("Nvidia Multi-Thread support", "enable_nvidia_multi_thread", true); @@ -303,6 +304,7 @@ void populate_hw_table(GtkWidget* hw_table) gtk_widget_set_tooltip_text(acc_blend_check, dialog_message(IDC_ACCURATE_BLEND)); gtk_widget_set_tooltip_text(acc_date_check, dialog_message(IDC_ACCURATE_DATE)); gtk_widget_set_tooltip_text(acc_cclip_check, dialog_message(IDC_ACCURATE_COLCLIP)); + gtk_widget_set_tooltip_text(acc_fbmsk_check, dialog_message(IDC_ACCURATE_FBMASK)); gtk_widget_set_tooltip_text(MT_nvidia_check, "Huge speedup on Nvidia binary driver! 
No effect otherwise."); gtk_widget_set_tooltip_text(crc_label, dialog_message(IDC_CRC_LEVEL)); gtk_widget_set_tooltip_text(crc_combo_box, dialog_message(IDC_CRC_LEVEL)); @@ -310,7 +312,7 @@ void populate_hw_table(GtkWidget* hw_table) s_table_line = 0; InsertWidgetInTable(hw_table, paltex_check, MT_nvidia_check); InsertWidgetInTable(hw_table, acc_blend_check, acc_date_check); - InsertWidgetInTable(hw_table, acc_cclip_check); + InsertWidgetInTable(hw_table, acc_cclip_check, acc_fbmsk_check); InsertWidgetInTable(hw_table, filter_label, filter_combo_box); InsertWidgetInTable(hw_table, af_label, af_combo_box); InsertWidgetInTable(hw_table, crc_label, crc_combo_box); diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 4df591ab53..1bf66eb724 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -32,6 +32,7 @@ GSRendererOGL::GSRendererOGL() m_accurate_blend = theApp.GetConfig("accurate_blend", 1); m_accurate_date = theApp.GetConfig("accurate_date", 0); m_accurate_colclip = theApp.GetConfig("accurate_colclip", 0); + m_accurate_fbmask = theApp.GetConfig("accurate_fbmask", 0); UserHacks_AlphaHack = theApp.GetConfig("UserHacks_AlphaHack", 0); UserHacks_AlphaStencil = theApp.GetConfig("UserHacks_AlphaStencil", 0); @@ -293,10 +294,13 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour // 1/ Reduce the frame mask to a 16 bit format const uint32& m = context->FRAME.FBMSK; uint32 fbmask = ((m >> 3) & 0x1F) | ((m >> 6) & 0x3E0) | ((m >> 9) & 0x7C00) | ((m >> 31) & 0x8000); + // FIXME GSVector will be nice here + uint8 rg_mask = fbmask & 0xFF; + uint8 ba_mask = (fbmask >> 8) & 0xFF; om_csel.wrgba = 0; // 2 Select the new mask (Please someone put SSE here) - if ((fbmask & 0xFF) == 0) { + if (rg_mask != 0xFF) { if (write_ba) { GL_INS("Color shuffle %s => B", ps_sel.read_ba ? 
"B" : "R"); om_csel.wb = 1; @@ -304,13 +308,11 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GL_INS("Color shuffle %s => R", ps_sel.read_ba ? "B" : "R"); om_csel.wr = 1; } - } else if ((fbmask & 0xFF) != 0xFF) { - GL_INS("ERROR: not supported RG mask:%x", fbmask & 0xFF); - ASSERT(0); + if (rg_mask) + ps_sel.fbmask = 1; } - fbmask >>= 8; - if ((fbmask & 0xFF) == 0) { + if (ba_mask != 0xFF) { if (write_ba) { GL_INS("Color shuffle %s => A", ps_sel.read_ba ? "A" : "G"); om_csel.wa = 1; @@ -318,9 +320,19 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour GL_INS("Color shuffle %s => G", ps_sel.read_ba ? "A" : "G"); om_csel.wg = 1; } - } else if ((fbmask & 0xFF) != 0xFF) { - GL_INS("ERROR: not supported BA mask:%x", fbmask & 0xFF); - ASSERT(0); + if (ba_mask) + ps_sel.fbmask = 1; + } + + ps_sel.fbmask &= m_accurate_fbmask; + if (ps_sel.fbmask) { + GL_INS("FBMASK SW emulated fb_mask:%x on tex shuffle", fbmask); + ps_cb.FbMask.r = rg_mask; + ps_cb.FbMask.g = rg_mask; + ps_cb.FbMask.b = ba_mask; + ps_cb.FbMask.a = ba_mask; + require_barrier = true; + dev->PSSetShaderResource(3, rt); } } else { @@ -329,29 +341,35 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour om_csel.wrgba = ~GSVector4i::load((int)context->FRAME.FBMSK).eq8(GSVector4i::xffffffff()).mask(); { -#ifdef ENABLE_OGL_DEBUG + // FIXME GSVector will be nice here uint8 r_mask = (context->FRAME.FBMSK >> 0) & 0xFF; uint8 g_mask = (context->FRAME.FBMSK >> 8) & 0xFF; uint8 b_mask = (context->FRAME.FBMSK >> 16) & 0xFF; uint8 a_mask = (context->FRAME.FBMSK >> 24) & 0xFF; - uint8 bits = (GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 
16 : 32; if (r_mask != 0 && r_mask != 0xFF) { - GL_INS("ERROR: not supported r_mask:%x on %d bits format", r_mask, bits); - ASSERT(0); + ps_sel.fbmask = 1; } if (g_mask != 0 && g_mask != 0xFF) { - GL_INS("ERROR: not supported g_mask:%x on %d bits format", g_mask, bits); - ASSERT(0); + ps_sel.fbmask = 1; } if (b_mask != 0 && b_mask != 0xFF) { - GL_INS("ERROR: not supported b_mask:%x on %d bits format", b_mask, bits); - ASSERT(0); + ps_sel.fbmask = 1; } if (a_mask != 0 && a_mask != 0xFF) { - GL_INS("ERROR: not supported a_mask:%x on %d bits format", a_mask, bits); - ASSERT(0); + ps_sel.fbmask = 1; + } + + ps_sel.fbmask &= m_accurate_fbmask; + if (ps_sel.fbmask) { + GL_INS("FBMASK SW emulated fb_mask:%x on %d bits format", context->FRAME.FBMSK, + (GSLocalMemory::m_psm[context->FRAME.PSM].fmt == 2) ? 16 : 32); + ps_cb.FbMask.r = r_mask; + ps_cb.FbMask.g = g_mask; + ps_cb.FbMask.b = b_mask; + ps_cb.FbMask.a = a_mask; + require_barrier = true; + dev->PSSetShaderResource(3, rt); } -#endif } } @@ -730,7 +748,7 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour int blend_sel = ((om_bsel.a * 3 + om_bsel.b) * 3 + om_bsel.c) * 3 + om_bsel.d; int bogus_blend = GSDeviceOGL::m_blendMapD3D9[blend_sel].bogus; bool all_sw = !( (ALPHA.A == ALPHA.B) || (ALPHA.C == 2 && afix <= 1.002f) ) && (m_accurate_blend > 1); - bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw; + bool sw_blending = (m_accurate_blend && (bogus_blend & A_MAX)) || acc_colclip_wrap || all_sw || ps_sel.fbmask; if (sw_blending && om_bsel.abe && rt) { GL_INS("!!! 
SW blending effect used (0x%x from sel %d) !!!", bogus_blend, blend_sel); diff --git a/plugins/GSdx/GSRendererOGL.h b/plugins/GSdx/GSRendererOGL.h index e1cf6ee39d..e97d6741d9 100644 --- a/plugins/GSdx/GSRendererOGL.h +++ b/plugins/GSdx/GSRendererOGL.h @@ -34,6 +34,7 @@ class GSRendererOGL : public GSRendererHW int m_accurate_blend; bool m_accurate_date; bool m_accurate_colclip; + bool m_accurate_fbmask; bool UserHacks_AlphaHack; bool UserHacks_AlphaStencil; diff --git a/plugins/GSdx/GSSetting.cpp b/plugins/GSdx/GSSetting.cpp index b0546df6d3..0dd76596a6 100644 --- a/plugins/GSdx/GSSetting.cpp +++ b/plugins/GSdx/GSSetting.cpp @@ -117,8 +117,11 @@ const char* dialog_message(int ID, bool* updateText) { return "Allow to solve the impossible blending error message.\n\n" "It could be slower when the effect are used.\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; case IDC_ACCURATE_COLCLIP: - return "Debug option to implement the wrapping of color after an overflow\n\n" - "It will be slow when the effect are used!\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; + return "Implement the wrapping of color after an overflow\n\n" + "It will be slow (half speed) when the effect are used!\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; + case IDC_ACCURATE_FBMASK: + return "Implement partial color masking\n\n" + "No status yet on the speed impact\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; #endif default: if (updateText) diff --git a/plugins/GSdx/GSSetting.h b/plugins/GSdx/GSSetting.h index e5cb8137fe..60c29e5ade 100644 --- a/plugins/GSdx/GSSetting.h +++ b/plugins/GSdx/GSSetting.h @@ -66,6 +66,7 @@ enum { IDC_ACCURATE_BLEND, IDC_ACCURATE_DATE, IDC_ACCURATE_COLCLIP, + IDC_ACCURATE_FBMASK, IDC_CRC_LEVEL }; #endif diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 2243389a87..7229e5cb79 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ 
b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -65,11 +65,12 @@ layout(std140, binding = 21) uniform cb21 vec2 MinF; vec2 TA; uvec4 MskFix; - vec3 FbMask; + uvec4 FbMask; + vec3 _not_yet_used; float Af; vec4 HalfTexel; vec4 MinMax; - vec4 TC_OffsetHack; + vec2 TC_OffsetHack; }; #ifdef SUBROUTINE_GL40 @@ -393,6 +394,18 @@ vec4 ps_color() return c; } +void ps_fbmask(inout vec4 c) +{ + // FIXME do I need special case for 16 bits +#if PS_FBMASK + vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); + uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f); + uvec4 denorm_c = uvec4(c * 255.0f + 0.5f); + c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f; +#endif +} + +#if PS_BLEND > 0 void ps_blend(inout vec4 c, in float As) { vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0); @@ -658,6 +671,7 @@ void ps_blend(inout vec4 c, in float As) // Don't compile => unable to find compatible overloaded function "mod(vec3)" //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f; } +#endif void ps_main() { @@ -770,6 +784,8 @@ void ps_main() ps_blend(c, alpha); #endif + ps_fbmask(c); + SV_Target0 = c; SV_Target1 = vec4(alpha, alpha, alpha, alpha); } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index adea49b471..b267373096 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -857,11 +857,12 @@ static const char* tfx_fs_all_glsl = " vec2 MinF;\n" " vec2 TA;\n" " uvec4 MskFix;\n" - " vec3 FbMask;\n" + " uvec4 FbMask;\n" + " vec3 _not_yet_used;\n" " float Af;\n" " vec4 HalfTexel;\n" " vec4 MinMax;\n" - " vec4 TC_OffsetHack;\n" + " vec2 TC_OffsetHack;\n" "};\n" "\n" "#ifdef SUBROUTINE_GL40\n" @@ -1185,6 +1186,18 @@ static const char* tfx_fs_all_glsl = " return c;\n" "}\n" "\n" + "void ps_fbmask(inout vec4 c)\n" + "{\n" + " // FIXME do I need special case for 16 bits\n" + "#if PS_FBMASK\n" + " vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n" + " uvec4 denorm_rt = uvec4(rt * 255.0f + 0.5f);\n" + " uvec4 
denorm_c = uvec4(c * 255.0f + 0.5f);\n" + " c = vec4((denorm_c & ~FbMask) | (denorm_rt & FbMask)) / 255.0f;\n" + "#endif\n" + "}\n" + "\n" + "#if PS_BLEND > 0\n" "void ps_blend(inout vec4 c, in float As)\n" "{\n" " vec4 rt = texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);\n" @@ -1450,6 +1463,7 @@ static const char* tfx_fs_all_glsl = " // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n" " //c.rgb = mod((c.rgb * 255.0f) + 256.5f) / 255.0f;\n" "}\n" + "#endif\n" "\n" "void ps_main()\n" "{\n" @@ -1562,6 +1576,8 @@ static const char* tfx_fs_all_glsl = " ps_blend(c, alpha);\n" "#endif\n" "\n" + " ps_fbmask(c);\n" + "\n" " SV_Target0 = c;\n" " SV_Target1 = vec4(alpha, alpha, alpha, alpha);\n" "}\n" From ae3b768de95aae9c74d0a17c1e7c67a6741ff3f4 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 24 Jun 2015 20:00:13 +0200 Subject: [PATCH 24/50] bump gsdx version to 1.0.0 --- plugins/GSdx/CMakeLists.txt | 2 +- plugins/GSdx/GS.cpp | 4 ++-- plugins/GSdx/GS.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/CMakeLists.txt b/plugins/GSdx/CMakeLists.txt index ae007947b8..cee53a1d25 100644 --- a/plugins/GSdx/CMakeLists.txt +++ b/plugins/GSdx/CMakeLists.txt @@ -7,7 +7,7 @@ endif() # plugin name -set(Output GSdx-0.1.16) +set(Output GSdx-1.0.0) set(CommonFlags -fno-operator-names # because Xbyak uses and()/xor()/or()/not() function diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index a49e31f593..11943153fd 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -86,8 +86,8 @@ EXPORT_C_(const char*) PS2EgetLibName() EXPORT_C_(uint32) PS2EgetLibVersion2(uint32 type) { - const uint32 revision = 0; - const uint32 build = 1; + const uint32 revision = 1; + const uint32 build = 0; return (build << 0) | (revision << 8) | (PS2E_GS_VERSION << 16) | (PLUGIN_VERSION << 24); } diff --git a/plugins/GSdx/GS.h b/plugins/GSdx/GS.h index 52611fff18..4f0f493b95 100644 --- a/plugins/GSdx/GS.h +++ b/plugins/GSdx/GS.h 
@@ -21,7 +21,7 @@ #pragma once -#define PLUGIN_VERSION 16 +#define PLUGIN_VERSION 0 #define VM_SIZE 4194304 #define PAGE_SIZE 8192 From 77e718c61f6a5bc740810d247de7f8d93e3e59c7 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 25 Jun 2015 08:25:31 +0200 Subject: [PATCH 25/50] gsdx: disable linear interpolation when converting texture in texture cache It avoid various upscaling glitches on GS post-processing effect // 1/ Palette is used to interpret the alpha channel of the RT as an index. // Star Ocean 3 uses it to emulate a stencil buffer. // 2/ Z formats are a bad idea to interpolate (discontinuties). // 3/ 16 bits buffer is used to move data from a channel to another. // // I keep linear filtering for standard color even if I'm not sure that it is // working correctly. // Indeed, texture is reduced so you need to read all covered pixels (9 in 3x) // to correctly interpolate the value. Linear interpolation is likely acceptable // only in 2x scaling // // Src texture will still be bilinear interpolated so I'm really not sure // that we need to do it here too. // // Future note: instead to do // RT 2048x2048 -> T 1024x1024 -> RT 2048x2048 // We can maybe sample directly a bigger texture // RT 2048x2048 -> T 2048x2048 -> RT 2048x2048 // Pro: better quality. Copy instead of StretchRect (must be faster) // Cons: consume more memory // // In distant future: investigate to reuse the RT directly without any // copy. Likely a speed boost and memory usage reduction. 
--- plugins/GSdx/GSTextureCache.cpp | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 6dc1c8f63d..ba155d440d 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -864,14 +864,34 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format // However it is different here. We want to reuse a Render Target as a texture. // Because the texture is already on the GPU, CPU can't convert it. - bool linear = true; if (psm.pal > 0) { src->m_palette = m_renderer->m_dev->CreateTexture(256, 1); - // Palette is used to interpret the alpha channel of the RT as an index. - // Star Ocean 3 uses it to emulate a stencil buffer. - // It is a very bad idea to force bilinear filtering on it. - linear = false; } + // Disable linear filtering for various GS post-processing effect + // 1/ Palette is used to interpret the alpha channel of the RT as an index. + // Star Ocean 3 uses it to emulate a stencil buffer. + // 2/ Z formats are a bad idea to interpolate (discontinuties). + // 3/ 16 bits buffer is used to move data from a channel to another. + // + // I keep linear filtering for standard color even if I'm not sure that it is + // working correctly. + // Indeed, texture is reduced so you need to read all covered pixels (9 in 3x) + // to correctly interpolate the value. Linear interpolation is likely acceptable + // only in 2x scaling + // + // Src texture will still be bilinear interpolated so I'm really not sure + // that we need to do it here too. + // + // Future note: instead to do + // RT 2048x2048 -> T 1024x1024 -> RT 2048x2048 + // We can maybe sample directly a bigger texture + // RT 2048x2048 -> T 2048x2048 -> RT 2048x2048 + // Pro: better quality. 
Copy instead of StretchRect (must be faster) + // Cons: consume more memory + // + // In distant future: investigate to reuse the RT directly without any + // copy. Likely a speed boost and memory usage reduction. + bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24); int shader = dst->m_type != RenderTarget ? 11 : 0; From 9802ba66214434b06d027d6a090a5a0cc4babe92 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 25 Jun 2015 09:12:03 +0200 Subject: [PATCH 26/50] gsdx-tc: add support of pseudo 8 bits RT conversion Code is obviously slow but at least it works. It fixes the blur effect of VP2 --- plugins/GSdx/GSTextureCache.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index ba155d440d..81d4406a8d 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -129,7 +129,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con if(t->m_used && t->m_dirty.empty()) { if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { - dst = t; + if (psm == PSM_PSMT8) { + // It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will + // be slow but + // 1/ it just works :) + // 2/ even with upscaling + // 3/ for both DX and OpenGL + Read(t, t->m_valid); + } else { + dst = t; + } break; From a588f61a8a2e4c80c31d7da5099c5adebce11ace Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 25 Jun 2015 09:49:11 +0200 Subject: [PATCH 27/50] gsdx: add an assert for unsupported case --- plugins/GSdx/GSRendererHW.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index 6242005bb7..c0754dbaa0 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -402,6 +402,7 @@ void GSRendererHW::Draw() } // Both input and output are 16 bits but texture was initially 32 bits! 
m_texture_shuffle = ((context->FRAME.PSM & 0x2) && ((context->TEX0.PSM & 3) == 2) && (m_vt.m_primclass == GS_SPRITE_CLASS) && tex->m_32_bits_fmt); + ASSERT(!m_texture_shuffle || (context->CLAMP.WMS < 3 && context->CLAMP.WMT < 3)); } if(s_dump) From 4a65e5e723a9d58b1079803fccb9aca125426147 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 25 Jun 2015 19:00:58 +0200 Subject: [PATCH 28/50] gsdx-tc: don't try to read a 0 pixels texture "Regression" introduced in 8 bits RT support The case appears when 1/ address of RT is the same as input texture 2/ input format is 8 bits 3/ previous lookup of RT was miss --- plugins/GSdx/GSTextureCache11.cpp | 2 +- plugins/GSdx/GSTextureCache9.cpp | 2 +- plugins/GSdx/GSTextureCacheOGL.cpp | 4 +--- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/plugins/GSdx/GSTextureCache11.cpp b/plugins/GSdx/GSTextureCache11.cpp index ed00b17194..72c4251935 100644 --- a/plugins/GSdx/GSTextureCache11.cpp +++ b/plugins/GSdx/GSTextureCache11.cpp @@ -50,7 +50,7 @@ void GSTextureCache11::Read(Target* t, const GSVector4i& r) return; } - if(!t->m_dirty.empty()) + if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0)) { return; } diff --git a/plugins/GSdx/GSTextureCache9.cpp b/plugins/GSdx/GSTextureCache9.cpp index 1d75f1a908..1820efc2ad 100644 --- a/plugins/GSdx/GSTextureCache9.cpp +++ b/plugins/GSdx/GSTextureCache9.cpp @@ -50,7 +50,7 @@ void GSTextureCache9::Read(Target* t, const GSVector4i& r) return; } - if(!t->m_dirty.empty()) + if (!t->m_dirty.empty() || (r.width() == 0 && r.height() == 0)) { return; } diff --git a/plugins/GSdx/GSTextureCacheOGL.cpp b/plugins/GSdx/GSTextureCacheOGL.cpp index b11940bf47..d1412b8d2c 100644 --- a/plugins/GSdx/GSTextureCacheOGL.cpp +++ b/plugins/GSdx/GSTextureCacheOGL.cpp @@ -30,10 +30,8 @@ GSTextureCacheOGL::GSTextureCacheOGL(GSRenderer* r) void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) { - if(!t->m_dirty.empty()) - { + if (!t->m_dirty.empty() || (r.width() == 0 && r.height() 
== 0)) return; - } const GIFRegTEX0& TEX0 = t->m_TEX0; From 85543db094af490a7b1dc3b2bfb3087cde92d9cb Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 25 Jun 2015 19:09:26 +0200 Subject: [PATCH 29/50] gsdx-tc: trace the alpha channel update in TC Improve the rendering in MGS3 (even if the game is still broken due to others TC issues) // Typical bug (MGS3 blue cloud): // 1/ RT used as 32 bits => alpha channel written // 2/ RT used as 24 bits => no update of alpha channel // 3/ Lookup of texture that used alpha channel as index, HasSharedBits will return false // because of the previous draw call format // // Solution: consider the RT as 32 bits if the alpha was used in the past --- plugins/GSdx/GSTextureCache.cpp | 15 ++++++++++++++- plugins/GSdx/GSTextureCache.h | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 81d4406a8d..f17a313f51 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -128,7 +128,16 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con Target* t = *i; if(t->m_used && t->m_dirty.empty()) { - if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { + // Typical bug (MGS3 blue cloud): + // 1/ RT used as 32 bits => alpha channel written + // 2/ RT used as 24 bits => no update of alpha channel + // 3/ Lookup of texture that used alpha channel as index, HasSharedBits will return false + // because of the previous draw call format + // + // Solution: consider the RT as 32 bits if the alpha was used in the past + uint32 t_psm = (t->m_dirty_alpha) ? t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; + + if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { if (psm == PSM_PSMT8) { // It is a complex to convert the code in shader. 
As a reference, let's do it on the CPU, it will // be slow but @@ -243,6 +252,9 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int GL_CACHE("TC: Lookup Target(%s) %dx%d, hit: %d (0x%x, F:0x%x)", to_string(type), w, h, dst->m_texture->GetID(), bp, TEX0.PSM); dst->Update(); + + dst->m_dirty_alpha |= (TEX0.PSM != PSM_PSMCT24) && (TEX0.PSM != PSM_PSMZ24); + } else if (CanConvertDepth()) { int rev_type = (type == DepthStencil) ? RenderTarget : DepthStencil; @@ -1303,6 +1315,7 @@ GSTextureCache::Target::Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* tem { m_TEX0 = TEX0; m_32_bits_fmt |= !(TEX0.PSM & 2); + m_dirty_alpha = (TEX0.PSM != PSM_PSMCT24) && (TEX0.PSM != PSM_PSMZ24); m_valid = GSVector4i::zero(); } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index dc679bf345..d3e2ff68f3 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -82,6 +82,7 @@ public: GSDirtyRectList m_dirty; GSVector4i m_valid; bool m_depth_supported; + bool m_dirty_alpha; public: Target(GSRenderer* r, const GIFRegTEX0& TEX0, uint8* temp, bool depth_supported); From a751db5f2b9aa6df7d3439761148b0161bc27a24 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 25 Jun 2015 21:59:02 +0200 Subject: [PATCH 30/50] gsdx-ogl: merge 2 log (too verbose otherwise) --- plugins/GSdx/GSTextureCacheOGL.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/GSTextureCacheOGL.cpp b/plugins/GSdx/GSTextureCacheOGL.cpp index d1412b8d2c..9f137e55e4 100644 --- a/plugins/GSdx/GSTextureCacheOGL.cpp +++ b/plugins/GSdx/GSTextureCacheOGL.cpp @@ -75,10 +75,8 @@ void GSTextureCacheOGL::Read(Target* t, const GSVector4i& r) // Yes lots of logging, but I'm not confident with this code GL_PUSH("Texture Cache Read. Format(0x%x)", TEX0.PSM); - GL_CACHE("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]", - t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM); - - GL_PERF("Read texture from GPU. 
Format(0x%x)", TEX0.PSM); + GL_PERF("TC: Read Back Target: %d (0x%x)[fmt: 0x%x]. Size %dx%d", + t->m_texture->GetID(), TEX0.TBP0, TEX0.PSM, r.width(), r.height()); GSVector4 src = GSVector4(r) * GSVector4(t->m_texture->GetScale()).xyxy() / GSVector4(t->m_texture->GetSize()).xyxy(); From e66aac8ab75f5cee97dd52d03d0a3ab1c2e89608 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 26 Jun 2015 09:25:50 +0200 Subject: [PATCH 31/50] gsdx-hw: add an hidden option to preload the final frame with GS mem data Game can directly uploads a background or the full image in "CTRC" buffer. Previous code was a full black screen. It will also avoid various black screen issue in gs dump. hidden option: preload_frame_with_gs_data Note: impact on upscaling was not tested and it's likely broken --- plugins/GSdx/GSRendererHW.cpp | 2 +- plugins/GSdx/GSTextureCache.cpp | 14 +++++++++++++- plugins/GSdx/GSTextureCache.h | 3 ++- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/plugins/GSdx/GSRendererHW.cpp b/plugins/GSdx/GSRendererHW.cpp index c0754dbaa0..dae60ddb9e 100644 --- a/plugins/GSdx/GSRendererHW.cpp +++ b/plugins/GSdx/GSRendererHW.cpp @@ -161,7 +161,7 @@ GSTexture* GSRendererHW::GetOutput(int i) GSTexture* t = NULL; - if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height)) + if(GSTextureCache::Target* rt = m_tc->LookupTarget(TEX0, m_width, m_height, GetFrameRect(i).bottom)) { t = rt->m_texture; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index f17a313f51..5db4bdea66 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -26,6 +26,7 @@ GSTextureCache::GSTextureCache(GSRenderer* r) : m_renderer(r) { m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? 
theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; + m_preload_frame = theApp.GetConfig("preload_frame_with_gs_data", 0); UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); m_paltex = !!theApp.GetConfig("paltex", 0); @@ -342,7 +343,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int return dst; } -GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h) +GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h) { uint32 bp = TEX0.TBP0; @@ -385,6 +386,17 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int } m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); // new frame buffers after reset should be cleared, don't display memory garbage + + if (m_preload_frame) { + // Load GS data into frame. Game can directly uploads a background or the full image in + // "CTRC" buffer. It will also avoid various black screen issue in gs dump. 
+ // + // Code is more or less an equivalent of the SW renderer + // + // Option is hidden and not enabled by default to avoid any regression + dst->m_dirty.push_back(GSDirtyRect(GSVector4i(0, 0, TEX0.TBW * 64, real_h), TEX0.PSM)); + dst->Update(); + } } else { diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index d3e2ff68f3..0d337e8662 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -112,6 +112,7 @@ protected: list m_dst[2]; bool m_paltex; int m_spritehack; + bool m_preload_frame; uint8* m_temp; bool m_can_convert_depth; @@ -139,7 +140,7 @@ public: Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r); Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int type, bool used); - Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h); + Target* LookupTarget(const GIFRegTEX0& TEX0, int w, int h, int real_h); void InvalidateVideoMemType(int type, uint32 bp); void InvalidateVideoMem(GSOffset* off, const GSVector4i& r, bool target = true); From 99b03b46322bd6af73e194e648d96b26f69568f8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 26 Jun 2015 18:02:51 +0200 Subject: [PATCH 32/50] gsdx-debug: log all write (texture) from the EE It is verbose but it might help to understand the invalidation of texture --- plugins/GSdx/GSState.cpp | 7 ++++++- plugins/GSdx/GSTextureCache.cpp | 9 ++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index a9f3256a09..b3ff6783f6 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -1486,6 +1486,11 @@ void GSState::Write(const uint8* mem, int len) return; } + GL_CACHE("Write! ... 
=> 0x%x W:%d F:%d (DIR %d%d), dPos(%d %d) size(%d %d)", + m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, + m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, + m_env.TRXPOS.DSAX, m_env.TRXPOS.DSAY, w, h); + if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP)) // TODO: hmmmm { FlushPrim(); @@ -1602,7 +1607,7 @@ void GSState::Move() int w = m_env.TRXREG.RRW; int h = m_env.TRXREG.RRH; - GL_CACHE("Move! %05x %d %d => %05x %d %d (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d)", + GL_CACHE("Move! 0x%x W:%d F:%d => 0x%x W:%d F:%d (DIR %d%d), sPos(%d %d) dPos(%d %d) size(%d %d)", m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW, m_env.BITBLTBUF.SPSM, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, m_env.TRXPOS.DIRX, m_env.TRXPOS.DIRY, diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 5db4bdea66..c82822897b 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -379,7 +379,6 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int dst = CreateTarget(TEX0, w, h, RenderTarget); - if(dst == NULL) { return NULL; @@ -556,6 +555,13 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b Target* t = *j; + // GH: (I think) this code is completely broken. Typical issue: + // EE write an alpha channel into 32 bits texture + // Results: the target is deleted (because HasCompatibleBits is false) + // + // Major issues are expected if the game try to reuse the target + // If we dirty the RT, it will likely upload partially invalid data. 
+ // (The color on the previous example) if(GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t->m_TEX0.PSM)) { if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) @@ -574,6 +580,7 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b } } + // GH: Try to detect texture write that will overlap with a target buffer if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) && bp < t->m_TEX0.TBP0) { uint32 rowsize = bw * 8192; From 61a717db1463196b97405099517254038f9f7705 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 26 Jun 2015 18:33:41 +0200 Subject: [PATCH 33/50] gsdx-tc: clean the RT in debug build // In theory new textures contain invalidated data. Still in theory a new target // must contains the content of the GS memory. // In practice, TC will wrongly invalidate some RT. For example due to write on the alpha // channel but colors is still valid. Unfortunately TC doesn't support the upload of data // in target. // // Cleaning the code here will likely break several games. However it might reduce // the noise in draw call debugging. It is the main reason to enable it on debug build. // // From a performance point of view, it might cost a little on big upscaling // but normally few RT are miss so it must remain reasonable. --- plugins/GSdx/GSTextureCache.cpp | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index c82822897b..b4df5377a4 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -295,6 +295,25 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int if(dst == NULL) return NULL; + +#ifdef ENABLE_OGL_DEBUG + // In theory new textures contain invalidated data. Still in theory a new target + // must contains the content of the GS memory. + // In practice, TC will wrongly invalidate some RT. For example due to write on the alpha + // channel but colors is still valid. 
Unfortunately TC doesn't support the upload of data + // in target. + // + // Cleaning the code here will likely break several games. However it might reduce + // the noise in draw call debugging. It is the main reason to enable it on debug build. + // + // From a performance point of view, it might cost a little on big upscaling + // but normally few RT are miss so it must remain reasonable. + switch (type) { + case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; + case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break; + default:break; + } +#endif } if(m_renderer->CanUpscale()) From db5468afcf0802378b4c56beddb7007bfff9a760 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 26 Jun 2015 20:03:15 +0200 Subject: [PATCH 34/50] glsl: fix shadow on Shin Megami Tensei Nocturne --- plugins/GSdx/res/glsl/tfx_fs.glsl | 5 ++++- plugins/GSdx/res/glsl_source.h | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index 7229e5cb79..fb3a3ad653 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -48,7 +48,10 @@ layout(binding = 3) uniform sampler2D RtSampler; // note 2 already use by the im #if PS_DATE > 0 // FIXME how to declare memory access layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min; -layout(early_fragment_tests) in; +// Don't enable it. 
Discard fragment can still write in the depth buffer +// it breaks shadow in Shin Megami Tensei Nocturne +//layout(early_fragment_tests) in; + // I don't remember why I set this parameter but it is surely useless //layout(pixel_center_integer) in vec4 gl_FragCoord; #endif diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index b267373096..d44aeabd2b 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -840,7 +840,10 @@ static const char* tfx_fs_all_glsl = "#if PS_DATE > 0\n" "// FIXME how to declare memory access\n" "layout(r32i, binding = 2) coherent uniform iimage2D img_prim_min;\n" - "layout(early_fragment_tests) in;\n" + "// Don't enable it. Discard fragment can still write in the depth buffer\n" + "// it breaks shadow in Shin Megami Tensei Nocturne\n" + "//layout(early_fragment_tests) in;\n" + "\n" "// I don't remember why I set this parameter but it is surely useless\n" "//layout(pixel_center_integer) in vec4 gl_FragCoord;\n" "#endif\n" From 3f6edc39cfbdd5c5daf5d95abe5c3c94d5eb70ee Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Fri, 26 Jun 2015 22:07:51 +0200 Subject: [PATCH 35/50] gsdx-linux-gui: update option Add mipmap for SW Add tc depth for HW Remove nvidia MT, always on --- plugins/GSdx/GS.cpp | 7 ------- plugins/GSdx/GSLinuxDialog.cpp | 10 +++++----- plugins/GSdx/GSSetting.cpp | 2 ++ plugins/GSdx/GSSetting.h | 1 + plugins/GSdx/GSSettingsDlg.cpp | 6 +++--- plugins/GSdx/GSTextureOGL.cpp | 2 +- plugins/GSdx/GSdx.rc | 2 +- plugins/GSdx/resource.h | 2 +- 8 files changed, 14 insertions(+), 18 deletions(-) diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 11943153fd..f7c73a5d87 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -194,13 +194,6 @@ EXPORT_C GSclose() static int _GSopen(void** dsp, char* title, int renderer, int threads = -1) { - // I really don't know the impact on windows! 
It could work -#ifdef __linux__ - if (theApp.GetConfig("enable_nvidia_multi_thread", 1)) { - setenv("__GL_THREADED_OPTIMIZATIONS", "1", 0); - } -#endif - GSDevice* dev = NULL; if(renderer == -1) diff --git a/plugins/GSdx/GSLinuxDialog.cpp b/plugins/GSdx/GSLinuxDialog.cpp index 40b4d671b1..dd76a2297a 100644 --- a/plugins/GSdx/GSLinuxDialog.cpp +++ b/plugins/GSdx/GSLinuxDialog.cpp @@ -296,8 +296,7 @@ void populate_hw_table(GtkWidget* hw_table) GtkWidget* acc_date_check = CreateCheckBox("Accurate Date", "accurate_date", false); GtkWidget* acc_cclip_check = CreateCheckBox("Accurate Color Clipping", "accurate_colclip", false); GtkWidget* acc_fbmsk_check = CreateCheckBox("Accurate FrameBuffer Mask", "accurate_fbmask", false); - - GtkWidget* MT_nvidia_check = CreateCheckBox("Nvidia Multi-Thread support", "enable_nvidia_multi_thread", true); + GtkWidget* tc_depth_check = CreateCheckBox("Full Depth Emulation", "texture_cache_depth", true); // Some helper string gtk_widget_set_tooltip_text(paltex_check, dialog_message(IDC_PALTEX)); @@ -305,12 +304,12 @@ void populate_hw_table(GtkWidget* hw_table) gtk_widget_set_tooltip_text(acc_date_check, dialog_message(IDC_ACCURATE_DATE)); gtk_widget_set_tooltip_text(acc_cclip_check, dialog_message(IDC_ACCURATE_COLCLIP)); gtk_widget_set_tooltip_text(acc_fbmsk_check, dialog_message(IDC_ACCURATE_FBMASK)); - gtk_widget_set_tooltip_text(MT_nvidia_check, "Huge speedup on Nvidia binary driver! 
No effect otherwise."); gtk_widget_set_tooltip_text(crc_label, dialog_message(IDC_CRC_LEVEL)); gtk_widget_set_tooltip_text(crc_combo_box, dialog_message(IDC_CRC_LEVEL)); + gtk_widget_set_tooltip_text(tc_depth_check, dialog_message(IDC_TC_DEPTH)); s_table_line = 0; - InsertWidgetInTable(hw_table, paltex_check, MT_nvidia_check); + InsertWidgetInTable(hw_table, paltex_check, tc_depth_check); InsertWidgetInTable(hw_table, acc_blend_check, acc_date_check); InsertWidgetInTable(hw_table, acc_cclip_check, acc_fbmsk_check); InsertWidgetInTable(hw_table, filter_label, filter_combo_box); @@ -354,11 +353,12 @@ void populate_sw_table(GtkWidget* sw_table) GtkWidget* threads_spin = CreateSpinButton(0, 32, "extrathreads", 0); GtkWidget* aa_check = CreateCheckBox("Edge anti-aliasing (AA1)", "aa1"); + GtkWidget* mipmap_check = CreateCheckBox("Mipmap", "mipmap", true); GtkWidget* spin_thread_check= CreateCheckBox("Disable thread sleeping (6+ cores CPU)", "spin_thread"); s_table_line = 0; InsertWidgetInTable(sw_table , threads_label , threads_spin); - InsertWidgetInTable(sw_table , aa_check); + InsertWidgetInTable(sw_table , aa_check, mipmap_check); InsertWidgetInTable(sw_table , spin_thread_check , spin_thread_check); } diff --git a/plugins/GSdx/GSSetting.cpp b/plugins/GSdx/GSSetting.cpp index 0dd76596a6..361bdd4141 100644 --- a/plugins/GSdx/GSSetting.cpp +++ b/plugins/GSdx/GSSetting.cpp @@ -122,6 +122,8 @@ const char* dialog_message(int ID, bool* updateText) { case IDC_ACCURATE_FBMASK: return "Implement partial color masking\n\n" "No status yet on the speed impact\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; + case IDC_TC_DEPTH: + return "Allow to convert Depth buffer from/to Color buffer. 
It is used for blur & depth of field effects"; #endif default: if (updateText) diff --git a/plugins/GSdx/GSSetting.h b/plugins/GSdx/GSSetting.h index 60c29e5ade..21d64ec9c0 100644 --- a/plugins/GSdx/GSSetting.h +++ b/plugins/GSdx/GSSetting.h @@ -67,6 +67,7 @@ enum { IDC_ACCURATE_DATE, IDC_ACCURATE_COLCLIP, IDC_ACCURATE_FBMASK, + IDC_TC_DEPTH, IDC_CRC_LEVEL }; #endif diff --git a/plugins/GSdx/GSSettingsDlg.cpp b/plugins/GSdx/GSSettingsDlg.cpp index 84ecb71264..a19d6da7df 100644 --- a/plugins/GSdx/GSSettingsDlg.cpp +++ b/plugins/GSdx/GSSettingsDlg.cpp @@ -197,7 +197,7 @@ void GSSettingsDlg::OnInit() CheckDlgButton(m_hWnd, IDC_ACCURATE_BLEND, theApp.GetConfig("accurate_blend", 1)); CheckDlgButton(m_hWnd, IDC_ACCURATE_DATE, theApp.GetConfig("accurate_date", 0)); CheckDlgButton(m_hWnd, IDC_ACCURATE_COLCLIP, theApp.GetConfig("accurate_colclip", 0)); - CheckDlgButton(m_hWnd, IDC_OLGDEPTH, theApp.GetConfig("texture_cache_depth", 0)); + CheckDlgButton(m_hWnd, IDC_TC_DEPTH, theApp.GetConfig("texture_cache_depth", 0)); // Shade Boost CheckDlgButton(m_hWnd, IDC_SHADEBOOST, theApp.GetConfig("ShadeBoost", 0)); @@ -335,7 +335,7 @@ bool GSSettingsDlg::OnCommand(HWND hWnd, UINT id, UINT code) theApp.SetConfig("accurate_blend", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_BLEND)); theApp.SetConfig("accurate_date", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_DATE)); theApp.SetConfig("accurate_colclip", (int)IsDlgButtonChecked(m_hWnd, IDC_ACCURATE_COLCLIP)); - theApp.SetConfig("texture_cache_depth", (int)IsDlgButtonChecked(m_hWnd, IDC_OLGDEPTH)); + theApp.SetConfig("texture_cache_depth", (int)IsDlgButtonChecked(m_hWnd, IDC_TC_DEPTH)); // Shade Boost @@ -440,7 +440,7 @@ void GSSettingsDlg::UpdateControls() EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_BLEND), ogl && hw); EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_DATE), ogl && hw); EnableWindow(GetDlgItem(m_hWnd, IDC_ACCURATE_COLCLIP), ogl && hw); - EnableWindow(GetDlgItem(m_hWnd, IDC_OLGDEPTH), ogl && hw); + 
EnableWindow(GetDlgItem(m_hWnd, IDC_TC_DEPTH), ogl && hw); //EnableWindow(GetDlgItem(m_hWnd, IDC_AA1), sw); // Let uers set software params regardless of renderer used //EnableWindow(GetDlgItem(m_hWnd, IDC_SWTHREADS_EDIT), sw); diff --git a/plugins/GSdx/GSTextureOGL.cpp b/plugins/GSdx/GSTextureOGL.cpp index 300527064d..bdca672851 100644 --- a/plugins/GSdx/GSTextureOGL.cpp +++ b/plugins/GSdx/GSTextureOGL.cpp @@ -64,7 +64,7 @@ namespace PboPool { gl_GenBuffers(countof(m_pool), m_pool); m_texture_storage = GLLoader::found_GL_ARB_buffer_storage; // Code is really faster on MT driver. So far only nvidia support it - if (!(GLLoader::nvidia_buggy_driver && theApp.GetConfig("enable_nvidia_multi_thread", 1))) + if (!GLLoader::nvidia_buggy_driver) m_texture_storage &= (theApp.GetConfig("ogl_texture_storage", 0) == 1); for (size_t i = 0; i < countof(m_pool); i++) { diff --git a/plugins/GSdx/GSdx.rc b/plugins/GSdx/GSdx.rc index 565f45df5d..a466d9cc15 100644 --- a/plugins/GSdx/GSdx.rc +++ b/plugins/GSdx/GSdx.rc @@ -282,7 +282,7 @@ BEGIN COMBOBOX IDC_AFCOMBO,93,304,35,30,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP LTEXT "OpenCL Device:",IDC_STATIC,6,86,52,8 COMBOBOX IDC_OPENCL_DEVICE,70,84,111,118,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP - CONTROL "HW OGL Depth",IDC_OLGDEPTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,273,72,13 + CONTROL "HW OGL Depth",IDC_TC_DEPTH,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,92,273,72,13 END diff --git a/plugins/GSdx/resource.h b/plugins/GSdx/resource.h index 6e3efef2cf..efc199e802 100644 --- a/plugins/GSdx/resource.h +++ b/plugins/GSdx/resource.h @@ -80,7 +80,7 @@ #define IDC_ALIGN_SPRITE 2096 #define IDC_CRC_LEVEL 2097 #define IDC_CHECK1 2098 -#define IDC_OLGDEPTH 2099 +#define IDC_TC_DEPTH 2099 #define IDC_COLORSPACE 3000 #define IDR_CONVERT_FX 10000 #define IDR_TFX_FX 10001 From a8bcc760b4e75aba28833d95cdc4f2d94b3bc63e Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 27 Jun 2015 11:08:29 +0200 Subject: [PATCH 36/50] gsdx-tc: only clean RT 
in openGL It is a debug feature for myself --- plugins/GSdx/GSTextureCache.cpp | 14 +++++++------- plugins/GSdx/GSTextureCache.h | 1 + plugins/GSdx/GSTextureCacheOGL.h | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index b4df5377a4..f338bcb047 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -200,12 +200,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con return NULL; } -#ifdef ENABLE_OGL_DEBUG } else { GL_CACHE("TC: src hit: %d (0x%x F:0x%x)", src->m_texture ? src->m_texture->GetID() : 0, TEX0.TBP0, TEX0.PSM); -#endif } if (src->m_palette) @@ -308,10 +306,12 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, int // // From a performance point of view, it might cost a little on big upscaling // but normally few RT are miss so it must remain reasonable. - switch (type) { - case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; - case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break; - default:break; + if (IsOpenGL()) { + switch (type) { + case RenderTarget: m_renderer->m_dev->ClearRenderTarget(dst->m_texture, 0); break; + case DepthStencil: m_renderer->m_dev->ClearDepth(dst->m_texture, 0); break; + default:break; + } } #endif } @@ -998,7 +998,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if(tmp != NULL) { - // tmp is texture before a MultiSample resolve + // tmp is the texture before a MultiSample resolve m_renderer->m_dev->Recycle(dst->m_texture); dst->m_texture = tmp; diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 0d337e8662..748f0205c5 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -128,6 +128,7 @@ protected: #endif virtual bool CanConvertDepth() { return m_can_convert_depth; } + virtual bool IsOpenGL() { return false; 
} public: GSTextureCache(GSRenderer* r); diff --git a/plugins/GSdx/GSTextureCacheOGL.h b/plugins/GSdx/GSTextureCacheOGL.h index 40840e7670..4e241d4be4 100644 --- a/plugins/GSdx/GSTextureCacheOGL.h +++ b/plugins/GSdx/GSTextureCacheOGL.h @@ -28,10 +28,12 @@ class GSTextureCacheOGL : public GSTextureCache { protected: - int Get8bitFormat() { return GL_R8; /* TODO return DXGI_FORMAT_A8_UNORM;*/} + int Get8bitFormat() { return GL_R8;} void Read(Target* t, const GSVector4i& r); + virtual bool IsOpenGL() { return true; } + public: GSTextureCacheOGL(GSRenderer* r); }; From d29e375f72c94fb206812e589cf6b8c5814249f9 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 27 Jun 2015 11:24:16 +0200 Subject: [PATCH 37/50] gsdx-tc: GPU accelerate 8 bits texture conversion Only native is supported currently --- plugins/GSdx/GSDeviceOGL.h | 2 +- plugins/GSdx/GSTextureCache.cpp | 24 ++++++++--- plugins/GSdx/res/glsl/convert.glsl | 64 ++++++++++++++++++++++++++++++ plugins/GSdx/res/glsl_source.h | 64 ++++++++++++++++++++++++++++++ 4 files changed, 147 insertions(+), 7 deletions(-) diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index af1d37f625..fd95d6a9cd 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -498,7 +498,7 @@ class GSDeviceOGL : public GSDevice struct { GLuint vs; // program object - GLuint ps[14]; // program object + GLuint ps[15]; // program object GLuint ln; // sampler object GLuint pt; // sampler object GSDepthStencilOGL* dss; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index f338bcb047..0456adf9c7 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -139,7 +139,10 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con uint32 t_psm = (t->m_dirty_alpha) ? 
t->m_TEX0.PSM & ~0x1 : t->m_TEX0.PSM; if (GSUtil::HasSharedBits(bp, psm, t->m_TEX0.TBP0, t_psm)) { - if (psm == PSM_PSMT8) { + if (!IsOpenGL() && (psm == PSM_PSMT8)) { + // OpenGL can convert the texture directly in the GPU. Not sure we want to keep this + // code for DX. It fixes effect but it is slow (MGS3) + // It is a complex to convert the code in shader. As a reference, let's do it on the CPU, it will // be slow but // 1/ it just works :) @@ -808,9 +811,18 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con { // TODO: clean up this mess + // Shader 11 convert depth to color + // Shader 14 convert 32 bits color to 8 bits color + int shader = dst->m_type != RenderTarget ? 11 : 0; + + if (TEX0.PSM == PSM_PSMT8) { + GL_INS("Reading RT as a packed-indexed 8 bits format"); + shader = 14; // ask a conversion to 8 bits format + } + #ifdef ENABLE_OGL_DEBUG - if (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4) { - GL_INS("ERROR: Reading RT as a packed-indexed (0x%x) format is not supported", TEX0.PSM); + if (TEX0.PSM == PSM_PSMT4) { + GL_INS("ERROR: Reading RT as a packed-indexed 4 bits format is not supported"); } #endif @@ -902,6 +914,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Try to extract a texture bigger than the RT. Current solution is to rescale the size // of the texture to fit in the RT. In my opinion, it would be better to increase the size of // the RT + // TODO investigate this code is correct (maybe linked to custom resolution?) if(w > dstsize.x) { scale.x = (float)dstsize.x / tw; @@ -920,6 +933,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con GSTexture* sTex = src->m_texture ? src->m_texture : dst->m_texture; GSTexture* dTex = m_renderer->m_dev->CreateRenderTarget(w, h, false); + // GH: by default (m_paltex == 0) GSdx converts texture to the 32 bit format // However it is different here. We want to reuse a Render Target as a texture. 
// Because the texture is already on the GPU, CPU can't convert it. @@ -952,14 +966,12 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // copy. Likely a speed boost and memory usage reduction. bool linear = (TEX0.PSM == PSM_PSMCT32 || TEX0.PSM == PSM_PSMCT24); - int shader = dst->m_type != RenderTarget ? 11 : 0; - if(!src->m_texture) { src->m_texture = dTex; } - if((sRect == dRect).alltrue() && !shader) + if ((sRect == dRect).alltrue() && !shader) { if (half_right) { // You typically hit this code in snow engine game. Dstsize is the size of of Dx/GL RT diff --git a/plugins/GSdx/res/glsl/convert.glsl b/plugins/GSdx/res/glsl/convert.glsl index b8baf5cbe4..fce5d19b97 100644 --- a/plugins/GSdx/res/glsl/convert.glsl +++ b/plugins/GSdx/res/glsl/convert.glsl @@ -196,6 +196,70 @@ void ps_main13() } #endif +#ifdef ps_main14 +void ps_main14() +{ + // Convert a RGBA texture into a 8 bits packed texture + // Input column: 8x2 RGBA pixels + // 0: 8 RGBA + // 1: 8 RGBA + // Output column: 16x4 Index pixels + // 0: 8 R | 8 B + // 1: 8 R | 8 B + // 2: 8 G | 8 A + // 3: 8 G | 8 A + + float c; + + uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u); + ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u); + + int ty = tb.y | (int(gl_FragCoord.y) & 1); + int txN = tb.x | (int(gl_FragCoord.x) & 7); + int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7); + + vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0); + vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0); + + // Potential speed optimization. There is a high probability that + // game only want to extract a single channel (blue). 
It will allow + // to remove the sel.x condition check + + if ((sel.y & 4u) == 0u) { + // Column 0 and 2 + if ((sel.y & 3u) < 2u) { + // first 2 lines of the col + if (sel.x < 8u) + c = cN.r; + else + c = cN.b; + } else { + if (sel.x < 8u) + c = cH.g; + else + c = cH.a; + } + } else { + // Column 1 and 3 + if ((sel.y & 3u) < 2u) { + // first 2 lines of the col + if (sel.x < 8u) + c = cH.r; + else + c = cH.b; + } else { + if (sel.x < 8u) + c = cN.g; + else + c = cN.a; + } + } + + + SV_Target0 = vec4(c); +} +#endif + #ifdef ps_main7 void ps_main7() { diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index d44aeabd2b..121da0770f 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -221,6 +221,70 @@ static const char* convert_glsl = "}\n" "#endif\n" "\n" + "#ifdef ps_main14\n" + "void ps_main14()\n" + "{\n" + " // Convert a RGBA texture into a 8 bits packed texture\n" + " // Input column: 8x2 RGBA pixels\n" + " // 0: 8 RGBA\n" + " // 1: 8 RGBA\n" + " // Output column: 16x4 Index pixels\n" + " // 0: 8 R | 8 B\n" + " // 1: 8 R | 8 B\n" + " // 2: 8 G | 8 A\n" + " // 3: 8 G | 8 A\n" + "\n" + " float c;\n" + "\n" + " uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n" + " ivec2 tb = ((ivec2(gl_FragCoord.xy) & ~ivec2(15, 3)) >> 1u);\n" + "\n" + " int ty = tb.y | (int(gl_FragCoord.y) & 1);\n" + " int txN = tb.x | (int(gl_FragCoord.x) & 7);\n" + " int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n" + "\n" + " vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n" + " vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n" + "\n" + " // Potential speed optimization. There is a high probability that\n" + " // game only want to extract a single channel (blue). 
It will allow\n" + " // to remove the sel.x condition check\n" + "\n" + " if ((sel.y & 4u) == 0u) {\n" + " // Column 0 and 2\n" + " if ((sel.y & 3u) < 2u) {\n" + " // first 2 lines of the col\n" + " if (sel.x < 8u)\n" + " c = cN.r;\n" + " else\n" + " c = cN.b;\n" + " } else {\n" + " if (sel.x < 8u)\n" + " c = cH.g;\n" + " else\n" + " c = cH.a;\n" + " }\n" + " } else {\n" + " // Column 1 and 3\n" + " if ((sel.y & 3u) < 2u) {\n" + " // first 2 lines of the col\n" + " if (sel.x < 8u)\n" + " c = cH.r;\n" + " else\n" + " c = cH.b;\n" + " } else {\n" + " if (sel.x < 8u)\n" + " c = cN.g;\n" + " else\n" + " c = cN.a;\n" + " }\n" + " }\n" + "\n" + "\n" + " SV_Target0 = vec4(c);\n" + "}\n" + "#endif\n" + "\n" "#ifdef ps_main7\n" "void ps_main7()\n" "{\n" From 6ca7a802bfff5562b3cb0370799a79ec22e4a8a3 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 27 Jun 2015 16:39:44 +0200 Subject: [PATCH 38/50] gsdx-tc: add a big comment to explain the scaling of src in TC --- plugins/GSdx/GSTextureCache.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 0456adf9c7..d5ffcdf783 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -833,7 +833,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con GSTexture* tmp = NULL; - if(dst->m_texture->IsMSAA()) + if (dst->m_texture->IsMSAA()) { tmp = dst->m_texture; @@ -911,10 +911,24 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con GSVector4 dRect(0, 0, w, h); - // Try to extract a texture bigger than the RT. Current solution is to rescale the size - // of the texture to fit in the RT. In my opinion, it would be better to increase the size of - // the RT - // TODO investigate this code is correct (maybe linked to custom resolution?) + // Lengthy explanation of the rescaling code. 
+ // Here an example in 2x: + // RT is 1280x1024 but only contains 512x448 valid data (so 256x224 pixels without upscaling) + // + // PS2 want to read it back as a 1024x1024 pixels (they don't care about the extra pixels) + // So in theory we need to shrink a 2048x2048 RT into a 1024x1024 texture. Obviously the RT is + // too small. + // + // So we will only limit the resize to the available data in RT. + // Therefore we will resize the RT from 1280x1024 to 1280x1024/2048x2048 % of the new texture + // size (which is 1280x1024) (i.e. 800x512) + // From the rendering point of view. UV coordinate will be normalized on the real GS texture size + // This way it can be used on an upscaled texture without extra scaling factor (only requirement is + // to have same proportion) + // + // FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0) + // At 2x it will become 0.5/128 * 256 = 1 (pixel 1) + if(w > dstsize.x) { scale.x = (float)dstsize.x / tw; From 2af7dcbda282c3e6a4b296591a620a3c70338869 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 27 Jun 2015 16:45:31 +0200 Subject: [PATCH 39/50] gsdx-tc: GPU accelerate 8 bits texture when upscaling is enabled Code unscale the texture to ease the conversion. Quality is awful (same as before) but I'm not sure we can support an upscaled texture Maybe the quality loss is due to the reduction without mipmap Maybe the best solution will be to add a hack to extract the blue channel (with texture swizzle), and use a "full page/screen" sprite instead.
(it would be faster too) Note: won't be compatible with MSAA (but gl doesn't support it anyway) --- plugins/GSdx/GSTextureCache.cpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index d5ffcdf783..7dd5dd6384 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -840,6 +840,26 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con dst->m_texture = m_renderer->m_dev->Resolve(dst->m_texture); } + + // Unscale 8 bits textures, quality won't be nice but format is really awful + // Code won't be compatible with MSAA but it is a DX issue + if (TEX0.PSM == PSM_PSMT8) { + GSVector2 old_scale = dst->m_texture->GetScale(); + + if (old_scale != GSVector2(1.0f, 1.0f)) { + GSVector2i size = dst->m_texture->GetSize(); + tmp = dst->m_texture; + + dst->m_texture = m_renderer->m_dev->CreateRenderTarget(size.x, size.y, false); + + GSVector4 sRect(0.0, 0.0, old_scale.x, old_scale.y); + GSVector4 dRect(0.0, 0.0, size.x, size.y); + m_renderer->m_dev->StretchRect(tmp, sRect, dst->m_texture, dRect, 0, false); + + dst->m_texture->SetScale(GSVector2(1.0f, 1.0f)); + } + } + // do not round here!!! 
if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) int w = (int)(dst->m_texture->GetScale().x * tw); From e2727f10ba6facd785d77122b1b9b0d8c7921648 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Sat, 27 Jun 2015 16:58:37 +0200 Subject: [PATCH 40/50] glsl: fix for AMD I hope :) --- plugins/GSdx/res/glsl/tfx_fs.glsl | 2 +- plugins/GSdx/res/glsl_source.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index fb3a3ad653..c3337d4802 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -705,7 +705,7 @@ void ps_main() #endif #if PS_DATE == 3 && !defined(DISABLE_GL42_image) - int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)); + int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r; // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update // the bad alpha value so we must keep it. 
diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 121da0770f..6789ad857e 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1561,7 +1561,7 @@ static const char* tfx_fs_all_glsl = "#endif\n" "\n" "#if PS_DATE == 3 && !defined(DISABLE_GL42_image)\n" - " int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy));\n" + " int stencil_ceil = imageLoad(img_prim_min, ivec2(gl_FragCoord.xy)).r;\n" " // Note gl_PrimitiveID == stencil_ceil will be the primitive that will update\n" " // the bad alpha value so we must keep it.\n" "\n" From 2ecca529d1b9f780a217dd93ec382f006ae547e8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Mon, 29 Jun 2015 08:48:19 +0200 Subject: [PATCH 41/50] gsdx-tc: log dirty target --- plugins/GSdx/GSTextureCache.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 7dd5dd6384..8adad2b21f 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -588,6 +588,9 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b { if(!found && GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)) { + GL_CACHE("TC: Dirty Target(%s) %d (0x%x)", to_string(type), + t->m_texture ? t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); t->m_dirty.push_back(GSDirtyRect(r, psm)); t->m_TEX0.TBW = bw; } @@ -614,6 +617,9 @@ void GSTextureCache::InvalidateVideoMem(GSOffset* off, const GSVector4i& rect, b if(r.bottom > y) { + GL_CACHE("TC: Dirty After Target(%s) %d (0x%x)", to_string(type), + t->m_texture ? 
t->m_texture->GetID() : 0, + t->m_TEX0.TBP0); // TODO: do not add this rect above too t->m_dirty.push_back(GSDirtyRect(GSVector4i(r.left, r.top - y, r.right, r.bottom - y), psm)); t->m_TEX0.TBW = bw; From 6121677aa1064269db333f4d5db0b3ead65270c1 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Mon, 29 Jun 2015 19:17:46 +0200 Subject: [PATCH 42/50] gsdx-tc: use a single shader pass to convert texture in 8 bits format It might save a couple of fps Add a define to test the perf if we keep only the blue channel. It breaks the code in Prince Of Persia that uses the Red/Green channel... Maybe the speed hack :( Or find a way to replace all if with a lookup table Note: it is only supported on OpenGL currently --- plugins/GSdx/GSDevice.h | 8 +++++ plugins/GSdx/GSDeviceOGL.cpp | 8 +++++ plugins/GSdx/GSDeviceOGL.h | 1 + plugins/GSdx/GSTextureCache.cpp | 52 ++++++++++++------------------ plugins/GSdx/res/glsl/convert.glsl | 31 +++++++++++++++--- plugins/GSdx/res/glsl_source.h | 31 +++++++++++++++--- 6 files changed, 92 insertions(+), 39 deletions(-) diff --git a/plugins/GSdx/GSDevice.h b/plugins/GSdx/GSDevice.h index 0a5b629bec..8fde98d93f 100644 --- a/plugins/GSdx/GSDevice.h +++ b/plugins/GSdx/GSDevice.h @@ -28,6 +28,14 @@ #pragma pack(push, 1) +class ConvertConstantBuffer +{ +public: + GSVector4i ScalingFactor; + + ConvertConstantBuffer() {memset(this, 0, sizeof(*this));} +}; + class MergeConstantBuffer { public: diff --git a/plugins/GSdx/GSDeviceOGL.cpp b/plugins/GSdx/GSDeviceOGL.cpp index c89a81b1f9..cd8e075084 100644 --- a/plugins/GSdx/GSDeviceOGL.cpp +++ b/plugins/GSdx/GSDeviceOGL.cpp @@ -39,6 +39,7 @@ static const uint32 g_merge_cb_index = 10; static const uint32 g_interlace_cb_index = 11; static const uint32 g_shadeboost_cb_index = 12; static const uint32 g_fx_cb_index = 14; +static const uint32 g_convert_index = 15; bool GSDeviceOGL::m_debug_gl_call = false; int GSDeviceOGL::s_n = 0; @@ -103,6 +104,7 @@ GSDeviceOGL::~GSDeviceOGL() delete m_convert.dss; delete
m_convert.dss_write; delete m_convert.bs; + delete m_convert.cb; // Clean m_fxaa delete m_fxaa.cb; @@ -242,6 +244,12 @@ bool GSDeviceOGL::Create(GSWnd* wnd) // **************************************************************** // convert // **************************************************************** + m_convert.cb = new GSUniformBufferOGL(g_convert_index, sizeof(ConvertConstantBuffer)); + // Upload once and forget about it + ConvertConstantBuffer cb; + cb.ScalingFactor = GSVector4i(theApp.GetConfig("nativeres", 0) ? 1 : theApp.GetConfig("upscale_multiplier", 2)); + m_convert.cb->upload(&cb); + m_convert.vs = m_shader->Compile("convert.glsl", "vs_main", GL_VERTEX_SHADER, convert_glsl); for(size_t i = 0; i < countof(m_convert.ps); i++) m_convert.ps[i] = m_shader->Compile("convert.glsl", format("ps_main%d", i), GL_FRAGMENT_SHADER, convert_glsl); diff --git a/plugins/GSdx/GSDeviceOGL.h b/plugins/GSdx/GSDeviceOGL.h index fd95d6a9cd..46611072e3 100644 --- a/plugins/GSdx/GSDeviceOGL.h +++ b/plugins/GSdx/GSDeviceOGL.h @@ -504,6 +504,7 @@ class GSDeviceOGL : public GSDevice GSDepthStencilOGL* dss; GSDepthStencilOGL* dss_write; GSBlendStateOGL* bs; + GSUniformBufferOGL* cb; } m_convert; struct { diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 8adad2b21f..2892364aba 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -820,8 +820,9 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // Shader 11 convert depth to color // Shader 14 convert 32 bits color to 8 bits color int shader = dst->m_type != RenderTarget ? 
11 : 0; + bool is_8bits = TEX0.PSM == PSM_PSMT8 && IsOpenGL(); - if (TEX0.PSM == PSM_PSMT8) { + if (is_8bits) { GL_INS("Reading RT as a packed-indexed 8 bits format"); shader = 14; // ask a conversion to 8 bits format } @@ -847,29 +848,15 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con } - // Unscale 8 bits textures, quality won't be nice but format is really awful - // Code won't be compatible with MSAA but it is a DX issue - if (TEX0.PSM == PSM_PSMT8) { - GSVector2 old_scale = dst->m_texture->GetScale(); - - if (old_scale != GSVector2(1.0f, 1.0f)) { - GSVector2i size = dst->m_texture->GetSize(); - tmp = dst->m_texture; - - dst->m_texture = m_renderer->m_dev->CreateRenderTarget(size.x, size.y, false); - - GSVector4 sRect(0.0, 0.0, old_scale.x, old_scale.y); - GSVector4 dRect(0.0, 0.0, size.x, size.y); - m_renderer->m_dev->StretchRect(tmp, sRect, dst->m_texture, dRect, 0, false); - - dst->m_texture->SetScale(GSVector2(1.0f, 1.0f)); - } - } - // do not round here!!! if edge becomes a black pixel and addressing mode is clamp => everything outside the clamped area turns into black (kh2 shadows) int w = (int)(dst->m_texture->GetScale().x * tw); int h = (int)(dst->m_texture->GetScale().y * th); + if (is_8bits) { + // Unscale 8 bits textures, quality won't be nice but format is really awful + w = tw; + h = th; + } GSVector2i dstsize = dst->m_texture->GetSize(); @@ -955,18 +942,21 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0) // At 2x it will become 0.5/128 * 256 = 1 (pixel 1) - if(w > dstsize.x) - { - scale.x = (float)dstsize.x / tw; - dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; - w = dstsize.x; - } + if (!is_8bits) { + // 8 bits handling is special due to unscaling. 
It is better to not execute this code + if (w > dstsize.x) + { + scale.x = (float)dstsize.x / tw; + dRect.z = (float)dstsize.x * scale.x / dst->m_texture->GetScale().x; + w = dstsize.x; + } - if(h > dstsize.y) - { - scale.y = (float)dstsize.y / th; - dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; - h = dstsize.y; + if (h > dstsize.y) + { + scale.y = (float)dstsize.y / th; + dRect.w = (float)dstsize.y * scale.y / dst->m_texture->GetScale().y; + h = dstsize.y; + } } GSVector4 sRect(0, 0, w, h); diff --git a/plugins/GSdx/res/glsl/convert.glsl b/plugins/GSdx/res/glsl/convert.glsl index fce5d19b97..5e5b6841c1 100644 --- a/plugins/GSdx/res/glsl/convert.glsl +++ b/plugins/GSdx/res/glsl/convert.glsl @@ -70,6 +70,11 @@ layout(bindless_sampler, location = 0) uniform sampler2D TextureSampler; layout(binding = 0) uniform sampler2D TextureSampler; #endif +layout(std140, binding = 15) uniform cb15 +{ + ivec4 ScalingFactor; +}; + vec4 sample_c() { return texture(TextureSampler, PSin_t ); @@ -199,6 +204,15 @@ void ps_main13() #ifdef ps_main14 void ps_main14() { + + // Potential speed optimization. There is a high probability that + // game only want to extract a single channel (blue). It will allow + // to remove most of the conditional operation and yield a +2/3 fps + // boost on MGS3 + // + // Hypothesis wrong in Prince of Persia ... Seriously WTF ! 
+//#define ONLY_BLUE; + // Convert a RGBA texture into a 8 bits packed texture // Input column: 8x2 RGBA pixels // 0: 8 RGBA @@ -208,7 +222,6 @@ void ps_main14() // 1: 8 R | 8 B // 2: 8 G | 8 A // 3: 8 G | 8 A - float c; uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u); @@ -218,15 +231,20 @@ void ps_main14() int txN = tb.x | (int(gl_FragCoord.x) & 7); int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7); + txN *= ScalingFactor.x; + txH *= ScalingFactor.x; + ty *= ScalingFactor.y; + + // TODO investigate texture gather vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0); vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0); - // Potential speed optimization. There is a high probability that - // game only want to extract a single channel (blue). It will allow - // to remove the sel.x condition check if ((sel.y & 4u) == 0u) { // Column 0 and 2 +#ifdef ONLY_BLUE + c = cN.b; +#else if ((sel.y & 3u) < 2u) { // first 2 lines of the col if (sel.x < 8u) @@ -239,7 +257,11 @@ void ps_main14() else c = cH.a; } +#endif } else { +#ifdef ONLY_BLUE + c = cH.b; +#else // Column 1 and 3 if ((sel.y & 3u) < 2u) { // first 2 lines of the col @@ -253,6 +275,7 @@ void ps_main14() else c = cN.a; } +#endif } diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index 6789ad857e..aac59407a5 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -95,6 +95,11 @@ static const char* convert_glsl = "layout(binding = 0) uniform sampler2D TextureSampler;\n" "#endif\n" "\n" + "layout(std140, binding = 15) uniform cb15\n" + "{\n" + " ivec4 ScalingFactor;\n" + "};\n" + "\n" "vec4 sample_c()\n" "{\n" " return texture(TextureSampler, PSin_t );\n" @@ -224,6 +229,15 @@ static const char* convert_glsl = "#ifdef ps_main14\n" "void ps_main14()\n" "{\n" + "\n" + " // Potential speed optimization. There is a high probability that\n" + " // game only want to extract a single channel (blue). 
It will allow\n" + " // to remove most of the conditional operation and yield a +2/3 fps\n" + " // boost on MGS3\n" + " //\n" + " // Hypothesis wrong in Prince of Persia ... Seriously WTF !\n" + "//#define ONLY_BLUE;\n" + "\n" " // Convert a RGBA texture into a 8 bits packed texture\n" " // Input column: 8x2 RGBA pixels\n" " // 0: 8 RGBA\n" @@ -233,7 +247,6 @@ static const char* convert_glsl = " // 1: 8 R | 8 B\n" " // 2: 8 G | 8 A\n" " // 3: 8 G | 8 A\n" - "\n" " float c;\n" "\n" " uvec2 sel = uvec2(gl_FragCoord.xy) % uvec2(16u, 16u);\n" @@ -243,15 +256,20 @@ static const char* convert_glsl = " int txN = tb.x | (int(gl_FragCoord.x) & 7);\n" " int txH = tb.x | ((int(gl_FragCoord.x) + 4) & 7);\n" "\n" + " txN *= ScalingFactor.x;\n" + " txH *= ScalingFactor.x;\n" + " ty *= ScalingFactor.y;\n" + "\n" + " // TODO investigate texture gather\n" " vec4 cN = texelFetch(TextureSampler, ivec2(txN, ty), 0);\n" " vec4 cH = texelFetch(TextureSampler, ivec2(txH, ty), 0);\n" "\n" - " // Potential speed optimization. There is a high probability that\n" - " // game only want to extract a single channel (blue). 
It will allow\n" - " // to remove the sel.x condition check\n" "\n" " if ((sel.y & 4u) == 0u) {\n" " // Column 0 and 2\n" + "#ifdef ONLY_BLUE\n" + " c = cN.b;\n" + "#else\n" " if ((sel.y & 3u) < 2u) {\n" " // first 2 lines of the col\n" " if (sel.x < 8u)\n" @@ -264,7 +282,11 @@ static const char* convert_glsl = " else\n" " c = cH.a;\n" " }\n" + "#endif\n" " } else {\n" + "#ifdef ONLY_BLUE\n" + " c = cH.b;\n" + "#else\n" " // Column 1 and 3\n" " if ((sel.y & 3u) < 2u) {\n" " // first 2 lines of the col\n" @@ -278,6 +300,7 @@ static const char* convert_glsl = " else\n" " c = cN.a;\n" " }\n" + "#endif\n" " }\n" "\n" "\n" From beafbd9768d3cd5cad18e72eedabadccc70ffb4f Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Mon, 29 Jun 2015 20:13:32 +0200 Subject: [PATCH 43/50] gsdx: add a warning for the future --- plugins/GSdx/GSDevice.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/plugins/GSdx/GSDevice.cpp b/plugins/GSdx/GSDevice.cpp index ef0b0cf30a..69af182383 100644 --- a/plugins/GSdx/GSDevice.cpp +++ b/plugins/GSdx/GSDevice.cpp @@ -158,6 +158,15 @@ void GSDevice::Recycle(GSTexture* t) { if(t) { + // FIXME: WARNING: Broken Texture Cache reuse render target without any + // cleaning (or uploading of correct gs mem data). Ofc it is wrong. If + // blending is enabled, rendering would be completely broken. However + // due to wrong invalidation of the TC it is sometimes better to reuse + // (partially) wrong data... + // + // Invalidating the data might be even worse. I'm not sure invalidating data really + // helps the perf. But people report better perf on BDG2 (memory intensive) on OpenGL. + // It could be the reason. 
t->Invalidate(); t->last_frame_used = m_frame; From be1403cdc27e683fc305b5e6edabcbcf49901ed8 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Mon, 29 Jun 2015 21:41:36 +0200 Subject: [PATCH 44/50] gsdx-ogl: support texture shuffling on !FST Mostly fix "Finding Nemo" It remains a shadows issue when you enable accurate_fbmask and depth --- plugins/GSdx/GSRendererOGL.cpp | 73 ++++++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/plugins/GSdx/GSRendererOGL.cpp b/plugins/GSdx/GSRendererOGL.cpp index 1bf66eb724..9b02d411fd 100644 --- a/plugins/GSdx/GSRendererOGL.cpp +++ b/plugins/GSdx/GSRendererOGL.cpp @@ -260,34 +260,65 @@ void GSRendererOGL::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour int pos = (v[0].XYZ.X - o.OFX) & 0xFF; bool write_ba = (pos > 112 && pos < 136); // Read texture is 8 to 16 pixels (same as above) - int tex_pos = v[0].U & 0xFF; + float tw = (float)(1u << context->TEX0.TW); + int tex_pos = (PRIM->FST) ? v[0].U : tw * v[0].ST.S; + tex_pos &= 0xFF; ps_sel.read_ba = (tex_pos > 112 && tex_pos < 144); - //GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U); - // Convert the vertex info to a 32 bits color format equivalent - for(size_t i = 0; i < count; i += 2) { - if (write_ba) - v[i].XYZ.X -= 128u; - else - v[i+1].XYZ.X += 128u; + if (PRIM->FST) { + GL_INS("First vertex is P: %d => %d T: %d => %d", v[0].XYZ.X, v[1].XYZ.X, v[0].U, v[1].U); - if (ps_sel.read_ba) - v[i].U -= 128u; - else - v[i+1].U += 128u; + for(size_t i = 0; i < count; i += 2) { + if (write_ba) + v[i].XYZ.X -= 128u; + else + v[i+1].XYZ.X += 128u; - // Height is too big (2x). - int tex_offset = v[i].V & 0xF; - GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); + if (ps_sel.read_ba) + v[i].U -= 128u; + else + v[i+1].U += 128u; - GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i+1].XYZ.Y, v[i+1].V); - tmp = GSVector4i(tmp - offset).srl32(1) + offset; + // Height is too big (2x). 
+ int tex_offset = v[i].V & 0xF; + GSVector4i offset(o.OFY, tex_offset, o.OFY, tex_offset); - v[i].XYZ.Y = tmp.x; - v[i].V = tmp.y; - v[i+1].XYZ.Y = tmp.z; - v[i+1].V = tmp.w; + GSVector4i tmp(v[i].XYZ.Y, v[i].V, v[i+1].XYZ.Y, v[i+1].V); + tmp = GSVector4i(tmp - offset).srl32(1) + offset; + + v[i].XYZ.Y = tmp.x; + v[i].V = tmp.y; + v[i+1].XYZ.Y = tmp.z; + v[i+1].V = tmp.w; + } + } else { + const float offset_8pix = 8.0f / tw; + GL_INS("First vertex is P: %d => %d T: %f => %f (offset %f)", v[0].XYZ.X, v[1].XYZ.X, v[0].ST.S, v[1].ST.S, offset_8pix); + + for(size_t i = 0; i < count; i += 2) { + if (write_ba) + v[i].XYZ.X -= 128u; + else + v[i+1].XYZ.X += 128u; + + if (ps_sel.read_ba) + v[i].ST.S -= offset_8pix; + else + v[i+1].ST.S += offset_8pix; + + // Height is too big (2x). + GSVector4i offset(o.OFY, o.OFY); + + GSVector4i tmp(v[i].XYZ.Y, v[i+1].XYZ.Y); + tmp = GSVector4i(tmp - offset).srl32(1) + offset; + + //fprintf(stderr, "Before %d, After %d\n", v[i+1].XYZ.Y, tmp.y); + v[i].XYZ.Y = tmp.x; + v[i].ST.T /= 2.0f; + v[i+1].XYZ.Y = tmp.y; + v[i+1].ST.T /= 2.0f; + } } // Please bang my head against the wall! 
From d46e41533d1e850c21b9975cf963d990738f5958 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 30 Jun 2015 10:11:13 +0200 Subject: [PATCH 45/50] gsdx: remove various CRC of the openGL level It is mostly the texture shuffle effect which is now emulated correctly on openGL They remain in the Dx level as speedhack in particular for * Castlevania; * GodOfWar; * StarOcean3; * ValkyrieProfile2; * RadiataStories; Remove old commented hack: Drakengard2, ArctheLad, ReZ Separate hacks that are only enabled in aggressive mode --- plugins/GSdx/GSCrc.cpp | 9 -- plugins/GSdx/GSCrc.h | 3 - plugins/GSdx/GSState.cpp | 294 ++++++++++++++++++--------------------- 3 files changed, 138 insertions(+), 168 deletions(-) diff --git a/plugins/GSdx/GSCrc.cpp b/plugins/GSdx/GSCrc.cpp index 64fd873341..25cf0697db 100644 --- a/plugins/GSdx/GSCrc.cpp +++ b/plugins/GSdx/GSCrc.cpp @@ -173,9 +173,6 @@ CRC::Game CRC::m_games[] = {0xA32F7CD0, AceCombat4, US, 0}, {0x5ED8FB53, AceCombat4, JP, 0}, {0x1B9B7563, AceCombat4, EU, 0}, - {0xEC432B24, Drakengard2, EU, 0}, - {0x1648E3C9, Drakengard2, US, 0}, - {0xB7ADB13A, Drakengard2, CH, 0}, {0xFC46EA61, Tekken5, JP, 0}, {0x1F88EE37, Tekken5, EU, 0}, {0x1F88BECD, Tekken5, EU, 0}, //language selector... @@ -296,9 +293,6 @@ CRC::Game CRC::m_games[] = {0x90F0D852, BigMuthaTruckers, US, 0}, {0x5CC9BF81, TimeSplitters2, EU, 0}, {0x12532F1C, TimeSplitters2, US, 0}, - {0xA33748AA, ReZ, US, 0}, - {0xAE1152EB, ReZ, EU, 0}, - {0xD2EA890A, ReZ, JP, 0}, {0xC818BEC2, LordOfTheRingsTwoTowers, US, 0}, {0xDC43F2B8, LordOfTheRingsTwoTowers, EU, 0}, {0x9ABF90FB, LordOfTheRingsTwoTowers, ES, 0}, @@ -354,9 +348,6 @@ CRC::Game CRC::m_games[] = // DMC(1)? 
{0x79B8A95F, DevilMayCry3, US, 0}, {0x7F3D692D, DevilMayCry3, CH, 0}, // {0x1A85E924, DevilMayCry3, CH, 0}, // same CRC as {GodOfWar, NoRegion} - {0x0a8ef911, ArctheLad, US, 0}, // cutie comment - {0x2C5E7DEA, ArctheLad, CH, 0}, - {0xE69E7F58, ArctheLad, US, 0}, // cutie comment {0xB1995E29, ShadowofRome, EU, 0}, // cutie comment {0x958DCA28, ShadowofRome, EU, 0}, {0x57818AF6, ShadowofRome, US, 0}, diff --git a/plugins/GSdx/GSCrc.h b/plugins/GSdx/GSCrc.h index 498a90b9dd..7b7e43aed4 100644 --- a/plugins/GSdx/GSCrc.h +++ b/plugins/GSdx/GSCrc.h @@ -61,7 +61,6 @@ public: ResidentEvil4, Spartan, AceCombat4, - Drakengard2, Tekken5, IkkiTousen, GodOfWar, @@ -97,11 +96,9 @@ public: Sly3, Sly2, ShadowofRome, - ArctheLad, DemonStone, BigMuthaTruckers, TimeSplitters2, - ReZ, LordOfTheRingsTwoTowers, LordOfTheRingsThirdAge, RedDeadRevolver, diff --git a/plugins/GSdx/GSState.cpp b/plugins/GSdx/GSState.cpp index b3ff6783f6..df608141ab 100644 --- a/plugins/GSdx/GSState.cpp +++ b/plugins/GSdx/GSState.cpp @@ -3045,6 +3045,7 @@ bool GSC_Okami(const GSFrameInfo& fi, int& skip) bool GSC_MetalGearSolid3(const GSFrameInfo& fi, int& skip) { + // Game requires sub RT support (texture cache limitation) if(skip == 0) { if(fi.TME && fi.FBP == 0x02000 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01000) && fi.TPSM == PSM_PSMCT24) @@ -3117,7 +3118,11 @@ bool GSC_DBZBT3(const GSFrameInfo& fi, int& skip) } else if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x00e00 || fi.FBP == 0x01000) && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMZ16) { - skip = 5; + // Texture shuffling must work on openGL + if (Dx_only) + skip = 5; + else + return false; } else if(fi.TME && fi.FPSM == fi.TPSM && fi.TBP0 == 0x03f00 && fi.TPSM == PSM_PSMCT32) { @@ -3278,6 +3283,7 @@ bool GSC_ICO(const GSFrameInfo& fi, int& skip) bool GSC_GT4(const GSFrameInfo& fi, int& skip) { + // Game requires to extract source from RT (block boundary) (texture cache limitation) if(skip == 0) { if(fi.TME && fi.FBP >= 
0x02f00 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180 /*|| fi.TBP0 == 0x01a40*/) && fi.TPSM == PSM_PSMT8) //TBP0 0x1a40 progressive @@ -3300,6 +3306,7 @@ bool GSC_GT4(const GSFrameInfo& fi, int& skip) bool GSC_GT3(const GSFrameInfo& fi, int& skip) { + // Same issue as GSC_GT4 ??? if(skip == 0) { if(fi.TME && fi.FBP >= 0x02de0 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01180) && fi.TPSM == PSM_PSMT8) @@ -3313,6 +3320,7 @@ bool GSC_GT3(const GSFrameInfo& fi, int& skip) bool GSC_GTConcept(const GSFrameInfo& fi, int& skip) { + // Same issue as GSC_GT4 ??? if(skip == 0) { if(fi.TME && fi.FBP >= 0x03420 && fi.FPSM == PSM_PSMCT32 && (fi.TBP0 == 0x00000 || fi.TBP0 == 0x01400) && fi.TPSM == PSM_PSMT8) @@ -3414,7 +3422,7 @@ bool GSC_ResidentEvil4(const GSFrameInfo& fi, int& skip) { skip = 176; } - else if(fi.TME && fi.FBP ==0x03100 && (fi.TBP0==0x2a00 ||fi.TBP0==0x3480) && fi.TPSM ==0 && fi.FBMSK == 0) + else if(fi.TME && fi.FBP ==0x03100 && (fi.TBP0==0x2a00 ||fi.TBP0==0x3480) && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0) { skip = 1; } @@ -3428,7 +3436,7 @@ bool GSC_SacredBlaze(const GSFrameInfo& fi, int& skip) //Fix Sacred Blaze rendering glitches if(skip == 0) { - if(fi.TME && (fi.FBP==0x0000 || fi.FBP==0x0e00) && (fi.TBP0==0x2880 || fi.TBP0==0x2a80 ) && fi.FPSM==fi.TPSM && fi.TPSM == PSM_PSMCT32 && fi.TPSM ==0 && fi.FBMSK == 0x0) + if(fi.TME && (fi.FBP==0x0000 || fi.FBP==0x0e00) && (fi.TBP0==0x2880 || fi.TBP0==0x2a80 ) && fi.FPSM==fi.TPSM && fi.TPSM == PSM_PSMCT32 && fi.FBMSK == 0x0) { skip = 1; } @@ -3504,25 +3512,6 @@ bool GSC_AceCombat4(const GSFrameInfo& fi, int& skip) return true; } -bool GSC_Drakengard2(const GSFrameInfo& fi, int& skip) -{ - // Below hack breaks the GUI - - /*if(skip == 0) - { - if(g_crc_region == CRC::CH && fi.TME && fi.FBP == 0x026c0 && fi.TBP0 == 0x00a00 && fi.FPSM ==2) - { - skip =34; - } - if((g_crc_region == CRC::US || g_crc_region == CRC::EU) && fi.TME && fi.FBP == 0x026c0 && fi.FPSM == PSM_PSMCT32 
&& fi.TBP0 == 0x00a00 && fi.TPSM == PSM_PSMCT32) - { - skip = 64; - } - }*/ - - return true; -} - bool GSC_Tekken5(const GSFrameInfo& fi, int& skip) { if(skip == 0) @@ -3749,7 +3738,11 @@ bool GSC_Genji(const GSFrameInfo& fi, int& skip) { if(fi.TME && fi.FBP == 0x01500 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x00e00 && fi.TPSM == PSM_PSMZ16) { - skip = 6; + // likely fixed in openGL (texture shuffle) + if (Dx_only) + skip = 6; + else + return false; } else if(fi.TPSM == PSM_PSMCT24 && fi.TME ==0x0001 && fi.TBP0==fi.FBP) { @@ -3837,7 +3830,7 @@ bool GSC_RadiataStories(const GSFrameInfo& fi, int& skip) { skip = 1; } - else if(Dx_only && fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH) + else if(fi.TME && fi.FBP == fi.TBP0 && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMT4HH) { // GH: Hack is quite similar to GSC_StarOcean3. It is potentially the same issue. // Fixed on openGL @@ -3861,7 +3854,10 @@ bool GSC_HauntingGround(const GSFrameInfo& fi, int& skip) { if(fi.TME && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16S && fi.FBMSK == 0x03FFF) { - skip = 1; + if (Dx_only) + skip = 1; + else + return false; } else if(fi.TME && fi.FBP == 0x3000 && fi.TBP0 == 0x3380) { @@ -3994,6 +3990,7 @@ bool GSC_EternalPoison(const GSFrameInfo& fi, int& skip) { if(skip == 0) { + // Texture shuffle ??? 
if(fi.TPSM == PSM_PSMCT16S && fi.TBP0 == 0x3200) { skip = 1; @@ -4178,18 +4175,6 @@ bool GSC_FFX(const GSFrameInfo& fi, int& skip) return true; } -bool GSC_ArctheLad(const GSFrameInfo& fi, int& skip) -{ - if(skip == 0) - { - /*if(fi.TPSM == PSM_PSMT8H && fi.FBMSK >= 0xFFFFFFF) - { - skip = 1; - }*/ - } - return true; -} - bool GSC_DemonStone(const GSFrameInfo& fi, int& skip) { if(skip == 0) @@ -4236,20 +4221,6 @@ bool GSC_TimeSplitters2(const GSFrameInfo& fi, int& skip) return true; } -bool GSC_ReZ(const GSFrameInfo& fi, int& skip) -{ - //not needed anymore - /*if(skip == 0) - { - if(fi.TME && (fi.FBP == 0x00000 || fi.FBP == 0x008c0 || fi.FBP == 0x00a00) && fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT32) - { - skip = 1; - } - }*/ - - return true; -} - bool GSC_LordOfTheRingsTwoTowers(const GSFrameInfo& fi, int& skip) { if(skip == 0) @@ -4384,6 +4355,7 @@ bool GSC_Black(const GSFrameInfo& fi, int& skip) { if(skip == 0) { + // Note: the first part of the hack must be fixed in openGL (texture shuffle). 
Remains the 2nd part (HasSharedBits) if(fi.TME /*&& (fi.FBP == 0x00000 || fi.FBP == 0x008c0)*/ && fi.FPSM == PSM_PSMCT16 && (fi.TBP0 == 0x01a40 || fi.TBP0 == 0x01b80 || fi.TBP0 == 0x030c0) && fi.TPSM == PSM_PSMZ16 || (GSUtil::HasSharedBits(fi.FBP, fi.FPSM, fi.TBP0, fi.TPSM))) { skip = 5; @@ -4516,7 +4488,7 @@ bool GSC_DevilMayCry3(const GSFrameInfo& fi, int& skip) if(skip == 0) { - if(fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01000 && fi.TPSM == PSM_PSMZ16) + if(Dx_only && fi.TME && fi.FBP == 0x01800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01000 && fi.TPSM == PSM_PSMZ16) { skip = 32; } @@ -4588,7 +4560,7 @@ bool GSC_BlackHawkDown(const GSFrameInfo& fi, int& skip) { if(skip == 0) { - if(fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01800 && fi.TPSM == PSM_PSMZ16) + if(Dx_only && fi.TME && fi.FBP == 0x00800 && fi.FPSM == PSM_PSMCT16 && fi.TBP0 == 0x01800 && fi.TPSM == PSM_PSMZ16) { skip = 2; //wall of fog } @@ -4611,6 +4583,8 @@ bool GSC_Burnout(const GSFrameInfo& fi, int& skip) } else if(fi.TME && fi.FPSM == PSM_PSMCT16 && fi.TPSM == PSM_PSMZ16) //fog { + if (!Dx_only) return false; + if(fi.FBP == 0x00a00 && fi.TBP0 == 0x01e00) { skip = 4; //pal @@ -4834,6 +4808,8 @@ bool GSC_ZettaiZetsumeiToshi2(const GSFrameInfo& fi, int& skip) } else if((fi.FBP | fi.TBP0)&& fi.FPSM == fi.TPSM && fi.TPSM == PSM_PSMCT16 && fi.FBMSK == 0x3FFF) { + // Note start of the effect (texture shuffle) is fixed in openGL but maybe not the extra draw + // call.... 
skip = 1000; } @@ -5430,131 +5406,137 @@ bool GSState::IsBadFrame(int& skip, int UserHacks_SkipDraw) memset(map, 0, sizeof(map)); if (s_crc_hack_level > 1) { - map[CRC::Okami] = GSC_Okami; - map[CRC::MetalGearSolid3] = GSC_MetalGearSolid3; + map[CRC::AceCombat4] = GSC_AceCombat4; + map[CRC::AlpineRacer3] = GSC_AlpineRacer3; + map[CRC::Black] = GSC_Black; + map[CRC::BlackHawkDown] = GSC_BlackHawkDown; + map[CRC::BleachBladeBattlers] = GSC_BleachBladeBattlers; + map[CRC::BullyCC] = GSC_BullyCC; // Bully is fixed, maybe this one too? + map[CRC::BurnoutDominator] = GSC_Burnout; + map[CRC::BurnoutRevenge] = GSC_Burnout; + map[CRC::BurnoutTakedown] = GSC_Burnout; + map[CRC::CaptainTsubasa] = GSC_CaptainTsubasa; + map[CRC::CrashBandicootWoC] = GSC_CrashBandicootWoC; + map[CRC::CrashNburn] = GSC_CrashNburn; map[CRC::DBZBT2] = GSC_DBZBT2; map[CRC::DBZBT3] = GSC_DBZBT3; - map[CRC::SFEX3] = GSC_SFEX3; - map[CRC::Bully] = GSC_Bully; - map[CRC::BullyCC] = GSC_BullyCC; - map[CRC::SoTC] = GSC_SoTC; - map[CRC::OnePieceGrandAdventure] = GSC_OnePieceGrandAdventure; - map[CRC::OnePieceGrandBattle] = GSC_OnePieceGrandBattle; - map[CRC::ICO] = GSC_ICO; - map[CRC::GT4] = GSC_GT4; - map[CRC::GT3] = GSC_GT3; - map[CRC::GTConcept] = GSC_GTConcept; - map[CRC::WildArms4] = GSC_WildArms4; - map[CRC::WildArms5] = GSC_WildArms5; - map[CRC::Manhunt2] = GSC_Manhunt2; - map[CRC::CrashBandicootWoC] = GSC_CrashBandicootWoC; - map[CRC::ResidentEvil4] = GSC_ResidentEvil4; - map[CRC::Spartan] = GSC_Spartan; - map[CRC::AceCombat4] = GSC_AceCombat4; - map[CRC::Drakengard2] = GSC_Drakengard2; - map[CRC::Tekken5] = GSC_Tekken5; - map[CRC::IkkiTousen] = GSC_IkkiTousen; - map[CRC::GodOfWar] = GSC_GodOfWar; - map[CRC::GodOfWar2] = GSC_GodOfWar2; - map[CRC::GiTS] = GSC_GiTS; - map[CRC::Onimusha3] = GSC_Onimusha3; - map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss; - map[CRC::SonicUnleashed] = GSC_SonicUnleashed; - map[CRC::SimpsonsGame] = GSC_SimpsonsGame; - map[CRC::Genji] = GSC_Genji; - map[CRC::RadiataStories] = 
GSC_RadiataStories; - map[CRC::HauntingGround] = GSC_HauntingGround; + map[CRC::DeathByDegreesTekkenNinaWilliams] = GSC_DeathByDegreesTekkenNinaWilliams; + map[CRC::DevilMayCry3] = GSC_DevilMayCry3; + map[CRC::EternalPoison] = GSC_EternalPoison; map[CRC::EvangelionJo] = GSC_EvangelionJo; - map[CRC::SuikodenTactics] = GSC_SuikodenTactics; - map[CRC::CaptainTsubasa] = GSC_CaptainTsubasa; - map[CRC::Oneechanbara2Special] = GSC_Oneechanbara2Special; + map[CRC::FFVIIDoC] = GSC_FFVIIDoC; + map[CRC::FightingBeautyWulong] = GSC_FightingBeautyWulong; + map[CRC::FinalFightStreetwise] = GSC_FinalFightStreetwise; + map[CRC::FrontMission5] = GSC_FrontMission5; + map[CRC::Genji] = GSC_Genji; + map[CRC::GetaWayBlackMonday] = GSC_GetaWay; + map[CRC::GetaWay] = GSC_GetaWay; + map[CRC::GodHand] = GSC_GodHand; + map[CRC::GodOfWar2] = GSC_GodOfWar2; + map[CRC::GT3] = GSC_GT3; + map[CRC::GT4] = GSC_GT4; + map[CRC::GTASanAndreas] = GSC_GTASanAndreas; + map[CRC::GTConcept] = GSC_GTConcept; + map[CRC::HauntingGround] = GSC_HauntingGround; + map[CRC::HeavyMetalThunder] = GSC_HeavyMetalThunder; + map[CRC::HummerBadlands] = GSC_HummerBadlands; + map[CRC::ICO] = GSC_ICO; + map[CRC::IkkiTousen] = GSC_IkkiTousen; + map[CRC::JamesBondEverythingOrNothing] = GSC_JamesBondEverythingOrNothing; + map[CRC::KnightsOfTheTemple2] = GSC_KnightsOfTheTemple2; + map[CRC::Kunoichi] = GSC_Kunoichi; + map[CRC::LordOfTheRingsThirdAge] = GSC_LordOfTheRingsThirdAge; + map[CRC::Manhunt2] = GSC_Manhunt2; + map[CRC::MetalGearSolid3] = GSC_MetalGearSolid3; + map[CRC::MidnightClub3] = GSC_MidnightClub3; + map[CRC::NanoBreaker] = GSC_NanoBreaker; map[CRC::NarutimateAccel] = GSC_NarutimateAccel; map[CRC::Naruto] = GSC_Naruto; - map[CRC::EternalPoison] = GSC_EternalPoison; - map[CRC::LegoBatman] = GSC_LegoBatman; - map[CRC::SakuraTaisen] = GSC_SakuraTaisen; - map[CRC::TenchuWoH] = GSC_Tenchu; - map[CRC::TenchuFS] = GSC_Tenchu; - map[CRC::Sly3] = GSC_Sly3; - map[CRC::Sly2] = GSC_Sly2; - map[CRC::ShadowofRome] = 
GSC_ShadowofRome; - map[CRC::FFXII] = GSC_FFXII; - map[CRC::FFX2] = GSC_FFX2; - map[CRC::FFX] = GSC_FFX; - map[CRC::ArctheLad] = GSC_ArctheLad; - map[CRC::DemonStone] = GSC_DemonStone; - map[CRC::BigMuthaTruckers] = GSC_BigMuthaTruckers; - map[CRC::TimeSplitters2] = GSC_TimeSplitters2; - map[CRC::ReZ] = GSC_ReZ; - map[CRC::LordOfTheRingsTwoTowers] = GSC_LordOfTheRingsTwoTowers; - map[CRC::LordOfTheRingsThirdAge] = GSC_LordOfTheRingsThirdAge; + map[CRC::Oneechanbara2Special] = GSC_Oneechanbara2Special; + map[CRC::Onimusha3] = GSC_Onimusha3; map[CRC::RedDeadRevolver] = GSC_RedDeadRevolver; - map[CRC::HeavyMetalThunder] = GSC_HeavyMetalThunder; - map[CRC::BleachBladeBattlers] = GSC_BleachBladeBattlers; - map[CRC::CrashNburn] = GSC_CrashNburn; - map[CRC::TombRaiderUnderworld] = GSC_TombRaiderUnderWorld; - map[CRC::TombRaiderAnniversary] = GSC_TombRaider; - map[CRC::TombRaiderLegend] = GSC_TombRaiderLegend; - map[CRC::SSX3] = GSC_SSX3; - map[CRC::Black] = GSC_Black; - map[CRC::FFVIIDoC] = GSC_FFVIIDoC; - map[CRC::StarWarsForceUnleashed] = GSC_StarWarsForceUnleashed; - map[CRC::StarWarsBattlefront] = GSC_StarWarsBattlefront; - map[CRC::StarWarsBattlefront2] = GSC_StarWarsBattlefront2; - map[CRC::BlackHawkDown] = GSC_BlackHawkDown; - map[CRC::DevilMayCry3] = GSC_DevilMayCry3; - map[CRC::BurnoutTakedown] = GSC_Burnout; - map[CRC::BurnoutRevenge] = GSC_Burnout; - map[CRC::BurnoutDominator] = GSC_Burnout; - map[CRC::MidnightClub3] = GSC_MidnightClub3; - map[CRC::SpyroNewBeginning] = GSC_SpyroNewBeginning; - map[CRC::SpyroEternalNight] = GSC_SpyroEternalNight; - map[CRC::TalesOfLegendia] = GSC_TalesOfLegendia; - map[CRC::NanoBreaker] = GSC_NanoBreaker; - map[CRC::Kunoichi] = GSC_Kunoichi; - map[CRC::Yakuza] = GSC_Yakuza; - map[CRC::Yakuza2] = GSC_Yakuza2; - map[CRC::SkyGunner] = GSC_SkyGunner; - map[CRC::JamesBondEverythingOrNothing] = GSC_JamesBondEverythingOrNothing; - map[CRC::ZettaiZetsumeiToshi2] = GSC_ZettaiZetsumeiToshi2; - map[CRC::ShinOnimusha] = GSC_ShinOnimusha; - 
map[CRC::XE3] = GSC_XE3; - map[CRC::GetaWay] = GSC_GetaWay; - map[CRC::GetaWayBlackMonday] = GSC_GetaWay; + map[CRC::ResidentEvil4] = GSC_ResidentEvil4; + map[CRC::SacredBlaze] = GSC_SacredBlaze; + map[CRC::SakuraTaisen] = GSC_SakuraTaisen; map[CRC::SakuraWarsSoLongMyLove] = GSC_SakuraWarsSoLongMyLove; - map[CRC::FightingBeautyWulong] = GSC_FightingBeautyWulong; - map[CRC::TouristTrophy] = GSC_TouristTrophy; - map[CRC::GTASanAndreas] = GSC_GTASanAndreas; - map[CRC::FrontMission5] = GSC_FrontMission5; - map[CRC::GodHand] = GSC_GodHand; - map[CRC::KnightsOfTheTemple2] = GSC_KnightsOfTheTemple2; - map[CRC::UltramanFightingEvolution] = GSC_UltramanFightingEvolution; - map[CRC::DeathByDegreesTekkenNinaWilliams] = GSC_DeathByDegreesTekkenNinaWilliams; - map[CRC::AlpineRacer3] = GSC_AlpineRacer3; - map[CRC::HummerBadlands] = GSC_HummerBadlands; map[CRC::SengokuBasara] = GSC_SengokuBasara; - map[CRC::Grandia3] = GSC_Grandia3; - map[CRC::FinalFightStreetwise] = GSC_FinalFightStreetwise; - map[CRC::TalesofSymphonia] = GSC_TalesofSymphonia; + map[CRC::ShadowofRome] = GSC_ShadowofRome; + map[CRC::ShinOnimusha] = GSC_ShinOnimusha; + map[CRC::Simple2000Vol114] = GSC_Simple2000Vol114; + map[CRC::SkyGunner] = GSC_SkyGunner; map[CRC::SoulCalibur2] = GSC_SoulCalibur2; map[CRC::SoulCalibur3] = GSC_SoulCalibur3; - map[CRC::Simple2000Vol114] = GSC_Simple2000Vol114; - map[CRC::UrbanReign] = GSC_UrbanReign; + map[CRC::Spartan] = GSC_Spartan; + map[CRC::StarWarsBattlefront2] = GSC_StarWarsBattlefront2; + map[CRC::StarWarsBattlefront] = GSC_StarWarsBattlefront; + map[CRC::StarWarsForceUnleashed] = GSC_StarWarsForceUnleashed; map[CRC::SteambotChronicles] = GSC_SteambotChronicles; - map[CRC::SacredBlaze] = GSC_SacredBlaze; - map[CRC::SMTNocturne] = GSC_SMTNocturneDDS<0x2054E870>; + map[CRC::SuikodenTactics] = GSC_SuikodenTactics; + map[CRC::TalesOfAbyss] = GSC_TalesOfAbyss; + map[CRC::TalesOfLegendia] = GSC_TalesOfLegendia; + map[CRC::TalesofSymphonia] = GSC_TalesofSymphonia; + 
map[CRC::Tekken5] = GSC_Tekken5; + map[CRC::TimeSplitters2] = GSC_TimeSplitters2; + map[CRC::TombRaiderAnniversary] = GSC_TombRaider; + map[CRC::TombRaiderLegend] = GSC_TombRaiderLegend; + map[CRC::TombRaiderUnderworld] = GSC_TombRaiderUnderWorld; + map[CRC::TouristTrophy] = GSC_TouristTrophy; + map[CRC::UltramanFightingEvolution] = GSC_UltramanFightingEvolution; + map[CRC::UrbanReign] = GSC_UrbanReign; + map[CRC::WildArms4] = GSC_WildArms4; + map[CRC::WildArms5] = GSC_WildArms5; + map[CRC::XE3] = GSC_XE3; + map[CRC::Yakuza2] = GSC_Yakuza2; + map[CRC::Yakuza] = GSC_Yakuza; + map[CRC::ZettaiZetsumeiToshi2] = GSC_ZettaiZetsumeiToshi2; + // Only Aggressive + map[CRC::FFX2] = GSC_FFX2; + map[CRC::FFX] = GSC_FFX; + map[CRC::FFXII] = GSC_FFXII; map[CRC::SMTDDS1] = GSC_SMTNocturneDDS<0x203BA820>; map[CRC::SMTDDS2] = GSC_SMTNocturneDDS<0x20435BF0>; + map[CRC::SMTNocturne] = GSC_SMTNocturneDDS<0x2054E870>; + map[CRC::SoTC] = GSC_SoTC; + map[CRC::SSX3] = GSC_SSX3; } // Hack that were fixed on openGL if (Dx_only) { - // This one requires accurate_colclip + map[CRC::Bully] = GSC_Bully; + map[CRC::LordOfTheRingsTwoTowers] = GSC_LordOfTheRingsTwoTowers; + map[CRC::Okami] = GSC_Okami; + map[CRC::SimpsonsGame] = GSC_SimpsonsGame; + + // Not tested but must be fixed with texture shuffle + map[CRC::BigMuthaTruckers] = GSC_BigMuthaTruckers; + map[CRC::DemonStone] = GSC_DemonStone; + map[CRC::GiTS] = GSC_GiTS; + map[CRC::LegoBatman] = GSC_LegoBatman; + map[CRC::OnePieceGrandAdventure] = GSC_OnePieceGrandAdventure; + map[CRC::OnePieceGrandBattle] = GSC_OnePieceGrandBattle; + map[CRC::SFEX3] = GSC_SFEX3; + map[CRC::SpyroEternalNight] = GSC_SpyroEternalNight; + map[CRC::SpyroNewBeginning] = GSC_SpyroNewBeginning; + map[CRC::SonicUnleashed] = GSC_SonicUnleashed; + map[CRC::TenchuFS] = GSC_Tenchu; + map[CRC::TenchuWoH] = GSC_Tenchu; + + // Those games might require accurate fbmask + map[CRC::Sly2] = GSC_Sly2; + map[CRC::Sly3] = GSC_Sly3; + + // Those games require accurate_colclip (perf) 
map[CRC::CastlevaniaCoD] = GSC_Castlevania; map[CRC::CastlevaniaLoI] = GSC_Castlevania; + map[CRC::GodOfWar] = GSC_GodOfWar; + // Those games emulate a stencil buffer with the alpha channel of the RT (Slow) + map[CRC::RadiataStories] = GSC_RadiataStories; map[CRC::StarOcean3] = GSC_StarOcean3; map[CRC::ValkyrieProfile2] = GSC_ValkyrieProfile2; + + // Deprecated hack could be removed (Cutie) + map[CRC::Grandia3] = GSC_Grandia3; } } From a9f49ab9abcc456e1a59a78bf5815da479c57e25 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 30 Jun 2015 22:12:00 +0200 Subject: [PATCH 46/50] glsl: fix rounding error for shadow computation Better shadow on Castlevania/Nemo (others :) ) --- plugins/GSdx/res/glsl/tfx_fs.glsl | 7 +++++-- plugins/GSdx/res/glsl_source.h | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/plugins/GSdx/res/glsl/tfx_fs.glsl b/plugins/GSdx/res/glsl/tfx_fs.glsl index c3337d4802..dd24980255 100644 --- a/plugins/GSdx/res/glsl/tfx_fs.glsl +++ b/plugins/GSdx/res/glsl/tfx_fs.glsl @@ -661,14 +661,17 @@ void ps_blend(inout vec4 c, in float As) c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f)); #endif + // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy + // GS: Color = 1, Alpha = 255 => output 1 + // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875 #if PS_DFMT == FMT_16 // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode - c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f; + c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f; #elif PS_COLCLIP == 3 // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode - c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f; + c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f; #endif // Don't compile => unable to find compatible overloaded function "mod(vec3)" diff --git a/plugins/GSdx/res/glsl_source.h b/plugins/GSdx/res/glsl_source.h index aac59407a5..c2e7ecea07 100644 --- a/plugins/GSdx/res/glsl_source.h +++ b/plugins/GSdx/res/glsl_source.h @@ -1540,14 +1540,17 @@ static const char* tfx_fs_all_glsl = " c.rgb = clamp(c.rgb, vec3(0.0f), vec3(1.0f));\n" "#endif\n" "\n" + " // Warning: normally blending equation is mult(A, B) = A * B >> 7. GPU have the full accuracy\n" + " // GS: Color = 1, Alpha = 255 => output 1\n" + " // GPU: Color = 1/255, Alpha = 255/255 * 255/128 => output 1.9921875\n" "#if PS_DFMT == FMT_16\n" " // In 16 bits format, only 5 bits of colors are used. 
It impacts shadows computation of Castlevania\n" "\n" " // Basically we want to do 'c.rgb &= 0xF8' in denormalized mode\n" - " c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xF8)) / 255.0f;\n" + " c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xF8)) / 255.0f;\n" "#elif PS_COLCLIP == 3\n" " // Basically we want to do 'c.rgb &= 0xFF' in denormalized mode\n" - " c.rgb = vec3(uvec3((c.rgb * 255.0f) + 256.5f) & uvec3(0xFF)) / 255.0f;\n" + " c.rgb = vec3(uvec3(c.rgb * 255.0f) & uvec3(0xFF)) / 255.0f;\n" "#endif\n" "\n" " // Don't compile => unable to find compatible overloaded function \"mod(vec3)\"\n" From 50c9988018eda01c95f045dcc8cda818d3637455 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 30 Jun 2015 23:04:54 +0200 Subject: [PATCH 47/50] gsdx-ogl: disable all accurate options when not supported + No Intel support on windows --- plugins/GSdx/GLLoader.cpp | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/plugins/GSdx/GLLoader.cpp b/plugins/GSdx/GLLoader.cpp index 5ec466c622..36d56a7dbc 100644 --- a/plugins/GSdx/GLLoader.cpp +++ b/plugins/GSdx/GLLoader.cpp @@ -391,8 +391,7 @@ namespace GLLoader { } const char* vendor = (const char*)glGetString(GL_VENDOR); - fprintf(stderr, "Supported Opengl version: %s on GPU: %s. Vendor: %s\n", s, glGetString(GL_RENDERER), vendor); - fprintf(stderr, "Note: the maximum version supported by GSdx is 3.3 (even if you driver supports more)!\n"); + fprintf(stderr, "OpenGL information. GPU: %s. Vendor: %s\n", glGetString(GL_RENDERER), vendor); // Name change but driver is still bad! 
if (strstr(vendor, "ATI") || strstr(vendor, "Advanced Micro Devices")) @@ -401,10 +400,14 @@ namespace GLLoader { nvidia_buggy_driver = true; if (strstr(vendor, "Intel")) intel_buggy_driver = true; - if (strstr(vendor, "X.Org") || strstr(vendor, "nouveau")) // Note: it might actually catch nouveau too, but bug are likely to be the same anyway + if (strstr(vendor, "X.Org") || strstr(vendor, "nouveau")) // Note: it might actually catch nouveau too, but bugs are likely to be the same anyway mesa_amd_buggy_driver = true; if (strstr(vendor, "VMware")) // Assume worst case because I don't know the real status mesa_amd_buggy_driver = intel_buggy_driver = true; +#ifdef _WINDOWS + if (intel_buggy_driver) + return false; // too much buggy no need to check anything. +#endif GLuint dot = 0; while (s[dot] != '\0' && s[dot] != '.') dot++; @@ -422,7 +425,7 @@ namespace GLLoader { fprintf(stderr, "Overriding geometry shaders detection\n"); } if ( (major_gl < major) || ( major_gl == major && minor_gl < minor ) ) { - fprintf(stderr, "OpenGL %d.%d is not supported\n", major, minor); + fprintf(stderr, "OpenGL %d.%d is not supported. Only OpenGL %d.%d\n was found", major, minor, major_gl, minor_gl); return false; } @@ -517,10 +520,10 @@ namespace GLLoader { } if (!found_GL_ARB_texture_barrier) { - if (theApp.GetConfig("accurate_blend", 1)) { - fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver so you can't enable accurate_blend! Sorry.\n"); - theApp.SetConfig("accurate_blend", 0); - } + fprintf(stderr, "Error GL_ARB_texture_barrier is not supported by your driver. Accurate options will be disabled! 
Sorry!\n"); + theApp.SetConfig("accurate_blend", 0); + theApp.SetConfig("accurate_colclip", 0); + theApp.SetConfig("accurate_fbmask", 0); } fprintf(stderr, "\n"); From 76f96ddbb64972d97bc44692af70236c154bc6f2 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 30 Jun 2015 23:08:31 +0200 Subject: [PATCH 48/50] gsdx-tooltip: MSAA is not implemented on OpenGL --- plugins/GSdx/GSSetting.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/plugins/GSdx/GSSetting.cpp b/plugins/GSdx/GSSetting.cpp index 361bdd4141..e14fdd006d 100644 --- a/plugins/GSdx/GSSetting.cpp +++ b/plugins/GSdx/GSSetting.cpp @@ -69,7 +69,8 @@ const char* dialog_message(int ID, bool* updateText) { case IDC_MSAACB: case IDC_STATIC_MSAA: return "Multisample Anti-Aliasing\n\nEnables hardware Anti-Aliasing. Needs lots of memory." - " The Z-24 modes might need to have LogarithmicZ to compensate for the bits lost (only in DX9 mode)."; + " The Z-24 modes might need to have LogarithmicZ to compensate for the bits lost (only in DX9 mode).\n\n" + " MSAA is not implemented on the OpenGL renderer"; case IDC_AGGRESSIVECRC: return "Use more aggressive CRC hacks on some games\n\n" "Only affects few games, removing some effects which might make the image sharper/clearer.\n" @@ -107,21 +108,21 @@ const char* dialog_message(int ID, bool* updateText) { #ifdef __linux__ case IDC_PALTEX: return "When checked 4/8 bits texture will be send to the GPU with a palette. GPU will be in charge of the conversion. 
" - "(Note it was never tested on openGL)\n\n" + "(Note it was never tested on OpenGL)\n\n" "When uncheked the CPU will convert directly the texture to 32 bits\n\n" "It is a basically a trade-off between GPU/CPU"; case IDC_ACCURATE_DATE: return "Implement a more accurate algorithm to compute GS destination alpha testing.\n\n" - "It could be slower when the effects are used.\n\nNote: it requires the 4.2 openGL extension GL_ARB_shader_image_load_store"; + "It could be slower when the effects are used.\n\nNote: it requires the 4.2 OpenGL extension GL_ARB_shader_image_load_store"; case IDC_ACCURATE_BLEND: return "Allow to solve the impossible blending error message.\n\n" - "It could be slower when the effect are used.\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; + "It could be slower when the effect are used.\n\nNote: it requires the 4.5 OpenGL extension GL_ARB_texture_barrier"; case IDC_ACCURATE_COLCLIP: return "Implement the wrapping of color after an overflow\n\n" - "It will be slow (half speed) when the effect are used!\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; + "It will be slow (half speed) when the effect are used!\n\nNote: it requires the 4.5 OpenGL extension GL_ARB_texture_barrier"; case IDC_ACCURATE_FBMASK: return "Implement partial color masking\n\n" - "No status yet on the speed impact\n\nNote: it requires the 4.5 openGL extension GL_ARB_texture_barrier"; + "No status yet on the speed impact\n\nNote: it requires the 4.5 OpenGL extension GL_ARB_texture_barrier"; case IDC_TC_DEPTH: return "Allow to convert Depth buffer from/to Color buffer. 
It is used for blur & depth of field effects"; #endif From 074881228d6c5ee03b2c413cbbbe08e9d742cb9f Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Tue, 30 Jun 2015 23:21:31 +0200 Subject: [PATCH 49/50] gsdx:comment: small explanation of the half offset hack --- plugins/GSdx/GSTextureCache.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 2892364aba..81fa59f42a 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -941,6 +941,11 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con // // FIXME: The scaling will create a bad offset. For example if texture coordinate start at 0.5 (pixel 0) // At 2x it will become 0.5/128 * 256 = 1 (pixel 1) + // I think it is the purpose of the UserHacks_HalfPixelOffset below. However implementation is less + // than ideal. + // 1/ It suppose games have an half pixel offset on texture coordinate which could be wrong + // 2/ It doesn't support rescaling of the RT (tw = 1024) + // Maybe it will be more easy to just round the UV value in the Vertex Shader if (!is_8bits) { // 8 bits handling is special due to unscaling. It is better to not execute this code From 831c24de51e75c6029aaa29ab9bee5eec8bbf23c Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Wed, 1 Jul 2015 09:28:32 +0200 Subject: [PATCH 50/50] gsdx-dx: only enable new code when CRC is below FULL level Code is not ready. 
It allows testing the new code without a massive regression --- plugins/GSdx/GSRendererDX.cpp | 9 ++++----- plugins/GSdx/GSTextureCache.cpp | 16 +++++++++++----- plugins/GSdx/GSTextureCache.h | 1 + 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/plugins/GSdx/GSRendererDX.cpp b/plugins/GSdx/GSRendererDX.cpp index a1d976289a..abdb5e8d8f 100644 --- a/plugins/GSdx/GSRendererDX.cpp +++ b/plugins/GSdx/GSRendererDX.cpp @@ -225,7 +225,9 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc GSDeviceDX::PSSamplerSelector ps_ssel; GSDeviceDX::PSConstantBuffer ps_cb; - if (m_texture_shuffle) { + // Gregory: code is not yet ready so let's only enable it when + // CRC is below the FULL level + if (m_texture_shuffle && (m_crc_hack_level < 3)) { ps_sel.shuffle = 1; ps_sel.fmt = 0; @@ -373,10 +375,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc ps_sel.wmt = context->CLAMP.WMT; if (ps_sel.shuffle) { ps_sel.fmt = 0; - - } - else - { + } else { ps_sel.fmt = tex->m_palette ? cpsm.fmt | 4 : cpsm.fmt; } ps_sel.aem = env.TEXA.AEM; diff --git a/plugins/GSdx/GSTextureCache.cpp b/plugins/GSdx/GSTextureCache.cpp index 81fa59f42a..ae9f8cc8b7 100644 --- a/plugins/GSdx/GSTextureCache.cpp +++ b/plugins/GSdx/GSTextureCache.cpp @@ -26,12 +26,12 @@ GSTextureCache::GSTextureCache(GSRenderer* r) : m_renderer(r) { m_spritehack = !!theApp.GetConfig("UserHacks", 0) ? theApp.GetConfig("UserHacks_SpriteHack", 0) : 0; - m_preload_frame = theApp.GetConfig("preload_frame_with_gs_data", 0); - UserHacks_HalfPixelOffset = !!theApp.GetConfig("UserHacks", 0) && !!theApp.GetConfig("UserHacks_HalfPixelOffset", 0); - m_paltex = !!theApp.GetConfig("paltex", 0); - m_can_convert_depth = theApp.GetConfig("Renderer", 12) == 12 ? theApp.GetConfig("texture_cache_depth", 1) : 0; + m_paltex = !!theApp.GetConfig("paltex", 0); + m_preload_frame = theApp.GetConfig("preload_frame_with_gs_data", 0); + m_can_convert_depth = IsOpenGL() ? 
theApp.GetConfig("texture_cache_depth", 1) : 0; + m_crc_hack_level = theApp.GetConfig("crc_hack_level", 3); m_temp = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32); } @@ -148,7 +148,13 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con // 1/ it just works :) // 2/ even with upscaling // 3/ for both DX and OpenGL - Read(t, t->m_valid); + + // Gregory: to avoid a massive slow down for nothing, let's only enable + // this code when CRC is below the FULL level + if (m_crc_hack_level < 3) + Read(t, t->m_valid); + else + dst = t; } else { dst = t; } diff --git a/plugins/GSdx/GSTextureCache.h b/plugins/GSdx/GSTextureCache.h index 748f0205c5..6314955d1b 100644 --- a/plugins/GSdx/GSTextureCache.h +++ b/plugins/GSdx/GSTextureCache.h @@ -115,6 +115,7 @@ protected: bool m_preload_frame; uint8* m_temp; bool m_can_convert_depth; + int m_crc_hack_level; virtual Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false); virtual Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type);