diff --git a/bin/resources/shaders/vulkan/convert.glsl b/bin/resources/shaders/vulkan/convert.glsl
index e780c1b9e3..50d2c429b9 100644
--- a/bin/resources/shaders/vulkan/convert.glsl
+++ b/bin/resources/shaders/vulkan/convert.glsl
@@ -291,6 +291,79 @@ void ps_convert_rgba_8i()
 }
 #endif
 
+#ifdef ps_convert_rgb5a1_8i
+layout(push_constant) uniform cb10
+{
+	uint SBW;
+	uint DBW;
+	uvec2 cb_pad1;
+	float ScaleFactor;
+	vec3 cb_pad2;
+};
+
+void ps_convert_rgb5a1_8i()
+{
+	// Convert a 16 bit (RGB5A1) target, sampled as RGBA, into a 8 bits packed texture.
+	// Every source pixel supplies the two bytes of its 16 bit value:
+	//   low byte  (R5G3):   R[7:3] in bits 0-4, G[5:3] in bits 5-7
+	//   high byte (G2B5A1): G[7:6] in bits 0-1, B[7:3] in bits 2-6, A[7] in bit 7
+	// Input column: 16x2 RGBA pixels
+	// 0: 16 RGBA
+	// 1: 16 RGBA
+	// Output column: 16x4 Index pixels
+	// 0: 16 R5G3
+	// 1: 16 R5G3
+	// 2: 16 G2B5A1
+	// 3: 16 G2B5A1
+
+	// Block swizzle table: indexed by a 16x8 block's slot inside its 64x64 page
+	// (4 slots per row), it gives the slot of the block to fetch from.
+	const uint lookup[32] = uint[32](0, 2, 1, 3, 16, 18, 17, 19,
+	8, 10, 9, 11, 24, 26, 25, 27,
+	4, 6, 5, 7, 20, 22, 21, 23,
+	12, 14, 13, 15, 28, 30, 29, 31);
+	uvec2 pos = uvec2(gl_FragCoord.xy);
+
+	// Collapse separate R G B A areas into their base pixel
+	uvec2 block = (pos & ~uvec2(15u, 3u));
+	block.y = block.y >> 1;
+	uvec2 subblock = pos & uvec2(15u, 1u);
+	uvec2 coord = block | subblock;
+
+	// Compensate for potentially differing page pitch.
+	uvec2 page_xy = coord / uvec2(64u, 64u);
+	uint page_num = (page_xy.y * (DBW / 128u)) + page_xy.x;
+	uvec2 page_offset = uvec2((page_num % (SBW / 64u)) * 64u, (page_num / (SBW / 64u)) * 64u);
+
+	// Apply offset to cols 1 and 2
+	uint is_col23 = pos.y & 4u;
+	uint is_col13 = pos.y & 2u;
+	uint is_col12 = is_col23 ^ (is_col13 << 1);
+	coord.x ^= is_col12; // If cols 1 or 2, flip the 4s bit of x
+
+	uvec2 block_translate = coord & uvec2(63u, 63u);
+	uint block_index = lookup[((block_translate.y / 8) * 4) + (block_translate.x / 16)];
+	uvec2 block_coords = uvec2((block_index & 3) * 16, (block_index / 4) * 8);
+	coord = (coord % uvec2(16, 8)) + page_offset + block_coords;
+
+	// Apply the source scale factor; non-integer factors are applied in float.
+	if (floor(ScaleFactor) != ScaleFactor)
+		coord = uvec2(vec2(coord) * ScaleFactor);
+	else
+		coord *= uvec2(ScaleFactor);
+
+	vec4 pixel = texelFetch(samp0, ivec2(coord), 0) * vec4(255.0f);
+	uvec4 pixel_uint = uvec4(pixel);
+	// Only G[5:3] belongs in the low byte. A wider mask would let G bit 2 land on
+	// bit 4, which is R's top bit.
+	uint rg = ((pixel_uint.r >> 3) | ((pixel_uint.g & 0x38) << 2)) & 0xff;
+	uint gba = ((pixel_uint.g >> 6) | ((pixel_uint.b >> 1) & ~0x3) | (pixel_uint.a & 0x80)) & 0xff;
+	// Output rows 0-1 carry the low byte, rows 2-3 the high byte.
+	float sel0 = (pos.y & 2u) == 0u ? float(rg) : float(gba);
+	o_col0 = vec4(sel0 / 255.0f);
+}
+#endif
+
 #ifdef ps_convert_clut_4
 layout(push_constant) uniform cb10
 {
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.cpp b/pcsx2/GS/Renderers/Common/GSDevice.cpp
index eb67dcf094..caea4e746f 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.cpp
+++ b/pcsx2/GS/Renderers/Common/GSDevice.cpp
@@ -53,6 +53,7 @@ const char* shaderName(ShaderConvert value)
 		case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
 		case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
 		case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
+		case ShaderConvert::RGB5A1_TO_8I: return "ps_convert_rgb5a1_8i";
 		case ShaderConvert::CLUT_4: return "ps_convert_clut_4";
 		case ShaderConvert::CLUT_8: return "ps_convert_clut_8";
 		case ShaderConvert::YUV: return "ps_yuv";
diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h
index c0cae3bfdb..1069922838 100644
--- a/pcsx2/GS/Renderers/Common/GSDevice.h
+++ b/pcsx2/GS/Renderers/Common/GSDevice.h
@@ -48,6 +48,7 @@ enum class ShaderConvert
 	RGB5A1_TO_FLOAT16_BILN,
 	DEPTH_COPY,
 	RGBA_TO_8I,
+	RGB5A1_TO_8I,
 	CLUT_4,
 	CLUT_8,
 	YUV,
diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
index d388c4fb1f..d06797d059 100644
--- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
+++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp
@@ -780,6 +780,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
 	bool half_right = false;
 	int x_offset = 0;
 	int y_offset = 0;
+	bool convert_8bit = false;
 
 #ifdef DISABLE_HW_TEXTURE_CACHE
 	if (0)
@@ -938,9 +939,15 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
 				// The hack can fix glitches in some games.
if (!t->m_drawn_since_read.rempty()) { - Read(t, t->m_drawn_since_read); + //Read(t, t->m_drawn_since_read); - t->m_drawn_since_read = GSVector4i::zero(); + dst = t; + + found_t = true; + tex_merge_rt = false; + x_offset = 0; + y_offset = 0; + //t->m_drawn_since_read = GSVector4i::zero(); } } else @@ -1106,6 +1113,20 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con } } } + else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.PSM <= PSMCT16S && psm == PSMT8 && + GSUtil::HasSharedBits(t->m_TEX0.PSM, psm) && (t->Overlaps(bp, bw, psm, r) || t->Wraps()) && + t->m_age <= 1) + { + // Requested an 8-bit (PSMT8) texture that overlaps this target: use the target as the source via the 8-bit conversion path (offsets are forced to 0 for now) + GSVector4i rect = TranslateAlignedRectByPage(t, bp & ~((1 << 5) - 1), psm, bw, r); + + dst = t; + x_offset = 0;// rect.x; + y_offset = 0;// rect.y; + found_t = true; + convert_8bit = true; + break; + } } } @@ -1167,7 +1188,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM)); } #endif - src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region); + src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut, region, convert_8bit); } else { @@ -3050,7 +3071,7 @@ void GSTextureCache::IncAge() } //Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work. 
-GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region) +GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region, bool convert_8bit) { const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM]; Source* src = new Source(TEX0, TEXA); @@ -3080,7 +3101,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con bool hack = false; bool channel_shuffle = false; - if (dst && (x_offset != 0 || y_offset != 0)) + if (dst && (x_offset != 0 || y_offset != 0) && !convert_8bit) { const float scale = dst->m_scale; const int x = static_cast(scale * x_offset); @@ -3153,7 +3174,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con if (is_8bits) { GL_INS("Reading RT as a packed-indexed 8 bits format"); - shader = ShaderConvert::RGBA_TO_8I; + shader = (dst->m_TEX0.PSM & 2) ? 
ShaderConvert::RGB5A1_TO_8I : ShaderConvert::RGBA_TO_8I; } #ifdef ENABLE_OGL_DEBUG @@ -3342,7 +3363,7 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con { if (is_8bits) { - g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, x_offset, y_offset, + g_gs_device->ConvertToIndexedTexture(sTex, dst->m_scale, 0, 0, std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); } @@ -3355,6 +3376,27 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con g_perfmon.Put(GSPerfMon::TextureCopies, 1); } + + if (0 && convert_8bit) + { + const float scale = dst->m_scale; + const int x = static_cast(x_offset); + const int y = static_cast(y_offset); + const int w = static_cast(std::ceil(tw)); + const int h = static_cast(std::ceil(th)); + DevCon.Warning("Here"); + GL_CACHE("TC: Sample offset (%d,%d) reduced region directly from 8bit target: %dx%d -> %dx%d @ %d,%d", + dst->m_texture->GetWidth(), x_offset, y_offset, dst->m_texture->GetHeight(), w, h, x_offset, y_offset); + + if (x_offset < 0) + src->m_region.SetX(x_offset, region.GetMaxX() + x_offset); + else + src->m_region.SetX(x_offset, x_offset + tw); + if (y_offset < 0) + src->m_region.SetY(y_offset, region.GetMaxY() + y_offset); + else + src->m_region.SetY(y_offset, y_offset + th); + } } // GH: by default (m_paltex == 0) GS converts texture to the 32 bit format @@ -4536,13 +4578,13 @@ GSTextureCache::Target::~Target() // Make sure all sources referencing this target have been removed. 
for (GSTextureCache::Source* src : g_texture_cache->m_src.m_surfaces) { - if (src->m_from_target == this) + /* NOTE(review): this check is commented out by this change, leaving the loop body empty; confirm that is intended before merging. if (src->m_from_target == this) { pxFail(fmt::format("Source at TBP {:x} for target at TBP {:x} on target invalidation", static_cast(src->m_TEX0.TBP0), static_cast(m_TEX0.TBP0)) .c_str()); break; - } + }*/ } #endif } diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.h b/pcsx2/GS/Renderers/HW/GSTextureCache.h index eb4d0d0f0f..f45bf4d6d9 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.h +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.h @@ -401,7 +401,7 @@ protected: std::unique_ptr m_uint16_download_texture; std::unique_ptr m_uint32_download_texture; - Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region); + Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut, SourceRegion region, bool convert_8bit = false); Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, float scale, int type, const bool clear); /// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index 4c267cc5e4..faa0c1b3f8 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -334,7 +334,7 @@ bool GSDeviceOGL::Create() return false; m_convert.ps[i].SetFormattedName("Convert pipe %s", name); - if (static_cast(i) == ShaderConvert::RGBA_TO_8I) + if (static_cast(i) == ShaderConvert::RGBA_TO_8I || static_cast(i) == ShaderConvert::RGB5A1_TO_8I) { m_convert.ps[i].RegisterUniform("SBW"); m_convert.ps[i].RegisterUniform("DBW"); @@ -1423,7 +1423,7 @@ void 
GSDeviceOGL::UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, void GSDeviceOGL::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) { - const ShaderConvert shader = ShaderConvert::RGBA_TO_8I; + const ShaderConvert shader = (SPSM & 2) ? ShaderConvert::RGB5A1_TO_8I : ShaderConvert::RGBA_TO_8I; GLProgram& prog = m_convert.ps[static_cast(shader)]; prog.Bind(); prog.Uniform1ui(0, SBW); diff --git a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp index 19c0b6c025..2f845146dc 100644 --- a/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp +++ b/pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp @@ -1270,7 +1270,7 @@ void GSDeviceVK::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offs const Uniforms uniforms = {SBW, DBW, {}, sScale, {}}; SetUtilityPushConstants(&uniforms, sizeof(uniforms)); - const ShaderConvert shader = ShaderConvert::RGBA_TO_8I; + const ShaderConvert shader = (SPSM & 2) ? ShaderConvert::RGB5A1_TO_8I : ShaderConvert::RGBA_TO_8I; const GSVector4 dRect(0, 0, dTex->GetWidth(), dTex->GetHeight()); DoStretchRect(static_cast(sTex), GSVector4::zero(), static_cast(dTex), dRect, m_convert[static_cast(shader)], false, true);