From f6f1761733cdb5454e6c47fea5dc8066ae9e09a6 Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 6 Jan 2025 17:34:30 +0000 Subject: [PATCH] GS/HW: Fixes to texture is target offsets --- bin/resources/shaders/dx11/tfx.fx | 4 +- bin/resources/shaders/opengl/tfx_fs.glsl | 4 +- bin/resources/shaders/vulkan/tfx.glsl | 4 +- pcsx2/GS/GSState.cpp | 5 +- pcsx2/GS/Renderers/HW/GSHwHack.cpp | 6 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 135 +++++++++++++++-------- pcsx2/GS/Renderers/HW/GSTextureCache.cpp | 58 +++++++--- pcsx2/GS/Renderers/Metal/tfx.metal | 4 +- 8 files changed, 146 insertions(+), 74 deletions(-) diff --git a/bin/resources/shaders/dx11/tfx.fx b/bin/resources/shaders/dx11/tfx.fx index 5d6b76d7d4..1c57eb9440 100644 --- a/bin/resources/shaders/dx11/tfx.fx +++ b/bin/resources/shaders/dx11/tfx.fx @@ -1123,8 +1123,8 @@ PS_OUTPUT ps_main(PS_INPUT input) { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) { diff --git a/bin/resources/shaders/opengl/tfx_fs.glsl b/bin/resources/shaders/opengl/tfx_fs.glsl index 66bdfa340a..c5a312bf74 100644 --- a/bin/resources/shaders/opengl/tfx_fs.glsl +++ b/bin/resources/shaders/opengl/tfx_fs.glsl @@ -1086,8 +1086,8 @@ void ps_main() C.ga = vec2(float((denorm_c.g >> 6) | ((denorm_c.b >> 3) << 2) | (denorm_TA.x & 0x80u))); #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/bin/resources/shaders/vulkan/tfx.glsl b/bin/resources/shaders/vulkan/tfx.glsl index 1ecf891e18..812c2fe565 100644 --- a/bin/resources/shaders/vulkan/tfx.glsl +++ b/bin/resources/shaders/vulkan/tfx.glsl @@ -1350,8 +1350,8 @@ void main() // Write RB part. Mask will take care of the correct destination #elif PS_SHUFFLE_ACROSS #if(PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; #elif(PS_PROCESS_BA & SHUFFLE_READ) C.rb = C.bb; C.ga = C.aa; diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index a095abaff0..18787838d1 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -467,7 +467,8 @@ void GSState::DumpVertices(const std::string& filename) file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.R) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.G) << DEL; file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.B) << DEL; - file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A); + file << std::setfill('0') << std::setw(3) << unsigned(v.RGBAQ.A) << DEL; + file << "FOG: " << std::setfill('0') << std::setw(3) << unsigned(v.FOG); file << std::endl; } @@ -3100,7 +3101,7 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) { // Pretty confident here... GSVertex* buffer = &m_vertex.buff[0]; - const bool const_spacing = (buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == (m_v.U - m_v.XYZ.X); + const bool const_spacing = std::abs(buffer[m_index.buff[0]].U - buffer[m_index.buff[0]].XYZ.X) == std::abs(m_v.U - m_v.XYZ.X) && std::abs(buffer[m_index.buff[1]].XYZ.X - buffer[m_index.buff[0]].XYZ.X) < 64; if (const_spacing) return false; diff --git a/pcsx2/GS/Renderers/HW/GSHwHack.cpp b/pcsx2/GS/Renderers/HW/GSHwHack.cpp index 324f8e6449..7eb88167ee 100644 --- a/pcsx2/GS/Renderers/HW/GSHwHack.cpp +++ b/pcsx2/GS/Renderers/HW/GSHwHack.cpp @@ -1047,7 +1047,7 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, // compute shadow in RG, // save result in alpha with a TS, // Restore RG channel that we previously copied to render shadows. - + // Important note: The game downsizes the target to half height, then later expands it back up to full size, that's why PCSX2 doesn't like it, we don't support that behaviour. const GIFRegTEX0& Texture = RTEX0; GIFRegTEX0 Frame = {}; @@ -1058,9 +1058,9 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds, if ((!rt) || (!RPRIM->TME) || (GSLocalMemory::m_psm[Texture.PSM].bpp != 16) || (GSLocalMemory::m_psm[Frame.PSM].bpp != 16) || (Texture.TBP0 == Frame.TBP0) || (Frame.TBW != 16 && Texture.TBW != 16)) return true; - GL_INS("OI_SonicUnleashed replace draw by a copy"); + GL_INS("OI_SonicUnleashed replace draw by a copy draw %d", r.s_n); - GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget); + GSTextureCache::Target* src = g_texture_cache->LookupTarget(Texture, GSVector2i(1, 1), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget, true, 0, false, false, true, true, GSVector4i::zero(), true); if (!src) return true; diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index f341430407..8090cfdacb 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -346,7 +346,7 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, tex_pos &= 0xFF; shuffle_across = (((tex_pos + 8) >> 4) ^ ((pos + 8) >> 4)) & 0x8; - const bool full_width = !shuffle_across && (((second_vert.XYZ.X + 9) - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8; + const bool full_width = ((second_vert.XYZ.X - first_vert.XYZ.X) >> 4) >= 16 && m_r.width() > 8 && tex && tex->m_from_target && rt == tex->m_from_target; process_ba = ((pos > 112 && pos < 136) || full_width) ? SHUFFLE_WRITE : 0; process_rg = (!process_ba || full_width) ? SHUFFLE_WRITE : 0; // "same group" means it can read blue and write alpha using C32 tricks @@ -733,10 +733,25 @@ void GSRendererHW::ConvertSpriteTextureShuffle(u32& process_rg, u32& process_ba, m_vt.m_max.p.y = floor(m_vt.m_max.p.y + 1.9f) / 2.0f; } - m_context->scissor.in.x = m_vt.m_min.p.x; - m_context->scissor.in.z = m_vt.m_max.p.x + 0.9f; - m_context->scissor.in.y = m_vt.m_min.p.y; - m_context->scissor.in.w = m_vt.m_max.p.y + 0.9f; + if (m_context->scissor.in.x & 8) + { + m_context->scissor.in.x &= ~0xf;//m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.x /= 2; + } + if (m_context->scissor.in.z & 8) + { + m_context->scissor.in.z += 8; //m_vt.m_min.p.x; + + if (half_right_vert) + m_context->scissor.in.z /= 2; + } + if (half_bottom_vert) + { + m_context->scissor.in.y /= 2; + m_context->scissor.in.w /= 2; + } // Only do this is the source is being interpreted as 16bit if (half_bottom_uv) @@ -2570,7 +2585,26 @@ void GSRendererHW::Draw() bool shuffle_target = false; if (!no_rt && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp >= 16) { - if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0) + if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) + { + const GSVertex* v = &m_vertex.buff[0]; + + const int first_x = std::abs(static_cast(((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8))) >> 4; + const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); + const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); + // offset coordinates swap around RG/BA. (Ace Combat) + const u32 minv = m_cached_ctx.CLAMP.MINV; + const u32 minu = m_cached_ctx.CLAMP.MINU; + const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); + const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; + // Round up half of second coord, it can sometimes be slightly under. + const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; + const int read_width = std::abs(second_u - first_u); + + shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; + } + + if (m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 || !shuffle_target) { // FBW is going to be wrong for channel shuffling into a new target, so take it from the source. FRAME_TEX0.U64 = 0; @@ -2586,24 +2620,6 @@ void GSRendererHW::Draw() tgt = nullptr; } - if (!shuffle_target && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16) - { - const GSVertex* v = &m_vertex.buff[0]; - - const int first_x = ((v[0].XYZ.X - m_context->XYOFFSET.OFX) + 8) >> 4; - const int first_u = PRIM->FST ? ((v[0].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[0].ST.S / v[1].RGBAQ.Q))); - const int second_u = PRIM->FST ? ((v[1].U + 9) >> 4) : static_cast(((1 << m_cached_ctx.TEX0.TW) * (v[1].ST.S / v[1].RGBAQ.Q)) + 0.6f); - // offset coordinates swap around RG/BA. (Ace Combat) - const u32 minv = m_cached_ctx.CLAMP.MINV; - const u32 minu = m_cached_ctx.CLAMP.MINU; - const bool rgba_shuffle = ((m_cached_ctx.CLAMP.WMS == m_cached_ctx.CLAMP.WMT && m_cached_ctx.CLAMP.WMS == CLAMP_REGION_REPEAT) && (minu && minv)); - const bool shuffle_coords = ((first_x ^ first_u) & 8) || rgba_shuffle; - // Round up half of second coord, it can sometimes be slightly under. - const int draw_width = std::abs(v[1].XYZ.X + 9 - v[0].XYZ.X) >> 4; - const int read_width = std::abs(second_u - first_u); - - shuffle_target = shuffle_coords && (draw_width & 7) == 0 && std::abs(draw_width - read_width) <= 1; - } } possible_shuffle = !no_rt && (((shuffle_target /*&& GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16*/) /*|| (m_cached_ctx.FRAME.Block() == m_cached_ctx.TEX0.TBP0 && ((m_cached_ctx.TEX0.PSM & 0x6) || m_cached_ctx.FRAME.PSM != m_cached_ctx.TEX0.PSM))*/) || IsPossibleChannelShuffle()); @@ -2611,7 +2627,7 @@ void GSRendererHW::Draw() const u32 color_mask = (m_vt.m_max.c > GSVector4i::zero()).mask(); const bool texture_function_color = m_cached_ctx.TEX0.TFX == TFX_DECAL || (color_mask & 0xFFF) || (m_cached_ctx.TEX0.TFX > TFX_DECAL && (color_mask & 0xF000)); const bool texture_function_alpha = m_cached_ctx.TEX0.TFX != TFX_MODULATE || (color_mask & 0xF000); - const bool req_color = texture_function_color && (!PRIM->ABE || (PRIM->ABE && IsUsingCsInBlend())) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF)) || need_aem_color; + const bool req_color = (texture_function_color && (!PRIM->ABE || (PRIM->ABE && IsUsingCsInBlend())) && (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0x00FFFFFF)) != (fm_mask & 0x00FFFFFF))) || need_aem_color; const bool alpha_used = (GSUtil::GetChannelMask(m_context->TEX0.PSM) == 0x8 || (m_context->TEX0.TCC && texture_function_alpha)) && ((PRIM->ABE && IsUsingAsInBlend()) || (m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.ATST > ATST_ALWAYS) || (possible_shuffle || (m_cached_ctx.FRAME.FBMSK & (fm_mask & 0xFF000000)) != (fm_mask & 0xFF000000))); const bool req_alpha = (GSUtil::GetChannelMask(m_context->TEX0.PSM) & 0x8) && alpha_used; @@ -2830,6 +2846,7 @@ void GSRendererHW::Draw() if (!possible_shuffle && m_split_texture_shuffle_pages == 0) m_r = m_r.rintersect(t_size_rect); + GSVector4i lookup_rect = unclamped_draw_rect; // Do the lookup with the real format on a shuffle, if possible. if (possible_shuffle && GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory ::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) { @@ -2843,6 +2860,22 @@ void GSRendererHW::Draw() FRAME_TEX0.PSM = next_ctx.TEX0.PSM; else FRAME_TEX0.PSM = PSMCT32; // Guess full color if no upcoming hint, it'll fix itself later. + + // This is just for overlap detection, it doesn't matter which direction we do this in + if (GSLocalMemory::m_psm[FRAME_TEX0.PSM].bpp == 32) + { + // Shuffling with a double width (Sonic Unleashed for example which does a wierd shuffle/not shuffle green backup/restore). + if (src && std::abs((lookup_rect.width() / 2) - src->m_from_target->m_unscaled_size.x) <= 8) + { + lookup_rect.x /= 2; + lookup_rect.z /= 2; + } + else + { + lookup_rect.y /= 2; + lookup_rect.w /= 2; + } + } } // Normally we would use 1024 here to match the clear above, but The Godfather does a 1023x1023 draw instead @@ -2856,7 +2889,7 @@ void GSRendererHW::Draw() const bool preserve_downscale_draw = scale_draw < 0 || (scale_draw == 0 && ((src && src->m_from_target && src->m_from_target->m_downscaled) || is_possible_mem_clear == ClearType::ClearWithDraw)); rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, ((src && src->m_scale != 1) && GSConfig.UserHacks_NativeScaling == GSNativeScaling::Normal && !possible_shuffle) ? GetTextureScaleFactor() : target_scale, GSTextureCache::RenderTarget, true, - fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), + fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, lookup_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), GSConfig.UserHacks_NativeScaling != GSNativeScaling::Off && preserve_downscale_draw && is_possible_mem_clear != ClearType::NormalClear, src, (no_ds || !ds) ? -1 : (m_cached_ctx.ZBUF.Block() - ds->m_TEX0.TBP0)); // Draw skipped because it was a clear and there was no target. @@ -2895,7 +2928,7 @@ void GSRendererHW::Draw() else if (rt->m_TEX0.TBP0 != m_cached_ctx.FRAME.Block()) { int vertical_offset = ((static_cast(m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0) / 32) / std::max(static_cast(rt->m_TEX0.TBW), 1)) * frame_psm.pgs.y; // I know I could just not shift it.. - + int texture_offset = 0; const int horizontal_offset = ((static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) / 32) % static_cast(std::max(rt->m_TEX0.TBW, 1U))) * frame_psm.pgs.x; // Used to reduce the offset made later in channel shuffles m_target_offset = std::abs(static_cast((m_cached_ctx.FRAME.Block() - rt->m_TEX0.TBP0)) >> 5); @@ -2906,6 +2939,7 @@ void GSRendererHW::Draw() GSVector2i new_scaled_size = rt->m_unscaled_size * rt->m_scale; // Make sure to use the original format for the offset. int new_offset = std::abs((vertical_offset / frame_psm.pgs.y) * GSLocalMemory::m_psm[rt->m_TEX0.PSM].pgs.y); + texture_offset = new_offset; new_scaled_size.y += new_offset * rt->m_scale; GSTexture* tex = g_gs_device->CreateRenderTarget(new_scaled_size.x, new_scaled_size.y, GSTexture::Format::Color, true); @@ -2916,18 +2950,13 @@ void GSRendererHW::Draw() g_gs_device->StretchRect(rt->m_texture, GSVector4(0,0,1,1), tex, GSVector4(dRect), ShaderConvert::COPY, false); - if (src && src->m_from_target && src->m_from_target == rt) + if (src && src->m_from_target && src->m_from_target == rt && src->m_target_direct) { - src->m_texture = rt->m_texture; - src->m_target_direct = false; - src->m_shared_texture = false; - } - else - { - //m_target_memory_usage -= dst->m_texture->GetMemUsage(); - g_gs_device->Recycle(rt->m_texture); + src->m_texture = tex; } + g_gs_device->Recycle(rt->m_texture); + rt->m_valid.y += new_offset; rt->m_valid.w += new_offset; rt->m_drawn_since_read.y += new_offset; @@ -2958,8 +2987,26 @@ void GSRendererHW::Draw() for (u32 i = 0; i < m_vertex.tail; i++) { - v[i].XYZ.Y += vertical_offset << 4; v[i].XYZ.X += horizontal_offset << 4; + v[i].XYZ.Y += vertical_offset << 4; + } + + if (texture_offset && src && src->m_from_target && src->m_target_direct && src->m_from_target == rt) + { + GSVector4i src_region = src->GetRegionRect(); + + if (src_region.rempty()) + { + src_region = GSVector4i::loadh(rt->m_unscaled_size); + src_region.y += texture_offset; + } + else + { + src_region.y += texture_offset; + src_region.w += texture_offset; + } + src->m_region.SetX(src_region.x, src_region.z); + src->m_region.SetY(src_region.y, src_region.w); } m_context->scissor.in.x += horizontal_offset; @@ -3004,6 +3051,7 @@ void GSRendererHW::Draw() src->m_texture = rt->m_texture; src->m_scale = rt->GetScale(); src->m_unscaled_size = rt->m_unscaled_size; + } target_scale = rt->GetScale(); @@ -3414,7 +3462,7 @@ void GSRendererHW::Draw() GL_INS("Resize RT from %dx%d to %dx%d", rt->GetUnscaledWidth(), rt->GetUnscaledHeight(), new_w, new_h); // May not be needed/could cause problems with garbage loaded from GS memory - if (preserve_rt_color) + /*if (preserve_rt_color) { RGBAMask mask; mask._u32 = 0xF; @@ -3430,7 +3478,7 @@ void GSRendererHW::Draw() GSVector4i height_dirty_rect = GSVector4i(0, rt->m_unscaled_size.y, new_w, new_h); g_texture_cache->AddDirtyRectTarget(rt, height_dirty_rect, rt->m_TEX0.PSM, rt->m_TEX0.TBW, mask); } - } + }*/ rt->ResizeTexture(new_w, new_h); @@ -3485,10 +3533,11 @@ void GSRendererHW::Draw() const bool new_rect = ds->m_valid.rempty(); const bool new_height = new_h > ds->GetUnscaledHeight(); const int old_height = ds->m_texture->GetHeight(); - const GSVector4i old_rect = ds->GetUnscaledRect(); + pxAssert(ds->GetScale() == target_scale); if (ds->GetUnscaledWidth() != new_w || ds->GetUnscaledHeight() != new_h) GL_INS("Resize DS from %dx%d to %dx%d", ds->GetUnscaledWidth(), ds->GetUnscaledHeight(), new_w, new_h); + ds->ResizeTexture(new_w, new_h); if (!m_texture_shuffle && !m_channel_shuffle) @@ -7821,10 +7870,10 @@ ClearType GSRendererHW::IsConstantDirectWriteMemClear() && !(m_draw_env->SCANMSK.MSK & 2) && !m_cached_ctx.TEST.ATE // no alpha test && !m_cached_ctx.TEST.DATE // no destination alpha test && (!m_cached_ctx.TEST.ZTE || m_cached_ctx.TEST.ZTST == ZTST_ALWAYS) // no depth test - && (m_vt.m_eq.rgba == 0xFFFF || m_vertex.next == 2) // constant color write - && !(PRIM->ABE && m_context->ALPHA.IsCdInBlend())) // Not keeping dest color + && (m_vt.m_eq.rgba == 0xFFFF || m_vertex.next == 2) + && (!PRIM->FGE || m_vt.m_max.p.w == 0.0f)) // constant color write { - if (PRIM->ABE && !m_context->ALPHA.IsOpaque() || m_cached_ctx.FRAME.FBMSK) + if ((PRIM->ABE && !m_context->ALPHA.IsOpaque()) || m_cached_ctx.FRAME.FBMSK) return ClearWithDraw; return NormalClear; diff --git a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp index 1e452cdb58..ecdc1d8e7b 100644 --- a/pcsx2/GS/Renderers/HW/GSTextureCache.cpp +++ b/pcsx2/GS/Renderers/HW/GSTextureCache.cpp @@ -236,7 +236,7 @@ bool GSTextureCache::CanTranslate(u32 bp, u32 bw, u32 spsm, GSVector4i r, u32 db // The page width matches. // The rect width is less than the width of the destination texture and the height is less than or equal to 1 page high. // The rect width and height is equal to the page size and it covers the width of the incoming bw, so lines are sequential. - const bool page_aligned_rect = masked_rect.eq(r); + const bool page_aligned_rect = masked_rect.xyxy().eq(r.xyxy()); const bool width_match = ((bw * 64) / src_page_size.x) == ((dbw * 64) / dst_page_size.x); const bool sequential_pages = page_aligned_rect && r.x == 0 && r.z == src_pixel_width; const bool single_row = (((bw * 64) / src_page_size.x) <= ((dbw * 64) / dst_page_size.x)) && r.z <= src_pixel_width && r.w <= src_page_size.y; @@ -277,12 +277,12 @@ GSVector4i GSTextureCache::TranslateAlignedRectByPage(u32 tbp, u32 tebp, u32 tbw in_rect = (in_rect + GSVector4i(inc_horizontal_offset, 0).xyxy()).max_i32(GSVector4i(0)); // Project Snowblind and Tomb Raider access the rect offset by 1 page and use a region to correct it, we need to account for that here. - if (in_rect.x >= (dst_pgw * dst_page_size.x)) + if (in_rect.x >= (src_pgw * src_page_size.x)) { - in_rect.z -= dst_pgw * dst_page_size.x; - in_rect.x -= dst_pgw * dst_page_size.x; - in_rect.y += dst_page_size.y; - in_rect.w += dst_page_size.y; + in_rect.z -= src_pgw * src_page_size.x; + in_rect.x -= src_pgw * src_page_size.x; + in_rect.y += src_page_size.y; + in_rect.w += src_page_size.y; } page_offset = 0; single_page = (in_rect.width() / src_page_size.x) <= 1 && (in_rect.height() / src_page_size.y) <= 1; @@ -1458,13 +1458,24 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const // Make sure the texture actually is INSIDE the RT, it's possibly not valid if it isn't. // Also check BP >= TBP, create source isn't equpped to expand it backwards and all data comes from the target. (GH3) else if (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && - (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && t->m_TEX0.TBW >= (bw * 2))) && // Channel shuffles or non indexed lookups. + (GSLocalMemory::m_psm[color_psm].bpp >= 16 || (/*possible_shuffle &&*/ GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) && // Channel shuffles or non indexed lookups. t->m_age <= 1 && (!found_t || t->m_last_draw > dst->m_last_draw) /*&& CanTranslate(bp, bw, psm, block_boundary_rect, t->m_TEX0.TBP0, t->m_TEX0.PSM, t->m_TEX0.TBW)*/) { if (!t->HasValidBitsForFormat(psm, req_color, req_alpha) && !(possible_shuffle && GSLocalMemory::m_psm[psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32)) continue; + if (GSLocalMemory::m_psm[color_psm].bpp == 16 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp && (t->m_TEX0.TBW != bw && (t->m_TEX0.TBW * 2) != bw)) + { + DevCon.Warning("BP %x - 16bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } + else if (!possible_shuffle && (GSLocalMemory::m_psm[color_psm].bpp == 8 && GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == 32 && + !((t->m_TEX0.TBW == (bw / 2)) || (t->m_TEX0.TBW >= (bw / 2) && (req_rect.w < GSLocalMemory::m_psm[psm].pgs.y))))) + { + DevCon.Warning("BP %x - 8bit bad match for target bp %x bw %d src %d format %d", bp, t->m_TEX0.TBP0, t->m_TEX0.TBW, bw, t->m_TEX0.PSM); + continue; + } // PSM equality needed because CreateSource does not handle PSM conversion. // Only inclusive hit to limit false hits. GSVector4i rect = req_rect; @@ -1600,7 +1611,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const bool is_color, const if (bp < t->m_TEX0.TBP0 && region.HasX() && region.HasY() && (region.GetMinX() & (page_size.x - 1)) == 0 && (region.GetMinY() & (page_size.y - 1)) == 0 && (offset.bn(region.GetMinX(), region.GetMinY()) == t->m_TEX0.TBP0 || - (offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw)) + ((offset_bp >= t->m_TEX0.TBP0) && ((((offset_bp - t->m_TEX0.TBP0) >> 5) % bw) + (rect.width() / page_size.x)) <= bw))) { GL_CACHE("TC: Target 0x%x detected in front of TBP 0x%x with %d,%d offset (%d pages)", t->m_TEX0.TBP0, TEX0.TBP0, region.GetMinX(), region.GetMinY(), @@ -1915,7 +1926,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe } } // Probably pointing to half way through the target - else if (!min_rect.rempty()&& GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) + else if (!min_rect.rempty() && GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets) { // Problem: Project - Snowblind and Tomb Raider offset the RT but not the Z /*if (offset != -1 && (bp - t->m_TEX0.TBP0) != offset) @@ -2607,7 +2618,7 @@ bool GSTextureCache::PreloadTarget(GIFRegTEX0 TEX0, const GSVector2i& size, cons if (valid_draw_size && supported_fmt) { - const GSVector4i newrect = GSVector4i::loadh(valid_size); + const GSVector4i newrect = GSVector4i::loadh(size); const u32 rect_end = GSLocalMemory::GetUnwrappedEndBlockAddress(TEX0.TBP0, TEX0.TBW, TEX0.PSM, newrect); RGBAMask rgba; @@ -3223,7 +3234,7 @@ void GSTextureCache::InvalidateContainedTargets(u32 start_bp, u32 end_bp, u32 wr continue; } - const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; + //const u32 total_pages = ((end_bp + 1) - t->m_TEX0.TBP0) >> 5; // Not covering the whole target, and a different format, so just dirty it. /*if (start_bp >= t->m_TEX0.TBP0 && (t->UnwrappedEndBlock() > end_bp) && write_psm != t->m_TEX0.PSM && write_bw == t->m_TEX0.TBW) { @@ -4315,8 +4326,8 @@ GSTextureCache::Target* GSTextureCache::GetExactTarget(u32 BP, u32 BW, int type, for (auto it = rts.begin(); it != rts.end(); ++it) // Iterate targets from MRU to LRU. { Target* t = *it; - - if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % t->m_TEX0.TBW) == 0)) && t->m_TEX0.TBW == BW && t->UnwrappedEndBlock() >= end_bp) + const u32 tgt_bw = std::max(t->m_TEX0.TBW, 1U); + if ((t->m_TEX0.TBP0 == BP || (GSConfig.UserHacks_TextureInsideRt >= GSTextureInRtMode::InsideTargets && t->m_TEX0.TBP0 < BP && ((BP >> 5) % tgt_bw) == 0)) && tgt_bw == BW && t->UnwrappedEndBlock() >= end_bp) { rts.MoveFront(it.Index()); return t; @@ -5033,8 +5044,22 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con std::max(dst->m_TEX0.TBW, 1u) * 64, dst->m_TEX0.PSM, dTex, std::max(TEX0.TBW, 1u) * 64, TEX0.PSM); - src->m_region.SetX((x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x, tw); - src->m_region.SetY((y_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y) * GSLocalMemory::m_psm[TEX0.PSM].pgs.y, th); + // Adjust the region for the newly translated rect. + u32 const dst_y_height = GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.y; + u32 const src_y_height = GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + u32 const dst_page_offset = (y_offset / dst_y_height) * std::max(dst->m_TEX0.TBW, 1U); + y_offset = (dst_page_offset / (std::max(TEX0.TBW / 2U, 1U))) * src_y_height; + + u32 const src_page_width = GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + x_offset = (x_offset / GSLocalMemory::m_psm[dst->m_TEX0.PSM].pgs.x) * GSLocalMemory::m_psm[TEX0.PSM].pgs.x; + if (x_offset >= static_cast(std::max(TEX0.TBW, 1U) * src_page_width)) + { + const u32 adjust = x_offset / src_page_width; + y_offset += adjust * GSLocalMemory::m_psm[TEX0.PSM].pgs.y; + x_offset -= src_page_width * adjust; + } + src->m_region.SetX(x_offset, x_offset + tw); + src->m_region.SetY(y_offset, y_offset + th); } else { @@ -6663,9 +6688,6 @@ void GSTextureCache::Target::ResizeValidity(const GSVector4i& rect) void GSTextureCache::Target::UpdateValidity(const GSVector4i& rect, bool can_resize) { - if (m_TEX0.TBP0 == 0x1a00 && rect.w == 448 && can_resize) - DevCon.Warning("Here"); - if (m_valid.eq(GSVector4i::zero())) { m_valid = rect; diff --git a/pcsx2/GS/Renderers/Metal/tfx.metal b/pcsx2/GS/Renderers/Metal/tfx.metal index f6e4fc04be..296342ca51 100644 --- a/pcsx2/GS/Renderers/Metal/tfx.metal +++ b/pcsx2/GS/Renderers/Metal/tfx.metal @@ -1168,8 +1168,8 @@ struct PSMain { if (PS_PROCESS_BA == SHUFFLE_READWRITE && PS_PROCESS_RG == SHUFFLE_READWRITE) { - C.b = C.r; - C.a = C.g; + C.br = C.rb; + C.ag = C.ga; } else if(PS_PROCESS_BA & SHUFFLE_READ) {