mirror of https://github.com/PCSX2/pcsx2.git
GS-HW: Improve Local->Host and preload accuracy.
This commit is contained in:
parent
a97df14064
commit
9a53f0f853
|
@ -1766,7 +1766,15 @@ void GSState::Write(const u8* mem, int len)
|
|||
r.bottom = r.top + m_env.TRXREG.RRH;
|
||||
|
||||
// Store the transfer for preloading new RT's.
|
||||
if (m_draw_transfers.size() == 0 || (m_draw_transfers.size() > 0 && blit.DBP != m_draw_transfers.back().blit.DBP))
|
||||
if ((m_draw_transfers.size() > 0 && blit.DBP == m_draw_transfers.back().blit.DBP))
|
||||
{
|
||||
// Same BP, let's update the rect.
|
||||
GSUploadQueue transfer = m_draw_transfers.back();
|
||||
m_draw_transfers.pop_back();
|
||||
transfer.rect = transfer.rect.runion(r);
|
||||
m_draw_transfers.push_back(transfer);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSUploadQueue new_transfer = { blit, r, s_n };
|
||||
m_draw_transfers.push_back(new_transfer);
|
||||
|
@ -1915,16 +1923,22 @@ void GSState::Move()
|
|||
Flush(GSFlushReason::LOCALTOLOCALMOVE);
|
||||
}
|
||||
|
||||
GSVector4i r;
|
||||
r.left = m_env.TRXPOS.DSAX;
|
||||
r.top = m_env.TRXPOS.DSAY;
|
||||
r.right = r.left + m_env.TRXREG.RRW;
|
||||
r.bottom = r.top + m_env.TRXREG.RRH;
|
||||
// Store the transfer for preloading new RT's.
|
||||
if (m_draw_transfers.size() == 0 || (m_draw_transfers.size() > 0 && dbp != m_draw_transfers.back().blit.DBP))
|
||||
if ((m_draw_transfers.size() > 0 && m_env.BITBLTBUF.DBP == m_draw_transfers.back().blit.DBP))
|
||||
{
|
||||
// Same BP, let's update the rect.
|
||||
GSUploadQueue transfer = m_draw_transfers.back();
|
||||
m_draw_transfers.pop_back();
|
||||
transfer.rect = transfer.rect.runion(r);
|
||||
m_draw_transfers.push_back(transfer);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i r;
|
||||
|
||||
r.left = m_env.TRXPOS.DSAX;
|
||||
r.top = m_env.TRXPOS.DSAY;
|
||||
r.right = r.left + m_env.TRXREG.RRW;
|
||||
r.bottom = r.top + m_env.TRXREG.RRH;
|
||||
|
||||
GSUploadQueue new_transfer = { m_env.BITBLTBUF, r, s_n };
|
||||
m_draw_transfers.push_back(new_transfer);
|
||||
}
|
||||
|
|
|
@ -834,24 +834,51 @@ void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
if (clut)
|
||||
return; // FIXME
|
||||
|
||||
u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW);
|
||||
const u32 incoming_end = GSLocalMemory::m_psm[BITBLTBUF.SPSM].info.bn(r.z - 1, r.w - 1, BITBLTBUF.SBP, BITBLTBUF.SBW);
|
||||
std::vector<GSState::GSUploadQueue>::iterator iter = GSRendererHW::GetInstance()->m_draw_transfers.end();
|
||||
|
||||
bool skip = false;
|
||||
// If the EE write overlaps the readback and was done since the last draw, there's no need to read it back.
|
||||
// Dog's life and Ratchet Gladiator do this.
|
||||
while (iter != GSRendererHW::GetInstance()->m_draw_transfers.begin())
|
||||
{
|
||||
--iter;
|
||||
u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW);
|
||||
|
||||
if (!GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) || iter->draw != s_n)
|
||||
continue;
|
||||
|
||||
// Make sure write covers the read area.
|
||||
const u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW);
|
||||
if (!(iter->blit.DBP < incoming_end && ee_write_end > BITBLTBUF.SBP))
|
||||
continue;
|
||||
|
||||
GSTextureCache::SurfaceOffsetKey sok;
|
||||
sok.elems[0].bp = BITBLTBUF.SBP;
|
||||
sok.elems[0].bw = BITBLTBUF.SBW;
|
||||
sok.elems[0].psm = BITBLTBUF.SPSM;
|
||||
sok.elems[0].rect = r;
|
||||
sok.elems[1].bp = iter->blit.DBP;
|
||||
sok.elems[1].bw = iter->blit.DBW;
|
||||
sok.elems[1].psm = iter->blit.DPSM;
|
||||
sok.elems[1].rect = iter->rect;
|
||||
|
||||
// Calculate the rect offset if the BP doesn't match.
|
||||
const GSVector4i targetr = GSUtil::HasCompatibleBits(iter->blit.DPSM, BITBLTBUF.SPSM) ? r : m_tc->ComputeSurfaceOffset(sok).b2a_offset;
|
||||
|
||||
// Possibly incompatible or missed, we don't know, so let's assume it's a fail.
|
||||
if (targetr.rempty())
|
||||
continue;
|
||||
|
||||
//u32 ee_write_end = GSLocalMemory::m_psm[iter->blit.DPSM].info.bn(iter->rect.z - 1, iter->rect.w - 1, iter->blit.DBP, iter->blit.DBW);
|
||||
// If the format, and location doesn't match, but also the upload is at least the size of the target, don't preload.
|
||||
if (iter->blit.DBP < incoming_end && GSUtil::HasSharedBits(iter->blit.DPSM, BITBLTBUF.SPSM) && ee_write_end > BITBLTBUF.SBP && iter->draw == s_n)
|
||||
if (iter->rect.rintersect(targetr).eq(targetr))
|
||||
{
|
||||
//DevCon.Warning("Download from same draw as write address %x, skipping invalidation", BITBLTBUF.SBP);
|
||||
return;
|
||||
skip = true;
|
||||
}
|
||||
}
|
||||
|
||||
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
||||
if(!skip)
|
||||
m_tc->InvalidateLocalMem(m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM), r);
|
||||
}
|
||||
|
||||
void GSRendererHW::Move()
|
||||
|
|
|
@ -833,27 +833,56 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
|
|||
{
|
||||
const bool forced_preload = GSRendererHW::GetInstance()->m_force_preload > 0;
|
||||
const GSVector4i newrect = GSVector4i(0, 0, real_w, real_h);
|
||||
const u32 rect_end = GSLocalMemory::m_psm[TEX0.PSM].info.bn(newrect.z - 1, newrect.w - 1, TEX0.TBP0, TEX0.TBW);
|
||||
|
||||
if (!is_frame && !forced_preload && !preload)
|
||||
{
|
||||
std::vector<GSState::GSUploadQueue>::iterator iter;
|
||||
GSVector4i eerect = GSVector4i::zero();
|
||||
for (iter = GSRendererHW::GetInstance()->m_draw_transfers.begin(); iter != GSRendererHW::GetInstance()->m_draw_transfers.end(); )
|
||||
{
|
||||
// If the format, and location doesn't match, but also the upload is at least the size of the target, don't preload.
|
||||
if (iter->blit.DBP == TEX0.TBP0 && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM) && iter->rect.rintersect(newrect).eq(newrect))
|
||||
// If the format, and location doesn't overlap
|
||||
if (iter->blit.DBP >= TEX0.TBP0 && iter->blit.DBP <= rect_end && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM))
|
||||
{
|
||||
GSRendererHW::GetInstance()->m_draw_transfers.erase(iter);
|
||||
GL_INS("Preloading the RT DATA");
|
||||
AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW);
|
||||
dst->Update(true);
|
||||
break;
|
||||
GSTextureCache::SurfaceOffsetKey sok;
|
||||
sok.elems[0].bp = iter->blit.DBP;
|
||||
sok.elems[0].bw = iter->blit.DBW;
|
||||
sok.elems[0].psm = iter->blit.DPSM;
|
||||
sok.elems[0].rect = iter->rect;
|
||||
sok.elems[1].bp = TEX0.TBP0;
|
||||
sok.elems[1].bw = TEX0.TBW;
|
||||
sok.elems[1].psm = TEX0.PSM;
|
||||
sok.elems[1].rect = newrect;
|
||||
|
||||
// Calculate the rect offset if the BP doesn't match.
|
||||
const GSVector4i targetr = (iter->blit.DBP == TEX0.TBP0 && GSUtil::HasCompatibleBits(iter->blit.DPSM, TEX0.PSM)) ? iter->rect : ComputeSurfaceOffset(sok).b2a_offset;
|
||||
if (eerect.rempty())
|
||||
eerect = targetr;
|
||||
else
|
||||
eerect = eerect.runion(targetr);
|
||||
|
||||
iter = GSRendererHW::GetInstance()->m_draw_transfers.erase(iter);
|
||||
|
||||
if (eerect.rintersect(newrect).eq(newrect))
|
||||
break;
|
||||
else
|
||||
continue;
|
||||
}
|
||||
iter++;
|
||||
}
|
||||
|
||||
if (!eerect.rempty())
|
||||
{
|
||||
GL_INS("Preloading the RT DATA");
|
||||
dst->UpdateValidity(eerect);
|
||||
AddDirtyRectTarget(dst, eerect, TEX0.PSM, TEX0.TBW);
|
||||
dst->Update(true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
GL_INS("Preloading the RT DATA");
|
||||
dst->UpdateValidity(newrect);
|
||||
AddDirtyRectTarget(dst, newrect, TEX0.PSM, TEX0.TBW);
|
||||
dst->Update(true);
|
||||
}
|
||||
|
@ -1441,6 +1470,9 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
const u32 bp = off.bp();
|
||||
const u32 psm = off.psm();
|
||||
[[maybe_unused]] const u32 bw = off.bw();
|
||||
const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw);
|
||||
const u32 read_end = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw);
|
||||
const bool read_paltex = GSLocalMemory::m_psm[psm].pal > 0;
|
||||
|
||||
GL_CACHE("TC: InvalidateLocalMem off(0x%x, %u, %s) r(%d, %d => %d, %d)",
|
||||
bp,
|
||||
|
@ -1468,7 +1500,9 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
{
|
||||
Target* t = *it;
|
||||
|
||||
if (!t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
|
||||
// Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want.
|
||||
const bool expecting_this_tex = ((bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0) && read_end <= t->m_end_block;
|
||||
if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || ((bp != t->m_TEX0.TBP0) && !GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)))
|
||||
continue;
|
||||
|
||||
const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp;
|
||||
|
@ -1496,11 +1530,29 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
Target* t = *it;
|
||||
if (t->m_TEX0.PSM != PSM_PSMZ32 && t->m_TEX0.PSM != PSM_PSMZ24 && t->m_TEX0.PSM != PSM_PSMZ16 && t->m_TEX0.PSM != PSM_PSMZ16S)
|
||||
{
|
||||
const u32 read_start = GSLocalMemory::m_psm[psm].info.bn(r.x, r.y, bp, bw);
|
||||
// Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want.
|
||||
const bool expecting_this_tex = (bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0;
|
||||
// propagate the format from the result of a channel effect
|
||||
// texture is 16/8 bit but the real data is 32
|
||||
// common use for shuffling is moving data into the alpha channel
|
||||
// the game can then draw using 8H format
|
||||
// in the case of silent hill blit 8H -> 8P
|
||||
// this will matter later when the data ends up in GS memory in the wrong format
|
||||
// Be careful to avoid 24 bit textures which are technically 32bit, as you could lose alpha (8H) data.
|
||||
if (t->m_32_bits_fmt && t->m_TEX0.PSM > PSM_PSMCT24)
|
||||
t->m_TEX0.PSM = PSM_PSMCT32;
|
||||
|
||||
if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || t->m_age >= 30)
|
||||
// Check the offset of the read, if they're not pointing at or inside this texture, it's probably not what we want.
|
||||
const bool expecting_this_tex = ((bp < t->m_TEX0.TBP0 && read_start >= t->m_TEX0.TBP0) || bp >= t->m_TEX0.TBP0) && read_end <= t->m_end_block;
|
||||
const bool target_paltex = GSLocalMemory::m_psm[t->m_TEX0.PSM].pal > 0;
|
||||
// Only allow an indexed format on a 32bit colour, if it's alpha channel.
|
||||
const bool alpha_read = t->m_TEX0.PSM == PSM_PSMCT32 && psm >= PSM_PSMT8H;
|
||||
|
||||
// Okay this is a nightmare of a check, so these are the conditions:
|
||||
// 1. Check if it's expecting this texture, so the read must be inside this texture, even if the BP doesn't match.
|
||||
// 2. It must overlap (okay maybe redundant).
|
||||
// 3. If it's a paltex (indexed format), the target must also be paltex, unless it's only reading the alpha channel.
|
||||
// 4. They share bits in some capacity.
|
||||
// 5. If the BP doesn't match, make sure the formats are compatible, at very least (like CT32 + CT24 for example).
|
||||
if (!expecting_this_tex || !t->Overlaps(bp, bw, psm, r) || (read_paltex != target_paltex && !alpha_read) || !GSUtil::HasSharedBits(psm, t->m_TEX0.PSM) || (bp != t->m_TEX0.TBP0 && !GSUtil::HasCompatibleBits(psm, t->m_TEX0.PSM)))
|
||||
continue;
|
||||
|
||||
const bool bpp_match = GSLocalMemory::m_psm[t->m_TEX0.PSM].bpp == GSLocalMemory::m_psm[psm].bpp;
|
||||
|
@ -1514,15 +1566,10 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
sok.elems[1].bw = t->m_TEX0.TBW;
|
||||
sok.elems[1].psm = t->m_TEX0.PSM;
|
||||
sok.elems[1].rect = t->m_valid;
|
||||
// Calculate the rect offset if the BP doesn't match.
|
||||
const GSVector4i targetr = (format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset;
|
||||
|
||||
// Some games like to offset their GS download memory addresses by
|
||||
// using overly big source Y position values.
|
||||
// Checking for targets that overlap with the requested memory region
|
||||
// instead of just comparing TBPs should fix that.
|
||||
// For example, this fixes Judgement ring rendering in Shadow Hearts 2.
|
||||
// Be wary of old targets being misdetected, set a sensible range of 30 frames (like Display source lookups).
|
||||
// Calculate the rect offset if the BP doesn't match.
|
||||
const GSVector4i targetr = GSVector4i((format_match) ? r.rintersect(t->m_valid) : ComputeSurfaceOffset(sok).b2a_offset).rintersect(t->m_drawn_since_read);
|
||||
|
||||
if (!targetr.rempty())
|
||||
{
|
||||
// GH Note: Read will do a StretchRect and then will sizzle data to the GS memory
|
||||
|
@ -1537,21 +1584,10 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
// Read(t, t->m_valid) works in all tested games but is very slow in GUST titles ><
|
||||
// Update: 18/02/2023: Chaos legion breaks because it reads the width at half of the real width.
|
||||
// Surface offset deals with this.
|
||||
|
||||
// If the game has been spamming downloads, we've already read the whole texture back at this point.
|
||||
if (t->m_drawn_since_read.rempty())
|
||||
if (t->m_drawn_since_read.rempty() || !t->m_dirty.empty())
|
||||
continue;
|
||||
|
||||
// propagate the format from the result of a channel effect
|
||||
// texture is 16/8 bit but the real data is 32
|
||||
// common use for shuffling is moving data into the alpha channel
|
||||
// the game can then draw using 8H format
|
||||
// in the case of silent hill blit 8H -> 8P
|
||||
// this will matter later when the data ends up in GS memory in the wrong format
|
||||
// Be careful to avoid 24 bit textures which are technically 32bit, as you could lose alpha (8H) data.
|
||||
if (t->m_32_bits_fmt && t->m_TEX0.PSM > PSM_PSMCT24)
|
||||
t->m_TEX0.PSM = PSM_PSMCT32;
|
||||
|
||||
if (GSConfig.HWDownloadMode != GSHardwareDownloadMode::Enabled)
|
||||
{
|
||||
const GSVector4i rb_rc((!GSConfig.UserHacks_DisablePartialInvalidation && targetr.x == 0 && targetr.y == 0) ? t->m_valid : targetr.rintersect(t->m_valid));
|
||||
|
@ -1569,33 +1605,30 @@ void GSTextureCache::InvalidateLocalMem(const GSOffset& off, const GSVector4i& r
|
|||
else if(!targetr.rintersect(t->m_drawn_since_read).rempty()) // Block level read?
|
||||
{
|
||||
// Read the width of the draw, reading too much could wipe out dirty memory.
|
||||
GSVector4i full_lines = GSVector4i(0, targetr.y, t->m_drawn_since_read.z, targetr.w);
|
||||
full_lines = targetr.rintersect(t->m_drawn_since_read);
|
||||
|
||||
Read(t, full_lines);
|
||||
Read(t, targetr);
|
||||
|
||||
// After reading, try to cut down our "dirty" rect.
|
||||
if (full_lines.rintersect(t->m_drawn_since_read).eq(t->m_drawn_since_read))
|
||||
if (targetr.rintersect(t->m_drawn_since_read).eq(t->m_drawn_since_read))
|
||||
t->m_drawn_since_read = GSVector4i::zero();
|
||||
else
|
||||
{
|
||||
// Try to cut down how much we read next, if we can.
|
||||
// Fatal Frame reads in vertical strips, SOCOM 2 does horizontal, so we can handle that below.
|
||||
if (full_lines.width() == t->m_drawn_since_read.width()
|
||||
&& full_lines.w >= t->m_drawn_since_read.y)
|
||||
if (targetr.width() == t->m_drawn_since_read.width()
|
||||
&& targetr.w >= t->m_drawn_since_read.y)
|
||||
{
|
||||
if (full_lines.y <= t->m_drawn_since_read.y)
|
||||
t->m_drawn_since_read.y = full_lines.w;
|
||||
else if (full_lines.w >= t->m_drawn_since_read.w)
|
||||
t->m_drawn_since_read.w = full_lines.y;
|
||||
if (targetr.y <= t->m_drawn_since_read.y)
|
||||
t->m_drawn_since_read.y = targetr.w;
|
||||
else if (targetr.w >= t->m_drawn_since_read.w)
|
||||
t->m_drawn_since_read.w = targetr.y;
|
||||
}
|
||||
else if (full_lines.height() == t->m_drawn_since_read.height()
|
||||
&& full_lines.z >= t->m_drawn_since_read.x)
|
||||
else if (targetr.height() == t->m_drawn_since_read.height()
|
||||
&& targetr.z >= t->m_drawn_since_read.x)
|
||||
{
|
||||
if (full_lines.x <= t->m_drawn_since_read.x)
|
||||
t->m_drawn_since_read.x = full_lines.z;
|
||||
else if (full_lines.z >= t->m_drawn_since_read.z)
|
||||
t->m_drawn_since_read.z = full_lines.x;
|
||||
if (targetr.x <= t->m_drawn_since_read.x)
|
||||
t->m_drawn_since_read.x = targetr.z;
|
||||
else if (targetr.z >= t->m_drawn_since_read.z)
|
||||
t->m_drawn_since_read.z = targetr.x;
|
||||
}
|
||||
}
|
||||
t->readbacks_since_draw++;
|
||||
|
|
Loading…
Reference in New Issue