GS/HW: Use multi stretch for preloading

This commit is contained in:
Stenzek 2023-03-02 21:35:17 +10:00 committed by refractionpcsx2
parent 89b18275c0
commit 229cf908b7
7 changed files with 81 additions and 47 deletions

View File

@ -264,12 +264,22 @@ void GSDevice::StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dR
StretchRect(sTex, GSVector4(0, 0, 1, 1), dTex, dRect, shader, linear);
}
void GSDevice::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
void GSDevice::DrawMultiStretchRects(
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{
for (u32 i = 0; i < num_rects; i++)
{
const MultiStretchRect& sr = rects[i];
g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, shader, sr.linear);
// This fallback issues one StretchRect per entry, so each rect must use its own write mask
// rather than the mask of the first rect in the array.
// The masked StretchRect overload used below takes no shader, so a partial mask is only
// valid when the requested shader is a plain copy.
pxAssert(shader == ShaderConvert::COPY || sr.wmask.wrgba == 0xf);
if (sr.wmask.wrgba != 0xf)
{
g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, sr.wmask.wr,
sr.wmask.wg, sr.wmask.wb, sr.wmask.wa);
}
else
{
g_gs_device->StretchRect(sr.src, sr.src_rect, dTex, sr.dst_rect, shader, sr.linear);
}
}
}

View File

@ -712,10 +712,11 @@ public:
// One source-to-destination copy request consumed by DrawMultiStretchRects().
struct MultiStretchRect
{
GSTexture* src;
GSVector4 src_rect; // region of src to read; callers in this change pass rect / source texture size
GSVector4 dst_rect; // region of the destination to write; callers in this change pass rect * target scale
GSTexture* src; // texture sampled for this rect; backends start a new batch when it changes
bool linear; // true selects the linear sampler, false the point sampler
// Per-channel write mask; 0xf writes all channels.
// NOTE(review): no initializer is visible here, so callers appear to be responsible for setting it.
GSHWDrawConfig::ColorMaskSelector wmask;
};
enum BlendFactor : u8

View File

@ -793,23 +793,25 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re
{
IASetInputLayout(m_convert.il.get());
IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
VSSetShader(m_convert.vs.get(), nullptr);
GSSetShader(nullptr, nullptr);
PSSetShader(m_convert.ps[static_cast<int>(shader)].get(), nullptr);
OMSetDepthStencilState(m_convert.dss.get(), 0);
OMSetBlendState(m_convert.bs[D3D11_COLOR_WRITE_ENABLE_ALL].get(), 0.0f);
OMSetRenderTargets(dTex, nullptr);
OMSetDepthStencilState(dTex->IsRenderTarget() ? m_convert.dss.get() : m_convert.dss_write.get(), 0);
OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr);
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
GSTexture* last_tex = rects[0].src;
bool last_linear = rects[0].linear;
u8 last_wmask = rects[0].wmask.wrgba;
u32 first = 0;
u32 count = 1;
// A batch is drawn with the texture, sampler and blend (write mask) state of its first rect,
// so a rect may only join the current batch when all three match. Any difference must fall
// through to flush the batch and start a new one.
for (u32 i = 1; i < num_rects; i++)
{
if (rects[i].src == last_tex && rects[i].linear == last_linear)
if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
{
count++;
continue;
@ -818,6 +820,7 @@ void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_re
DoMultiStretchRects(rects + first, count, ds);
last_tex = rects[i].src;
last_linear = rects[i].linear;
last_wmask = rects[i].wmask.wrgba;
first += count;
count = 1;
}
@ -864,6 +867,9 @@ void GSDevice11::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rect
PSSetShaderResource(0, rects[0].src);
PSSetSamplerState(rects[0].linear ? m_convert.ln.get() : m_convert.pt.get());
OMSetBlendState(m_convert.bs[rects[0].wmask.wrgba].get(), 0.0f);
DrawIndexedPrimitive();
}

View File

@ -483,6 +483,7 @@ void GSDevice12::DrawMultiStretchRects(
{
GSTexture* last_tex = rects[0].src;
bool last_linear = rects[0].linear;
u8 last_wmask = rects[0].wmask.wrgba;
u32 first = 0;
u32 count = 1;
@ -502,7 +503,7 @@ void GSDevice12::DrawMultiStretchRects(
for (u32 i = 1; i < num_rects; i++)
{
if (rects[i].src == last_tex && rects[i].linear == last_linear)
if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
{
count++;
continue;
@ -511,6 +512,7 @@ void GSDevice12::DrawMultiStretchRects(
DoMultiStretchRects(rects + first, count, static_cast<GSTexture12*>(dTex), shader);
last_tex = rects[i].src;
last_linear = rects[i].linear;
last_wmask = rects[i].wmask.wrgba;
first += count;
count = 1;
}
@ -579,12 +581,16 @@ void GSDevice12::DoMultiStretchRects(
// Even though we're batching, a cmdbuffer submit could've messed this up.
const GSVector4i rc(dTex->GetRect());
OMSetRenderTargets(dTex, nullptr, rc);
OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr, rc);
if (!InRenderPass())
BeginRenderPassForStretchRect(dTex, rc, rc, false);
SetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler_cpu : m_point_sampler_cpu);
SetPipeline(m_convert[static_cast<int>(shader)].get());
pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf);
SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba].get() :
m_convert[static_cast<int>(shader)].get());
if (ApplyUtilityState())
DrawIndexedPrimitive();
}

View File

@ -2648,8 +2648,8 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
// Upload texture -> render target.
const bool linear = (scale.x != 1.0f);
copy_queue[copy_count++] = {lmtex, GSVector4(rect) / GSVector4(lmtex->GetSize()).xyxy(),
GSVector4(rect) * GSVector4(scale).xyxy(), linear};
copy_queue[copy_count++] = {GSVector4(rect) / GSVector4(lmtex->GetSize()).xyxy(),
GSVector4(rect) * GSVector4(scale).xyxy(), lmtex, linear, 0xf};
};
// The idea: loop through pages that this texture covers, find targets which overlap, and copy them in.
@ -2769,13 +2769,13 @@ GSTextureCache::Source* GSTextureCache::CreateMergedSource(GIFRegTEX0 TEX0, GIFR
}
GL_INS(" Copy from %d,%d -> %d,%d (%dx%d)", src_x, src_y, dst_x, dst_y, copy_width, copy_height);
copy_queue[copy_count++] = {t->m_texture,
copy_queue[copy_count++] = {
(GSVector4(src_x, src_y, src_x + copy_width, src_y + copy_height) *
GSVector4(t->m_texture->GetScale()).xyxy()) /
GSVector4(t->m_texture->GetSize()).xyxy(),
GSVector4(dst_x, dst_y, dst_x + copy_width, dst_y + copy_height) *
GSVector4(scale).xyxy(),
linear};
t->m_texture, linear, 0xf};
}
row_page++;
@ -3644,6 +3644,15 @@ void GSTextureCache::Target::Update(bool reset_age)
TEXA.TA0 = 0;
TEXA.TA1 = 0x80;
// Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy.
// It's needed for depth, though.. filtering depth doesn't make much sense, but SMT3 needs it..
const bool upscaled = (m_texture->GetScale().x > 1.0f);
const bool linear = (m_type == RenderTarget && upscaled);
GSDevice::MultiStretchRect* drects = static_cast<GSDevice::MultiStretchRect*>(
alloca(sizeof(GSDevice::MultiStretchRect) * static_cast<u32>(m_dirty.size())));
u32 ndrects = 0;
const GSOffset off(g_gs_renderer->m_mem.GetOffset(m_TEX0.TBP0, m_TEX0.TBW, m_TEX0.PSM));
for (size_t i = 0; i < m_dirty.size(); i++)
{
@ -3664,41 +3673,35 @@ void GSTextureCache::Target::Update(bool reset_age)
t->Update(t_r, s_unswizzle_buffer, pitch);
}
GSDevice::MultiStretchRect& drect = drects[ndrects++];
drect.src = t;
drect.src_rect = GSVector4(r - t_offset) / t_sizef;
drect.dst_rect = GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy();
drect.linear = linear;
// Copy the new GS memory content into the destination texture.
if (m_type == RenderTarget)
{
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w);
drect.wmask = static_cast<u8>(m_dirty[i].rgba._u32);
}
else if (m_type == DepthStencil)
{
GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0);
drect.wmask = 0xF;
}
}
if (mapped)
t->Unmap();
for (size_t i = 0; i < m_dirty.size(); i++)
if (ndrects > 0)
{
const GSVector4i r(m_dirty.GetDirtyRect(i, m_TEX0, total_rect));
if (r.rempty())
continue;
const GSVector4 sRect(GSVector4(r - t_offset) / t_sizef);
const GSVector4 dRect(GSVector4(r) * GSVector4(m_texture->GetScale()).xyxy());
// Copy the new GS memory content into the destination texture.
if (m_type == RenderTarget)
{
GL_INS("ERROR: Update RenderTarget 0x%x bw:%d (%d,%d => %d,%d)", m_TEX0.TBP0, m_TEX0.TBW, r.x, r.y, r.z, r.w);
if (m_dirty[i].rgba._u32 != 0xf)
{
g_gs_device->StretchRect(t, sRect, m_texture, dRect, m_dirty[i].rgba.c.r, m_dirty[i].rgba.c.g, m_dirty[i].rgba.c.b, m_dirty[i].rgba.c.a);
}
else
{
// Bilinear filtering this is probably not a good thing, at least in native, but upscaling Nearest can be gross and messy.
g_gs_device->StretchRect(t, sRect, m_texture, dRect, ShaderConvert::COPY, g_gs_renderer->CanUpscale());
}
}
else if (m_type == DepthStencil)
{
GL_INS("ERROR: Update DepthStencil 0x%x", m_TEX0.TBP0);
// FIXME linear or not?
g_gs_device->StretchRect(t, sRect, m_texture, dRect, ShaderConvert::RGBA8_TO_FLOAT32_BILN);
}
// No need to sort here, it's all the one texture.
// Render targets are plain copies. Depth targets go through the RGBA8 -> float32 conversion;
// the bilinear variant is only wanted when the target is upscaled, and native resolution uses
// the unfiltered variant (see the filtering note where `upscaled` is computed).
const ShaderConvert shader = (m_type == RenderTarget) ? ShaderConvert::COPY :
(upscaled ? ShaderConvert::RGBA8_TO_FLOAT32_BILN :
ShaderConvert::RGBA8_TO_FLOAT32);
g_gs_device->DrawMultiStretchRects(drects, ndrects, m_texture, shader);
}
UpdateValidity(total_rect);

View File

@ -1323,13 +1323,14 @@ void GSDeviceOGL::DrawMultiStretchRects(
const GSVector2 ds(static_cast<float>(dTex->GetWidth()), static_cast<float>(dTex->GetHeight()));
GSTexture* last_tex = rects[0].src;
bool last_linear = rects[0].linear;
u8 last_wmask = rects[0].wmask.wrgba;
u32 first = 0;
u32 count = 1;
// A batch is drawn with the texture, sampler and colour mask of its first rect, so a rect may
// only join the current batch when all three match. Any difference must fall through to flush
// the batch and start a new one.
for (u32 i = 1; i < num_rects; i++)
{
if (rects[i].src == last_tex && rects[i].linear == last_linear)
if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
{
count++;
continue;
@ -1338,6 +1339,7 @@ void GSDeviceOGL::DrawMultiStretchRects(
DoMultiStretchRects(rects + first, count, ds);
last_tex = rects[i].src;
last_linear = rects[i].linear;
last_wmask = rects[i].wmask.wrgba;
first += count;
count = 1;
}
@ -1391,6 +1393,7 @@ void GSDeviceOGL::DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rec
PSSetShaderResource(0, rects[0].src);
PSSetSamplerState(rects[0].linear ? m_convert.ln : m_convert.pt);
OMSetColorMaskState(rects[0].wmask);
DrawIndexedPrimitive();
}

View File

@ -558,6 +558,7 @@ void GSDeviceVK::DrawMultiStretchRects(
{
GSTexture* last_tex = rects[0].src;
bool last_linear = rects[0].linear;
u8 last_wmask = rects[0].wmask.wrgba;
u32 first = 0;
u32 count = 1;
@ -577,7 +578,7 @@ void GSDeviceVK::DrawMultiStretchRects(
for (u32 i = 1; i < num_rects; i++)
{
if (rects[i].src == last_tex && rects[i].linear == last_linear)
if (rects[i].src == last_tex && rects[i].linear == last_linear && rects[i].wmask.wrgba == last_wmask)
{
count++;
continue;
@ -586,6 +587,7 @@ void GSDeviceVK::DrawMultiStretchRects(
DoMultiStretchRects(rects + first, count, static_cast<GSTextureVK*>(dTex), shader);
last_tex = rects[i].src;
last_linear = rects[i].linear;
last_wmask = rects[i].wmask.wrgba;
first += count;
count = 1;
}
@ -651,11 +653,14 @@ void GSDeviceVK::DoMultiStretchRects(
// Even though we're batching, a cmdbuffer submit could've messed this up.
const GSVector4i rc(dTex->GetRect());
OMSetRenderTargets(dTex, nullptr, rc, false);
OMSetRenderTargets(dTex->IsRenderTarget() ? dTex : nullptr, dTex->IsDepthStencil() ? dTex : nullptr, rc, false);
if (!InRenderPass() || !CheckRenderPassArea(rc))
BeginRenderPassForStretchRect(dTex, rc, rc, false);
SetUtilityTexture(rects[0].src, rects[0].linear ? m_linear_sampler : m_point_sampler);
SetPipeline(m_convert[static_cast<int>(shader)]);
pxAssert(shader == ShaderConvert::COPY || rects[0].wmask.wrgba == 0xf);
SetPipeline((rects[0].wmask.wrgba != 0xf) ? m_color_copy[rects[0].wmask.wrgba] : m_convert[static_cast<int>(shader)]);
if (ApplyUtilityState())
DrawIndexedPrimitive();
}