GS: Add GPU Target CLUT

This commit is contained in:
Stenzek 2023-01-02 23:14:10 +10:00 committed by refractionpcsx2
parent 0619555232
commit d30e076dbd
26 changed files with 518 additions and 68 deletions

View File

@ -23,7 +23,8 @@ cbuffer cb0 : register(b0)
float4 BGColor;
int EMODA;
int EMODC;
int cb0_pad[2];
int DOFFSET;
int cb0_pad;
};
static const float3x3 rgb2yuv =
@ -291,6 +292,41 @@ PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input)
return output;
}
// Produces one texel of the 4-bit GPU CLUT texture by loading the matching
// palette entry from the source texture.
// Constants are borrowed from the YUV constant buffer (cb0):
//   BGColor.xy   - scale applied to the computed source coordinates
//   EMODA, EMODC - x/y offset of the palette inside the source texture
//   DOFFSET      - offset added to the palette index
PS_OUTPUT ps_convert_clut_4(PS_INPUT input)
{
	// Borrowing the YUV constant buffer.
	float2 scale = BGColor.xy;
	uint2 offset = uint2(uint(EMODA), uint(EMODC));

	// DOFFSET shifts the palette *index*, not the texel position. Adding it to
	// both components of the offset (as was done before) moved the lookup
	// diagonally and disagreed with ps_convert_clut_8 and the GLSL shaders.
	uint index = uint(input.p.x) + uint(DOFFSET);

	// CLUT4 is easy: entries are laid out in rows of 8.
	uint2 pos = uint2(index % 8u, index / 8u);

	int2 final = int2(floor(float2(offset + pos) * scale));
	PS_OUTPUT output;
	output.c = Texture.Load(int3(final, 0), 0);
	return output;
}
// Produces one texel of the 8-bit GPU CLUT texture by loading the matching
// palette entry from the source texture.
// Constants are borrowed from the YUV constant buffer (cb0):
//   BGColor.xy   - scale applied to the computed source coordinates
//   EMODA, EMODC - x/y offset of the palette inside the source texture
//   DOFFSET      - offset added to the palette index
PS_OUTPUT ps_convert_clut_8(PS_INPUT input)
{
	float2 scale = BGColor.xy;
	uint2 offset = uint2(uint(EMODA), uint(EMODC));

	// Clamp to the last valid entry (255). Clamping to 240 collapsed entries
	// 241..255 onto entry 240 even when DOFFSET was zero.
	uint index = min(uint(input.p.x) + uint(DOFFSET), 255u);

	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
	// This can probably be done better..
	uint subgroup = (index / 8u) % 4u;
	uint2 pos;
	pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);

	int2 final = int2(floor(float2(offset + pos) * scale));
	PS_OUTPUT output;
	output.c = Texture.Load(int3(final, 0), 0);
	return output;
}
PS_OUTPUT ps_yuv(PS_INPUT input)
{
PS_OUTPUT output;

View File

@ -314,6 +314,41 @@ void ps_hdr_resolve()
}
#endif
#ifdef ps_convert_clut_4
// offset.xy: position of the palette inside the source texture, offset.z: offset added to the palette index.
uniform uvec3 offset;
// Scale applied to the computed source coordinates.
uniform vec2 scale;

// Writes one entry of the 4-bit CLUT texture, fetched from the source texture.
void ps_convert_clut_4()
{
	// Entries are laid out in rows of 8.
	uint entry = offset.z + uint(gl_FragCoord.x);
	uvec2 cell = uvec2(entry % 8u, entry / 8u);

	vec2 scaled = vec2(cell + offset.xy) * scale;
	SV_Target0 = texelFetch(TextureSampler, ivec2(floor(scaled)), 0);
}
#endif
#ifdef ps_convert_clut_8
// offset.xy: position of the palette inside the source texture, offset.z: offset added to the palette index.
uniform uvec3 offset;
// Scale applied to the computed source coordinates.
uniform vec2 scale;

// Writes one entry of the 8-bit CLUT texture, fetched from the source texture.
void ps_convert_clut_8()
{
	// Clamp to the last valid entry (255). Clamping to 240 collapsed entries
	// 241..255 onto entry 240 even when offset.z was zero.
	uint index = min(uint(gl_FragCoord.x) + offset.z, 255u);

	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
	// This can probably be done better..
	uint subgroup = (index / 8u) % 4u;
	uvec2 pos;
	pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);

	ivec2 final = ivec2(floor(vec2(offset.xy + pos) * scale));
	SV_Target0 = texelFetch(TextureSampler, final, 0);
}
#endif
#ifdef ps_yuv
uniform ivec2 EMOD;

View File

@ -274,6 +274,49 @@ void ps_convert_rgba_8i()
}
#endif
#ifdef ps_convert_clut_4
layout(push_constant) uniform cb10
{
	vec2 scale;    // scale applied to the computed source coordinates
	uvec2 offset;  // position of the palette inside the source texture
	uint doffset;  // offset added to the palette index
};

// Writes one entry of the 4-bit CLUT texture, fetched from the source texture.
void ps_convert_clut_4()
{
	// Entries are laid out in rows of 8.
	uint entry = doffset + uint(gl_FragCoord.x);
	uvec2 cell = uvec2(entry % 8u, entry / 8u);

	vec2 scaled = vec2(cell + offset) * scale;
	o_col0 = texelFetch(samp0, ivec2(floor(scaled)), 0);
}
#endif
#ifdef ps_convert_clut_8
layout(push_constant) uniform cb10
{
	vec2 scale;    // scale applied to the computed source coordinates
	uvec2 offset;  // position of the palette inside the source texture
	uint doffset;  // offset added to the palette index
};

// Writes one entry of the 8-bit CLUT texture, fetched from the source texture.
void ps_convert_clut_8()
{
	// Clamp to the last valid entry (255). Clamping to 240 collapsed entries
	// 241..255 onto entry 240 even when doffset was zero.
	uint index = min(uint(gl_FragCoord.x) + doffset, 255u);

	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
	// This can probably be done better..
	uint subgroup = (index / 8u) % 4u;
	uvec2 pos;
	pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);

	ivec2 final = ivec2(floor(vec2(offset + pos) * scale));
	o_col0 = texelFetch(samp0, final, 0);
}
#endif
#ifdef ps_yuv
layout(push_constant) uniform cb10
{

View File

@ -205,6 +205,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.halfScreenFix, "EmuCore/GS", "UserHacks_Half_Bottom_Override", -1, -1);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuSpriteRenderBW, "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuCLUTRender, "EmuCore/GS", "UserHacks_CPUCLUTRender", 0);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.gpuTargetCLUTMode, "EmuCore/GS", "UserHacks_GPUTargetCLUTMode", 0);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawStart, "EmuCore/GS", "UserHacks_SkipDraw_Start", 0);
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawEnd, "EmuCore/GS", "UserHacks_SkipDraw_End", 0);
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hwAutoFlush, "EmuCore/GS", "UserHacks_AutoFlush", false);

View File

@ -920,14 +920,14 @@
</item>
</widget>
</item>
<item row="3" column="0">
<item row="4" column="0">
<widget class="QLabel" name="label_12">
<property name="text">
<string>Skipdraw Range:</string>
</property>
</widget>
</item>
<item row="3" column="1">
<item row="4" column="1">
<layout class="QHBoxLayout" name="horizontalLayout">
<item>
<widget class="QSpinBox" name="skipDrawStart">
@ -945,7 +945,7 @@
</item>
</layout>
</item>
<item row="4" column="0" colspan="2">
<item row="5" column="0" colspan="2">
<layout class="QGridLayout" name="gridLayout">
<item row="0" column="0">
<widget class="QCheckBox" name="hwAutoFlush">
@ -1030,6 +1030,32 @@
</property>
</widget>
</item>
<item row="3" column="0">
<widget class="QLabel" name="label_47">
<property name="text">
<string>GPU Target CLUT:</string>
</property>
</widget>
</item>
<item row="3" column="1">
<widget class="QComboBox" name="gpuTargetCLUTMode">
<item>
<property name="text">
<string>Disabled (Default)</string>
</property>
</item>
<item>
<property name="text">
<string>Enabled (Exact Match)</string>
</property>
</item>
<item>
<property name="text">
<string>Enabled (Check Inside Target)</string>
</property>
</item>
</widget>
</item>
</layout>
</widget>
<widget class="QGroupBox" name="upscalingFixesTab">

View File

@ -350,6 +350,13 @@ enum class GSCASMode : u8
SharpenAndResize,
};
// Setting that controls whether palettes (CLUTs) may be sourced from GPU render targets.
// Stored as a u8; enumerator order defines the stored values (0, 1, 2).
enum class GSGPUTargetCLUTMode : u8
{
// Feature is off.
Disabled,
// Feature is on.
Enabled,
// Feature is on, with a distinct "inside target" mode.
// NOTE(review): presumably this also searches inside render targets rather than only at their base - confirm against the texture cache lookup.
InsideTarget,
};
// Template function for casting enumerations to their underlying type
template <typename Enumeration>
typename std::underlying_type<Enumeration>::type enum_cast(Enumeration E)
@ -727,6 +734,7 @@ struct Pcsx2Config
int UserHacks_TCOffsetY{0};
int UserHacks_CPUSpriteRenderBW{0};
int UserHacks_CPUCLUTRender{ 0 };
GSGPUTargetCLUTMode UserHacks_GPUTargetCLUTMode{GSGPUTargetCLUTMode::Disabled};
TriFiltering TriFilter{TriFiltering::Automatic};
int OverrideTextureBarriers{-1};
int OverrideGeometryShaders{-1};

View File

@ -728,7 +728,8 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config)
GSConfig.UserHacks_DisablePartialInvalidation != old_config.UserHacks_DisablePartialInvalidation ||
GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt ||
GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW ||
GSConfig.UserHacks_CPUCLUTRender != old_config.UserHacks_CPUCLUTRender)
GSConfig.UserHacks_CPUCLUTRender != old_config.UserHacks_CPUCLUTRender ||
GSConfig.UserHacks_GPUTargetCLUTMode != old_config.UserHacks_GPUTargetCLUTMode)
{
g_gs_renderer->PurgeTextureCache();
g_gs_renderer->PurgePool();

View File

@ -14,9 +14,11 @@
*/
#include "PrecompiledHeader.h"
#include "GSClut.h"
#include "GSLocalMemory.h"
#include "GSGL.h"
#include "GS/GSClut.h"
#include "GS/GSLocalMemory.h"
#include "GS/GSGL.h"
#include "GS/Renderers/Common/GSDevice.h"
#include "GS/Renderers/Common/GSRenderer.h"
#include "common/AlignedMalloc.h"
GSClut::GSClut(GSLocalMemory* mem)
@ -103,6 +105,11 @@ GSClut::GSClut(GSLocalMemory* mem)
// Releases the GPU CLUT textures owned by this object, then the CLUT buffer itself.
GSClut::~GSClut()
{
	// Deleting a null pointer is a no-op, so no explicit null checks are needed.
	delete m_gpu_clut4;
	delete m_gpu_clut8;

	_aligned_free(m_clut);
}
@ -381,6 +388,52 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
break;
}
}
m_current_gpu_clut = nullptr;
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
{
const bool is_4bit = (TEX0.PSM == PSM_PSMT4 || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4HH);
u32 CBW;
GSVector2i offset;
GSVector2i size;
if (!TEX0.CSM)
{
CBW = 0; // don't care
offset = {};
size.x = is_4bit ? 8 : 16;
size.y = is_4bit ? 2 : 16;
}
else
{
CBW = m_write.TEXCLUT.CBW;
offset.x = m_write.TEXCLUT.COU;
offset.y = m_write.TEXCLUT.COV;
size.x = is_4bit ? 16 : 256;
size.y = 1;
}
GSTexture* src = g_gs_renderer->LookupPaletteSource(TEX0.CBP, TEX0.CPSM, CBW, offset, size);
if (src)
{
GSTexture* dst = is_4bit ? m_gpu_clut4 : m_gpu_clut8;
u32 dst_size = is_4bit ? 16 : 256;
const u32 dOffset = (TEX0.CSA & ((TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) ? 15u : 31u)) << 4;
if (!dst)
{
// allocate texture lazily
dst = g_gs_device->CreateRenderTarget(dst_size, 1, GSTexture::Format::Color, false);
is_4bit ? (m_gpu_clut4 = dst) : (m_gpu_clut8 = dst);
}
if (dst)
{
GL_PUSH("Update GPU CLUT [CBP=%04X, CPSM=%s, CBW=%u, CSA=%u, Offset=(%d,%d)]",
TEX0.CBP, psm_str(TEX0.CPSM), CBW, TEX0.CSA, offset.x, offset.y);
g_gs_device->UpdateCLUTTexture(src, offset.x, offset.y, dst, dOffset, dst_size);
m_current_gpu_clut = dst;
}
}
}
}
}

View File

@ -21,6 +21,7 @@
#include "GSAlignedClass.h"
class GSLocalMemory;
class GSTexture;
class alignas(32) GSClut final : public GSAlignedClass<32>
{
@ -55,6 +56,10 @@ class alignas(32) GSClut final : public GSAlignedClass<32>
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
} m_read;
GSTexture* m_gpu_clut4 = nullptr;
GSTexture* m_gpu_clut8 = nullptr;
GSTexture* m_current_gpu_clut = nullptr;
typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
writeCLUT m_wc[2][16][64];
@ -101,6 +106,8 @@ public:
GSClut(GSLocalMemory* mem);
~GSClut();
__fi GSTexture* GetGPUTexture() const { return m_current_gpu_clut; }
bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false);
u8 IsInvalid();
void ClearDrawInvalidity();

View File

@ -46,6 +46,8 @@ const char* shaderName(ShaderConvert value)
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
case ShaderConvert::CLUT_4: return "ps_convert_clut_4";
case ShaderConvert::CLUT_8: return "ps_convert_clut_8";
case ShaderConvert::YUV: return "ps_yuv";
// clang-format on
default:

View File

@ -49,6 +49,8 @@ enum class ShaderConvert
RGB5A1_TO_FLOAT16_BILN,
DEPTH_COPY,
RGBA_TO_8I,
CLUT_4,
CLUT_8,
YUV,
Count
};
@ -834,6 +836,9 @@ public:
/// Performs a screen blit for display. If dTex is null, it assumes you are writing to the system framebuffer/swap chain.
virtual void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) {}
/// Updates a GPU CLUT texture from a source texture.
virtual void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) {}
virtual void RenderHW(GSHWDrawConfig& config) {}
__fi FeatureSupport Features() const { return m_features; }

View File

@ -954,6 +954,11 @@ void GSRenderer::PurgeTextureCache()
{
}
// Base-renderer implementation of the palette-source lookup.
// Always returns nullptr; every argument is ignored and `offset` is left untouched.
GSTexture* GSRenderer::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
{
return nullptr;
}
bool GSRenderer::SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
u32* width, u32* height, std::vector<u32>* pixels)
{

View File

@ -54,6 +54,8 @@ public:
virtual void PurgePool() override;
virtual void PurgeTextureCache();
virtual GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
bool SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
u32* width, u32* height, std::vector<u32>* pixels);

View File

@ -773,6 +773,24 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
PSSetShaderResources(nullptr, nullptr);
}
// Renders the palette found in sTex at (offsetX, offsetY) into the dSize x 1 texture dTex
// using the CLUT conversion shader. dSize == 16 selects the 4-bit shader, anything else the 8-bit one.
void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Layout has to match the merge constant buffer, which the CLUT shaders borrow.
	struct Uniforms
	{
		float scaleX, scaleY;
		float pad1[2];
		u32 offsetX, offsetY, dOffset;
		u32 pad2;
	};

	const auto& src_scale = sTex->GetScale();

	// Value-initialise first so the padding members are zero.
	Uniforms constants = {};
	constants.scaleX = src_scale.x;
	constants.scaleY = src_scale.y;
	constants.offsetX = offsetX;
	constants.offsetY = offsetY;
	constants.dOffset = dOffset;
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &constants, 0, 0);

	ShaderConvert convert_shader = ShaderConvert::CLUT_8;
	if (dSize == 16)
		convert_shader = ShaderConvert::CLUT_4;

	// Destination is a single row, dSize texels wide.
	const GSVector4 target_rect(0, 0, dSize, 1);
	StretchRect(sTex, GSVector4::zero(), dTex, target_rect, m_convert.ps[static_cast<int>(convert_shader)].get(), m_merge.cb.get(), nullptr, false);
}
void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c)
{
const GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f);

View File

@ -267,6 +267,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);

View File

@ -521,6 +521,24 @@ void GSDevice12::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
m_present[static_cast<int>(shader)].get(), linear);
}
// Renders the palette found in sTex at (offsetX, offsetY) into the dSize x 1 texture dTex
// using the CLUT conversion pipeline. dSize == 16 selects the 4-bit shader, anything else the 8-bit one.
void GSDevice12::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Constants consumed by the CLUT conversion shaders.
	struct Uniforms
	{
		float scaleX, scaleY;
		float pad1[2];
		u32 offsetX, offsetY, dOffset;
		u32 pad2;
	};

	const auto& src_scale = sTex->GetScale();

	// Value-initialise first so the padding members are zero.
	Uniforms constants = {};
	constants.scaleX = src_scale.x;
	constants.scaleY = src_scale.y;
	constants.offsetX = offsetX;
	constants.offsetY = offsetY;
	constants.dOffset = dOffset;
	SetUtilityPushConstants(&constants, sizeof(constants));

	ShaderConvert convert_shader = ShaderConvert::CLUT_8;
	if (dSize == 16)
		convert_shader = ShaderConvert::CLUT_4;

	// Destination is a single row, dSize texels wide.
	const GSVector4 target_rect(0, 0, dSize, 1);
	DoStretchRect(static_cast<GSTexture12*>(sTex), GSVector4::zero(), static_cast<GSTexture12*>(dTex), target_rect,
		m_convert[static_cast<int>(convert_shader)].get(), false);
}
void GSDevice12::BeginRenderPassForStretchRect(GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc)
{
const bool is_whole_target = dst_rc.eq(dtex_rc);

View File

@ -264,6 +264,7 @@ public:
bool green, bool blue, bool alpha) override;
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void BeginRenderPassForStretchRect(GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc);
void DoStretchRect(GSTexture12* sTex, const GSVector4& sRect, GSTexture12* dTex, const GSVector4& dRect,

View File

@ -120,6 +120,11 @@ void GSRendererHW::PurgeTextureCache()
m_tc->RemoveAll();
}
// Hardware-renderer override of the palette-source lookup.
// Forwards all arguments to the texture cache (m_tc) and returns its result unchanged.
GSTexture* GSRendererHW::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
{
return m_tc->LookupPaletteSource(CBP, CPSM, CBW, offset, size);
}
bool GSRendererHW::UpdateTexIsFB(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0)
{
if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier)
@ -1406,11 +1411,12 @@ void GSRendererHW::Draw()
}
// SW CLUT Render enable.
if (GSConfig.UserHacks_CPUCLUTRender > 0)
bool preload = GSConfig.PreloadFrameWithGSData;
if (GSConfig.UserHacks_CPUCLUTRender > 0 || GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
{
bool result = (GSConfig.UserHacks_CPUCLUTRender == 1) ? PossibleCLUTDraw() : PossibleCLUTDrawAggressive();
const CLUTDrawTestResult result = (GSConfig.UserHacks_CPUCLUTRender == 2) ? PossibleCLUTDrawAggressive() : PossibleCLUTDraw();
m_mem.m_clut.ClearDrawInvalidity();
if (result)
if (result == CLUTDrawTestResult::CLUTDrawOnCPU && GSConfig.UserHacks_CPUCLUTRender > 0)
{
if (SwPrimRender(*this, true))
{
@ -1418,6 +1424,17 @@ void GSRendererHW::Draw()
return;
}
}
else if (result != CLUTDrawTestResult::NotCLUTDraw)
{
// Force enable preloading if any of the existing data is needed.
// e.g. NFSMW only writes the alpha channel, and needs the RGB preloaded.
if (((fm & fm_mask) != fm_mask) || // Some channels masked
!IsOpaque()) // Blending enabled
{
GL_INS("Forcing preload due to partial/blended CLUT draw");
preload = true;
}
}
}
if (m_channel_shuffle)
@ -1743,7 +1760,7 @@ void GSRendererHW::Draw()
GSTextureCache::Target* rt = nullptr;
if (!no_rt)
rt = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::RenderTarget, true, fm);
rt = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::RenderTarget, true, fm, false, 0, 0, preload);
TEX0.TBP0 = context->ZBUF.Block();
TEX0.TBW = context->FRAME.FBW;
@ -1751,7 +1768,7 @@ void GSRendererHW::Draw()
GSTextureCache::Target* ds = nullptr;
if (!no_ds)
ds = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::DepthStencil, context->DepthWrite());
ds = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::DepthStencil, context->DepthWrite(), 0, false, 0, 0, preload);
if (rt)
{
@ -3964,46 +3981,46 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
g_gs_device->RenderHW(m_conf);
}
bool GSRendererHW::PossibleCLUTDraw()
GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
{
// No shuffles.
if (m_channel_shuffle || m_texture_shuffle)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat.
const bool fb_only = m_context->TEST.ATE && m_context->TEST.AFAIL == 1 && m_context->TEST.ATST == ATST_NEVER;
// No Z writes, unless it's points, then it's quite likely to be a palette and they left it on.
if (!m_context->ZBUF.ZMSK && !fb_only && !(m_vt.m_primclass == GS_POINT_CLASS))
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Make sure it's flat.
if (m_vt.m_eq.z != 0x1)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// No mipmapping, please never be any mipmapping...
if (m_context->TEX1.MXL)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
if ((m_regs->DISP[0].DISPFB.Block() == m_context->FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_context->FRAME.Block()) ||
(PRIM->TME && ((m_regs->DISP[0].DISPFB.Block() == m_context->TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_context->TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Ignore recursive/shuffle effects, but possible it will recursively draw, but make sure it's staying in page width
if (PRIM->TME && m_context->TEX0.TBP0 == m_context->FRAME.Block() && (m_context->FRAME.FBW != 1 && m_context->TEX0.TBW == m_context->FRAME.FBW))
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
if (PRIM->TME && GSLocalMemory::m_psm[m_context->TEX0.PSM].pal > 0)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
// Make sure the CLUT formats are matching.
if (GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp != psm.bpp)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Max size for a CLUT/Current page size.
constexpr float min_clut_width = 7.0f;
@ -4013,7 +4030,7 @@ bool GSRendererHW::PossibleCLUTDraw()
// If the coordinates aren't starting within the page, it's likely not a CLUT draw.
if (floor(m_vt.m_min.p.x) < 0 || floor(m_vt.m_min.p.y) < 0 || floor(m_vt.m_min.p.x) > page_width || floor(m_vt.m_min.p.y) > page_height)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Make sure it's a division of 8 in width to avoid bad draws. Points will go from 0-7 inclusive, but sprites etc will do 0-16 exclusive.
int draw_divder_match = false;
@ -4035,13 +4052,36 @@ bool GSRendererHW::PossibleCLUTDraw()
// Make sure the draw hits the next CLUT and it's marked as invalid (kind of a sanity check).
// We can also allow draws which are of a sensible size within the page, as they could also be CLUT draws (or gradients for the CLUT).
if (!valid_size)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (PRIM->TME)
{
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
const GSVector4i r = GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear()).coverage;
// If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download.
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
{
GSTextureCache::Target* tgt = m_tc->GetExactTarget(m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM);
if (tgt)
{
bool is_dirty = false;
for (const GSDirtyRect& rc : tgt->m_dirty)
{
if (!rc.GetDirtyRect(m_context->TEX0).rintersect(r).rempty())
{
is_dirty = true;
break;
}
}
if (!is_dirty)
{
GL_INS("GPU clut is enabled and this draw would readback, leaving on GPU");
return CLUTDrawTestResult::CLUTDrawOnGPU;
}
}
}
GIFRegBITBLTBUF BITBLTBUF;
BITBLTBUF.SBP = m_context->TEX0.TBP0;
BITBLTBUF.SBW = m_context->TEX0.TBW;
@ -4054,41 +4094,41 @@ bool GSRendererHW::PossibleCLUTDraw()
//const u32 endbp = psm.info.bn(m_vt.m_max.p.x, m_vt.m_max.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW);
//DevCon.Warning("Draw width %f height %f page width %f height %f TPSM %x TBP0 %x FPSM %x FBP %x CBP %x valid size %d Invalid %d DISPFB0 %x DISPFB1 %x start %x end %x draw %d", draw_width, draw_height, page_width, page_height, m_context->TEX0.PSM, m_context->TEX0.TBP0, m_context->FRAME.PSM, m_context->FRAME.Block(), m_mem.m_clut.GetCLUTCBP(), valid_size, m_mem.m_clut.IsInvalid(), m_regs->DISP[0].DISPFB.Block(), m_regs->DISP[1].DISPFB.Block(), startbp, endbp, s_n);
return true;
return CLUTDrawTestResult::CLUTDrawOnCPU;
}
// Slightly more aggressive version that kinda YOLO's it if the draw is anywhere near the CLUT or is point/line (providing it's not too wide of a draw and a few other parameters).
// This is pretty much tuned for the Sega Model 2 games, which draw a huge gradient, then pick lines out of it to make up CLUT's for about 4000 draws...
bool GSRendererHW::PossibleCLUTDrawAggressive()
GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDrawAggressive()
{
// Avoid any shuffles.
if (m_channel_shuffle || m_texture_shuffle)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat.
if (m_context->TEST.ATE)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (PRIM->ABE)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (m_context->TEX1.MXL)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (m_context->FRAME.FBW != 1)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (!m_context->ZBUF.ZMSK)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (m_vt.m_eq.z != 0x1)
return false;
return CLUTDrawTestResult::NotCLUTDraw;
if (!((m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS) || ((m_mem.m_clut.GetCLUTCBP() >> 5) >= m_context->FRAME.FBP && (m_context->FRAME.FBP + 1U) >= (m_mem.m_clut.GetCLUTCBP() >> 5) && m_vt.m_primclass == GS_SPRITE_CLASS)))
return false;
return CLUTDrawTestResult::NotCLUTDraw;
// Avoid invalidating anything here, we just want to avoid the thing being drawn on the GPU.
return true;
return CLUTDrawTestResult::CLUTDrawOnCPU;
}
bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex)

View File

@ -66,8 +66,15 @@ private:
void SwSpriteRender();
bool CanUseSwSpriteRender();
bool PossibleCLUTDraw();
bool PossibleCLUTDrawAggressive();
// Outcome of the heuristics that decide whether the current draw is writing a palette (CLUT).
enum class CLUTDrawTestResult
{
// The draw does not look like a CLUT draw.
NotCLUTDraw,
// The draw looks like a CLUT draw and is a candidate for the software (CPU) renderer.
CLUTDrawOnCPU,
// The draw looks like a CLUT draw but should be left on the GPU.
CLUTDrawOnGPU,
};
CLUTDrawTestResult PossibleCLUTDraw();
CLUTDrawTestResult PossibleCLUTDrawAggressive();
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
bool (*SwPrimRender)(GSRendererHW&, bool invalidate_tc);
@ -153,6 +160,7 @@ public:
void Draw() override;
void PurgeTextureCache() override;
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size) override;
// Called by the texture cache to know if current texture is useful
bool UpdateTexIsFB(GSTextureCache::Target* src, const GIFRegTEX0& TEX0);

View File

@ -224,12 +224,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod)
{
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW);
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP);
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
const u32* clut = g_gs_renderer->m_mem.m_clut;
const u32* const clut = g_gs_renderer->m_mem.m_clut;
GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr;
Source* src = NULL;
@ -246,16 +247,25 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
if (!s->m_target)
{
// We request a palette texture (psm_s.pal). If the texture was
// converted by the CPU (!s->m_palette), we need to ensure
// palette content is the same.
if (psm_s.pal > 0 && !s->m_palette && !s->ClutMatch({clut, psm_s.pal}))
continue;
if (psm_s.pal > 0)
{
// If we're doing GPU CLUT, we don't want to use the CPU-converted version.
if (gpu_clut && !s->m_palette)
continue;
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
// the CPU. We need to check that TEXA is identical
if (psm_s.pal == 0 && psm_s.fmt > 0 && s->m_TEXA.U64 != TEXA.U64)
continue;
// We request a palette texture (psm_s.pal). If the texture was
// converted by the CPU (!s->m_palette), we need to ensure
// palette content is the same.
if (!s->m_palette && !s->ClutMatch({ clut, psm_s.pal }))
continue;
}
else
{
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
// the CPU. We need to check that TEXA is identical
if (psm_s.fmt > 0 && s->m_TEXA.U64 != TEXA.U64)
continue;
}
// Same base mip texture, but we need to check that MXL was the same as well.
// When mipmapping is off, this will be 0,0 vs 0,0.
@ -404,9 +414,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
}
}
bool new_source = false;
if (src == NULL)
if (!src)
{
#ifdef ENABLE_OGL_DEBUG
if (dst)
@ -425,8 +433,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
}
#endif
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r);
new_source = true;
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut);
}
else
{
@ -434,11 +441,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
src->m_texture ? src->m_texture->GetID() : 0,
TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0,
psm_str(TEX0.PSM));
}
if (src->m_palette && !new_source && !src->ClutMatch({clut, psm_s.pal}))
{
AttachPaletteToSource(src, psm_s.pal, true);
if (gpu_clut)
AttachPaletteToSource(src, gpu_clut);
else if (src->m_palette && (!src->m_palette_obj || !src->ClutMatch({clut, psm_s.pal})))
AttachPaletteToSource(src, psm_s.pal, true);
}
src->Update(r);
@ -448,7 +455,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
return src;
}
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask, const bool is_frame, const int real_w, const int real_h)
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask, const bool is_frame, const int real_w, const int real_h, bool preload)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const GSVector2& new_s = static_cast<GSRendererHW*>(g_gs_renderer.get())->GetTextureScaleFactor();
@ -656,7 +663,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
// From a performance point of view, it might cost a little on big upscaling
// but normally few RT are miss so it must remain reasonable.
bool supported_fmt = !GSConfig.UserHacks_DisableDepthSupport || psm_s.depth == 0;
if (GSConfig.PreloadFrameWithGSData && TEX0.TBW > 0 && supported_fmt)
if (preload && TEX0.TBW > 0 && supported_fmt)
{
GL_INS("Preloading the RT DATA");
// RT doesn't have height but if we use a too big value, we will read outside of the GS memory.
@ -1726,7 +1733,7 @@ void GSTextureCache::IncAge()
}
//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range)
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut)
{
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
Source* src = new Source(TEX0, TEXA, false);
@ -2042,28 +2049,33 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
else
{
// maintain the clut even when paltex is on for the dump/replacement texture lookup
bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0);
bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0) || gpu_clut;
const u32* clut = (psm.pal > 0) ? static_cast<const u32*>(g_gs_renderer->m_mem.m_clut) : nullptr;
// try the hash cache
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod)) != nullptr)
{
src->m_texture = src->m_from_hash_cache->texture;
if (psm.pal > 0)
if (gpu_clut)
AttachPaletteToSource(src, gpu_clut);
else if (psm.pal > 0)
AttachPaletteToSource(src, psm.pal, paltex);
}
else if (paltex)
{
src->m_texture = g_gs_device->CreateTexture(tw, th, tlevels, GSTexture::Format::UNorm8);
AttachPaletteToSource(src, psm.pal, true);
if (gpu_clut)
AttachPaletteToSource(src, gpu_clut);
else
AttachPaletteToSource(src, psm.pal, true);
}
else
{
src->m_texture = g_gs_device->CreateTexture(tw, th, tlevels, GSTexture::Format::Color);
if (psm.pal > 0)
{
if (gpu_clut)
AttachPaletteToSource(src, gpu_clut);
else if (psm.pal > 0)
AttachPaletteToSource(src, psm.pal, false);
}
}
}
@ -2243,6 +2255,71 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int
return t;
}
/// Searches the in-use render targets for one that can serve as the source of a palette (CLUT).
///
/// @param CBP    Base pointer of the CLUT.
/// @param CPSM   Pixel format of the CLUT; a target whose base matches CBP must have the same format.
/// @param CBW    Buffer width of the CLUT; 0 means "don't care" for the exact-match check.
/// @param offset Receives the position of the CLUT inside the returned texture. Only written on success.
/// @param size   Size of the CLUT rectangle, used for the inside-target and dirty-rectangle checks.
/// @return The matching target's texture, or nullptr when no suitable target was found.
GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
{
	for (auto t : m_dst[RenderTarget])
	{
		if (!t->m_used)
			continue;

		GSVector2i this_offset;
		if (t->m_TEX0.TBP0 == CBP)
		{
			// Exact match, this one's likely fine, unless the format is different.
			if (t->m_TEX0.PSM != CPSM || (CBW != 0 && t->m_TEX0.TBW != CBW))
				continue;

			// Log the target's base pointer (TBP0); the target's own CBP field is not what was matched.
			GL_INS("Exact match on BP 0x%04x BW %u", t->m_TEX0.TBP0, t->m_TEX0.TBW);
			this_offset.x = 0;
			this_offset.y = 0;
		}
		else if (GSConfig.UserHacks_GPUTargetCLUTMode == GSGPUTargetCLUTMode::InsideTarget &&
				 t->m_TEX0.TBP0 < CBP && t->m_end_block >= CBP)
		{
			// Somewhere within this target, can we find it?
			const GSVector4i rc(0, 0, size.x, size.y);
			// NOTE(review): std::max<u32>(CBW, 0) is a no-op for an unsigned value; a minimum of 1 may have been intended - confirm.
			SurfaceOffset so = ComputeSurfaceOffset(CBP, std::max<u32>(CBW, 0), CPSM, rc, t);
			if (!so.is_valid)
				continue;

			GL_INS("Match inside RT at BP 0x%04X-0x%04X BW %u", t->m_TEX0.TBP0, t->m_end_block, t->m_TEX0.TBW);
			this_offset.x = so.b2a_offset.left;
			this_offset.y = so.b2a_offset.top;
		}
		else
		{
			// Not inside this target, skip.
			continue;
		}

		// Make sure the clut isn't in an area of the target where the EE has overwritten it.
		// Otherwise, we'll be using stale data on the CPU.
		if (!t->m_dirty.empty())
		{
			GL_INS("Candidate is dirty, checking");

			const GSVector4i clut_rc(this_offset.x, this_offset.y, this_offset.x + size.x, this_offset.y + size.y);
			bool is_dirty = false;
			for (const GSDirtyRect& dirty : t->m_dirty)
			{
				if (!dirty.GetDirtyRect(t->m_TEX0).rintersect(clut_rc).rempty())
				{
					GL_INS("Dirty rectangle overlaps CLUT rectangle, skipping");
					is_dirty = true;
					break;
				}
			}
			if (is_dirty)
				continue;
		}

		// First acceptable target wins.
		offset = this_offset;
		return t->m_texture;
	}

	return nullptr;
}
void GSTextureCache::Read(Target* t, const GSVector4i& r)
{
if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0)
@ -2980,6 +3057,12 @@ void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_text
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
}
// Attaches an already-existing palette texture to a source.
// Unlike the (pal, need_gs_texture) overload, no palette object is associated with the source:
// m_palette_obj is cleared and m_palette is pointed directly at gpu_clut.
// NOTE(review): nothing here takes ownership of gpu_clut - presumably its lifetime is managed
// by the caller; confirm against Source's cleanup code.
void GSTextureCache::AttachPaletteToSource(Source* s, GSTexture* gpu_clut)
{
s->m_palette_obj = nullptr;
s->m_palette = gpu_clut;
}
GSTextureCache::SurfaceOffset GSTextureCache::ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t)
{
// Computes offset from Target to offset+rectangle in Target coords.

View File

@ -308,7 +308,7 @@ protected:
std::unordered_map<SurfaceOffsetKey, SurfaceOffset, SurfaceOffsetKeyHash, SurfaceOffsetKeyEqual> m_surface_offset_cache;
Source* m_temporary_source = nullptr; // invalidated after the draw
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, const GSVector2i* lod = nullptr, const GSVector4i* src_range = nullptr);
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut);
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
@ -337,10 +337,12 @@ public:
void RemovePartial();
void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw);
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod);
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0);
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0, bool preload = GSConfig.PreloadFrameWithGSData);
Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h);
/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
@ -367,6 +369,7 @@ public:
void PrintMemoryUsage();
void AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture);
void AttachPaletteToSource(Source* s, GSTexture* gpu_clut);
SurfaceOffset ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t);
SurfaceOffset ComputeSurfaceOffset(const uint32_t bp, const uint32_t bw, const uint32_t psm, const GSVector4i& r, const Target* t);
SurfaceOffset ComputeSurfaceOffset(const SurfaceOffsetKey& sok);

View File

@ -295,7 +295,14 @@ bool GSDeviceOGL::Create()
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
if (static_cast<ShaderConvert>(i) == ShaderConvert::YUV)
{
m_convert.ps[i].RegisterUniform("EMOD");
}
else if (static_cast<ShaderConvert>(i) == ShaderConvert::CLUT_4 || static_cast<ShaderConvert>(i) == ShaderConvert::CLUT_8)
{
m_convert.ps[i].RegisterUniform("offset");
m_convert.ps[i].RegisterUniform("scale");
}
}
const PSSamplerSelector point;
@ -1278,6 +1285,30 @@ void GSDeviceOGL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
EndScene();
}
// Draws a dSize x 1 strip into dTex using one of the CLUT convert shaders, sampling from sTex.
// offsetX/offsetY/dOffset and the source texture's scale are handed to the shader as uniforms.
void GSDeviceOGL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// A 16-entry destination selects the CLUT_4 shader; every other size uses CLUT_8.
	const bool four_bit_clut = (dSize == 16);
	const ShaderConvert convert_shader = four_bit_clut ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;

	BeginScene();

	GL::Program& clut_program = m_convert.ps[static_cast<int>(convert_shader)];
	clut_program.Bind();
	// Slots 0 and 1 presumably map to the "offset" and "scale" uniforms in registration order - TODO confirm.
	clut_program.Uniform3ui(0, offsetX, offsetY, dOffset);
	clut_program.Uniform2f(1, sTex->GetScale().x, sTex->GetScale().y);

	// Output state: convert depth/stencil state, blending off, default colour mask, dTex as the only target.
	OMSetDepthStencilState(m_convert.dss);
	OMSetBlendState(false);
	OMSetColorMaskState();
	OMSetRenderTargets(dTex, nullptr);

	// Input state: the source texture in slot 0 with the convert "pt" sampler.
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(m_convert.pt);

	// One row, dSize pixels wide, starting at the destination origin.
	const GSVector4 dst_rect(0, 0, dSize, 1);
	DrawStretchRect(GSVector4::zero(), dst_rect, dTex->GetSize());

	EndScene();
}
void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds)
{
// Original code from DX

View File

@ -334,6 +334,7 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) final;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear = true);
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) final;
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) final;
void RenderHW(GSHWDrawConfig& config) final;
void SendHWDraw(const GSHWDrawConfig& config, bool needs_barrier);

View File

@ -763,6 +763,23 @@ void GSDeviceVK::BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel,
&ib, linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
}
// Draws a dSize x 1 strip into dTex using one of the CLUT convert pipelines, sampling from sTex.
// The source scale and the offsets are supplied to the shader through the utility push constants.
void GSDeviceVK::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Member order defines the byte layout passed to SetUtilityPushConstants; do not reorder.
	struct CLUTPushConstants
	{
		float scaleX, scaleY;
		u32 offsetX, offsetY, dOffset;
	};

	CLUTPushConstants constants;
	constants.scaleX = sTex->GetScale().x;
	constants.scaleY = sTex->GetScale().y;
	constants.offsetX = offsetX;
	constants.offsetY = offsetY;
	constants.dOffset = dOffset;
	SetUtilityPushConstants(&constants, sizeof(constants));

	// One row, dSize pixels wide, starting at the destination origin.
	const GSVector4 dst_rect(0, 0, dSize, 1);

	// A 16-entry destination selects the CLUT_4 pipeline; every other size uses CLUT_8.
	const bool four_bit_clut = (dSize == 16);
	const ShaderConvert convert_shader = four_bit_clut ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;

	DoStretchRect(static_cast<GSTextureVK*>(sTex), GSVector4::zero(), static_cast<GSTextureVK*>(dTex), dst_rect,
		m_convert[static_cast<int>(convert_shader)], false);
}
void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect,
const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c)
{

View File

@ -251,6 +251,8 @@ public:
void BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel, GSTexture* dTex, const GSVector4i& dRect,
u32 dLevel, bool linear);
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void SetupDATE(GSTexture* rt, GSTexture* ds, bool datm, const GSVector4i& bbox);
GSTextureVK* SetupPrimitiveTrackingDATE(GSHWDrawConfig& config);

View File

@ -505,6 +505,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const
OpEqu(UserHacks_TCOffsetY) &&
OpEqu(UserHacks_CPUSpriteRenderBW) &&
OpEqu(UserHacks_CPUCLUTRender) &&
OpEqu(UserHacks_GPUTargetCLUTMode) &&
OpEqu(OverrideTextureBarriers) &&
OpEqu(OverrideGeometryShaders) &&
@ -681,6 +682,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
GSSettingIntEx(UserHacks_TCOffsetY, "UserHacks_TCOffsetY");
GSSettingIntEx(UserHacks_CPUSpriteRenderBW, "UserHacks_CPUSpriteRenderBW");
GSSettingIntEx(UserHacks_CPUCLUTRender, "UserHacks_CPUCLUTRender");
GSSettingIntEnumEx(UserHacks_GPUTargetCLUTMode, "UserHacks_GPUTargetCLUTMode");
GSSettingIntEnumEx(TriFilter, "TriFilter");
GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers");
GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders");
@ -746,6 +748,7 @@ void Pcsx2Config::GSOptions::MaskUserHacks()
UserHacks_TCOffsetY = 0;
UserHacks_CPUSpriteRenderBW = 0;
UserHacks_CPUCLUTRender = 0;
UserHacks_GPUTargetCLUTMode = GSGPUTargetCLUTMode::Disabled;
SkipDrawStart = 0;
SkipDrawEnd = 0;
}