mirror of https://github.com/PCSX2/pcsx2.git
GS: Add GPU Target CLUT
This commit is contained in:
parent
0619555232
commit
d30e076dbd
|
@ -23,7 +23,8 @@ cbuffer cb0 : register(b0)
|
||||||
float4 BGColor;
|
float4 BGColor;
|
||||||
int EMODA;
|
int EMODA;
|
||||||
int EMODC;
|
int EMODC;
|
||||||
int cb0_pad[2];
|
int DOFFSET;
|
||||||
|
int cb0_pad;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const float3x3 rgb2yuv =
|
static const float3x3 rgb2yuv =
|
||||||
|
@ -291,6 +292,41 @@ PS_OUTPUT ps_convert_rgba_8i(PS_INPUT input)
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds one row of a 16-entry (4-bit) CLUT texture from palette data held in a render target.
// The constants are borrowed from the merge/YUV constant buffer (cb0):
//   BGColor.xy   - scale of the source texture
//   EMODA, EMODC - palette origin (x, y) in unscaled source pixels
//   DOFFSET      - index of the first palette entry to fetch
PS_OUTPUT ps_convert_clut_4(PS_INPUT input)
{
	float2 scale = BGColor.xy;
	uint2 offset = uint2(uint(EMODA), uint(EMODC));

	// The entry offset belongs to the palette index, not the pixel origin.
	// This matches ps_convert_clut_8 and the GLSL versions of this shader.
	uint index = uint(input.p.x) + uint(DOFFSET);

	// The 16 entries are laid out as two rows of 8 texels.
	uint2 pos = uint2(index % 8u, index / 8u);

	// Convert to scaled source coordinates and fetch the texel without filtering.
	int2 final = int2(floor(float2(offset + pos) * scale));
	PS_OUTPUT output;
	output.c = Texture.Load(int3(final, 0), 0);
	return output;
}
|
||||||
|
|
||||||
|
// Builds one row of a 256-entry (8-bit) CLUT texture from palette data held in a render target.
// The constants are borrowed from the merge/YUV constant buffer (cb0):
//   BGColor.xy   - scale of the source texture
//   EMODA, EMODC - palette origin (x, y) in unscaled source pixels
//   DOFFSET      - index of the first palette entry to fetch
PS_OUTPUT ps_convert_clut_8(PS_INPUT input)
{
	float2 scale = BGColor.xy;
	uint2 offset = uint2(uint(EMODA), uint(EMODC));

	// Clamp to the last valid entry of a 256-entry palette. Clamping to 240 made
	// entries 241..255 all resolve to entry 240.
	uint index = min(uint(input.p.x) + uint(DOFFSET), 255u);

	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
	// Each group holds 32 entries; subgroup selects which 8-entry run inside the group.
	uint subgroup = (index / 8u) % 4u;
	uint2 pos;
	pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);

	// Convert to scaled source coordinates and fetch the texel without filtering.
	int2 final = int2(floor(float2(offset + pos) * scale));
	PS_OUTPUT output;
	output.c = Texture.Load(int3(final, 0), 0);
	return output;
}
|
||||||
|
|
||||||
PS_OUTPUT ps_yuv(PS_INPUT input)
|
PS_OUTPUT ps_yuv(PS_INPUT input)
|
||||||
{
|
{
|
||||||
PS_OUTPUT output;
|
PS_OUTPUT output;
|
||||||
|
|
|
@ -314,6 +314,41 @@ void ps_hdr_resolve()
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ps_convert_clut_4
// offset.xy: palette origin in unscaled source pixels, offset.z: first palette entry to fetch.
uniform uvec3 offset;
// Scale of the source texture.
uniform vec2 scale;

// Builds one row of a 16-entry (4-bit) CLUT texture from palette data held in a render target.
void ps_convert_clut_4()
{
	// The 16 entries are laid out as two rows of 8 texels.
	uint entry = uint(gl_FragCoord.x) + offset.z;
	uvec2 cell = uvec2(entry % 8u, entry / 8u);

	// Convert to scaled source coordinates and fetch the texel without filtering.
	vec2 scaled = vec2(offset.xy + cell) * scale;
	SV_Target0 = texelFetch(TextureSampler, ivec2(floor(scaled)), 0);
}
#endif
|
||||||
|
|
||||||
|
#ifdef ps_convert_clut_8
// offset.xy: palette origin in unscaled source pixels, offset.z: first palette entry to fetch.
uniform uvec3 offset;
// Scale of the source texture.
uniform vec2 scale;

// Builds one row of a 256-entry (8-bit) CLUT texture from palette data held in a render target.
void ps_convert_clut_8()
{
	// Clamp to the last valid entry of a 256-entry palette. Clamping to 240 made
	// entries 241..255 all resolve to entry 240.
	uint index = min(uint(gl_FragCoord.x) + offset.z, 255u);

	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
	// Each group holds 32 entries; subgroup selects which 8-entry run inside the group.
	uint subgroup = (index / 8u) % 4u;
	uvec2 pos;
	pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);

	// Convert to scaled source coordinates and fetch the texel without filtering.
	ivec2 final = ivec2(floor(vec2(offset.xy + pos) * scale));
	SV_Target0 = texelFetch(TextureSampler, final, 0);
}
#endif
|
||||||
|
|
||||||
#ifdef ps_yuv
|
#ifdef ps_yuv
|
||||||
uniform ivec2 EMOD;
|
uniform ivec2 EMOD;
|
||||||
|
|
||||||
|
|
|
@ -274,6 +274,49 @@ void ps_convert_rgba_8i()
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ps_convert_clut_4
layout(push_constant) uniform cb10
{
	vec2 scale;    // scale of the source texture
	uvec2 offset;  // palette origin in unscaled source pixels
	uint doffset;  // first palette entry to fetch
};

// Builds one row of a 16-entry (4-bit) CLUT texture from palette data held in a render target.
void ps_convert_clut_4()
{
	// The 16 entries are laid out as two rows of 8 texels.
	uint entry = uint(gl_FragCoord.x) + doffset;
	uvec2 cell = uvec2(entry % 8u, entry / 8u);

	// Convert to scaled source coordinates and fetch the texel without filtering.
	vec2 scaled = vec2(offset + cell) * scale;
	o_col0 = texelFetch(samp0, ivec2(floor(scaled)), 0);
}
#endif
|
||||||
|
|
||||||
|
#ifdef ps_convert_clut_8
layout(push_constant) uniform cb10
{
	vec2 scale;    // scale of the source texture
	uvec2 offset;  // palette origin in unscaled source pixels
	uint doffset;  // first palette entry to fetch
};

// Builds one row of a 256-entry (8-bit) CLUT texture from palette data held in a render target.
void ps_convert_clut_8()
{
	// Clamp to the last valid entry of a 256-entry palette. Clamping to 240 made
	// entries 241..255 all resolve to entry 240.
	uint index = min(uint(gl_FragCoord.x) + doffset, 255u);

	// CLUT is arranged into 8 groups of 16x2, with the top-right and bottom-left quadrants swapped.
	// Each group holds 32 entries; subgroup selects which 8-entry run inside the group.
	uint subgroup = (index / 8u) % 4u;
	uvec2 pos;
	pos.x = (index % 8u) + ((subgroup >= 2u) ? 8u : 0u);
	pos.y = ((index / 32u) * 2u) + (subgroup % 2u);

	// Convert to scaled source coordinates and fetch the texel without filtering.
	ivec2 final = ivec2(floor(vec2(offset + pos) * scale));
	o_col0 = texelFetch(samp0, final, 0);
}
#endif
|
||||||
|
|
||||||
#ifdef ps_yuv
|
#ifdef ps_yuv
|
||||||
layout(push_constant) uniform cb10
|
layout(push_constant) uniform cb10
|
||||||
{
|
{
|
||||||
|
|
|
@ -205,6 +205,7 @@ GraphicsSettingsWidget::GraphicsSettingsWidget(SettingsDialog* dialog, QWidget*
|
||||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.halfScreenFix, "EmuCore/GS", "UserHacks_Half_Bottom_Override", -1, -1);
|
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.halfScreenFix, "EmuCore/GS", "UserHacks_Half_Bottom_Override", -1, -1);
|
||||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuSpriteRenderBW, "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0);
|
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuSpriteRenderBW, "EmuCore/GS", "UserHacks_CPUSpriteRenderBW", 0);
|
||||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuCLUTRender, "EmuCore/GS", "UserHacks_CPUCLUTRender", 0);
|
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.cpuCLUTRender, "EmuCore/GS", "UserHacks_CPUCLUTRender", 0);
|
||||||
|
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.gpuTargetCLUTMode, "EmuCore/GS", "UserHacks_GPUTargetCLUTMode", 0);
|
||||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawStart, "EmuCore/GS", "UserHacks_SkipDraw_Start", 0);
|
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawStart, "EmuCore/GS", "UserHacks_SkipDraw_Start", 0);
|
||||||
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawEnd, "EmuCore/GS", "UserHacks_SkipDraw_End", 0);
|
SettingWidgetBinder::BindWidgetToIntSetting(sif, m_ui.skipDrawEnd, "EmuCore/GS", "UserHacks_SkipDraw_End", 0);
|
||||||
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hwAutoFlush, "EmuCore/GS", "UserHacks_AutoFlush", false);
|
SettingWidgetBinder::BindWidgetToBoolSetting(sif, m_ui.hwAutoFlush, "EmuCore/GS", "UserHacks_AutoFlush", false);
|
||||||
|
|
|
@ -920,14 +920,14 @@
|
||||||
</item>
|
</item>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="0">
|
<item row="4" column="0">
|
||||||
<widget class="QLabel" name="label_12">
|
<widget class="QLabel" name="label_12">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
<string>Skipdraw Range:</string>
|
<string>Skipdraw Range:</string>
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
<item row="3" column="1">
|
<item row="4" column="1">
|
||||||
<layout class="QHBoxLayout" name="horizontalLayout">
|
<layout class="QHBoxLayout" name="horizontalLayout">
|
||||||
<item>
|
<item>
|
||||||
<widget class="QSpinBox" name="skipDrawStart">
|
<widget class="QSpinBox" name="skipDrawStart">
|
||||||
|
@ -945,7 +945,7 @@
|
||||||
</item>
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</item>
|
</item>
|
||||||
<item row="4" column="0" colspan="2">
|
<item row="5" column="0" colspan="2">
|
||||||
<layout class="QGridLayout" name="gridLayout">
|
<layout class="QGridLayout" name="gridLayout">
|
||||||
<item row="0" column="0">
|
<item row="0" column="0">
|
||||||
<widget class="QCheckBox" name="hwAutoFlush">
|
<widget class="QCheckBox" name="hwAutoFlush">
|
||||||
|
@ -1030,6 +1030,32 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item row="3" column="0">
|
||||||
|
<widget class="QLabel" name="label_47">
|
||||||
|
<property name="text">
|
||||||
|
<string>GPU Target CLUT:</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
|
<item row="3" column="1">
|
||||||
|
<widget class="QComboBox" name="gpuTargetCLUTMode">
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string>Disabled (Default)</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string>Enabled (Exact Match)</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
|
<item>
|
||||||
|
<property name="text">
|
||||||
|
<string>Enabled (Check Inside Target)</string>
|
||||||
|
</property>
|
||||||
|
</item>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
</layout>
|
</layout>
|
||||||
</widget>
|
</widget>
|
||||||
<widget class="QGroupBox" name="upscalingFixesTab">
|
<widget class="QGroupBox" name="upscalingFixesTab">
|
||||||
|
|
|
@ -350,6 +350,13 @@ enum class GSCASMode : u8
|
||||||
SharpenAndResize,
|
SharpenAndResize,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class GSGPUTargetCLUTMode : u8
|
||||||
|
{
|
||||||
|
Disabled,
|
||||||
|
Enabled,
|
||||||
|
InsideTarget,
|
||||||
|
};
|
||||||
|
|
||||||
// Template function for casting enumerations to their underlying type
|
// Template function for casting enumerations to their underlying type
|
||||||
template <typename Enumeration>
|
template <typename Enumeration>
|
||||||
typename std::underlying_type<Enumeration>::type enum_cast(Enumeration E)
|
typename std::underlying_type<Enumeration>::type enum_cast(Enumeration E)
|
||||||
|
@ -727,6 +734,7 @@ struct Pcsx2Config
|
||||||
int UserHacks_TCOffsetY{0};
|
int UserHacks_TCOffsetY{0};
|
||||||
int UserHacks_CPUSpriteRenderBW{0};
|
int UserHacks_CPUSpriteRenderBW{0};
|
||||||
int UserHacks_CPUCLUTRender{ 0 };
|
int UserHacks_CPUCLUTRender{ 0 };
|
||||||
|
GSGPUTargetCLUTMode UserHacks_GPUTargetCLUTMode{GSGPUTargetCLUTMode::Disabled};
|
||||||
TriFiltering TriFilter{TriFiltering::Automatic};
|
TriFiltering TriFilter{TriFiltering::Automatic};
|
||||||
int OverrideTextureBarriers{-1};
|
int OverrideTextureBarriers{-1};
|
||||||
int OverrideGeometryShaders{-1};
|
int OverrideGeometryShaders{-1};
|
||||||
|
|
|
@ -728,7 +728,8 @@ void GSUpdateConfig(const Pcsx2Config::GSOptions& new_config)
|
||||||
GSConfig.UserHacks_DisablePartialInvalidation != old_config.UserHacks_DisablePartialInvalidation ||
|
GSConfig.UserHacks_DisablePartialInvalidation != old_config.UserHacks_DisablePartialInvalidation ||
|
||||||
GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt ||
|
GSConfig.UserHacks_TextureInsideRt != old_config.UserHacks_TextureInsideRt ||
|
||||||
GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW ||
|
GSConfig.UserHacks_CPUSpriteRenderBW != old_config.UserHacks_CPUSpriteRenderBW ||
|
||||||
GSConfig.UserHacks_CPUCLUTRender != old_config.UserHacks_CPUCLUTRender)
|
GSConfig.UserHacks_CPUCLUTRender != old_config.UserHacks_CPUCLUTRender ||
|
||||||
|
GSConfig.UserHacks_GPUTargetCLUTMode != old_config.UserHacks_GPUTargetCLUTMode)
|
||||||
{
|
{
|
||||||
g_gs_renderer->PurgeTextureCache();
|
g_gs_renderer->PurgeTextureCache();
|
||||||
g_gs_renderer->PurgePool();
|
g_gs_renderer->PurgePool();
|
||||||
|
|
|
@ -14,9 +14,11 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
#include "GSClut.h"
|
#include "GS/GSClut.h"
|
||||||
#include "GSLocalMemory.h"
|
#include "GS/GSLocalMemory.h"
|
||||||
#include "GSGL.h"
|
#include "GS/GSGL.h"
|
||||||
|
#include "GS/Renderers/Common/GSDevice.h"
|
||||||
|
#include "GS/Renderers/Common/GSRenderer.h"
|
||||||
#include "common/AlignedMalloc.h"
|
#include "common/AlignedMalloc.h"
|
||||||
|
|
||||||
GSClut::GSClut(GSLocalMemory* mem)
|
GSClut::GSClut(GSLocalMemory* mem)
|
||||||
|
@ -103,6 +105,11 @@ GSClut::GSClut(GSLocalMemory* mem)
|
||||||
|
|
||||||
GSClut::~GSClut()
{
	// The GPU palette textures are allocated lazily and may never have been created;
	// deleting a null pointer is a no-op, so no guard is needed.
	delete m_gpu_clut4;
	delete m_gpu_clut8;

	_aligned_free(m_clut);
}
|
||||||
|
|
||||||
|
@ -381,6 +388,52 @@ void GSClut::Read32(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
m_current_gpu_clut = nullptr;
|
||||||
|
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
|
||||||
|
{
|
||||||
|
const bool is_4bit = (TEX0.PSM == PSM_PSMT4 || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4HH);
|
||||||
|
|
||||||
|
u32 CBW;
|
||||||
|
GSVector2i offset;
|
||||||
|
GSVector2i size;
|
||||||
|
if (!TEX0.CSM)
|
||||||
|
{
|
||||||
|
CBW = 0; // don't care
|
||||||
|
offset = {};
|
||||||
|
size.x = is_4bit ? 8 : 16;
|
||||||
|
size.y = is_4bit ? 2 : 16;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CBW = m_write.TEXCLUT.CBW;
|
||||||
|
offset.x = m_write.TEXCLUT.COU;
|
||||||
|
offset.y = m_write.TEXCLUT.COV;
|
||||||
|
size.x = is_4bit ? 16 : 256;
|
||||||
|
size.y = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
GSTexture* src = g_gs_renderer->LookupPaletteSource(TEX0.CBP, TEX0.CPSM, CBW, offset, size);
|
||||||
|
if (src)
|
||||||
|
{
|
||||||
|
GSTexture* dst = is_4bit ? m_gpu_clut4 : m_gpu_clut8;
|
||||||
|
u32 dst_size = is_4bit ? 16 : 256;
|
||||||
|
const u32 dOffset = (TEX0.CSA & ((TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S) ? 15u : 31u)) << 4;
|
||||||
|
if (!dst)
|
||||||
|
{
|
||||||
|
// allocate texture lazily
|
||||||
|
dst = g_gs_device->CreateRenderTarget(dst_size, 1, GSTexture::Format::Color, false);
|
||||||
|
is_4bit ? (m_gpu_clut4 = dst) : (m_gpu_clut8 = dst);
|
||||||
|
}
|
||||||
|
if (dst)
|
||||||
|
{
|
||||||
|
GL_PUSH("Update GPU CLUT [CBP=%04X, CPSM=%s, CBW=%u, CSA=%u, Offset=(%d,%d)]",
|
||||||
|
TEX0.CBP, psm_str(TEX0.CPSM), CBW, TEX0.CSA, offset.x, offset.y);
|
||||||
|
g_gs_device->UpdateCLUTTexture(src, offset.x, offset.y, dst, dOffset, dst_size);
|
||||||
|
m_current_gpu_clut = dst;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "GSAlignedClass.h"
|
#include "GSAlignedClass.h"
|
||||||
|
|
||||||
class GSLocalMemory;
|
class GSLocalMemory;
|
||||||
|
class GSTexture;
|
||||||
|
|
||||||
class alignas(32) GSClut final : public GSAlignedClass<32>
|
class alignas(32) GSClut final : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
|
@ -55,6 +56,10 @@ class alignas(32) GSClut final : public GSAlignedClass<32>
|
||||||
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
bool IsDirty(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA);
|
||||||
} m_read;
|
} m_read;
|
||||||
|
|
||||||
|
GSTexture* m_gpu_clut4 = nullptr;
|
||||||
|
GSTexture* m_gpu_clut8 = nullptr;
|
||||||
|
GSTexture* m_current_gpu_clut = nullptr;
|
||||||
|
|
||||||
typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
typedef void (GSClut::*writeCLUT)(const GIFRegTEX0& TEX0, const GIFRegTEXCLUT& TEXCLUT);
|
||||||
|
|
||||||
writeCLUT m_wc[2][16][64];
|
writeCLUT m_wc[2][16][64];
|
||||||
|
@ -101,6 +106,8 @@ public:
|
||||||
GSClut(GSLocalMemory* mem);
|
GSClut(GSLocalMemory* mem);
|
||||||
~GSClut();
|
~GSClut();
|
||||||
|
|
||||||
|
/// Returns m_current_gpu_clut: the GPU palette texture chosen by the last palette update,
/// or nullptr when no GPU palette is active.
__fi GSTexture* GetGPUTexture() const
{
	return m_current_gpu_clut;
}
|
||||||
|
|
||||||
bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false);
|
bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false);
|
||||||
u8 IsInvalid();
|
u8 IsInvalid();
|
||||||
void ClearDrawInvalidity();
|
void ClearDrawInvalidity();
|
||||||
|
|
|
@ -46,6 +46,8 @@ const char* shaderName(ShaderConvert value)
|
||||||
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
|
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
|
||||||
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
|
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
|
||||||
case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
|
case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
|
||||||
|
case ShaderConvert::CLUT_4: return "ps_convert_clut_4";
|
||||||
|
case ShaderConvert::CLUT_8: return "ps_convert_clut_8";
|
||||||
case ShaderConvert::YUV: return "ps_yuv";
|
case ShaderConvert::YUV: return "ps_yuv";
|
||||||
// clang-format on
|
// clang-format on
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -49,6 +49,8 @@ enum class ShaderConvert
|
||||||
RGB5A1_TO_FLOAT16_BILN,
|
RGB5A1_TO_FLOAT16_BILN,
|
||||||
DEPTH_COPY,
|
DEPTH_COPY,
|
||||||
RGBA_TO_8I,
|
RGBA_TO_8I,
|
||||||
|
CLUT_4,
|
||||||
|
CLUT_8,
|
||||||
YUV,
|
YUV,
|
||||||
Count
|
Count
|
||||||
};
|
};
|
||||||
|
@ -834,6 +836,9 @@ public:
|
||||||
/// Performs a screen blit for display. If dTex is null, it assumes you are writing to the system framebuffer/swap chain.
|
/// Performs a screen blit for display. If dTex is null, it assumes you are writing to the system framebuffer/swap chain.
|
||||||
virtual void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) {}
|
virtual void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) {}
|
||||||
|
|
||||||
|
/// Updates a GPU CLUT texture from a source texture.
|
||||||
|
virtual void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) {}
|
||||||
|
|
||||||
virtual void RenderHW(GSHWDrawConfig& config) {}
|
virtual void RenderHW(GSHWDrawConfig& config) {}
|
||||||
|
|
||||||
__fi FeatureSupport Features() const { return m_features; }
|
__fi FeatureSupport Features() const { return m_features; }
|
||||||
|
|
|
@ -954,6 +954,11 @@ void GSRenderer::PurgeTextureCache()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Base implementation: reports that no GPU-side palette source is available.
// GSRendererHW overrides this to query its texture cache.
GSTexture* GSRenderer::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
{
	return nullptr;
}
|
||||||
|
|
||||||
bool GSRenderer::SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
|
bool GSRenderer::SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
|
||||||
u32* width, u32* height, std::vector<u32>* pixels)
|
u32* width, u32* height, std::vector<u32>* pixels)
|
||||||
{
|
{
|
||||||
|
|
|
@ -54,6 +54,8 @@ public:
|
||||||
virtual void PurgePool() override;
|
virtual void PurgePool() override;
|
||||||
virtual void PurgeTextureCache();
|
virtual void PurgeTextureCache();
|
||||||
|
|
||||||
|
virtual GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
|
||||||
|
|
||||||
bool SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
|
bool SaveSnapshotToMemory(u32 window_width, u32 window_height, bool apply_aspect, bool crop_borders,
|
||||||
u32* width, u32* height, std::vector<u32>* pixels);
|
u32* width, u32* height, std::vector<u32>* pixels);
|
||||||
|
|
||||||
|
|
|
@ -773,6 +773,24 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
||||||
PSSetShaderResources(nullptr, nullptr);
|
PSSetShaderResources(nullptr, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GSDevice11::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Mirrors the layout of the merge constant buffer, which the CLUT shaders borrow.
	struct Uniforms
	{
		float scaleX, scaleY;
		float pad1[2];
		u32 offsetX, offsetY, dOffset;
		u32 pad2;
	};

	// Zero everything (including padding), then fill in the fields the shader reads.
	Uniforms constants = {};
	constants.scaleX = sTex->GetScale().x;
	constants.scaleY = sTex->GetScale().y;
	constants.offsetX = offsetX;
	constants.offsetY = offsetY;
	constants.dOffset = dOffset;

	ID3D11Buffer* const cbuffer = m_merge.cb.get();
	m_ctx->UpdateSubresource(cbuffer, 0, nullptr, &constants, 0, 0);

	// A 16-entry destination means a 4-bit palette; anything else uses the 8-bit shader.
	const bool four_bit = (dSize == 16);
	const int shader_index = static_cast<int>(four_bit ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8);

	// Render a single row covering every palette entry, with point sampling.
	const GSVector4 dest_rect(0, 0, dSize, 1);
	StretchRect(sTex, GSVector4::zero(), dTex, dest_rect, m_convert.ps[shader_index].get(), cbuffer, nullptr, false);
}
|
||||||
|
|
||||||
void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c)
|
void GSDevice11::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c)
|
||||||
{
|
{
|
||||||
const GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f);
|
const GSVector4 full_r(0.0f, 0.0f, 1.0f, 1.0f);
|
||||||
|
|
|
@ -267,6 +267,7 @@ public:
|
||||||
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
|
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) override;
|
||||||
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
|
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
|
||||||
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
|
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
|
||||||
|
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
|
||||||
|
|
||||||
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);
|
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);
|
||||||
|
|
||||||
|
|
|
@ -521,6 +521,24 @@ void GSDevice12::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
||||||
m_present[static_cast<int>(shader)].get(), linear);
|
m_present[static_cast<int>(shader)].get(), linear);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GSDevice12::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Constants consumed by the CLUT conversion shaders; the padding keeps the
	// integer fields on a 16-byte boundary after the scale.
	struct Uniforms
	{
		float scaleX, scaleY;
		float pad1[2];
		u32 offsetX, offsetY, dOffset;
		u32 pad2;
	};

	// Zero everything (including padding), then fill in the fields the shader reads.
	Uniforms constants = {};
	constants.scaleX = sTex->GetScale().x;
	constants.scaleY = sTex->GetScale().y;
	constants.offsetX = offsetX;
	constants.offsetY = offsetY;
	constants.dOffset = dOffset;
	SetUtilityPushConstants(&constants, sizeof(constants));

	// A 16-entry destination means a 4-bit palette; anything else uses the 8-bit shader.
	const bool four_bit = (dSize == 16);
	const int shader_index = static_cast<int>(four_bit ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8);

	// Render a single row covering every palette entry, with point sampling.
	const GSVector4 dest_rect(0, 0, dSize, 1);
	GSTexture12* const source = static_cast<GSTexture12*>(sTex);
	GSTexture12* const dest = static_cast<GSTexture12*>(dTex);
	DoStretchRect(source, GSVector4::zero(), dest, dest_rect, m_convert[shader_index].get(), false);
}
|
||||||
|
|
||||||
void GSDevice12::BeginRenderPassForStretchRect(GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc)
|
void GSDevice12::BeginRenderPassForStretchRect(GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc)
|
||||||
{
|
{
|
||||||
const bool is_whole_target = dst_rc.eq(dtex_rc);
|
const bool is_whole_target = dst_rc.eq(dtex_rc);
|
||||||
|
|
|
@ -264,6 +264,7 @@ public:
|
||||||
bool green, bool blue, bool alpha) override;
|
bool green, bool blue, bool alpha) override;
|
||||||
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
|
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect,
|
||||||
PresentShader shader, float shaderTime, bool linear) override;
|
PresentShader shader, float shaderTime, bool linear) override;
|
||||||
|
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
|
||||||
|
|
||||||
void BeginRenderPassForStretchRect(GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc);
|
void BeginRenderPassForStretchRect(GSTexture12* dTex, const GSVector4i& dtex_rc, const GSVector4i& dst_rc);
|
||||||
void DoStretchRect(GSTexture12* sTex, const GSVector4& sRect, GSTexture12* dTex, const GSVector4& dRect,
|
void DoStretchRect(GSTexture12* sTex, const GSVector4& sRect, GSTexture12* dTex, const GSVector4& dRect,
|
||||||
|
|
|
@ -120,6 +120,11 @@ void GSRendererHW::PurgeTextureCache()
|
||||||
m_tc->RemoveAll();
|
m_tc->RemoveAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Hardware renderer override: forwards the palette lookup to the texture cache.
// offset is passed by non-const reference, so the cache is able to modify it.
GSTexture* GSRendererHW::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
{
	GSTexture* const palette_source = m_tc->LookupPaletteSource(CBP, CPSM, CBW, offset, size);
	return palette_source;
}
|
||||||
|
|
||||||
bool GSRendererHW::UpdateTexIsFB(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0)
|
bool GSRendererHW::UpdateTexIsFB(GSTextureCache::Target* dst, const GIFRegTEX0& TEX0)
|
||||||
{
|
{
|
||||||
if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier)
|
if (GSConfig.AccurateBlendingUnit == AccBlendLevel::Minimum || !g_gs_device->Features().texture_barrier)
|
||||||
|
@ -1406,11 +1411,12 @@ void GSRendererHW::Draw()
|
||||||
}
|
}
|
||||||
|
|
||||||
// SW CLUT Render enable.
|
// SW CLUT Render enable.
|
||||||
if (GSConfig.UserHacks_CPUCLUTRender > 0)
|
bool preload = GSConfig.PreloadFrameWithGSData;
|
||||||
|
if (GSConfig.UserHacks_CPUCLUTRender > 0 || GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
|
||||||
{
|
{
|
||||||
bool result = (GSConfig.UserHacks_CPUCLUTRender == 1) ? PossibleCLUTDraw() : PossibleCLUTDrawAggressive();
|
const CLUTDrawTestResult result = (GSConfig.UserHacks_CPUCLUTRender == 2) ? PossibleCLUTDrawAggressive() : PossibleCLUTDraw();
|
||||||
m_mem.m_clut.ClearDrawInvalidity();
|
m_mem.m_clut.ClearDrawInvalidity();
|
||||||
if (result)
|
if (result == CLUTDrawTestResult::CLUTDrawOnCPU && GSConfig.UserHacks_CPUCLUTRender > 0)
|
||||||
{
|
{
|
||||||
if (SwPrimRender(*this, true))
|
if (SwPrimRender(*this, true))
|
||||||
{
|
{
|
||||||
|
@ -1418,6 +1424,17 @@ void GSRendererHW::Draw()
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (result != CLUTDrawTestResult::NotCLUTDraw)
|
||||||
|
{
|
||||||
|
// Force enable preloading if any of the existing data is needed.
|
||||||
|
// e.g. NFSMW only writes the alpha channel, and needs the RGB preloaded.
|
||||||
|
if (((fm & fm_mask) != fm_mask) || // Some channels masked
|
||||||
|
!IsOpaque()) // Blending enabled
|
||||||
|
{
|
||||||
|
GL_INS("Forcing preload due to partial/blended CLUT draw");
|
||||||
|
preload = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_channel_shuffle)
|
if (m_channel_shuffle)
|
||||||
|
@ -1743,7 +1760,7 @@ void GSRendererHW::Draw()
|
||||||
|
|
||||||
GSTextureCache::Target* rt = nullptr;
|
GSTextureCache::Target* rt = nullptr;
|
||||||
if (!no_rt)
|
if (!no_rt)
|
||||||
rt = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::RenderTarget, true, fm);
|
rt = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::RenderTarget, true, fm, false, 0, 0, preload);
|
||||||
|
|
||||||
TEX0.TBP0 = context->ZBUF.Block();
|
TEX0.TBP0 = context->ZBUF.Block();
|
||||||
TEX0.TBW = context->FRAME.FBW;
|
TEX0.TBW = context->FRAME.FBW;
|
||||||
|
@ -1751,7 +1768,7 @@ void GSRendererHW::Draw()
|
||||||
|
|
||||||
GSTextureCache::Target* ds = nullptr;
|
GSTextureCache::Target* ds = nullptr;
|
||||||
if (!no_ds)
|
if (!no_ds)
|
||||||
ds = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::DepthStencil, context->DepthWrite());
|
ds = m_tc->LookupTarget(TEX0, t_size, GSTextureCache::DepthStencil, context->DepthWrite(), 0, false, 0, 0, preload);
|
||||||
|
|
||||||
if (rt)
|
if (rt)
|
||||||
{
|
{
|
||||||
|
@ -3964,46 +3981,46 @@ void GSRendererHW::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
||||||
g_gs_device->RenderHW(m_conf);
|
g_gs_device->RenderHW(m_conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GSRendererHW::PossibleCLUTDraw()
|
GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDraw()
|
||||||
{
|
{
|
||||||
// No shuffles.
|
// No shuffles.
|
||||||
if (m_channel_shuffle || m_texture_shuffle)
|
if (m_channel_shuffle || m_texture_shuffle)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat.
|
// Keep the draws simple, no alpha testing, blending, mipmapping, Z writes, and make sure it's flat.
|
||||||
const bool fb_only = m_context->TEST.ATE && m_context->TEST.AFAIL == 1 && m_context->TEST.ATST == ATST_NEVER;
|
const bool fb_only = m_context->TEST.ATE && m_context->TEST.AFAIL == 1 && m_context->TEST.ATST == ATST_NEVER;
|
||||||
|
|
||||||
// No Z writes, unless it's points, then it's quite likely to be a palette and they left it on.
|
// No Z writes, unless it's points, then it's quite likely to be a palette and they left it on.
|
||||||
if (!m_context->ZBUF.ZMSK && !fb_only && !(m_vt.m_primclass == GS_POINT_CLASS))
|
if (!m_context->ZBUF.ZMSK && !fb_only && !(m_vt.m_primclass == GS_POINT_CLASS))
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Make sure it's flat.
|
// Make sure it's flat.
|
||||||
if (m_vt.m_eq.z != 0x1)
|
if (m_vt.m_eq.z != 0x1)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// No mipmapping, please never be any mipmapping...
|
// No mipmapping, please never be any mipmapping...
|
||||||
if (m_context->TEX1.MXL)
|
if (m_context->TEX1.MXL)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
|
// Writing to the framebuffer for output. We're not interested. - Note: This stops NFS HP2 Busted screens working, but they're glitchy anyway
|
||||||
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
|
// what NFS HP2 really needs is a kind of shuffle with mask, 32bit target is interpreted as 16bit and masked.
|
||||||
if ((m_regs->DISP[0].DISPFB.Block() == m_context->FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_context->FRAME.Block()) ||
|
if ((m_regs->DISP[0].DISPFB.Block() == m_context->FRAME.Block()) || (m_regs->DISP[1].DISPFB.Block() == m_context->FRAME.Block()) ||
|
||||||
(PRIM->TME && ((m_regs->DISP[0].DISPFB.Block() == m_context->TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_context->TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
|
(PRIM->TME && ((m_regs->DISP[0].DISPFB.Block() == m_context->TEX0.TBP0) || (m_regs->DISP[1].DISPFB.Block() == m_context->TEX0.TBP0)) && !(m_mem.m_clut.IsInvalid() & 2)))
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Ignore recursive/shuffle effects, but possible it will recursively draw, but make sure it's staying in page width
|
// Ignore recursive/shuffle effects, but possible it will recursively draw, but make sure it's staying in page width
|
||||||
if (PRIM->TME && m_context->TEX0.TBP0 == m_context->FRAME.Block() && (m_context->FRAME.FBW != 1 && m_context->TEX0.TBW == m_context->FRAME.FBW))
|
if (PRIM->TME && m_context->TEX0.TBP0 == m_context->FRAME.Block() && (m_context->FRAME.FBW != 1 && m_context->TEX0.TBW == m_context->FRAME.FBW))
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
|
// Hopefully no games draw a CLUT with a CLUT, that would be evil, most likely a channel shuffle.
|
||||||
if (PRIM->TME && GSLocalMemory::m_psm[m_context->TEX0.PSM].pal > 0)
|
if (PRIM->TME && GSLocalMemory::m_psm[m_context->TEX0.PSM].pal > 0)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[m_context->FRAME.PSM];
|
||||||
|
|
||||||
// Make sure the CLUT formats are matching.
|
// Make sure the CLUT formats are matching.
|
||||||
if (GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp != psm.bpp)
|
if (GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp != psm.bpp)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Max size for a CLUT/Current page size.
|
// Max size for a CLUT/Current page size.
|
||||||
constexpr float min_clut_width = 7.0f;
|
constexpr float min_clut_width = 7.0f;
|
||||||
|
@ -4013,7 +4030,7 @@ bool GSRendererHW::PossibleCLUTDraw()
|
||||||
|
|
||||||
// If the coordinates aren't starting within the page, it's likely not a CLUT draw.
|
// If the coordinates aren't starting within the page, it's likely not a CLUT draw.
|
||||||
if (floor(m_vt.m_min.p.x) < 0 || floor(m_vt.m_min.p.y) < 0 || floor(m_vt.m_min.p.x) > page_width || floor(m_vt.m_min.p.y) > page_height)
|
if (floor(m_vt.m_min.p.x) < 0 || floor(m_vt.m_min.p.y) < 0 || floor(m_vt.m_min.p.x) > page_width || floor(m_vt.m_min.p.y) > page_height)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
// Make sure it's a division of 8 in width to avoid bad draws. Points will go from 0-7 inclusive, but sprites etc will do 0-16 exclusive.
|
// Make sure it's a division of 8 in width to avoid bad draws. Points will go from 0-7 inclusive, but sprites etc will do 0-16 exclusive.
|
||||||
int draw_divder_match = false;
|
int draw_divder_match = false;
|
||||||
|
@ -4035,13 +4052,36 @@ bool GSRendererHW::PossibleCLUTDraw()
|
||||||
// Make sure the draw hits the next CLUT and it's marked as invalid (kind of a sanity check).
|
// Make sure the draw hits the next CLUT and it's marked as invalid (kind of a sanity check).
|
||||||
// We can also allow draws which are of a sensible size within the page, as they could also be CLUT draws (or gradients for the CLUT).
|
// We can also allow draws which are of a sensible size within the page, as they could also be CLUT draws (or gradients for the CLUT).
|
||||||
if (!valid_size)
|
if (!valid_size)
|
||||||
return false;
|
return CLUTDrawTestResult::NotCLUTDraw;
|
||||||
|
|
||||||
if (PRIM->TME)
|
if (PRIM->TME)
|
||||||
{
|
{
|
||||||
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
|
// If we're using a texture to draw our CLUT/whatever, we need the GPU to write back dirty data we need.
|
||||||
const GSVector4i r = GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear()).coverage;
|
const GSVector4i r = GetTextureMinMax(m_context->TEX0, m_context->CLAMP, m_vt.IsLinear()).coverage;
|
||||||
|
|
||||||
|
// If we have GPU CLUT enabled, don't do a CPU draw when it would result in a download.
|
||||||
|
if (GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
|
||||||
|
{
|
||||||
|
GSTextureCache::Target* tgt = m_tc->GetExactTarget(m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM);
|
||||||
|
if (tgt)
|
||||||
|
{
|
||||||
|
bool is_dirty = false;
|
||||||
|
for (const GSDirtyRect& rc : tgt->m_dirty)
|
||||||
|
{
|
||||||
|
if (!rc.GetDirtyRect(m_context->TEX0).rintersect(r).rempty())
|
||||||
|
{
|
||||||
|
is_dirty = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!is_dirty)
|
||||||
|
{
|
||||||
|
GL_INS("GPU clut is enabled and this draw would readback, leaving on GPU");
|
||||||
|
return CLUTDrawTestResult::CLUTDrawOnGPU;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
GIFRegBITBLTBUF BITBLTBUF;
|
GIFRegBITBLTBUF BITBLTBUF;
|
||||||
BITBLTBUF.SBP = m_context->TEX0.TBP0;
|
BITBLTBUF.SBP = m_context->TEX0.TBP0;
|
||||||
BITBLTBUF.SBW = m_context->TEX0.TBW;
|
BITBLTBUF.SBW = m_context->TEX0.TBW;
|
||||||
|
@ -4054,41 +4094,41 @@ bool GSRendererHW::PossibleCLUTDraw()
|
||||||
//const u32 endbp = psm.info.bn(m_vt.m_max.p.x, m_vt.m_max.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW);
|
//const u32 endbp = psm.info.bn(m_vt.m_max.p.x, m_vt.m_max.p.y, m_context->FRAME.Block(), m_context->FRAME.FBW);
|
||||||
//DevCon.Warning("Draw width %f height %f page width %f height %f TPSM %x TBP0 %x FPSM %x FBP %x CBP %x valid size %d Invalid %d DISPFB0 %x DISPFB1 %x start %x end %x draw %d", draw_width, draw_height, page_width, page_height, m_context->TEX0.PSM, m_context->TEX0.TBP0, m_context->FRAME.PSM, m_context->FRAME.Block(), m_mem.m_clut.GetCLUTCBP(), valid_size, m_mem.m_clut.IsInvalid(), m_regs->DISP[0].DISPFB.Block(), m_regs->DISP[1].DISPFB.Block(), startbp, endbp, s_n);
|
//DevCon.Warning("Draw width %f height %f page width %f height %f TPSM %x TBP0 %x FPSM %x FBP %x CBP %x valid size %d Invalid %d DISPFB0 %x DISPFB1 %x start %x end %x draw %d", draw_width, draw_height, page_width, page_height, m_context->TEX0.PSM, m_context->TEX0.TBP0, m_context->FRAME.PSM, m_context->FRAME.Block(), m_mem.m_clut.GetCLUTCBP(), valid_size, m_mem.m_clut.IsInvalid(), m_regs->DISP[0].DISPFB.Block(), m_regs->DISP[1].DISPFB.Block(), startbp, endbp, s_n);
|
||||||
|
|
||||||
return true;
|
return CLUTDrawTestResult::CLUTDrawOnCPU;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Slightly more aggressive version that kinda YOLOs it if the draw is anywhere near the CLUT or is point/line (providing it's not too wide of a draw, plus a few other parameters).
|
// Slightly more aggressive version that kinda YOLOs it if the draw is anywhere near the CLUT or is point/line (providing it's not too wide of a draw, plus a few other parameters).
|
||||||
// This is pretty much tuned for the Sega Model 2 games, which draw a huge gradient, then pick lines out of it to make up CLUTs for about 4000 draws...
|
// This is pretty much tuned for the Sega Model 2 games, which draw a huge gradient, then pick lines out of it to make up CLUTs for about 4000 draws...
|
|
||||||
bool GSRendererHW::PossibleCLUTDrawAggressive()
|
GSRendererHW::CLUTDrawTestResult GSRendererHW::PossibleCLUTDrawAggressive()
{
	// Shuffle effects are never treated as CLUT draws.
	const bool is_shuffle = m_channel_shuffle || m_texture_shuffle;
	if (is_shuffle)
		return CLUTDrawTestResult::NotCLUTDraw;

	// Only simple draws qualify: no alpha test, no blending, no mipmapping, a framebuffer
	// width of exactly 1, Z writes masked, and a flat (constant Z) primitive.
	// The checks are evaluated in the same order as the individual early-outs they replace.
	const bool is_simple_draw = !m_context->TEST.ATE && !PRIM->ABE && !m_context->TEX1.MXL &&
		m_context->FRAME.FBW == 1 && m_context->ZBUF.ZMSK && m_vt.m_eq.z == 0x1;
	if (!is_simple_draw)
		return CLUTDrawTestResult::NotCLUTDraw;

	// Points and lines are accepted outright. Anything else must be a sprite whose frame
	// pointer is at, or one below, the CLUT base pointer shifted right by 5
	// (presumably converting CBP to the same units as FRAME.FBP - TODO confirm).
	const bool is_point_or_line = (m_vt.m_primclass == GS_POINT_CLASS || m_vt.m_primclass == GS_LINE_CLASS);
	if (!is_point_or_line)
	{
		const auto clut_base = m_mem.m_clut.GetCLUTCBP() >> 5;
		const bool near_clut = clut_base >= m_context->FRAME.FBP && (m_context->FRAME.FBP + 1U) >= clut_base;
		if (!near_clut || m_vt.m_primclass != GS_SPRITE_CLASS)
			return CLUTDrawTestResult::NotCLUTDraw;
	}

	// Avoid invalidating anything here, we just want to avoid the thing being drawn on the GPU.
	return CLUTDrawTestResult::CLUTDrawOnCPU;
}
|
||||||
|
|
||||||
bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex)
|
bool GSRendererHW::CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex)
|
||||||
|
|
|
@ -66,8 +66,15 @@ private:
|
||||||
void SwSpriteRender();
|
void SwSpriteRender();
|
||||||
bool CanUseSwSpriteRender();
|
bool CanUseSwSpriteRender();
|
||||||
|
|
||||||
bool PossibleCLUTDraw();
|
/// Outcome of the PossibleCLUTDraw() / PossibleCLUTDrawAggressive() heuristics.
enum class CLUTDrawTestResult
|
||||||
bool PossibleCLUTDrawAggressive();
|
{
|
||||||
|
/// The draw failed one of the heuristic checks and is not considered a CLUT draw.
NotCLUTDraw,
|
||||||
|
/// The draw passed the heuristics and should be kept off the GPU.
CLUTDrawOnCPU,
|
||||||
|
/// The draw looks like a CLUT draw but is left on the GPU; PossibleCLUTDraw() returns this when
/// GPU target CLUT is enabled and the exact source target has no dirty rectangle overlapping the texture coverage.
CLUTDrawOnGPU,
|
||||||
|
};
|
||||||
|
|
||||||
|
CLUTDrawTestResult PossibleCLUTDraw();
|
||||||
|
CLUTDrawTestResult PossibleCLUTDrawAggressive();
|
||||||
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
|
bool CanUseSwPrimRender(bool no_rt, bool no_ds, bool draw_sprite_tex);
|
||||||
bool (*SwPrimRender)(GSRendererHW&, bool invalidate_tc);
|
bool (*SwPrimRender)(GSRendererHW&, bool invalidate_tc);
|
||||||
|
|
||||||
|
@ -153,6 +160,7 @@ public:
|
||||||
void Draw() override;
|
void Draw() override;
|
||||||
|
|
||||||
void PurgeTextureCache() override;
|
void PurgeTextureCache() override;
|
||||||
|
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size) override;
|
||||||
|
|
||||||
// Called by the texture cache to know if current texture is useful
|
// Called by the texture cache to know if current texture is useful
|
||||||
bool UpdateTexIsFB(GSTextureCache::Target* src, const GIFRegTEX0& TEX0);
|
bool UpdateTexIsFB(GSTextureCache::Target* src, const GIFRegTEX0& TEX0);
|
||||||
|
|
|
@ -224,12 +224,13 @@ GSTextureCache::Source* GSTextureCache::LookupDepthSource(const GIFRegTEX0& TEX0
|
||||||
|
|
||||||
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod)
|
GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod)
|
||||||
{
|
{
|
||||||
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW);
|
GL_CACHE("TC: Lookup Source <%d,%d => %d,%d> (0x%x, %s, BW: %u, CBP: 0x%x)", r.x, r.y, r.z, r.w, TEX0.TBP0, psm_str(TEX0.PSM), TEX0.TBW, TEX0.CBP);
|
||||||
|
|
||||||
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
||||||
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
//const GSLocalMemory::psm_t& cpsm = psm.pal > 0 ? GSLocalMemory::m_psm[TEX0.CPSM] : psm;
|
||||||
|
|
||||||
const u32* clut = g_gs_renderer->m_mem.m_clut;
|
const u32* const clut = g_gs_renderer->m_mem.m_clut;
|
||||||
|
GSTexture* const gpu_clut = (psm_s.pal > 0) ? g_gs_renderer->m_mem.m_clut.GetGPUTexture() : nullptr;
|
||||||
|
|
||||||
Source* src = NULL;
|
Source* src = NULL;
|
||||||
|
|
||||||
|
@ -246,16 +247,25 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
|
// Target are converted (AEM & palette) on the fly by the GPU. They don't need extra check
|
||||||
if (!s->m_target)
|
if (!s->m_target)
|
||||||
{
|
{
|
||||||
// We request a palette texture (psm_s.pal). If the texture was
|
if (psm_s.pal > 0)
|
||||||
// converted by the CPU (!s->m_palette), we need to ensure
|
{
|
||||||
// palette content is the same.
|
// If we're doing GPU CLUT, we don't want to use the CPU-converted version.
|
||||||
if (psm_s.pal > 0 && !s->m_palette && !s->ClutMatch({clut, psm_s.pal}))
|
if (gpu_clut && !s->m_palette)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
|
// We request a palette texture (psm_s.pal). If the texture was
|
||||||
// the CPU. We need to check that TEXA is identical
|
// converted by the CPU (!s->m_palette), we need to ensure
|
||||||
if (psm_s.pal == 0 && psm_s.fmt > 0 && s->m_TEXA.U64 != TEXA.U64)
|
// palette content is the same.
|
||||||
continue;
|
if (!s->m_palette && !s->ClutMatch({ clut, psm_s.pal }))
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// We request a 24/16 bit RGBA texture. Alpha expansion was done by
|
||||||
|
// the CPU. We need to check that TEXA is identical
|
||||||
|
if (psm_s.fmt > 0 && s->m_TEXA.U64 != TEXA.U64)
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// Same base mip texture, but we need to check that MXL was the same as well.
|
// Same base mip texture, but we need to check that MXL was the same as well.
|
||||||
// When mipmapping is off, this will be 0,0 vs 0,0.
|
// When mipmapping is off, this will be 0,0 vs 0,0.
|
||||||
|
@ -404,9 +414,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool new_source = false;
|
if (!src)
|
||||||
|
|
||||||
if (src == NULL)
|
|
||||||
{
|
{
|
||||||
#ifdef ENABLE_OGL_DEBUG
|
#ifdef ENABLE_OGL_DEBUG
|
||||||
if (dst)
|
if (dst)
|
||||||
|
@ -425,8 +433,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
|
GL_CACHE("TC: src miss (0x%x, 0x%x, %s)", TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0, psm_str(TEX0.PSM));
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r);
|
src = CreateSource(TEX0, TEXA, dst, half_right, x_offset, y_offset, lod, &r, gpu_clut);
|
||||||
new_source = true;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -434,11 +441,11 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
src->m_texture ? src->m_texture->GetID() : 0,
|
src->m_texture ? src->m_texture->GetID() : 0,
|
||||||
TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0,
|
TEX0.TBP0, psm_s.pal > 0 ? TEX0.CBP : 0,
|
||||||
psm_str(TEX0.PSM));
|
psm_str(TEX0.PSM));
|
||||||
}
|
|
||||||
|
|
||||||
if (src->m_palette && !new_source && !src->ClutMatch({clut, psm_s.pal}))
|
if (gpu_clut)
|
||||||
{
|
AttachPaletteToSource(src, gpu_clut);
|
||||||
AttachPaletteToSource(src, psm_s.pal, true);
|
else if (src->m_palette && (!src->m_palette_obj || !src->ClutMatch({clut, psm_s.pal})))
|
||||||
|
AttachPaletteToSource(src, psm_s.pal, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
src->Update(r);
|
src->Update(r);
|
||||||
|
@ -448,7 +455,7 @@ GSTextureCache::Source* GSTextureCache::LookupSource(const GIFRegTEX0& TEX0, con
|
||||||
return src;
|
return src;
|
||||||
}
|
}
|
||||||
|
|
||||||
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask, const bool is_frame, const int real_w, const int real_h)
|
GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask, const bool is_frame, const int real_w, const int real_h, bool preload)
|
||||||
{
|
{
|
||||||
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
|
||||||
const GSVector2& new_s = static_cast<GSRendererHW*>(g_gs_renderer.get())->GetTextureScaleFactor();
|
const GSVector2& new_s = static_cast<GSRendererHW*>(g_gs_renderer.get())->GetTextureScaleFactor();
|
||||||
|
@ -656,7 +663,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(const GIFRegTEX0& TEX0, con
|
||||||
// From a performance point of view, it might cost a little on big upscaling
|
// From a performance point of view, it might cost a little on big upscaling
|
||||||
// but normally few RT are miss so it must remain reasonable.
|
// but normally few RT are miss so it must remain reasonable.
|
||||||
bool supported_fmt = !GSConfig.UserHacks_DisableDepthSupport || psm_s.depth == 0;
|
bool supported_fmt = !GSConfig.UserHacks_DisableDepthSupport || psm_s.depth == 0;
|
||||||
if (GSConfig.PreloadFrameWithGSData && TEX0.TBW > 0 && supported_fmt)
|
if (preload && TEX0.TBW > 0 && supported_fmt)
|
||||||
{
|
{
|
||||||
GL_INS("Preloading the RT DATA");
|
GL_INS("Preloading the RT DATA");
|
||||||
// RT doesn't have height but if we use a too big value, we will read outside of the GS memory.
|
// RT doesn't have height but if we use a too big value, we will read outside of the GS memory.
|
||||||
|
@ -1726,7 +1733,7 @@ void GSTextureCache::IncAge()
|
||||||
}
|
}
|
||||||
|
|
||||||
//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
|
//Fixme: Several issues in here. Not handling depth stencil, pitch conversion doesnt work.
|
||||||
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range)
|
GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* dst, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut)
|
||||||
{
|
{
|
||||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[TEX0.PSM];
|
||||||
Source* src = new Source(TEX0, TEXA, false);
|
Source* src = new Source(TEX0, TEXA, false);
|
||||||
|
@ -2042,28 +2049,33 @@ GSTextureCache::Source* GSTextureCache::CreateSource(const GIFRegTEX0& TEX0, con
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// maintain the clut even when paltex is on for the dump/replacement texture lookup
|
// maintain the clut even when paltex is on for the dump/replacement texture lookup
|
||||||
bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0);
|
bool paltex = (GSConfig.GPUPaletteConversion && psm.pal > 0) || gpu_clut;
|
||||||
const u32* clut = (psm.pal > 0) ? static_cast<const u32*>(g_gs_renderer->m_mem.m_clut) : nullptr;
|
const u32* clut = (psm.pal > 0) ? static_cast<const u32*>(g_gs_renderer->m_mem.m_clut) : nullptr;
|
||||||
|
|
||||||
// try the hash cache
|
// try the hash cache
|
||||||
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod)) != nullptr)
|
if ((src->m_from_hash_cache = LookupHashCache(TEX0, TEXA, paltex, clut, lod)) != nullptr)
|
||||||
{
|
{
|
||||||
src->m_texture = src->m_from_hash_cache->texture;
|
src->m_texture = src->m_from_hash_cache->texture;
|
||||||
if (psm.pal > 0)
|
if (gpu_clut)
|
||||||
|
AttachPaletteToSource(src, gpu_clut);
|
||||||
|
else if (psm.pal > 0)
|
||||||
AttachPaletteToSource(src, psm.pal, paltex);
|
AttachPaletteToSource(src, psm.pal, paltex);
|
||||||
}
|
}
|
||||||
else if (paltex)
|
else if (paltex)
|
||||||
{
|
{
|
||||||
src->m_texture = g_gs_device->CreateTexture(tw, th, tlevels, GSTexture::Format::UNorm8);
|
src->m_texture = g_gs_device->CreateTexture(tw, th, tlevels, GSTexture::Format::UNorm8);
|
||||||
AttachPaletteToSource(src, psm.pal, true);
|
if (gpu_clut)
|
||||||
|
AttachPaletteToSource(src, gpu_clut);
|
||||||
|
else
|
||||||
|
AttachPaletteToSource(src, psm.pal, true);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
src->m_texture = g_gs_device->CreateTexture(tw, th, tlevels, GSTexture::Format::Color);
|
src->m_texture = g_gs_device->CreateTexture(tw, th, tlevels, GSTexture::Format::Color);
|
||||||
if (psm.pal > 0)
|
if (gpu_clut)
|
||||||
{
|
AttachPaletteToSource(src, gpu_clut);
|
||||||
|
else if (psm.pal > 0)
|
||||||
AttachPaletteToSource(src, psm.pal, false);
|
AttachPaletteToSource(src, psm.pal, false);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2243,6 +2255,71 @@ GSTextureCache::Target* GSTextureCache::CreateTarget(const GIFRegTEX0& TEX0, int
|
||||||
return t;
|
return t;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Searches the in-use render targets for one whose texture can be used as the source of a CLUT.
///
/// A target is a candidate when either:
///  - it starts exactly at CBP and has the same PSM (and the same buffer width, unless CBW is 0), or
///  - GSGPUTargetCLUTMode::InsideTarget is selected, CBP lies after the target's start block and no
///    later than its end block, and ComputeSurfaceOffset() reports a valid offset for the CLUT rectangle.
/// A candidate is rejected when any of its dirty rectangles overlaps the CLUT rectangle.
///
/// @param CBP    Base pointer of the CLUT being looked up.
/// @param CPSM   Pixel storage format of the CLUT.
/// @param CBW    Buffer width of the CLUT; 0 disables the width comparison for exact matches.
/// @param offset [out] Position of the CLUT inside the returned texture. Only written on success.
/// @param size   Width/height of the CLUT rectangle, used for the offset and dirty-rectangle checks.
/// @return The matching target's texture, or nullptr if no suitable target exists.
GSTexture* GSTextureCache::LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size)
{
	for (auto t : m_dst[RenderTarget])
	{
		if (!t->m_used)
			continue;

		GSVector2i this_offset;
		if (t->m_TEX0.TBP0 == CBP)
		{
			// Exact match, this one's likely fine, unless the format is different.
			if (t->m_TEX0.PSM != CPSM || (CBW != 0 && t->m_TEX0.TBW != CBW))
				continue;

			// Log the block pointer that actually matched (TBP0); the target's CBP field is unrelated.
			GL_INS("Exact match on BP 0x%04x BW %u", t->m_TEX0.TBP0, t->m_TEX0.TBW);
			this_offset.x = 0;
			this_offset.y = 0;
		}
		else if (GSConfig.UserHacks_GPUTargetCLUTMode == GSGPUTargetCLUTMode::InsideTarget &&
				 t->m_TEX0.TBP0 < CBP && t->m_end_block >= CBP)
		{
			// Somewhere within this target, can we find it?
			const GSVector4i rc(0, 0, size.x, size.y);
			// CBW is unsigned, so it is passed through unchanged (clamping it against 0 had no effect).
			const SurfaceOffset so = ComputeSurfaceOffset(CBP, CBW, CPSM, rc, t);
			if (!so.is_valid)
				continue;

			GL_INS("Match inside RT at BP 0x%04X-0x%04X BW %u", t->m_TEX0.TBP0, t->m_end_block, t->m_TEX0.TBW);
			this_offset.x = so.b2a_offset.left;
			this_offset.y = so.b2a_offset.top;
		}
		else
		{
			// Not inside this target, skip.
			continue;
		}

		// Make sure the clut isn't in an area of the target where the EE has overwritten it.
		// Otherwise, we'll be using stale data on the CPU.
		if (!t->m_dirty.empty())
		{
			GL_INS("Candidate is dirty, checking");

			const GSVector4i clut_rc(this_offset.x, this_offset.y, this_offset.x + size.x, this_offset.y + size.y);
			bool is_dirty = false;
			for (const GSDirtyRect& dirty : t->m_dirty)
			{
				if (!dirty.GetDirtyRect(t->m_TEX0).rintersect(clut_rc).rempty())
				{
					GL_INS("Dirty rectangle overlaps CLUT rectangle, skipping");
					is_dirty = true;
					break;
				}
			}
			if (is_dirty)
				continue;
		}

		// Only publish the offset once the candidate has passed every check.
		offset = this_offset;
		return t->m_texture;
	}

	return nullptr;
}
|
||||||
|
|
||||||
void GSTextureCache::Read(Target* t, const GSVector4i& r)
|
void GSTextureCache::Read(Target* t, const GSVector4i& r)
|
||||||
{
|
{
|
||||||
if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0)
|
if (!t->m_dirty.empty() || r.width() == 0 || r.height() == 0)
|
||||||
|
@ -2980,6 +3057,12 @@ void GSTextureCache::AttachPaletteToSource(Source* s, u16 pal, bool need_gs_text
|
||||||
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
|
s->m_palette = need_gs_texture ? s->m_palette_obj->GetPaletteGSTexture() : nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Points a source directly at a GPU-side CLUT texture.
///
/// The source's palette texture becomes gpu_clut and its palette object reference is cleared,
/// so the source no longer refers to a palette object.
///
/// @param s        Source to update.
/// @param gpu_clut Texture to use as the source's palette.
void GSTextureCache::AttachPaletteToSource(Source* s, GSTexture* gpu_clut)
{
	// The two fields are independent: the palette comes straight from the given texture,
	// so there is no palette object backing it.
	s->m_palette = gpu_clut;
	s->m_palette_obj = nullptr;
}
|
||||||
|
|
||||||
GSTextureCache::SurfaceOffset GSTextureCache::ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t)
|
GSTextureCache::SurfaceOffset GSTextureCache::ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t)
|
||||||
{
|
{
|
||||||
// Computes offset from Target to offset+rectangle in Target coords.
|
// Computes offset from Target to offset+rectangle in Target coords.
|
||||||
|
|
|
@ -308,7 +308,7 @@ protected:
|
||||||
std::unordered_map<SurfaceOffsetKey, SurfaceOffset, SurfaceOffsetKeyHash, SurfaceOffsetKeyEqual> m_surface_offset_cache;
|
std::unordered_map<SurfaceOffsetKey, SurfaceOffset, SurfaceOffsetKeyHash, SurfaceOffsetKeyEqual> m_surface_offset_cache;
|
||||||
Source* m_temporary_source = nullptr; // invalidated after the draw
|
Source* m_temporary_source = nullptr; // invalidated after the draw
|
||||||
|
|
||||||
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t = NULL, bool half_right = false, int x_offset = 0, int y_offset = 0, const GSVector2i* lod = nullptr, const GSVector4i* src_range = nullptr);
|
Source* CreateSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, Target* t, bool half_right, int x_offset, int y_offset, const GSVector2i* lod, const GSVector4i* src_range, GSTexture* gpu_clut);
|
||||||
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
|
Target* CreateTarget(const GIFRegTEX0& TEX0, int w, int h, int type, const bool clear);
|
||||||
|
|
||||||
/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
|
/// Expands a target when the block pointer for a display framebuffer is within another target, but the read offset
|
||||||
|
@ -337,10 +337,12 @@ public:
|
||||||
void RemovePartial();
|
void RemovePartial();
|
||||||
void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw);
|
void AddDirtyRectTarget(Target* target, GSVector4i rect, u32 psm, u32 bw);
|
||||||
|
|
||||||
|
GSTexture* LookupPaletteSource(u32 CBP, u32 CPSM, u32 CBW, GSVector2i& offset, const GSVector2i& size);
|
||||||
|
|
||||||
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod);
|
Source* LookupSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, const GSVector2i* lod);
|
||||||
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
|
Source* LookupDepthSource(const GIFRegTEX0& TEX0, const GIFRegTEXA& TEXA, const GSVector4i& r, bool palette = false);
|
||||||
|
|
||||||
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0);
|
Target* LookupTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, int type, bool used, u32 fbmask = 0, const bool is_frame = false, const int real_w = 0, const int real_h = 0, bool preload = GSConfig.PreloadFrameWithGSData);
|
||||||
Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h);
|
Target* LookupDisplayTarget(const GIFRegTEX0& TEX0, const GSVector2i& size, const int real_w, const int real_h);
|
||||||
|
|
||||||
/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
|
/// Looks up a target in the cache, and only returns it if the BP/BW/PSM match exactly.
|
||||||
|
@ -367,6 +369,7 @@ public:
|
||||||
void PrintMemoryUsage();
|
void PrintMemoryUsage();
|
||||||
|
|
||||||
void AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture);
|
void AttachPaletteToSource(Source* s, u16 pal, bool need_gs_texture);
|
||||||
|
void AttachPaletteToSource(Source* s, GSTexture* gpu_clut);
|
||||||
SurfaceOffset ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t);
|
SurfaceOffset ComputeSurfaceOffset(const GSOffset& off, const GSVector4i& r, const Target* t);
|
||||||
SurfaceOffset ComputeSurfaceOffset(const uint32_t bp, const uint32_t bw, const uint32_t psm, const GSVector4i& r, const Target* t);
|
SurfaceOffset ComputeSurfaceOffset(const uint32_t bp, const uint32_t bw, const uint32_t psm, const GSVector4i& r, const Target* t);
|
||||||
SurfaceOffset ComputeSurfaceOffset(const SurfaceOffsetKey& sok);
|
SurfaceOffset ComputeSurfaceOffset(const SurfaceOffsetKey& sok);
|
||||||
|
|
|
@ -295,7 +295,14 @@ bool GSDeviceOGL::Create()
|
||||||
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
|
m_convert.ps[i].SetFormattedName("Convert pipe %s", name);
|
||||||
|
|
||||||
if (static_cast<ShaderConvert>(i) == ShaderConvert::YUV)
|
if (static_cast<ShaderConvert>(i) == ShaderConvert::YUV)
|
||||||
|
{
|
||||||
m_convert.ps[i].RegisterUniform("EMOD");
|
m_convert.ps[i].RegisterUniform("EMOD");
|
||||||
|
}
|
||||||
|
else if (static_cast<ShaderConvert>(i) == ShaderConvert::CLUT_4 || static_cast<ShaderConvert>(i) == ShaderConvert::CLUT_8)
|
||||||
|
{
|
||||||
|
m_convert.ps[i].RegisterUniform("offset");
|
||||||
|
m_convert.ps[i].RegisterUniform("scale");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const PSSamplerSelector point;
|
const PSSamplerSelector point;
|
||||||
|
@ -1278,6 +1285,30 @@ void GSDeviceOGL::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture
|
||||||
EndScene();
|
EndScene();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Draws a dSize x 1 strip into dTex by running one of the CLUT convert shaders over sTex.
///
/// A dSize of 16 selects ShaderConvert::CLUT_4; any other size selects ShaderConvert::CLUT_8.
/// offsetX/offsetY/dOffset are uploaded together as uniform 0 and the source texture's scale
/// as uniform 1.
void GSDeviceOGL::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	BeginScene();

	// Pick the convert program by palette size.
	const bool is_16_entry = (dSize == 16);
	const ShaderConvert convert_shader = is_16_entry ? ShaderConvert::CLUT_4 : ShaderConvert::CLUT_8;
	GL::Program& program = m_convert.ps[static_cast<int>(convert_shader)];

	// Bind first, then upload the uniforms to the bound program.
	program.Bind();
	program.Uniform3ui(0, offsetX, offsetY, dOffset);
	program.Uniform2f(1, sTex->GetScale().x, sTex->GetScale().y);

	// Output merger state: convert depth-stencil state, no blending, default colour mask, colour target only.
	OMSetDepthStencilState(m_convert.dss);
	OMSetBlendState(false);
	OMSetColorMaskState();
	OMSetRenderTargets(dTex, nullptr);

	// Source texture on slot 0, using the convert pipeline's "pt" sampler.
	PSSetShaderResource(0, sTex);
	PSSetSamplerState(m_convert.pt);

	// Destination covers one row of dSize texels.
	const GSVector4 dest_rect(0, 0, dSize, 1);
	DrawStretchRect(GSVector4::zero(), dest_rect, dTex->GetSize());

	EndScene();
}
|
||||||
|
|
||||||
void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds)
|
void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds)
|
||||||
{
|
{
|
||||||
// Original code from DX
|
// Original code from DX
|
||||||
|
|
|
@ -334,6 +334,7 @@ public:
|
||||||
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) final;
|
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) final;
|
||||||
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear = true);
|
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, const GL::Program& ps, bool alpha_blend, OMColorMaskSelector cms, bool linear = true);
|
||||||
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) final;
|
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) final;
|
||||||
|
// Renders CLUT data from sTex into dTex using one of the CLUT convert shaders.
// The implementation picks ShaderConvert::CLUT_4 when dSize == 16 and
// ShaderConvert::CLUT_8 otherwise, uploads (offsetX, offsetY, dOffset) and the
// scale of sTex as shader uniforms, and draws the rectangle (0, 0, dSize, 1)
// with dTex bound as the render target.
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) final;
|
||||||
|
|
||||||
void RenderHW(GSHWDrawConfig& config) final;
|
void RenderHW(GSHWDrawConfig& config) final;
|
||||||
void SendHWDraw(const GSHWDrawConfig& config, bool needs_barrier);
|
void SendHWDraw(const GSHWDrawConfig& config, bool needs_barrier);
|
||||||
|
|
|
@ -763,6 +763,23 @@ void GSDeviceVK::BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel,
|
||||||
&ib, linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
|
&ib, linear ? VK_FILTER_LINEAR : VK_FILTER_NEAREST);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void GSDeviceVK::UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize)
{
	// Data handed to SetUtilityPushConstants(). The member order defines the
	// byte layout, so it is kept exactly as: scale (x, y), offset (x, y), dOffset.
	// NOTE(review): presumably must mirror the push-constant block declared by the
	// CLUT convert shaders - verify against the shader source before reordering.
	struct CLUTPushConstants
	{
		float scaleX;
		float scaleY;
		u32 offsetX;
		u32 offsetY;
		u32 dOffset;
	};

	const CLUTPushConstants constants = {
		sTex->GetScale().x,
		sTex->GetScale().y,
		offsetX,
		offsetY,
		dOffset,
	};
	SetUtilityPushConstants(&constants, sizeof(constants));

	// A destination size of 16 selects the CLUT_4 convert shader; every other
	// size falls back to CLUT_8.
	ShaderConvert shader = ShaderConvert::CLUT_8;
	if (dSize == 16)
		shader = ShaderConvert::CLUT_4;
	const int shaderIndex = static_cast<int>(shader);

	// The source rectangle is all zeroes; the destination rectangle is (0, 0, dSize, 1).
	const GSVector4 destRect(0, 0, dSize, 1);
	GSTextureVK* const source = static_cast<GSTextureVK*>(sTex);
	GSTextureVK* const target = static_cast<GSTextureVK*>(dTex);
	DoStretchRect(source, GSVector4::zero(), target, destRect, m_convert[shaderIndex], false);
}
|
||||||
|
|
||||||
void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect,
|
void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect,
|
||||||
const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c)
|
const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c)
|
||||||
{
|
{
|
||||||
|
|
|
@ -251,6 +251,8 @@ public:
|
||||||
void BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel, GSTexture* dTex, const GSVector4i& dRect,
|
void BlitRect(GSTexture* sTex, const GSVector4i& sRect, u32 sLevel, GSTexture* dTex, const GSVector4i& dRect,
|
||||||
u32 dLevel, bool linear);
|
u32 dLevel, bool linear);
|
||||||
|
|
||||||
|
// Renders CLUT data from sTex into dTex using one of the CLUT convert shaders.
// The implementation sends the scale of sTex together with offsetX, offsetY and
// dOffset as utility push constants, picks ShaderConvert::CLUT_4 when
// dSize == 16 and ShaderConvert::CLUT_8 otherwise, and stretches into the
// destination rectangle (0, 0, dSize, 1).
void UpdateCLUTTexture(GSTexture* sTex, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
|
||||||
|
|
||||||
void SetupDATE(GSTexture* rt, GSTexture* ds, bool datm, const GSVector4i& bbox);
|
void SetupDATE(GSTexture* rt, GSTexture* ds, bool datm, const GSVector4i& bbox);
|
||||||
GSTextureVK* SetupPrimitiveTrackingDATE(GSHWDrawConfig& config);
|
GSTextureVK* SetupPrimitiveTrackingDATE(GSHWDrawConfig& config);
|
||||||
|
|
||||||
|
|
|
@ -505,6 +505,7 @@ bool Pcsx2Config::GSOptions::OptionsAreEqual(const GSOptions& right) const
|
||||||
OpEqu(UserHacks_TCOffsetY) &&
|
OpEqu(UserHacks_TCOffsetY) &&
|
||||||
OpEqu(UserHacks_CPUSpriteRenderBW) &&
|
OpEqu(UserHacks_CPUSpriteRenderBW) &&
|
||||||
OpEqu(UserHacks_CPUCLUTRender) &&
|
OpEqu(UserHacks_CPUCLUTRender) &&
|
||||||
|
OpEqu(UserHacks_GPUTargetCLUTMode) &&
|
||||||
OpEqu(OverrideTextureBarriers) &&
|
OpEqu(OverrideTextureBarriers) &&
|
||||||
OpEqu(OverrideGeometryShaders) &&
|
OpEqu(OverrideGeometryShaders) &&
|
||||||
|
|
||||||
|
@ -681,6 +682,7 @@ void Pcsx2Config::GSOptions::LoadSave(SettingsWrapper& wrap)
|
||||||
GSSettingIntEx(UserHacks_TCOffsetY, "UserHacks_TCOffsetY");
|
GSSettingIntEx(UserHacks_TCOffsetY, "UserHacks_TCOffsetY");
|
||||||
GSSettingIntEx(UserHacks_CPUSpriteRenderBW, "UserHacks_CPUSpriteRenderBW");
|
GSSettingIntEx(UserHacks_CPUSpriteRenderBW, "UserHacks_CPUSpriteRenderBW");
|
||||||
GSSettingIntEx(UserHacks_CPUCLUTRender, "UserHacks_CPUCLUTRender");
|
GSSettingIntEx(UserHacks_CPUCLUTRender, "UserHacks_CPUCLUTRender");
|
||||||
|
GSSettingIntEnumEx(UserHacks_GPUTargetCLUTMode, "UserHacks_GPUTargetCLUTMode");
|
||||||
GSSettingIntEnumEx(TriFilter, "TriFilter");
|
GSSettingIntEnumEx(TriFilter, "TriFilter");
|
||||||
GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers");
|
GSSettingIntEx(OverrideTextureBarriers, "OverrideTextureBarriers");
|
||||||
GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders");
|
GSSettingIntEx(OverrideGeometryShaders, "OverrideGeometryShaders");
|
||||||
|
@ -746,6 +748,7 @@ void Pcsx2Config::GSOptions::MaskUserHacks()
|
||||||
UserHacks_TCOffsetY = 0;
|
UserHacks_TCOffsetY = 0;
|
||||||
UserHacks_CPUSpriteRenderBW = 0;
|
UserHacks_CPUSpriteRenderBW = 0;
|
||||||
UserHacks_CPUCLUTRender = 0;
|
UserHacks_CPUCLUTRender = 0;
|
||||||
|
UserHacks_GPUTargetCLUTMode = GSGPUTargetCLUTMode::Disabled;
|
||||||
SkipDrawStart = 0;
|
SkipDrawStart = 0;
|
||||||
SkipDrawEnd = 0;
|
SkipDrawEnd = 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue