Merge pull request #6743 from stenzek/faster-disabled-copy-filter
TextureConversionShader: Don't sample from adjacent rows when not needed
This commit is contained in:
commit
c485efdfe1
|
@ -240,7 +240,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
|||
}
|
||||
|
||||
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||
scale_by_half);
|
||||
scale_by_half,
|
||||
NeedsCopyFilterInShader(filter_coefficients));
|
||||
ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
|
||||
if (!pixel_shader)
|
||||
return;
|
||||
|
|
|
@ -515,7 +515,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
|||
glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
|
||||
|
||||
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||
scale_by_half);
|
||||
scale_by_half,
|
||||
NeedsCopyFilterInShader(filter_coefficients));
|
||||
|
||||
auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
|
||||
EFBCopyShader& shader = it.first->second;
|
||||
|
|
|
@ -274,7 +274,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
|||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||
|
||||
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||
scale_by_half);
|
||||
scale_by_half,
|
||||
NeedsCopyFilterInShader(filter_coefficients));
|
||||
|
||||
auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
|
||||
VkShaderModule& shader = it.first->second;
|
||||
|
|
|
@ -1501,8 +1501,8 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
|
|||
}
|
||||
}
|
||||
|
||||
TextureCacheBase::CopyFilterCoefficientArray
|
||||
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
||||
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
|
||||
const CopyFilterCoefficients::Values& coefficients) const
|
||||
{
|
||||
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
|
||||
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
|
||||
|
@ -1512,8 +1512,8 @@ TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Val
|
|||
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
|
||||
}
|
||||
|
||||
TextureCacheBase::CopyFilterCoefficientArray
|
||||
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
||||
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
|
||||
const CopyFilterCoefficients::Values& coefficients) const
|
||||
{
|
||||
// If the user disables the copy filter, only apply it to the VRAM copy.
|
||||
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
|
||||
|
@ -1530,6 +1530,12 @@ TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Va
|
|||
return res;
|
||||
}
|
||||
|
||||
// Reports whether the copy shader has to sample the rows adjacent to the current one.
// Takes the reduced three-entry coefficient array and returns true when entry 0 or entry 2
// is non-zero, and false when both are zero. Entry 1 is not inspected here.
bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
|
||||
{
|
||||
// If the top/bottom coefficients are both zero, there is no point sampling/blending from these rows.
|
||||
return coefficients[0] != 0 || coefficients[2] != 0;
|
||||
}
|
||||
|
||||
void TextureCacheBase::CopyRenderTargetToTexture(
|
||||
u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
|
||||
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
|
||||
|
@ -1652,11 +1658,12 @@ void TextureCacheBase::CopyRenderTargetToTexture(
|
|||
|
||||
if (copy_to_ram)
|
||||
{
|
||||
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
|
||||
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
|
||||
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
|
||||
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
|
||||
NeedsCopyFilterInShader(coefficients));
|
||||
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
|
||||
y_scale, gamma, clamp_top, clamp_bottom,
|
||||
GetRAMCopyFilterCoefficients(filter_coefficients));
|
||||
y_scale, gamma, clamp_top, clamp_bottom, coefficients);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -47,21 +47,23 @@ struct TextureAndTLUTFormat
|
|||
// Bundles the parameters that describe one EFB copy: the EFB pixel format, the destination
// copy format, and the depth / yuv / copy_filter flags.
// NOTE(review): operator< orders instances over all fields, presumably so the struct can be
// used as a key in an ordered container - confirm against the users of this type.
struct EFBCopyParams
|
||||
{
|
||||
// Stores each argument in the member of the same name; performs no validation.
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
|
||||
bool yuv_)
|
||||
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
|
||||
bool yuv_, bool copy_filter_)
|
||||
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
|
||||
copy_filter(copy_filter_)
|
||||
{
|
||||
}
|
||||
|
||||
// Lexicographic comparison over the members, in declaration order, via std::tie.
bool operator<(const EFBCopyParams& rhs) const
|
||||
{
|
||||
return std::tie(efb_format, copy_format, depth, yuv) <
|
||||
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
|
||||
return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
|
||||
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
|
||||
}
|
||||
|
||||
PEControl::PixelFormat efb_format;
|
||||
EFBCopyFormat copy_format;
|
||||
bool depth;
|
||||
bool yuv;
|
||||
// When true, the generated sample function reads the previous, current and next rows and
// blends them; when false, it reads only the current row.
bool copy_filter;
|
||||
};
|
||||
|
||||
struct TextureLookupInformation
|
||||
|
@ -106,6 +108,7 @@ private:
|
|||
static const int FRAMECOUNT_INVALID = 0;
|
||||
|
||||
public:
|
||||
// Condensed version of the full coefficient array, reduced to a single value for each row.
|
||||
using CopyFilterCoefficientArray = std::array<u32, 3>;
|
||||
|
||||
struct TCacheEntry
|
||||
|
@ -278,6 +281,9 @@ public:
|
|||
protected:
|
||||
TextureCacheBase();
|
||||
|
||||
// Returns false if both the top and bottom row coefficients are zero, i.e. only the current row is needed.
|
||||
bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const;
|
||||
|
||||
alignas(16) u8* temp = nullptr;
|
||||
size_t temp_size = 0;
|
||||
|
||||
|
@ -329,9 +335,9 @@ private:
|
|||
|
||||
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
|
||||
CopyFilterCoefficientArray
|
||||
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
||||
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
|
||||
CopyFilterCoefficientArray
|
||||
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
||||
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
|
||||
|
||||
TexAddrCache textures_by_address;
|
||||
TexHashCache textures_by_hash;
|
||||
|
|
|
@ -180,21 +180,33 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
|
|||
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
||||
WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
|
||||
WRITE(p, "{\n");
|
||||
WRITE(p, " float4 prev_row = ");
|
||||
WriteSampleOp(-1);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(p, " float4 current_row = ");
|
||||
WriteSampleOp(0);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(p, " float4 next_row = ");
|
||||
WriteSampleOp(1);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(p,
|
||||
if (params.copy_filter)
|
||||
{
|
||||
WRITE(p, " float4 prev_row = ");
|
||||
WriteSampleOp(-1);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(p, " float4 current_row = ");
|
||||
WriteSampleOp(0);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(p, " float4 next_row = ");
|
||||
WriteSampleOp(1);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(
|
||||
p,
|
||||
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
|
||||
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
|
||||
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
|
||||
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
|
||||
WRITE(p, " return float4(col, current_row.a);\n");
|
||||
WRITE(p, " return float4(col, current_row.a);\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
// Only the current row is sampled here, but the arithmetic must match the filtered branch:
// scale by the centre coefficient, shift right by 6, clamp to 0..255, then normalize to 0..1.
// Without the shift and the divide the returned colour is in the wrong range.
WRITE(p, " float4 current_row = ");
|
||||
WriteSampleOp(0);
|
||||
WRITE(p, ";\n");
|
||||
WRITE(p, " float3 col = float3(clamp((int3(current_row.rgb * 255.0) * filter_coefficients[1]) >> 6,\n"
|
||||
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
|
||||
WRITE(p, " return float4(col, current_row.a);\n");
|
||||
}
|
||||
WRITE(p, "}\n");
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
namespace TextureConversionShaderGen
|
||||
{
|
||||
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
||||
bool scale_by_half)
|
||||
bool scale_by_half, bool copy_filter)
|
||||
{
|
||||
TCShaderUid out;
|
||||
UidData* uid_data = out.GetUidData<UidData>();
|
||||
|
@ -26,6 +26,7 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
|
|||
uid_data->is_depth_copy = is_depth_copy;
|
||||
uid_data->is_intensity = is_intensity;
|
||||
uid_data->scale_by_half = scale_by_half;
|
||||
uid_data->copy_filter = copy_filter;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -91,12 +92,21 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
|
|||
|
||||
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
||||
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
||||
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
|
||||
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
|
||||
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
|
||||
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
|
||||
" current_row.rgb * filter_coefficients[1] +\n"
|
||||
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
|
||||
if (uid_data->copy_filter)
|
||||
{
|
||||
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
|
||||
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
|
||||
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
|
||||
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
|
||||
" current_row.rgb * filter_coefficients[1] +\n"
|
||||
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write(
|
||||
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
|
||||
" float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
|
||||
}
|
||||
|
||||
if (uid_data->is_depth_copy)
|
||||
{
|
||||
|
|
|
@ -22,6 +22,7 @@ struct UidData
|
|||
u32 is_depth_copy : 1;
|
||||
u32 is_intensity : 1;
|
||||
u32 scale_by_half : 1;
|
||||
u32 copy_filter : 1;
|
||||
};
|
||||
#pragma pack()
|
||||
|
||||
|
@ -30,6 +31,6 @@ using TCShaderUid = ShaderUid<UidData>;
|
|||
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
|
||||
|
||||
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
||||
bool scale_by_half);
|
||||
bool scale_by_half, bool copy_filter);
|
||||
|
||||
} // namespace TextureConversionShaderGen
|
||||
|
|
Loading…
Reference in New Issue