Merge pull request #6743 from stenzek/faster-disabled-copy-filter

TextureConversionShader: Don't sample from adjacent rows when not needed
This commit is contained in:
Markus Wick 2018-05-17 10:45:50 +02:00 committed by GitHub
commit c485efdfe1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 74 additions and 35 deletions

View File

@ -240,7 +240,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
}
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half);
scale_by_half,
NeedsCopyFilterInShader(filter_coefficients));
ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
if (!pixel_shader)
return;

View File

@ -515,7 +515,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half);
scale_by_half,
NeedsCopyFilterInShader(filter_coefficients));
auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
EFBCopyShader& shader = it.first->second;

View File

@ -274,7 +274,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half);
scale_by_half,
NeedsCopyFilterInShader(filter_coefficients));
auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
VkShaderModule& shader = it.first->second;

View File

@ -1501,8 +1501,8 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
}
}
TextureCacheBase::CopyFilterCoefficientArray
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
const CopyFilterCoefficients::Values& coefficients) const
{
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
@ -1512,8 +1512,8 @@ TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Val
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
}
TextureCacheBase::CopyFilterCoefficientArray
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
const CopyFilterCoefficients::Values& coefficients) const
{
// If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
@ -1530,6 +1530,12 @@ TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Va
return res;
}
// Reports whether the copy shader has to sample the rows adjacent to the current one.
// The array carries one weight per row: [0] previous row, [1] current row, [2] next row.
bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
{
// A zero weight means that row contributes nothing, so sampling/blending it would be wasted work.
const bool prev_row_used = coefficients[0] != 0;
const bool next_row_used = coefficients[2] != 0;
return prev_row_used || next_row_used;
}
void TextureCacheBase::CopyRenderTargetToTexture(
u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
@ -1652,11 +1658,12 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_ram)
{
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
y_scale, gamma, clamp_top, clamp_bottom,
GetRAMCopyFilterCoefficients(filter_coefficients));
y_scale, gamma, clamp_top, clamp_bottom, coefficients);
}
else
{

View File

@ -47,21 +47,23 @@ struct TextureAndTLUTFormat
struct EFBCopyParams
{
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
bool yuv_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
bool yuv_, bool copy_filter_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
copy_filter(copy_filter_)
{
}
bool operator<(const EFBCopyParams& rhs) const
{
return std::tie(efb_format, copy_format, depth, yuv) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
}
PEControl::PixelFormat efb_format;
EFBCopyFormat copy_format;
bool depth;
bool yuv;
bool copy_filter;
};
struct TextureLookupInformation
@ -106,6 +108,7 @@ private:
static const int FRAMECOUNT_INVALID = 0;
public:
// Reduced version of the full coefficient array, reduced to a single value for each row.
using CopyFilterCoefficientArray = std::array<u32, 3>;
struct TCacheEntry
@ -278,6 +281,9 @@ public:
protected:
TextureCacheBase();
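// Returns true if either the top or the bottom row coefficient is non-zero, i.e. the shader has
// to sample the adjacent rows. Returns false only when both of them are zero.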
bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const;
alignas(16) u8* temp = nullptr;
size_t temp_size = 0;
@ -329,9 +335,9 @@ private:
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
CopyFilterCoefficientArray
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
CopyFilterCoefficientArray
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
TexAddrCache textures_by_address;
TexHashCache textures_by_hash;

View File

@ -180,21 +180,33 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
// The filter is only applied to the RGB channels, the alpha channel is left intact.
WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
WRITE(p, "{\n");
WRITE(p, " float4 prev_row = ");
WriteSampleOp(-1);
WRITE(p, ";\n");
WRITE(p, " float4 current_row = ");
WriteSampleOp(0);
WRITE(p, ";\n");
WRITE(p, " float4 next_row = ");
WriteSampleOp(1);
WRITE(p, ";\n");
WRITE(p,
if (params.copy_filter)
{
WRITE(p, " float4 prev_row = ");
WriteSampleOp(-1);
WRITE(p, ";\n");
WRITE(p, " float4 current_row = ");
WriteSampleOp(0);
WRITE(p, ";\n");
WRITE(p, " float4 next_row = ");
WriteSampleOp(1);
WRITE(p, ";\n");
WRITE(
p,
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
WRITE(p, " return float4(col, current_row.a);\n");
WRITE(p, " return float4(col, current_row.a);\n");
}
else
{
WRITE(p, " float4 current_row = ");
WriteSampleOp(0);
WRITE(p, ";\n");
WRITE(p, " return float4(clamp(int3(current_row.rgb * 255.0) * filter_coefficients[1], "
"int3(0, 0, 0), int3(255, 255, 255)), current_row.a);\n");
}
WRITE(p, "}\n");
}

View File

@ -15,7 +15,7 @@
namespace TextureConversionShaderGen
{
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half)
bool scale_by_half, bool copy_filter)
{
TCShaderUid out;
UidData* uid_data = out.GetUidData<UidData>();
@ -26,6 +26,7 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
uid_data->is_depth_copy = is_depth_copy;
uid_data->is_intensity = is_intensity;
uid_data->scale_by_half = scale_by_half;
uid_data->copy_filter = copy_filter;
return out;
}
@ -91,12 +92,21 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
// The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact.
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
if (uid_data->copy_filter)
{
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
}
else
{
out.Write(
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
}
if (uid_data->is_depth_copy)
{

View File

@ -22,6 +22,7 @@ struct UidData
u32 is_depth_copy : 1;
u32 is_intensity : 1;
u32 scale_by_half : 1;
u32 copy_filter : 1;
};
#pragma pack()
@ -30,6 +31,6 @@ using TCShaderUid = ShaderUid<UidData>;
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half);
bool scale_by_half, bool copy_filter);
} // namespace TextureConversionShaderGen