TextureConversionShader: Don't sample from adjacent rows when not needed

This commit is contained in:
Stenzek 2018-05-03 14:09:32 +10:00
parent ef98a21735
commit 4faac3a627
8 changed files with 74 additions and 35 deletions

View File

@ -240,7 +240,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
} }
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half); scale_by_half,
NeedsCopyFilterInShader(filter_coefficients));
ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid); ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
if (!pixel_shader) if (!pixel_shader)
return; return;

View File

@ -515,7 +515,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height); glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half); scale_by_half,
NeedsCopyFilterInShader(filter_coefficients));
auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader()); auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
EFBCopyShader& shader = it.first->second; EFBCopyShader& shader = it.first->second;

View File

@ -274,7 +274,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity, auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
scale_by_half); scale_by_half,
NeedsCopyFilterInShader(filter_coefficients));
auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE)); auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
VkShaderModule& shader = it.first->second; VkShaderModule& shader = it.first->second;

View File

@ -1499,8 +1499,8 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
} }
} }
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const CopyFilterCoefficients::Values& coefficients) const
{ {
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
@ -1510,8 +1510,8 @@ TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Val
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])}; static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
} }
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const CopyFilterCoefficients::Values& coefficients) const
{ {
// If the user disables the copy filter, only apply it to the VRAM copy. // If the user disables the copy filter, only apply it to the VRAM copy.
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
@ -1528,6 +1528,12 @@ TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Va
return res; return res;
} }
// Reports whether the copy shader has to sample and blend the rows adjacent to the current one.
// The array carries one combined coefficient per row: index 0 is the row above, index 1 the
// current row, and index 2 the row below. Only the outer two entries matter here.
bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
{
  const bool uses_row_above = coefficients[0] != 0;
  const bool uses_row_below = coefficients[2] != 0;

  // When neither neighbouring row contributes, the shader can skip those samples entirely.
  return uses_row_above || uses_row_below;
}
void TextureCacheBase::CopyRenderTargetToTexture( void TextureCacheBase::CopyRenderTargetToTexture(
u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma, const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
@ -1650,11 +1656,12 @@ void TextureCacheBase::CopyRenderTargetToTexture(
if (copy_to_ram) if (copy_to_ram)
{ {
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity); EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
NeedsCopyFilterInShader(coefficients));
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf, CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
y_scale, gamma, clamp_top, clamp_bottom, y_scale, gamma, clamp_top, clamp_bottom, coefficients);
GetRAMCopyFilterCoefficients(filter_coefficients));
} }
else else
{ {

View File

@ -47,21 +47,23 @@ struct TextureAndTLUTFormat
struct EFBCopyParams struct EFBCopyParams
{ {
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
bool yuv_) bool yuv_, bool copy_filter_)
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_) : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
copy_filter(copy_filter_)
{ {
} }
bool operator<(const EFBCopyParams& rhs) const bool operator<(const EFBCopyParams& rhs) const
{ {
return std::tie(efb_format, copy_format, depth, yuv) < return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv); std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
} }
PEControl::PixelFormat efb_format; PEControl::PixelFormat efb_format;
EFBCopyFormat copy_format; EFBCopyFormat copy_format;
bool depth; bool depth;
bool yuv; bool yuv;
bool copy_filter;
}; };
struct TextureLookupInformation struct TextureLookupInformation
@ -106,6 +108,7 @@ private:
static const int FRAMECOUNT_INVALID = 0; static const int FRAMECOUNT_INVALID = 0;
public: public:
// Condensed form of the full coefficient array: one summed value for each row (previous, current, next).
using CopyFilterCoefficientArray = std::array<u32, 3>; using CopyFilterCoefficientArray = std::array<u32, 3>;
struct TCacheEntry struct TCacheEntry
@ -278,6 +281,9 @@ public:
protected: protected:
TextureCacheBase(); TextureCacheBase();
// Returns true if the top or bottom row coefficient is non-zero (i.e. the shader must sample adjacent rows); false if both are zero.
bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const;
alignas(16) u8* temp = nullptr; alignas(16) u8* temp = nullptr;
size_t temp_size = 0; size_t temp_size = 0;
@ -329,9 +335,9 @@ private:
// Precomputing the coefficients for the previous, current, and next lines for the copy filter. // Precomputing the coefficients for the previous, current, and next lines for the copy filter.
CopyFilterCoefficientArray CopyFilterCoefficientArray
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
CopyFilterCoefficientArray CopyFilterCoefficientArray
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
TexAddrCache textures_by_address; TexAddrCache textures_by_address;
TexHashCache textures_by_hash; TexHashCache textures_by_hash;

View File

@ -180,21 +180,33 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
// The filter is only applied to the RGB channels, the alpha channel is left intact. // The filter is only applied to the RGB channels, the alpha channel is left intact.
WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"); WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
WRITE(p, "{\n"); WRITE(p, "{\n");
WRITE(p, " float4 prev_row = "); if (params.copy_filter)
WriteSampleOp(-1); {
WRITE(p, ";\n"); WRITE(p, " float4 prev_row = ");
WRITE(p, " float4 current_row = "); WriteSampleOp(-1);
WriteSampleOp(0); WRITE(p, ";\n");
WRITE(p, ";\n"); WRITE(p, " float4 current_row = ");
WRITE(p, " float4 next_row = "); WriteSampleOp(0);
WriteSampleOp(1); WRITE(p, ";\n");
WRITE(p, ";\n"); WRITE(p, " float4 next_row = ");
WRITE(p, WriteSampleOp(1);
WRITE(p, ";\n");
WRITE(
p,
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n" " float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n" " int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n" " int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n"); " int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
WRITE(p, " return float4(col, current_row.a);\n"); WRITE(p, " return float4(col, current_row.a);\n");
}
else
{
  // No adjacent rows need to be sampled, but the current row still has to go through the same
  // arithmetic as the filtered branch: scale to 0-255, multiply by the coefficient, shift right
  // by 6, clamp to 0-255, and normalize back to the 0.0-1.0 range. Omitting the shift and the
  // final division would leave the colour saturated and in the wrong range.
  WRITE(p, "  float4 current_row = ");
  WriteSampleOp(0);
  WRITE(p, ";\n");
  WRITE(p, "  float3 col = float3(clamp((int3(current_row.rgb * 255.0) * "
           "filter_coefficients[1]) >> 6,\n"
           "                            int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
  // As in the filtered branch, the alpha channel is passed through untouched.
  WRITE(p, "  return float4(col, current_row.a);\n");
}
WRITE(p, "}\n"); WRITE(p, "}\n");
} }

View File

@ -15,7 +15,7 @@
namespace TextureConversionShaderGen namespace TextureConversionShaderGen
{ {
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half) bool scale_by_half, bool copy_filter)
{ {
TCShaderUid out; TCShaderUid out;
UidData* uid_data = out.GetUidData<UidData>(); UidData* uid_data = out.GetUidData<UidData>();
@ -26,6 +26,7 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
uid_data->is_depth_copy = is_depth_copy; uid_data->is_depth_copy = is_depth_copy;
uid_data->is_intensity = is_intensity; uid_data->is_intensity = is_intensity;
uid_data->scale_by_half = scale_by_half; uid_data->scale_by_half = scale_by_half;
uid_data->copy_filter = copy_filter;
return out; return out;
} }
@ -91,12 +92,21 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
// The copy filter applies to both color and depth copies. This has been verified on hardware. // The copy filter applies to both color and depth copies. This has been verified on hardware.
// The filter is only applied to the RGB channels, the alpha channel is left intact. // The filter is only applied to the RGB channels, the alpha channel is left intact.
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n" if (uid_data->copy_filter)
" float4 current_row = SampleEFB(uv0, 0.0f);\n" {
" float4 next_row = SampleEFB(uv0, 1.0f);\n" out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n" " float4 current_row = SampleEFB(uv0, 0.0f);\n"
" current_row.rgb * filter_coefficients[1] +\n" " float4 next_row = SampleEFB(uv0, 1.0f);\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n"); " float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
" current_row.rgb * filter_coefficients[1] +\n"
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
}
else
{
out.Write(
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
" float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
}
if (uid_data->is_depth_copy) if (uid_data->is_depth_copy)
{ {

View File

@ -22,6 +22,7 @@ struct UidData
u32 is_depth_copy : 1; u32 is_depth_copy : 1;
u32 is_intensity : 1; u32 is_intensity : 1;
u32 scale_by_half : 1; u32 scale_by_half : 1;
u32 copy_filter : 1;
}; };
#pragma pack() #pragma pack()
@ -30,6 +31,6 @@ using TCShaderUid = ShaderUid<UidData>;
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data); ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity, TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
bool scale_by_half); bool scale_by_half, bool copy_filter);
} // namespace TextureConversionShaderGen } // namespace TextureConversionShaderGen