TextureConversionShader: Don't sample from adjacent rows when not needed
This commit is contained in:
parent
ef98a21735
commit
4faac3a627
|
@ -240,7 +240,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
||||||
}
|
}
|
||||||
|
|
||||||
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||||
scale_by_half);
|
scale_by_half,
|
||||||
|
NeedsCopyFilterInShader(filter_coefficients));
|
||||||
ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
|
ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
|
||||||
if (!pixel_shader)
|
if (!pixel_shader)
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -515,7 +515,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
||||||
glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
|
glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
|
||||||
|
|
||||||
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||||
scale_by_half);
|
scale_by_half,
|
||||||
|
NeedsCopyFilterInShader(filter_coefficients));
|
||||||
|
|
||||||
auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
|
auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
|
||||||
EFBCopyShader& shader = it.first->second;
|
EFBCopyShader& shader = it.first->second;
|
||||||
|
|
|
@ -274,7 +274,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
||||||
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
|
||||||
|
|
||||||
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||||
scale_by_half);
|
scale_by_half,
|
||||||
|
NeedsCopyFilterInShader(filter_coefficients));
|
||||||
|
|
||||||
auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
|
auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
|
||||||
VkShaderModule& shader = it.first->second;
|
VkShaderModule& shader = it.first->second;
|
||||||
|
|
|
@ -1499,8 +1499,8 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCacheBase::CopyFilterCoefficientArray
|
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
|
||||||
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
const CopyFilterCoefficients::Values& coefficients) const
|
||||||
{
|
{
|
||||||
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
|
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
|
||||||
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
|
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
|
||||||
|
@ -1510,8 +1510,8 @@ TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Val
|
||||||
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
|
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])};
|
||||||
}
|
}
|
||||||
|
|
||||||
TextureCacheBase::CopyFilterCoefficientArray
|
TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
|
||||||
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
const CopyFilterCoefficients::Values& coefficients) const
|
||||||
{
|
{
|
||||||
// If the user disables the copy filter, only apply it to the VRAM copy.
|
// If the user disables the copy filter, only apply it to the VRAM copy.
|
||||||
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
|
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
|
||||||
|
@ -1528,6 +1528,12 @@ TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Va
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Reports whether a copy shader has to sample the rows adjacent to the current one.
// `coefficients` is the reduced per-row array: index 0 weights the previous row, index 1 the
// current row and index 2 the next row.
bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
|
||||||
|
{
|
||||||
|
// If the previous-row and next-row weights are both zero, those rows contribute nothing, so there is no point sampling/blending from them.
|
||||||
|
return coefficients[0] != 0 || coefficients[2] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
void TextureCacheBase::CopyRenderTargetToTexture(
|
void TextureCacheBase::CopyRenderTargetToTexture(
|
||||||
u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
|
u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
|
||||||
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
|
const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
|
||||||
|
@ -1650,11 +1656,12 @@ void TextureCacheBase::CopyRenderTargetToTexture(
|
||||||
|
|
||||||
if (copy_to_ram)
|
if (copy_to_ram)
|
||||||
{
|
{
|
||||||
|
CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
|
||||||
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
|
PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
|
||||||
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
|
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
|
||||||
|
NeedsCopyFilterInShader(coefficients));
|
||||||
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
|
CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
|
||||||
y_scale, gamma, clamp_top, clamp_bottom,
|
y_scale, gamma, clamp_top, clamp_bottom, coefficients);
|
||||||
GetRAMCopyFilterCoefficients(filter_coefficients));
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -47,21 +47,23 @@ struct TextureAndTLUTFormat
|
||||||
struct EFBCopyParams
|
struct EFBCopyParams
|
||||||
{
|
{
|
||||||
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
|
EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
|
||||||
bool yuv_)
|
bool yuv_, bool copy_filter_)
|
||||||
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
|
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
|
||||||
|
copy_filter(copy_filter_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator<(const EFBCopyParams& rhs) const
|
bool operator<(const EFBCopyParams& rhs) const
|
||||||
{
|
{
|
||||||
return std::tie(efb_format, copy_format, depth, yuv) <
|
return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
|
||||||
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
|
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
|
||||||
}
|
}
|
||||||
|
|
||||||
PEControl::PixelFormat efb_format;
|
PEControl::PixelFormat efb_format;
|
||||||
EFBCopyFormat copy_format;
|
EFBCopyFormat copy_format;
|
||||||
bool depth;
|
bool depth;
|
||||||
bool yuv;
|
bool yuv;
|
||||||
|
bool copy_filter;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct TextureLookupInformation
|
struct TextureLookupInformation
|
||||||
|
@ -106,6 +108,7 @@ private:
|
||||||
static const int FRAMECOUNT_INVALID = 0;
|
static const int FRAMECOUNT_INVALID = 0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
// Reduced version of the full coefficient array, reduced to a single value for each row.
|
||||||
using CopyFilterCoefficientArray = std::array<u32, 3>;
|
using CopyFilterCoefficientArray = std::array<u32, 3>;
|
||||||
|
|
||||||
struct TCacheEntry
|
struct TCacheEntry
|
||||||
|
@ -278,6 +281,9 @@ public:
|
||||||
protected:
|
protected:
|
||||||
TextureCacheBase();
|
TextureCacheBase();
|
||||||
|
|
||||||
|
// Returns false if the top/bottom row coefficients are zero.
|
||||||
|
bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const;
|
||||||
|
|
||||||
alignas(16) u8* temp = nullptr;
|
alignas(16) u8* temp = nullptr;
|
||||||
size_t temp_size = 0;
|
size_t temp_size = 0;
|
||||||
|
|
||||||
|
@ -329,9 +335,9 @@ private:
|
||||||
|
|
||||||
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
|
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
|
||||||
CopyFilterCoefficientArray
|
CopyFilterCoefficientArray
|
||||||
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
|
||||||
CopyFilterCoefficientArray
|
CopyFilterCoefficientArray
|
||||||
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
|
||||||
|
|
||||||
TexAddrCache textures_by_address;
|
TexAddrCache textures_by_address;
|
||||||
TexHashCache textures_by_hash;
|
TexHashCache textures_by_hash;
|
||||||
|
|
|
@ -180,21 +180,33 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
|
||||||
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
||||||
WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
|
WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
|
||||||
WRITE(p, "{\n");
|
WRITE(p, "{\n");
|
||||||
WRITE(p, " float4 prev_row = ");
|
if (params.copy_filter)
|
||||||
WriteSampleOp(-1);
|
{
|
||||||
WRITE(p, ";\n");
|
WRITE(p, " float4 prev_row = ");
|
||||||
WRITE(p, " float4 current_row = ");
|
WriteSampleOp(-1);
|
||||||
WriteSampleOp(0);
|
WRITE(p, ";\n");
|
||||||
WRITE(p, ";\n");
|
WRITE(p, " float4 current_row = ");
|
||||||
WRITE(p, " float4 next_row = ");
|
WriteSampleOp(0);
|
||||||
WriteSampleOp(1);
|
WRITE(p, ";\n");
|
||||||
WRITE(p, ";\n");
|
WRITE(p, " float4 next_row = ");
|
||||||
WRITE(p,
|
WriteSampleOp(1);
|
||||||
|
WRITE(p, ";\n");
|
||||||
|
WRITE(
|
||||||
|
p,
|
||||||
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
|
" float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
|
||||||
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
|
" int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
|
||||||
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
|
" int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
|
||||||
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
|
" int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
|
||||||
WRITE(p, " return float4(col, current_row.a);\n");
|
WRITE(p, " return float4(col, current_row.a);\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
WRITE(p, " float4 current_row = ");
|
||||||
|
WriteSampleOp(0);
|
||||||
|
WRITE(p, ";\n");
|
||||||
|
// Single-row fast path: must produce exactly what the three-row path produces when the
// prev/next weights are zero, i.e. apply the same >> 6 fixed-point scale and /255 normalization.
WRITE(p, " return float4(float3(clamp((int3(current_row.rgb * 255.0) * filter_coefficients[1]) >> 6, "
|
||||||
|
"int3(0, 0, 0), int3(255, 255, 255))) / 255.0, current_row.a);\n");
|
||||||
|
}
|
||||||
WRITE(p, "}\n");
|
WRITE(p, "}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
namespace TextureConversionShaderGen
|
namespace TextureConversionShaderGen
|
||||||
{
|
{
|
||||||
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
||||||
bool scale_by_half)
|
bool scale_by_half, bool copy_filter)
|
||||||
{
|
{
|
||||||
TCShaderUid out;
|
TCShaderUid out;
|
||||||
UidData* uid_data = out.GetUidData<UidData>();
|
UidData* uid_data = out.GetUidData<UidData>();
|
||||||
|
@ -26,6 +26,7 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
|
||||||
uid_data->is_depth_copy = is_depth_copy;
|
uid_data->is_depth_copy = is_depth_copy;
|
||||||
uid_data->is_intensity = is_intensity;
|
uid_data->is_intensity = is_intensity;
|
||||||
uid_data->scale_by_half = scale_by_half;
|
uid_data->scale_by_half = scale_by_half;
|
||||||
|
uid_data->copy_filter = copy_filter;
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -91,12 +92,21 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
|
||||||
|
|
||||||
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
||||||
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
||||||
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
|
if (uid_data->copy_filter)
|
||||||
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
|
{
|
||||||
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
|
out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
|
||||||
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
|
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
|
||||||
" current_row.rgb * filter_coefficients[1] +\n"
|
" float4 next_row = SampleEFB(uv0, 1.0f);\n"
|
||||||
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
|
" float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
|
||||||
|
" current_row.rgb * filter_coefficients[1] +\n"
|
||||||
|
" next_row.rgb * filter_coefficients[2], current_row.a);\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
out.Write(
|
||||||
|
" float4 current_row = SampleEFB(uv0, 0.0f);\n"
|
||||||
|
" float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
|
||||||
|
}
|
||||||
|
|
||||||
if (uid_data->is_depth_copy)
|
if (uid_data->is_depth_copy)
|
||||||
{
|
{
|
||||||
|
|
|
@ -22,6 +22,7 @@ struct UidData
|
||||||
u32 is_depth_copy : 1;
|
u32 is_depth_copy : 1;
|
||||||
u32 is_intensity : 1;
|
u32 is_intensity : 1;
|
||||||
u32 scale_by_half : 1;
|
u32 scale_by_half : 1;
|
||||||
|
u32 copy_filter : 1;
|
||||||
};
|
};
|
||||||
#pragma pack()
|
#pragma pack()
|
||||||
|
|
||||||
|
@ -30,6 +31,6 @@ using TCShaderUid = ShaderUid<UidData>;
|
||||||
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
|
ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
|
||||||
|
|
||||||
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
||||||
bool scale_by_half);
|
bool scale_by_half, bool copy_filter);
|
||||||
|
|
||||||
} // namespace TextureConversionShaderGen
|
} // namespace TextureConversionShaderGen
|
||||||
|
|
Loading…
Reference in New Issue