Merge pull request #10466 from Pokechu22/efb-copy-gamma
Accurately handle the copy filter and gamma for EFB and XFB copies
This commit is contained in:
commit
f1d23ff9a4
|
@ -591,9 +591,9 @@ void FifoPlayer::ClearEfb()
|
||||||
UPE_Copy copy = bpmem.triggerEFBCopy;
|
UPE_Copy copy = bpmem.triggerEFBCopy;
|
||||||
copy.clamp_top = false;
|
copy.clamp_top = false;
|
||||||
copy.clamp_bottom = false;
|
copy.clamp_bottom = false;
|
||||||
copy.yuv = false;
|
copy.unknown_bit = false;
|
||||||
copy.target_pixel_format = static_cast<u32>(EFBCopyFormat::RGBA8) << 1;
|
copy.target_pixel_format = static_cast<u32>(EFBCopyFormat::RGBA8) << 1;
|
||||||
copy.gamma = 0;
|
copy.gamma = GammaCorrection::Gamma1_0;
|
||||||
copy.half_scale = false;
|
copy.half_scale = false;
|
||||||
copy.scale_invert = false;
|
copy.scale_invert = false;
|
||||||
copy.clear = true;
|
copy.clear = true;
|
||||||
|
|
|
@ -14,7 +14,7 @@ protected:
|
||||||
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
|
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
|
||||||
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
|
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
|
||||||
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
|
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients) override
|
const std::array<u32, 3>& filter_coefficients) override
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,7 +22,7 @@ protected:
|
||||||
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
||||||
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
|
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
|
||||||
float gamma, bool clamp_top, bool clamp_bottom,
|
float gamma, bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients) override
|
const std::array<u32, 3>& filter_coefficients) override
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -535,9 +535,14 @@ static yuv444 ConvertColorToYUV(u32 color)
|
||||||
|
|
||||||
// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
|
// GameCube/Wii uses the BT.601 standard algorithm for converting to YCbCr; see
|
||||||
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
||||||
return {static_cast<u8>(0.257f * red + 0.504f * green + 0.098f * blue),
|
// These numbers were determined by hardware testing
|
||||||
static_cast<s8>(-0.148f * red + -0.291f * green + 0.439f * blue),
|
const u16 y = +66 * red + 129 * green + +25 * blue;
|
||||||
static_cast<s8>(0.439f * red + -0.368f * green + -0.071f * blue)};
|
const s16 u = -38 * red + -74 * green + 112 * blue;
|
||||||
|
const s16 v = 112 * red + -94 * green + -18 * blue;
|
||||||
|
const u8 y_round = static_cast<u8>((y >> 8) + ((y >> 7) & 1));
|
||||||
|
const s8 u_round = static_cast<s8>((u >> 8) + ((u >> 7) & 1));
|
||||||
|
const s8 v_round = static_cast<s8>((v >> 8) + ((v >> 7) & 1));
|
||||||
|
return {y_round, u_round, v_round};
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 GetDepth(u16 x, u16 y)
|
u32 GetDepth(u16 x, u16 y)
|
||||||
|
|
|
@ -14,7 +14,7 @@ protected:
|
||||||
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
|
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
|
||||||
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
|
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half, bool linear_filter,
|
||||||
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
|
float y_scale, float gamma, bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients) override
|
const std::array<u32, 3>& filter_coefficients) override
|
||||||
{
|
{
|
||||||
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
|
TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride,
|
||||||
src_rect, scale_by_half, y_scale, gamma);
|
src_rect, scale_by_half, y_scale, gamma);
|
||||||
|
@ -23,7 +23,7 @@ protected:
|
||||||
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
||||||
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
|
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
|
||||||
float gamma, bool clamp_top, bool clamp_bottom,
|
float gamma, bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients) override
|
const std::array<u32, 3>& filter_coefficients) override
|
||||||
{
|
{
|
||||||
// TODO: If we ever want to "fake" vram textures, we would need to implement this
|
// TODO: If we ever want to "fake" vram textures, we would need to implement this
|
||||||
}
|
}
|
||||||
|
|
|
@ -2035,17 +2035,30 @@ struct fmt::formatter<FrameToField> : EnumFormatter<FrameToField::InterlacedOdd>
|
||||||
constexpr formatter() : EnumFormatter(names) {}
|
constexpr formatter() : EnumFormatter(names) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class GammaCorrection : u32
|
||||||
|
{
|
||||||
|
Gamma1_0 = 0,
|
||||||
|
Gamma1_7 = 1,
|
||||||
|
Gamma2_2 = 2,
|
||||||
|
// Hardware testing indicates this behaves the same as Gamma2_2
|
||||||
|
Invalid2_2 = 3,
|
||||||
|
};
|
||||||
|
template <>
|
||||||
|
struct fmt::formatter<GammaCorrection> : EnumFormatter<GammaCorrection::Invalid2_2>
|
||||||
|
{
|
||||||
|
constexpr formatter() : EnumFormatter({"1.0", "1.7", "2.2", "Invalid 2.2"}) {}
|
||||||
|
};
|
||||||
|
|
||||||
union UPE_Copy
|
union UPE_Copy
|
||||||
{
|
{
|
||||||
u32 Hex;
|
u32 Hex;
|
||||||
|
|
||||||
BitField<0, 1, bool, u32> clamp_top; // if set clamp top
|
BitField<0, 1, bool, u32> clamp_top; // if set clamp top
|
||||||
BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom
|
BitField<1, 1, bool, u32> clamp_bottom; // if set clamp bottom
|
||||||
BitField<2, 1, bool, u32> yuv; // if set, color conversion from RGB to YUV
|
BitField<2, 1, u32> unknown_bit;
|
||||||
BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason
|
BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... for some reason
|
||||||
// the msb is the lsb (pattern: cycling right shift)
|
// the msb is the lsb (pattern: cycling right shift)
|
||||||
// gamma correction.. 0 = 1.0 ; 1 = 1.7 ; 2 = 2.2 ; 3 is reserved
|
BitField<7, 2, GammaCorrection> gamma;
|
||||||
BitField<7, 2, u32> gamma;
|
|
||||||
// "mipmap" filter... false = no filter (scale 1:1) ; true = box filter (scale 2:1)
|
// "mipmap" filter... false = no filter (scale 1:1) ; true = box filter (scale 2:1)
|
||||||
BitField<9, 1, bool, u32> half_scale;
|
BitField<9, 1, bool, u32> half_scale;
|
||||||
BitField<10, 1, bool, u32> scale_invert; // if set vertical scaling is on
|
BitField<10, 1, bool, u32> scale_invert; // if set vertical scaling is on
|
||||||
|
@ -2084,23 +2097,10 @@ struct fmt::formatter<UPE_Copy>
|
||||||
else
|
else
|
||||||
clamp = "None";
|
clamp = "None";
|
||||||
}
|
}
|
||||||
std::string_view gamma = "Invalid";
|
|
||||||
switch (copy.gamma)
|
|
||||||
{
|
|
||||||
case 0:
|
|
||||||
gamma = "1.0";
|
|
||||||
break;
|
|
||||||
case 1:
|
|
||||||
gamma = "1.7";
|
|
||||||
break;
|
|
||||||
case 2:
|
|
||||||
gamma = "2.2";
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return fmt::format_to(ctx.out(),
|
return fmt::format_to(ctx.out(),
|
||||||
"Clamping: {}\n"
|
"Clamping: {}\n"
|
||||||
"Converting from RGB to YUV: {}\n"
|
"Unknown bit: {}\n"
|
||||||
"Target pixel format: {}\n"
|
"Target pixel format: {}\n"
|
||||||
"Gamma correction: {}\n"
|
"Gamma correction: {}\n"
|
||||||
"Half scale: {}\n"
|
"Half scale: {}\n"
|
||||||
|
@ -2110,7 +2110,7 @@ struct fmt::formatter<UPE_Copy>
|
||||||
"Copy to XFB: {}\n"
|
"Copy to XFB: {}\n"
|
||||||
"Intensity format: {}\n"
|
"Intensity format: {}\n"
|
||||||
"Automatic color conversion: {}",
|
"Automatic color conversion: {}",
|
||||||
clamp, no_yes[copy.yuv], copy.tp_realFormat(), gamma,
|
clamp, copy.unknown_bit, copy.tp_realFormat(), copy.gamma,
|
||||||
no_yes[copy.half_scale], no_yes[copy.scale_invert], no_yes[copy.clear],
|
no_yes[copy.half_scale], no_yes[copy.scale_invert], no_yes[copy.clear],
|
||||||
copy.frame_to_field, no_yes[copy.copy_to_xfb], no_yes[copy.intensity_fmt],
|
copy.frame_to_field, no_yes[copy.copy_to_xfb], no_yes[copy.intensity_fmt],
|
||||||
no_yes[copy.auto_conv]);
|
no_yes[copy.auto_conv]);
|
||||||
|
@ -2123,10 +2123,12 @@ union CopyFilterCoefficients
|
||||||
|
|
||||||
u64 Hex;
|
u64 Hex;
|
||||||
|
|
||||||
|
BitField<0, 32, u32, u64> Low;
|
||||||
BitField<0, 6, u64> w0;
|
BitField<0, 6, u64> w0;
|
||||||
BitField<6, 6, u64> w1;
|
BitField<6, 6, u64> w1;
|
||||||
BitField<12, 6, u64> w2;
|
BitField<12, 6, u64> w2;
|
||||||
BitField<18, 6, u64> w3;
|
BitField<18, 6, u64> w3;
|
||||||
|
BitField<32, 32, u32, u64> High;
|
||||||
BitField<32, 6, u64> w4;
|
BitField<32, 6, u64> w4;
|
||||||
BitField<38, 6, u64> w5;
|
BitField<38, 6, u64> w5;
|
||||||
BitField<44, 6, u64> w6;
|
BitField<44, 6, u64> w6;
|
||||||
|
|
|
@ -11,6 +11,7 @@
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
|
#include "Common/EnumMap.h"
|
||||||
#include "Common/Logging/Log.h"
|
#include "Common/Logging/Log.h"
|
||||||
|
|
||||||
#include "Core/ConfigManager.h"
|
#include "Core/ConfigManager.h"
|
||||||
|
@ -42,7 +43,8 @@
|
||||||
|
|
||||||
using namespace BPFunctions;
|
using namespace BPFunctions;
|
||||||
|
|
||||||
static const float s_gammaLUT[] = {1.0f, 1.7f, 2.2f, 1.0f};
|
static constexpr Common::EnumMap<float, GammaCorrection::Invalid2_2> s_gammaLUT = {1.0f, 1.7f, 2.2f,
|
||||||
|
2.2f};
|
||||||
|
|
||||||
void BPInit()
|
void BPInit()
|
||||||
{
|
{
|
||||||
|
@ -276,9 +278,9 @@ static void BPWritten(const BPCmd& bp, int cycles_into_future)
|
||||||
bool is_depth_copy = bpmem.zcontrol.pixel_format == PixelFormat::Z24;
|
bool is_depth_copy = bpmem.zcontrol.pixel_format == PixelFormat::Z24;
|
||||||
g_texture_cache->CopyRenderTargetToTexture(
|
g_texture_cache->CopyRenderTargetToTexture(
|
||||||
destAddr, PE_copy.tp_realFormat(), copy_width, copy_height, destStride, is_depth_copy,
|
destAddr, PE_copy.tp_realFormat(), copy_width, copy_height, destStride, is_depth_copy,
|
||||||
srcRect, PE_copy.intensity_fmt, PE_copy.half_scale, 1.0f, 1.0f,
|
srcRect, PE_copy.intensity_fmt && PE_copy.auto_conv, PE_copy.half_scale, 1.0f,
|
||||||
bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom,
|
s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top,
|
||||||
bpmem.copyfilter.GetCoefficients());
|
bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -550,7 +550,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
WARN_LOG_FMT(VIDEO, "From format {} is not supported", static_cast<u32>(from_format));
|
WARN_LOG_FMT(VIDEO, "From format {} is not supported", from_format);
|
||||||
return "{}\n";
|
return "{}\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -602,7 +602,7 @@ std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureF
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
WARN_LOG_FMT(VIDEO, "To format {} is not supported", static_cast<u32>(to_format));
|
WARN_LOG_FMT(VIDEO, "To format {} is not supported", to_format);
|
||||||
return "{}\n";
|
return "{}\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -276,8 +276,7 @@ TextureCacheBase::ApplyPaletteToEntry(TCacheEntry* entry, const u8* palette, TLU
|
||||||
const AbstractPipeline* pipeline = g_shader_cache->GetPaletteConversionPipeline(tlutfmt);
|
const AbstractPipeline* pipeline = g_shader_cache->GetPaletteConversionPipeline(tlutfmt);
|
||||||
if (!pipeline)
|
if (!pipeline)
|
||||||
{
|
{
|
||||||
ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {:#04X}",
|
ERROR_LOG_FMT(VIDEO, "Failed to get conversion pipeline for format {}", tlutfmt);
|
||||||
static_cast<u32>(tlutfmt));
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -345,9 +344,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::ReinterpretEntry(const TCacheEn
|
||||||
g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
|
g_shader_cache->GetTextureReinterpretPipeline(existing_entry->format.texfmt, new_format);
|
||||||
if (!pipeline)
|
if (!pipeline)
|
||||||
{
|
{
|
||||||
ERROR_LOG_FMT(VIDEO,
|
ERROR_LOG_FMT(VIDEO, "Failed to obtain texture reinterpreting pipeline from format {} to {}",
|
||||||
"Failed to obtain texture reinterpreting pipeline from format {:#04X} to {:#04X}",
|
existing_entry->format.texfmt, new_format);
|
||||||
static_cast<u32>(existing_entry->format.texfmt), static_cast<u32>(new_format));
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1980,44 +1978,49 @@ void TextureCacheBase::StitchXFBCopy(TCacheEntry* stitched_entry)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
EFBCopyFilterCoefficients
|
std::array<u32, 3>
|
||||||
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
||||||
{
|
{
|
||||||
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
|
// To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
|
||||||
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
|
// are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
|
||||||
return EFBCopyFilterCoefficients{
|
return {
|
||||||
static_cast<float>(static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1])) /
|
static_cast<u32>(coefficients[0]) + static_cast<u32>(coefficients[1]),
|
||||||
64.0f,
|
static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
|
||||||
static_cast<float>(static_cast<u32>(coefficients[2]) + static_cast<u32>(coefficients[3]) +
|
static_cast<u32>(coefficients[4]),
|
||||||
static_cast<u32>(coefficients[4])) /
|
static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6]),
|
||||||
64.0f,
|
|
||||||
static_cast<float>(static_cast<u32>(coefficients[5]) + static_cast<u32>(coefficients[6])) /
|
|
||||||
64.0f,
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
EFBCopyFilterCoefficients
|
std::array<u32, 3>
|
||||||
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
|
||||||
{
|
{
|
||||||
// If the user disables the copy filter, only apply it to the VRAM copy.
|
// If the user disables the copy filter, only apply it to the VRAM copy.
|
||||||
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
|
// This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
|
||||||
EFBCopyFilterCoefficients res = GetRAMCopyFilterCoefficients(coefficients);
|
std::array<u32, 3> res = GetRAMCopyFilterCoefficients(coefficients);
|
||||||
if (!g_ActiveConfig.bDisableCopyFilter)
|
if (!g_ActiveConfig.bDisableCopyFilter)
|
||||||
return res;
|
return res;
|
||||||
|
|
||||||
// Disabling the copy filter in options should not ignore the values the game sets completely,
|
// Disabling the copy filter in options should not ignore the values the game sets completely,
|
||||||
// as some games use the filter coefficients to control the brightness of the screen. Instead,
|
// as some games use the filter coefficients to control the brightness of the screen. Instead,
|
||||||
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
|
// add all coefficients to the middle sample, so the deflicker/vertical filter has no effect.
|
||||||
res.middle = res.upper + res.middle + res.lower;
|
res[1] = res[0] + res[1] + res[2];
|
||||||
res.upper = 0.0f;
|
res[0] = 0;
|
||||||
res.lower = 0.0f;
|
res[2] = 0;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool TextureCacheBase::NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients)
|
bool TextureCacheBase::AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients)
|
||||||
{
|
{
|
||||||
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
|
// If the top/bottom coefficients are zero, no point sampling/blending from these rows.
|
||||||
return coefficients.upper != 0 || coefficients.lower != 0;
|
return coefficients[0] != 0 || coefficients[2] != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TextureCacheBase::CopyFilterCanOverflow(const std::array<u32, 3>& coefficients)
|
||||||
|
{
|
||||||
|
// Normally, the copy filter coefficients will sum to at most 64. If the sum is higher than that,
|
||||||
|
// colors are clamped to the range [0, 255], but if the sum is higher than 128, that clamping
|
||||||
|
// breaks (as colors end up >= 512, which wraps back to 0).
|
||||||
|
return coefficients[0] + coefficients[1] + coefficients[2] >= 128;
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextureCacheBase::CopyRenderTargetToTexture(
|
void TextureCacheBase::CopyRenderTargetToTexture(
|
||||||
|
@ -2257,10 +2260,11 @@ void TextureCacheBase::CopyRenderTargetToTexture(
|
||||||
|
|
||||||
if (copy_to_ram)
|
if (copy_to_ram)
|
||||||
{
|
{
|
||||||
EFBCopyFilterCoefficients coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
|
const std::array<u32, 3> coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
|
||||||
PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
|
PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
|
||||||
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
|
EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
|
||||||
NeedsCopyFilterInShader(coefficients));
|
AllCopyFilterCoefsNeeded(coefficients),
|
||||||
|
CopyFilterCanOverflow(coefficients), gamma != 1.0);
|
||||||
|
|
||||||
std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
|
std::unique_ptr<AbstractStagingTexture> staging_texture = GetEFBCopyStagingTexture();
|
||||||
if (staging_texture)
|
if (staging_texture)
|
||||||
|
@ -2718,16 +2722,15 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
|
||||||
bool scale_by_half, bool linear_filter,
|
bool scale_by_half, bool linear_filter,
|
||||||
EFBCopyFormat dst_format, bool is_intensity, float gamma,
|
EFBCopyFormat dst_format, bool is_intensity, float gamma,
|
||||||
bool clamp_top, bool clamp_bottom,
|
bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients)
|
const std::array<u32, 3>& filter_coefficients)
|
||||||
{
|
{
|
||||||
// Flush EFB pokes first, as they're expected to be included.
|
// Flush EFB pokes first, as they're expected to be included.
|
||||||
g_framebuffer_manager->FlushEFBPokes();
|
g_framebuffer_manager->FlushEFBPokes();
|
||||||
|
|
||||||
// Get the pipeline which we will be using. If the compilation failed, this will be null.
|
// Get the pipeline which we will be using. If the compilation failed, this will be null.
|
||||||
const AbstractPipeline* copy_pipeline =
|
const AbstractPipeline* copy_pipeline = g_shader_cache->GetEFBCopyToVRAMPipeline(
|
||||||
g_shader_cache->GetEFBCopyToVRAMPipeline(TextureConversionShaderGen::GetShaderUid(
|
TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
|
||||||
dst_format, is_depth_copy, is_intensity, scale_by_half,
|
scale_by_half, 1.0f / gamma, filter_coefficients));
|
||||||
NeedsCopyFilterInShader(filter_coefficients)));
|
|
||||||
if (!copy_pipeline)
|
if (!copy_pipeline)
|
||||||
{
|
{
|
||||||
WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
|
WARN_LOG_FMT(VIDEO, "Skipping EFB copy to VRAM due to missing pipeline.");
|
||||||
|
@ -2748,7 +2751,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
|
||||||
struct Uniforms
|
struct Uniforms
|
||||||
{
|
{
|
||||||
float src_left, src_top, src_width, src_height;
|
float src_left, src_top, src_width, src_height;
|
||||||
float filter_coefficients[3];
|
std::array<u32, 3> filter_coefficients;
|
||||||
float gamma_rcp;
|
float gamma_rcp;
|
||||||
float clamp_top;
|
float clamp_top;
|
||||||
float clamp_bottom;
|
float clamp_bottom;
|
||||||
|
@ -2763,9 +2766,7 @@ void TextureCacheBase::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_cop
|
||||||
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
|
uniforms.src_top = framebuffer_rect.top * rcp_efb_height;
|
||||||
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
|
uniforms.src_width = framebuffer_rect.GetWidth() * rcp_efb_width;
|
||||||
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
|
uniforms.src_height = framebuffer_rect.GetHeight() * rcp_efb_height;
|
||||||
uniforms.filter_coefficients[0] = filter_coefficients.upper;
|
uniforms.filter_coefficients = filter_coefficients;
|
||||||
uniforms.filter_coefficients[1] = filter_coefficients.middle;
|
|
||||||
uniforms.filter_coefficients[2] = filter_coefficients.lower;
|
|
||||||
uniforms.gamma_rcp = 1.0f / gamma;
|
uniforms.gamma_rcp = 1.0f / gamma;
|
||||||
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
|
// NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB,
|
||||||
// which returns random garbage from the empty bus (confirmed by hardware tests).
|
// which returns random garbage from the empty bus (confirmed by hardware tests).
|
||||||
|
@ -2797,7 +2798,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
|
||||||
u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
|
u32 memory_stride, const MathUtil::Rectangle<int>& src_rect,
|
||||||
bool scale_by_half, bool linear_filter, float y_scale, float gamma,
|
bool scale_by_half, bool linear_filter, float y_scale, float gamma,
|
||||||
bool clamp_top, bool clamp_bottom,
|
bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients)
|
const std::array<u32, 3>& filter_coefficients)
|
||||||
{
|
{
|
||||||
// Flush EFB pokes first, as they're expected to be included.
|
// Flush EFB pokes first, as they're expected to be included.
|
||||||
g_framebuffer_manager->FlushEFBPokes();
|
g_framebuffer_manager->FlushEFBPokes();
|
||||||
|
@ -2828,7 +2829,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
|
||||||
float gamma_rcp;
|
float gamma_rcp;
|
||||||
float clamp_top;
|
float clamp_top;
|
||||||
float clamp_bottom;
|
float clamp_bottom;
|
||||||
float filter_coefficients[3];
|
std::array<u32, 3> filter_coefficients;
|
||||||
u32 padding;
|
u32 padding;
|
||||||
};
|
};
|
||||||
Uniforms encoder_params;
|
Uniforms encoder_params;
|
||||||
|
@ -2849,9 +2850,7 @@ void TextureCacheBase::CopyEFB(AbstractStagingTexture* dst, const EFBCopyParams&
|
||||||
encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
|
encoder_params.clamp_top = (static_cast<float>(top_coord) + .5f) * rcp_efb_height;
|
||||||
const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
|
const u32 bottom_coord = (clamp_bottom ? framebuffer_rect.bottom : efb_height) - 1;
|
||||||
encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
|
encoder_params.clamp_bottom = (static_cast<float>(bottom_coord) + .5f) * rcp_efb_height;
|
||||||
encoder_params.filter_coefficients[0] = filter_coefficients.upper;
|
encoder_params.filter_coefficients = filter_coefficients;
|
||||||
encoder_params.filter_coefficients[1] = filter_coefficients.middle;
|
|
||||||
encoder_params.filter_coefficients[2] = filter_coefficients.lower;
|
|
||||||
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
|
g_vertex_manager->UploadUtilityUniforms(&encoder_params, sizeof(encoder_params));
|
||||||
|
|
||||||
// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
|
// Because the shader uses gl_FragCoord and we read it back, we must render to the lower-left.
|
||||||
|
|
|
@ -57,23 +57,30 @@ struct TextureAndTLUTFormat
|
||||||
struct EFBCopyParams
|
struct EFBCopyParams
|
||||||
{
|
{
|
||||||
EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_,
|
EFBCopyParams(PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, bool yuv_,
|
||||||
bool copy_filter_)
|
bool all_copy_filter_coefs_needed_, bool copy_filter_can_overflow_,
|
||||||
|
bool apply_gamma_)
|
||||||
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
|
: efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
|
||||||
copy_filter(copy_filter_)
|
all_copy_filter_coefs_needed(all_copy_filter_coefs_needed_),
|
||||||
|
copy_filter_can_overflow(copy_filter_can_overflow_), apply_gamma(apply_gamma_)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
bool operator<(const EFBCopyParams& rhs) const
|
bool operator<(const EFBCopyParams& rhs) const
|
||||||
{
|
{
|
||||||
return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
|
return std::tie(efb_format, copy_format, depth, yuv, all_copy_filter_coefs_needed,
|
||||||
std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
|
copy_filter_can_overflow,
|
||||||
|
apply_gamma) < std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv,
|
||||||
|
rhs.all_copy_filter_coefs_needed,
|
||||||
|
rhs.copy_filter_can_overflow, rhs.apply_gamma);
|
||||||
}
|
}
|
||||||
|
|
||||||
PixelFormat efb_format;
|
PixelFormat efb_format;
|
||||||
EFBCopyFormat copy_format;
|
EFBCopyFormat copy_format;
|
||||||
bool depth;
|
bool depth;
|
||||||
bool yuv;
|
bool yuv;
|
||||||
bool copy_filter;
|
bool all_copy_filter_coefs_needed;
|
||||||
|
bool copy_filter_can_overflow;
|
||||||
|
bool apply_gamma;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
|
@ -89,19 +96,13 @@ struct fmt::formatter<EFBCopyParams>
|
||||||
else
|
else
|
||||||
copy_format = fmt::to_string(uid.copy_format);
|
copy_format = fmt::to_string(uid.copy_format);
|
||||||
return fmt::format_to(ctx.out(),
|
return fmt::format_to(ctx.out(),
|
||||||
"format: {}, copy format: {}, depth: {}, yuv: {}, copy filter: {}",
|
"format: {}, copy format: {}, depth: {}, yuv: {}, apply_gamma: {}, "
|
||||||
uid.efb_format, copy_format, uid.depth, uid.yuv, uid.copy_filter);
|
"all_copy_filter_coefs_needed: {}, copy_filter_can_overflow: {}",
|
||||||
|
uid.efb_format, copy_format, uid.depth, uid.yuv, uid.apply_gamma,
|
||||||
|
uid.all_copy_filter_coefs_needed, uid.copy_filter_can_overflow);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Reduced version of the full coefficient array, with a single value for each row.
|
|
||||||
struct EFBCopyFilterCoefficients
|
|
||||||
{
|
|
||||||
float upper;
|
|
||||||
float middle;
|
|
||||||
float lower;
|
|
||||||
};
|
|
||||||
|
|
||||||
class TextureCacheBase
|
class TextureCacheBase
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
|
@ -267,8 +268,8 @@ public:
|
||||||
// Save States
|
// Save States
|
||||||
void DoState(PointerWrap& p);
|
void DoState(PointerWrap& p);
|
||||||
|
|
||||||
// Returns false if the top/bottom row coefficients are zero.
|
static bool AllCopyFilterCoefsNeeded(const std::array<u32, 3>& coefficients);
|
||||||
static bool NeedsCopyFilterInShader(const EFBCopyFilterCoefficients& coefficients);
|
static bool CopyFilterCanOverflow(const std::array<u32, 3>& coefficients);
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
// Decodes the specified data to the GPU texture specified by entry.
|
// Decodes the specified data to the GPU texture specified by entry.
|
||||||
|
@ -285,12 +286,12 @@ protected:
|
||||||
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
|
u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride,
|
||||||
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
||||||
bool linear_filter, float y_scale, float gamma, bool clamp_top,
|
bool linear_filter, float y_scale, float gamma, bool clamp_top,
|
||||||
bool clamp_bottom, const EFBCopyFilterCoefficients& filter_coefficients);
|
bool clamp_bottom, const std::array<u32, 3>& filter_coefficients);
|
||||||
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
|
||||||
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
const MathUtil::Rectangle<int>& src_rect, bool scale_by_half,
|
||||||
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
|
bool linear_filter, EFBCopyFormat dst_format, bool is_intensity,
|
||||||
float gamma, bool clamp_top, bool clamp_bottom,
|
float gamma, bool clamp_top, bool clamp_bottom,
|
||||||
const EFBCopyFilterCoefficients& filter_coefficients);
|
const std::array<u32, 3>& filter_coefficients);
|
||||||
|
|
||||||
alignas(16) u8* temp = nullptr;
|
alignas(16) u8* temp = nullptr;
|
||||||
size_t temp_size = 0;
|
size_t temp_size = 0;
|
||||||
|
@ -338,9 +339,9 @@ private:
|
||||||
void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
|
void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y);
|
||||||
|
|
||||||
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
|
// Precomputing the coefficients for the previous, current, and next lines for the copy filter.
|
||||||
static EFBCopyFilterCoefficients
|
static std::array<u32, 3>
|
||||||
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
||||||
static EFBCopyFilterCoefficients
|
static std::array<u32, 3>
|
||||||
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
|
||||||
|
|
||||||
// Flushes a pending EFB copy to RAM from the host to the guest RAM.
|
// Flushes a pending EFB copy to RAM from the host to the guest RAM.
|
||||||
|
|
|
@ -18,8 +18,6 @@
|
||||||
|
|
||||||
namespace TextureConversionShaderTiled
|
namespace TextureConversionShaderTiled
|
||||||
{
|
{
|
||||||
static bool IntensityConstantAdded = false;
|
|
||||||
|
|
||||||
u16 GetEncodedSampleCount(EFBCopyFormat format)
|
u16 GetEncodedSampleCount(EFBCopyFormat format)
|
||||||
{
|
{
|
||||||
switch (format)
|
switch (format)
|
||||||
|
@ -48,8 +46,7 @@ u16 GetEncodedSampleCount(EFBCopyFormat format)
|
||||||
case EFBCopyFormat::XFB:
|
case EFBCopyFormat::XFB:
|
||||||
return 2;
|
return 2;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEncodedSampleCount)",
|
PanicAlertFmt("Invalid EFB Copy Format {}! (GetEncodedSampleCount)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -63,7 +60,7 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
|
||||||
" float y_scale;\n"
|
" float y_scale;\n"
|
||||||
" float gamma_rcp;\n"
|
" float gamma_rcp;\n"
|
||||||
" float2 clamp_tb;\n"
|
" float2 clamp_tb;\n"
|
||||||
" float3 filter_coefficients;\n"
|
" uint3 filter_coefficients;\n"
|
||||||
"}};\n");
|
"}};\n");
|
||||||
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
|
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
|
||||||
{
|
{
|
||||||
|
@ -86,115 +83,124 @@ static void WriteHeader(ShaderCode& code, APIType api_type)
|
||||||
|
|
||||||
"float4 RGBA8ToRGBA6(float4 src)\n"
|
"float4 RGBA8ToRGBA6(float4 src)\n"
|
||||||
"{{\n"
|
"{{\n"
|
||||||
" int4 val = int4(roundEven(src * 255.0)) >> 2;\n"
|
" int4 val = int4(roundEven(src * 255.0));\n"
|
||||||
" return float4(val) / 63.0;\n"
|
" val = (val & 0xfc) | (val >> 6);\n"
|
||||||
|
" return float4(val) / 255.0;\n"
|
||||||
"}}\n"
|
"}}\n"
|
||||||
|
|
||||||
"float4 RGBA8ToRGB565(float4 src)\n"
|
"float4 RGBA8ToRGB565(float4 src)\n"
|
||||||
"{{\n"
|
"{{\n"
|
||||||
" int4 val = int4(roundEven(src * 255.0));\n"
|
" int4 val = int4(roundEven(src * 255.0));\n"
|
||||||
" val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n"
|
" val.r = (val.r & 0xf8) | (val.r >> 5);\n"
|
||||||
" return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"
|
" val.g = (val.g & 0xfc) | (val.g >> 6);\n"
|
||||||
|
" val.b = (val.b & 0xf8) | (val.b >> 5);\n"
|
||||||
|
" val.a = 255;\n"
|
||||||
|
" return float4(val) / 255.0;\n"
|
||||||
"}}\n");
|
"}}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, APIType api_type)
|
static void WriteSampleFunction(ShaderCode& code, const EFBCopyParams& params, APIType api_type)
|
||||||
{
|
{
|
||||||
const auto WriteSampleOp = [api_type, &code, &params](int yoffset) {
|
code.Write("uint4 SampleEFB0(float2 uv, float2 pixel_size, float x_offset, float y_offset) {{\n"
|
||||||
if (!params.depth)
|
" float4 tex_sample = texture(samp0, float3(uv.x + x_offset * pixel_size.x, ");
|
||||||
{
|
|
||||||
switch (params.efb_format)
|
|
||||||
{
|
|
||||||
case PixelFormat::RGB8_Z24:
|
|
||||||
code.Write("RGBA8ToRGB8(");
|
|
||||||
break;
|
|
||||||
case PixelFormat::RGBA6_Z24:
|
|
||||||
code.Write("RGBA8ToRGBA6(");
|
|
||||||
break;
|
|
||||||
case PixelFormat::RGB565_Z16:
|
|
||||||
code.Write("RGBA8ToRGB565(");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
code.Write("(");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Handle D3D depth inversion.
|
|
||||||
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
|
|
||||||
code.Write("1.0 - (");
|
|
||||||
else
|
|
||||||
code.Write("(");
|
|
||||||
}
|
|
||||||
|
|
||||||
code.Write("texture(samp0, float3(");
|
// Reverse the direction for OpenGL, since positive numbers are distance from the bottom row.
|
||||||
|
// TODO: This isn't done on TextureConverterShaderGen - maybe it handles that via pixel_size?
|
||||||
|
if (api_type == APIType::OpenGL)
|
||||||
|
code.Write("clamp(uv.y - y_offset * pixel_size.y, clamp_tb.x, clamp_tb.y)");
|
||||||
|
else
|
||||||
|
code.Write("clamp(uv.y + y_offset * pixel_size.y, clamp_tb.x, clamp_tb.y)");
|
||||||
|
|
||||||
code.Write("uv.x + float(xoffset) * pixel_size.x, ");
|
code.Write(", 0.0));\n");
|
||||||
|
|
||||||
// Reverse the direction for OpenGL, since positive numbers are distance from the bottom row.
|
// TODO: Is this really needed? Doesn't the EFB only store appropriate values? Or is this for
|
||||||
if (yoffset != 0)
|
// EFB2Ram having consistent output with force 32-bit color?
|
||||||
{
|
if (params.efb_format == PixelFormat::RGB8_Z24)
|
||||||
if (api_type == APIType::OpenGL)
|
code.Write(" tex_sample = RGBA8ToRGB8(tex_sample);\n");
|
||||||
code.Write("clamp(uv.y - float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
|
else if (params.efb_format == PixelFormat::RGBA6_Z24)
|
||||||
else
|
code.Write(" tex_sample = RGBA8ToRGBA6(tex_sample);\n");
|
||||||
code.Write("clamp(uv.y + float({}) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset);
|
else if (params.efb_format == PixelFormat::RGB565_Z16)
|
||||||
}
|
code.Write(" tex_sample = RGBA8ToRGB565(tex_sample);\n");
|
||||||
else
|
|
||||||
{
|
|
||||||
code.Write("uv.y");
|
|
||||||
}
|
|
||||||
|
|
||||||
code.Write(", 0.0)))");
|
if (params.depth)
|
||||||
};
|
|
||||||
|
|
||||||
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
|
||||||
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
|
||||||
code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"
|
|
||||||
"{{\n");
|
|
||||||
if (params.copy_filter)
|
|
||||||
{
|
{
|
||||||
code.Write(" float4 prev_row = ");
|
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
|
||||||
WriteSampleOp(-1);
|
code.Write(" tex_sample.x = 1.0 - tex_sample.x;\n");
|
||||||
code.Write(";\n"
|
|
||||||
" float4 current_row = ");
|
code.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n"
|
||||||
WriteSampleOp(0);
|
" return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n"
|
||||||
code.Write(";\n"
|
"}}\n");
|
||||||
" float4 next_row = ");
|
|
||||||
WriteSampleOp(1);
|
|
||||||
code.Write(";\n"
|
|
||||||
" return float4(min(prev_row.rgb * filter_coefficients[0] +\n"
|
|
||||||
" current_row.rgb * filter_coefficients[1] +\n"
|
|
||||||
" next_row.rgb * filter_coefficients[2], \n"
|
|
||||||
" float3(1, 1, 1)), current_row.a);\n");
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
code.Write(" float4 current_row = ");
|
code.Write(" return uint4(tex_sample * 255.0);\n"
|
||||||
WriteSampleOp(0);
|
"}}\n");
|
||||||
code.Write(";\n"
|
|
||||||
"return float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
|
|
||||||
" current_row.a);\n");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
||||||
|
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
||||||
|
code.Write("float4 SampleEFB(float2 uv, float2 pixel_size, int x_offset)\n"
|
||||||
|
"{{\n");
|
||||||
|
if (params.all_copy_filter_coefs_needed)
|
||||||
|
{
|
||||||
|
code.Write(" uint4 prev_row = SampleEFB0(uv, pixel_size, float(x_offset), -1.0f);\n"
|
||||||
|
" uint4 current_row = SampleEFB0(uv, pixel_size, float(x_offset), 0.0f);\n"
|
||||||
|
" uint4 next_row = SampleEFB0(uv, pixel_size, float(x_offset), 1.0f);\n"
|
||||||
|
" uint3 combined_rows = prev_row.rgb * filter_coefficients[0] +\n"
|
||||||
|
" current_row.rgb * filter_coefficients[1] +\n"
|
||||||
|
" next_row.rgb * filter_coefficients[2];\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
code.Write(" uint4 current_row = SampleEFB0(uv, pixel_size, float(x_offset), 0.0f);\n"
|
||||||
|
" uint3 combined_rows = current_row.rgb * filter_coefficients[1];\n");
|
||||||
|
}
|
||||||
|
code.Write(" // Shift right by 6 to divide by 64, as filter coefficients\n"
|
||||||
|
" // that sum to 64 result in no change in brightness\n"
|
||||||
|
" uint4 texcol_raw = uint4(combined_rows.rgb >> 6, current_row.a);\n");
|
||||||
|
|
||||||
|
if (params.copy_filter_can_overflow)
|
||||||
|
code.Write(" texcol_raw &= 0x1ffu;\n");
|
||||||
|
// Note that overflow occurs when the sum of values is >= 128, but this max situation can be hit
|
||||||
|
// on >= 64, so we always include it.
|
||||||
|
code.Write(" texcol_raw = min(texcol_raw, uint4(255, 255, 255, 255));\n");
|
||||||
|
|
||||||
|
if (params.apply_gamma)
|
||||||
|
{
|
||||||
|
code.Write(" texcol_raw = uint4(round(pow(float4(texcol_raw) / 255.0,\n"
|
||||||
|
" float4(gamma_rcp, gamma_rcp, gamma_rcp, 1.0)) * 255.0));\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (params.yuv)
|
||||||
|
{
|
||||||
|
code.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n"
|
||||||
|
" const float4 y_const = float4( 66, 129, 25, 16);\n"
|
||||||
|
" const float4 u_const = float4(-38, -74, 112, 128);\n"
|
||||||
|
" const float4 v_const = float4(112, -94, -18, 128);\n"
|
||||||
|
" // Intensity/YUV format conversion\n"
|
||||||
|
" texcol_raw.rgb = uint3(dot(y_const, float4(texcol_raw.rgb, 256)),\n"
|
||||||
|
" dot(u_const, float4(texcol_raw.rgb, 256)),\n"
|
||||||
|
" dot(v_const, float4(texcol_raw.rgb, 256)));\n"
|
||||||
|
" // Divide by 256 and round .5 and higher up\n"
|
||||||
|
" texcol_raw.rgb = (texcol_raw.rgb >> 8) + ((texcol_raw.rgb >> 7) & 1);\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
code.Write(" return float4(texcol_raw) / 255.0;\n");
|
||||||
code.Write("}}\n");
|
code.Write("}}\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Block dimensions : widthStride, heightStride
|
// Block dimensions : widthStride, heightStride
|
||||||
// Texture dimensions : width, height, x offset, y offset
|
// Texture dimensions : width, height, x offset, y offset
|
||||||
static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, EFBCopyFormat format,
|
static void WriteSwizzler(ShaderCode& code, const EFBCopyParams& params, APIType api_type)
|
||||||
APIType api_type)
|
|
||||||
{
|
{
|
||||||
WriteHeader(code, api_type);
|
|
||||||
WriteSampleFunction(code, params, api_type);
|
|
||||||
|
|
||||||
code.Write("void main()\n"
|
code.Write("void main()\n"
|
||||||
"{{\n"
|
"{{\n"
|
||||||
" int2 sampleUv;\n"
|
" int2 sampleUv;\n"
|
||||||
" int2 uv1 = int2(gl_FragCoord.xy);\n");
|
" int2 uv1 = int2(gl_FragCoord.xy);\n");
|
||||||
|
|
||||||
const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format);
|
const int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(params.copy_format);
|
||||||
const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format);
|
const int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(params.copy_format);
|
||||||
int samples = GetEncodedSampleCount(format);
|
int samples = GetEncodedSampleCount(params.copy_format);
|
||||||
|
|
||||||
code.Write(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples),
|
code.Write(" int x_block_position = (uv1.x >> {}) << {};\n", IntLog2(blkH * blkW / samples),
|
||||||
IntLog2(blkW));
|
IntLog2(blkW));
|
||||||
|
@ -244,146 +250,13 @@ static void WriteSampleColor(ShaderCode& code, std::string_view color_comp, std:
|
||||||
code.Write(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp);
|
code.Write(" {} = SampleEFB(uv0, pixel_size, {}).{};\n", dest, x_offset, color_comp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteColorToIntensity(ShaderCode& code, std::string_view src, std::string_view dest)
|
|
||||||
{
|
|
||||||
if (!IntensityConstantAdded)
|
|
||||||
{
|
|
||||||
code.Write(" float4 IntensityConst = float4(0.257f,0.504f,0.098f,0.0625f);\n");
|
|
||||||
IntensityConstantAdded = true;
|
|
||||||
}
|
|
||||||
code.Write(" {} = dot(IntensityConst.rgb, {}.rgb);\n", dest, src);
|
|
||||||
// don't add IntensityConst.a yet, because doing it later is faster and uses less instructions,
|
|
||||||
// due to vectorization
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteToBitDepth(ShaderCode& code, u8 depth, std::string_view src, std::string_view dest)
|
static void WriteToBitDepth(ShaderCode& code, u8 depth, std::string_view src, std::string_view dest)
|
||||||
{
|
{
|
||||||
code.Write(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth);
|
code.Write(" {} = floor({} * 255.0 / exp2(8.0 - {}.0));\n", dest, src, depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteEncoderEnd(ShaderCode& code)
|
|
||||||
{
|
|
||||||
code.Write("}}\n");
|
|
||||||
IntensityConstantAdded = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteI8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::R8, api_type);
|
|
||||||
code.Write(" float3 texSample;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 0, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "ocol0.b");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 1, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "ocol0.g");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 2, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "ocol0.r");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 3, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "ocol0.a");
|
|
||||||
|
|
||||||
// See WriteColorToIntensity
|
|
||||||
code.Write(" ocol0.rgba += IntensityConst.aaaa;\n");
|
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteI4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::R4, api_type);
|
|
||||||
code.Write(" float3 texSample;\n"
|
|
||||||
" float4 color0;\n"
|
|
||||||
" float4 color1;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 0, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color0.b");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 1, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.b");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 2, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color0.g");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 3, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.g");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 4, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color0.r");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 5, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.r");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 6, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color0.a");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgb", "texSample", 7, api_type, params);
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.a");
|
|
||||||
|
|
||||||
code.Write(" color0.rgba += IntensityConst.aaaa;\n"
|
|
||||||
" color1.rgba += IntensityConst.aaaa;\n");
|
|
||||||
|
|
||||||
WriteToBitDepth(code, 4, "color0", "color0");
|
|
||||||
WriteToBitDepth(code, 4, "color1", "color1");
|
|
||||||
|
|
||||||
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteIA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
|
|
||||||
code.Write(" float4 texSample;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
|
|
||||||
code.Write(" ocol0.b = texSample.a;\n");
|
|
||||||
WriteColorToIntensity(code, "texSample", "ocol0.g");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
|
|
||||||
code.Write(" ocol0.r = texSample.a;\n");
|
|
||||||
WriteColorToIntensity(code, "texSample", "ocol0.a");
|
|
||||||
|
|
||||||
code.Write(" ocol0.ga += IntensityConst.aa;\n");
|
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteIA4Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type);
|
|
||||||
code.Write(" float4 texSample;\n"
|
|
||||||
" float4 color0;\n"
|
|
||||||
" float4 color1;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgba", "texSample", 0, api_type, params);
|
|
||||||
code.Write(" color0.b = texSample.a;\n");
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.b");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgba", "texSample", 1, api_type, params);
|
|
||||||
code.Write(" color0.g = texSample.a;\n");
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.g");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgba", "texSample", 2, api_type, params);
|
|
||||||
code.Write(" color0.r = texSample.a;\n");
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.r");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "rgba", "texSample", 3, api_type, params);
|
|
||||||
code.Write(" color0.a = texSample.a;\n");
|
|
||||||
WriteColorToIntensity(code, "texSample", "color1.a");
|
|
||||||
|
|
||||||
code.Write(" color1.rgba += IntensityConst.aaaa;\n");
|
|
||||||
|
|
||||||
WriteToBitDepth(code, 4, "color0", "color0");
|
|
||||||
WriteToBitDepth(code, 4, "color1", "color1");
|
|
||||||
|
|
||||||
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RGB565, api_type);
|
|
||||||
code.Write(" float3 texSample0;\n"
|
code.Write(" float3 texSample0;\n"
|
||||||
" float3 texSample1;\n");
|
" float3 texSample1;\n");
|
||||||
|
|
||||||
|
@ -403,13 +276,10 @@ static void WriteRGB565Encoder(ShaderCode& code, APIType api_type, const EFBCopy
|
||||||
code.Write(" ocol0.ga = ocol0.ga + gLower * 32.0;\n");
|
code.Write(" ocol0.ga = ocol0.ga + gLower * 32.0;\n");
|
||||||
|
|
||||||
code.Write(" ocol0 = ocol0 / 255.0;\n");
|
code.Write(" ocol0 = ocol0 / 255.0;\n");
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RGB5A3, api_type);
|
|
||||||
|
|
||||||
code.Write(" float4 texSample;\n"
|
code.Write(" float4 texSample;\n"
|
||||||
" float color0;\n"
|
" float color0;\n"
|
||||||
" float gUpper;\n"
|
" float gUpper;\n"
|
||||||
|
@ -467,13 +337,10 @@ static void WriteRGB5A3Encoder(ShaderCode& code, APIType api_type, const EFBCopy
|
||||||
code.Write("}}\n");
|
code.Write("}}\n");
|
||||||
|
|
||||||
code.Write(" ocol0 = ocol0 / 255.0;\n");
|
code.Write(" ocol0 = ocol0 / 255.0;\n");
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type);
|
|
||||||
|
|
||||||
code.Write(" float4 texSample;\n"
|
code.Write(" float4 texSample;\n"
|
||||||
" float4 color0;\n"
|
" float4 color0;\n"
|
||||||
" float4 color1;\n");
|
" float4 color1;\n");
|
||||||
|
@ -491,14 +358,11 @@ static void WriteRGBA8Encoder(ShaderCode& code, APIType api_type, const EFBCopyP
|
||||||
" color1.a = texSample.b;\n");
|
" color1.a = texSample.b;\n");
|
||||||
|
|
||||||
code.Write(" ocol0 = first ? color0 : color1;\n");
|
code.Write(" ocol0 = first ? color0 : color1;\n");
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
||||||
const EFBCopyParams& params)
|
const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::R4, api_type);
|
|
||||||
code.Write(" float4 color0;\n"
|
code.Write(" float4 color0;\n"
|
||||||
" float4 color1;\n");
|
" float4 color1;\n");
|
||||||
|
|
||||||
|
@ -515,26 +379,20 @@ static void WriteC4Encoder(ShaderCode& code, std::string_view comp, APIType api_
|
||||||
WriteToBitDepth(code, 4, "color1", "color1");
|
WriteToBitDepth(code, 4, "color1", "color1");
|
||||||
|
|
||||||
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
|
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteC8Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
static void WriteC8Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
||||||
const EFBCopyParams& params)
|
const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::R8, api_type);
|
|
||||||
|
|
||||||
WriteSampleColor(code, comp, "ocol0.b", 0, api_type, params);
|
WriteSampleColor(code, comp, "ocol0.b", 0, api_type, params);
|
||||||
WriteSampleColor(code, comp, "ocol0.g", 1, api_type, params);
|
WriteSampleColor(code, comp, "ocol0.g", 1, api_type, params);
|
||||||
WriteSampleColor(code, comp, "ocol0.r", 2, api_type, params);
|
WriteSampleColor(code, comp, "ocol0.r", 2, api_type, params);
|
||||||
WriteSampleColor(code, comp, "ocol0.a", 3, api_type, params);
|
WriteSampleColor(code, comp, "ocol0.a", 3, api_type, params);
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
||||||
const EFBCopyParams& params)
|
const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RA4, api_type);
|
|
||||||
code.Write(" float2 texSample;\n"
|
code.Write(" float2 texSample;\n"
|
||||||
" float4 color0;\n"
|
" float4 color0;\n"
|
||||||
" float4 color1;\n");
|
" float4 color1;\n");
|
||||||
|
@ -559,198 +417,52 @@ static void WriteCC4Encoder(ShaderCode& code, std::string_view comp, APIType api
|
||||||
WriteToBitDepth(code, 4, "color1", "color1");
|
WriteToBitDepth(code, 4, "color1", "color1");
|
||||||
|
|
||||||
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
|
code.Write(" ocol0 = (color0 * 16.0 + color1) / 255.0;\n");
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteCC8Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
static void WriteCC8Encoder(ShaderCode& code, std::string_view comp, APIType api_type,
|
||||||
const EFBCopyParams& params)
|
const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
|
|
||||||
|
|
||||||
WriteSampleColor(code, comp, "ocol0.bg", 0, api_type, params);
|
WriteSampleColor(code, comp, "ocol0.bg", 0, api_type, params);
|
||||||
WriteSampleColor(code, comp, "ocol0.ra", 1, api_type, params);
|
WriteSampleColor(code, comp, "ocol0.ra", 1, api_type, params);
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteZ8Encoder(ShaderCode& code, std::string_view multiplier, APIType api_type,
|
|
||||||
const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::G8, api_type);
|
|
||||||
|
|
||||||
code.Write(" float depth;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 0, api_type, params);
|
|
||||||
code.Write("ocol0.b = frac(depth * {});\n", multiplier);
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 1, api_type, params);
|
|
||||||
code.Write("ocol0.g = frac(depth * {});\n", multiplier);
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 2, api_type, params);
|
|
||||||
code.Write("ocol0.r = frac(depth * {});\n", multiplier);
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 3, api_type, params);
|
|
||||||
code.Write("ocol0.a = frac(depth * {});\n", multiplier);
|
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteZ16Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RA8, api_type);
|
|
||||||
|
|
||||||
code.Write(" float depth;\n"
|
|
||||||
" float3 expanded;\n");
|
|
||||||
|
|
||||||
// Byte order is reversed
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 0, api_type, params);
|
|
||||||
|
|
||||||
code.Write(" depth *= 16777216.0;\n"
|
|
||||||
" expanded.r = floor(depth / (256.0 * 256.0));\n"
|
|
||||||
" depth -= expanded.r * 256.0 * 256.0;\n"
|
|
||||||
" expanded.g = floor(depth / 256.0);\n");
|
|
||||||
|
|
||||||
code.Write(" ocol0.b = expanded.g / 255.0;\n"
|
|
||||||
" ocol0.g = expanded.r / 255.0;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 1, api_type, params);
|
|
||||||
|
|
||||||
code.Write(" depth *= 16777216.0;\n"
|
|
||||||
" expanded.r = floor(depth / (256.0 * 256.0));\n"
|
|
||||||
" depth -= expanded.r * 256.0 * 256.0;\n"
|
|
||||||
" expanded.g = floor(depth / 256.0);\n");
|
|
||||||
|
|
||||||
code.Write(" ocol0.r = expanded.g / 255.0;\n"
|
|
||||||
" ocol0.a = expanded.r / 255.0;\n");
|
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteZ16LEncoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::GB8, api_type);
|
|
||||||
|
|
||||||
code.Write(" float depth;\n"
|
|
||||||
" float3 expanded;\n");
|
|
||||||
|
|
||||||
// Byte order is reversed
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 0, api_type, params);
|
|
||||||
|
|
||||||
code.Write(" depth *= 16777216.0;\n"
|
|
||||||
" expanded.r = floor(depth / (256.0 * 256.0));\n"
|
|
||||||
" depth -= expanded.r * 256.0 * 256.0;\n"
|
|
||||||
" expanded.g = floor(depth / 256.0);\n"
|
|
||||||
" depth -= expanded.g * 256.0;\n"
|
|
||||||
" expanded.b = depth;\n");
|
|
||||||
|
|
||||||
code.Write(" ocol0.b = expanded.b / 255.0;\n"
|
|
||||||
" ocol0.g = expanded.g / 255.0;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth", 1, api_type, params);
|
|
||||||
|
|
||||||
code.Write(" depth *= 16777216.0;\n"
|
|
||||||
" expanded.r = floor(depth / (256.0 * 256.0));\n"
|
|
||||||
" depth -= expanded.r * 256.0 * 256.0;\n"
|
|
||||||
" expanded.g = floor(depth / 256.0);\n"
|
|
||||||
" depth -= expanded.g * 256.0;\n"
|
|
||||||
" expanded.b = depth;\n");
|
|
||||||
|
|
||||||
code.Write(" ocol0.r = expanded.b / 255.0;\n"
|
|
||||||
" ocol0.a = expanded.g / 255.0;\n");
|
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void WriteZ24Encoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
|
||||||
{
|
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::RGBA8, api_type);
|
|
||||||
|
|
||||||
code.Write(" float depth0;\n"
|
|
||||||
" float depth1;\n"
|
|
||||||
" float3 expanded0;\n"
|
|
||||||
" float3 expanded1;\n");
|
|
||||||
|
|
||||||
WriteSampleColor(code, "r", "depth0", 0, api_type, params);
|
|
||||||
WriteSampleColor(code, "r", "depth1", 1, api_type, params);
|
|
||||||
|
|
||||||
for (int i = 0; i < 2; i++)
|
|
||||||
{
|
|
||||||
code.Write(" depth{} *= 16777216.0;\n", i);
|
|
||||||
|
|
||||||
code.Write(" expanded{}.r = floor(depth{} / (256.0 * 256.0));\n", i, i);
|
|
||||||
code.Write(" depth{} -= expanded{}.r * 256.0 * 256.0;\n", i, i);
|
|
||||||
code.Write(" expanded{}.g = floor(depth{} / 256.0);\n", i, i);
|
|
||||||
code.Write(" depth{} -= expanded{}.g * 256.0;\n", i, i);
|
|
||||||
code.Write(" expanded{}.b = depth{};\n", i, i);
|
|
||||||
}
|
|
||||||
|
|
||||||
code.Write(" if (!first) {{\n");
|
|
||||||
// Upper 16
|
|
||||||
code.Write(" ocol0.b = expanded0.g / 255.0;\n"
|
|
||||||
" ocol0.g = expanded0.b / 255.0;\n"
|
|
||||||
" ocol0.r = expanded1.g / 255.0;\n"
|
|
||||||
" ocol0.a = expanded1.b / 255.0;\n"
|
|
||||||
" }} else {{\n");
|
|
||||||
// Lower 8
|
|
||||||
code.Write(" ocol0.b = 1.0;\n"
|
|
||||||
" ocol0.g = expanded0.r / 255.0;\n"
|
|
||||||
" ocol0.r = 1.0;\n"
|
|
||||||
" ocol0.a = expanded1.r / 255.0;\n"
|
|
||||||
" }}\n");
|
|
||||||
|
|
||||||
WriteEncoderEnd(code);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void WriteXFBEncoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
static void WriteXFBEncoder(ShaderCode& code, APIType api_type, const EFBCopyParams& params)
|
||||||
{
|
{
|
||||||
WriteSwizzler(code, params, EFBCopyFormat::XFB, api_type);
|
code.Write("float4 color0 = float4(0, 0, 0, 1), color1 = float4(0, 0, 0, 1);\n");
|
||||||
|
WriteSampleColor(code, "rgb", "color0.rgb", 0, api_type, params);
|
||||||
code.Write("float3 color0, color1;\n");
|
WriteSampleColor(code, "rgb", "color1.rgb", 1, api_type, params);
|
||||||
WriteSampleColor(code, "rgb", "color0", 0, api_type, params);
|
|
||||||
WriteSampleColor(code, "rgb", "color1", 1, api_type, params);
|
|
||||||
|
|
||||||
// Gamma is only applied to XFB copies.
|
|
||||||
code.Write(" color0 = pow(abs(color0), float3(gamma_rcp, gamma_rcp, gamma_rcp));\n"
|
|
||||||
" color1 = pow(abs(color1), float3(gamma_rcp, gamma_rcp, gamma_rcp));\n");
|
|
||||||
|
|
||||||
// Convert to YUV.
|
// Convert to YUV.
|
||||||
code.Write(" const float3 y_const = float3(0.257, 0.504, 0.098);\n"
|
code.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n"
|
||||||
" const float3 u_const = float3(-0.148, -0.291, 0.439);\n"
|
" const float4 y_const = float4( 66, 129, 25, 16);\n"
|
||||||
" const float3 v_const = float3(0.439, -0.368, -0.071);\n"
|
" const float4 u_const = float4(-38, -74, 112, 128);\n"
|
||||||
" float3 average = (color0 + color1) * 0.5;\n"
|
" const float4 v_const = float4(112, -94, -18, 128);\n"
|
||||||
" ocol0.b = dot(color0, y_const) + 0.0625;\n"
|
" float4 average = (color0 + color1) * 0.5;\n"
|
||||||
" ocol0.g = dot(average, u_const) + 0.5;\n"
|
" // TODO: check rounding\n"
|
||||||
" ocol0.r = dot(color1, y_const) + 0.0625;\n"
|
" ocol0.b = round(dot(color0, y_const)) / 256.0;\n"
|
||||||
" ocol0.a = dot(average, v_const) + 0.5;\n");
|
" ocol0.g = round(dot(average, u_const)) / 256.0;\n"
|
||||||
|
" ocol0.r = round(dot(color1, y_const)) / 256.0;\n"
|
||||||
WriteEncoderEnd(code);
|
" ocol0.a = round(dot(average, v_const)) / 256.0;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GenerateEncodingShader(const EFBCopyParams& params, APIType api_type)
|
std::string GenerateEncodingShader(const EFBCopyParams& params, APIType api_type)
|
||||||
{
|
{
|
||||||
ShaderCode code;
|
ShaderCode code;
|
||||||
|
|
||||||
|
WriteHeader(code, api_type);
|
||||||
|
WriteSampleFunction(code, params, api_type);
|
||||||
|
WriteSwizzler(code, params, api_type);
|
||||||
|
|
||||||
switch (params.copy_format)
|
switch (params.copy_format)
|
||||||
{
|
{
|
||||||
case EFBCopyFormat::R4:
|
case EFBCopyFormat::R4:
|
||||||
if (params.yuv)
|
WriteC4Encoder(code, "r", api_type, params);
|
||||||
WriteI4Encoder(code, api_type, params);
|
|
||||||
else
|
|
||||||
WriteC4Encoder(code, "r", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::RA4:
|
case EFBCopyFormat::RA4:
|
||||||
if (params.yuv)
|
WriteCC4Encoder(code, "ar", api_type, params);
|
||||||
WriteIA4Encoder(code, api_type, params);
|
|
||||||
else
|
|
||||||
WriteCC4Encoder(code, "ar", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::RA8:
|
case EFBCopyFormat::RA8:
|
||||||
if (params.yuv)
|
WriteCC8Encoder(code, "ar", api_type, params);
|
||||||
WriteIA8Encoder(code, api_type, params);
|
|
||||||
else
|
|
||||||
WriteCC8Encoder(code, "ar", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::RGB565:
|
case EFBCopyFormat::RGB565:
|
||||||
WriteRGB565Encoder(code, api_type, params);
|
WriteRGB565Encoder(code, api_type, params);
|
||||||
|
@ -759,54 +471,37 @@ std::string GenerateEncodingShader(const EFBCopyParams& params, APIType api_type
|
||||||
WriteRGB5A3Encoder(code, api_type, params);
|
WriteRGB5A3Encoder(code, api_type, params);
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::RGBA8:
|
case EFBCopyFormat::RGBA8:
|
||||||
if (params.depth)
|
WriteRGBA8Encoder(code, api_type, params);
|
||||||
WriteZ24Encoder(code, api_type, params);
|
|
||||||
else
|
|
||||||
WriteRGBA8Encoder(code, api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::A8:
|
case EFBCopyFormat::A8:
|
||||||
WriteC8Encoder(code, "a", api_type, params);
|
WriteC8Encoder(code, "a", api_type, params);
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::R8_0x1:
|
case EFBCopyFormat::R8_0x1:
|
||||||
case EFBCopyFormat::R8:
|
case EFBCopyFormat::R8:
|
||||||
if (params.yuv)
|
WriteC8Encoder(code, "r", api_type, params);
|
||||||
WriteI8Encoder(code, api_type, params);
|
|
||||||
else
|
|
||||||
WriteC8Encoder(code, "r", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::G8:
|
case EFBCopyFormat::G8:
|
||||||
if (params.depth)
|
WriteC8Encoder(code, "g", api_type, params);
|
||||||
WriteZ8Encoder(code, "256.0", api_type, params); // Z8M
|
|
||||||
else
|
|
||||||
WriteC8Encoder(code, "g", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::B8:
|
case EFBCopyFormat::B8:
|
||||||
if (params.depth)
|
WriteC8Encoder(code, "b", api_type, params);
|
||||||
WriteZ8Encoder(code, "65536.0", api_type, params); // Z8L
|
|
||||||
else
|
|
||||||
WriteC8Encoder(code, "b", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::RG8:
|
case EFBCopyFormat::RG8:
|
||||||
if (params.depth)
|
WriteCC8Encoder(code, "gr", api_type, params);
|
||||||
WriteZ16Encoder(code, api_type, params); // Z16H
|
|
||||||
else
|
|
||||||
WriteCC8Encoder(code, "gr", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::GB8:
|
case EFBCopyFormat::GB8:
|
||||||
if (params.depth)
|
WriteCC8Encoder(code, "bg", api_type, params);
|
||||||
WriteZ16LEncoder(code, api_type, params); // Z16L
|
|
||||||
else
|
|
||||||
WriteCC8Encoder(code, "bg", api_type, params);
|
|
||||||
break;
|
break;
|
||||||
case EFBCopyFormat::XFB:
|
case EFBCopyFormat::XFB:
|
||||||
WriteXFBEncoder(code, api_type, params);
|
WriteXFBEncoder(code, api_type, params);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GenerateEncodingShader)",
|
PanicAlertFmt("Invalid EFB Copy Format {}! (GenerateEncodingShader)", params.copy_format);
|
||||||
static_cast<int>(params.copy_format));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
code.Write("}}\n");
|
||||||
|
|
||||||
return code.GetBuffer();
|
return code.GetBuffer();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1273,6 +968,8 @@ static const std::map<TextureFormat, DecodingShaderInfo> s_decoding_shader_info{
|
||||||
|
|
||||||
// We do the inverse BT.601 conversion for YCbCr to RGB
|
// We do the inverse BT.601 conversion for YCbCr to RGB
|
||||||
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
||||||
|
// TODO: Use more precise numbers for this conversion (although on real hardware, the XFB isn't
|
||||||
|
// in a real texture format, so does this conversion actually ever happen?)
|
||||||
{TextureFormat::XFB,
|
{TextureFormat::XFB,
|
||||||
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false,
|
{TEXEL_BUFFER_FORMAT_RGBA8_UINT, 0, 8, 8, false,
|
||||||
R"(
|
R"(
|
||||||
|
|
|
@ -6,13 +6,15 @@
|
||||||
#include "Common/Assert.h"
|
#include "Common/Assert.h"
|
||||||
#include "Common/CommonTypes.h"
|
#include "Common/CommonTypes.h"
|
||||||
#include "VideoCommon/BPMemory.h"
|
#include "VideoCommon/BPMemory.h"
|
||||||
|
#include "VideoCommon/TextureCacheBase.h"
|
||||||
#include "VideoCommon/VideoCommon.h"
|
#include "VideoCommon/VideoCommon.h"
|
||||||
#include "VideoCommon/VideoConfig.h"
|
#include "VideoCommon/VideoConfig.h"
|
||||||
|
|
||||||
namespace TextureConversionShaderGen
|
namespace TextureConversionShaderGen
|
||||||
{
|
{
|
||||||
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
||||||
bool scale_by_half, bool copy_filter)
|
bool scale_by_half, float gamma_rcp,
|
||||||
|
const std::array<u32, 3>& filter_coefficients)
|
||||||
{
|
{
|
||||||
TCShaderUid out;
|
TCShaderUid out;
|
||||||
|
|
||||||
|
@ -22,7 +24,11 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
|
||||||
uid_data->is_depth_copy = is_depth_copy;
|
uid_data->is_depth_copy = is_depth_copy;
|
||||||
uid_data->is_intensity = is_intensity;
|
uid_data->is_intensity = is_intensity;
|
||||||
uid_data->scale_by_half = scale_by_half;
|
uid_data->scale_by_half = scale_by_half;
|
||||||
uid_data->copy_filter = copy_filter;
|
uid_data->all_copy_filter_coefs_needed =
|
||||||
|
TextureCacheBase::AllCopyFilterCoefsNeeded(filter_coefficients);
|
||||||
|
uid_data->copy_filter_can_overflow = TextureCacheBase::CopyFilterCanOverflow(filter_coefficients);
|
||||||
|
// If the gamma is needed, then include that too.
|
||||||
|
uid_data->apply_gamma = gamma_rcp != 1.0f;
|
||||||
|
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
@ -31,7 +37,7 @@ static void WriteHeader(APIType api_type, ShaderCode& out)
|
||||||
{
|
{
|
||||||
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
|
out.Write("UBO_BINDING(std140, 1) uniform PSBlock {{\n"
|
||||||
" float2 src_offset, src_size;\n"
|
" float2 src_offset, src_size;\n"
|
||||||
" float3 filter_coefficients;\n"
|
" uint3 filter_coefficients;\n"
|
||||||
" float gamma_rcp;\n"
|
" float gamma_rcp;\n"
|
||||||
" float2 clamp_tb;\n"
|
" float2 clamp_tb;\n"
|
||||||
" float pixel_height;\n"
|
" float pixel_height;\n"
|
||||||
|
@ -78,11 +84,25 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
|
||||||
WriteHeader(api_type, out);
|
WriteHeader(api_type, out);
|
||||||
|
|
||||||
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
|
out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n");
|
||||||
out.Write("float4 SampleEFB(float3 uv, float y_offset) {{\n"
|
out.Write("uint4 SampleEFB(float3 uv, float y_offset) {{\n"
|
||||||
" return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), "
|
" float4 tex_sample = texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * "
|
||||||
"clamp_tb.x, clamp_tb.y), {}));\n"
|
"pixel_height), clamp_tb.x, clamp_tb.y), {}));\n",
|
||||||
"}}\n",
|
|
||||||
mono_depth ? "0.0" : "uv.z");
|
mono_depth ? "0.0" : "uv.z");
|
||||||
|
if (uid_data->is_depth_copy)
|
||||||
|
{
|
||||||
|
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
|
||||||
|
out.Write(" tex_sample.x = 1.0 - tex_sample.x;\n");
|
||||||
|
|
||||||
|
out.Write(" uint depth = uint(tex_sample.x * 16777216.0);\n"
|
||||||
|
" return uint4((depth >> 16) & 255u, (depth >> 8) & 255u, depth & 255u, 255u);\n"
|
||||||
|
"}}\n");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
out.Write(" return uint4(tex_sample * 255.0);\n"
|
||||||
|
"}}\n");
|
||||||
|
}
|
||||||
|
|
||||||
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
|
if (g_ActiveConfig.backend_info.bSupportsGeometryShaders)
|
||||||
{
|
{
|
||||||
out.Write("VARYING_LOCATION(0) in VertexData {{\n"
|
out.Write("VARYING_LOCATION(0) in VertexData {{\n"
|
||||||
|
@ -93,201 +113,125 @@ ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data)
|
||||||
{
|
{
|
||||||
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
|
out.Write("VARYING_LOCATION(0) in vec3 v_tex0;\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
|
out.Write("FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"
|
||||||
"void main()\n{{\n");
|
"void main()\n{{\n");
|
||||||
|
|
||||||
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
// The copy filter applies to both color and depth copies. This has been verified on hardware.
|
||||||
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
// The filter is only applied to the RGB channels, the alpha channel is left intact.
|
||||||
if (uid_data->copy_filter)
|
if (uid_data->all_copy_filter_coefs_needed)
|
||||||
{
|
{
|
||||||
out.Write(" float4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
|
out.Write(" uint4 prev_row = SampleEFB(v_tex0, -1.0f);\n"
|
||||||
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
|
" uint4 current_row = SampleEFB(v_tex0, 0.0f);\n"
|
||||||
" float4 next_row = SampleEFB(v_tex0, 1.0f);\n"
|
" uint4 next_row = SampleEFB(v_tex0, 1.0f);\n"
|
||||||
" float4 texcol = float4(min(prev_row.rgb * filter_coefficients[0] +\n"
|
" uint3 combined_rows = prev_row.rgb * filter_coefficients[0] +\n"
|
||||||
" current_row.rgb * filter_coefficients[1] +\n"
|
" current_row.rgb * filter_coefficients[1] +\n"
|
||||||
" next_row.rgb * filter_coefficients[2], \n"
|
" next_row.rgb * filter_coefficients[2];\n");
|
||||||
" float3(1, 1, 1)), current_row.a);\n");
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
out.Write(
|
out.Write(" uint4 current_row = SampleEFB(v_tex0, 0.0f);\n"
|
||||||
" float4 current_row = SampleEFB(v_tex0, 0.0f);\n"
|
" uint3 combined_rows = current_row.rgb * filter_coefficients[1];\n");
|
||||||
" float4 texcol = float4(min(current_row.rgb * filter_coefficients[1], float3(1, 1, 1)),\n"
|
}
|
||||||
" current_row.a);\n");
|
out.Write(" // Shift right by 6 to divide by 64, as filter coefficients\n"
|
||||||
|
" // that sum to 64 result in no change in brightness\n"
|
||||||
|
" uint4 texcol_raw = uint4(combined_rows.rgb >> 6, {});\n",
|
||||||
|
uid_data->efb_has_alpha ? "current_row.a" : "255");
|
||||||
|
|
||||||
|
if (uid_data->copy_filter_can_overflow)
|
||||||
|
out.Write(" texcol_raw &= 0x1ffu;\n");
|
||||||
|
// Note that overflow occurs when the sum of values is >= 128, but this max situation can be hit
|
||||||
|
// on >= 64, so we always include it.
|
||||||
|
out.Write(" texcol_raw = min(texcol_raw, uint4(255, 255, 255, 255));\n");
|
||||||
|
|
||||||
|
if (uid_data->apply_gamma)
|
||||||
|
{
|
||||||
|
out.Write(" texcol_raw = uint4(round(pow(abs(float4(texcol_raw) / 255.0),\n"
|
||||||
|
" float4(gamma_rcp, gamma_rcp, gamma_rcp, 1.0)) * 255.0));\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (uid_data->is_depth_copy)
|
if (uid_data->is_intensity)
|
||||||
{
|
{
|
||||||
if (!g_ActiveConfig.backend_info.bSupportsReversedDepthRange)
|
out.Write(" // Intensity/YUV format conversion constants determined by hardware testing\n"
|
||||||
out.Write("texcol.x = 1.0 - texcol.x;\n");
|
" const float4 y_const = float4( 66, 129, 25, 16);\n"
|
||||||
|
" const float4 u_const = float4(-38, -74, 112, 128);\n"
|
||||||
out.Write(" int depth = int(texcol.x * 16777216.0);\n"
|
" const float4 v_const = float4(112, -94, -18, 128);\n"
|
||||||
|
" // Intensity/YUV format conversion\n"
|
||||||
// Convert to Z24 format
|
" texcol_raw.rgb = uint3(dot(y_const, float4(texcol_raw.rgb, 256)),\n"
|
||||||
" int4 workspace;\n"
|
" dot(u_const, float4(texcol_raw.rgb, 256)),\n"
|
||||||
" workspace.r = (depth >> 16) & 255;\n"
|
" dot(v_const, float4(texcol_raw.rgb, 256)));\n"
|
||||||
" workspace.g = (depth >> 8) & 255;\n"
|
" // Divide by 256 and round .5 and higher up\n"
|
||||||
" workspace.b = depth & 255;\n"
|
" texcol_raw.rgb = (texcol_raw.rgb >> 8) + ((texcol_raw.rgb >> 7) & 1);\n");
|
||||||
|
|
||||||
// Convert to Z4 format
|
|
||||||
" workspace.a = (depth >> 16) & 0xF0;\n"
|
|
||||||
|
|
||||||
// Normalize components to [0.0..1.0]
|
|
||||||
" texcol = float4(workspace) / 255.0;\n");
|
|
||||||
switch (uid_data->dst_format)
|
|
||||||
{
|
|
||||||
case EFBCopyFormat::R4: // Z4
|
|
||||||
out.Write(" ocol0 = texcol.aaaa;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::R8_0x1: // Z8
|
|
||||||
case EFBCopyFormat::R8: // Z8H
|
|
||||||
out.Write(" ocol0 = texcol.rrrr;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RA8: // Z16
|
|
||||||
out.Write(" ocol0 = texcol.gggr;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RG8: // Z16 (reverse order)
|
|
||||||
out.Write(" ocol0 = texcol.rrrg;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RGBA8: // Z24X8
|
|
||||||
out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::G8: // Z8M
|
|
||||||
out.Write(" ocol0 = texcol.gggg;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::B8: // Z8L
|
|
||||||
out.Write(" ocol0 = texcol.bbbb;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::GB8: // Z16L - copy lower 16 depth bits
|
|
||||||
// expected to be used as an IA8 texture (upper 8 bits stored as intensity, lower 8 bits
|
|
||||||
// stored as alpha)
|
|
||||||
// Used e.g. in Zelda: Skyward Sword
|
|
||||||
out.Write(" ocol0 = texcol.gggb;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
ERROR_LOG_FMT(VIDEO, "Unknown copy zbuf format: {:#X}",
|
|
||||||
static_cast<int>(uid_data->dst_format));
|
|
||||||
out.Write(" ocol0 = float4(texcol.bgr, 0.0);\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else if (uid_data->is_intensity)
|
|
||||||
|
switch (uid_data->dst_format)
|
||||||
{
|
{
|
||||||
if (!uid_data->efb_has_alpha)
|
case EFBCopyFormat::R4: // R4
|
||||||
out.Write(" texcol.a = 1.0;\n");
|
out.Write(" float red = float(texcol_raw.r & 0xF0u) / 240.0;\n"
|
||||||
|
" ocol0 = float4(red, red, red, red);\n");
|
||||||
|
break;
|
||||||
|
|
||||||
bool has_four_bits =
|
case EFBCopyFormat::R8_0x1: // R8
|
||||||
(uid_data->dst_format == EFBCopyFormat::R4 || uid_data->dst_format == EFBCopyFormat::RA4);
|
case EFBCopyFormat::R8: // R8
|
||||||
bool has_alpha =
|
out.Write(" ocol0 = float4(texcol_raw).rrrr / 255.0;\n");
|
||||||
(uid_data->dst_format == EFBCopyFormat::RA4 || uid_data->dst_format == EFBCopyFormat::RA8);
|
break;
|
||||||
|
|
||||||
switch (uid_data->dst_format)
|
case EFBCopyFormat::RA4: // RA4
|
||||||
{
|
out.Write(" float2 red_alpha = float2(texcol_raw.ra & 0xF0u) / 240.0;\n"
|
||||||
case EFBCopyFormat::R4: // I4
|
" ocol0 = red_alpha.rrrg;\n");
|
||||||
case EFBCopyFormat::R8_0x1: // I8
|
break;
|
||||||
case EFBCopyFormat::R8: // I8
|
|
||||||
case EFBCopyFormat::RA4: // IA4
|
|
||||||
case EFBCopyFormat::RA8: // IA8
|
|
||||||
if (has_four_bits)
|
|
||||||
out.Write(" texcol = float4(int4(texcol * 255.0) & 0xF0) * (1.0 / 240.0);\n");
|
|
||||||
|
|
||||||
// TODO - verify these coefficients
|
case EFBCopyFormat::RA8: // RA8
|
||||||
out.Write(" const float3 coefficients = float3(0.257, 0.504, 0.098);\n"
|
out.Write(" ocol0 = float4(texcol_raw).rrra / 255.0;\n");
|
||||||
" float intensity = dot(texcol.rgb, coefficients) + 16.0 / 255.0;\n"
|
break;
|
||||||
" ocol0 = float4(intensity, intensity, intensity, {});\n",
|
|
||||||
has_alpha ? "texcol.a" : "intensity");
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
case EFBCopyFormat::A8: // A8
|
||||||
ERROR_LOG_FMT(VIDEO, "Unknown copy intensity format: {:#X}",
|
out.Write(" ocol0 = float4(texcol_raw).aaaa / 255.0;\n");
|
||||||
static_cast<int>(uid_data->dst_format));
|
break;
|
||||||
out.Write(" ocol0 = texcol;\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (!uid_data->efb_has_alpha)
|
|
||||||
out.Write(" texcol.a = 1.0;\n");
|
|
||||||
|
|
||||||
switch (uid_data->dst_format)
|
case EFBCopyFormat::G8: // G8
|
||||||
{
|
out.Write(" ocol0 = float4(texcol_raw).gggg / 255.0;\n");
|
||||||
case EFBCopyFormat::R4: // R4
|
break;
|
||||||
out.Write(" float red = float(int(texcol.r * 255.0) & 0xF0) * (1.0 / 240.0);\n"
|
|
||||||
" ocol0 = float4(red, red, red, red);\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::R8_0x1: // R8
|
case EFBCopyFormat::B8: // B8
|
||||||
case EFBCopyFormat::R8: // R8
|
out.Write(" ocol0 = float4(texcol_raw).bbbb / 255.0;\n");
|
||||||
out.Write(" ocol0 = texcol.rrrr;\n");
|
break;
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RA4: // RA4
|
case EFBCopyFormat::RG8: // RG8
|
||||||
out.Write(" float2 red_alpha = float2(int2(texcol.ra * 255.0) & 0xF0) * (1.0 / 240.0);\n"
|
out.Write(" ocol0 = float4(texcol_raw).rrrg / 255.0;\n");
|
||||||
" ocol0 = red_alpha.rrrg;\n");
|
break;
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RA8: // RA8
|
case EFBCopyFormat::GB8: // GB8
|
||||||
out.Write(" ocol0 = texcol.rrra;\n");
|
out.Write(" ocol0 = float4(texcol_raw).gggb / 255.0;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EFBCopyFormat::A8: // A8
|
case EFBCopyFormat::RGB565: // RGB565
|
||||||
out.Write(" ocol0 = texcol.aaaa;\n");
|
out.Write(" float2 red_blue = float2(texcol_raw.rb & 0xF8u) / 248.0;\n"
|
||||||
break;
|
" float green = float(texcol_raw.g & 0xFCu) / 252.0;\n"
|
||||||
|
" ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
|
||||||
|
break;
|
||||||
|
|
||||||
case EFBCopyFormat::G8: // G8
|
case EFBCopyFormat::RGB5A3: // RGB5A3
|
||||||
out.Write(" ocol0 = texcol.gggg;\n");
|
// TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
|
||||||
break;
|
// will need to be implemented once we move away from floats.
|
||||||
|
out.Write(" float3 color = float3(texcol_raw.rgb & 0xF8u) / 248.0;\n"
|
||||||
|
" float alpha = float(texcol_raw.a & 0xE0u) / 224.0;\n"
|
||||||
|
" ocol0 = float4(color, alpha);\n");
|
||||||
|
break;
|
||||||
|
|
||||||
case EFBCopyFormat::B8: // B8
|
case EFBCopyFormat::RGBA8: // RGBA8
|
||||||
out.Write(" ocol0 = texcol.bbbb;\n");
|
out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EFBCopyFormat::RG8: // RG8
|
case EFBCopyFormat::XFB:
|
||||||
out.Write(" ocol0 = texcol.rrrg;\n");
|
out.Write(" ocol0 = float4(float3(texcol_raw.rgb) / 255.0, 1.0);\n");
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case EFBCopyFormat::GB8: // GB8
|
default:
|
||||||
out.Write(" ocol0 = texcol.gggb;\n");
|
ERROR_LOG_FMT(VIDEO, "Unknown copy/intensity color format: {} {}", uid_data->dst_format,
|
||||||
break;
|
uid_data->is_intensity);
|
||||||
|
out.Write(" ocol0 = float4(texcol_raw.rgba) / 255.0;\n");
|
||||||
case EFBCopyFormat::RGB565: // RGB565
|
break;
|
||||||
out.Write(" float2 red_blue = float2(int2(texcol.rb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
|
|
||||||
" float green = float(int(texcol.g * 255.0) & 0xFC) * (1.0 / 252.0);\n"
|
|
||||||
" ocol0 = float4(red_blue.r, green, red_blue.g, 1.0);\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RGB5A3: // RGB5A3
|
|
||||||
// TODO: The MSB controls whether we have RGB5 or RGB4A3, this selection
|
|
||||||
// will need to be implemented once we move away from floats.
|
|
||||||
out.Write(" float3 color = float3(int3(texcol.rgb * 255.0) & 0xF8) * (1.0 / 248.0);\n"
|
|
||||||
" float alpha = float(int(texcol.a * 255.0) & 0xE0) * (1.0 / 224.0);\n"
|
|
||||||
" ocol0 = float4(color, alpha);\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::RGBA8: // RGBA8
|
|
||||||
out.Write(" ocol0 = texcol;\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case EFBCopyFormat::XFB:
|
|
||||||
out.Write(" ocol0 = float4(pow(abs(texcol.rgb), float3(gamma_rcp, gamma_rcp, gamma_rcp)), "
|
|
||||||
"1.0f);\n");
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
ERROR_LOG_FMT(VIDEO, "Unknown copy color format: {:#X}",
|
|
||||||
static_cast<int>(uid_data->dst_format));
|
|
||||||
out.Write(" ocol0 = texcol;\n");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
out.Write("}}\n");
|
out.Write("}}\n");
|
||||||
|
|
|
@ -25,7 +25,9 @@ struct UidData
|
||||||
u32 is_depth_copy : 1;
|
u32 is_depth_copy : 1;
|
||||||
u32 is_intensity : 1;
|
u32 is_intensity : 1;
|
||||||
u32 scale_by_half : 1;
|
u32 scale_by_half : 1;
|
||||||
u32 copy_filter : 1;
|
u32 all_copy_filter_coefs_needed : 1;
|
||||||
|
u32 copy_filter_can_overflow : 1;
|
||||||
|
u32 apply_gamma : 1;
|
||||||
};
|
};
|
||||||
#pragma pack()
|
#pragma pack()
|
||||||
|
|
||||||
|
@ -35,7 +37,8 @@ ShaderCode GenerateVertexShader(APIType api_type);
|
||||||
ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data);
|
ShaderCode GeneratePixelShader(APIType api_type, const UidData* uid_data);
|
||||||
|
|
||||||
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
|
||||||
bool scale_by_half, bool copy_filter);
|
bool scale_by_half, float gamma_rcp,
|
||||||
|
const std::array<u32, 3>& filter_coefficients);
|
||||||
|
|
||||||
} // namespace TextureConversionShaderGen
|
} // namespace TextureConversionShaderGen
|
||||||
|
|
||||||
|
@ -53,8 +56,10 @@ struct fmt::formatter<TextureConversionShaderGen::UidData>
|
||||||
dst_format = fmt::to_string(uid.dst_format);
|
dst_format = fmt::to_string(uid.dst_format);
|
||||||
return fmt::format_to(ctx.out(),
|
return fmt::format_to(ctx.out(),
|
||||||
"dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, "
|
"dst_format: {}, efb_has_alpha: {}, is_depth_copy: {}, is_intensity: {}, "
|
||||||
"scale_by_half: {}, copy_filter: {}",
|
"scale_by_half: {}, all_copy_filter_coefs_needed: {}, "
|
||||||
|
"copy_filter_can_overflow: {}, apply_gamma: {}",
|
||||||
dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity,
|
dst_format, uid.efb_has_alpha, uid.is_depth_copy, uid.is_intensity,
|
||||||
uid.scale_by_half, uid.copy_filter);
|
uid.scale_by_half, uid.all_copy_filter_coefs_needed,
|
||||||
|
uid.copy_filter_can_overflow, uid.apply_gamma);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
|
@ -50,8 +50,7 @@ int TexDecoder_GetTexelSizeInNibbles(TextureFormat format)
|
||||||
case TextureFormat::XFB:
|
case TextureFormat::XFB:
|
||||||
return 4;
|
return 4;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid Texture Format ({:#X})! (GetTexelSizeInNibbles)",
|
PanicAlertFmt("Invalid Texture Format {}! (GetTexelSizeInNibbles)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -90,8 +89,7 @@ int TexDecoder_GetBlockWidthInTexels(TextureFormat format)
|
||||||
case TextureFormat::XFB:
|
case TextureFormat::XFB:
|
||||||
return 16;
|
return 16;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid Texture Format ({:#X})! (GetBlockWidthInTexels)",
|
PanicAlertFmt("Invalid Texture Format {}! (GetBlockWidthInTexels)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -125,8 +123,7 @@ int TexDecoder_GetBlockHeightInTexels(TextureFormat format)
|
||||||
case TextureFormat::XFB:
|
case TextureFormat::XFB:
|
||||||
return 1;
|
return 1;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid Texture Format ({:#X})! (GetBlockHeightInTexels)",
|
PanicAlertFmt("Invalid Texture Format {}! (GetBlockHeightInTexels)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -160,8 +157,7 @@ int TexDecoder_GetEFBCopyBlockWidthInTexels(EFBCopyFormat format)
|
||||||
case EFBCopyFormat::XFB:
|
case EFBCopyFormat::XFB:
|
||||||
return 16;
|
return 16;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEFBCopyBlockWidthInTexels)",
|
PanicAlertFmt("Invalid EFB Copy Format {}! (GetEFBCopyBlockWidthInTexels)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -195,8 +191,7 @@ int TexDecoder_GetEFBCopyBlockHeightInTexels(EFBCopyFormat format)
|
||||||
case EFBCopyFormat::XFB:
|
case EFBCopyFormat::XFB:
|
||||||
return 1;
|
return 1;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEFBCopyBlockHeightInTexels)",
|
PanicAlertFmt("Invalid EFB Copy Format {}! (GetEFBCopyBlockHeightInTexels)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return 4;
|
return 4;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -247,8 +242,7 @@ TextureFormat TexDecoder_GetEFBCopyBaseFormat(EFBCopyFormat format)
|
||||||
case EFBCopyFormat::XFB:
|
case EFBCopyFormat::XFB:
|
||||||
return TextureFormat::XFB;
|
return TextureFormat::XFB;
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid EFB Copy Format ({:#X})! (GetEFBCopyBaseFormat)",
|
PanicAlertFmt("Invalid EFB Copy Format {}! (GetEFBCopyBaseFormat)", format);
|
||||||
static_cast<int>(format));
|
|
||||||
return static_cast<TextureFormat>(format);
|
return static_cast<TextureFormat>(format);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -259,77 +253,6 @@ void TexDecoder_SetTexFmtOverlayOptions(bool enable, bool center)
|
||||||
TexFmt_Overlay_Center = center;
|
TexFmt_Overlay_Center = center;
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char* texfmt[] = {
|
|
||||||
// pixel
|
|
||||||
"I4",
|
|
||||||
"I8",
|
|
||||||
"IA4",
|
|
||||||
"IA8",
|
|
||||||
"RGB565",
|
|
||||||
"RGB5A3",
|
|
||||||
"RGBA8",
|
|
||||||
"0x07",
|
|
||||||
"C4",
|
|
||||||
"C8",
|
|
||||||
"C14X2",
|
|
||||||
"0x0B",
|
|
||||||
"0x0C",
|
|
||||||
"0x0D",
|
|
||||||
"CMPR",
|
|
||||||
"0x0F",
|
|
||||||
// Z-buffer
|
|
||||||
"0x10",
|
|
||||||
"Z8",
|
|
||||||
"0x12",
|
|
||||||
"Z16",
|
|
||||||
"0x14",
|
|
||||||
"0x15",
|
|
||||||
"Z24X8",
|
|
||||||
"0x17",
|
|
||||||
"0x18",
|
|
||||||
"0x19",
|
|
||||||
"0x1A",
|
|
||||||
"0x1B",
|
|
||||||
"0x1C",
|
|
||||||
"0x1D",
|
|
||||||
"0x1E",
|
|
||||||
"0x1F",
|
|
||||||
// pixel + copy
|
|
||||||
"CR4",
|
|
||||||
"0x21",
|
|
||||||
"CRA4",
|
|
||||||
"CRA8",
|
|
||||||
"0x24",
|
|
||||||
"0x25",
|
|
||||||
"CYUVA8",
|
|
||||||
"CA8",
|
|
||||||
"CR8",
|
|
||||||
"CG8",
|
|
||||||
"CB8",
|
|
||||||
"CRG8",
|
|
||||||
"CGB8",
|
|
||||||
"0x2D",
|
|
||||||
"0x2E",
|
|
||||||
"XFB",
|
|
||||||
// Z + copy
|
|
||||||
"CZ4",
|
|
||||||
"0x31",
|
|
||||||
"0x32",
|
|
||||||
"0x33",
|
|
||||||
"0x34",
|
|
||||||
"0x35",
|
|
||||||
"0x36",
|
|
||||||
"0x37",
|
|
||||||
"0x38",
|
|
||||||
"CZ8M",
|
|
||||||
"CZ8L",
|
|
||||||
"0x3B",
|
|
||||||
"CZ16L",
|
|
||||||
"0x3D",
|
|
||||||
"0x3E",
|
|
||||||
"0x3F",
|
|
||||||
};
|
|
||||||
|
|
||||||
static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat texformat)
|
static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat texformat)
|
||||||
{
|
{
|
||||||
int w = std::min(width, 40);
|
int w = std::min(width, 40);
|
||||||
|
@ -344,11 +267,11 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat
|
||||||
yoff = 0;
|
yoff = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char* fmt = texfmt[static_cast<int>(texformat) & 15];
|
const auto fmt_str = fmt::to_string(texformat);
|
||||||
while (*fmt)
|
for (char ch : fmt_str)
|
||||||
{
|
{
|
||||||
int xcnt = 0;
|
int xcnt = 0;
|
||||||
int nchar = sfont_map[(int)*fmt];
|
int nchar = sfont_map[ch];
|
||||||
|
|
||||||
const unsigned char* ptr = sfont_raw[nchar]; // each char is up to 9x10
|
const unsigned char* ptr = sfont_raw[nchar]; // each char is up to 9x10
|
||||||
|
|
||||||
|
@ -369,7 +292,6 @@ static void TexDecoder_DrawOverlay(u8* dst, int width, int height, TextureFormat
|
||||||
ptr += 9;
|
ptr += 9;
|
||||||
}
|
}
|
||||||
xoff += xcnt;
|
xoff += xcnt;
|
||||||
fmt++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -707,6 +629,8 @@ void TexDecoder_DecodeTexel(u8* dst, const u8* src, int s, int t, int imageWidth
|
||||||
|
|
||||||
// We do the inverse BT.601 conversion for YCbCr to RGB
|
// We do the inverse BT.601 conversion for YCbCr to RGB
|
||||||
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
||||||
|
// TODO: Use more precise numbers for this conversion (although on real hardware, the XFB isn't
|
||||||
|
// in a real texture format, so does this conversion actually ever happen?)
|
||||||
u8 R = std::clamp(int(1.164f * Y + 1.596f * V), 0, 255);
|
u8 R = std::clamp(int(1.164f * Y + 1.596f * V), 0, 255);
|
||||||
u8 G = std::clamp(int(1.164f * Y - 0.392f * U - 0.813f * V), 0, 255);
|
u8 G = std::clamp(int(1.164f * Y - 0.392f * U - 0.813f * V), 0, 255);
|
||||||
u8 B = std::clamp(int(1.164f * Y + 2.017f * U), 0, 255);
|
u8 B = std::clamp(int(1.164f * Y + 2.017f * U), 0, 255);
|
||||||
|
@ -772,6 +696,8 @@ void TexDecoder_DecodeXFB(u8* dst, const u8* src, u32 width, u32 height, u32 str
|
||||||
|
|
||||||
// We do the inverse BT.601 conversion for YCbCr to RGB
|
// We do the inverse BT.601 conversion for YCbCr to RGB
|
||||||
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
// http://www.equasys.de/colorconversion.html#YCbCr-RGBColorFormatConversion
|
||||||
|
// TODO: Use more precise numbers for this conversion (although on real hardware, the XFB
|
||||||
|
// isn't in a real texture format, so does this conversion actually ever happen?)
|
||||||
u8 R1 = static_cast<u8>(std::clamp(int(1.164f * Y1 + 1.596f * V), 0, 255));
|
u8 R1 = static_cast<u8>(std::clamp(int(1.164f * Y1 + 1.596f * V), 0, 255));
|
||||||
u8 G1 = static_cast<u8>(std::clamp(int(1.164f * Y1 - 0.392f * U - 0.813f * V), 0, 255));
|
u8 G1 = static_cast<u8>(std::clamp(int(1.164f * Y1 - 0.392f * U - 0.813f * V), 0, 255));
|
||||||
u8 B1 = static_cast<u8>(std::clamp(int(1.164f * Y1 + 2.017f * U), 0, 255));
|
u8 B1 = static_cast<u8>(std::clamp(int(1.164f * Y1 + 2.017f * U), 0, 255));
|
||||||
|
|
|
@ -1495,8 +1495,7 @@ void _TexDecoder_DecodeImpl(u32* dst, const u8* src, int width, int height, Text
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
PanicAlertFmt("Invalid Texture Format ({:#X})! (_TexDecoder_DecodeImpl)",
|
PanicAlertFmt("Invalid Texture Format {}! (_TexDecoder_DecodeImpl)", texformat);
|
||||||
static_cast<int>(texformat));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,7 +4,7 @@
|
||||||
static const unsigned char sfont_map[] = {
|
static const unsigned char sfont_map[] = {
|
||||||
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
|
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
|
||||||
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
|
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
|
||||||
10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,10,
|
10,10,10,10,10,10,10,10,63,64,10,10,10,10,10,10,
|
||||||
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,10,10,10,10,10,
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,10,10,10,10,10,
|
||||||
10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
|
10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,
|
||||||
26,27,28,29,30,31,32,33,34,35,36,10,10,10,10,10,
|
26,27,28,29,30,31,32,33,34,35,36,10,10,10,10,10,
|
||||||
|
@ -713,5 +713,27 @@ static const unsigned char sfont_raw[][9*10] = {
|
||||||
0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78,
|
0xff, 0x00, 0x00, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78,
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78,
|
0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78,
|
||||||
0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78,
|
0xff, 0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
},{
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
},{
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0x00, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0x00, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
|
0xff, 0xff, 0xff, 0xff, 0x78, 0x78, 0x78, 0x78, 0x78,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue