mirror of https://git.suyu.dev/suyu/suyu
Merge pull request #5891 from ameerj/bgra-ogl
renderer_opengl: Use compute shaders to swizzle BGR textures on copy
This commit is contained in:
commit
daf5c5060b
|
@ -48,6 +48,15 @@ constexpr std::array VIEW_CLASS_32_BITS{
|
||||||
PixelFormat::A2B10G10R10_UINT,
|
PixelFormat::A2B10G10R10_UINT,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 32-bit view class with the BGR-ordered formats left out. Used to build the
// view table for hosts that report no native BGR support.
constexpr std::array VIEW_CLASS_32_BITS_NO_BGR{
    PixelFormat::R16G16_FLOAT,
    PixelFormat::B10G11R11_FLOAT,
    PixelFormat::R32_FLOAT,
    PixelFormat::A2B10G10R10_UNORM,
    PixelFormat::R16G16_UINT,
    PixelFormat::R32_UINT,
    PixelFormat::R16G16_SINT,
    PixelFormat::R32_SINT,
    PixelFormat::A8B8G8R8_UNORM,
    PixelFormat::R16G16_UNORM,
    PixelFormat::A8B8G8R8_SNORM,
    PixelFormat::R16G16_SNORM,
    PixelFormat::A8B8G8R8_SRGB,
    PixelFormat::E5B9G9R9_FLOAT,
    PixelFormat::A8B8G8R8_UINT,
    PixelFormat::A8B8G8R8_SINT,
    PixelFormat::A2B10G10R10_UINT,
};
|
||||||
|
|
||||||
// TODO: How should we handle 24 bits?
|
// TODO: How should we handle 24 bits?
|
||||||
|
|
||||||
constexpr std::array VIEW_CLASS_16_BITS{
|
constexpr std::array VIEW_CLASS_16_BITS{
|
||||||
|
@ -205,7 +214,6 @@ constexpr Table MakeViewTable() {
|
||||||
EnableRange(view, VIEW_CLASS_128_BITS);
|
EnableRange(view, VIEW_CLASS_128_BITS);
|
||||||
EnableRange(view, VIEW_CLASS_96_BITS);
|
EnableRange(view, VIEW_CLASS_96_BITS);
|
||||||
EnableRange(view, VIEW_CLASS_64_BITS);
|
EnableRange(view, VIEW_CLASS_64_BITS);
|
||||||
EnableRange(view, VIEW_CLASS_32_BITS);
|
|
||||||
EnableRange(view, VIEW_CLASS_16_BITS);
|
EnableRange(view, VIEW_CLASS_16_BITS);
|
||||||
EnableRange(view, VIEW_CLASS_8_BITS);
|
EnableRange(view, VIEW_CLASS_8_BITS);
|
||||||
EnableRange(view, VIEW_CLASS_RGTC1_RED);
|
EnableRange(view, VIEW_CLASS_RGTC1_RED);
|
||||||
|
@ -231,20 +239,47 @@ constexpr Table MakeCopyTable() {
|
||||||
EnableRange(copy, COPY_CLASS_64_BITS);
|
EnableRange(copy, COPY_CLASS_64_BITS);
|
||||||
return copy;
|
return copy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the view compatibility table for hosts with native BGR support:
/// the base view table plus every format in VIEW_CLASS_32_BITS.
constexpr Table MakeNativeBgrViewTable() {
    Table table = MakeViewTable();
    EnableRange(table, VIEW_CLASS_32_BITS);
    return table;
}
|
||||||
|
|
||||||
|
/// Builds the view compatibility table for hosts without native BGR support:
/// the base view table plus the 32-bit class that leaves the BGR formats out.
constexpr Table MakeNonNativeBgrViewTable() {
    Table table = MakeViewTable();
    EnableRange(table, VIEW_CLASS_32_BITS_NO_BGR);
    return table;
}
|
||||||
|
|
||||||
|
/// Builds the copy compatibility table for hosts with native BGR support:
/// the base copy table plus every format in VIEW_CLASS_32_BITS.
constexpr Table MakeNativeBgrCopyTable() {
    Table table = MakeCopyTable();
    EnableRange(table, VIEW_CLASS_32_BITS);
    return table;
}
|
||||||
|
|
||||||
|
/// Builds the copy compatibility table for hosts without native BGR support:
/// the base copy table plus every format in VIEW_CLASS_32_BITS.
constexpr Table MakeNonNativeBgrCopyTable() {
    Table table = MakeCopyTable();
    // NOTE(review): unlike the non-native *view* table, this enables the full
    // VIEW_CLASS_32_BITS (BGR formats included) rather than
    // VIEW_CLASS_32_BITS_NO_BGR - confirm this is intentional.
    EnableRange(table, VIEW_CLASS_32_BITS);
    return table;
}
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
/// Returns whether an image of format_a can be viewed as format_b.
/// @param broken_views When true, only identical formats are accepted.
/// @param native_bgr   Selects the table built for hosts with (true) or
///                     without (false) native BGR support.
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
                      bool native_bgr) {
    static constexpr Table BGR_TABLE = MakeNativeBgrViewTable();
    static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrViewTable();
    if (broken_views) {
        // With broken format views the only safe reinterpretation is the identity.
        return format_a == format_b;
    }
    const Table& table = native_bgr ? BGR_TABLE : NO_BGR_TABLE;
    return IsSupported(table, format_a, format_b);
}
|
||||||
|
|
||||||
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b) {
|
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr) {
|
||||||
static constexpr Table TABLE = MakeCopyTable();
|
static constexpr Table BGR_TABLE = MakeNativeBgrCopyTable();
|
||||||
return IsSupported(TABLE, format_a, format_b);
|
static constexpr Table NO_BGR_TABLE = MakeNonNativeBgrCopyTable();
|
||||||
|
return IsSupported(native_bgr ? BGR_TABLE : NO_BGR_TABLE, format_a, format_b);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace VideoCore::Surface
|
} // namespace VideoCore::Surface
|
||||||
|
|
|
@ -8,8 +8,9 @@
|
||||||
|
|
||||||
namespace VideoCore::Surface {
|
namespace VideoCore::Surface {
|
||||||
|
|
||||||
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views);
|
bool IsViewCompatible(PixelFormat format_a, PixelFormat format_b, bool broken_views,
|
||||||
|
bool native_bgr);
|
||||||
|
|
||||||
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b);
|
bool IsCopyCompatible(PixelFormat format_a, PixelFormat format_b, bool native_bgr);
|
||||||
|
|
||||||
} // namespace VideoCore::Surface
|
} // namespace VideoCore::Surface
|
||||||
|
|
|
@ -5,6 +5,7 @@ set(SHADER_FILES
|
||||||
convert_float_to_depth.frag
|
convert_float_to_depth.frag
|
||||||
full_screen_triangle.vert
|
full_screen_triangle.vert
|
||||||
opengl_copy_bc4.comp
|
opengl_copy_bc4.comp
|
||||||
|
opengl_copy_bgra.comp
|
||||||
opengl_present.frag
|
opengl_present.frag
|
||||||
opengl_present.vert
|
opengl_present.vert
|
||||||
pitch_unswizzle.comp
|
pitch_unswizzle.comp
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
// Copyright 2021 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#version 430 core
|
||||||
|
|
||||||
|
layout (local_size_x = 4, local_size_y = 4) in;
|
||||||
|
|
||||||
|
layout(binding = 0, rgba8) readonly uniform image2DArray bgr_input;
|
||||||
|
layout(binding = 1, rgba8) writeonly uniform image2DArray bgr_output;
|
||||||
|
|
||||||
|
// Each invocation reads one texel from bgr_input and writes it to the same
// coordinate of bgr_output with the red and blue channels exchanged.
void main() {
    ivec3 texel = ivec3(gl_GlobalInvocationID);
    vec4 source = imageLoad(bgr_input, texel);
    imageStore(bgr_output, texel, source.bgra);
}
|
|
@ -96,7 +96,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
|
||||||
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
|
{GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT}, // BC6H_UFLOAT
|
||||||
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
|
{GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT}, // BC6H_SFLOAT
|
||||||
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
|
{GL_COMPRESSED_RGBA_ASTC_4x4_KHR}, // ASTC_2D_4X4_UNORM
|
||||||
{GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
|
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
|
||||||
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
|
{GL_RGBA32F, GL_RGBA, GL_FLOAT}, // R32G32B32A32_FLOAT
|
||||||
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
|
{GL_RGBA32I, GL_RGBA_INTEGER, GL_INT}, // R32G32B32A32_SINT
|
||||||
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
|
{GL_RG32F, GL_RG, GL_FLOAT}, // R32G32_FLOAT
|
||||||
|
@ -125,7 +125,7 @@ constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{
|
||||||
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
|
{GL_COMPRESSED_RGBA_ASTC_8x8_KHR}, // ASTC_2D_8X8_UNORM
|
||||||
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
|
{GL_COMPRESSED_RGBA_ASTC_8x5_KHR}, // ASTC_2D_8X5_UNORM
|
||||||
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
|
{GL_COMPRESSED_RGBA_ASTC_5x4_KHR}, // ASTC_2D_5X4_UNORM
|
||||||
{GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE}, // B8G8R8A8_UNORM
|
{GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE}, // B8G8R8A8_SRGB
|
||||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
|
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT}, // BC1_RGBA_SRGB
|
||||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
|
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT}, // BC2_SRGB
|
||||||
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
|
{GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT}, // BC3_SRGB
|
||||||
|
@ -396,6 +396,17 @@ void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true for the pixel formats this backend treats as BGR-ordered
/// (B5G6R5_UNORM, B8G8R8A8_UNORM and B8G8R8A8_SRGB); false for every other format.
[[nodiscard]] bool IsPixelFormatBGR(PixelFormat format) {
    return format == PixelFormat::B5G6R5_UNORM || format == PixelFormat::B8G8R8A8_UNORM ||
           format == PixelFormat::B8G8R8A8_SRGB;
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
ImageBufferMap::~ImageBufferMap() {
|
ImageBufferMap::~ImageBufferMap() {
|
||||||
|
@ -512,6 +523,9 @@ bool TextureCacheRuntime::CanImageBeCopied(const Image& dst, const Image& src) {
|
||||||
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
|
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -520,6 +534,8 @@ void TextureCacheRuntime::EmulateCopyImage(Image& dst, Image& src,
|
||||||
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
|
if (dst.info.type == ImageType::e3D && dst.info.format == PixelFormat::BC4_UNORM) {
|
||||||
ASSERT(src.info.type == ImageType::e3D);
|
ASSERT(src.info.type == ImageType::e3D);
|
||||||
util_shaders.CopyBC4(dst, src, copies);
|
util_shaders.CopyBC4(dst, src, copies);
|
||||||
|
} else if (IsPixelFormatBGR(dst.info.format) || IsPixelFormatBGR(src.info.format)) {
|
||||||
|
util_shaders.CopyBGR(dst, src, copies);
|
||||||
} else {
|
} else {
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,6 +86,11 @@ public:
|
||||||
|
|
||||||
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
|
FormatProperties FormatInfo(VideoCommon::ImageType type, GLenum internal_format) const;
|
||||||
|
|
||||||
|
/// Reports whether the host API can store BGR textures natively.
bool HasNativeBgr() const noexcept {
    // There is no BGR internal format in OpenGL, so this is always false
    return false;
}
|
||||||
|
|
||||||
bool HasBrokenTextureViewFormats() const noexcept {
|
bool HasBrokenTextureViewFormats() const noexcept {
|
||||||
return has_broken_texture_view_formats;
|
return has_broken_texture_view_formats;
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
#include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h"
|
||||||
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
#include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h"
|
||||||
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
#include "video_core/host_shaders/opengl_copy_bc4_comp.h"
|
||||||
|
#include "video_core/host_shaders/opengl_copy_bgra_comp.h"
|
||||||
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
#include "video_core/host_shaders/pitch_unswizzle_comp.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||||
|
@ -48,6 +49,11 @@ OGLProgram MakeProgram(std::string_view source) {
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the number of texels covered by an image copy:
/// extent width * extent height * number of source layers.
size_t NumPixelsInCopy(const VideoCommon::ImageCopy& copy) {
    // Widen every factor *before* multiplying. Casting only the finished
    // product leaves the multiplication in the operands' own (presumably
    // 32-bit - TODO confirm) type, where it can wrap before being widened.
    // Assumes the layer count is non-negative.
    const auto width = static_cast<size_t>(copy.extent.width);
    const auto height = static_cast<size_t>(copy.extent.height);
    const auto layers = static_cast<size_t>(copy.src_subresource.num_layers);
    return width * height * layers;
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||||
|
@ -55,6 +61,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
|
||||||
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
|
block_linear_unswizzle_2d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_2D_COMP)),
|
||||||
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)),
|
||||||
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)),
|
||||||
|
copy_bgra_program(MakeProgram(OPENGL_COPY_BGRA_COMP)),
|
||||||
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)) {
|
||||||
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
const auto swizzle_table = Tegra::Texture::MakeSwizzleTable();
|
||||||
swizzle_table_buffer.Create();
|
swizzle_table_buffer.Create();
|
||||||
|
@ -205,6 +212,43 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im
|
||||||
program_manager.RestoreGuestCompute();
|
program_manager.RestoreGuestCompute();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Performs the given copies from src_image to dst_image for BGR formats.
/// The path is chosen from the destination format's bytes per block:
///  - 2 bytes: each copy is handed to the BGR565 copy pass.
///  - 4 bytes: each copy is run through the BGRA compute shader, which
///    rewrites every texel with its red and blue channels swapped.
/// Every copy is asserted to start at offset (0, 0, 0) on both images.
void UtilShaders::CopyBGR(Image& dst_image, Image& src_image,
                          std::span<const VideoCommon::ImageCopy> copies) {
    static constexpr GLuint BINDING_INPUT_IMAGE = 0;
    static constexpr GLuint BINDING_OUTPUT_IMAGE = 1;
    // Edge length of one work group; must match local_size_x / local_size_y
    // declared in opengl_copy_bgra.comp.
    static constexpr GLuint LOCAL_SIZE = 4;
    static constexpr VideoCommon::Offset3D zero_offset{0, 0, 0};
    const u32 bytes_per_block = BytesPerBlock(dst_image.info.format);
    switch (bytes_per_block) {
    case 2:
        // BGR565 copy
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);
            ASSERT(copy.dst_offset == zero_offset);
            bgr_copy_pass.Execute(dst_image, src_image, copy);
        }
        break;
    case 4: {
        // BGRA8 copy
        program_manager.BindHostCompute(copy_bgra_program.handle);
        constexpr GLenum FORMAT = GL_RGBA8;
        for (const ImageCopy& copy : copies) {
            ASSERT(copy.src_offset == zero_offset);
            ASSERT(copy.dst_offset == zero_offset);
            glBindImageTexture(BINDING_INPUT_IMAGE, src_image.StorageHandle(),
                               copy.src_subresource.base_level, GL_FALSE, 0, GL_READ_ONLY, FORMAT);
            glBindImageTexture(BINDING_OUTPUT_IMAGE, dst_image.StorageHandle(),
                               copy.dst_subresource.base_level, GL_FALSE, 0, GL_WRITE_ONLY, FORMAT);
            // glDispatchCompute takes work-group counts, not invocation counts.
            // Each group already covers LOCAL_SIZE x LOCAL_SIZE texels, so round
            // the extent up to whole groups instead of launching one group per texel.
            const GLuint num_groups_x = (copy.extent.width + LOCAL_SIZE - 1) / LOCAL_SIZE;
            const GLuint num_groups_y = (copy.extent.height + LOCAL_SIZE - 1) / LOCAL_SIZE;
            glDispatchCompute(num_groups_x, num_groups_y, copy.extent.depth);
        }
        program_manager.RestoreGuestCompute();
        break;
    }
    default:
        // Only 2 and 4 byte BGR formats are expected here.
        UNREACHABLE();
        break;
    }
}
|
||||||
|
|
||||||
GLenum StoreFormat(u32 bytes_per_block) {
|
GLenum StoreFormat(u32 bytes_per_block) {
|
||||||
switch (bytes_per_block) {
|
switch (bytes_per_block) {
|
||||||
case 1:
|
case 1:
|
||||||
|
@ -222,4 +266,36 @@ GLenum StoreFormat(u32 bytes_per_block) {
|
||||||
return GL_R8UI;
|
return GL_R8UI;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Bgr565CopyPass::Execute(const Image& dst_image, const Image& src_image,
|
||||||
|
const ImageCopy& copy) {
|
||||||
|
if (CopyBufferCreationNeeded(copy)) {
|
||||||
|
CreateNewCopyBuffer(copy, GL_TEXTURE_2D_ARRAY, GL_RGB565);
|
||||||
|
}
|
||||||
|
// Copy from source to PBO
|
||||||
|
glPixelStorei(GL_PACK_ALIGNMENT, 1);
|
||||||
|
glPixelStorei(GL_PACK_ROW_LENGTH, copy.extent.width);
|
||||||
|
glBindBuffer(GL_PIXEL_PACK_BUFFER, bgr16_pbo.handle);
|
||||||
|
glGetTextureSubImage(src_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
|
||||||
|
copy.src_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5,
|
||||||
|
static_cast<GLsizei>(bgr16_pbo_size), nullptr);
|
||||||
|
|
||||||
|
// Copy from PBO to destination in reverse order
|
||||||
|
glPixelStorei(GL_UNPACK_ALIGNMENT, 1);
|
||||||
|
glPixelStorei(GL_UNPACK_ROW_LENGTH, copy.extent.width);
|
||||||
|
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, bgr16_pbo.handle);
|
||||||
|
glTextureSubImage3D(dst_image.Handle(), 0, 0, 0, 0, copy.extent.width, copy.extent.height,
|
||||||
|
copy.dst_subresource.num_layers, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV,
|
||||||
|
nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Bgr565CopyPass::CopyBufferCreationNeeded(const ImageCopy& copy) {
|
||||||
|
return bgr16_pbo_size < NumPixelsInCopy(copy) * sizeof(u16);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates the buffer object and allocates storage for the given copy at two
/// bytes (one u16) per pixel, recording the new size.
/// The target and format parameters are not used by this implementation.
void Bgr565CopyPass::CreateNewCopyBuffer(const ImageCopy& copy, GLenum target, GLuint format) {
    const size_t new_size = NumPixelsInCopy(copy) * sizeof(u16);
    bgr16_pbo.Create();
    bgr16_pbo_size = new_size;
    glNamedBufferData(bgr16_pbo.handle, bgr16_pbo_size, nullptr, GL_STREAM_COPY);
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -19,6 +19,22 @@ class ProgramManager;
|
||||||
|
|
||||||
struct ImageBufferMap;
|
struct ImageBufferMap;
|
||||||
|
|
||||||
|
class Bgr565CopyPass {
|
||||||
|
public:
|
||||||
|
Bgr565CopyPass() = default;
|
||||||
|
~Bgr565CopyPass() = default;
|
||||||
|
|
||||||
|
void Execute(const Image& dst_image, const Image& src_image,
|
||||||
|
const VideoCommon::ImageCopy& copy);
|
||||||
|
|
||||||
|
private:
|
||||||
|
[[nodiscard]] bool CopyBufferCreationNeeded(const VideoCommon::ImageCopy& copy);
|
||||||
|
void CreateNewCopyBuffer(const VideoCommon::ImageCopy& copy, GLenum target, GLuint format);
|
||||||
|
|
||||||
|
OGLBuffer bgr16_pbo;
|
||||||
|
size_t bgr16_pbo_size{};
|
||||||
|
};
|
||||||
|
|
||||||
class UtilShaders {
|
class UtilShaders {
|
||||||
public:
|
public:
|
||||||
explicit UtilShaders(ProgramManager& program_manager);
|
explicit UtilShaders(ProgramManager& program_manager);
|
||||||
|
@ -36,6 +52,9 @@ public:
|
||||||
void CopyBC4(Image& dst_image, Image& src_image,
|
void CopyBC4(Image& dst_image, Image& src_image,
|
||||||
std::span<const VideoCommon::ImageCopy> copies);
|
std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
|
void CopyBGR(Image& dst_image, Image& src_image,
|
||||||
|
std::span<const VideoCommon::ImageCopy> copies);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ProgramManager& program_manager;
|
ProgramManager& program_manager;
|
||||||
|
|
||||||
|
@ -44,7 +63,10 @@ private:
|
||||||
OGLProgram block_linear_unswizzle_2d_program;
|
OGLProgram block_linear_unswizzle_2d_program;
|
||||||
OGLProgram block_linear_unswizzle_3d_program;
|
OGLProgram block_linear_unswizzle_3d_program;
|
||||||
OGLProgram pitch_unswizzle_program;
|
OGLProgram pitch_unswizzle_program;
|
||||||
|
OGLProgram copy_bgra_program;
|
||||||
OGLProgram copy_bc4_program;
|
OGLProgram copy_bc4_program;
|
||||||
|
|
||||||
|
Bgr565CopyPass bgr_copy_pass;
|
||||||
};
|
};
|
||||||
|
|
||||||
GLenum StoreFormat(u32 bytes_per_block);
|
GLenum StoreFormat(u32 bytes_per_block);
|
||||||
|
|
|
@ -93,6 +93,11 @@ struct TextureCacheRuntime {
|
||||||
// No known Vulkan driver has broken image views
|
// No known Vulkan driver has broken image views
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Reports whether the host API can store BGR textures natively.
bool HasNativeBgr() const noexcept {
    // Every known Vulkan driver handles BGR textures natively, so this is always true
    return true;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
class Image : public VideoCommon::ImageBase {
|
class Image : public VideoCommon::ImageBase {
|
||||||
|
|
|
@ -120,9 +120,10 @@ void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_i
|
||||||
if (lhs.info.type == ImageType::Linear) {
|
if (lhs.info.type == ImageType::Linear) {
|
||||||
base = SubresourceBase{.level = 0, .layer = 0};
|
base = SubresourceBase{.level = 0, .layer = 0};
|
||||||
} else {
|
} else {
|
||||||
// We are passing relaxed formats as an option, having broken views or not won't matter
|
// We are passing relaxed formats as an option, having broken views/bgr or not won't matter
|
||||||
static constexpr bool broken_views = false;
|
static constexpr bool broken_views = false;
|
||||||
base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views);
|
static constexpr bool native_bgr = true;
|
||||||
|
base = FindSubresource(rhs.info, lhs, rhs.gpu_addr, OPTIONS, broken_views, native_bgr);
|
||||||
}
|
}
|
||||||
if (!base) {
|
if (!base) {
|
||||||
LOG_ERROR(HW_GPU, "Image alias should have been flipped");
|
LOG_ERROR(HW_GPU, "Image alias should have been flipped");
|
||||||
|
|
|
@ -24,7 +24,7 @@ ImageViewBase::ImageViewBase(const ImageViewInfo& info, const ImageInfo& image_i
|
||||||
.height = std::max(image_info.size.height >> range.base.level, 1u),
|
.height = std::max(image_info.size.height >> range.base.level, 1u),
|
||||||
.depth = std::max(image_info.size.depth >> range.base.level, 1u),
|
.depth = std::max(image_info.size.depth >> range.base.level, 1u),
|
||||||
} {
|
} {
|
||||||
ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false),
|
ASSERT_MSG(VideoCore::Surface::IsViewCompatible(image_info.format, info.format, false, true),
|
||||||
"Image view format {} is incompatible with image format {}", info.format,
|
"Image view format {} is incompatible with image format {}", info.format,
|
||||||
image_info.format);
|
image_info.format);
|
||||||
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
|
const bool is_async = Settings::values.use_asynchronous_gpu_emulation.GetValue();
|
||||||
|
|
|
@ -876,6 +876,7 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
return ImageId{};
|
return ImageId{};
|
||||||
}
|
}
|
||||||
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
||||||
|
const bool native_bgr = runtime.HasNativeBgr();
|
||||||
ImageId image_id;
|
ImageId image_id;
|
||||||
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) {
|
||||||
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
|
if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) {
|
||||||
|
@ -885,11 +886,12 @@ ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||||
if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
|
if (existing_image.gpu_addr == gpu_addr && existing.type == info.type &&
|
||||||
existing.pitch == info.pitch &&
|
existing.pitch == info.pitch &&
|
||||||
IsPitchLinearSameSize(existing, info, strict_size) &&
|
IsPitchLinearSameSize(existing, info, strict_size) &&
|
||||||
IsViewCompatible(existing.format, info.format, broken_views)) {
|
IsViewCompatible(existing.format, info.format, broken_views, native_bgr)) {
|
||||||
image_id = existing_image_id;
|
image_id = existing_image_id;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
} else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views)) {
|
} else if (IsSubresource(info, existing_image, gpu_addr, options, broken_views,
|
||||||
|
native_bgr)) {
|
||||||
image_id = existing_image_id;
|
image_id = existing_image_id;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -920,6 +922,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
ImageInfo new_info = info;
|
ImageInfo new_info = info;
|
||||||
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
|
const size_t size_bytes = CalculateGuestSizeInBytes(new_info);
|
||||||
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
const bool broken_views = runtime.HasBrokenTextureViewFormats();
|
||||||
|
const bool native_bgr = runtime.HasNativeBgr();
|
||||||
std::vector<ImageId> overlap_ids;
|
std::vector<ImageId> overlap_ids;
|
||||||
std::vector<ImageId> left_aliased_ids;
|
std::vector<ImageId> left_aliased_ids;
|
||||||
std::vector<ImageId> right_aliased_ids;
|
std::vector<ImageId> right_aliased_ids;
|
||||||
|
@ -935,8 +938,8 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
static constexpr bool strict_size = true;
|
static constexpr bool strict_size = true;
|
||||||
const std::optional<OverlapResult> solution =
|
const std::optional<OverlapResult> solution = ResolveOverlap(
|
||||||
ResolveOverlap(new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views);
|
new_info, gpu_addr, cpu_addr, overlap, strict_size, broken_views, native_bgr);
|
||||||
if (solution) {
|
if (solution) {
|
||||||
gpu_addr = solution->gpu_addr;
|
gpu_addr = solution->gpu_addr;
|
||||||
cpu_addr = solution->cpu_addr;
|
cpu_addr = solution->cpu_addr;
|
||||||
|
@ -946,10 +949,10 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
||||||
}
|
}
|
||||||
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
|
static constexpr auto options = RelaxedOptions::Size | RelaxedOptions::Format;
|
||||||
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
const ImageBase new_image_base(new_info, gpu_addr, cpu_addr);
|
||||||
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views)) {
|
if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) {
|
||||||
left_aliased_ids.push_back(overlap_id);
|
left_aliased_ids.push_back(overlap_id);
|
||||||
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
} else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options,
|
||||||
broken_views)) {
|
broken_views, native_bgr)) {
|
||||||
right_aliased_ids.push_back(overlap_id);
|
right_aliased_ids.push_back(overlap_id);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
|
@ -1035,13 +1035,13 @@ bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool stri
|
||||||
|
|
||||||
std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
|
std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, GPUVAddr gpu_addr,
|
||||||
VAddr cpu_addr, const ImageBase& overlap,
|
VAddr cpu_addr, const ImageBase& overlap,
|
||||||
bool strict_size, bool broken_views) {
|
bool strict_size, bool broken_views, bool native_bgr) {
|
||||||
ASSERT(new_info.type != ImageType::Linear);
|
ASSERT(new_info.type != ImageType::Linear);
|
||||||
ASSERT(overlap.info.type != ImageType::Linear);
|
ASSERT(overlap.info.type != ImageType::Linear);
|
||||||
if (!IsLayerStrideCompatible(new_info, overlap.info)) {
|
if (!IsLayerStrideCompatible(new_info, overlap.info)) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views)) {
|
if (!IsViewCompatible(overlap.info.format, new_info.format, broken_views, native_bgr)) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
if (gpu_addr == overlap.gpu_addr) {
|
if (gpu_addr == overlap.gpu_addr) {
|
||||||
|
@ -1085,14 +1085,14 @@ bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs) {
|
||||||
|
|
||||||
std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
|
std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const ImageBase& image,
|
||||||
GPUVAddr candidate_addr, RelaxedOptions options,
|
GPUVAddr candidate_addr, RelaxedOptions options,
|
||||||
bool broken_views) {
|
bool broken_views, bool native_bgr) {
|
||||||
const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
|
const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr);
|
||||||
if (!base) {
|
if (!base) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
const ImageInfo& existing = image.info;
|
const ImageInfo& existing = image.info;
|
||||||
if (False(options & RelaxedOptions::Format)) {
|
if (False(options & RelaxedOptions::Format)) {
|
||||||
if (!IsViewCompatible(existing.format, candidate.format, broken_views)) {
|
if (!IsViewCompatible(existing.format, candidate.format, broken_views, native_bgr)) {
|
||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1129,8 +1129,9 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
|
bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr,
|
||||||
RelaxedOptions options, bool broken_views) {
|
RelaxedOptions options, bool broken_views, bool native_bgr) {
|
||||||
return FindSubresource(candidate, image, candidate_addr, options, broken_views).has_value();
|
return FindSubresource(candidate, image, candidate_addr, options, broken_views, native_bgr)
|
||||||
|
.has_value();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
|
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
|
||||||
|
|
|
@ -87,7 +87,8 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
|
||||||
[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
|
[[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info,
|
||||||
GPUVAddr gpu_addr, VAddr cpu_addr,
|
GPUVAddr gpu_addr, VAddr cpu_addr,
|
||||||
const ImageBase& overlap,
|
const ImageBase& overlap,
|
||||||
bool strict_size, bool broken_views);
|
bool strict_size, bool broken_views,
|
||||||
|
bool native_bgr);
|
||||||
|
|
||||||
[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
|
[[nodiscard]] bool IsLayerStrideCompatible(const ImageInfo& lhs, const ImageInfo& rhs);
|
||||||
|
|
||||||
|
@ -95,11 +96,11 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima
|
||||||
const ImageBase& image,
|
const ImageBase& image,
|
||||||
GPUVAddr candidate_addr,
|
GPUVAddr candidate_addr,
|
||||||
RelaxedOptions options,
|
RelaxedOptions options,
|
||||||
bool broken_views);
|
bool broken_views, bool native_bgr);
|
||||||
|
|
||||||
[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
|
[[nodiscard]] bool IsSubresource(const ImageInfo& candidate, const ImageBase& image,
|
||||||
GPUVAddr candidate_addr, RelaxedOptions options,
|
GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views,
|
||||||
bool broken_views);
|
bool native_bgr);
|
||||||
|
|
||||||
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
|
void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst,
|
||||||
const ImageBase* src);
|
const ImageBase* src);
|
||||||
|
|
Loading…
Reference in New Issue