gl: Implement CopyBufferToImage in software

- Overrides the driver's CopyBufferToImage handling where possible
This commit is contained in:
kd-11 2022-06-19 22:30:18 +03:00 committed by kd-11
parent 954c60947d
commit f948ce399e
11 changed files with 342 additions and 239 deletions

View File

@ -82,7 +82,7 @@ namespace gl
glBindVertexArray(old_vao);
}
void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, bool depth_target, bool use_blending)
void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool use_blending)
{
if (!compiled)
{
@ -97,15 +97,22 @@ namespace gl
{
save_fbo = std::make_unique<fbo::save_binding_state>(fbo);
if (depth_target)
{
fbo.draw_buffer(fbo.no_color);
fbo.depth_stencil = target_texture;
}
else
switch (image_aspect_bits)
{
case gl::image_aspect::color:
fbo.color[0] = target_texture;
fbo.draw_buffer(fbo.color[0]);
break;
case gl::image_aspect::depth:
fbo.draw_buffer(fbo.no_color);
fbo.depth = target_texture;
break;
case gl::image_aspect::depth | gl::image_aspect::stencil:
fbo.draw_buffer(fbo.no_color);
fbo.depth_stencil = target_texture;
break;
default:
fmt::throw_exception("Unsupported image aspect combination 0x%x", image_aspect_bits);
}
}
@ -117,7 +124,7 @@ namespace gl
// Set initial state
glViewport(region.x1, region.y1, region.width(), region.height());
cmd->color_maski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
cmd->depth_mask(depth_target ? GL_TRUE : GL_FALSE);
cmd->depth_mask(image_aspect_bits == gl::image_aspect::color ? GL_FALSE : GL_TRUE);
// Disabling depth test will also disable depth writes which is not desired
cmd->depth_func(GL_ALWAYS);
@ -149,6 +156,7 @@ namespace gl
if (target_texture)
{
fbo.color[0] = GL_NONE;
fbo.depth = GL_NONE;
fbo.depth_stencil = GL_NONE;
}
}
@ -511,7 +519,7 @@ namespace gl
program_handle.uniforms["blur_strength"] = static_cast<s32>(cmd.config.blur_strength);
program_handle.uniforms["clip_region"] = static_cast<s32>(cmd.config.clip_region);
program_handle.uniforms["clip_bounds"] = cmd.config.clip_rect;
overlay_pass::run(cmd_, viewport, target, false, true);
overlay_pass::run(cmd_, viewport, target, gl::image_aspect::color, true);
}
ui.update();
@ -582,41 +590,45 @@ namespace gl
saved_sampler_state saved2(30, m_sampler);
cmd->bind_texture(30, GL_TEXTURE_2D, source[1]);
overlay_pass::run(cmd, viewport, GL_NONE, false, false);
overlay_pass::run(cmd, viewport, GL_NONE, gl::image_aspect::color, false);
}
rp_ssbo_to_d24x8_texture::rp_ssbo_to_d24x8_texture()
rp_ssbo_to_texture::rp_ssbo_to_texture()
{
vs_src =
#include "../Program/GLSLSnippets/GenericVSPassthrough.glsl"
;
fs_src =
#include "../Program/GLSLSnippets/CopyBufferToD24x8.glsl"
#include "../Program/GLSLSnippets/CopyBufferToImage.glsl"
;
std::pair<std::string_view, std::string> repl_list[] =
{
{ "%set, ", "" },
{ "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) },
{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(1)) }
{ "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(1)) },
{ "%stencil_export_supported", gl::get_driver_caps().ARB_shader_stencil_export_supported ? "1" : "0" }
};
fs_src = fmt::replace_all(fs_src, repl_list);
}
void rp_ssbo_to_d24x8_texture::run(gl::command_context& cmd,
void rp_ssbo_to_texture::run(gl::command_context& cmd,
const buffer* src, const texture* dst,
const u32 src_offset, const coordu& dst_region,
const pixel_unpack_settings& settings)
const pixel_buffer_layout& layout)
{
const u32 row_length = settings.get_row_length() ? settings.get_row_length() : static_cast<u32>(dst_region.width);
const u32 row_length = static_cast<u32>(dst_region.width);
const u32 bpp = dst->pitch() / dst->width();
program_handle.uniforms["src_pitch"] = row_length;
program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes();
src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * 4 * dst_region.height);
program_handle.uniforms["swap_bytes"] = layout.swap_bytes;
program_handle.uniforms["format"] = static_cast<GLenum>(dst->get_internal_format());
src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * bpp * dst_region.height);
cmd->stencil_mask(0xFF);
overlay_pass::run(cmd, dst_region, dst->id(), true);
overlay_pass::run(cmd, dst_region, dst->id(), dst->aspect());
}
}

View File

@ -74,7 +74,7 @@ namespace gl
virtual void emit_geometry();
void run(gl::command_context& cmd, const areau& region, GLuint target_texture, bool depth_target, bool use_blending = false);
void run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool use_blending = false);
};
struct ui_overlay_renderer : public overlay_pass
@ -114,20 +114,10 @@ namespace gl
void run(gl::command_context& cmd, const areau& viewport, const rsx::simple_array<GLuint>& source, f32 gamma, bool limited_rgb, bool _3d);
};
struct rp_ssbo_to_d24x8_texture : public overlay_pass
struct rp_ssbo_to_texture : public overlay_pass
{
rp_ssbo_to_d24x8_texture();
void run(gl::command_context& cmd, const buffer* src, const texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_unpack_settings& settings);
};
struct rp_copy_rgba_to_bgra : public overlay_pass
{
void run(gl::command_context& cmd, const texture* src, const texture* dst, const coordu& src_region, const coordu& dst_region);
};
struct rp_copy_bgra_to_rgba : public overlay_pass
{
void run(gl::command_context& cmd, const texture* src, const texture* dst, const coordu& src_region, const coordu& dst_region);
rp_ssbo_to_texture();
void run(gl::command_context& cmd, const buffer* src, const texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout);
};
// TODO: Replace with a proper manager

View File

@ -19,11 +19,11 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma
//All XBGR formats will have remapping before they can be read back in shaders as DRGB8
//Prefix o = 1, z = 0
case rsx::surface_color_format::x1r5g5b5_o1r5g5b5:
return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::rgb5a1, true,
return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::bgr5a1, true,
{ ::gl::texture::channel::one, ::gl::texture::channel::r, ::gl::texture::channel::g, ::gl::texture::channel::b } };
case rsx::surface_color_format::x1r5g5b5_z1r5g5b5:
return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::rgb5a1, true,
return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::bgr5a1, true,
{ ::gl::texture::channel::zero, ::gl::texture::channel::r, ::gl::texture::channel::g, ::gl::texture::channel::b } };
case rsx::surface_color_format::x8r8g8b8_z8r8g8b8:

View File

@ -74,7 +74,7 @@ namespace gl
switch (texture_format)
{
case CELL_GCM_TEXTURE_B8: return GL_R8;
case CELL_GCM_TEXTURE_A1R5G5B5: return GL_RGB5_A1;
case CELL_GCM_TEXTURE_A1R5G5B5: return GL_BGR5_A1;
case CELL_GCM_TEXTURE_A4R4G4B4: return GL_RGBA4;
case CELL_GCM_TEXTURE_R5G6B5: return GL_RGB565;
case CELL_GCM_TEXTURE_A8R8G8B8: return GL_BGRA8;
@ -90,16 +90,16 @@ namespace gl
case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return GL_RGBA16F;
case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return GL_RGBA32F;
case CELL_GCM_TEXTURE_X32_FLOAT: return GL_R32F;
case CELL_GCM_TEXTURE_D1R5G5B5: return GL_RGB5_A1;
case CELL_GCM_TEXTURE_D1R5G5B5: return GL_BGR5_A1;
case CELL_GCM_TEXTURE_D8R8G8B8: return GL_BGRA8;
case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return GL_RG16F;
case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;
case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return GL_RG8;
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_RGBA8;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return GL_RGBA8;
case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8_SNORM;
case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_BGRA8;
case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return GL_BGRA8;
}
fmt::throw_exception("Unknown texture format 0x%x", texture_format);
}
@ -163,6 +163,8 @@ namespace gl
return { GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 2, true };
case texture::internal_format::rgb5a1:
return { GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, 2, true };
case texture::internal_format::bgr5a1:
return { GL_RGB, GL_UNSIGNED_SHORT_1_5_5_5_REV, 2, true };
case texture::internal_format::rgba4:
return { GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4, 2, false };
case texture::internal_format::rgba8:
@ -396,6 +398,7 @@ namespace gl
case CELL_GCM_TEXTURE_R5G5B5A1:
case CELL_GCM_TEXTURE_R6G5B5:
case CELL_GCM_TEXTURE_R5G6B5:
case CELL_GCM_TEXTURE_A4R4G4B4:
case CELL_GCM_TEXTURE_A8R8G8B8:
case CELL_GCM_TEXTURE_COMPRESSED_DXT1:
case CELL_GCM_TEXTURE_COMPRESSED_DXT23:
@ -410,9 +413,6 @@ namespace gl
case CELL_GCM_TEXTURE_DEPTH16_FLOAT:
return{ GL_RED, GL_RED, GL_RED, GL_RED };
case CELL_GCM_TEXTURE_A4R4G4B4:
return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA };
case CELL_GCM_TEXTURE_B8:
return{ GL_ONE, GL_RED, GL_RED, GL_RED };
@ -580,63 +580,39 @@ namespace gl
transfer_buf = &scratch_mem;
};
if (dst->aspect() == image_aspect::color ||
unpack_info.type == GL_UNSIGNED_SHORT ||
unpack_info.type == GL_UNSIGNED_INT_24_8)
{
if (auto job = get_trivial_transform_job(unpack_info))
{
job->run(cmd, src, static_cast<u32>(mem_info->image_size_in_bytes), in_offset);
}
else
{
skip_barrier = true;
}
}
else if (unpack_info.type == GL_FLOAT)
{
mem_info->memory_required = (mem_info->image_size_in_texels * 4);
initialize_scratch_mem();
if (unpack_info.swap_bytes)
{
get_compute_task<cs_fconvert_task<f16, f32, true, false>>()->run(cmd, transfer_buf, in_offset, static_cast<u32>(mem_info->image_size_in_bytes), out_offset);
}
else
{
get_compute_task<cs_fconvert_task<f16, f32, false, false>>()->run(cmd, transfer_buf, in_offset, static_cast<u32>(mem_info->image_size_in_bytes), out_offset);
}
}
else if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
{
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
initialize_scratch_mem();
get_compute_task<cs_shuffle_x8d24f_to_d32fx8>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
}
else
{
fmt::throw_exception("Invalid depth/stencil type 0x%x", unpack_info.type);
}
const auto caps = gl::get_driver_caps();
if (dst->get_internal_format() == gl::texture::internal_format::depth24_stencil8 &&
dst->get_target() == gl::texture::target::texture2D && // Only 2D output supported for the moment.
!caps.vendor_NVIDIA && // NVIDIA has native support for D24X8 data as they introduced this extension.
caps.ARB_shader_stencil_export_supported) // The driver needs to support stencil export at the very least
if (!(dst->aspect() & image_aspect::stencil) || caps.ARB_shader_stencil_export_supported)
{
// This optimized path handles the data load on the GPU without context switching to compute.
// The upside is that it is very fast if you have headroom.
// The downside is that it is linear. Not that it matters that much as most drivers seem to be downloading the entire data source and doing really slow things with it.
if (!skip_barrier)
{
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
}
auto pass = gl::get_overlay_pass<gl::rp_ssbo_to_d24x8_texture>();
pass->run(cmd, transfer_buf, dst, out_offset, {{dst_region.x, dst_region.y}, {dst_region.width, dst_region.height}}, {});
// We do not need to use the driver's builtin transport mechanism
glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
gl::get_overlay_pass<gl::rp_ssbo_to_texture>()->run(cmd, transfer_buf, dst, out_offset, { {dst_region.x, dst_region.y}, {dst_region.width, dst_region.height} }, unpack_info);
}
else
{
// Stencil format on NV. Use driver upload path
if (unpack_info.type == GL_UNSIGNED_INT_24_8)
{
if (auto job = get_trivial_transform_job(unpack_info))
{
job->run(cmd, src, static_cast<u32>(mem_info->image_size_in_bytes), in_offset);
}
else
{
skip_barrier = true;
}
}
else if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
{
mem_info->memory_required = (mem_info->image_size_in_texels * 8);
initialize_scratch_mem();
get_compute_task<cs_shuffle_x8d24f_to_d32fx8>()->run(cmd, transfer_buf, in_offset, out_offset, static_cast<u32>(mem_info->image_size_in_texels));
}
else
{
fmt::throw_exception("Invalid depth/stencil type 0x%x", unpack_info.type);
}
if (!skip_barrier)
{
glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT);
@ -648,8 +624,6 @@ namespace gl
dst->copy_from(reinterpret_cast<void*>(u64(out_offset)), static_cast<texture::format>(unpack_info.format),
static_cast<texture::type>(unpack_info.type), dst_level, dst_region, {});
}
if (scratch_mem) scratch_mem.remove();
}
gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps,
@ -739,8 +713,6 @@ namespace gl
}
else
{
bool apply_settings = true;
bool use_compute_transform = is_swizzled;
std::pair<void*, u32> upload_scratch_mem = {}, compute_scratch_mem = {};
image_memory_requirements mem_info;
pixel_buffer_layout mem_layout;
@ -750,28 +722,9 @@ namespace gl
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
u64 image_linear_size = staging_buffer.size();
switch (gl_type)
{
case GL_BYTE:
case GL_UNSIGNED_BYTE:
// Multi-channel format uploaded one byte at a time. This is due to poor driver support for formats like GL_UNSIGNED SHORT_8_8
// Do byteswapping in software for now until compute acceleration is available
apply_settings = (gl_format == GL_RED);
caps.supports_byteswap = apply_settings;
break;
case GL_FLOAT:
case GL_UNSIGNED_INT_24_8:
case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
mem_layout.swap_bytes = true;
mem_layout.size = 4;
use_compute_transform = true;
apply_settings = false;
break;
}
const auto min_required_buffer_size = std::max<u64>(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000);
if (use_compute_transform)
if (driver_caps.ARB_compute_shader_supported)
{
if (g_upload_transfer_buffer.size() < static_cast<GLsizeiptr>(min_required_buffer_size))
{
@ -790,9 +743,15 @@ namespace gl
for (const rsx::subresource_layout& layout : input_layouts)
{
if (use_compute_transform)
if (driver_caps.ARB_compute_shader_supported)
{
const u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
u64 row_pitch = rsx::align2<u64, u64>(layout.width_in_block * block_size_in_bytes, caps.alignment);
if (!rsx::is_compressed_host_format(format))
{
// Handle emulated compressed formats with host unpack (R8G8 compressed)
row_pitch = std::max<u64>(row_pitch, dst->pitch());
}
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast<u32>(image_linear_size), 256) };
@ -803,7 +762,7 @@ namespace gl
dst_buffer = { reinterpret_cast<std::byte*>(upload_scratch_mem.first), image_linear_size };
}
caps.supports_hw_deswizzle = (is_swizzled && use_compute_transform && image_linear_size > 4096);
caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096);
auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps);
// Define upload region
@ -815,7 +774,7 @@ namespace gl
region.height = layout.height_in_texel;
region.depth = layout.depth;
if (use_compute_transform)
if (driver_caps.ARB_compute_shader_supported)
{
// 0. Preconf
mem_layout.swap_bytes = op.require_swap;
@ -895,12 +854,7 @@ namespace gl
}
else
{
if (apply_settings)
{
unpack_settings.swap_bytes(op.require_swap);
apply_settings = false;
}
unpack_settings.swap_bytes(op.require_swap);
dst->copy_from(out_pointer, static_cast<texture::format>(gl_format), static_cast<texture::type>(gl_type), layout.level, region, unpack_settings);
}
}
@ -1017,6 +971,11 @@ namespace gl
// 2. Both formats require no transforms (basic memcpy) or...
// 3. Both formats have the same transform (e.g RG16_UNORM to RG16_SFLOAT, both are down and uploaded with a 2-byte byteswap)
if (format1 == GL_BGRA8 || format2 == GL_BGRA8)
{
return false;
}
if (get_format_texel_width(format1) != get_format_texel_width(format2))
{
return false;

View File

@ -9,17 +9,6 @@ namespace gl
{
blitter* g_hw_blitter = nullptr;
void process_bgra_transfer_source(const gl::texture* src, const gl::texture* dst, const coord3i& region)
{
ensure(src->get_internal_format() == texture::internal_format::bgra8);
ensure(dst->get_internal_format() == texture::internal_format::rgba8);
}
void process_bgra_transfer_dest(const gl::texture* tex, const coord3i& region)
{
ensure(tex->get_internal_format() == texture::internal_format::bgra8);
}
void blitter::copy_image(gl::command_context& cmd, const texture* src, const texture* dst, int src_level, int dst_level, const position3i& src_offset, const position3i& dst_offset, const size3i& size) const
{
ensure(src_level == 0);
@ -27,29 +16,11 @@ namespace gl
// Typeless bypass for BGRA8
std::unique_ptr<gl::texture> temp_image;
const texture* real_src = src;
bool handle_bgra8_dest = false;
if (src->get_internal_format() != dst->get_internal_format())
{
if (false && src->get_internal_format() == texture::internal_format::bgra8)
{
temp_image = std::make_unique<texture>(static_cast<GLenum>(src->get_target()), src->width(), src->height(), src->depth(), src->levels(), GL_RGBA8, rsx::format_class::RSX_FORMAT_CLASS_COLOR);
process_bgra_transfer_source(src, temp_image.get(), { src_offset, size });
real_src = temp_image.get();
}
handle_bgra8_dest = (dst->get_internal_format() == texture::internal_format::bgra8);
}
glCopyImageSubData(real_src->id(), static_cast<GLenum>(real_src->get_target()), src_level,
src_offset.x, src_offset.y, src_offset.z,
dst->id(), static_cast<GLenum>(dst->get_target()), dst_level,
dst_offset.x, dst_offset.y, dst_offset.z, size.width, size.height, size.depth);
if (handle_bgra8_dest)
{
process_bgra_transfer_dest(dst, { dst_offset, size });
}
}
void blitter::scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect,
@ -80,7 +51,7 @@ namespace gl
}
else
{
copy_image(cmd, src, dst, 0, 1, position3i{ src_rect.x1, src_rect.y1, 0u }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 });
copy_image(cmd, src, dst, 0, 0, position3i{ src_rect.x1, src_rect.y1, 0u }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 });
}
return;
@ -123,33 +94,12 @@ namespace gl
}
}
if (src->get_internal_format() == texture::internal_format::bgra8 &&
real_src == src &&
dst->get_internal_format() != src->get_internal_format())
{
// Not typeless, plus src is bgra8. Needs conversion
typeless_src = std::make_unique<texture>(GL_TEXTURE_2D, src->width(), src->height(), 1, 1, GL_RGBA8);
process_bgra_transfer_source(src, typeless_src.get(), {{src_rect.x1, src_rect.y1, 0}, {src_rect.width(), src_rect.height(), 1}});
real_src = typeless_src.get();
}
bool handle_bgra8_dest = false;
if (dst->get_internal_format() == texture::internal_format::bgra8 &&
real_dst == dst &&
dst->get_internal_format() != src->get_internal_format())
{
// Not typeless but dst is bgra8.
// Handle the conversion in post
handle_bgra8_dest = true;
}
ensure(real_src->aspect() == real_dst->aspect());
if (src_rect.width() == dst_rect.width() && src_rect.height() == dst_rect.height() &&
!src_rect.is_flipped() && !dst_rect.is_flipped())
{
copy_image(cmd, real_src, real_dst, 0, 1, position3i{ src_rect.x1, src_rect.y1, 0 }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 });
handle_bgra8_dest = false; // Handled in copy_image
copy_image(cmd, real_src, real_dst, 0, 0, position3i{ src_rect.x1, src_rect.y1, 0 }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 });
}
else
{
@ -209,11 +159,6 @@ namespace gl
// Transfer contents from typeless dst back to original dst
copy_typeless(cmd, dst, typeless_dst.get());
}
else if (handle_bgra8_dest)
{
// Blit transfer to BGRA8 target
process_bgra_transfer_dest(dst, { {dst_rect.x1, dst_rect.y1, 0}, {dst_rect.width(), dst_rect.height(), 1} });
}
}
void blitter::fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color)

View File

@ -8,7 +8,15 @@ namespace gl
{
static GLenum sizedfmt_to_ifmt(GLenum sized)
{
return sized == GL_BGRA8 ? GL_RGBA8 : sized;
switch (sized)
{
case GL_BGRA8:
return GL_RGBA8;
case GL_BGR5_A1:
return GL_RGB5_A1;
default:
return sized;
}
}
texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, rsx::format_class format_class)

View File

@ -10,7 +10,8 @@ using namespace ::rsx::format_class_;
namespace gl
{
#define GL_BGRA8 0x80E1 // Enumerant of GL_BGRA8_EXT from the GL_EXT_texture_format_BGRA8888
#define GL_BGRA8 0x80E1 // Enumerant of GL_BGRA8_EXT from the GL_EXT_texture_format_BGRA8888
#define GL_BGR5_A1 0x99F0 // Unused enum 0x96xx is the last official GL enumerant
class buffer;
class buffer_view;
@ -120,6 +121,7 @@ namespace gl
bgra8 = GL_BGRA8,
rgb565 = GL_RGB565,
rgb5a1 = GL_RGB5_A1,
bgr5a1 = GL_BGR5_A1,
rgba4 = GL_RGBA4,
r8 = GL_R8,
r16 = GL_R16,
@ -128,7 +130,9 @@ namespace gl
rg16 = GL_RG16,
rg16f = GL_RG16F,
rgba16f = GL_RGBA16F,
rgba32f = GL_RGBA32F
rgba32f = GL_RGBA32F,
rg8_snorm = GL_RG8_SNORM
};
enum class wrap

View File

@ -1,48 +0,0 @@
R"(
#version 430
#extension GL_ARB_shader_stencil_export : enable
// Source buffer viewed as an array of 32-bit words; main() reads one word per fragment.
// The %set and %loc placeholders are substituted by the host before the shader is compiled.
layout(%set, binding=%loc, std430) readonly restrict buffer RawDataBlock
{
uint data[];
};
// Unpack parameters, declared either as a uniform block (%push_block is substituted by the host)
// or as plain uniforms.
//   swap_bytes - non-zero selects the byte-swapped depth decode in main()
//   src_pitch  - row stride of the source data in 32-bit words (see getDataOffset)
#if USE_UBO
layout(%push_block) uniform UnpackConfiguration
{
uint swap_bytes;
uint src_pitch;
};
#else
uniform uint swap_bytes;
uniform uint src_pitch;
#endif
// Index into data[] of the word that belongs to the current fragment (row-major, src_pitch words per row).
uint getDataOffset()
{
	const uvec2 texel = uvec2(gl_FragCoord.xy);
	return texel.y * src_pitch + texel.x;
}
// Decodes one packed depth/stencil word for this fragment and exports it as
// fragment depth (normalized 24-bit value) and stencil reference.
void main()
{
	const uint virtual_address = getDataOffset();
	const uint real_data = data[virtual_address];

	// Stencil lives in bits 0-7 of the raw word in both the swapped and unswapped layouts
	const uint stencil_byte = bitfieldExtract(real_data, 0, 8);

	uint depth_bytes;
	if (swap_bytes > 0)
	{
		// CCBBAA00 -> 00AABBCC -> AABBCC. Stencil byte does not actually move
		// AA is the most significant depth byte, so it belongs in bits 16-23 of the 24-bit result.
		depth_bytes = bitfieldExtract(real_data, 24, 8) |
			(bitfieldExtract(real_data, 16, 8) << 8) |
			(bitfieldExtract(real_data, 8, 8) << 16);
	}
	else
	{
		depth_bytes = bitfieldExtract(real_data, 8, 24);
	}

	gl_FragDepth = float(depth_bytes) / 0xffffff;
	gl_FragStencilRefARB = int(stencil_byte);
}
)"

View File

@ -0,0 +1,233 @@
R"(
#version 430
// Stencil export is only requested (enable), not required; the code that uses it is additionally
// gated by ENABLE_DEPTH_STENCIL_LOAD below.
#extension GL_ARB_shader_stencil_export : enable
// %stencil_export_supported is a placeholder that the host replaces with "1" or "0"
// before compilation.
#define ENABLE_DEPTH_STENCIL_LOAD %stencil_export_supported
// Destination format identifiers. main() compares the `format` uniform against these values.
// Depth and depth-stencil formats
#define FMT_GL_DEPTH_COMPONENT16 0x81A5
#define FMT_GL_DEPTH_COMPONENT32F 0x8CAC
#define FMT_GL_DEPTH24_STENCIL8 0x88F0
#define FMT_GL_DEPTH32F_STENCIL8 0x8CAD
// Color formats
#define FMT_GL_RGBA8 0x8058
#define FMT_GL_BGRA8 0x80E1
#define FMT_GL_RGB565 0x8D62
#define FMT_GL_RGB5_A1 0x8057
#define FMT_GL_BGR5_A1 0x99F0
#define FMT_GL_RGBA4 0x8056
#define FMT_GL_R8 0x8229
#define FMT_GL_R16 0x822A
#define FMT_GL_R32F 0x822E
#define FMT_GL_RG8 0x822B
#define FMT_GL_RG8_SNORM 0x8F95
#define FMT_GL_RG16 0x822C
#define FMT_GL_RG16F 0x822F
#define FMT_GL_RGBA16F 0x881A
#define FMT_GL_RGBA32F 0x8814
// Byte-swap helpers operating on a 32-bit word.
//   bswap_u16 - swaps the two bytes inside each 16-bit half of the word
//   bswap_u32 - reverses all four bytes of the word
// The argument and the whole expansion are parenthesized so the macros stay correct when the
// argument is an expression or when the result is combined with operators such as & or +.
#define bswap_u16(bits) ((((bits) & 0xFF) << 8) | (((bits) & 0xFF00) >> 8) | (((bits) & 0xFF0000) << 8) | (((bits) & 0xFF000000) >> 8))
#define bswap_u32(bits) ((((bits) & 0xFF) << 24) | (((bits) & 0xFF00) << 8) | (((bits) & 0xFF0000) >> 8) | (((bits) & 0xFF000000) >> 24))
// Color output; only written by the color format cases in main().
layout(location=0) out vec4 fragColor;
// Source buffer viewed as an array of 32-bit words. The read helpers below address it in
// 8-bit, 16-bit or 32-bit elements. %set and %loc are placeholders substituted by the host.
layout(%set, binding=%loc, std430) readonly restrict buffer RawDataBlock
{
uint data[];
};
// Unpack parameters, declared either as a uniform block (%push_block is substituted by the host)
// or as plain uniforms.
//   swap_bytes - non-zero makes the 16-bit and 32-bit readers byte-swap each element
//   src_pitch  - row stride of the source data in texels (see getTexelOffset)
//   format     - destination format identifier, matched against the FMT_GL_* defines in main()
#if USE_UBO
layout(%push_block) uniform UnpackConfiguration
{
uint swap_bytes;
uint src_pitch;
uint format;
};
#else
uniform uint swap_bytes;
uniform uint src_pitch;
uniform uint format;
#endif
// Linear index of the texel covered by the current fragment (row-major, src_pitch texels per row).
uint getTexelOffset()
{
	const uvec2 texel = uvec2(gl_FragCoord.xy);
	return texel.y * src_pitch + texel.x;
}
// Decoders. Beware of multi-wide swapped types (e.g swap(16x2) != swap(32x1))
// Reads the 8-bit element at index `address` (four elements per 32-bit word, lowest byte first).
uint readUint8(const in uint address)
{
	const uint word = data[address >> 2];
	const int bit_offset = int(address & 3u) << 3;
	return bitfieldExtract(word, bit_offset, 8);
}
// Reads the 16-bit element at index `address` (two elements per 32-bit word, low half first).
// The two bytes of the element are exchanged when swap_bytes is non-zero.
uint readUint16(const in uint address)
{
	const uint word = data[address >> 1];
	const int bit_offset = int(address & 1u) << 4;
	const uint element = bitfieldExtract(word, bit_offset, 16);
	return (swap_bytes != 0) ? (bswap_u16(element)) : element;
}
// Reads the 32-bit word at index `address`, reversing its bytes when swap_bytes is non-zero.
uint readUint32(const in uint address)
{
	const uint word = data[address];
	if (swap_bytes == 0)
	{
		return word;
	}

	return bswap_u32(word);
}
// Reads one packed depth-stencil word at index `address`.
// Returns uvec2(depth, stencil) where depth is a 24-bit unsigned value and stencil an 8-bit value.
uvec2 readUint24_8(const in uint address)
{
	const uint raw_value = data[address];

	// The stencil byte is taken from bits 0-7 of the raw word in both layouts
	const uint stencil = bitfieldExtract(raw_value, 0, 8);

	if (swap_bytes != 0)
	{
		// Reversing the word moves the stencil byte into bits 24-31 and leaves the three depth
		// bytes, in corrected order, in bits 0-23. Mask rather than clamp: a clamp would
		// saturate depth to 0xffffff for every texel whose stencil byte is non-zero.
		const uint depth = (bswap_u32(raw_value)) & 0xffffffu;
		return uvec2(depth, stencil);
	}

	return uvec2(
		bitfieldExtract(raw_value, 8, 24),
		stencil
	);
}
// Reads the 16-bit element at `address` and splits it into two unsigned bytes: (low byte, high byte).
uvec2 readUint8x2(const in uint address)
{
	const uint pair_bits = readUint16(address);
	return uvec2(pair_bits & 0xFFu, (pair_bits >> 8) & 0xFFu);
}
// Reads the 16-bit element at `address` as two signed bytes: (low byte, high byte), each in [-128, 127].
ivec2 readInt8x2(const in uint address)
{
	// The signed overload of bitfieldExtract sign-extends the extracted 8-bit field
	const int pair_bits = int(readUint16(address));
	return ivec2(
		bitfieldExtract(pair_bits, 0, 8),
		bitfieldExtract(pair_bits, 8, 8)
	);
}
// Normalized 8-bit readers. Each expansion is wrapped in parentheses so the division cannot
// re-associate with operators surrounding the macro invocation.
//   readFixed8       - one unsigned byte mapped to [0, 1]
//   readFixed8x2     - two unsigned bytes mapped to [0, 1]
//   readFixed8x2Snorm - two signed bytes divided by 127
#define readFixed8(address) (readUint8(address) / 255.f)
#define readFixed8x2(address) (readUint8x2(address) / 255.f)
#define readFixed8x2Snorm(address) (readInt8x2(address) / 127.f)
// Reads the 32-bit word at `address` and returns its four bytes, lowest byte first,
// each mapped to [0, 1].
vec4 readFixed8x4(const in uint address)
{
	const uint word = readUint32(address);
	const uvec4 channels = (uvec4(word) >> uvec4(0u, 8u, 16u, 24u)) & uvec4(0xFFu);
	return vec4(channels) / 255.f;
}
// 16-bit and 32-bit element readers built on readUint16 / readUint32.
// The xN variants take a texel index and read N consecutive elements starting at index * N.
// `address` is parenthesized wherever it takes part in arithmetic, and the scalar fixed-point
// expansion is wrapped, so expression arguments and surrounding operators cannot change the result.
#define readFixed16(address) (readUint16(uint(address)) / 65535.f)
#define readFixed16x2(address) vec2(readFixed16((address) * 2 + 0), readFixed16((address) * 2 + 1))
#define readFixed16x4(address) vec4(readFixed16((address) * 4 + 0), readFixed16((address) * 4 + 1), readFixed16((address) * 4 + 2), readFixed16((address) * 4 + 3))
// Half-float readers: the 16-bit element is decoded with unpackHalf2x16 and its low half is taken
#define readFloat16(address) unpackHalf2x16(readUint16(uint(address))).x
#define readFloat16x2(address) vec2(readFloat16((address) * 2 + 0), readFloat16((address) * 2 + 1))
#define readFloat16x4(address) vec4(readFloat16((address) * 4 + 0), readFloat16((address) * 4 + 1), readFloat16((address) * 4 + 2), readFloat16((address) * 4 + 3))
// 32-bit float readers: the raw word is reinterpreted as an IEEE float
#define readFloat32(address) uintBitsToFloat(readUint32(address))
#define readFloat32x4(address) uintBitsToFloat(uvec4(readUint32((address) * 4 + 0), readUint32((address) * 4 + 1), readUint32((address) * 4 + 2), readUint32((address) * 4 + 3)))
// Entry point. Decodes the source texel that corresponds to this fragment according to the
// `format` uniform and writes it to gl_FragDepth, gl_FragDepth + gl_FragStencilRefARB, or fragColor.
// There is no default case: a format value that matches none of the cases writes no output.
void main()
{
const uint texel_address = getTexelOffset();
// Scratch values for the packed formats
uint utmp;
uvec2 utmp2;
switch (format)
{
// Depth formats
case FMT_GL_DEPTH_COMPONENT16:
gl_FragDepth = readFixed16(texel_address);
break;
case FMT_GL_DEPTH_COMPONENT32F:
// The source element is read as a 16-bit half float (readFloat16), not as a 32-bit float
gl_FragDepth = readFloat16(texel_address);
break;
#if ENABLE_DEPTH_STENCIL_LOAD
// Depth-stencil formats. Unsupported on NVIDIA due to missing extensions.
case FMT_GL_DEPTH24_STENCIL8:
case FMT_GL_DEPTH32F_STENCIL8:
// Both formats are decoded as a 24-bit normalized depth plus an 8-bit stencil
utmp2 = readUint24_8(texel_address);
gl_FragDepth = float(utmp2.x) / 0xffffff;
gl_FragStencilRefARB = int(utmp2.y);
break;
#endif
// Simple color
case FMT_GL_RGBA8:
fragColor = readFixed8x4(texel_address);
break;
case FMT_GL_BGRA8:
// Same decode as RGBA8 with the red and blue channels exchanged
fragColor = readFixed8x4(texel_address).bgra;
break;
case FMT_GL_R8:
fragColor.r = readFixed8(texel_address);
break;
case FMT_GL_R16:
fragColor.r = readFixed16(texel_address);
break;
case FMT_GL_R32F:
fragColor.r = readFloat32(texel_address);
break;
case FMT_GL_RG8:
fragColor.rg = readFixed8x2(texel_address);
break;
case FMT_GL_RG8_SNORM:
fragColor.rg = readFixed8x2Snorm(texel_address);
break;
case FMT_GL_RG16:
fragColor.rg = readFixed16x2(texel_address);
break;
case FMT_GL_RG16F:
fragColor.rg = readFloat16x2(texel_address);
break;
case FMT_GL_RGBA16F:
fragColor = readFloat16x4(texel_address);
break;
case FMT_GL_RGBA32F:
fragColor = readFloat32x4(texel_address);
break;
// Packed color
case FMT_GL_RGB565:
// Bits 0-4 blue, 5-10 green, 11-15 red. Alpha is not written.
utmp = readUint16(texel_address);
fragColor.b = bitfieldExtract(utmp, 0, 5) / 31.f;
fragColor.g = bitfieldExtract(utmp, 5, 6) / 63.f;
fragColor.r = bitfieldExtract(utmp, 11, 5) / 31.f;
break;
case FMT_GL_BGR5_A1:
// Bits 0-4 blue, 5-9 green, 10-14 red, bit 15 alpha
utmp = readUint16(texel_address);
fragColor.b = bitfieldExtract(utmp, 0, 5) / 31.f;
fragColor.g = bitfieldExtract(utmp, 5, 5) / 31.f;
fragColor.r = bitfieldExtract(utmp, 10, 5) / 31.f;
fragColor.a = bitfieldExtract(utmp, 15, 1) * 1.f;
break;
case FMT_GL_RGB5_A1:
// Bit 0 alpha, bits 1-5 blue, 6-10 green, 11-15 red
utmp = readUint16(texel_address);
fragColor.a = bitfieldExtract(utmp, 0, 1) * 1.f;
fragColor.b = bitfieldExtract(utmp, 1, 5) / 31.f;
fragColor.g = bitfieldExtract(utmp, 6, 5) / 31.f;
fragColor.r = bitfieldExtract(utmp, 11, 5) / 31.f;
break;
case FMT_GL_RGBA4:
// Bits 0-3 blue, 4-7 green, 8-11 red, 12-15 alpha
utmp = readUint16(texel_address);
fragColor.b = bitfieldExtract(utmp, 0, 4) / 15.f;
fragColor.g = bitfieldExtract(utmp, 4, 4) / 15.f;
fragColor.r = bitfieldExtract(utmp, 8, 4) / 15.f;
fragColor.a = bitfieldExtract(utmp, 12, 4) / 15.f;
break;
}
}
)"

View File

@ -831,7 +831,7 @@
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLInterpreter\FragmentInterpreter.glsl" />
<None Include="Emu\RSX\Program\GLSLInterpreter\VertexInterpreter.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyBufferToD24x8.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyBufferToImage.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyD24x8ToBuffer.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\CopyRGBA8ToBuffer.glsl" />
<None Include="Emu\RSX\Program\GLSLSnippets\GenericVSPassthrough.glsl" />

View File

@ -2178,7 +2178,7 @@
<None Include="Emu\RSX\Program\GLSLSnippets\ShuffleBytes.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\CopyBufferToD24x8.glsl">
<None Include="Emu\RSX\Program\GLSLSnippets\CopyBufferToImage.glsl">
<Filter>Emu\GPU\RSX\Program\Snippets</Filter>
</None>
<None Include="Emu\RSX\Program\GLSLSnippets\GenericVSPassthrough.glsl">