mirror of https://github.com/RPCS3/rpcs3.git
gl: Accelerate D24X8_UINT operations
- Adds compute decoding for D24X8_UINT on both download and upload routines - Adds support for D24X8_UINT operations for typeless copy
This commit is contained in:
parent
abc715bc5c
commit
220e86bbd1
|
@ -745,7 +745,6 @@ namespace gl
|
|||
m_target = static_cast<GLenum>(target_);
|
||||
}
|
||||
|
||||
|
||||
~save_binding_state()
|
||||
{
|
||||
glBindBuffer(m_target, m_last_binding);
|
||||
|
@ -942,6 +941,18 @@ namespace gl
|
|||
{
|
||||
glBindBufferRange(static_cast<GLenum>(target_), index, id(), offset, size);
|
||||
}
|
||||
|
||||
void copy_to(buffer* other, u64 src_offset, u64 dst_offset, u64 size)
|
||||
{
|
||||
if (get_driver_caps().ARB_dsa_supported)
|
||||
{
|
||||
glCopyNamedBufferSubData(this->id(), other->id(), src_offset, dst_offset, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
glNamedCopyBufferSubDataEXT(this->id(), other->id(), src_offset, dst_offset, size);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class ring_buffer : public buffer
|
||||
|
@ -1638,7 +1649,12 @@ namespace gl
|
|||
m_aspect_flags = image_aspect::depth;
|
||||
break;
|
||||
}
|
||||
case GL_DEPTH_COMPONENT32: // Unimplemented decode
|
||||
case GL_DEPTH_COMPONENT32F:
|
||||
{
|
||||
m_pitch = width * 4;
|
||||
m_aspect_flags = image_aspect::depth;
|
||||
break;
|
||||
}
|
||||
case GL_DEPTH24_STENCIL8:
|
||||
case GL_DEPTH32F_STENCIL8:
|
||||
{
|
||||
|
@ -1678,17 +1694,17 @@ namespace gl
|
|||
{
|
||||
fmt::throw_exception("Unhandled GL format 0x%X" HERE, sized_format);
|
||||
}
|
||||
}
|
||||
|
||||
if (format_class == RSX_FORMAT_CLASS_UNDEFINED)
|
||||
{
|
||||
if (m_aspect_flags != image_aspect::color)
|
||||
if (format_class == RSX_FORMAT_CLASS_UNDEFINED)
|
||||
{
|
||||
rsx_log.error("Undefined format class for depth texture is not allowed");
|
||||
}
|
||||
else
|
||||
{
|
||||
format_class = RSX_FORMAT_CLASS_COLOR;
|
||||
if (m_aspect_flags != image_aspect::color)
|
||||
{
|
||||
rsx_log.error("Undefined format class for depth texture is not allowed");
|
||||
}
|
||||
else
|
||||
{
|
||||
format_class = RSX_FORMAT_CLASS_COLOR;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -189,6 +189,9 @@ OPENGL_PROC(PFNGLTEXTURESUBIMAGE3DPROC, TextureSubImage3D);
|
|||
|
||||
OPENGL_PROC(PFNGLCLEARBUFFERFVPROC, ClearBufferfv);
|
||||
|
||||
OPENGL_PROC(PFNGLCOPYNAMEDBUFFERSUBDATAPROC, CopyNamedBufferSubData);
|
||||
OPENGL_PROC(PFNGLNAMEDCOPYBUFFERSUBDATAEXTPROC, NamedCopyBufferSubDataEXT);
|
||||
|
||||
// Sampler Objects
|
||||
OPENGL_PROC(PFNGLGENSAMPLERSPROC, GenSamplers);
|
||||
OPENGL_PROC(PFNGLDELETESAMPLERSPROC, DeleteSamplers);
|
||||
|
|
|
@ -54,8 +54,8 @@ namespace gl
|
|||
void initialize_memory(gl::command_context& cmd, bool read_access);
|
||||
|
||||
public:
|
||||
render_target(GLuint width, GLuint height, GLenum sized_format)
|
||||
: viewable_image(GL_TEXTURE_2D, width, height, 1, 1, sized_format)
|
||||
render_target(GLuint width, GLuint height, GLenum sized_format, rsx::format_class format_class)
|
||||
: viewable_image(GL_TEXTURE_2D, width, height, 1, 1, sized_format, format_class)
|
||||
{}
|
||||
|
||||
// Internal pitch is the actual row length in bytes of the openGL texture
|
||||
|
@ -146,7 +146,8 @@ struct gl_render_target_traits
|
|||
auto format = rsx::internals::surface_color_format_to_gl(surface_color_format);
|
||||
|
||||
std::unique_ptr<gl::render_target> result(new gl::render_target(rsx::apply_resolution_scale(static_cast<u16>(width), true),
|
||||
rsx::apply_resolution_scale(static_cast<u16>(height), true), static_cast<GLenum>(format.internal_format)));
|
||||
rsx::apply_resolution_scale(static_cast<u16>(height), true), static_cast<GLenum>(format.internal_format),
|
||||
RSX_FORMAT_CLASS_COLOR));
|
||||
|
||||
result->set_aa_mode(antialias);
|
||||
result->set_native_pitch(static_cast<u16>(width) * get_format_block_size_in_bytes(surface_color_format) * result->samples_x);
|
||||
|
@ -173,7 +174,8 @@ struct gl_render_target_traits
|
|||
{
|
||||
auto format = rsx::internals::surface_depth_format_to_gl(surface_depth_format);
|
||||
std::unique_ptr<gl::render_target> result(new gl::render_target(rsx::apply_resolution_scale(static_cast<u16>(width), true),
|
||||
rsx::apply_resolution_scale(static_cast<u16>(height), true), static_cast<GLenum>(format.internal_format)));
|
||||
rsx::apply_resolution_scale(static_cast<u16>(height), true), static_cast<GLenum>(format.internal_format),
|
||||
rsx::classify_format(surface_depth_format)));
|
||||
|
||||
result->set_aa_mode(antialias);
|
||||
result->set_surface_dimensions(static_cast<u16>(width), static_cast<u16>(height), static_cast<u16>(pitch));
|
||||
|
@ -202,7 +204,7 @@ struct gl_render_target_traits
|
|||
const auto new_w = rsx::apply_resolution_scale(prev.width, true, ref->get_surface_width(rsx::surface_metrics::pixels));
|
||||
const auto new_h = rsx::apply_resolution_scale(prev.height, true, ref->get_surface_height(rsx::surface_metrics::pixels));
|
||||
|
||||
sink = std::make_unique<gl::render_target>(new_w, new_h, internal_format);
|
||||
sink = std::make_unique<gl::render_target>(new_w, new_h, internal_format, ref->format_class());
|
||||
sink->add_ref();
|
||||
|
||||
sink->memory_usage_flags = rsx::surface_usage_flags::storage;
|
||||
|
|
|
@ -69,9 +69,9 @@ namespace gl
|
|||
case CELL_GCM_TEXTURE_G8B8: return std::make_tuple(GL_RG, GL_UNSIGNED_BYTE);
|
||||
case CELL_GCM_TEXTURE_R6G5B5: return std::make_tuple(GL_RGB, GL_UNSIGNED_SHORT_5_6_5);
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8: return std::make_tuple(GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8);
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return std::make_tuple(GL_DEPTH_STENCIL, GL_FLOAT); // TODO, requires separate aspect readback
|
||||
case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: return std::make_tuple(GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV);
|
||||
case CELL_GCM_TEXTURE_DEPTH16: return std::make_tuple(GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT);
|
||||
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_HALF_FLOAT);
|
||||
case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return std::make_tuple(GL_DEPTH_COMPONENT, GL_FLOAT);
|
||||
case CELL_GCM_TEXTURE_X16: return std::make_tuple(GL_RED, GL_UNSIGNED_SHORT);
|
||||
case CELL_GCM_TEXTURE_Y16_X16: return std::make_tuple(GL_RG, GL_UNSIGNED_SHORT);
|
||||
case CELL_GCM_TEXTURE_R5G5B5A1: return std::make_tuple(GL_RGBA, GL_UNSIGNED_SHORT_5_5_5_1);
|
||||
|
@ -126,6 +126,8 @@ namespace gl
|
|||
return { GL_RGBA, GL_FLOAT, 4, true };
|
||||
case texture::internal_format::depth16:
|
||||
return { GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, 2, true };
|
||||
case texture::internal_format::depth32f:
|
||||
return { GL_DEPTH_COMPONENT, GL_FLOAT, 2, true };
|
||||
case texture::internal_format::depth24_stencil8:
|
||||
case texture::internal_format::depth32f_stencil8:
|
||||
return { GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, 4, true };
|
||||
|
@ -154,7 +156,13 @@ namespace gl
|
|||
}
|
||||
}
|
||||
|
||||
return get_format_type(ifmt);
|
||||
auto ret = get_format_type(ifmt);
|
||||
if (tex->format_class() == RSX_FORMAT_CLASS_DEPTH24_FLOAT_X8_PACK32)
|
||||
{
|
||||
ret.type = GL_FLOAT_32_UNSIGNED_INT_24_8_REV;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
GLenum get_srgb_format(GLenum in_format)
|
||||
|
@ -459,6 +467,7 @@ namespace gl
|
|||
|
||||
GLenum target;
|
||||
GLenum internal_format = get_sized_internal_format(gcm_format);
|
||||
auto format_class = rsx::classify_format(gcm_format);
|
||||
|
||||
switch (type)
|
||||
{
|
||||
|
@ -476,7 +485,7 @@ namespace gl
|
|||
break;
|
||||
}
|
||||
|
||||
return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format);
|
||||
return new gl::viewable_image(target, width, height, depth, mipmaps, internal_format, format_class);
|
||||
}
|
||||
|
||||
void fill_texture(rsx::texture_dimension_extended dim, u16 mipmap_count, int format, u16 width, u16 height, u16 depth,
|
||||
|
@ -538,6 +547,14 @@ namespace gl
|
|||
else
|
||||
{
|
||||
bool apply_settings = true;
|
||||
buffer upload_scratch_mem, compute_scratch_mem;
|
||||
|
||||
cs_shuffle_base* pixel_transform = nullptr;
|
||||
gsl::span<gsl::byte> dst_buffer = staging_buffer;
|
||||
void* out_pointer = staging_buffer.data();
|
||||
u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format);
|
||||
u64 image_linear_size;
|
||||
|
||||
switch (gl_type)
|
||||
{
|
||||
case GL_UNSIGNED_INT_8_8_8_8:
|
||||
|
@ -552,6 +569,21 @@ namespace gl
|
|||
apply_settings = (gl_format == GL_RED);
|
||||
caps.supports_byteswap = apply_settings;
|
||||
break;
|
||||
case GL_UNSIGNED_INT_24_8:
|
||||
if (gl::get_driver_caps().ARB_compute_shader_supported)
|
||||
{
|
||||
apply_settings = false;
|
||||
pixel_transform = gl::get_compute_task<cs_shuffle_x8d24_to_d24x8<true>>();
|
||||
}
|
||||
break;
|
||||
case GL_FLOAT:
|
||||
// TODO: Expand depth16f to depth32f
|
||||
gl_type = GL_HALF_FLOAT;
|
||||
break;
|
||||
case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
|
||||
// TODO: Expand depth24 to depth32f
|
||||
gl_type = GL_UNSIGNED_INT_24_8;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -561,10 +593,39 @@ namespace gl
|
|||
unpack_settings.apply();
|
||||
}
|
||||
|
||||
if (pixel_transform)
|
||||
{
|
||||
upload_scratch_mem.create(staging_buffer.size(), nullptr, buffer::memory_type::host_visible, GL_STREAM_DRAW);
|
||||
compute_scratch_mem.create(staging_buffer.size(), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
out_pointer = nullptr;
|
||||
}
|
||||
|
||||
for (const rsx::subresource_layout& layout : input_layouts)
|
||||
{
|
||||
auto op = upload_texture_subresource(staging_buffer, layout, format, is_swizzled, caps);
|
||||
if (apply_settings)
|
||||
if (pixel_transform)
|
||||
{
|
||||
const u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment);
|
||||
image_linear_size = row_pitch * layout.height_in_block * layout.depth;
|
||||
dst_buffer = { reinterpret_cast<gsl::byte*>(upload_scratch_mem.map(buffer::access::write)), image_linear_size };
|
||||
}
|
||||
|
||||
auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps);
|
||||
|
||||
if (pixel_transform)
|
||||
{
|
||||
// 1. Unmap buffer
|
||||
upload_scratch_mem.unmap();
|
||||
|
||||
// 2. Execute compute job
|
||||
upload_scratch_mem.copy_to(&compute_scratch_mem, 0, 0, image_linear_size);
|
||||
pixel_transform->run(&compute_scratch_mem, image_linear_size);
|
||||
|
||||
// 3. Bind compute buffer as pixel unpack buffer
|
||||
glMemoryBarrier(GL_PIXEL_UNPACK_BUFFER);
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
compute_scratch_mem.bind(buffer::target::pixel_unpack);
|
||||
}
|
||||
else if (apply_settings)
|
||||
{
|
||||
unpack_settings.swap_bytes(op.require_swap);
|
||||
unpack_settings.apply();
|
||||
|
@ -574,22 +635,28 @@ namespace gl
|
|||
switch (dim)
|
||||
{
|
||||
case rsx::texture_dimension_extended::texture_dimension_1d:
|
||||
glTexSubImage1D(GL_TEXTURE_1D, layout.level, 0, layout.width_in_texel, gl_format, gl_type, staging_buffer.data());
|
||||
glTexSubImage1D(GL_TEXTURE_1D, layout.level, 0, layout.width_in_texel, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_2d:
|
||||
glTexSubImage2D(GL_TEXTURE_2D, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, staging_buffer.data());
|
||||
glTexSubImage2D(GL_TEXTURE_2D, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_cubemap:
|
||||
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + layout.layer, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, staging_buffer.data());
|
||||
glTexSubImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + layout.layer, layout.level, 0, 0, layout.width_in_texel, layout.height_in_texel, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
case rsx::texture_dimension_extended::texture_dimension_3d:
|
||||
glTexSubImage3D(GL_TEXTURE_3D, layout.layer, 0, 0, 0, layout.width_in_texel, layout.height_in_texel, depth, gl_format, gl_type, staging_buffer.data());
|
||||
glTexSubImage3D(GL_TEXTURE_3D, layout.layer, 0, 0, 0, layout.width_in_texel, layout.height_in_texel, depth, gl_format, gl_type, out_pointer);
|
||||
break;
|
||||
default:
|
||||
ASSUME(0);
|
||||
fmt::throw_exception("Unreachable" HERE);
|
||||
}
|
||||
}
|
||||
|
||||
if (pixel_transform)
|
||||
{
|
||||
upload_scratch_mem.remove();
|
||||
compute_scratch_mem.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -754,40 +821,97 @@ namespace gl
|
|||
return false;
|
||||
}
|
||||
|
||||
cs_shuffle_base* get_pixel_transform_job(const pixel_buffer_layout& pack_info)
|
||||
cs_shuffle_base* get_trivial_transform_job(const pixel_buffer_layout& pack_info)
|
||||
{
|
||||
const bool is_depth_stencil = (pack_info.type == GL_UNSIGNED_INT_24_8);
|
||||
if (!is_depth_stencil) [[likely]]
|
||||
if (!pack_info.swap_bytes)
|
||||
{
|
||||
if (!pack_info.swap_bytes)
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
switch (pack_info.size)
|
||||
{
|
||||
case 1:
|
||||
return nullptr;
|
||||
case 2:
|
||||
return gl::get_compute_task<gl::cs_shuffle_16>();
|
||||
break;
|
||||
case 4:
|
||||
return gl::get_compute_task<gl::cs_shuffle_32>();
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unsupported format");
|
||||
}
|
||||
}
|
||||
|
||||
cs_shuffle_base* get_image_to_buffer_job(const pixel_buffer_layout& pack_info, u32 aspect_mask)
|
||||
{
|
||||
switch (aspect_mask)
|
||||
{
|
||||
case image_aspect::color:
|
||||
{
|
||||
return get_trivial_transform_job(pack_info);
|
||||
}
|
||||
case image_aspect::depth:
|
||||
{
|
||||
if (pack_info.type == GL_FLOAT)
|
||||
{
|
||||
// TODO: D16F
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
switch (pack_info.size)
|
||||
{
|
||||
case 1:
|
||||
return nullptr;
|
||||
case 2:
|
||||
return gl::get_compute_task<gl::cs_shuffle_16>();
|
||||
break;
|
||||
case 4:
|
||||
return gl::get_compute_task<gl::cs_shuffle_32>();
|
||||
break;
|
||||
default:
|
||||
fmt::throw_exception("Unsupported format");
|
||||
}
|
||||
return get_trivial_transform_job(pack_info);
|
||||
}
|
||||
else
|
||||
case image_aspect::depth | image_aspect::stencil:
|
||||
{
|
||||
if (pack_info.swap_bytes)
|
||||
verify(HERE), pack_info.swap_bytes;
|
||||
if (pack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||
{
|
||||
return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
||||
// TODO: D24FX8
|
||||
return nullptr;
|
||||
}
|
||||
else
|
||||
|
||||
return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("Invalid aspect mask 0x%x" HERE, aspect_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cs_shuffle_base* get_buffer_to_image_job(const pixel_buffer_layout& unpack_info, u32 aspect_mask)
|
||||
{
|
||||
switch (aspect_mask)
|
||||
{
|
||||
case image_aspect::color:
|
||||
{
|
||||
return get_trivial_transform_job(unpack_info);
|
||||
}
|
||||
case image_aspect::depth:
|
||||
{
|
||||
if (unpack_info.type == GL_FLOAT)
|
||||
{
|
||||
return gl::get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
|
||||
// TODO: D16F
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return get_trivial_transform_job(unpack_info);
|
||||
}
|
||||
case image_aspect::depth | image_aspect::stencil:
|
||||
{
|
||||
verify(HERE), unpack_info.swap_bytes;
|
||||
if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV)
|
||||
{
|
||||
// TODO: D24FX8
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return gl::get_compute_task<gl::cs_shuffle_x8d24_to_d24x8<true>>();
|
||||
}
|
||||
default:
|
||||
{
|
||||
fmt::throw_exception("Invalid aspect mask 0x%x" HERE, aspect_mask);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -807,6 +931,28 @@ namespace gl
|
|||
auto pack_info = get_format_type(src);
|
||||
auto unpack_info = get_format_type(dst);
|
||||
|
||||
if (!caps.ARB_compute_shader_supported)
|
||||
{
|
||||
auto remove_depth_transformation = [](const texture* tex, pixel_buffer_layout& pack_info)
|
||||
{
|
||||
if (tex->aspect() & image_aspect::depth)
|
||||
{
|
||||
switch (pack_info.type)
|
||||
{
|
||||
case GL_FLOAT_32_UNSIGNED_INT_24_8_REV:
|
||||
pack_info.type = GL_UNSIGNED_INT_24_8;
|
||||
break;
|
||||
case GL_FLOAT:
|
||||
pack_info.type = GL_HALF_FLOAT;
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
remove_depth_transformation(src, pack_info);
|
||||
remove_depth_transformation(dst, unpack_info);
|
||||
}
|
||||
|
||||
// Start pack operation
|
||||
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
|
||||
|
||||
|
@ -829,8 +975,8 @@ namespace gl
|
|||
|
||||
if (caps.ARB_compute_shader_supported) [[likely]]
|
||||
{
|
||||
auto src_transform = get_pixel_transform_job(pack_info);
|
||||
auto dst_transform = get_pixel_transform_job(unpack_info);
|
||||
auto src_transform = get_image_to_buffer_job(pack_info, src->aspect());
|
||||
auto dst_transform = get_buffer_to_image_job(unpack_info, dst->aspect());
|
||||
|
||||
if (src->aspect() == gl::image_aspect::color && dst->aspect() == gl::image_aspect::color)
|
||||
{
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "GLRenderTargets.h"
|
||||
#include "GLOverlays.h"
|
||||
#include "GLTexture.h"
|
||||
#include "GLCompute.h"
|
||||
#include "../Common/TextureUtils.h"
|
||||
#include "../Common/texture_cache.h"
|
||||
|
||||
|
@ -151,9 +152,7 @@ namespace gl
|
|||
void dma_transfer(gl::command_context& /*cmd*/, gl::texture* src, const areai& /*src_area*/, const utils::address_range& /*valid_range*/, u32 pitch)
|
||||
{
|
||||
init_buffer(src);
|
||||
|
||||
glGetError();
|
||||
pbo.bind(buffer::target::pixel_pack);
|
||||
|
||||
if (context == rsx::texture_upload_context::dma)
|
||||
{
|
||||
|
@ -161,23 +160,68 @@ namespace gl
|
|||
const auto format_info = gl::get_format_type(src->get_internal_format());
|
||||
format = static_cast<gl::texture::format>(format_info.format);
|
||||
type = static_cast<gl::texture::type>(format_info.type);
|
||||
pack_unpack_swap_bytes = format_info.swap_bytes;
|
||||
}
|
||||
|
||||
if ((src->aspect() & gl::image_aspect::stencil) == 0)
|
||||
bool use_driver_pixel_transform = true;
|
||||
if (get_driver_caps().ARB_compute_shader_supported) [[likely]]
|
||||
{
|
||||
if (src->aspect() & image_aspect::stencil)
|
||||
{
|
||||
pack_unpack_swap_bytes = format_info.swap_bytes;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Z24S8 decode is done on the CPU for now
|
||||
pack_unpack_swap_bytes = false;
|
||||
buffer scratch_mem;
|
||||
scratch_mem.create(buffer::target::pixel_pack, pbo.size(), nullptr, buffer::memory_type::local, GL_STATIC_COPY);
|
||||
scratch_mem.bind();
|
||||
|
||||
pixel_pack_settings pack_settings;
|
||||
pack_settings.alignment(1);
|
||||
src->copy_to(nullptr, format, type, pack_settings);
|
||||
|
||||
// Invoke compute
|
||||
if (auto error = glGetError(); !error) [[likely]]
|
||||
{
|
||||
cs_shuffle_base * job;
|
||||
if (pack_unpack_swap_bytes)
|
||||
{
|
||||
job = get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<true>>();
|
||||
}
|
||||
else
|
||||
{
|
||||
job = get_compute_task<gl::cs_shuffle_d24x8_to_x8d24<false>>();
|
||||
}
|
||||
|
||||
const auto job_length = src->pitch() * src->height();
|
||||
job->run(&scratch_mem, job_length);
|
||||
|
||||
glBindBuffer(GL_SHADER_STORAGE_BUFFER, GL_NONE);
|
||||
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
|
||||
scratch_mem.copy_to(&pbo, 0, 0, job_length);
|
||||
}
|
||||
else
|
||||
{
|
||||
rsx_log.error("Memory transfer failed with error 0x%x. Format=0x%x, Type=0x%x", error, static_cast<u32>(format), static_cast<u32>(type));
|
||||
}
|
||||
|
||||
scratch_mem.remove();
|
||||
use_driver_pixel_transform = false;
|
||||
}
|
||||
}
|
||||
|
||||
pixel_pack_settings pack_settings;
|
||||
pack_settings.alignment(1);
|
||||
pack_settings.swap_bytes(pack_unpack_swap_bytes);
|
||||
if (use_driver_pixel_transform)
|
||||
{
|
||||
if (src->aspect() & image_aspect::stencil)
|
||||
{
|
||||
pack_unpack_swap_bytes = false;
|
||||
}
|
||||
|
||||
pbo.bind(buffer::target::pixel_pack);
|
||||
|
||||
pixel_pack_settings pack_settings;
|
||||
pack_settings.alignment(1);
|
||||
pack_settings.swap_bytes(pack_unpack_swap_bytes);
|
||||
|
||||
src->copy_to(nullptr, format, type, pack_settings);
|
||||
}
|
||||
|
||||
src->copy_to(nullptr, format, type, pack_settings);
|
||||
real_pitch = src->pitch();
|
||||
rsx_pitch = pitch;
|
||||
|
||||
|
@ -297,20 +341,15 @@ namespace gl
|
|||
const u32 valid_length = valid_range.second;
|
||||
void *dst = get_ptr(get_section_base() + valid_offset);
|
||||
|
||||
if (pack_unpack_swap_bytes)
|
||||
if (!gl::get_driver_caps().ARB_compute_shader_supported)
|
||||
{
|
||||
// Shuffle
|
||||
// TODO: Do this with a compute shader
|
||||
switch (type)
|
||||
{
|
||||
case gl::texture::type::sbyte:
|
||||
case gl::texture::type::ubyte:
|
||||
{
|
||||
if (pack_unpack_swap_bytes)
|
||||
{
|
||||
// byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
|
||||
rsx::shuffle_texel_data_wzyx<u8>(dst, rsx_pitch, width, align(valid_length, rsx_pitch) / rsx_pitch);
|
||||
}
|
||||
// byte swapping does not work on byte types, use uint_8_8_8_8 for rgba8 instead to avoid penalty
|
||||
verify(HERE), !pack_unpack_swap_bytes;
|
||||
break;
|
||||
}
|
||||
case gl::texture::type::uint_24_8:
|
||||
|
|
Loading…
Reference in New Issue