diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.cpp b/rpcs3/Emu/RSX/GL/GLOverlays.cpp index 604b802b4b..36ee8624a2 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.cpp +++ b/rpcs3/Emu/RSX/GL/GLOverlays.cpp @@ -82,7 +82,7 @@ namespace gl glBindVertexArray(old_vao); } - void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, bool depth_target, bool use_blending) + void overlay_pass::run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool use_blending) { if (!compiled) { @@ -97,15 +97,22 @@ namespace gl { save_fbo = std::make_unique(fbo); - if (depth_target) - { - fbo.draw_buffer(fbo.no_color); - fbo.depth_stencil = target_texture; - } - else + switch (image_aspect_bits) { + case gl::image_aspect::color: fbo.color[0] = target_texture; fbo.draw_buffer(fbo.color[0]); + break; + case gl::image_aspect::depth: + fbo.draw_buffer(fbo.no_color); + fbo.depth = target_texture; + break; + case gl::image_aspect::depth | gl::image_aspect::stencil: + fbo.draw_buffer(fbo.no_color); + fbo.depth_stencil = target_texture; + break; + default: + fmt::throw_exception("Unsupported image aspect combination 0x%x", image_aspect_bits); } } @@ -117,7 +124,7 @@ namespace gl // Set initial state glViewport(region.x1, region.y1, region.width(), region.height()); cmd->color_maski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - cmd->depth_mask(depth_target ? GL_TRUE : GL_FALSE); + cmd->depth_mask(image_aspect_bits == gl::image_aspect::color ? 
GL_FALSE : GL_TRUE); // Disabling depth test will also disable depth writes which is not desired cmd->depth_func(GL_ALWAYS); @@ -149,6 +156,7 @@ namespace gl if (target_texture) { fbo.color[0] = GL_NONE; + fbo.depth = GL_NONE; fbo.depth_stencil = GL_NONE; } } @@ -511,7 +519,7 @@ namespace gl program_handle.uniforms["blur_strength"] = static_cast(cmd.config.blur_strength); program_handle.uniforms["clip_region"] = static_cast(cmd.config.clip_region); program_handle.uniforms["clip_bounds"] = cmd.config.clip_rect; - overlay_pass::run(cmd_, viewport, target, false, true); + overlay_pass::run(cmd_, viewport, target, gl::image_aspect::color, true); } ui.update(); @@ -582,41 +590,45 @@ namespace gl saved_sampler_state saved2(30, m_sampler); cmd->bind_texture(30, GL_TEXTURE_2D, source[1]); - overlay_pass::run(cmd, viewport, GL_NONE, false, false); + overlay_pass::run(cmd, viewport, GL_NONE, gl::image_aspect::color, false); } - rp_ssbo_to_d24x8_texture::rp_ssbo_to_d24x8_texture() + rp_ssbo_to_texture::rp_ssbo_to_texture() { vs_src = #include "../Program/GLSLSnippets/GenericVSPassthrough.glsl" ; fs_src = - #include "../Program/GLSLSnippets/CopyBufferToD24x8.glsl" + #include "../Program/GLSLSnippets/CopyBufferToImage.glsl" ; std::pair repl_list[] = { { "%set, ", "" }, { "%loc", std::to_string(GL_COMPUTE_BUFFER_SLOT(0)) }, - { "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(1)) } + { "%push_block", fmt::format("binding=%d, std140", GL_COMPUTE_BUFFER_SLOT(1)) }, + { "%stencil_export_supported", gl::get_driver_caps().ARB_shader_stencil_export_supported ? "1" : "0" } }; fs_src = fmt::replace_all(fs_src, repl_list); } - void rp_ssbo_to_d24x8_texture::run(gl::command_context& cmd, + void rp_ssbo_to_texture::run(gl::command_context& cmd, const buffer* src, const texture* dst, const u32 src_offset, const coordu& dst_region, - const pixel_unpack_settings& settings) + const pixel_buffer_layout& layout) { - const u32 row_length = settings.get_row_length() ? 
settings.get_row_length() : static_cast(dst_region.width); + const u32 row_length = static_cast(dst_region.width); + const u32 bpp = dst->pitch() / dst->width(); + program_handle.uniforms["src_pitch"] = row_length; - program_handle.uniforms["swap_bytes"] = settings.get_swap_bytes(); - src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * 4 * dst_region.height); + program_handle.uniforms["swap_bytes"] = layout.swap_bytes; + program_handle.uniforms["format"] = static_cast(dst->get_internal_format()); + src->bind_range(gl::buffer::target::ssbo, GL_COMPUTE_BUFFER_SLOT(0), src_offset, row_length * bpp * dst_region.height); cmd->stencil_mask(0xFF); - overlay_pass::run(cmd, dst_region, dst->id(), true); + overlay_pass::run(cmd, dst_region, dst->id(), dst->aspect()); } } diff --git a/rpcs3/Emu/RSX/GL/GLOverlays.h b/rpcs3/Emu/RSX/GL/GLOverlays.h index 8f3438ca6d..b0448abc8b 100644 --- a/rpcs3/Emu/RSX/GL/GLOverlays.h +++ b/rpcs3/Emu/RSX/GL/GLOverlays.h @@ -74,7 +74,7 @@ namespace gl virtual void emit_geometry(); - void run(gl::command_context& cmd, const areau& region, GLuint target_texture, bool depth_target, bool use_blending = false); + void run(gl::command_context& cmd, const areau& region, GLuint target_texture, GLuint image_aspect_bits, bool use_blending = false); }; struct ui_overlay_renderer : public overlay_pass @@ -114,20 +114,10 @@ namespace gl void run(gl::command_context& cmd, const areau& viewport, const rsx::simple_array& source, f32 gamma, bool limited_rgb, bool _3d); }; - struct rp_ssbo_to_d24x8_texture : public overlay_pass + struct rp_ssbo_to_texture : public overlay_pass { - rp_ssbo_to_d24x8_texture(); - void run(gl::command_context& cmd, const buffer* src, const texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_unpack_settings& settings); - }; - - struct rp_copy_rgba_to_bgra : public overlay_pass - { - void run(gl::command_context& cmd, const texture* src, const texture* dst, const coordu& 
src_region, const coordu& dst_region); - }; - - struct rp_copy_bgra_to_rgba : public overlay_pass - { - void run(gl::command_context& cmd, const texture* src, const texture* dst, const coordu& src_region, const coordu& dst_region); + rp_ssbo_to_texture(); + void run(gl::command_context& cmd, const buffer* src, const texture* dst, const u32 src_offset, const coordu& dst_region, const pixel_buffer_layout& layout); }; // TODO: Replace with a proper manager diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index a6d63b1400..a7d6cec855 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -19,11 +19,11 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma //All XBGR formats will have remapping before they can be read back in shaders as DRGB8 //Prefix o = 1, z = 0 case rsx::surface_color_format::x1r5g5b5_o1r5g5b5: - return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::rgb5a1, true, + return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::bgr5a1, true, { ::gl::texture::channel::one, ::gl::texture::channel::r, ::gl::texture::channel::g, ::gl::texture::channel::b } }; case rsx::surface_color_format::x1r5g5b5_z1r5g5b5: - return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::rgb5a1, true, + return{ ::gl::texture::type::ushort_5_5_5_1, ::gl::texture::format::rgb, ::gl::texture::internal_format::bgr5a1, true, { ::gl::texture::channel::zero, ::gl::texture::channel::r, ::gl::texture::channel::g, ::gl::texture::channel::b } }; case rsx::surface_color_format::x8r8g8b8_z8r8g8b8: diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 0fc0fccd62..29ace4c265 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -74,7 +74,7 @@ namespace gl switch (texture_format) { case 
CELL_GCM_TEXTURE_B8: return GL_R8; - case CELL_GCM_TEXTURE_A1R5G5B5: return GL_RGB5_A1; + case CELL_GCM_TEXTURE_A1R5G5B5: return GL_BGR5_A1; case CELL_GCM_TEXTURE_A4R4G4B4: return GL_RGBA4; case CELL_GCM_TEXTURE_R5G6B5: return GL_RGB565; case CELL_GCM_TEXTURE_A8R8G8B8: return GL_BGRA8; @@ -90,16 +90,16 @@ namespace gl case CELL_GCM_TEXTURE_W16_Z16_Y16_X16_FLOAT: return GL_RGBA16F; case CELL_GCM_TEXTURE_W32_Z32_Y32_X32_FLOAT: return GL_RGBA32F; case CELL_GCM_TEXTURE_X32_FLOAT: return GL_R32F; - case CELL_GCM_TEXTURE_D1R5G5B5: return GL_RGB5_A1; + case CELL_GCM_TEXTURE_D1R5G5B5: return GL_BGR5_A1; case CELL_GCM_TEXTURE_D8R8G8B8: return GL_BGRA8; case CELL_GCM_TEXTURE_Y16_X16_FLOAT: return GL_RG16F; case CELL_GCM_TEXTURE_COMPRESSED_DXT1: return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT; case CELL_GCM_TEXTURE_COMPRESSED_DXT23: return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT; case CELL_GCM_TEXTURE_COMPRESSED_DXT45: return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT; case CELL_GCM_TEXTURE_COMPRESSED_HILO8: return GL_RG8; - case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8; - case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_RGBA8; - case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return GL_RGBA8; + case CELL_GCM_TEXTURE_COMPRESSED_HILO_S8: return GL_RG8_SNORM; + case CELL_GCM_TEXTURE_COMPRESSED_B8R8_G8R8: return GL_BGRA8; + case CELL_GCM_TEXTURE_COMPRESSED_R8B8_R8G8: return GL_BGRA8; } fmt::throw_exception("Unknown texture format 0x%x", texture_format); } @@ -163,6 +163,8 @@ namespace gl return { GL_RGB, GL_UNSIGNED_SHORT_5_6_5, 2, true }; case texture::internal_format::rgb5a1: return { GL_RGB, GL_UNSIGNED_SHORT_5_5_5_1, 2, true }; + case texture::internal_format::bgr5a1: + return { GL_RGB, GL_UNSIGNED_SHORT_1_5_5_5_REV, 2, true }; case texture::internal_format::rgba4: return { GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4, 2, false }; case texture::internal_format::rgba8: @@ -396,6 +398,7 @@ namespace gl case CELL_GCM_TEXTURE_R5G5B5A1: case CELL_GCM_TEXTURE_R6G5B5: case CELL_GCM_TEXTURE_R5G6B5: + case 
CELL_GCM_TEXTURE_A4R4G4B4: case CELL_GCM_TEXTURE_A8R8G8B8: case CELL_GCM_TEXTURE_COMPRESSED_DXT1: case CELL_GCM_TEXTURE_COMPRESSED_DXT23: @@ -410,9 +413,6 @@ namespace gl case CELL_GCM_TEXTURE_DEPTH16_FLOAT: return{ GL_RED, GL_RED, GL_RED, GL_RED }; - case CELL_GCM_TEXTURE_A4R4G4B4: - return{ GL_BLUE, GL_GREEN, GL_RED, GL_ALPHA }; - case CELL_GCM_TEXTURE_B8: return{ GL_ONE, GL_RED, GL_RED, GL_RED }; @@ -580,63 +580,39 @@ namespace gl transfer_buf = &scratch_mem; }; - if (dst->aspect() == image_aspect::color || - unpack_info.type == GL_UNSIGNED_SHORT || - unpack_info.type == GL_UNSIGNED_INT_24_8) - { - if (auto job = get_trivial_transform_job(unpack_info)) - { - job->run(cmd, src, static_cast(mem_info->image_size_in_bytes), in_offset); - } - else - { - skip_barrier = true; - } - } - else if (unpack_info.type == GL_FLOAT) - { - mem_info->memory_required = (mem_info->image_size_in_texels * 4); - initialize_scratch_mem(); - - if (unpack_info.swap_bytes) - { - get_compute_task>()->run(cmd, transfer_buf, in_offset, static_cast(mem_info->image_size_in_bytes), out_offset); - } - else - { - get_compute_task>()->run(cmd, transfer_buf, in_offset, static_cast(mem_info->image_size_in_bytes), out_offset); - } - } - else if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) - { - mem_info->memory_required = (mem_info->image_size_in_texels * 8); - initialize_scratch_mem(); - get_compute_task()->run(cmd, transfer_buf, in_offset, out_offset, static_cast(mem_info->image_size_in_texels)); - } - else - { - fmt::throw_exception("Invalid depth/stencil type 0x%x", unpack_info.type); - } - const auto caps = gl::get_driver_caps(); - if (dst->get_internal_format() == gl::texture::internal_format::depth24_stencil8 && - dst->get_target() == gl::texture::target::texture2D && // Only 2D output supported for the moment. - !caps.vendor_NVIDIA && // NVIDIA has native support for D24X8 data as they introduced this extension. 
- caps.ARB_shader_stencil_export_supported) // The driver needs to support stencil export at the very least + if (!(dst->aspect() & image_aspect::stencil) || caps.ARB_shader_stencil_export_supported) { - // This optimized path handles the data load on the GPU without context switching to compute. - // The upside is that it is very fast if you have headroom. - // The downside is that it is linear. Not that it matters that much as most drivers seem to be downloading the entire data source and doing really slow things with it. - if (!skip_barrier) - { - glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); - } - - auto pass = gl::get_overlay_pass(); - pass->run(cmd, transfer_buf, dst, out_offset, {{dst_region.x, dst_region.y}, {dst_region.width, dst_region.height}}, {}); + // We do not need to use the driver's builtin transport mechanism + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + gl::get_overlay_pass()->run(cmd, transfer_buf, dst, out_offset, { {dst_region.x, dst_region.y}, {dst_region.width, dst_region.height} }, unpack_info); } else { + // Stencil format on NV. 
Use driver upload path + + if (unpack_info.type == GL_UNSIGNED_INT_24_8) + { + if (auto job = get_trivial_transform_job(unpack_info)) + { + job->run(cmd, src, static_cast(mem_info->image_size_in_bytes), in_offset); + } + else + { + skip_barrier = true; + } + } + else if (unpack_info.type == GL_FLOAT_32_UNSIGNED_INT_24_8_REV) + { + mem_info->memory_required = (mem_info->image_size_in_texels * 8); + initialize_scratch_mem(); + get_compute_task()->run(cmd, transfer_buf, in_offset, out_offset, static_cast(mem_info->image_size_in_texels)); + } + else + { + fmt::throw_exception("Invalid depth/stencil type 0x%x", unpack_info.type); + } + if (!skip_barrier) { glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); @@ -648,8 +624,6 @@ namespace gl dst->copy_from(reinterpret_cast(u64(out_offset)), static_cast(unpack_info.format), static_cast(unpack_info.type), dst_level, dst_region, {}); } - - if (scratch_mem) scratch_mem.remove(); } gl::viewable_image* create_texture(u32 gcm_format, u16 width, u16 height, u16 depth, u16 mipmaps, @@ -739,8 +713,6 @@ namespace gl } else { - bool apply_settings = true; - bool use_compute_transform = is_swizzled; std::pair upload_scratch_mem = {}, compute_scratch_mem = {}; image_memory_requirements mem_info; pixel_buffer_layout mem_layout; @@ -750,28 +722,9 @@ namespace gl u8 block_size_in_bytes = rsx::get_format_block_size_in_bytes(format); u64 image_linear_size = staging_buffer.size(); - switch (gl_type) - { - case GL_BYTE: - case GL_UNSIGNED_BYTE: - // Multi-channel format uploaded one byte at a time. 
This is due to poor driver support for formats like GL_UNSIGNED SHORT_8_8 - // Do byteswapping in software for now until compute acceleration is available - apply_settings = (gl_format == GL_RED); - caps.supports_byteswap = apply_settings; - break; - case GL_FLOAT: - case GL_UNSIGNED_INT_24_8: - case GL_FLOAT_32_UNSIGNED_INT_24_8_REV: - mem_layout.swap_bytes = true; - mem_layout.size = 4; - use_compute_transform = true; - apply_settings = false; - break; - } - const auto min_required_buffer_size = std::max(utils::align(image_linear_size * 4, 0x100000), 16 * 0x100000); - if (use_compute_transform) + if (driver_caps.ARB_compute_shader_supported) { if (g_upload_transfer_buffer.size() < static_cast(min_required_buffer_size)) { @@ -790,9 +743,15 @@ namespace gl for (const rsx::subresource_layout& layout : input_layouts) { - if (use_compute_transform) + if (driver_caps.ARB_compute_shader_supported) { - const u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); + u64 row_pitch = rsx::align2(layout.width_in_block * block_size_in_bytes, caps.alignment); + if (!rsx::is_compressed_host_format(format)) + { + // Handle emulated compressed formats with host unpack (R8G8 compressed) + row_pitch = std::max(row_pitch, dst->pitch()); + } + image_linear_size = row_pitch * layout.height_in_block * layout.depth; compute_scratch_mem = { nullptr, g_compute_decode_buffer.alloc(static_cast(image_linear_size), 256) }; @@ -803,7 +762,7 @@ namespace gl dst_buffer = { reinterpret_cast(upload_scratch_mem.first), image_linear_size }; } - caps.supports_hw_deswizzle = (is_swizzled && use_compute_transform && image_linear_size > 4096); + caps.supports_hw_deswizzle = (is_swizzled && driver_caps.ARB_compute_shader_supported && image_linear_size > 4096); auto op = upload_texture_subresource(dst_buffer, layout, format, is_swizzled, caps); // Define upload region @@ -815,7 +774,7 @@ namespace gl region.height = layout.height_in_texel; region.depth = layout.depth; - 
if (use_compute_transform) + if (driver_caps.ARB_compute_shader_supported) { // 0. Preconf mem_layout.swap_bytes = op.require_swap; @@ -895,12 +854,7 @@ namespace gl } else { - if (apply_settings) - { - unpack_settings.swap_bytes(op.require_swap); - apply_settings = false; - } - + unpack_settings.swap_bytes(op.require_swap); dst->copy_from(out_pointer, static_cast(gl_format), static_cast(gl_type), layout.level, region, unpack_settings); } } @@ -1017,6 +971,11 @@ namespace gl // 2. Both formats require no transforms (basic memcpy) or... // 3. Both formats have the same transform (e.g RG16_UNORM to RG16_SFLOAT, both are down and uploaded with a 2-byte byteswap) + if (format1 == GL_BGRA8 || format2 == GL_BGRA8) + { + return false; + } + if (get_format_texel_width(format1) != get_format_texel_width(format2)) { return false; diff --git a/rpcs3/Emu/RSX/GL/glutils/blitter.cpp b/rpcs3/Emu/RSX/GL/glutils/blitter.cpp index 626224a5a3..ba8993d684 100644 --- a/rpcs3/Emu/RSX/GL/glutils/blitter.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/blitter.cpp @@ -9,17 +9,6 @@ namespace gl { blitter* g_hw_blitter = nullptr; - void process_bgra_transfer_source(const gl::texture* src, const gl::texture* dst, const coord3i& region) - { - ensure(src->get_internal_format() == texture::internal_format::bgra8); - ensure(dst->get_internal_format() == texture::internal_format::rgba8); - } - - void process_bgra_transfer_dest(const gl::texture* tex, const coord3i& region) - { - ensure(tex->get_internal_format() == texture::internal_format::bgra8); - } - void blitter::copy_image(gl::command_context& cmd, const texture* src, const texture* dst, int src_level, int dst_level, const position3i& src_offset, const position3i& dst_offset, const size3i& size) const { ensure(src_level == 0); @@ -27,29 +16,11 @@ namespace gl // Typeless bypass for BGRA8 std::unique_ptr temp_image; const texture* real_src = src; - bool handle_bgra8_dest = false; - - if (src->get_internal_format() != dst->get_internal_format()) - { - if 
(false && src->get_internal_format() == texture::internal_format::bgra8) - { - temp_image = std::make_unique(static_cast(src->get_target()), src->width(), src->height(), src->depth(), src->levels(), GL_RGBA8, rsx::format_class::RSX_FORMAT_CLASS_COLOR); - process_bgra_transfer_source(src, temp_image.get(), { src_offset, size }); - real_src = temp_image.get(); - } - - handle_bgra8_dest = (dst->get_internal_format() == texture::internal_format::bgra8); - } glCopyImageSubData(real_src->id(), static_cast(real_src->get_target()), src_level, src_offset.x, src_offset.y, src_offset.z, dst->id(), static_cast(dst->get_target()), dst_level, dst_offset.x, dst_offset.y, dst_offset.z, size.width, size.height, size.depth); - - if (handle_bgra8_dest) - { - process_bgra_transfer_dest(dst, { dst_offset, size }); - } } void blitter::scale_image(gl::command_context& cmd, const texture* src, texture* dst, areai src_rect, areai dst_rect, @@ -80,7 +51,7 @@ namespace gl } else { - copy_image(cmd, src, dst, 0, 1, position3i{ src_rect.x1, src_rect.y1, 0u }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 }); + copy_image(cmd, src, dst, 0, 0, position3i{ src_rect.x1, src_rect.y1, 0u }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 }); } return; @@ -123,33 +94,12 @@ namespace gl } } - if (src->get_internal_format() == texture::internal_format::bgra8 && - real_src == src && - dst->get_internal_format() != src->get_internal_format()) - { - // Not typeless, plus src is bgra8. 
Needs conversion - typeless_src = std::make_unique(GL_TEXTURE_2D, src->width(), src->height(), 1, 1, GL_RGBA8); - process_bgra_transfer_source(src, typeless_src.get(), {{src_rect.x1, src_rect.y1, 0}, {src_rect.width(), src_rect.height(), 1}}); - real_src = typeless_src.get(); - } - - bool handle_bgra8_dest = false; - if (dst->get_internal_format() == texture::internal_format::bgra8 && - real_dst == dst && - dst->get_internal_format() != src->get_internal_format()) - { - // Not typeless but dst is bgra8. - // Handle the conversion in post - handle_bgra8_dest = true; - } - ensure(real_src->aspect() == real_dst->aspect()); if (src_rect.width() == dst_rect.width() && src_rect.height() == dst_rect.height() && !src_rect.is_flipped() && !dst_rect.is_flipped()) { - copy_image(cmd, real_src, real_dst, 0, 1, position3i{ src_rect.x1, src_rect.y1, 0 }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 }); - handle_bgra8_dest = false; // Handled in copy_image + copy_image(cmd, real_src, real_dst, 0, 0, position3i{ src_rect.x1, src_rect.y1, 0 }, position3i{ dst_rect.x1, dst_rect.y1, 0 }, size3i{ src_rect.width(), src_rect.height(), 1 }); } else { @@ -209,11 +159,6 @@ namespace gl // Transfer contents from typeless dst back to original dst copy_typeless(cmd, dst, typeless_dst.get()); } - else if (handle_bgra8_dest) - { - // Blit transfer to BGRA8 target - process_bgra_transfer_dest(dst, { {dst_rect.x1, dst_rect.y1, 0}, {dst_rect.width(), dst_rect.height(), 1} }); - } } void blitter::fast_clear_image(gl::command_context& cmd, const texture* dst, const color4f& color) diff --git a/rpcs3/Emu/RSX/GL/glutils/image.cpp b/rpcs3/Emu/RSX/GL/glutils/image.cpp index bfdef950f8..2591eff697 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.cpp +++ b/rpcs3/Emu/RSX/GL/glutils/image.cpp @@ -8,7 +8,15 @@ namespace gl { static GLenum sizedfmt_to_ifmt(GLenum sized) { - return sized == GL_BGRA8 ? 
GL_RGBA8 : sized; + switch (sized) + { + case GL_BGRA8: + return GL_RGBA8; + case GL_BGR5_A1: + return GL_RGB5_A1; + default: + return sized; + } } texture::texture(GLenum target, GLuint width, GLuint height, GLuint depth, GLuint mipmaps, GLenum sized_format, rsx::format_class format_class) diff --git a/rpcs3/Emu/RSX/GL/glutils/image.h b/rpcs3/Emu/RSX/GL/glutils/image.h index 77a5b748e4..f8bd3443a5 100644 --- a/rpcs3/Emu/RSX/GL/glutils/image.h +++ b/rpcs3/Emu/RSX/GL/glutils/image.h @@ -10,7 +10,8 @@ using namespace ::rsx::format_class_; namespace gl { -#define GL_BGRA8 0x80E1 // Enumerant of GL_BGRA8_EXT from the GL_EXT_texture_format_BGRA8888 +#define GL_BGRA8 0x80E1 // Enumerant of GL_BGRA8_EXT from the GL_EXT_texture_format_BGRA8888 +#define GL_BGR5_A1 0x99F0 // Unused enum 0x96xx is the last official GL enumerant class buffer; class buffer_view; @@ -120,6 +121,7 @@ namespace gl bgra8 = GL_BGRA8, rgb565 = GL_RGB565, rgb5a1 = GL_RGB5_A1, + bgr5a1 = GL_BGR5_A1, rgba4 = GL_RGBA4, r8 = GL_R8, r16 = GL_R16, @@ -128,7 +130,9 @@ namespace gl rg16 = GL_RG16, rg16f = GL_RG16F, rgba16f = GL_RGBA16F, - rgba32f = GL_RGBA32F + rgba32f = GL_RGBA32F, + + rg8_snorm = GL_RG8_SNORM }; enum class wrap diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToD24x8.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToD24x8.glsl deleted file mode 100644 index f089add963..0000000000 --- a/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToD24x8.glsl +++ /dev/null @@ -1,48 +0,0 @@ -R"( -#version 430 -#extension GL_ARB_shader_stencil_export : enable - -layout(%set, binding=%loc, std430) readonly restrict buffer RawDataBlock -{ - uint data[]; -}; - -#if USE_UBO -layout(%push_block) uniform UnpackConfiguration -{ - uint swap_bytes; - uint src_pitch; -}; -#else - uniform uint swap_bytes; - uniform uint src_pitch; -#endif - -uint getDataOffset() -{ - const ivec2 coords = ivec2(gl_FragCoord.xy); - return coords.y * src_pitch + coords.x; -} - -void main() -{ - const uint virtual_address = 
getDataOffset(); - uint real_data = data[virtual_address]; - - const uint stencil_byte = bitfieldExtract(real_data, 0, 8); - uint depth_bytes; - - if (swap_bytes > 0) - { - // CCBBAA00 -> 00AABBCC -> AABBCC. Stencil byte does not actually move - depth_bytes = bitfieldExtract(real_data, 24, 8) | (bitfieldExtract(real_data, 16, 8) << 8) | (bitfieldExtract(real_data, 8, 8) << 24); - } - else - { - depth_bytes = bitfieldExtract(real_data, 8, 24); - } - - gl_FragDepth = float(depth_bytes) / 0xffffff; - gl_FragStencilRefARB = int(stencil_byte); -} -)" diff --git a/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToImage.glsl b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToImage.glsl new file mode 100644 index 0000000000..fdea953008 --- /dev/null +++ b/rpcs3/Emu/RSX/Program/GLSLSnippets/CopyBufferToImage.glsl @@ -0,0 +1,233 @@ +R"( +#version 430 +#extension GL_ARB_shader_stencil_export : enable + +#define ENABLE_DEPTH_STENCIL_LOAD %stencil_export_supported + +#define FMT_GL_DEPTH_COMPONENT16 0x81A5 +#define FMT_GL_DEPTH_COMPONENT32F 0x8CAC +#define FMT_GL_DEPTH24_STENCIL8 0x88F0 +#define FMT_GL_DEPTH32F_STENCIL8 0x8CAD + +#define FMT_GL_RGBA8 0x8058 +#define FMT_GL_BGRA8 0x80E1 +#define FMT_GL_RGB565 0x8D62 +#define FMT_GL_RGB5_A1 0x8057 +#define FMT_GL_BGR5_A1 0x99F0 +#define FMT_GL_RGBA4 0x8056 +#define FMT_GL_R8 0x8229 +#define FMT_GL_R16 0x822A +#define FMT_GL_R32F 0x822E +#define FMT_GL_RG8 0x822B +#define FMT_GL_RG8_SNORM 0x8F95 +#define FMT_GL_RG16 0x822C +#define FMT_GL_RG16F 0x822F +#define FMT_GL_RGBA16F 0x881A +#define FMT_GL_RGBA32F 0x8814 + +#define bswap_u16(bits) (bits & 0xFF) << 8 | (bits & 0xFF00) >> 8 | (bits & 0xFF0000) << 8 | (bits & 0xFF000000) >> 8 +#define bswap_u32(bits) (bits & 0xFF) << 24 | (bits & 0xFF00) << 8 | (bits & 0xFF0000) >> 8 | (bits & 0xFF000000) >> 24 + +layout(location=0) out vec4 fragColor; + +layout(%set, binding=%loc, std430) readonly restrict buffer RawDataBlock +{ + uint data[]; +}; + +#if USE_UBO +layout(%push_block) uniform 
UnpackConfiguration +{ + uint swap_bytes; + uint src_pitch; + uint format; +}; +#else + uniform uint swap_bytes; + uniform uint src_pitch; + uniform uint format; +#endif + +uint getTexelOffset() +{ + const ivec2 coords = ivec2(gl_FragCoord.xy); + return coords.y * src_pitch + coords.x; +} + +// Decoders. Beware of multi-wide swapped types (e.g swap(16x2) != swap(32x1)) +uint readUint8(const in uint address) +{ + const uint block = address / 4; + const uint offset = address % 4; + return bitfieldExtract(data[block], int(offset) * 8, 8); +} + +uint readUint16(const in uint address) +{ + const uint block = address / 2; + const uint offset = address % 2; + const uint value = bitfieldExtract(data[block], int(offset) * 16, 16); + + if (swap_bytes != 0) + { + return bswap_u16(value); + } + + return value; +} + +uint readUint32(const in uint address) +{ + const uint value = data[address]; + return (swap_bytes != 0) ? bswap_u32(value) : value; +} + +uvec2 readUint24_8(const in uint address) +{ + const uint raw_value = data[address]; + const uint stencil = bitfieldExtract(raw_value, 0, 8); + + if (swap_bytes != 0) + { + const uint depth = min(bswap_u32(raw_value), 0xffffff); + return uvec2(depth, stencil); + } + + return uvec2( + bitfieldExtract(raw_value, 8, 24), + stencil + ); +} + +uvec2 readUint8x2(const in uint address) +{ + const uint raw = readUint16(address); + return uvec2(bitfieldExtract(raw, 0, 8), bitfieldExtract(raw, 8, 8)); +} + +ivec2 readInt8x2(const in uint address) +{ + const ivec2 raw = ivec2(readUint8x2(address)); + return raw - (ivec2(greaterThan(raw, ivec2(127))) * 256); +} + +#define readFixed8(address) readUint8(address) / 255.f +#define readFixed8x2(address) readUint8x2(address) / 255.f +#define readFixed8x2Snorm(address) readInt8x2(address) / 127.f + +vec4 readFixed8x4(const in uint address) +{ + const uint raw = readUint32(address); + return uvec4( + bitfieldExtract(raw, 0, 8), + bitfieldExtract(raw, 8, 8), + bitfieldExtract(raw, 16, 8), + 
bitfieldExtract(raw, 24, 8) + ) / 255.f; +} + +#define readFixed16(address) readUint16(uint(address)) / 65535.f +#define readFixed16x2(address) vec2(readFixed16(address * 2 + 0), readFixed16(address * 2 + 1)) +#define readFixed16x4(address) vec4(readFixed16(address * 4 + 0), readFixed16(address * 4 + 1), readFixed16(address * 4 + 2), readFixed16(address * 4 + 3)) + +#define readFloat16(address) unpackHalf2x16(readUint16(uint(address))).x +#define readFloat16x2(address) vec2(readFloat16(address * 2 + 0), readFloat16(address * 2 + 1)) +#define readFloat16x4(address) vec4(readFloat16(address * 4 + 0), readFloat16(address * 4 + 1), readFloat16(address * 4 + 2), readFloat16(address * 4 + 3)) + +#define readFloat32(address) uintBitsToFloat(readUint32(address)) +#define readFloat32x4(address) uintBitsToFloat(uvec4(readUint32(address * 4 + 0), readUint32(address * 4 + 1), readUint32(address * 4 + 2), readUint32(address * 4 + 3))) + +void main() +{ + const uint texel_address = getTexelOffset(); + uint utmp; + uvec2 utmp2; + + switch (format) + { + // Depth formats + case FMT_GL_DEPTH_COMPONENT16: + gl_FragDepth = readFixed16(texel_address); + break; + case FMT_GL_DEPTH_COMPONENT32F: + gl_FragDepth = readFloat32(texel_address); // D32F texels are whole 32-bit floats; a half-float read would decode the wrong bits + break; + +#if ENABLE_DEPTH_STENCIL_LOAD + + // Depth-stencil formats. Unsupported on NVIDIA due to missing extensions. 
+ case FMT_GL_DEPTH24_STENCIL8: + case FMT_GL_DEPTH32F_STENCIL8: + utmp2 = readUint24_8(texel_address); + gl_FragDepth = float(utmp2.x) / 0xffffff; + gl_FragStencilRefARB = int(utmp2.y); + break; + +#endif + + // Simple color + case FMT_GL_RGBA8: + fragColor = readFixed8x4(texel_address); + break; + case FMT_GL_BGRA8: + fragColor = readFixed8x4(texel_address).bgra; + break; + case FMT_GL_R8: + fragColor.r = readFixed8(texel_address); + break; + case FMT_GL_R16: + fragColor.r = readFixed16(texel_address); + break; + case FMT_GL_R32F: + fragColor.r = readFloat32(texel_address); + break; + case FMT_GL_RG8: + fragColor.rg = readFixed8x2(texel_address); + break; + case FMT_GL_RG8_SNORM: + fragColor.rg = readFixed8x2Snorm(texel_address); + break; + case FMT_GL_RG16: + fragColor.rg = readFixed16x2(texel_address); + break; + case FMT_GL_RG16F: + fragColor.rg = readFloat16x2(texel_address); + break; + case FMT_GL_RGBA16F: + fragColor = readFloat16x4(texel_address); + break; + case FMT_GL_RGBA32F: + fragColor = readFloat32x4(texel_address); + break; + + // Packed color + case FMT_GL_RGB565: + utmp = readUint16(texel_address); + fragColor.b = bitfieldExtract(utmp, 0, 5) / 31.f; + fragColor.g = bitfieldExtract(utmp, 5, 6) / 63.f; + fragColor.r = bitfieldExtract(utmp, 11, 5) / 31.f; + break; + case FMT_GL_BGR5_A1: + utmp = readUint16(texel_address); + fragColor.b = bitfieldExtract(utmp, 0, 5) / 31.f; + fragColor.g = bitfieldExtract(utmp, 5, 5) / 31.f; + fragColor.r = bitfieldExtract(utmp, 10, 5) / 31.f; + fragColor.a = bitfieldExtract(utmp, 15, 1) * 1.f; + break; + case FMT_GL_RGB5_A1: + utmp = readUint16(texel_address); + fragColor.a = bitfieldExtract(utmp, 0, 1) * 1.f; + fragColor.b = bitfieldExtract(utmp, 1, 5) / 31.f; + fragColor.g = bitfieldExtract(utmp, 6, 5) / 31.f; + fragColor.r = bitfieldExtract(utmp, 11, 5) / 31.f; + break; + case FMT_GL_RGBA4: + utmp = readUint16(texel_address); + fragColor.b = bitfieldExtract(utmp, 0, 4) / 15.f; + fragColor.g = bitfieldExtract(utmp, 
4, 4) / 15.f; + fragColor.r = bitfieldExtract(utmp, 8, 4) / 15.f; + fragColor.a = bitfieldExtract(utmp, 12, 4) / 15.f; + break; + } +} +)" diff --git a/rpcs3/emucore.vcxproj b/rpcs3/emucore.vcxproj index 95ca42c0a5..4155e3d736 100644 --- a/rpcs3/emucore.vcxproj +++ b/rpcs3/emucore.vcxproj @@ -831,7 +831,7 @@ - + diff --git a/rpcs3/emucore.vcxproj.filters b/rpcs3/emucore.vcxproj.filters index 052ae2be49..b5afd02822 100644 --- a/rpcs3/emucore.vcxproj.filters +++ b/rpcs3/emucore.vcxproj.filters @@ -2178,7 +2178,7 @@ Emu\GPU\RSX\Program\Snippets - + Emu\GPU\RSX\Program\Snippets