From f9aea076aeed5e82f83c5dbdf3b69bfdae4d3183 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Wed, 14 Aug 2019 01:38:31 +0300 Subject: [PATCH] rsx: Implement depth_buffer_float support. - Since this is transparent to the application at all time, it only becomes a problem when doing memory transfer or DEPTH->RGBA conversion in shaders. --- rpcs3/Emu/RSX/Common/GLSLCommon.h | 14 ++++-- rpcs3/Emu/RSX/Common/TextureUtils.h | 9 +++- rpcs3/Emu/RSX/Common/surface_utils.h | 19 ++++++- rpcs3/Emu/RSX/Common/texture_cache.h | 58 +++++++++++++++------- rpcs3/Emu/RSX/Common/texture_cache_utils.h | 17 +++++++ rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 3 ++ rpcs3/Emu/RSX/RSXThread.cpp | 11 ++-- rpcs3/Emu/RSX/RSXThread.h | 1 + rpcs3/Emu/RSX/VK/VKGSRender.cpp | 4 +- rpcs3/Emu/RSX/rsx_decode.h | 23 +++++++++ rpcs3/Emu/RSX/rsx_methods.h | 5 ++ rpcs3/Emu/RSX/rsx_utils.h | 1 + 12 files changed, 136 insertions(+), 29 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/GLSLCommon.h b/rpcs3/Emu/RSX/Common/GLSLCommon.h index 837d33e8f7..322dedd6ae 100644 --- a/rpcs3/Emu/RSX/Common/GLSLCommon.h +++ b/rpcs3/Emu/RSX/Common/GLSLCommon.h @@ -603,9 +603,14 @@ namespace glsl //NOTE: Memory layout is fetched as byteswapped BGRA [GBAR] (GOW collection, DS2, DeS) //The A component (Z) is useless (should contain stencil8 or just 1) OS << - "vec4 decodeLinearDepth(float depth_value)\n" + "vec4 decode_depth24(float depth_value, uint depth_float)\n" "{\n" - " uint value = uint(depth_value * 16777215.);\n" + " uint value;\n" + " if (depth_float == 0)\n" + " value = uint(depth_value * 16777215.);\n" + " else\n" + " value = (floatBitsToUint(depth_value) >> 7) & 0xffffff;\n" + "\n" " uint b = (value & 0xff);\n" " uint g = (value >> 8) & 0xff;\n" " uint r = (value >> 16) & 0xff;\n" @@ -625,9 +630,10 @@ namespace glsl "vec4 texture2DReconstruct(sampler2D tex, usampler2D stencil_tex, vec2 coord, float remap)\n" "{\n" - " vec4 result = decodeLinearDepth(texture(tex, coord.xy).r);\n" + " uint control_bits = 
floatBitsToUint(remap);\n" + " vec4 result = decode_depth24(texture(tex, coord.xy).r, control_bits >> 16);\n" " result.z = float(texture(stencil_tex, coord.xy).x) / 255.f;\n" - " uint remap_vector = floatBitsToUint(remap) & 0xFF;\n" + " uint remap_vector = control_bits & 0xFF;\n" " if (remap_vector == 0xE4) return result;\n\n" " vec4 tmp;\n" " uint remap_a = remap_vector & 0x3;\n" diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 14a9b78791..531ffa3165 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -42,12 +42,19 @@ namespace rsx transfer = 2 }; + enum format_type : u8 + { + color = 0, + depth_uint = 1, + depth_float = 2 + }; + //Sampled image descriptor struct sampled_image_descriptor_base { texture_upload_context upload_context = texture_upload_context::shader_read; rsx::texture_dimension_extended image_type = texture_dimension_extended::texture_dimension_2d; - bool is_depth_texture = false; + rsx::format_type format_class = rsx::format_type::color; bool is_cyclic_reference = false; f32 scale_x = 1.f; f32 scale_y = 1.f; diff --git a/rpcs3/Emu/RSX/Common/surface_utils.h b/rpcs3/Emu/RSX/Common/surface_utils.h index 7b3bbf5d7e..79353b0d5d 100644 --- a/rpcs3/Emu/RSX/Common/surface_utils.h +++ b/rpcs3/Emu/RSX/Common/surface_utils.h @@ -149,6 +149,8 @@ namespace rsx u8 samples_x = 1; u8 samples_y = 1; + format_type format_class = format_type::color; + std::unique_ptr::type> resolve_surface; surface_sample_layout sample_layout = surface_sample_layout::null; @@ -279,16 +281,29 @@ namespace rsx format_info.gcm_depth_format = format; } - rsx::surface_color_format get_surface_color_format() + void set_depth_render_mode(bool integer) + { + if (is_depth_surface()) + { + format_class = (integer) ? 
format_type::depth_uint : format_type::depth_float; + } + } + + rsx::surface_color_format get_surface_color_format() const { return format_info.gcm_color_format; } - rsx::surface_depth_format get_surface_depth_format() + rsx::surface_depth_format get_surface_depth_format() const { return format_info.gcm_depth_format; } + rsx::format_type get_format_type() const + { + return format_class; + } + bool dirty() const { return (state_flags != rsx::surface_state_flags::ready) || !old_contents.empty(); diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index a51f7f1951..de0665f444 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -212,12 +212,12 @@ namespace rsx sampled_image_descriptor() = default; - sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, bool is_depth, + sampled_image_descriptor(image_view_type handle, texture_upload_context ctx, format_type ftype, f32 x_scale, f32 y_scale, rsx::texture_dimension_extended type, bool cyclic_reference = false) { image_handle = handle; upload_context = ctx; - is_depth_texture = is_depth; + format_class = ftype; is_cyclic_reference = cyclic_reference; scale_x = x_scale; scale_y = y_scale; @@ -226,14 +226,14 @@ namespace rsx sampled_image_descriptor(image_resource_type external_handle, deferred_request_command reason, u32 base_address, u32 gcm_format, u16 x_offset, u16 y_offset, u16 width, u16 height, u16 depth, u16 slice_h, - texture_upload_context ctx, bool is_depth, f32 x_scale, f32 y_scale, + texture_upload_context ctx, format_type ftype, f32 x_scale, f32 y_scale, rsx::texture_dimension_extended type, const texture_channel_remap_t& remap) { external_subresource_desc = { external_handle, reason, base_address, gcm_format, x_offset, y_offset, width, height, depth, slice_h, remap }; image_handle = 0; upload_context = ctx; - is_depth_texture = is_depth; + format_class = ftype; scale_x = x_scale; scale_y = y_scale; 
image_type = type; @@ -1009,6 +1009,21 @@ namespace rsx } } + inline format_type get_format_class(u32 gcm_format) + { + switch (gcm_format) + { + default: + return format_type::color; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH24_D8: + return format_type::depth_uint; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return format_type::depth_float; + } + } + public: texture_cache() : m_storage(this), m_predictor(this) {} @@ -1865,9 +1880,10 @@ namespace rsx const auto scaled_w = rsx::apply_resolution_scale(tex_width, true); const auto scaled_h = rsx::apply_resolution_scale(tex_height, true); + const auto format_class = (force_convert) ? get_format_class(format): texptr->get_format_type(); const auto command = surface_is_rop_target ? deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; return { texptr->get_surface(rsx::surface_access::read), command, texaddr, format, 0, 0, scaled_w, scaled_h, 1, 0, - texture_upload_context::framebuffer_storage, is_depth, scale_x, scale_y, + texture_upload_context::framebuffer_storage, format_class, scale_x, scale_y, extended_dimension, decoded_remap }; } @@ -1877,7 +1893,7 @@ namespace rsx } return{ texptr->get_view(encoded_remap, decoded_remap), texture_upload_context::framebuffer_storage, - is_depth, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target }; + texptr->get_format_type(), scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, surface_is_rop_target }; } const auto scaled_w = rsx::apply_resolution_scale(tex_width, true); @@ -1887,14 +1903,14 @@ namespace rsx { return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::_3d_unwrap, texaddr, format, 0, 0, scaled_w, scaled_h, tex_depth, slice_h, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + texture_upload_context::framebuffer_storage, texptr->get_format_type(), 1.f, 1.f, 
rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; } verify(HERE), extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap; return{ texptr->get_surface(rsx::surface_access::read), deferred_request_command::cubemap_unwrap, texaddr, format, 0, 0, scaled_w, scaled_h, 1, slice_h, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + texture_upload_context::framebuffer_storage, texptr->get_format_type(), 1.f, 1.f, rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; } @@ -1930,12 +1946,13 @@ namespace rsx auto scaled_h = rsx::apply_resolution_scale(tex_height, true); const auto bpp = get_format_block_size_in_bytes(format); + const auto format_class = get_format_class(format); if (extended_dimension == rsx::texture_dimension_extended::texture_dimension_cubemap) { sampled_image_descriptor desc = { nullptr, deferred_request_command::cubemap_gather, texaddr, format, 0, 0, scaled_w, scaled_w, 1, slice_h, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + texture_upload_context::framebuffer_storage, format_class, 1.f, 1.f, rsx::texture_dimension_extended::texture_dimension_cubemap, decoded_remap }; u16 padding = u16(slice_h - tex_width); @@ -1946,7 +1963,7 @@ namespace rsx { sampled_image_descriptor desc = { nullptr, deferred_request_command::_3d_gather, texaddr, format, 0, 0, scaled_w, scaled_h, tex_depth, slice_h, - texture_upload_context::framebuffer_storage, is_depth, 1.f, 1.f, + texture_upload_context::framebuffer_storage, format_class, 1.f, 1.f, rsx::texture_dimension_extended::texture_dimension_3d, decoded_remap }; u16 padding = u16(slice_h - tex_height); @@ -1963,7 +1980,7 @@ namespace rsx const auto h = fbos.empty()? 
tex_height : rsx::apply_resolution_scale(tex_height, true); sampled_image_descriptor result = { nullptr, deferred_request_command::atlas_gather, - texaddr, format, 0, 0, w, h, 1, slice_h, texture_upload_context::framebuffer_storage, is_depth, + texaddr, format, 0, 0, w, h, 1, slice_h, texture_upload_context::framebuffer_storage, format_class, scale_x, scale_y, rsx::texture_dimension_extended::texture_dimension_2d, decoded_remap }; result.external_subresource_desc.sections_to_copy = gather_texture_slices(cmd, fbos, local, texaddr, tex_width, tex_height, 0, tex_pitch, 1, bpp, is_depth); @@ -2049,7 +2066,7 @@ namespace rsx // Most mesh textures are stored as compressed to make the most of the limited memory if (auto cached_texture = find_texture_from_dimensions(texaddr, format, tex_width, tex_height, depth)) { - return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() }; + return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->get_format_type(), scale_x, scale_y, cached_texture->get_image_type() }; } } else @@ -2075,7 +2092,7 @@ namespace rsx { if (cached_texture->matches(texaddr, format, tex_width, tex_height, depth, 0)) { - return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->is_depth_texture(), scale_x, scale_y, cached_texture->get_image_type() }; + return{ cached_texture->get_view(tex.remap(), tex.decoded_remap()), cached_texture->get_context(), cached_texture->get_format_type(), scale_x, scale_y, cached_texture->get_image_type() }; } } @@ -2178,7 +2195,7 @@ namespace rsx } return { last->get_raw_texture(), deferred_request_command::copy_image_static, texaddr, gcm_format, 0, 0, - tex_width, tex_height, 1, 0, last->get_context(), is_depth, + tex_width, tex_height, 1, 0, last->get_context(), last->get_format_type(), 
scale_x, scale_y, extended_dimension, tex.decoded_remap() }; } } @@ -2235,14 +2252,19 @@ namespace rsx const bool is_swizzled = !(tex.format() & CELL_GCM_TEXTURE_LN); auto subresources_layout = get_subresources_layout(tex); - bool is_depth_format = false; + rsx::format_type format_class; switch (format) { + default: + format_class = rsx::format_type::color; + break; case CELL_GCM_TEXTURE_DEPTH16: - case CELL_GCM_TEXTURE_DEPTH16_FLOAT: case CELL_GCM_TEXTURE_DEPTH24_D8: + format_class = rsx::format_type::depth_uint; + break; case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: - is_depth_format = true; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + format_class = rsx::format_type::depth_float; break; } @@ -2254,7 +2276,7 @@ namespace rsx //NOTE: SRGB correction is to be handled in the fragment shader; upload as linear RGB return{ upload_image_from_cpu(cmd, tex_range, tex_width, tex_height, depth, tex.get_exact_mipmap_count(), tex_pitch, format, texture_upload_context::shader_read, subresources_layout, extended_dimension, is_swizzled)->get_view(tex.remap(), tex.decoded_remap()), - texture_upload_context::shader_read, is_depth_format, scale_x, scale_y, extended_dimension }; + texture_upload_context::shader_read, format_class, scale_x, scale_y, extended_dimension }; } template diff --git a/rpcs3/Emu/RSX/Common/texture_cache_utils.h b/rpcs3/Emu/RSX/Common/texture_cache_utils.h index 5fc81313f8..0f91377648 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache_utils.h +++ b/rpcs3/Emu/RSX/Common/texture_cache_utils.h @@ -1621,6 +1621,23 @@ namespace rsx return sync_timestamp; } + format_type get_format_type() const + { + switch (gcm_format) + { + default: + return format_type::color; + case CELL_GCM_TEXTURE_DEPTH16: + case CELL_GCM_TEXTURE_DEPTH24_D8: + return format_type::depth_uint; + case CELL_GCM_TEXTURE_DEPTH16_FLOAT: + case CELL_GCM_TEXTURE_DEPTH24_D8_FLOAT: + return format_type::depth_float; + case 0: + fmt::throw_exception("Unreachable" HERE); + } + } + /** * Comparison */ diff --git 
a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index c174e6bc86..ac519dea2e 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -215,6 +215,7 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk { auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); depth_stencil_target = ds->id(); + ds->set_depth_render_mode(!layout.depth_float); verify("Pitch mismatch!" HERE), std::get<1>(m_rtts.m_bound_depth_stencil)->get_rsx_pitch() == layout.actual_zeta_pitch; @@ -223,8 +224,10 @@ void GLGSRender::init_buffers(rsx::framebuffer_creation_context context, bool sk m_depth_surface_info.width = layout.width; m_depth_surface_info.height = layout.height; m_depth_surface_info.depth_format = layout.depth_format; + m_depth_surface_info.depth_buffer_float = layout.depth_float; m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); m_depth_surface_info.samples = samples; + m_gl_texture_cache.notify_surface_changed(m_depth_surface_info.get_memory_range(layout.aa_factors)); } else diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 3cfb338302..1012df95ba 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -1049,6 +1049,7 @@ namespace rsx layout.color_format = rsx::method_registers.surface_color(); layout.depth_format = rsx::method_registers.surface_depth_fmt(); + layout.depth_float = rsx::method_registers.depth_buffer_float_enabled(); layout.target = rsx::method_registers.surface_color_target(); const auto aa_mode = rsx::method_registers.surface_antialias(); @@ -1259,6 +1260,7 @@ namespace rsx { if (m_surface_info[i].width != layout.width || m_surface_info[i].height != layout.height || + m_surface_info[i].color_format != layout.color_format || m_surface_info[i].samples != sample_count) { really_changed = true; @@ -1270,6 +1272,8 @@ namespace rsx if (!really_changed) { if (layout.zeta_address == 
m_depth_surface_info.address && + layout.depth_format == m_depth_surface_info.depth_format && + layout.depth_float == m_depth_surface_info.depth_buffer_float && sample_count == m_depth_surface_info.samples) { // Same target is reused @@ -1650,7 +1654,7 @@ namespace rsx if (raw_format & CELL_GCM_TEXTURE_UN) result.unnormalized_coords |= (1 << i); - if (sampler_descriptors[i]->is_depth_texture) + if (sampler_descriptors[i]->format_class != format_type::color) { switch (format) { @@ -1664,9 +1668,10 @@ namespace rsx { // Reading depth data as XRGB8 is supported with in-shader conversion // TODO: Optionally add support for 16-bit formats (not necessary since type casts are easy with that) - u32 remap = tex.remap(); + u32 control_bits = sampler_descriptors[i]->format_class == format_type::depth_float? (1u << 16) : 0u; + control_bits |= tex.remap() & 0xFFFF; result.redirected_textures |= (1 << i); - result.texture_scale[i][2] = std::bit_cast(remap); + result.texture_scale[i][2] = std::bit_cast(control_bits); break; } case CELL_GCM_TEXTURE_DEPTH16: diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index d8426be35b..082799ae40 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -316,6 +316,7 @@ namespace rsx rsx::surface_depth_format depth_format; rsx::surface_antialiasing aa_mode; u32 aa_factors[2]; + bool depth_float; bool ignore_change; }; diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 93f2882122..29402b7554 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -1309,7 +1309,7 @@ void VKGSRender::end() // Check if non-point filtering can even be used on this format bool can_sample_linear; - if (LIKELY(!sampler_state->is_depth_texture)) + if (LIKELY(sampler_state->format_class == rsx::format_type::color)) { // Most PS3-like formats can be linearly filtered without problem can_sample_linear = true; @@ -2887,6 +2887,7 @@ void 
VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) m_depth_surface_info.width = layout.width; m_depth_surface_info.height = layout.height; m_depth_surface_info.depth_format = layout.depth_format; + m_depth_surface_info.depth_buffer_float = layout.depth_float; m_depth_surface_info.bpp = (layout.depth_format == rsx::surface_depth_format::z16? 2 : 4); m_depth_surface_info.samples = samples; } @@ -2914,6 +2915,7 @@ void VKGSRender::prepare_rtts(rsx::framebuffer_creation_context context) if (std::get<0>(m_rtts.m_bound_depth_stencil) != 0) { auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); + ds->set_depth_render_mode(!layout.depth_float); m_fbo_images.push_back(ds); m_depth_surface_info.address = layout.zeta_address; diff --git a/rpcs3/Emu/RSX/rsx_decode.h b/rpcs3/Emu/RSX/rsx_decode.h index 256052a141..984c54c78b 100644 --- a/rpcs3/Emu/RSX/rsx_decode.h +++ b/rpcs3/Emu/RSX/rsx_decode.h @@ -4185,6 +4185,29 @@ struct registers_decoder } }; +template <> +struct registers_decoder +{ + struct decoded_type + { + private: + u32 value; + + public: + decoded_type(u32 value) : value(value) {} + + bool depth_float() const + { + return bf_decoder<12, 1>(value) != 0; + } + }; + + static std::string dump(decoded_type&& decoded_values) + { + return std::string("Depth float enabled: ") + (decoded_values.depth_float() ? 
"true" : "false"); /* '?:' is parenthesised because '+' binds tighter */ + } +}; + #define TRANSFORM_PROGRAM(index) template<> struct registers_decoder : public transform_program_helper {}; #define DECLARE_TRANSFORM_PROGRAM(index) NV4097_SET_TRANSFORM_PROGRAM + index, EXPAND_RANGE_512(0, TRANSFORM_PROGRAM) diff --git a/rpcs3/Emu/RSX/rsx_methods.h b/rpcs3/Emu/RSX/rsx_methods.h index ea30389980..ebe36a228e 100644 --- a/rpcs3/Emu/RSX/rsx_methods.h +++ b/rpcs3/Emu/RSX/rsx_methods.h @@ -1619,6 +1619,11 @@ namespace rsx { return decode().srgb_output_enabled(); } + + bool depth_buffer_float_enabled() + { + return decode().depth_float(); + } }; extern rsx_state method_registers; diff --git a/rpcs3/Emu/RSX/rsx_utils.h b/rpcs3/Emu/RSX/rsx_utils.h index f97f91b49c..2a011e163a 100644 --- a/rpcs3/Emu/RSX/rsx_utils.h +++ b/rpcs3/Emu/RSX/rsx_utils.h @@ -111,6 +111,7 @@ namespace rsx rsx::surface_color_format color_format; rsx::surface_depth_format depth_format; + bool depth_buffer_float = false; u16 width = 0; u16 height = 0;