From 1fd33f611c694543c1c4a131a53cf5dc0235c202 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Fri, 10 Mar 2017 16:27:38 +0300 Subject: [PATCH] gl: Fix texture cache bugs Fix endianness bug Fix r/w when real pitch is <= 64 --- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 25 +++++++++--- rpcs3/Emu/RSX/GL/GLGSRender.h | 10 +++-- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 58 ++++++++++++++++++++++------ rpcs3/Emu/RSX/GL/GLTextureCache.cpp | 4 +- rpcs3/Emu/RSX/GL/GLTextureCache.h | 12 ++++-- 5 files changed, 84 insertions(+), 25 deletions(-) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 55d6211c49..99777852cb 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -479,7 +479,6 @@ void GLGSRender::end() m_draw_calls++; - //LOG_WARNING(RSX, "Finished draw call, EID=%d", m_draw_calls); synchronize_buffers(); rsx::thread::end(); @@ -946,10 +945,26 @@ void GLGSRender::do_local_task() for (work_item& q: work_queue) { + if (q.processed) continue; + std::unique_lock lock(q.guard_mutex); - //Process this address - q.result = m_gl_texture_cache.flush_section(q.address_to_flush); + //Check if the suggested section is valid + if (!q.section_to_flush->is_flushed()) + { + q.section_to_flush->flush(); + q.result = true; + } + else + { + //TODO: Validate flushing requests before appending to queue. + //Highly unlikely that this path will be taken + LOG_ERROR(RSX, "Possible race condition for flush @address 0x%X", q.address_to_flush); + + //Process this address + q.result = m_gl_texture_cache.flush_section(q.address_to_flush); + } + q.processed = true; //Notify thread waiting on this @@ -958,13 +973,14 @@ void GLGSRender::do_local_task() } } -work_item& GLGSRender::post_flush_request(u32 address) +work_item& GLGSRender::post_flush_request(u32 address, gl::texture_cache::cached_rtt_section *section) { std::lock_guard lock(queue_guard); work_queue.emplace_back(); work_item &result = work_queue.back(); result.address_to_flush = address; + result.section_to_flush = section; return result; } @@ -972,7 +988,6 @@ void GLGSRender::synchronize_buffers() { if (flush_draw_buffers) { - //LOG_WARNING(RSX, "Flushing RTT buffers EID=%d", m_draw_calls); write_buffers(); flush_draw_buffers = false; } diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 4b05862475..c434edfc11 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -18,9 +18,11 @@ struct work_item std::mutex guard_mutex; u32 address_to_flush = 0; - bool processed = false; - bool result = false; - bool received = false; + gl::texture_cache::cached_rtt_section *section_to_flush = nullptr; + + volatile bool processed = false; + volatile bool result = false; + volatile bool received = false; }; struct gcm_buffer_info @@ -125,7 +127,7 @@ public: void set_viewport(); void synchronize_buffers(); - work_item& post_flush_request(u32 address); + work_item& post_flush_request(u32 address, gl::texture_cache::cached_rtt_section *section); protected: void begin() override; diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index f2a1591191..347f4d54c2 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -42,7 +42,7 @@ color_format rsx::internals::surface_color_format_to_gl(rsx::surface_color_forma return{ ::gl::texture::type::ubyte, ::gl::texture::format::rg, false, 2, 1 }; case rsx::surface_color_format::x32: - return{ ::gl::texture::type::f32, ::gl::texture::format::red, false, 1, 4 }; + return{ ::gl::texture::type::f32, ::gl::texture::format::red, true, 1, 4 }; default: LOG_ERROR(RSX, "Surface color buffer: Unsupported surface color format (0x%x)", (u32)color_format); @@ -160,7 +160,6 @@ void GLGSRender::init_buffers(bool skip_reading) } //We are about to change buffers, flush any pending requests for the old buffers - //LOG_WARNING(RSX, "Render targets have changed; checking for sync points (EID=%d)", m_draw_calls); synchronize_buffers(); m_rtts_dirty = false; @@ -189,6 +188,23 @@ void GLGSRender::init_buffers(bool skip_reading) std::get<1>(m_rtts.m_bound_render_targets[i])->set_rsx_pitch(pitchs[i]); surface_info[i] = { surface_addresses[i], pitchs[i], false, surface_format, depth_format, clip_horizontal, clip_vertical }; + + //Verify pitch given is correct if pitch <= 64 (especially 64) + if (pitchs[i] <= 64) + { + verify(HERE), pitchs[i] == 64; + + const u16 native_pitch = std::get<1>(m_rtts.m_bound_render_targets[i])->get_native_pitch(); + if (native_pitch > pitchs[i]) + { + LOG_WARNING(RSX, "Bad color surface pitch given: surface_width=%d, format=%d, pitch=%d, native_pitch=%d", + clip_horizontal, (u32)surface_format, pitchs[i], native_pitch); + + //Will not transfer this surface between cell and rsx due to misalignment + //TODO: Verify correct behaviour + surface_info[i].pitch = 0; + } + } } else surface_info[i] = {}; @@ -198,8 +214,26 @@ void GLGSRender::init_buffers(bool skip_reading) { __glcheck draw_fbo.depth = *std::get<1>(m_rtts.m_bound_depth_stencil); + const u32 depth_surface_pitch = rsx::method_registers.surface_z_pitch(); std::get<1>(m_rtts.m_bound_depth_stencil)->set_rsx_pitch(rsx::method_registers.surface_z_pitch()); - depth_surface_info = { depth_address, rsx::method_registers.surface_z_pitch(), true, surface_format, depth_format, clip_horizontal, clip_vertical }; + depth_surface_info = { depth_address, depth_surface_pitch, true, surface_format, depth_format, clip_horizontal, clip_vertical }; + + //Verify pitch given is correct if pitch <= 64 (especially 64) + if (depth_surface_pitch <= 64) + { + verify(HERE), depth_surface_pitch == 64; + + const u16 native_pitch = std::get<1>(m_rtts.m_bound_depth_stencil)->get_native_pitch(); + if (native_pitch > depth_surface_pitch) + { + LOG_WARNING(RSX, "Bad depth surface pitch given: surface_width=%d, format=%d, pitch=%d, native_pitch=%d", + clip_horizontal, (u32)depth_format, depth_surface_pitch, native_pitch); + + //Will not transfer this surface between cell and rsx due to misalignment + //TODO: Verify correct behaviour + depth_surface_info.pitch = 0; + } + } } else depth_surface_info = {}; @@ -247,17 +281,17 @@ void GLGSRender::init_buffers(bool skip_reading) for (u8 i = 0; i < rsx::limits::color_buffers_count; ++i) { - if (!surface_info[i].address || pitchs[i] <= 64) continue; + if (!surface_info[i].address || !surface_info[i].pitch) continue; const u32 range = surface_info[i].pitch * surface_info[i].height; m_gl_texture_cache.lock_rtt_region(surface_info[i].address, range, surface_info[i].width, surface_info[i].height, surface_info[i].pitch, - color_format.format, color_format.type, *std::get<1>(m_rtts.m_bound_render_targets[i])); + color_format.format, color_format.type, color_format.swap_bytes, *std::get<1>(m_rtts.m_bound_render_targets[i])); } } if (g_cfg_rsx_write_depth_buffer) { - if (depth_surface_info.address && rsx::method_registers.surface_z_pitch() > 64) + if (depth_surface_info.address && depth_surface_info.pitch) { auto depth_format_gl = rsx::internals::surface_depth_format_to_gl(depth_format); @@ -271,7 +305,7 @@ void GLGSRender::init_buffers(bool skip_reading) LOG_WARNING(RSX, "Depth surface pitch does not match computed pitch, %d vs %d", depth_surface_info.pitch, pitch); m_gl_texture_cache.lock_rtt_region(depth_surface_info.address, range, depth_surface_info.width, depth_surface_info.height, pitch, - depth_format_gl.format, depth_format_gl.type, *std::get<1>(m_rtts.m_bound_depth_stencil)); + depth_format_gl.format, depth_format_gl.type, true, *std::get<1>(m_rtts.m_bound_depth_stencil)); } } } @@ -316,7 +350,7 @@ void GLGSRender::read_buffers() u32 location = locations[i]; u32 pitch = pitchs[i]; - if (pitch <= 64) + if (!surface_info[i].pitch) continue; rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); @@ -375,9 +409,9 @@ void GLGSRender::read_buffers() if (g_cfg_rsx_read_depth_buffer) { //TODO: use pitch - u32 pitch = rsx::method_registers.surface_z_pitch(); + u32 pitch = depth_surface_info.pitch; - if (pitch <= 64) + if (!pitch) return; u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); @@ -432,7 +466,7 @@ void GLGSRender::write_buffers() { for (int i = index; i < index + count; ++i) { - if (surface_info[i].address == 0 || surface_info[i].pitch <= 64) + if (surface_info[i].pitch == 0) continue; /**Even tiles are loaded as whole textures during read_buffers from testing. @@ -451,7 +485,7 @@ void GLGSRender::write_buffers() if (g_cfg_rsx_write_depth_buffer) { //TODO: use pitch - if (!depth_surface_info.address || depth_surface_info.pitch <= 64) return; + if (depth_surface_info.pitch == 0) return; u32 range = depth_surface_info.width * depth_surface_info.height * 2; if (depth_surface_info.depth_format != rsx::surface_depth_format::z16) range *= 2; diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp index f91088864f..5c523bcfa4 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.cpp +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.cpp @@ -14,6 +14,7 @@ namespace gl return false; bool post_task = false; + cached_rtt_section* section_to_post = nullptr; { std::lock_guard lock(m_section_mutex); @@ -35,6 +36,7 @@ namespace gl if (std::this_thread::get_id() != m_renderer_thread) { post_task = true; + section_to_post = &rtt; break; } @@ -47,7 +49,7 @@ namespace gl if (post_task) { //LOG_WARNING(RSX, "Cache access not from worker thread! address = 0x%X", address); - work_item &task = m_renderer->post_flush_request(address); + work_item &task = m_renderer->post_flush_request(address, section_to_post); { std::unique_lock lock(task.guard_mutex); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index 8df1b616ec..60d9fe2f16 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -94,6 +94,7 @@ namespace gl texture::format format = texture::format::rgba; texture::type type = texture::type::ubyte; + bool pack_unpack_swap_bytes = false; u8 get_pixel_size(texture::format fmt_, texture::type type_) { @@ -268,10 +269,11 @@ namespace gl real_pitch = width * get_pixel_size(format, type); } - void set_format(texture::format gl_format, texture::type gl_type) + void set_format(texture::format gl_format, texture::type gl_type, bool swap_bytes) { format = gl_format; type = gl_type; + pack_unpack_swap_bytes = swap_bytes; real_pitch = current_width * get_pixel_size(format, type); } @@ -289,6 +291,7 @@ namespace gl return; } + glPixelStorei(GL_PACK_SWAP_BYTES, pack_unpack_swap_bytes); glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo_id); glGetTextureImageEXT(source_texture, GL_TEXTURE_2D, 0, (GLenum)format, (GLenum)type, nullptr); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); @@ -309,6 +312,7 @@ namespace gl u32 min_height = std::min((u32)tex.height(), current_height); tex.bind(); + glPixelStorei(GL_UNPACK_SWAP_BYTES, pack_unpack_swap_bytes); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_id); glTexSubImage2D((GLenum)tex.get_target(), 0, 0, 0, min_width, min_height, (GLenum)format, (GLenum)type, nullptr); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); @@ -341,7 +345,9 @@ namespace gl verify(HERE), data != nullptr; if (real_pitch >= current_pitch) + { memcpy(dst, data, cpu_address_range); + } else { //TODO: Use compression hint from the gcm tile information @@ -732,7 +738,7 @@ namespace gl region->copy_texture(); } - void lock_rtt_region(const u32 base, const u32 size, const u16 width, const u16 height, const u16 pitch, const texture::format format, const texture::type type, gl::texture &source) + void lock_rtt_region(const u32 base, const u32 size, const u16 width, const u16 height, const u16 pitch, const texture::format format, const texture::type type, const bool swap_bytes, gl::texture &source) { std::lock_guard lock(m_section_mutex); @@ -749,7 +755,7 @@ namespace gl } region->set_dimensions(width, height, pitch); - region->set_format(format, type); + region->set_format(format, type, swap_bytes); region->set_dirty(false); region->set_flushed(false); region->set_copied(false);