From 27aeaf66bc0be26ef8eee60208513b25ee6431d6 Mon Sep 17 00:00:00 2001 From: kd-11 Date: Tue, 20 Aug 2019 21:01:27 +0300 Subject: [PATCH] gl: Restructure buffer objects to give more control over usage - This allows creating buffers with no MAP bits set which should ensure they are created for VRAM usage only - TODO: Implement compute kernels to avoid software fallback mode for pack/unpack operations --- rpcs3/Emu/RSX/Common/texture_cache.h | 15 -- rpcs3/Emu/RSX/GL/GLGSRender.cpp | 2 +- rpcs3/Emu/RSX/GL/GLGSRender.h | 219 --------------------------- rpcs3/Emu/RSX/GL/GLHelpers.h | 150 +++++++++--------- rpcs3/Emu/RSX/GL/GLRenderTargets.cpp | 129 +--------------- rpcs3/Emu/RSX/GL/GLTexture.cpp | 5 +- 6 files changed, 84 insertions(+), 436 deletions(-) diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index de0665f444..92e5858393 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -1337,21 +1337,6 @@ namespace rsx } public: - template - bool load_memory_from_cache(const address_range &memory_range, Args&&... extras) - { - reader_lock lock(m_cache_mutex); - section_storage_type *region = find_flushable_section(memory_range); - - if (region && !region->is_dirty()) - { - region->fill_texture(std::forward(extras)...); - return true; - } - - //No valid object found in cache - return false; - } template thrashed_set invalidate_address(commandbuffer_type& cmd, u32 address, invalidation_cause cause, Args&&... extras) diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.cpp b/rpcs3/Emu/RSX/GL/GLGSRender.cpp index 51d00338a9..eb301340d6 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.cpp +++ b/rpcs3/Emu/RSX/GL/GLGSRender.cpp @@ -820,7 +820,7 @@ void GLGSRender::on_init_thread() if (gl_caps.vendor_AMD) { m_identity_index_buffer = std::make_unique(); - m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000); + m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000, nullptr, gl::buffer::memory_type::host_visible); // Initialize with 256k identity entries auto *dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write); diff --git a/rpcs3/Emu/RSX/GL/GLGSRender.h b/rpcs3/Emu/RSX/GL/GLGSRender.h index 8641d7c660..4de1004715 100644 --- a/rpcs3/Emu/RSX/GL/GLGSRender.h +++ b/rpcs3/Emu/RSX/GL/GLGSRender.h @@ -53,225 +53,6 @@ struct work_item } }; -struct driver_state -{ - const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001; - const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002; - const u32 DEPTH_RANGE_MIN = 0xFFFF0003; - const u32 DEPTH_RANGE_MAX = 0xFFFF0004; - - std::unordered_map properties = {}; - std::unordered_map> indexed_properties = {}; - - bool enable(u32 test, GLenum cap) - { - auto found = properties.find(cap); - if (found != properties.end() && found->second == test) - return !!test; - - properties[cap] = test; - - if (test) - glEnable(cap); - else - glDisable(cap); - - return !!test; - } - - bool enablei(u32 test, GLenum cap, u32 index) - { - auto found = indexed_properties.find(cap); - const bool exists = found != indexed_properties.end(); - - if (!exists) - { - indexed_properties[cap] = {}; - indexed_properties[cap][index] = test; - } - else - { - if (found->second[index] == test) - return !!test; - - found->second[index] = test; - } - - if (test) - glEnablei(cap, index); - else - glDisablei(cap, index); - - return !!test; - } - - inline bool test_property(GLenum property, u32 test) const - { - auto found = properties.find(property); - if (found == properties.end()) - return false; - - return (found->second == test); - } - - void depth_func(GLenum func) - { - if (!test_property(GL_DEPTH_FUNC, func)) - { - glDepthFunc(func); - properties[GL_DEPTH_FUNC] = func; - } - } - - void depth_mask(GLboolean mask) - { - if (!test_property(GL_DEPTH_WRITEMASK, mask)) - { - glDepthMask(mask); - properties[GL_DEPTH_WRITEMASK] = mask; - } - } - - void clear_depth(GLfloat depth) - { - u32 value = std::bit_cast(depth); - if (!test_property(GL_DEPTH_CLEAR_VALUE, value)) - { - glClearDepth(depth); - properties[GL_DEPTH_CLEAR_VALUE] = value; - } - } - - void stencil_mask(GLuint mask) - { - if (!test_property(GL_STENCIL_WRITEMASK, mask)) - { - glStencilMask(mask); - properties[GL_STENCIL_WRITEMASK] = mask; - } - } - - void clear_stencil(GLint stencil) - { - u32 value = std::bit_cast(stencil); - if (!test_property(GL_STENCIL_CLEAR_VALUE, value)) - { - glClearStencil(stencil); - properties[GL_STENCIL_CLEAR_VALUE] = value; - } - } - - void color_mask(u32 mask) - { - if (!test_property(GL_COLOR_WRITEMASK, mask)) - { - glColorMask(((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0)); - properties[GL_COLOR_WRITEMASK] = mask; - } - } - - void color_mask(bool r, bool g, bool b, bool a) - { - u32 mask = 0; - if (r) mask |= 0x10; - if (g) mask |= 0x20; - if (b) mask |= 0x40; - if (a) mask |= 0x80; - - color_mask(mask); - } - - void clear_color(u8 r, u8 g, u8 b, u8 a) - { - u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24; - if (!test_property(GL_COLOR_CLEAR_VALUE, value)) - { - glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f); - properties[GL_COLOR_CLEAR_VALUE] = value; - } - } - - void depth_bounds(float min, float max) - { - u32 depth_min = std::bit_cast(min); - u32 depth_max = std::bit_cast(max); - - if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max)) - { - glDepthBoundsEXT(min, max); - - properties[DEPTH_BOUNDS_MIN] = depth_min; - properties[DEPTH_BOUNDS_MAX] = depth_max; - } - } - - void depth_range(float min, float max) - { - u32 depth_min = std::bit_cast(min); - u32 depth_max = std::bit_cast(max); - - if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max)) - { - glDepthRange(min, max); - - properties[DEPTH_RANGE_MIN] = depth_min; - properties[DEPTH_RANGE_MAX] = depth_max; - } - } - - void logic_op(GLenum op) - { - if (!test_property(GL_COLOR_LOGIC_OP, op)) - { - glLogicOp(op); - properties[GL_COLOR_LOGIC_OP] = op; - } - } - - void line_width(GLfloat width) - { - u32 value = std::bit_cast(width); - - if (!test_property(GL_LINE_WIDTH, value)) - { - glLineWidth(width); - properties[GL_LINE_WIDTH] = value; - } - } - - void front_face(GLenum face) - { - if (!test_property(GL_FRONT_FACE, face)) - { - glFrontFace(face); - properties[GL_FRONT_FACE] = face; - } - } - - void cull_face(GLenum mode) - { - if (!test_property(GL_CULL_FACE_MODE, mode)) - { - glCullFace(mode); - properties[GL_CULL_FACE_MODE] = mode; - } - } - - void polygon_offset(float factor, float units) - { - u32 _units = std::bit_cast(units); - u32 _factor = std::bit_cast(factor); - - if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor)) - { - glPolygonOffset(factor, units); - - properties[GL_POLYGON_OFFSET_UNITS] = _units; - properties[GL_POLYGON_OFFSET_FACTOR] = _factor; - } - } -}; - class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control { private: diff --git a/rpcs3/Emu/RSX/GL/GLHelpers.h b/rpcs3/Emu/RSX/GL/GLHelpers.h index 551984405b..974009fc4f 100644 --- a/rpcs3/Emu/RSX/GL/GLHelpers.h +++ b/rpcs3/Emu/RSX/GL/GLHelpers.h @@ -688,6 +688,7 @@ namespace gl uniform = GL_UNIFORM_BUFFER, texture = GL_TEXTURE_BUFFER }; + enum class access { read = GL_READ_ONLY, @@ -695,25 +696,12 @@ namespace gl read_write = GL_READ_WRITE }; - protected: - GLuint m_id = GL_NONE; - GLsizeiptr m_size = 0; - target m_target = target::array; - - public: - buffer() = default; - buffer(const buffer&) = delete; - - buffer(GLuint id) + enum class memory_type { - set_id(id); - } - - ~buffer() - { - if (created()) - remove(); - } + undefined = 0, + local = 1, + host_visible = 2 + }; class save_binding_state { @@ -750,6 +738,65 @@ namespace gl } }; + protected: + GLuint m_id = GL_NONE; + GLsizeiptr m_size = 0; + target m_target = target::array; + memory_type m_memory_type = memory_type::undefined; + + void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage) + { + if (get_driver_caps().ARB_buffer_storage_supported) + { + target target_ = current_target(); + save_binding_state save(target_, *this); + GLenum flags = 0; + + if (type == memory_type::host_visible) + { + switch (usage) + { + case GL_STREAM_DRAW: + case GL_STATIC_DRAW: + case GL_DYNAMIC_DRAW: + flags |= GL_MAP_WRITE_BIT; + break; + case GL_STREAM_READ: + case GL_STATIC_READ: + case GL_DYNAMIC_READ: + flags |= GL_MAP_READ_BIT; + break; + default: + fmt::throw_exception("Unsupported buffer usage 0x%x", usage); + } + } + + glBufferStorage((GLenum)target_, size, data_, flags); + m_size = size; + } + else + { + data(size, data_, usage); + } + + m_memory_type = type; + } + + public: + buffer() = default; + buffer(const buffer&) = delete; + + buffer(GLuint id) + { + set_id(id); + } + + ~buffer() + { + if (created()) + remove(); + } + void recreate() { if (created()) @@ -775,32 +822,17 @@ namespace gl glGenBuffers(1, &m_id); } - void create(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) + void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW) { create(); - data(size, data_, usage); + allocate(size, data_, type, usage); } - void create(target target_, GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) + void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW) { create(); m_target = target_; - data(size, data_, usage); - } - - void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) - { - target target_ = current_target(); - save_binding_state save(target_, *this); - glBufferData((GLenum)target_, size, data_, usage); - m_size = size; - } - - void sub_data(GLintptr offset, GLsizeiptr size, const void* data_ = nullptr) - { - target target_ = current_target(); - save_binding_state save(target_, *this); - glBufferSubData((GLenum)target_, offset, size, data_); + allocate(size, data_, type, usage); } void bind(target target_) const @@ -849,50 +881,27 @@ namespace gl return created(); } - void map(const std::function& impl, access access_) + void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW) { + verify(HERE), m_memory_type == memory_type::undefined; + target target_ = current_target(); save_binding_state save(target_, *this); - - if (GLubyte* ptr = (GLubyte*)glMapBuffer((GLenum)target_, (GLenum)access_)) - { - impl(ptr); - glUnmapBuffer((GLenum)target_); - } + glBufferData((GLenum)target_, size, data_, usage); + m_size = size; } - class mapper - { - buffer *m_parent; - GLubyte *m_data; - - public: - mapper(buffer& parent, access access_) - { - m_parent = &parent; - m_data = parent.map(access_); - } - - ~mapper() - { - m_parent->unmap(); - } - - GLubyte* get() const - { - return m_data; - } - }; - GLubyte* map(access access_) { - bind(current_target()); + verify(HERE), m_memory_type == memory_type::host_visible; + bind(current_target()); return (GLubyte*)glMapBuffer((GLenum)current_target(), (GLenum)access_); } void unmap() { + verify(HERE), m_memory_type == memory_type::host_visible; glUnmapBuffer((GLenum)current_target()); } }; @@ -1010,8 +1019,9 @@ namespace gl remove(); buffer::create(); - buffer::data(size, data); + buffer::data(size, data, GL_DYNAMIC_DRAW); + m_memory_type = memory_type::host_visible; m_memory_mapping = nullptr; m_data_loc = 0; m_size = ::narrow(size); @@ -1034,7 +1044,7 @@ namespace gl if ((offset + block_size) > m_size) { - buffer::data(m_size, nullptr); + buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW); m_data_loc = 0; } diff --git a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp index ac519dea2e..8c568db1eb 100644 --- a/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp +++ b/rpcs3/Emu/RSX/GL/GLRenderTargets.cpp @@ -426,134 +426,7 @@ std::array, 2> GLGSRender::copy_depth_stencil_buffer_to_m void GLGSRender::read_buffers() { - if (!m_draw_fbo) - return; - - glDisable(GL_STENCIL_TEST); - - if (g_cfg.video.read_color_buffers) - { - auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color()); - - auto read_color_buffers = [&](int index, int count) - { - const u32 width = rsx::method_registers.surface_clip_width(); - const u32 height = rsx::method_registers.surface_clip_height(); - - const std::array offsets = get_offsets(); - const std::array locations = get_locations(); - const std::array pitchs = get_pitchs(); - - for (int i = index; i < index + count; ++i) - { - const u32 offset = offsets[i]; - const u32 location = locations[i]; - const u32 pitch = pitchs[i]; - - if (!m_surface_info[i].pitch) - continue; - - rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf); - u32 texaddr = vm::get_addr(color_buffer.ptr); - - const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height); - bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i])); - - //Fall back to slower methods if the image could not be fetched from cache. - if (!success) - { - if (!color_buffer.tile) - { - std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type); - } - else - { - std::unique_ptr buffer(new u8[pitch * height]); - color_buffer.read(buffer.get(), width, height, pitch); - - std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type); - } - } - } - }; - - switch (rsx::method_registers.surface_color_target()) - { - case rsx::surface_target::none: - break; - - case rsx::surface_target::surface_a: - read_color_buffers(0, 1); - break; - - case rsx::surface_target::surface_b: - read_color_buffers(1, 1); - break; - - case rsx::surface_target::surfaces_a_b: - read_color_buffers(0, 2); - break; - - case rsx::surface_target::surfaces_a_b_c: - read_color_buffers(0, 3); - break; - - case rsx::surface_target::surfaces_a_b_c_d: - read_color_buffers(0, 4); - break; - } - } - - if (g_cfg.video.read_depth_buffer) - { - //TODO: use pitch - const u32 pitch = m_depth_surface_info.pitch; - const u32 width = rsx::method_registers.surface_clip_width(); - const u32 height = rsx::method_registers.surface_clip_height(); - - if (!pitch) - return; - - const u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - const utils::address_range range = utils::address_range::start_length(depth_address, pitch * height); - bool in_cache = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_depth_stencil)); - - if (in_cache) - return; - - //Read failed. Fall back to slow s/w path... - - auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt()); - int pixel_size = rsx::internals::get_pixel_size(rsx::method_registers.surface_depth_fmt()); - gl::buffer pbo_depth; - - pbo_depth.create(width * height * pixel_size); - pbo_depth.map([&](GLubyte* pixels) - { - u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma()); - - if (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z16) - { - u16 *dst = (u16*)pixels; - const be_t* src = vm::_ptr(depth_address); - for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i) - { - dst[i] = src[i]; - } - } - else - { - u32 *dst = (u32*)pixels; - const be_t* src = vm::_ptr(depth_address); - for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i) - { - dst[i] = src[i]; - } - } - }, gl::buffer::access::write); - - std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type); - } + // TODO } void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init) diff --git a/rpcs3/Emu/RSX/GL/GLTexture.cpp b/rpcs3/Emu/RSX/GL/GLTexture.cpp index 72bc6003b8..e060ce0660 100644 --- a/rpcs3/Emu/RSX/GL/GLTexture.cpp +++ b/rpcs3/Emu/RSX/GL/GLTexture.cpp @@ -738,7 +738,7 @@ namespace gl if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size()) { if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove(); - g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer_copy_flag); + g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, buffer_copy_flag); } auto format_type = get_format_type(src->get_internal_format()); @@ -746,14 +746,13 @@ namespace gl pack_settings.swap_bytes(std::get<2>(format_type)); g_typeless_transfer_buffer.bind(buffer::target::pixel_pack); src->copy_to(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), pack_settings); + glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); format_type = get_format_type(dst->get_internal_format()); pixel_unpack_settings unpack_settings{}; unpack_settings.swap_bytes(std::get<2>(format_type)); g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack); dst->copy_from(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), unpack_settings); - - glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE); glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE); } }