gl: Restructure buffer objects to give more control over usage

- This allows creating buffers with no MAP bits set, which should ensure they are created for VRAM usage only
- TODO: Implement compute kernels to avoid software fallback mode for pack/unpack operations
This commit is contained in:
kd-11 2019-08-20 21:01:27 +03:00 committed by kd-11
parent 9672ad53cd
commit 27aeaf66bc
6 changed files with 84 additions and 436 deletions

View File

@ -1337,21 +1337,6 @@ namespace rsx
}
public:
template <typename ...Args>
bool load_memory_from_cache(const address_range &memory_range, Args&&... extras)
{
reader_lock lock(m_cache_mutex);
section_storage_type *region = find_flushable_section(memory_range);
if (region && !region->is_dirty())
{
region->fill_texture(std::forward<Args>(extras)...);
return true;
}
//No valid object found in cache
return false;
}
template <typename ...Args>
thrashed_set invalidate_address(commandbuffer_type& cmd, u32 address, invalidation_cause cause, Args&&... extras)

View File

@ -820,7 +820,7 @@ void GLGSRender::on_init_thread()
if (gl_caps.vendor_AMD)
{
m_identity_index_buffer = std::make_unique<gl::buffer>();
m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000);
m_identity_index_buffer->create(gl::buffer::target::element_array, 1 * 0x100000, nullptr, gl::buffer::memory_type::host_visible);
// Initialize with 256k identity entries
auto *dst = (u32*)m_identity_index_buffer->map(gl::buffer::access::write);

View File

@ -53,225 +53,6 @@ struct work_item
}
};
struct driver_state
{
const u32 DEPTH_BOUNDS_MIN = 0xFFFF0001;
const u32 DEPTH_BOUNDS_MAX = 0xFFFF0002;
const u32 DEPTH_RANGE_MIN = 0xFFFF0003;
const u32 DEPTH_RANGE_MAX = 0xFFFF0004;
std::unordered_map<GLenum, u32> properties = {};
std::unordered_map<GLenum, std::array<u32, 4>> indexed_properties = {};
bool enable(u32 test, GLenum cap)
{
auto found = properties.find(cap);
if (found != properties.end() && found->second == test)
return !!test;
properties[cap] = test;
if (test)
glEnable(cap);
else
glDisable(cap);
return !!test;
}
bool enablei(u32 test, GLenum cap, u32 index)
{
auto found = indexed_properties.find(cap);
const bool exists = found != indexed_properties.end();
if (!exists)
{
indexed_properties[cap] = {};
indexed_properties[cap][index] = test;
}
else
{
if (found->second[index] == test)
return !!test;
found->second[index] = test;
}
if (test)
glEnablei(cap, index);
else
glDisablei(cap, index);
return !!test;
}
inline bool test_property(GLenum property, u32 test) const
{
auto found = properties.find(property);
if (found == properties.end())
return false;
return (found->second == test);
}
void depth_func(GLenum func)
{
if (!test_property(GL_DEPTH_FUNC, func))
{
glDepthFunc(func);
properties[GL_DEPTH_FUNC] = func;
}
}
void depth_mask(GLboolean mask)
{
if (!test_property(GL_DEPTH_WRITEMASK, mask))
{
glDepthMask(mask);
properties[GL_DEPTH_WRITEMASK] = mask;
}
}
void clear_depth(GLfloat depth)
{
u32 value = std::bit_cast<u32>(depth);
if (!test_property(GL_DEPTH_CLEAR_VALUE, value))
{
glClearDepth(depth);
properties[GL_DEPTH_CLEAR_VALUE] = value;
}
}
void stencil_mask(GLuint mask)
{
if (!test_property(GL_STENCIL_WRITEMASK, mask))
{
glStencilMask(mask);
properties[GL_STENCIL_WRITEMASK] = mask;
}
}
void clear_stencil(GLint stencil)
{
u32 value = std::bit_cast<u32>(stencil);
if (!test_property(GL_STENCIL_CLEAR_VALUE, value))
{
glClearStencil(stencil);
properties[GL_STENCIL_CLEAR_VALUE] = value;
}
}
void color_mask(u32 mask)
{
if (!test_property(GL_COLOR_WRITEMASK, mask))
{
glColorMask(((mask & 0x10) ? 1 : 0), ((mask & 0x20) ? 1 : 0), ((mask & 0x40) ? 1 : 0), ((mask & 0x80) ? 1 : 0));
properties[GL_COLOR_WRITEMASK] = mask;
}
}
void color_mask(bool r, bool g, bool b, bool a)
{
u32 mask = 0;
if (r) mask |= 0x10;
if (g) mask |= 0x20;
if (b) mask |= 0x40;
if (a) mask |= 0x80;
color_mask(mask);
}
void clear_color(u8 r, u8 g, u8 b, u8 a)
{
u32 value = (u32)r | (u32)g << 8 | (u32)b << 16 | (u32)a << 24;
if (!test_property(GL_COLOR_CLEAR_VALUE, value))
{
glClearColor(r / 255.f, g / 255.f, b / 255.f, a / 255.f);
properties[GL_COLOR_CLEAR_VALUE] = value;
}
}
void depth_bounds(float min, float max)
{
u32 depth_min = std::bit_cast<u32>(min);
u32 depth_max = std::bit_cast<u32>(max);
if (!test_property(DEPTH_BOUNDS_MIN, depth_min) || !test_property(DEPTH_BOUNDS_MAX, depth_max))
{
glDepthBoundsEXT(min, max);
properties[DEPTH_BOUNDS_MIN] = depth_min;
properties[DEPTH_BOUNDS_MAX] = depth_max;
}
}
void depth_range(float min, float max)
{
u32 depth_min = std::bit_cast<u32>(min);
u32 depth_max = std::bit_cast<u32>(max);
if (!test_property(DEPTH_RANGE_MIN, depth_min) || !test_property(DEPTH_RANGE_MAX, depth_max))
{
glDepthRange(min, max);
properties[DEPTH_RANGE_MIN] = depth_min;
properties[DEPTH_RANGE_MAX] = depth_max;
}
}
void logic_op(GLenum op)
{
if (!test_property(GL_COLOR_LOGIC_OP, op))
{
glLogicOp(op);
properties[GL_COLOR_LOGIC_OP] = op;
}
}
void line_width(GLfloat width)
{
u32 value = std::bit_cast<u32>(width);
if (!test_property(GL_LINE_WIDTH, value))
{
glLineWidth(width);
properties[GL_LINE_WIDTH] = value;
}
}
void front_face(GLenum face)
{
if (!test_property(GL_FRONT_FACE, face))
{
glFrontFace(face);
properties[GL_FRONT_FACE] = face;
}
}
void cull_face(GLenum mode)
{
if (!test_property(GL_CULL_FACE_MODE, mode))
{
glCullFace(mode);
properties[GL_CULL_FACE_MODE] = mode;
}
}
void polygon_offset(float factor, float units)
{
u32 _units = std::bit_cast<u32>(units);
u32 _factor = std::bit_cast<u32>(factor);
if (!test_property(GL_POLYGON_OFFSET_UNITS, _units) || !test_property(GL_POLYGON_OFFSET_FACTOR, _factor))
{
glPolygonOffset(factor, units);
properties[GL_POLYGON_OFFSET_UNITS] = _units;
properties[GL_POLYGON_OFFSET_FACTOR] = _factor;
}
}
};
class GLGSRender : public GSRender, public ::rsx::reports::ZCULL_control
{
private:

View File

@ -688,6 +688,7 @@ namespace gl
uniform = GL_UNIFORM_BUFFER,
texture = GL_TEXTURE_BUFFER
};
enum class access
{
read = GL_READ_ONLY,
@ -695,25 +696,12 @@ namespace gl
read_write = GL_READ_WRITE
};
protected:
GLuint m_id = GL_NONE;
GLsizeiptr m_size = 0;
target m_target = target::array;
public:
buffer() = default;
buffer(const buffer&) = delete;
buffer(GLuint id)
enum class memory_type
{
set_id(id);
}
~buffer()
{
if (created())
remove();
}
undefined = 0,
local = 1,
host_visible = 2
};
class save_binding_state
{
@ -750,6 +738,65 @@ namespace gl
}
};
protected:
// OpenGL buffer object name (filled in by glGenBuffers); GL_NONE by default
GLuint m_id = GL_NONE;
// Size in bytes recorded by the most recent storage allocation
GLsizeiptr m_size = 0;
// Target requested at creation time; defaults to target::array
target m_target = target::array;
// Memory class recorded by allocate(); map()/unmap() verify it is host_visible
memory_type m_memory_type = memory_type::undefined;
/**
 * Allocates the data store for this buffer and records its memory class.
 *
 * Without ARB_buffer_storage the request is handed to data(). With it, the
 * store is created through glBufferStorage; map bits are only requested for
 * host_visible buffers, derived from `usage`:
 *   *_DRAW usages -> GL_MAP_WRITE_BIT, *_READ usages -> GL_MAP_READ_BIT.
 * Any other usage on a host_visible buffer raises an exception. Buffers of
 * any other memory type are created with no flags at all.
 *
 * @param size   Size of the store in bytes.
 * @param data_  Optional initial contents (may be null).
 * @param type   Memory class to record in m_memory_type.
 * @param usage  GL usage hint (GL_STREAM_DRAW, GL_STATIC_READ, ...).
 */
void allocate(GLsizeiptr size, const void* data_, memory_type type, GLenum usage)
{
    if (!get_driver_caps().ARB_buffer_storage_supported)
    {
        // Legacy path: mutable storage via glBufferData
        data(size, data_, usage);
    }
    else
    {
        const target bound_target = current_target();
        // Keeps this buffer bound for the duration of this scope
        save_binding_state binding_guard(bound_target, *this);

        GLenum storage_flags = 0;

        if (type == memory_type::host_visible)
        {
            const bool cpu_writes =
                usage == GL_STREAM_DRAW || usage == GL_STATIC_DRAW || usage == GL_DYNAMIC_DRAW;
            const bool cpu_reads =
                usage == GL_STREAM_READ || usage == GL_STATIC_READ || usage == GL_DYNAMIC_READ;

            if (cpu_writes)
            {
                storage_flags |= GL_MAP_WRITE_BIT;
            }
            else if (cpu_reads)
            {
                storage_flags |= GL_MAP_READ_BIT;
            }
            else
            {
                fmt::throw_exception("Unsupported buffer usage 0x%x", usage);
            }
        }

        glBufferStorage(static_cast<GLenum>(bound_target), size, data_, storage_flags);
        m_size = size;
    }

    m_memory_type = type;
}
public:
// Default construction leaves every member at its in-class initialiser
buffer() = default;

// Copying is disabled
buffer(const buffer&) = delete;

// Constructs the wrapper around an existing id by passing it to set_id()
buffer(GLuint id)
{
    set_id(id);
}

// Calls remove() on destruction, but only when created() reports true
~buffer()
{
    if (!created())
        return;

    remove();
}
void recreate()
{
if (created())
@ -775,32 +822,17 @@ namespace gl
glGenBuffers(1, &m_id);
}
void create(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
void create(GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
{
create();
data(size, data_, usage);
allocate(size, data_, type, usage);
}
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
void create(target target_, GLsizeiptr size, const void* data_ = nullptr, memory_type type = memory_type::local, GLenum usage = GL_STREAM_DRAW)
{
create();
m_target = target_;
data(size, data_, usage);
}
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
{
target target_ = current_target();
save_binding_state save(target_, *this);
glBufferData((GLenum)target_, size, data_, usage);
m_size = size;
}
void sub_data(GLintptr offset, GLsizeiptr size, const void* data_ = nullptr)
{
target target_ = current_target();
save_binding_state save(target_, *this);
glBufferSubData((GLenum)target_, offset, size, data_);
allocate(size, data_, type, usage);
}
void bind(target target_) const
@ -849,50 +881,27 @@ namespace gl
return created();
}
void map(const std::function<void(GLubyte*)>& impl, access access_)
void data(GLsizeiptr size, const void* data_ = nullptr, GLenum usage = GL_STREAM_DRAW)
{
verify(HERE), m_memory_type == memory_type::undefined;
target target_ = current_target();
save_binding_state save(target_, *this);
if (GLubyte* ptr = (GLubyte*)glMapBuffer((GLenum)target_, (GLenum)access_))
{
impl(ptr);
glUnmapBuffer((GLenum)target_);
glBufferData((GLenum)target_, size, data_, usage);
m_size = size;
}
}
/**
 * Scoped mapping of a buffer: maps the parent buffer on construction and
 * unmaps it on destruction.
 *
 * Copying is disabled because every instance unmaps the parent in its
 * destructor; a copy would unmap the same buffer a second time.
 */
class mapper
{
    buffer *m_parent;
    GLubyte *m_data;

public:
    /**
     * @param parent   Buffer to map; must outlive this object.
     * @param access_  Access mode passed to buffer::map().
     */
    mapper(buffer& parent, access access_)
        : m_parent(&parent)
        , m_data(parent.map(access_))
    {
    }

    mapper(const mapper&) = delete;
    mapper& operator=(const mapper&) = delete;

    ~mapper()
    {
        m_parent->unmap();
    }

    // Pointer returned by buffer::map() at construction time
    GLubyte* get() const
    {
        return m_data;
    }
};
/**
 * Maps the buffer's data store and returns a pointer to it.
 *
 * Only host_visible buffers may be mapped; this is verified before any GL
 * state is touched. The buffer is then bound once to its current target and
 * mapped through glMapBuffer. The buffer stays bound on return.
 *
 * @param access_  Requested access (read, write or read_write).
 * @return Whatever glMapBuffer returns, cast to GLubyte*.
 */
GLubyte* map(access access_)
{
    verify(HERE), m_memory_type == memory_type::host_visible;

    const target target_ = current_target();
    bind(target_);

    return static_cast<GLubyte*>(glMapBuffer(static_cast<GLenum>(target_), static_cast<GLenum>(access_)));
}
void unmap()
{
verify(HERE), m_memory_type == memory_type::host_visible;
glUnmapBuffer((GLenum)current_target());
}
};
@ -1010,8 +1019,9 @@ namespace gl
remove();
buffer::create();
buffer::data(size, data);
buffer::data(size, data, GL_DYNAMIC_DRAW);
m_memory_type = memory_type::host_visible;
m_memory_mapping = nullptr;
m_data_loc = 0;
m_size = ::narrow<u32>(size);
@ -1034,7 +1044,7 @@ namespace gl
if ((offset + block_size) > m_size)
{
buffer::data(m_size, nullptr);
buffer::data(m_size, nullptr, GL_DYNAMIC_DRAW);
m_data_loc = 0;
}

View File

@ -426,134 +426,7 @@ std::array<std::vector<gsl::byte>, 2> GLGSRender::copy_depth_stencil_buffer_to_m
// Loads the contents of the currently bound colour and depth surfaces from
// guest memory into their GL render targets, as selected by the
// g_cfg.video.read_color_buffers / read_depth_buffer settings.
// Does nothing when there is no draw framebuffer.
void GLGSRender::read_buffers()
{
if (!m_draw_fbo)
return;
// Issued directly through GL; the stencil test is not re-enabled in this function
glDisable(GL_STENCIL_TEST);
if (g_cfg.video.read_color_buffers)
{
auto color_format = rsx::internals::surface_color_format_to_gl(rsx::method_registers.surface_color());
// Loads `count` consecutive colour surfaces starting at slot `index`
auto read_color_buffers = [&](int index, int count)
{
const u32 width = rsx::method_registers.surface_clip_width();
const u32 height = rsx::method_registers.surface_clip_height();
const std::array<u32, 4> offsets = get_offsets();
const std::array<u32, 4 > locations = get_locations();
const std::array<u32, 4 > pitchs = get_pitchs();
for (int i = index; i < index + count; ++i)
{
const u32 offset = offsets[i];
const u32 location = locations[i];
const u32 pitch = pitchs[i];
// Slots whose recorded surface pitch is zero are skipped
if (!m_surface_info[i].pitch)
continue;
rsx::tiled_region color_buffer = get_tiled_address(offset, location & 0xf);
u32 texaddr = vm::get_addr(color_buffer.ptr);
const utils::address_range range = utils::address_range::start_length(texaddr, pitch * height);
// First choice: let the texture cache fill the render target
bool success = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_render_targets[i]));
//Fall back to slower methods if the image could not be fetched from cache.
if (!success)
{
if (!color_buffer.tile)
{
// No tile attached: upload straight from the surface's memory pointer
std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(color_buffer.ptr, color_format.format, color_format.type);
}
else
{
// Tiled region: read it into a temporary buffer of pitch * height bytes, then upload
std::unique_ptr<u8[]> buffer(new u8[pitch * height]);
color_buffer.read(buffer.get(), width, height, pitch);
std::get<1>(m_rtts.m_bound_render_targets[i])->copy_from(buffer.get(), color_format.format, color_format.type);
}
}
}
};
// Pick the slot range matching the active colour target configuration
switch (rsx::method_registers.surface_color_target())
{
case rsx::surface_target::none:
break;
case rsx::surface_target::surface_a:
read_color_buffers(0, 1);
break;
case rsx::surface_target::surface_b:
read_color_buffers(1, 1);
break;
case rsx::surface_target::surfaces_a_b:
read_color_buffers(0, 2);
break;
case rsx::surface_target::surfaces_a_b_c:
read_color_buffers(0, 3);
break;
case rsx::surface_target::surfaces_a_b_c_d:
read_color_buffers(0, 4);
break;
}
}
if (g_cfg.video.read_depth_buffer)
{
//TODO: use pitch
const u32 pitch = m_depth_surface_info.pitch;
const u32 width = rsx::method_registers.surface_clip_width();
const u32 height = rsx::method_registers.surface_clip_height();
// A zero depth pitch ends the whole function here
if (!pitch)
return;
const u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
const utils::address_range range = utils::address_range::start_length(depth_address, pitch * height);
// First choice: let the texture cache fill the depth target
bool in_cache = m_gl_texture_cache.load_memory_from_cache(range, std::get<1>(m_rtts.m_bound_depth_stencil));
if (in_cache)
return;
//Read failed. Fall back to slow s/w path...
auto depth_format = rsx::internals::surface_depth_format_to_gl(rsx::method_registers.surface_depth_fmt());
int pixel_size = rsx::internals::get_pixel_size(rsx::method_registers.surface_depth_fmt());
// Staging buffer sized from the clip dimensions: width * height * pixel_size bytes
gl::buffer pbo_depth;
pbo_depth.create(width * height * pixel_size);
// Map the staging buffer for writing and copy the depth values into it element by element
pbo_depth.map([&](GLubyte* pixels)
{
// Shadows the outer depth_address with the same expression
u32 depth_address = rsx::get_address(rsx::method_registers.surface_z_offset(), rsx::method_registers.surface_z_dma());
if (rsx::method_registers.surface_depth_fmt() == rsx::surface_depth_format::z16)
{
// z16: 16-bit elements read through be_t<u16>
u16 *dst = (u16*)pixels;
const be_t<u16>* src = vm::_ptr<u16>(depth_address);
// NOTE(review): element count comes from the bound depth surface's width()*height(),
// while the buffer was sized from the clip dimensions - confirm the two always agree
for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i)
{
dst[i] = src[i];
}
}
else
{
// Any other depth format: 32-bit elements read through be_t<u32>
u32 *dst = (u32*)pixels;
const be_t<u32>* src = vm::_ptr<u32>(depth_address);
// NOTE(review): same element-count vs. buffer-size question as in the z16 branch
for (int i = 0, end = std::get<1>(m_rtts.m_bound_depth_stencil)->width() * std::get<1>(m_rtts.m_bound_depth_stencil)->height(); i < end; ++i)
{
dst[i] = src[i];
}
}
}, gl::buffer::access::write);
// Upload the staged depth data into the bound depth target
std::get<1>(m_rtts.m_bound_depth_stencil)->copy_from(pbo_depth, depth_format.format, depth_format.type);
}
// TODO
}
void gl::render_target::memory_barrier(gl::command_context& cmd, bool force_init)

View File

@ -738,7 +738,7 @@ namespace gl
if (!g_typeless_transfer_buffer || max_mem > g_typeless_transfer_buffer.size())
{
if (g_typeless_transfer_buffer) g_typeless_transfer_buffer.remove();
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer_copy_flag);
g_typeless_transfer_buffer.create(buffer::target::pixel_pack, max_mem, nullptr, buffer::memory_type::local, buffer_copy_flag);
}
auto format_type = get_format_type(src->get_internal_format());
@ -746,14 +746,13 @@ namespace gl
pack_settings.swap_bytes(std::get<2>(format_type));
g_typeless_transfer_buffer.bind(buffer::target::pixel_pack);
src->copy_to(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), pack_settings);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
format_type = get_format_type(dst->get_internal_format());
pixel_unpack_settings unpack_settings{};
unpack_settings.swap_bytes(std::get<2>(format_type));
g_typeless_transfer_buffer.bind(buffer::target::pixel_unpack);
dst->copy_from(nullptr, (texture::format)std::get<0>(format_type), (texture::type)std::get<1>(format_type), unpack_settings);
glBindBuffer(GL_PIXEL_PACK_BUFFER, GL_NONE);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER, GL_NONE);
}
}