renderer_opengl: Implement a buffer cache.

The idea of this cache is to avoid redundant uploads, so we are going
to cache the buffers already uploaded to the stream_buffer and just
reuse the old pointers.
The next step is to implement a VBO cache in GPU memory, but for now
I want to check the overhead of the cache management. Fetching the
buffer over PCI-E should be quite fast.
This commit is contained in:
Markus Wick 2018-08-29 00:27:03 +02:00
parent a1ef02c3e6
commit 50a806ea67
5 changed files with 182 additions and 86 deletions

View File

@ -22,6 +22,7 @@ add_library(video_core STATIC
rasterizer_interface.h rasterizer_interface.h
renderer_base.cpp renderer_base.cpp
renderer_base.h renderer_base.h
renderer_opengl/gl_buffer_cache.cpp
renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.cpp
renderer_opengl/gl_rasterizer.h renderer_opengl/gl_rasterizer.h
renderer_opengl/gl_rasterizer_cache.cpp renderer_opengl/gl_rasterizer_cache.cpp

View File

@ -0,0 +1,90 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/alignment.h"
#include "common/assert.h"
#include "core/core.h"
#include "core/memory.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
namespace OpenGL {
/// Creates the cache on top of a GL_ARRAY_BUFFER stream buffer constructed with the given size.
OGLBufferCache::OGLBufferCache(size_t size)
    : stream_buffer(GL_ARRAY_BUFFER, size) {
}
/**
 * Copies `size` bytes of guest memory located at `gpu_addr` into the stream buffer and returns
 * the offset inside the stream buffer at which the data can be found.
 *
 * Writes go through buffer_ptr/buffer_offset, which are set up by Map(); this is therefore
 * expected to be called between Map() and Unmap().
 *
 * @param gpu_addr  GPU virtual address of the data; translated to a CPU address internally.
 * @param size      Number of bytes to upload.
 * @param alignment Alignment applied to the offset inside the stream buffer before writing.
 * @param cache     Whether a previously uploaded copy may be reused and whether this upload
 *                  is registered for later reuse. Ignored for uploads below 2048 bytes.
 * @returns Offset of the data inside the stream buffer.
 */
GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment,
                                      bool cache) {
    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
    const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
    // Every path below dereferences cpu_addr, so fail loudly on an untranslatable address
    // instead of reading from an empty optional.
    ASSERT_MSG(cpu_addr, "Invalid GPU address");

    // Cache management is a big overhead, so only cache entries with a given size.
    // TODO: Figure out which size is the best for given games.
    constexpr size_t min_cached_size = 2048;
    cache &= size >= min_cached_size;

    if (cache) {
        auto entry = TryGet(*cpu_addr);
        if (entry) {
            // A cached upload is reusable only if it covers the requested range and was
            // placed with the same alignment.
            if (entry->size >= size && entry->alignment == alignment) {
                return entry->offset;
            }
            // The stale entry is replaced by the fresh upload registered below.
            Unregister(entry);
        }
    }

    AlignBuffer(alignment);
    const GLintptr uploaded_offset = buffer_offset;

    Memory::ReadBlock(*cpu_addr, buffer_ptr, size);

    buffer_ptr += size;
    buffer_offset += size;

    if (cache) {
        auto entry = std::make_shared<CachedBufferEntry>();
        entry->offset = uploaded_offset;
        entry->size = size;
        entry->alignment = alignment;
        entry->addr = *cpu_addr;
        Register(entry);
    }

    return uploaded_offset;
}
/**
 * Copies `size` bytes from host memory into the stream buffer at the next offset that
 * satisfies `alignment`. Nothing is looked up in or registered with the cache here.
 *
 * @returns Offset inside the stream buffer at which the data was written.
 */
GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment) {
    AlignBuffer(alignment);

    // Remember where this upload starts before the write cursor is advanced.
    const GLintptr start_offset = buffer_offset;
    std::memcpy(buffer_ptr, raw_pointer, size);

    buffer_offset += size;
    buffer_ptr += size;

    return start_offset;
}
void OGLBufferCache::Map(size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
buffer_offset = buffer_offset_base;
if (invalidate) {
InvalidateAll();
}
}
/// Unmaps the stream buffer, passing the number of bytes written since the last Map().
void OGLBufferCache::Unmap() {
    const auto bytes_written = buffer_offset - buffer_offset_base;
    stream_buffer.Unmap(bytes_written);
}
/// Returns the handle of the underlying stream buffer.
GLuint OGLBufferCache::GetHandle() {
    const GLuint handle = stream_buffer.GetHandle();
    return handle;
}
void OGLBufferCache::AlignBuffer(size_t alignment) {
// Align the offset, not the mapped pointer
GLintptr offset_aligned =
static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
buffer_ptr += offset_aligned - buffer_offset;
buffer_offset = offset_aligned;
}
} // namespace OpenGL

View File

@ -0,0 +1,57 @@
// Copyright 2018 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include <memory>
#include <unordered_map>
#include "common/common_types.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace OpenGL {
/// Describes one guest memory region that has already been uploaded to the stream buffer.
struct CachedBufferEntry final {
    /// CPU address of the cached region.
    /// NOTE(review): presumably part of the interface RasterizerCache expects - confirm.
    VAddr GetAddr() const {
        return addr;
    }

    /// Size of the cached region in bytes.
    /// NOTE(review): presumably part of the interface RasterizerCache expects - confirm.
    size_t GetSizeInBytes() const {
        return size;
    }

    // Default member initializers keep a default-constructed entry in a defined state instead
    // of leaving every field indeterminate.
    VAddr addr = 0;       ///< CPU address the data was read from
    size_t size = 0;      ///< Number of bytes uploaded
    GLintptr offset = 0;  ///< Offset of the data inside the stream buffer
    size_t alignment = 0; ///< Alignment that was used when placing the data
};
/**
 * Stream-buffer backed upload cache. Data is written into a mapped stream buffer, and guest
 * uploads can be remembered so that a later request for the same memory reuses the previously
 * written offset instead of copying the data again.
 */
class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
public:
    /// Creates the cache; `size` is forwarded to the underlying stream buffer.
    /// Marked explicit so that a plain size never converts to a cache implicitly.
    explicit OGLBufferCache(size_t size);

    /**
     * Uploads guest memory at `gpu_addr` to the stream buffer, or reuses an earlier upload.
     * @param gpu_addr  GPU virtual address of the data.
     * @param size      Number of bytes to upload.
     * @param alignment Alignment of the resulting offset inside the stream buffer.
     * @param cache     Whether the upload may be served from / registered in the cache.
     * @returns Offset of the data inside the stream buffer.
     */
    GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, size_t size, size_t alignment = 4,
                          bool cache = true);

    /**
     * Copies host memory to the stream buffer without touching the cache.
     * @returns Offset of the data inside the stream buffer.
     */
    GLintptr UploadHostMemory(const void* raw_pointer, size_t size, size_t alignment = 4);

    /// Maps `max_size` bytes of the stream buffer for the uploads that follow.
    void Map(size_t max_size);

    /// Unmaps the stream buffer, reporting how many bytes were written since Map().
    void Unmap();

    /// Returns the handle of the underlying stream buffer.
    GLuint GetHandle();

protected:
    /// Advances the write cursor so that the buffer offset is a multiple of `alignment`.
    void AlignBuffer(size_t alignment);

private:
    OGLStreamBuffer stream_buffer;

    // Write cursor state. It only becomes meaningful after Map(); the initializers make sure
    // nothing indeterminate is read if a method is called before the first Map().
    u8* buffer_ptr = nullptr;        ///< Current write position in the mapped memory
    GLintptr buffer_offset = 0;      ///< Buffer offset matching buffer_ptr
    GLintptr buffer_offset_base = 0; ///< Buffer offset at which the current mapping began
};
} // namespace OpenGL

View File

@ -43,7 +43,7 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
: emu_window{window}, screen_info{info}, stream_buffer(GL_ARRAY_BUFFER, STREAM_BUFFER_SIZE) { : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) {
// Create sampler objects // Create sampler objects
for (size_t i = 0; i < texture_samplers.size(); ++i) { for (size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create(); texture_samplers[i].Create();
@ -83,14 +83,14 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
hw_vao.Create(); hw_vao.Create();
state.draw.vertex_buffer = stream_buffer.GetHandle(); state.draw.vertex_buffer = buffer_cache.GetHandle();
shader_program_manager = std::make_unique<GLShader::ProgramManager>(); shader_program_manager = std::make_unique<GLShader::ProgramManager>();
state.draw.shader_program = 0; state.draw.shader_program = 0;
state.draw.vertex_array = hw_vao.handle; state.draw.vertex_array = hw_vao.handle;
state.Apply(); state.Apply();
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, stream_buffer.GetHandle()); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer_cache.GetHandle());
glEnable(GL_BLEND); glEnable(GL_BLEND);
@ -101,14 +101,13 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
RasterizerOpenGL::~RasterizerOpenGL() {} RasterizerOpenGL::~RasterizerOpenGL() {}
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, void RasterizerOpenGL::SetupVertexArrays() {
GLintptr buffer_offset) {
MICROPROFILE_SCOPE(OpenGL_VAO); MICROPROFILE_SCOPE(OpenGL_VAO);
const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
const auto& regs = gpu.regs; const auto& regs = gpu.regs;
state.draw.vertex_array = hw_vao.handle; state.draw.vertex_array = hw_vao.handle;
state.draw.vertex_buffer = stream_buffer.GetHandle(); state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply(); state.Apply();
// Upload all guest vertex arrays sequentially to our buffer // Upload all guest vertex arrays sequentially to our buffer
@ -127,12 +126,10 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
ASSERT(end > start); ASSERT(end > start);
u64 size = end - start + 1; u64 size = end - start + 1;
GLintptr vertex_buffer_offset; GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size);
std::tie(array_ptr, buffer_offset, vertex_buffer_offset) =
UploadMemory(array_ptr, buffer_offset, start, size);
// Bind the vertex array to the buffer at the current offset. // Bind the vertex array to the buffer at the current offset.
glBindVertexBuffer(index, stream_buffer.GetHandle(), vertex_buffer_offset, glBindVertexBuffer(index, buffer_cache.GetHandle(), vertex_buffer_offset,
vertex_array.stride); vertex_array.stride);
if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
@ -177,11 +174,9 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
} }
glVertexAttribBinding(index, attrib.buffer); glVertexAttribBinding(index, attrib.buffer);
} }
return {array_ptr, buffer_offset};
} }
std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { void RasterizerOpenGL::SetupShaders() {
MICROPROFILE_SCOPE(OpenGL_Shader); MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
@ -199,21 +194,15 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
continue; continue;
} }
std::tie(buffer_ptr, buffer_offset) =
AlignBuffer(buffer_ptr, buffer_offset, static_cast<size_t>(uniform_buffer_alignment));
const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5 const size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
GLShader::MaxwellUniformData ubo{}; GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu.state.shader_stages[stage]); ubo.SetFromRegs(gpu.state.shader_stages[stage]);
std::memcpy(buffer_ptr, &ubo, sizeof(ubo)); GLintptr offset = buffer_cache.UploadHostMemory(
&ubo, sizeof(ubo), static_cast<size_t>(uniform_buffer_alignment));
// Bind the buffer // Bind the buffer
glBindBufferRange(GL_UNIFORM_BUFFER, stage, stream_buffer.GetHandle(), buffer_offset, glBindBufferRange(GL_UNIFORM_BUFFER, stage, buffer_cache.GetHandle(), offset, sizeof(ubo));
sizeof(ubo));
buffer_ptr += sizeof(ubo);
buffer_offset += sizeof(ubo);
Shader shader{shader_cache.GetStageProgram(program)}; Shader shader{shader_cache.GetStageProgram(program)};
@ -234,9 +223,8 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
} }
// Configure the const buffers for this shader stage. // Configure the const buffers for this shader stage.
std::tie(buffer_ptr, buffer_offset, current_constbuffer_bindpoint) = current_constbuffer_bindpoint = SetupConstBuffers(static_cast<Maxwell::ShaderStage>(stage),
SetupConstBuffers(buffer_ptr, buffer_offset, static_cast<Maxwell::ShaderStage>(stage), shader, current_constbuffer_bindpoint);
shader, current_constbuffer_bindpoint);
// Configure the textures for this shader stage. // Configure the textures for this shader stage.
current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader, current_texture_bindpoint = SetupTextures(static_cast<Maxwell::ShaderStage>(stage), shader,
@ -250,8 +238,6 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr
} }
shader_program_manager->UseTrivialGeometryShader(); shader_program_manager->UseTrivialGeometryShader();
return {buffer_ptr, buffer_offset};
} }
size_t RasterizerOpenGL::CalculateVertexArraysSize() const { size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@ -439,31 +425,6 @@ void RasterizerOpenGL::Clear() {
glClear(clear_mask); glClear(clear_mask);
} }
std::pair<u8*, GLintptr> RasterizerOpenGL::AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset,
size_t alignment) {
// Align the offset, not the mapped pointer
GLintptr offset_aligned =
static_cast<GLintptr>(Common::AlignUp(static_cast<size_t>(buffer_offset), alignment));
return {buffer_ptr + (offset_aligned - buffer_offset), offset_aligned};
}
std::tuple<u8*, GLintptr, GLintptr> RasterizerOpenGL::UploadMemory(u8* buffer_ptr,
GLintptr buffer_offset,
Tegra::GPUVAddr gpu_addr,
size_t size, size_t alignment) {
std::tie(buffer_ptr, buffer_offset) = AlignBuffer(buffer_ptr, buffer_offset, alignment);
GLintptr uploaded_offset = buffer_offset;
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)};
Memory::ReadBlock(*cpu_addr, buffer_ptr, size);
buffer_ptr += size;
buffer_offset += size;
return {buffer_ptr, buffer_offset, uploaded_offset};
}
void RasterizerOpenGL::DrawArrays() { void RasterizerOpenGL::DrawArrays() {
if (accelerate_draw == AccelDraw::Disabled) if (accelerate_draw == AccelDraw::Disabled)
return; return;
@ -489,7 +450,7 @@ void RasterizerOpenGL::DrawArrays() {
const bool is_indexed = accelerate_draw == AccelDraw::Indexed; const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
state.draw.vertex_buffer = stream_buffer.GetHandle(); state.draw.vertex_buffer = buffer_cache.GetHandle();
state.Apply(); state.Apply();
size_t buffer_size = CalculateVertexArraysSize(); size_t buffer_size = CalculateVertexArraysSize();
@ -506,25 +467,21 @@ void RasterizerOpenGL::DrawArrays() {
// Add space for at least 18 constant buffers // Add space for at least 18 constant buffers
buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment); buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + uniform_buffer_alignment);
u8* buffer_ptr; buffer_cache.Map(buffer_size);
GLintptr buffer_offset;
std::tie(buffer_ptr, buffer_offset, std::ignore) =
stream_buffer.Map(static_cast<GLsizeiptr>(buffer_size), 4);
u8* buffer_ptr_base = buffer_ptr;
std::tie(buffer_ptr, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); SetupVertexArrays();
// If indexed mode, copy the index buffer // If indexed mode, copy the index buffer
GLintptr index_buffer_offset = 0; GLintptr index_buffer_offset = 0;
if (is_indexed) { if (is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Index); MICROPROFILE_SCOPE(OpenGL_Index);
std::tie(buffer_ptr, buffer_offset, index_buffer_offset) = UploadMemory( index_buffer_offset =
buffer_ptr, buffer_offset, regs.index_array.StartAddress(), index_buffer_size); buffer_cache.UploadMemory(regs.index_array.StartAddress(), index_buffer_size);
} }
std::tie(buffer_ptr, buffer_offset) = SetupShaders(buffer_ptr, buffer_offset); SetupShaders();
stream_buffer.Unmap(buffer_ptr - buffer_ptr_base); buffer_cache.Unmap();
shader_program_manager->ApplyTo(state); shader_program_manager->ApplyTo(state);
state.Apply(); state.Apply();
@ -569,6 +526,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
MICROPROFILE_SCOPE(OpenGL_CacheManagement); MICROPROFILE_SCOPE(OpenGL_CacheManagement);
res_cache.InvalidateRegion(addr, size); res_cache.InvalidateRegion(addr, size);
shader_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size);
buffer_cache.InvalidateRegion(addr, size);
} }
void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) {
@ -658,11 +616,8 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
} }
} }
std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_ptr, u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, Shader& shader,
GLintptr buffer_offset, u32 current_bindpoint) {
Maxwell::ShaderStage stage,
Shader& shader,
u32 current_bindpoint) {
MICROPROFILE_SCOPE(OpenGL_UBO); MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& gpu = Core::System::GetInstance().GPU(); const auto& gpu = Core::System::GetInstance().GPU();
const auto& maxwell3d = gpu.Maxwell3D(); const auto& maxwell3d = gpu.Maxwell3D();
@ -699,13 +654,11 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_pt
size = Common::AlignUp(size, sizeof(GLvec4)); size = Common::AlignUp(size, sizeof(GLvec4));
ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
GLintptr const_buffer_offset; GLintptr const_buffer_offset = buffer_cache.UploadMemory(
std::tie(buffer_ptr, buffer_offset, const_buffer_offset) = buffer.address, size, static_cast<size_t>(uniform_buffer_alignment));
UploadMemory(buffer_ptr, buffer_offset, buffer.address, size,
static_cast<size_t>(uniform_buffer_alignment));
glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint, glBindBufferRange(GL_UNIFORM_BUFFER, current_bindpoint + bindpoint,
stream_buffer.GetHandle(), const_buffer_offset, size); buffer_cache.GetHandle(), const_buffer_offset, size);
// Now configure the bindpoint of the buffer inside the shader // Now configure the bindpoint of the buffer inside the shader
glUniformBlockBinding(shader->GetProgramHandle(), glUniformBlockBinding(shader->GetProgramHandle(),
@ -715,7 +668,7 @@ std::tuple<u8*, GLintptr, u32> RasterizerOpenGL::SetupConstBuffers(u8* buffer_pt
state.Apply(); state.Apply();
return {buffer_ptr, buffer_offset, current_bindpoint + static_cast<u32>(entries.size())}; return current_bindpoint + static_cast<u32>(entries.size());
} }
u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) { u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader, u32 current_unit) {

View File

@ -18,7 +18,9 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/memory_manager.h" #include "video_core/memory_manager.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h"
@ -109,9 +111,8 @@ private:
* @param current_bindpoint The offset at which to start counting new buffer bindpoints. * @param current_bindpoint The offset at which to start counting new buffer bindpoints.
* @returns The next available bindpoint for use in the next shader stage. * @returns The next available bindpoint for use in the next shader stage.
*/ */
std::tuple<u8*, GLintptr, u32> SetupConstBuffers( u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
u8* buffer_ptr, GLintptr buffer_offset, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, u32 current_bindpoint);
Shader& shader, u32 current_bindpoint);
/* /*
* Configures the current textures to use for the draw command. * Configures the current textures to use for the draw command.
@ -173,22 +174,16 @@ private:
std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers; std::array<SamplerInfo, GLShader::NumTextureSamplers> texture_samplers;
static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLStreamBuffer stream_buffer; OGLBufferCache buffer_cache;
OGLBuffer uniform_buffer; OGLBuffer uniform_buffer;
OGLFramebuffer framebuffer; OGLFramebuffer framebuffer;
GLint uniform_buffer_alignment; GLint uniform_buffer_alignment;
size_t CalculateVertexArraysSize() const; size_t CalculateVertexArraysSize() const;
std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); void SetupVertexArrays();
std::pair<u8*, GLintptr> SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); void SetupShaders();
std::pair<u8*, GLintptr> AlignBuffer(u8* buffer_ptr, GLintptr buffer_offset, size_t alignment);
std::tuple<u8*, GLintptr, GLintptr> UploadMemory(u8* buffer_ptr, GLintptr buffer_offset,
Tegra::GPUVAddr gpu_addr, size_t size,
size_t alignment = 4);
enum class AccelDraw { Disabled, Arrays, Indexed }; enum class AccelDraw { Disabled, Arrays, Indexed };
AccelDraw accelerate_draw = AccelDraw::Disabled; AccelDraw accelerate_draw = AccelDraw::Disabled;