Merge pull request #9648 from stenzek/dx11-gl-bbox-caching

Cache bounding box values between register reads in DX11/GL
This commit is contained in:
Connor McLaughlin 2021-05-28 21:17:44 +10:00 committed by GitHub
commit 37d0559493
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 201 additions and 57 deletions

View File

@ -2,18 +2,25 @@
// Licensed under GPLv2+ // Licensed under GPLv2+
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include "VideoBackends/D3D/D3DBoundingBox.h" #include <algorithm>
#include <array>
#include "Common/CommonTypes.h" #include "Common/CommonTypes.h"
#include "Common/MsgHandler.h" #include "Common/MsgHandler.h"
#include "VideoBackends/D3D/D3DBoundingBox.h"
#include "VideoBackends/D3D/D3DState.h" #include "VideoBackends/D3D/D3DState.h"
#include "VideoBackends/D3DCommon/D3DCommon.h" #include "VideoBackends/D3DCommon/D3DCommon.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
namespace DX11 namespace DX11
{ {
static constexpr u32 NUM_BBOX_VALUES = 4;
static ComPtr<ID3D11Buffer> s_bbox_buffer; static ComPtr<ID3D11Buffer> s_bbox_buffer;
static ComPtr<ID3D11Buffer> s_bbox_staging_buffer; static ComPtr<ID3D11Buffer> s_bbox_staging_buffer;
static ComPtr<ID3D11UnorderedAccessView> s_bbox_uav; static ComPtr<ID3D11UnorderedAccessView> s_bbox_uav;
static std::array<s32, NUM_BBOX_VALUES> s_bbox_values;
static std::array<bool, NUM_BBOX_VALUES> s_bbox_dirty;
static bool s_bbox_valid = false;
ID3D11UnorderedAccessView* BBox::GetUAV() ID3D11UnorderedAccessView* BBox::GetUAV()
{ {
@ -22,42 +29,45 @@ ID3D11UnorderedAccessView* BBox::GetUAV()
void BBox::Init() void BBox::Init()
{ {
if (g_ActiveConfig.backend_info.bSupportsBBox) if (!g_ActiveConfig.backend_info.bSupportsBBox)
{ return;
// Create 2 buffers here.
// First for unordered access on default pool.
auto desc = CD3D11_BUFFER_DESC(4 * sizeof(s32), D3D11_BIND_UNORDERED_ACCESS,
D3D11_USAGE_DEFAULT, 0, 0, 4);
int initial_values[4] = {0, 0, 0, 0};
D3D11_SUBRESOURCE_DATA data;
data.pSysMem = initial_values;
data.SysMemPitch = 4 * sizeof(s32);
data.SysMemSlicePitch = 0;
HRESULT hr;
hr = D3D::device->CreateBuffer(&desc, &data, &s_bbox_buffer);
CHECK(SUCCEEDED(hr), "Create BoundingBox Buffer.");
D3DCommon::SetDebugObjectName(s_bbox_buffer.Get(), "BoundingBox Buffer");
// Second to use as a staging buffer. // Create 2 buffers here.
desc.Usage = D3D11_USAGE_STAGING; // First for unordered access on default pool.
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ; auto desc = CD3D11_BUFFER_DESC(NUM_BBOX_VALUES * sizeof(s32), D3D11_BIND_UNORDERED_ACCESS,
desc.BindFlags = 0; D3D11_USAGE_DEFAULT, 0, 0, sizeof(s32));
hr = D3D::device->CreateBuffer(&desc, nullptr, &s_bbox_staging_buffer); const s32 initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};
CHECK(SUCCEEDED(hr), "Create BoundingBox Staging Buffer."); D3D11_SUBRESOURCE_DATA data;
D3DCommon::SetDebugObjectName(s_bbox_staging_buffer.Get(), "BoundingBox Staging Buffer"); data.pSysMem = initial_values;
data.SysMemPitch = NUM_BBOX_VALUES * sizeof(s32);
data.SysMemSlicePitch = 0;
HRESULT hr;
hr = D3D::device->CreateBuffer(&desc, &data, &s_bbox_buffer);
CHECK(SUCCEEDED(hr), "Create BoundingBox Buffer.");
D3DCommon::SetDebugObjectName(s_bbox_buffer.Get(), "BoundingBox Buffer");
// UAV is required to allow concurrent access. // Second to use as a staging buffer.
D3D11_UNORDERED_ACCESS_VIEW_DESC UAVdesc = {}; desc.Usage = D3D11_USAGE_STAGING;
UAVdesc.Format = DXGI_FORMAT_R32_SINT; desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
UAVdesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; desc.BindFlags = 0;
UAVdesc.Buffer.FirstElement = 0; hr = D3D::device->CreateBuffer(&desc, nullptr, &s_bbox_staging_buffer);
UAVdesc.Buffer.Flags = 0; CHECK(SUCCEEDED(hr), "Create BoundingBox Staging Buffer.");
UAVdesc.Buffer.NumElements = 4; D3DCommon::SetDebugObjectName(s_bbox_staging_buffer.Get(), "BoundingBox Staging Buffer");
hr = D3D::device->CreateUnorderedAccessView(s_bbox_buffer.Get(), &UAVdesc, &s_bbox_uav);
CHECK(SUCCEEDED(hr), "Create BoundingBox UAV."); // UAV is required to allow concurrent access.
D3DCommon::SetDebugObjectName(s_bbox_uav.Get(), "BoundingBox UAV"); D3D11_UNORDERED_ACCESS_VIEW_DESC UAVdesc = {};
D3D::stateman->SetOMUAV(s_bbox_uav.Get()); UAVdesc.Format = DXGI_FORMAT_R32_SINT;
} UAVdesc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
UAVdesc.Buffer.FirstElement = 0;
UAVdesc.Buffer.Flags = 0;
UAVdesc.Buffer.NumElements = NUM_BBOX_VALUES;
hr = D3D::device->CreateUnorderedAccessView(s_bbox_buffer.Get(), &UAVdesc, &s_bbox_uav);
CHECK(SUCCEEDED(hr), "Create BoundingBox UAV.");
D3DCommon::SetDebugObjectName(s_bbox_uav.Get(), "BoundingBox UAV");
D3D::stateman->SetOMUAV(s_bbox_uav.Get());
s_bbox_dirty = {};
s_bbox_valid = true;
} }
void BBox::Shutdown() void BBox::Shutdown()
@ -67,23 +77,73 @@ void BBox::Shutdown()
s_bbox_buffer.Reset(); s_bbox_buffer.Reset();
} }
void BBox::Set(int index, int value) void BBox::Flush()
{ {
D3D11_BOX box{index * sizeof(s32), 0, 0, (index + 1) * sizeof(s32), 1, 1}; s_bbox_valid = false;
D3D::context->UpdateSubresource(s_bbox_buffer.Get(), 0, &box, &value, 0, 0);
if (std::none_of(s_bbox_dirty.begin(), s_bbox_dirty.end(), [](bool dirty) { return dirty; }))
return;
for (u32 start = 0; start < NUM_BBOX_VALUES;)
{
if (!s_bbox_dirty[start])
{
start++;
continue;
}
u32 end = start + 1;
s_bbox_dirty[start] = false;
for (; end < NUM_BBOX_VALUES; end++)
{
if (!s_bbox_dirty[end])
break;
s_bbox_dirty[end] = false;
}
D3D11_BOX box{start * sizeof(s32), 0, 0, end * sizeof(s32), 1, 1};
D3D::context->UpdateSubresource(s_bbox_buffer.Get(), 0, &box, &s_bbox_values[start], 0, 0);
}
} }
int BBox::Get(int index) void BBox::Readback()
{ {
int data = 0;
D3D::context->CopyResource(s_bbox_staging_buffer.Get(), s_bbox_buffer.Get()); D3D::context->CopyResource(s_bbox_staging_buffer.Get(), s_bbox_buffer.Get());
D3D11_MAPPED_SUBRESOURCE map; D3D11_MAPPED_SUBRESOURCE map;
HRESULT hr = D3D::context->Map(s_bbox_staging_buffer.Get(), 0, D3D11_MAP_READ, 0, &map); HRESULT hr = D3D::context->Map(s_bbox_staging_buffer.Get(), 0, D3D11_MAP_READ, 0, &map);
if (SUCCEEDED(hr)) if (SUCCEEDED(hr))
{ {
data = ((s32*)map.pData)[index]; for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
{
std::memcpy(&s_bbox_values[i], reinterpret_cast<const u8*>(map.pData) + sizeof(s32) * i,
sizeof(s32));
}
}
D3D::context->Unmap(s_bbox_staging_buffer.Get(), 0);
} }
D3D::context->Unmap(s_bbox_staging_buffer.Get(), 0);
return data; s_bbox_valid = true;
}
// Stores a CPU-side write to one bounding box slot in the local cache and flags that slot as
// dirty so that Flush() uploads it later. The write is dropped when the cache is currently valid
// and already holds the same value. `index` is used unchecked to subscript the cache arrays.
void BBox::Set(int index, int value)
{
  const bool needs_update = !s_bbox_valid || s_bbox_values[index] != value;
  if (needs_update)
  {
    s_bbox_values[index] = value;
    s_bbox_dirty[index] = true;
  }
}
int BBox::Get(int index)
{
if (!s_bbox_valid)
Readback();
return s_bbox_values[index];
} }
}; // namespace DX11 }; // namespace DX11

View File

@ -14,6 +14,9 @@ public:
static void Init(); static void Init();
static void Shutdown(); static void Shutdown();
static void Flush();
static void Readback();
static void Set(int index, int value); static void Set(int index, int value);
static int Get(int index); static int Get(int index);
}; };

View File

@ -274,6 +274,11 @@ void Renderer::BBoxWriteImpl(int index, u16 value)
BBox::Set(index, value); BBox::Set(index, value);
} }
// Renderer hook that forwards to BBox::Flush(), which uploads any dirty cached bounding box
// values to the D3D11 buffer and marks the CPU-side cache as no longer valid.
void Renderer::BBoxFlushImpl()
{
BBox::Flush();
}
void Renderer::Flush() void Renderer::Flush()
{ {
D3D::context->Flush(); D3D::context->Flush();

View File

@ -63,6 +63,7 @@ public:
u16 BBoxReadImpl(int index) override; u16 BBoxReadImpl(int index) override;
void BBoxWriteImpl(int index, u16 value) override; void BBoxWriteImpl(int index, u16 value) override;
void BBoxFlushImpl() override;
void Flush() override; void Flush() override;
void WaitForGPUIdle() override; void WaitForGPUIdle() override;

View File

@ -2,6 +2,8 @@
// Licensed under GPLv2+ // Licensed under GPLv2+
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <algorithm>
#include <array>
#include <cstring> #include <cstring>
#include "Common/GL/GLUtil.h" #include "Common/GL/GLUtil.h"
@ -12,7 +14,15 @@
#include "VideoCommon/DriverDetails.h" #include "VideoCommon/DriverDetails.h"
#include "VideoCommon/VideoConfig.h" #include "VideoCommon/VideoConfig.h"
// Number of s32 slots in the bounding box buffer. Sizes the CPU-side value/dirty caches and
// bounds the upload and readback loops in this file.
enum : u32
{
NUM_BBOX_VALUES = 4,
};
static GLuint s_bbox_buffer_id; static GLuint s_bbox_buffer_id;
static std::array<s32, NUM_BBOX_VALUES> s_bbox_values;
static std::array<bool, NUM_BBOX_VALUES> s_bbox_dirty;
static bool s_bbox_valid = false;
namespace OGL namespace OGL
{ {
@ -21,10 +31,14 @@ void BoundingBox::Init()
if (!g_ActiveConfig.backend_info.bSupportsBBox) if (!g_ActiveConfig.backend_info.bSupportsBBox)
return; return;
int initial_values[4] = {0, 0, 0, 0}; const s32 initial_values[NUM_BBOX_VALUES] = {0, 0, 0, 0};
std::memcpy(s_bbox_values.data(), initial_values, sizeof(s_bbox_values));
s_bbox_dirty = {};
s_bbox_valid = true;
glGenBuffers(1, &s_bbox_buffer_id); glGenBuffers(1, &s_bbox_buffer_id);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW); glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(initial_values), initial_values, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, s_bbox_buffer_id);
} }
@ -36,22 +50,42 @@ void BoundingBox::Shutdown()
glDeleteBuffers(1, &s_bbox_buffer_id); glDeleteBuffers(1, &s_bbox_buffer_id);
} }
void BoundingBox::Set(int index, int value) void BoundingBox::Flush()
{ {
if (!g_ActiveConfig.backend_info.bSupportsBBox) s_bbox_valid = false;
if (std::none_of(s_bbox_dirty.begin(), s_bbox_dirty.end(), [](bool dirty) { return dirty; }))
return; return;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value);
for (u32 start = 0; start < NUM_BBOX_VALUES;)
{
if (!s_bbox_dirty[start])
{
start++;
continue;
}
u32 end = start + 1;
s_bbox_dirty[start] = false;
for (; end < NUM_BBOX_VALUES; end++)
{
if (!s_bbox_dirty[end])
break;
s_bbox_dirty[end] = false;
}
glBufferSubData(GL_SHADER_STORAGE_BUFFER, start * sizeof(s32), (end - start) * sizeof(s32),
&s_bbox_values[start]);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
} }
int BoundingBox::Get(int index) void BoundingBox::Readback()
{ {
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return 0;
int data = 0;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id); glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) && if (!DriverDetails::HasBug(DriverDetails::BUG_SLOW_GETBUFFERSUBDATA) &&
!static_cast<Renderer*>(g_renderer.get())->IsGLES()) !static_cast<Renderer*>(g_renderer.get())->IsGLES())
@ -59,20 +93,52 @@ int BoundingBox::Get(int index)
// Using glMapBufferRange to read back the contents of the SSBO is extremely slow // Using glMapBufferRange to read back the contents of the SSBO is extremely slow
// on nVidia drivers. This is more noticeable at higher internal resolutions. // on nVidia drivers. This is more noticeable at higher internal resolutions.
// Using glGetBufferSubData instead does not seem to exhibit this slowdown. // Using glGetBufferSubData instead does not seem to exhibit this slowdown.
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &data); std::array<s32, NUM_BBOX_VALUES> gpu_values;
glGetBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
gpu_values.data());
for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
s_bbox_values[i] = gpu_values[i];
}
} }
else else
{ {
// Using glMapBufferRange is faster on AMD cards by a measurable margin. // Using glMapBufferRange is faster on AMD cards by a measurable margin.
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, sizeof(s32) * NUM_BBOX_VALUES,
GL_MAP_READ_BIT); GL_MAP_READ_BIT);
if (ptr) if (ptr)
{ {
memcpy(&data, ptr, sizeof(int)); for (u32 i = 0; i < NUM_BBOX_VALUES; i++)
{
if (!s_bbox_dirty[i])
{
std::memcpy(&s_bbox_values[i], reinterpret_cast<const u8*>(ptr) + sizeof(s32) * i,
sizeof(s32));
}
}
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER); glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
} }
} }
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return data; s_bbox_valid = true;
}
// Caches a CPU-side write to one bounding box slot and flags the slot as dirty so that Flush()
// writes it to the buffer later. Nothing happens when the cache is currently valid and already
// contains the same value. `index` is used unchecked to subscript the cache arrays.
void BoundingBox::Set(int index, int value)
{
  const bool value_changed = !s_bbox_valid || s_bbox_values[index] != value;
  if (value_changed)
  {
    s_bbox_values[index] = value;
    s_bbox_dirty[index] = true;
  }
}
int BoundingBox::Get(int index)
{
if (!s_bbox_valid)
Readback();
return s_bbox_values[index];
} }
}; // namespace OGL }; // namespace OGL

View File

@ -12,6 +12,9 @@ public:
static void Init(); static void Init();
static void Shutdown(); static void Shutdown();
static void Flush();
static void Readback();
static void Set(int index, int value); static void Set(int index, int value);
static int Get(int index); static int Get(int index);
}; };

View File

@ -882,6 +882,11 @@ void Renderer::BBoxWriteImpl(int index, u16 value)
BoundingBox::Set(index, swapped_value); BoundingBox::Set(index, swapped_value);
} }
// Renderer hook that forwards to BoundingBox::Flush(), which writes any dirty cached bounding
// box values into the SSBO and marks the CPU-side cache as no longer valid.
void Renderer::BBoxFlushImpl()
{
BoundingBox::Flush();
}
void Renderer::SetViewport(float x, float y, float width, float height, float near_depth, void Renderer::SetViewport(float x, float y, float width, float height, float near_depth,
float far_depth) float far_depth)
{ {

View File

@ -128,6 +128,7 @@ public:
u16 BBoxReadImpl(int index) override; u16 BBoxReadImpl(int index) override;
void BBoxWriteImpl(int index, u16 value) override; void BBoxWriteImpl(int index, u16 value) override;
void BBoxFlushImpl() override;
void BeginUtilityDrawing() override; void BeginUtilityDrawing() override;
void EndUtilityDrawing() override; void EndUtilityDrawing() override;