OGL: implement bounding box support with ssbo

This implemention tries to be as accurate as the old SW implemention, but it will remove the dependcy of our vertexloader on videosw.
This commit is contained in:
degasus 2014-11-13 23:26:49 +01:00
parent dced84d440
commit c211450b99
24 changed files with 231 additions and 7 deletions

View File

@ -34,6 +34,8 @@ public:
void RenderText(const std::string& text, int left, int top, u32 color) override;
u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
u16 BBoxRead(int index) override { return 0; };
void BBoxWrite(int index, u16 value) override {};
void ResetAPIState() override;
void RestoreAPIState() override;

View File

@ -77,6 +77,7 @@ void InitBackendInfo()
g_Config.backend_info.bSupportsDualSourceBlend = true;
g_Config.backend_info.bSupportsPrimitiveRestart = true;
g_Config.backend_info.bSupportsOversizedViewports = false;
g_Config.backend_info.bSupportsBBox = false; // TODO: not implemented
IDXGIFactory* factory;
IDXGIAdapter* ad;

View File

@ -0,0 +1,54 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/GLUtil.h"
#include "VideoCommon/VideoConfig.h"
static GLuint s_bbox_buffer_id;
namespace OGL
{
void BoundingBox::Init()
{
if (g_ActiveConfig.backend_info.bSupportsBBox)
{
int initial_values[4] = {0,0,0,0};
glGenBuffers(1, &s_bbox_buffer_id);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferData(GL_SHADER_STORAGE_BUFFER, 4 * sizeof(s32), initial_values, GL_DYNAMIC_DRAW);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 3, s_bbox_buffer_id);
}
}
void BoundingBox::Shutdown()
{
if (g_ActiveConfig.backend_info.bSupportsBBox)
glDeleteBuffers(1, &s_bbox_buffer_id);
}
void BoundingBox::Set(int index, int value)
{
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
glBufferSubData(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), &value);
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
}
int BoundingBox::Get(int index)
{
int data = 0;
glBindBuffer(GL_SHADER_STORAGE_BUFFER, s_bbox_buffer_id);
void* ptr = glMapBufferRange(GL_SHADER_STORAGE_BUFFER, index * sizeof(int), sizeof(int), GL_MAP_READ_BIT);
if (ptr)
{
data = *(int*)ptr;
glUnmapBuffer(GL_SHADER_STORAGE_BUFFER);
}
glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
return data;
}
};

View File

@ -0,0 +1,20 @@
// Copyright 2014 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.
#pragma once
namespace OGL
{
class BoundingBox
{
public:
static void Init();
static void Shutdown();
static void Set(int index, int value);
static int Get(int index);
};
};

View File

@ -1,4 +1,5 @@
set(SRCS GLExtensions/GLExtensions.cpp
BoundingBox.cpp
FramebufferManager.cpp
GLUtil.cpp
main.cpp

View File

@ -35,6 +35,7 @@
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemGroup>
<ClCompile Include="BoundingBox.cpp" />
<ClCompile Include="FramebufferManager.cpp" />
<ClCompile Include="GLExtensions\GLExtensions.cpp" />
<ClCompile Include="GLInterface\GLInterface.cpp" />
@ -54,6 +55,7 @@
<ClCompile Include="VertexManager.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="BoundingBox.h" />
<ClInclude Include="FramebufferManager.h" />
<ClInclude Include="GLExtensions\ARB_blend_func_extended.h" />
<ClInclude Include="GLExtensions\ARB_buffer_storage.h" />

View File

@ -36,6 +36,9 @@
<ClCompile Include="RasterFont.cpp">
<Filter>Logging</Filter>
</ClCompile>
<ClCompile Include="BoundingBox.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="FramebufferManager.cpp">
<Filter>Render</Filter>
</ClCompile>
@ -82,6 +85,9 @@
<ClInclude Include="RasterFont.h">
<Filter>Logging</Filter>
</ClInclude>
<ClInclude Include="BoundingBox.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="FramebufferManager.h">
<Filter>Render</Filter>
</ClInclude>

View File

@ -485,6 +485,7 @@ void ProgramShaderCache::CreateHeader()
"%s\n" // msaa
"%s\n" // sample shading
"%s\n" // Sampler binding
"%s\n" // storage buffer
// Precision defines for GLSL ES
"%s\n"
@ -516,6 +517,7 @@ void ProgramShaderCache::CreateHeader()
, (g_ogl_config.bSupportsMSAA && v < GLSL_150) ? "#extension GL_ARB_texture_multisample : enable" : ""
, (g_ogl_config.bSupportSampleShading) ? "#extension GL_ARB_sample_shading : enable" : ""
, g_ActiveConfig.backend_info.bSupportsBindingLayout ? "#define SAMPLER_BINDING(x) layout(binding = x)" : "#define SAMPLER_BINDING(x)"
, g_ActiveConfig.backend_info.bSupportsBBox ? "#extension GL_ARB_shader_storage_buffer_object : enable" : ""
, v>=GLSLES_300 ? "precision highp float;" : ""
, v>=GLSLES_300 ? "precision highp int;" : ""

View File

@ -20,6 +20,7 @@
#include "Core/Core.h"
#include "Core/Movie.h"
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/FramebufferManager.h"
#include "VideoBackends/OGL/GLInterfaceBase.h"
#include "VideoBackends/OGL/GLUtil.h"
@ -465,6 +466,7 @@ Renderer::Renderer()
g_Config.backend_info.bSupportsPrimitiveRestart = !DriverDetails::HasBug(DriverDetails::BUG_PRIMITIVERESTART) &&
((GLExtensions::Version() >= 310) || GLExtensions::Supports("GL_NV_primitive_restart"));
g_Config.backend_info.bSupportsEarlyZ = GLExtensions::Supports("GL_ARB_shader_image_load_store");
g_Config.backend_info.bSupportsBBox = GLExtensions::Supports("GL_ARB_shader_storage_buffer_object");
// Desktop OpenGL supports the binding layout if it supports 420pack
// OpenGL ES 3.1 supports it implicitly without an extension
@ -1161,6 +1163,52 @@ u32 Renderer::AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data)
return 0;
}
u16 Renderer::BBoxRead(int index)
{
int swapped_index = index;
if (index >= 2)
swapped_index ^= 1; // swap 2 and 3 for top/bottom
// Here we get the min/max value of the truncated position of the upscaled and swapped framebuffer.
// So we have to correct them to the unscaled EFB sizes.
int value = BoundingBox::Get(swapped_index);
if (index < 2)
{
// left/right
value = value * EFB_WIDTH / s_target_width;
}
else
{
// up/down -- we have to swap up and down
value = value * EFB_HEIGHT / s_target_height;
value = EFB_HEIGHT - value - 1;
}
if (index & 1)
value++; // fix max values to describe the outer border
return value;
}
void Renderer::BBoxWrite(int index, u16 _value)
{
int value = _value; // u16 isn't enough to multiply by the efb width
if (index & 1)
value--;
if (index < 2)
{
value = value * s_target_width / EFB_WIDTH;
}
else
{
index ^= 1; // swap 2 and 3 for top/bottom
value = EFB_HEIGHT - value - 1;
value = value * s_target_height / EFB_HEIGHT;
}
BoundingBox::Set(index, value);
}
void Renderer::SetViewport()
{
// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)

View File

@ -71,6 +71,9 @@ public:
u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) override;
u16 BBoxRead(int index) override;
void BBoxWrite(int index, u16 value) override;
void ResetAPIState() override;
void RestoreAPIState() override;

View File

@ -48,6 +48,7 @@ Make AA apply instantly during gameplay if possible
#include "Core/Core.h"
#include "Core/Host.h"
#include "VideoBackends/OGL/BoundingBox.h"
#include "VideoBackends/OGL/FramebufferManager.h"
#include "VideoBackends/OGL/GLInterfaceBase.h"
#include "VideoBackends/OGL/GLUtil.h"
@ -205,6 +206,7 @@ void VideoBackend::Video_Prepare()
Renderer::Init();
VertexLoaderManager::Init();
TextureConverter::Init();
BoundingBox::Init();
// Notify the core that the video backend is ready
Host_Message(WM_USER_CREATE);
@ -229,6 +231,7 @@ void VideoBackend::Video_Cleanup()
// The following calls are NOT Thread Safe
// And need to be called from the video thread
Renderer::Shutdown();
BoundingBox::Shutdown();
TextureConverter::Shutdown();
VertexLoaderManager::Shutdown();
delete g_sampler_cache;

View File

@ -33,6 +33,7 @@
#include "VideoBackends/Software/VideoBackend.h"
#include "VideoBackends/Software/XFMemLoader.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/Fifo.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/PixelEngine.h"
@ -283,6 +284,11 @@ u32 VideoSoftware::Video_GetQueryResult(PerfQueryType type)
return EfbInterface::perf_values[type];
}
u16 VideoSoftware::Video_GetBoundingBox(int index)
{
return BoundingBox::coords[index];
}
bool VideoSoftware::Video_Screenshot(const std::string& filename)
{
SWRenderer::SetScreenshot(filename.c_str());

View File

@ -32,6 +32,7 @@ class VideoSoftware : public VideoBackend
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32) override;
u32 Video_GetQueryResult(PerfQueryType type) override;
u16 Video_GetBoundingBox(int index) override;
void Video_AddMessage(const std::string& msg, unsigned int milliseconds) override;
void Video_ClearMessages() override;

View File

@ -380,6 +380,9 @@ static void BPWritten(const BPCmd& bp)
BoundingBox::coords[offset] = bp.newvalue & 0x3ff;
BoundingBox::coords[offset + 1] = bp.newvalue >> 10;
BoundingBox::active = true;
g_renderer->BBoxWrite(offset, bp.newvalue & 0x3ff);
g_renderer->BBoxWrite(offset + 1, bp.newvalue >> 10);
}
return;
case BPMEM_TEXINVALIDATE:

View File

@ -37,6 +37,7 @@ enum SyncGPUReason
SYNC_GPU_WRAPAROUND,
SYNC_GPU_EFB_POKE,
SYNC_GPU_PERFQUERY,
SYNC_GPU_BBOX,
SYNC_GPU_SWAP,
SYNC_GPU_AUX_SPACE,
};

View File

@ -1,6 +1,7 @@
#include "Common/Event.h"
#include "Core/ConfigManager.h"
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/BPStructs.h"
#include "VideoCommon/CommandProcessor.h"
#include "VideoCommon/Fifo.h"
@ -25,6 +26,11 @@ static Common::Event s_efbAccessReadyEvent;
static Common::Flag s_perfQueryRequested;
static Common::Event s_perfQueryReadyEvent;
static Common::Flag s_BBoxRequested;
static Common::Event s_BBoxReadyEvent;
static int s_BBoxIndex;
static u16 s_BBoxResult;
static volatile struct
{
u32 xfbAddr;
@ -217,6 +223,39 @@ u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type)
return g_perf_query->GetQueryResult(type);
}
u16 VideoBackendHardware::Video_GetBoundingBox(int index)
{
if (!g_ActiveConfig.backend_info.bSupportsBBox)
return BoundingBox::coords[index];
SyncGPU(SYNC_GPU_BBOX);
if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread)
{
s_BBoxReadyEvent.Reset();
if (s_FifoShuttingDown.IsSet())
return 0;
s_BBoxIndex = index;
s_BBoxRequested.Set();
s_BBoxReadyEvent.Wait();
return s_BBoxResult;
}
else
{
return g_renderer->BBoxRead(index);
}
}
static void VideoFifo_CheckBBoxRequest()
{
if (s_BBoxRequested.IsSet())
{
s_BBoxResult = g_renderer->BBoxRead(s_BBoxIndex);
s_BBoxRequested.Clear();
s_BBoxReadyEvent.Set();
}
}
void VideoBackendHardware::InitializeShared()
{
VideoCommon_Init();
@ -292,6 +331,7 @@ void VideoFifo_CheckAsyncRequest()
VideoFifo_CheckSwapRequest();
VideoFifo_CheckEFBAccess();
VideoFifo_CheckPerfQueryRequest();
VideoFifo_CheckBBoxRequest();
}
void VideoBackendHardware::Video_GatherPipeBursted()

View File

@ -233,7 +233,7 @@ void RegisterMMIO(MMIO::Mapping* mmio, u32 base)
mmio->Register(base | (PE_BBOX_LEFT + 2 * i),
MMIO::ComplexRead<u16>([i](u32) {
BoundingBox::active = false;
return BoundingBox::coords[i];
return g_video_backend->Video_GetBoundingBox(i);
}),
MMIO::InvalidWrite<u16>()
);

View File

@ -10,6 +10,7 @@
#include <xlocale.h>
#endif
#include "VideoCommon/BoundingBox.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/LightingShaderGen.h"
@ -264,6 +265,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
"\tfloat4 " I_DEPTHPARAMS";\n"
"};\n");
}
if (g_ActiveConfig.backend_info.bSupportsBBox)
{
out.Write(
"layout(std140, binding = 3) buffer BBox {\n"
"\tint4 bbox_data;\n"
"};\n"
);
}
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z);
@ -551,6 +562,17 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write("\tocol0.a = float(" I_ALPHA".a) / 255.0;\n");
}
if (g_ActiveConfig.backend_info.bSupportsBBox && BoundingBox::active)
{
uid_data->bounding_box = true;
out.Write(
"\tif(bbox_data.x > int(gl_FragCoord.x)) atomicMin(bbox_data.x, int(gl_FragCoord.x));\n"
"\tif(bbox_data.y < int(gl_FragCoord.x)) atomicMax(bbox_data.y, int(gl_FragCoord.x));\n"
"\tif(bbox_data.z > int(gl_FragCoord.y)) atomicMin(bbox_data.z, int(gl_FragCoord.y));\n"
"\tif(bbox_data.w < int(gl_FragCoord.y)) atomicMax(bbox_data.w, int(gl_FragCoord.y));\n"
);
}
out.Write("}\n");
if (is_writing_shadercode)

View File

@ -61,7 +61,7 @@ struct pixel_shader_uid_data
u32 per_pixel_depth : 1;
u32 forced_early_z : 1;
u32 early_ztest : 1;
u32 pad1 : 1;
u32 bounding_box : 1;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;

View File

@ -104,6 +104,9 @@ public:
virtual u32 AccessEFB(EFBAccessType type, u32 x, u32 y, u32 poke_data) = 0;
virtual u16 BBoxRead(int index) = 0;
virtual void BBoxWrite(int index, u16 value) = 0;
// What's the real difference between these? Too similar names.
virtual void ResetAPIState() = 0;
virtual void RestoreAPIState() = 0;

View File

@ -171,6 +171,7 @@ void VertexLoader::CompileVertexTranslator()
#endif
// Get the pointer to this vertex's buffer data for the bounding box
if (!g_ActiveConfig.backend_info.bSupportsBBox)
WriteCall(BoundingBox::SetVertexBufferPosition);
// Colors
@ -380,6 +381,7 @@ void VertexLoader::CompileVertexTranslator()
}
// Update the bounding box
if (!g_ActiveConfig.backend_info.bSupportsBBox)
WriteCall(BoundingBox::Update);
if (m_VtxDesc.PosMatIdx)
@ -457,6 +459,7 @@ void VertexLoader::SetupRunVertices(const VAT& vat, int primitive, int const cou
colElements[i] = m_VtxAttr.color[i].Elements;
// Prepare bounding box
if (!g_ActiveConfig.backend_info.bSupportsBBox)
BoundingBox::Prepare(vat, primitive, m_VtxDesc, m_native_vtx_decl);
}

View File

@ -89,6 +89,7 @@ public:
virtual u32 Video_AccessEFB(EFBAccessType, u32, u32, u32) = 0;
virtual u32 Video_GetQueryResult(PerfQueryType type) = 0;
virtual u16 Video_GetBoundingBox(int index) = 0;
virtual void Video_AddMessage(const std::string& msg, unsigned int milliseconds) = 0;
virtual void Video_ClearMessages() = 0;
@ -137,6 +138,7 @@ class VideoBackendHardware : public VideoBackend
u32 Video_AccessEFB(EFBAccessType, u32, u32, u32) override;
u32 Video_GetQueryResult(PerfQueryType type) override;
u16 Video_GetBoundingBox(int index) override;
void Video_AddMessage(const std::string& pstr, unsigned int milliseconds) override;
void Video_ClearMessages() override;

View File

@ -138,6 +138,7 @@ struct VideoConfig final
bool bSupportsOversizedViewports;
bool bSupportsEarlyZ; // needed by PixelShaderGen, so must stay in VideoCommon
bool bSupportsBindingLayout; // Needed by ShaderGen, so must stay in VideoCommon
bool bSupportsBBox;
} backend_info;
// Utility