D3D11: Port perf queries code to the PerfQueryBase interface.

Remove deprecated PerfQuery methods from RenderBase.
Windows build fix.
This commit is contained in:
NeoBrainX 2013-03-01 19:30:37 +01:00
parent b94f65b666
commit 5a7bb2abfa
11 changed files with 218 additions and 154 deletions

View File

@ -128,11 +128,6 @@ public:
static unsigned int GetPrevPixelFormat() { return prev_efb_format; }
static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; }
virtual void ResetPixelPerf() {};
virtual void ResumePixelPerf(bool efb_copies) {};
virtual void PausePixelPerf(bool efb_copies) {};
virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; };
// TODO: doesn't belong here
virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0;
virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0;

View File

@ -101,6 +101,9 @@
<ClCompile Include="Src\MainBase.cpp">
<Filter>Base</Filter>
</ClCompile>
<ClCompile Include="Src\PerfQueryBase.cpp">
<Filter>Base</Filter>
</ClCompile>
<ClCompile Include="Src\RenderBase.cpp">
<Filter>Base</Filter>
</ClCompile>
@ -113,8 +116,6 @@
<ClCompile Include="Src\LightingShaderGen.cpp">
<Filter>Shader Generators</Filter>
</ClCompile>
<ClCompile Include="Src\PerfQueryBase.cpp">
<Filter>Base</Filter>
<ClCompile Include="Src\FPSCounter.cpp">
<Filter>Util</Filter>
</ClCompile>
@ -239,6 +240,9 @@
<ClInclude Include="Src\MainBase.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQueryBase.h">
<Filter>Base</Filter>
</ClInclude>
<ClInclude Include="Src\RenderBase.h">
<Filter>Base</Filter>
</ClInclude>
@ -251,8 +255,6 @@
<ClInclude Include="Src\LightingShaderGen.h">
<Filter>Shader Generators</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQueryBase.h">
<Filter>Base</Filter>
<ClInclude Include="Src\FPSCounter.h">
<Filter>Util</Filter>
</ClInclude>

View File

@ -199,6 +199,7 @@
<ClCompile Include="Src\LineGeometryShader.cpp" />
<ClCompile Include="Src\main.cpp" />
<ClCompile Include="Src\NativeVertexFormat.cpp" />
<ClCompile Include="Src\PerfQuery.cpp" />
<ClCompile Include="Src\PixelShaderCache.cpp" />
<ClCompile Include="Src\PointGeometryShader.cpp" />
<ClCompile Include="Src\PSTextureEncoder.cpp" />
@ -228,6 +229,7 @@
<ClInclude Include="Src\Globals.h" />
<ClInclude Include="Src\LineGeometryShader.h" />
<ClInclude Include="Src\main.h" />
<ClInclude Include="Src\PerfQuery.h" />
<ClInclude Include="Src\PixelShaderCache.h" />
<ClInclude Include="Src\PointGeometryShader.h" />
<ClInclude Include="Src\PSTextureEncoder.h" />

View File

@ -57,6 +57,9 @@
<ClCompile Include="Src\PointGeometryShader.cpp">
<Filter>Render</Filter>
</ClCompile>
<ClCompile Include="Src\PerfQuery.cpp">
<Filter>Render</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="Src\Globals.h" />
@ -117,6 +120,9 @@
<ClInclude Include="Src\PointGeometryShader.h">
<Filter>Render</Filter>
</ClInclude>
<ClInclude Include="Src\PerfQuery.h">
<Filter>Render</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="D3D">

View File

@ -0,0 +1,148 @@
#include "RenderBase.h"
#include "D3DBase.h"
#include "PerfQuery.h"
namespace DX11 {
// Builds the fixed-size buffer of D3D11 occlusion queries and clears all
// counters. Both position/count members are value-initialised to zero.
PerfQuery::PerfQuery()
	: m_query_read_pos()
	, m_query_count()
{
	// The description is identical for every slot, so build it once.
	const D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);

	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		// Start from a defined value so a failed CreateQuery cannot leave an
		// indeterminate pointer behind in the slot.
		m_query_buffer[i].query = NULL;

		const HRESULT hr = D3D::device->CreateQuery(&qdesc, &m_query_buffer[i].query);
		if (FAILED(hr))
		{
			m_query_buffer[i].query = NULL;
			ERROR_LOG(VIDEO, "Failed to create occlusion query %d (hr=0x%08x)", i, (unsigned int)hr);
		}
	}

	ResetQuery();
}
// Releases every query object held in the query buffer.
PerfQuery::~PerfQuery()
{
	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		// TODO: EndQuery?
		// Skip slots that hold no query object (e.g. because creation failed);
		// calling Release() through a null pointer would crash on shutdown.
		if (m_query_buffer[i].query != NULL)
		{
			m_query_buffer[i].query->Release();
			m_query_buffer[i].query = NULL;
		}
	}
}
// Starts a new occlusion query for the given group, making room in the query
// buffer first if necessary. Only the two z-comparison groups start a query.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
	// Is this sane?
	// Once more than half of the slots are in use, collect whatever results
	// are already available without stalling.
	if (ARRAYSIZE(m_query_buffer) / 2 < m_query_count)
		WeakFlush();

	// Every slot is still pending: wait for the oldest query so that one slot
	// becomes free again.
	if (m_query_count == ARRAYSIZE(m_query_buffer))
	{
		// TODO
		FlushOne();
		ERROR_LOG(VIDEO, "flushed query buffer early!");
	}

	const bool is_zcomp_group = (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP);
	if (!is_zcomp_group)
		return;

	// start query in the slot right after the last pending one
	const auto write_pos = (m_query_read_pos + m_query_count) % ARRAYSIZE(m_query_buffer);
	ActiveQuery& slot = m_query_buffer[write_pos];
	D3D::context->Begin(slot.query);
	slot.query_type = type;
	++m_query_count;
}
// Ends the most recently started occlusion query. Groups other than the two
// z-comparison groups are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
	if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
		return;

	// stop query: the last started slot is one before the next write position.
	// The buffer size is added before subtracting one so the value being
	// reduced modulo the size stays non-negative.
	const auto last_pos = (m_query_read_pos + m_query_count + ARRAYSIZE(m_query_buffer)-1) % ARRAYSIZE(m_query_buffer);
	D3D::context->End(m_query_buffer[last_pos].query);
}
// Forgets all pending queries and zeroes the accumulated result of every group.
void PerfQuery::ResetQuery()
{
	m_query_count = 0;

	for (int group = 0; group != ARRAYSIZE(m_results); ++group)
		m_results[group] = 0;
}
// Returns the accumulated counter that backs the requested query type,
// divided by four. Unhandled query types yield 0.
// NOTE(review): the division by 4 presumably converts pixels into 2x2 quads,
// as the previous Renderer-based implementation documented - confirm.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
	if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC)
		return m_results[PQG_ZCOMP_ZCOMPLOC] / 4;

	if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT)
		return m_results[PQG_ZCOMP] / 4;

	// Blend input is reported as the sum of both z-comparison groups.
	if (type == PQ_BLEND_INPUT)
		return (m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]) / 4;

	if (type == PQ_EFB_COPY_CLOCKS)
		return m_results[PQG_EFB_COPY_CLOCKS] / 4;

	return 0;
}
void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
UINT64 result = 0;
HRESULT hr = S_FALSE;
while (hr != S_OK)
{
// TODO: Might cause us to be stuck in an infinite loop!
hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0);
}
m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}
// Retrieves the results of all pending queries, waiting for each one in turn.
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
	for (;;)
	{
		if (IsFlushed())
			return;

		FlushOne();
	}
}
void PerfQuery::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
UINT64 result = 0;
HRESULT hr = D3D::context->GetData(entry.query, &result, sizeof(result), D3D11_ASYNC_GETDATA_DONOTFLUSH);
if (hr == S_OK)
{
m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}
else
{
break;
}
}
}
// True when no query is waiting to have its result retrieved.
bool PerfQuery::IsFlushed() const
{
	return m_query_count == 0;
}
} // namespace

View File

@ -0,0 +1,46 @@
#ifndef _PERFQUERY_H_
#define _PERFQUERY_H_
#include "PerfQueryBase.h"
namespace DX11 {
// D3D11 implementation of the PerfQueryBase interface. Keeps a fixed-size
// buffer of ID3D11Query objects and accumulates their results per query group.
class PerfQuery : public PerfQueryBase
{
public:
// Creates one occlusion query per buffer slot and resets all counters.
PerfQuery();
// Releases the query objects of all buffer slots.
~PerfQuery();
// Begins a query for the PQG_ZCOMP_ZCOMPLOC / PQG_ZCOMP groups; may flush
// older queries first to make room.
void EnableQuery(PerfQueryGroup type);
// Ends the most recently begun query for the PQG_ZCOMP_ZCOMPLOC / PQG_ZCOMP groups.
void DisableQuery(PerfQueryGroup type);
// Drops all pending queries and zeroes the accumulated results.
void ResetQuery();
// Returns the accumulated result backing the given query type, divided by 4.
u32 GetQueryResult(PerfQueryType type);
// Retrieves the results of all pending queries, waiting where necessary.
void FlushResults();
// True when no query results are pending.
bool IsFlushed() const;
private:
// One slot of the query buffer: the D3D11 query object and the group whose
// counter receives its result.
struct ActiveQuery
{
ID3D11Query* query;
PerfQueryGroup query_type;
};
// Retrieves results that are already available and stops at the first
// pending query whose data is not ready.
void WeakFlush();
// Only use when non-empty
// Waits for the oldest pending query and removes it from the buffer.
void FlushOne();
// when testing in SMS: 64 was too small, 128 was ok
static const int PERF_QUERY_BUFFER_SIZE = 512;
// Query slots, used in circular fashion starting at m_query_read_pos.
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
// Index of the oldest pending query in m_query_buffer.
int m_query_read_pos;
// TODO: sloppy
// NOTE(review): volatile does not provide synchronization; confirm whether
// these members are accessed from more than one thread.
// Number of queries begun whose results have not been retrieved yet.
volatile int m_query_count;
// Accumulated results, indexed by PerfQueryGroup.
volatile u32 m_results[PQG_NUM_MEMBERS];
};
} // namespace
#endif // _PERFQUERY_H_

View File

@ -65,18 +65,6 @@ ID3D11RasterizerState* resetraststate = NULL;
static ID3D11Texture2D* s_screenshot_texture = NULL;
// Using a vector of query objects to avoid flushing the gpu pipeline all the time
// TODO: Could probably optimized further by using a ring buffer or something
#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess
std::vector<ID3D11Query*> pixel_perf_queries;
static int pixel_perf_query_index = 0;
static u64 pixel_perf = 0;
static bool pixel_perf_active = false;
static bool pixel_perf_dirty = false;
ID3D11Query* gpu_finished_query = NULL;
// GX pipeline state
struct
@ -170,9 +158,6 @@ void SetupDeviceObjects()
D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState");
s_screenshot_texture = NULL;
D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0);
D3D::device->CreateQuery(&qdesc, &gpu_finished_query);
}
// Kill off all device objects
@ -180,12 +165,6 @@ void TeardownDeviceObjects()
{
delete g_framebuffer_manager;
while (!pixel_perf_queries.empty())
{
SAFE_RELEASE(pixel_perf_queries.back());
pixel_perf_queries.pop_back();
}
SAFE_RELEASE(gpu_finished_query);
SAFE_RELEASE(access_efb_cbuf);
SAFE_RELEASE(clearblendstates[0]);
SAFE_RELEASE(clearblendstates[1]);
@ -232,11 +211,6 @@ Renderer::Renderer()
s_LastEFBScale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
pixel_perf_query_index = 0;
pixel_perf = 0;
pixel_perf_active = false;
pixel_perf_dirty = false;
SetupDeviceObjects();
@ -660,112 +634,6 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
}
// Resets the pixel perf state: ends a currently active query (if any), then
// rewinds the query index and clears the accumulated pixel count.
// Does nothing when pixel perf is disabled in the active config.
void Renderer::ResetPixelPerf()
{
if (g_ActiveConfig.bDisablePixelPerf)
return;
// An active query has to be ended before the index is rewound.
if (pixel_perf_active)
PausePixelPerf(false);
pixel_perf_query_index = 0;
pixel_perf = 0;
}
// Begins an occlusion query at the current query index, creating a new query
// object on demand. Returns without doing anything when pixel perf is
// disabled, when efb_copies is true, or when a query is already active.
void Renderer::ResumePixelPerf(bool efb_copies)
{
if (g_ActiveConfig.bDisablePixelPerf)
return;
if (efb_copies)
return;
if(pixel_perf_active)
return;
// No query object exists for the current index yet and the cap has not been
// reached: create one and use it.
if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES)
{
D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);
ID3D11Query* tmpquery = NULL;
D3D::device->CreateQuery(&qdesc, &tmpquery);
pixel_perf_queries.push_back(tmpquery);
pixel_perf_query_index = pixel_perf_queries.size() - 1;
}
// The cap has been reached: accumulate the outstanding results and start
// over at the first query object.
else if (pixel_perf_queries.size() < pixel_perf_query_index+1)
{
StorePixelPerfResult(PP_ZCOMP_OUTPUT);
pixel_perf_query_index = 0;
}
// This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output.
D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]);
pixel_perf_active = true;
pixel_perf_dirty = true;
}
// Ends the currently active occlusion query and advances the query index.
// Does nothing when pixel perf is disabled or no query is active.
// The efb_copies parameter is not used by this implementation.
void Renderer::PausePixelPerf(bool efb_copies)
{
if (g_ActiveConfig.bDisablePixelPerf)
return;
if(!pixel_perf_active)
return;
D3D::context->End(pixel_perf_queries[pixel_perf_query_index]);
pixel_perf_query_index++;
pixel_perf_active = false;
}
// Waits for the GPU, then adds the results of all queries below the current
// query index to pixel_perf and clears the dirty flag.
// The type parameter is not used by this implementation.
void Renderer::StorePixelPerfResult(PixelPerfQuery type)
{
// First, make sure the GPU has finished rendering so that query results are valid
D3D::context->End(gpu_finished_query);
BOOL gpu_finished = FALSE;
while (!gpu_finished)
{
// If nothing goes horribly wrong here, this should complete in finite time...
D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0);
}
// Read back every query that has been ended so far.
for(int i = 0; i < pixel_perf_query_index; ++i)
{
UINT64 buf = 0;
D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0);
// Reported pixel metrics should be referenced to native resolution:
pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight();
}
pixel_perf_dirty = false;
}
// Returns the accumulated pixel count divided by four, clamped to 32 bits.
// Returns 0 when pixel perf is disabled, for PP_EFB_COPY_CLOCKS and for the
// ZCOMPLOC query types listed below.
u32 Renderer::GetPixelPerfResult(PixelPerfQuery type)
{
if (g_ActiveConfig.bDisablePixelPerf)
return 0;
if (type == PP_EFB_COPY_CLOCKS)
{
// not implemented
return 0;
}
if (type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L ||
type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H ||
type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L ||
type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H)
{
// return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT
// TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers.
return 0;
}
// Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway
// Queries were started since the last readback: accumulate their results first.
if (pixel_perf_dirty)
StorePixelPerfResult(PP_ZCOMP_OUTPUT);
// Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels
return std::min(pixel_perf / 4, (u64)0xFFFFFFFF);
}
void SetSrcBlend(D3D11_BLEND val)
{
// Colors should blend against SRC_ALPHA

View File

@ -46,12 +46,6 @@ public:
void ReinterpretPixelData(unsigned int convtype);
void ResetPixelPerf();
void ResumePixelPerf(bool efb_copies);
void PausePixelPerf(bool efb_copies);
u32 GetPixelPerfResult(PixelPerfQuery type);
void StorePixelPerfResult(PixelPerfQuery type); // internal
void UpdateViewport(Matrix44& vpCorrection);
bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc);

View File

@ -274,9 +274,9 @@ void VertexManager::vFlush()
g_nativeVertexFmt->SetupVertexPointers();
g_renderer->ApplyState(useDstAlpha);
g_renderer->ResumePixelPerf(false);
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
Draw(stride);
g_renderer->PausePixelPerf(false);
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);

View File

@ -42,6 +42,7 @@
#include "D3DUtil.h"
#include "D3DBase.h"
#include "PerfQuery.h"
#include "PixelShaderCache.h"
#include "TextureCache.h"
#include "VertexManager.h"
@ -185,6 +186,7 @@ void VideoBackend::Video_Prepare()
g_renderer = new Renderer;
g_texture_cache = new TextureCache;
g_vertex_manager = new VertexManager;
g_perf_query = new PerfQuery;
VertexShaderCache::Init();
PixelShaderCache::Init();
D3D::InitUtils();
@ -227,6 +229,7 @@ void VideoBackend::Shutdown()
D3D::ShutdownUtils();
PixelShaderCache::Shutdown();
VertexShaderCache::Shutdown();
delete g_perf_query;
delete g_vertex_manager;
delete g_texture_cache;
delete g_renderer;

View File

@ -17,25 +17,25 @@ public:
u32 GetQueryResult(PerfQueryType type);
void FlushResults();
bool IsFlushed() const;
private:
struct ActiveQuery
{
GLuint query_id;
PerfQueryGroup query_type;
};
// when testing in SMS: 64 was too small, 128 was ok
static const int PERF_QUERY_BUFFER_SIZE = 512;
void WeakFlush();
// Only use when non-empty
void FlushOne();
// This contains gl query objects with unretrieved results.
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
int m_query_read_pos;
// TODO: sloppy
volatile int m_query_count;
volatile u32 m_results[PQG_NUM_MEMBERS];