D3D11: Port perf queries code to the PerfQueryBase interface.
Remove deprecated PerfQuery methods from RenderBase. Windows build fix.
This commit is contained in:
parent
b94f65b666
commit
5a7bb2abfa
|
@ -128,11 +128,6 @@ public:
|
|||
static unsigned int GetPrevPixelFormat() { return prev_efb_format; }
|
||||
static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; }
|
||||
|
||||
virtual void ResetPixelPerf() {};
|
||||
virtual void ResumePixelPerf(bool efb_copies) {};
|
||||
virtual void PausePixelPerf(bool efb_copies) {};
|
||||
virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; };
|
||||
|
||||
// TODO: doesn't belong here
|
||||
virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0;
|
||||
virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0;
|
||||
|
|
|
@ -101,6 +101,9 @@
|
|||
<ClCompile Include="Src\MainBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\PerfQueryBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\RenderBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
</ClCompile>
|
||||
|
@ -113,8 +116,6 @@
|
|||
<ClCompile Include="Src\LightingShaderGen.cpp">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\PerfQueryBase.cpp">
|
||||
<Filter>Base</Filter>
|
||||
<ClCompile Include="Src\FPSCounter.cpp">
|
||||
<Filter>Util</Filter>
|
||||
</ClCompile>
|
||||
|
@ -239,6 +240,9 @@
|
|||
<ClInclude Include="Src\MainBase.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Src\PerfQueryBase.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Src\RenderBase.h">
|
||||
<Filter>Base</Filter>
|
||||
</ClInclude>
|
||||
|
@ -251,8 +255,6 @@
|
|||
<ClInclude Include="Src\LightingShaderGen.h">
|
||||
<Filter>Shader Generators</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Src\PerfQueryBase.h">
|
||||
<Filter>Base</Filter>
|
||||
<ClInclude Include="Src\FPSCounter.h">
|
||||
<Filter>Util</Filter>
|
||||
</ClInclude>
|
||||
|
|
|
@ -199,6 +199,7 @@
|
|||
<ClCompile Include="Src\LineGeometryShader.cpp" />
|
||||
<ClCompile Include="Src\main.cpp" />
|
||||
<ClCompile Include="Src\NativeVertexFormat.cpp" />
|
||||
<ClCompile Include="Src\PerfQuery.cpp" />
|
||||
<ClCompile Include="Src\PixelShaderCache.cpp" />
|
||||
<ClCompile Include="Src\PointGeometryShader.cpp" />
|
||||
<ClCompile Include="Src\PSTextureEncoder.cpp" />
|
||||
|
@ -228,6 +229,7 @@
|
|||
<ClInclude Include="Src\Globals.h" />
|
||||
<ClInclude Include="Src\LineGeometryShader.h" />
|
||||
<ClInclude Include="Src\main.h" />
|
||||
<ClInclude Include="Src\PerfQuery.h" />
|
||||
<ClInclude Include="Src\PixelShaderCache.h" />
|
||||
<ClInclude Include="Src\PointGeometryShader.h" />
|
||||
<ClInclude Include="Src\PSTextureEncoder.h" />
|
||||
|
|
|
@ -57,6 +57,9 @@
|
|||
<ClCompile Include="Src\PointGeometryShader.cpp">
|
||||
<Filter>Render</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Src\PerfQuery.cpp">
|
||||
<Filter>Render</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="Src\Globals.h" />
|
||||
|
@ -117,6 +120,9 @@
|
|||
<ClInclude Include="Src\PointGeometryShader.h">
|
||||
<Filter>Render</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Src\PerfQuery.h">
|
||||
<Filter>Render</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="D3D">
|
||||
|
|
|
@ -0,0 +1,148 @@
|
|||
#include "RenderBase.h"
|
||||
|
||||
#include "D3DBase.h"
|
||||
#include "PerfQuery.h"
|
||||
|
||||
namespace DX11 {
|
||||
|
||||
// Creates one D3D11 occlusion query object for every slot of the query ring
// buffer and clears the accumulated results.
PerfQuery::PerfQuery()
	: m_query_read_pos()
	, m_query_count()
{
	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		// Give the slot a defined value before asking the device to fill it in,
		// so a failed creation never leaves an indeterminate pointer behind.
		m_query_buffer[i].query = NULL;

		D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);
		const HRESULT hr = D3D::device->CreateQuery(&qdesc, &m_query_buffer[i].query);
		if (FAILED(hr))
		{
			// Don't silently swallow the error: report which slot is unusable.
			ERROR_LOG(VIDEO, "failed to create occlusion query %d (hr=%#x)", i, static_cast<unsigned int>(hr));
		}
	}
	ResetQuery();
}
|
||||
|
||||
// Releases the D3D11 query object held by every ring buffer slot.
PerfQuery::~PerfQuery()
{
	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		// TODO: EndQuery?
		// Guard against a slot that holds no query object; calling Release()
		// through a null pointer would crash during shutdown.
		if (m_query_buffer[i].query != NULL)
		{
			m_query_buffer[i].query->Release();
			m_query_buffer[i].query = NULL;
		}
	}
}
|
||||
|
||||
// Begins an occlusion query for the given group in the next free ring buffer
// slot. Before doing so, the buffer is drained opportunistically when it is
// more than half full, and forcibly (one entry) when it is completely full.
// Only the two ZCOMP groups actually start a query.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
	const size_t capacity = ARRAYSIZE(m_query_buffer);

	// Is this sane?
	// More than half of the slots are pending: collect whatever is already
	// finished without forcing the pipeline to flush.
	if (m_query_count > capacity / 2)
		WeakFlush();

	// Still no free slot: wait for the oldest query to complete.
	if (capacity == m_query_count)
	{
		// TODO
		FlushOne();
		ERROR_LOG(VIDEO, "flushed query buffer early!");
	}

	const bool counts_pixels = (type == PQG_ZCOMP_ZCOMPLOC) || (type == PQG_ZCOMP);
	if (!counts_pixels)
		return;

	// The slot right behind the newest pending entry.
	ActiveQuery& pending = m_query_buffer[(m_query_read_pos + m_query_count) % capacity];

	D3D::context->Begin(pending.query);
	pending.query_type = type;

	++m_query_count;
}
|
||||
|
||||
// Ends the most recently begun occlusion query. Groups other than the two
// ZCOMP groups never begin a query, so there is nothing to end for them.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
	if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
		return;

	const size_t capacity = ARRAYSIZE(m_query_buffer);

	// Index of the newest pending entry; adding the capacity before the
	// subtraction keeps the intermediate value from going below zero.
	ActiveQuery& newest = m_query_buffer[(m_query_read_pos + m_query_count + capacity - 1) % capacity];
	D3D::context->End(newest.query);
}
|
||||
|
||||
void PerfQuery::ResetQuery()
|
||||
{
|
||||
m_query_count = 0;
|
||||
std::fill_n(m_results, ARRAYSIZE(m_results), 0);
|
||||
}
|
||||
|
||||
// Maps a performance counter type onto the accumulated per-group results and
// returns that value divided by four. Unhandled types yield zero.
// NOTE(review): the division presumably converts pixels to 2x2 quads - confirm.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
	u32 pixels = 0;

	switch (type)
	{
	case PQ_ZCOMP_INPUT_ZCOMPLOC:
	case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
		pixels = m_results[PQG_ZCOMP_ZCOMPLOC];
		break;

	case PQ_ZCOMP_INPUT:
	case PQ_ZCOMP_OUTPUT:
		pixels = m_results[PQG_ZCOMP];
		break;

	case PQ_BLEND_INPUT:
		// Blend input is the sum of both depth-test groups.
		pixels = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
		break;

	case PQ_EFB_COPY_CLOCKS:
		pixels = m_results[PQG_EFB_COPY_CLOCKS];
		break;

	default:
		break;
	}

	return pixels / 4;
}
|
||||
|
||||
void PerfQuery::FlushOne()
|
||||
{
|
||||
auto& entry = m_query_buffer[m_query_read_pos];
|
||||
|
||||
UINT64 result = 0;
|
||||
HRESULT hr = S_FALSE;
|
||||
while (hr != S_OK)
|
||||
{
|
||||
// TODO: Might cause us to be stuck in an infinite loop!
|
||||
hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0);
|
||||
}
|
||||
|
||||
m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight();
|
||||
|
||||
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
|
||||
--m_query_count;
|
||||
}
|
||||
|
||||
// TODO: could selectively flush things, but I don't think that will do much
|
||||
void PerfQuery::FlushResults()
|
||||
{
|
||||
while (!IsFlushed())
|
||||
FlushOne();
|
||||
}
|
||||
|
||||
void PerfQuery::WeakFlush()
|
||||
{
|
||||
while (!IsFlushed())
|
||||
{
|
||||
auto& entry = m_query_buffer[m_query_read_pos];
|
||||
|
||||
UINT64 result = 0;
|
||||
HRESULT hr = D3D::context->GetData(entry.query, &result, sizeof(result), D3D11_ASYNC_GETDATA_DONOTFLUSH);
|
||||
|
||||
if (hr == S_OK)
|
||||
{
|
||||
m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight();
|
||||
|
||||
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
|
||||
--m_query_count;
|
||||
}
|
||||
else
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool PerfQuery::IsFlushed() const
|
||||
{
|
||||
return 0 == m_query_count;
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
|
@ -0,0 +1,46 @@
|
|||
#ifndef _PERFQUERY_H_
|
||||
#define _PERFQUERY_H_
|
||||
|
||||
#include "PerfQueryBase.h"
|
||||
|
||||
namespace DX11 {
|
||||
|
||||
class PerfQuery : public PerfQueryBase
|
||||
{
|
||||
public:
|
||||
PerfQuery();
|
||||
~PerfQuery();
|
||||
|
||||
void EnableQuery(PerfQueryGroup type);
|
||||
void DisableQuery(PerfQueryGroup type);
|
||||
void ResetQuery();
|
||||
u32 GetQueryResult(PerfQueryType type);
|
||||
void FlushResults();
|
||||
bool IsFlushed() const;
|
||||
|
||||
private:
|
||||
struct ActiveQuery
|
||||
{
|
||||
ID3D11Query* query;
|
||||
PerfQueryGroup query_type;
|
||||
};
|
||||
|
||||
void WeakFlush();
|
||||
|
||||
// Only use when non-empty
|
||||
void FlushOne();
|
||||
|
||||
// when testing in SMS: 64 was too small, 128 was ok
|
||||
static const int PERF_QUERY_BUFFER_SIZE = 512;
|
||||
|
||||
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
|
||||
int m_query_read_pos;
|
||||
|
||||
// TODO: sloppy
|
||||
volatile int m_query_count;
|
||||
volatile u32 m_results[PQG_NUM_MEMBERS];
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // _PERFQUERY_H_
|
|
@ -65,18 +65,6 @@ ID3D11RasterizerState* resetraststate = NULL;
|
|||
|
||||
static ID3D11Texture2D* s_screenshot_texture = NULL;
|
||||
|
||||
// Using a vector of query objects to avoid flushing the gpu pipeline all the time
|
||||
// TODO: Could probably optimized further by using a ring buffer or something
|
||||
#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess
|
||||
std::vector<ID3D11Query*> pixel_perf_queries;
|
||||
static int pixel_perf_query_index = 0;
|
||||
|
||||
static u64 pixel_perf = 0;
|
||||
static bool pixel_perf_active = false;
|
||||
static bool pixel_perf_dirty = false;
|
||||
|
||||
ID3D11Query* gpu_finished_query = NULL;
|
||||
|
||||
|
||||
// GX pipeline state
|
||||
struct
|
||||
|
@ -170,9 +158,6 @@ void SetupDeviceObjects()
|
|||
D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState");
|
||||
|
||||
s_screenshot_texture = NULL;
|
||||
|
||||
D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0);
|
||||
D3D::device->CreateQuery(&qdesc, &gpu_finished_query);
|
||||
}
|
||||
|
||||
// Kill off all device objects
|
||||
|
@ -180,12 +165,6 @@ void TeardownDeviceObjects()
|
|||
{
|
||||
delete g_framebuffer_manager;
|
||||
|
||||
while (!pixel_perf_queries.empty())
|
||||
{
|
||||
SAFE_RELEASE(pixel_perf_queries.back());
|
||||
pixel_perf_queries.pop_back();
|
||||
}
|
||||
SAFE_RELEASE(gpu_finished_query);
|
||||
SAFE_RELEASE(access_efb_cbuf);
|
||||
SAFE_RELEASE(clearblendstates[0]);
|
||||
SAFE_RELEASE(clearblendstates[1]);
|
||||
|
@ -232,11 +211,6 @@ Renderer::Renderer()
|
|||
s_LastEFBScale = g_ActiveConfig.iEFBScale;
|
||||
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
|
||||
|
||||
pixel_perf_query_index = 0;
|
||||
pixel_perf = 0;
|
||||
pixel_perf_active = false;
|
||||
pixel_perf_dirty = false;
|
||||
|
||||
SetupDeviceObjects();
|
||||
|
||||
|
||||
|
@ -660,112 +634,6 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
|
|||
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
|
||||
}
|
||||
|
||||
void Renderer::ResetPixelPerf()
|
||||
{
|
||||
if (g_ActiveConfig.bDisablePixelPerf)
|
||||
return;
|
||||
|
||||
if (pixel_perf_active)
|
||||
PausePixelPerf(false);
|
||||
|
||||
pixel_perf_query_index = 0;
|
||||
pixel_perf = 0;
|
||||
}
|
||||
|
||||
void Renderer::ResumePixelPerf(bool efb_copies)
|
||||
{
|
||||
if (g_ActiveConfig.bDisablePixelPerf)
|
||||
return;
|
||||
|
||||
if (efb_copies)
|
||||
return;
|
||||
|
||||
if(pixel_perf_active)
|
||||
return;
|
||||
|
||||
if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES)
|
||||
{
|
||||
D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);
|
||||
ID3D11Query* tmpquery = NULL;
|
||||
D3D::device->CreateQuery(&qdesc, &tmpquery);
|
||||
pixel_perf_queries.push_back(tmpquery);
|
||||
pixel_perf_query_index = pixel_perf_queries.size() - 1;
|
||||
}
|
||||
else if (pixel_perf_queries.size() < pixel_perf_query_index+1)
|
||||
{
|
||||
StorePixelPerfResult(PP_ZCOMP_OUTPUT);
|
||||
pixel_perf_query_index = 0;
|
||||
}
|
||||
// This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output.
|
||||
D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]);
|
||||
pixel_perf_active = true;
|
||||
pixel_perf_dirty = true;
|
||||
}
|
||||
|
||||
void Renderer::PausePixelPerf(bool efb_copies)
|
||||
{
|
||||
if (g_ActiveConfig.bDisablePixelPerf)
|
||||
return;
|
||||
|
||||
if(!pixel_perf_active)
|
||||
return;
|
||||
|
||||
D3D::context->End(pixel_perf_queries[pixel_perf_query_index]);
|
||||
pixel_perf_query_index++;
|
||||
pixel_perf_active = false;
|
||||
}
|
||||
|
||||
void Renderer::StorePixelPerfResult(PixelPerfQuery type)
|
||||
{
|
||||
// First, make sure the GPU has finished rendering so that query results are valid
|
||||
D3D::context->End(gpu_finished_query);
|
||||
BOOL gpu_finished = FALSE;
|
||||
while (!gpu_finished)
|
||||
{
|
||||
// If nothing goes horribly wrong here, this should complete in finite time...
|
||||
D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0);
|
||||
}
|
||||
|
||||
for(int i = 0; i < pixel_perf_query_index; ++i)
|
||||
{
|
||||
UINT64 buf = 0;
|
||||
D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0);
|
||||
|
||||
// Reported pixel metrics should be referenced to native resolution:
|
||||
pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight();
|
||||
}
|
||||
pixel_perf_dirty = false;
|
||||
}
|
||||
|
||||
u32 Renderer::GetPixelPerfResult(PixelPerfQuery type)
|
||||
{
|
||||
if (g_ActiveConfig.bDisablePixelPerf)
|
||||
return 0;
|
||||
|
||||
if (type == PP_EFB_COPY_CLOCKS)
|
||||
{
|
||||
// not implemented
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L ||
|
||||
type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H ||
|
||||
type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L ||
|
||||
type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H)
|
||||
{
|
||||
// return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT
|
||||
// TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers.
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway
|
||||
if (pixel_perf_dirty)
|
||||
StorePixelPerfResult(PP_ZCOMP_OUTPUT);
|
||||
|
||||
// Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels
|
||||
return std::min(pixel_perf / 4, (u64)0xFFFFFFFF);
|
||||
}
|
||||
|
||||
void SetSrcBlend(D3D11_BLEND val)
|
||||
{
|
||||
// Colors should blend against SRC_ALPHA
|
||||
|
|
|
@ -46,12 +46,6 @@ public:
|
|||
|
||||
void ReinterpretPixelData(unsigned int convtype);
|
||||
|
||||
void ResetPixelPerf();
|
||||
void ResumePixelPerf(bool efb_copies);
|
||||
void PausePixelPerf(bool efb_copies);
|
||||
u32 GetPixelPerfResult(PixelPerfQuery type);
|
||||
void StorePixelPerfResult(PixelPerfQuery type); // internal
|
||||
|
||||
void UpdateViewport(Matrix44& vpCorrection);
|
||||
|
||||
bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc);
|
||||
|
|
|
@ -274,9 +274,9 @@ void VertexManager::vFlush()
|
|||
g_nativeVertexFmt->SetupVertexPointers();
|
||||
g_renderer->ApplyState(useDstAlpha);
|
||||
|
||||
g_renderer->ResumePixelPerf(false);
|
||||
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
Draw(stride);
|
||||
g_renderer->PausePixelPerf(false);
|
||||
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||
|
||||
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
|
||||
|
||||
|
|
|
@ -42,6 +42,7 @@
|
|||
|
||||
#include "D3DUtil.h"
|
||||
#include "D3DBase.h"
|
||||
#include "PerfQuery.h"
|
||||
#include "PixelShaderCache.h"
|
||||
#include "TextureCache.h"
|
||||
#include "VertexManager.h"
|
||||
|
@ -185,6 +186,7 @@ void VideoBackend::Video_Prepare()
|
|||
g_renderer = new Renderer;
|
||||
g_texture_cache = new TextureCache;
|
||||
g_vertex_manager = new VertexManager;
|
||||
g_perf_query = new PerfQuery;
|
||||
VertexShaderCache::Init();
|
||||
PixelShaderCache::Init();
|
||||
D3D::InitUtils();
|
||||
|
@ -227,6 +229,7 @@ void VideoBackend::Shutdown()
|
|||
D3D::ShutdownUtils();
|
||||
PixelShaderCache::Shutdown();
|
||||
VertexShaderCache::Shutdown();
|
||||
delete g_perf_query;
|
||||
delete g_vertex_manager;
|
||||
delete g_texture_cache;
|
||||
delete g_renderer;
|
||||
|
|
Loading…
Reference in New Issue