Adds support for PE performance metrics in the D3D9 backend

This commit is contained in:
Rodolfo Bogado 2013-04-03 19:53:48 -03:00
parent d06379fc59
commit c4bc20b4d9
6 changed files with 206 additions and 3 deletions

View File

@ -199,6 +199,7 @@
<ClCompile Include="Src\FramebufferManager.cpp" /> <ClCompile Include="Src\FramebufferManager.cpp" />
<ClCompile Include="Src\main.cpp" /> <ClCompile Include="Src\main.cpp" />
<ClCompile Include="Src\NativeVertexFormat.cpp" /> <ClCompile Include="Src\NativeVertexFormat.cpp" />
<ClCompile Include="Src\PerfQuery.cpp" />
<ClCompile Include="Src\PixelShaderCache.cpp" /> <ClCompile Include="Src\PixelShaderCache.cpp" />
<ClCompile Include="Src\Render.cpp" /> <ClCompile Include="Src\Render.cpp" />
<ClCompile Include="Src\stdafx.cpp"> <ClCompile Include="Src\stdafx.cpp">
@ -222,6 +223,7 @@
<ClInclude Include="Src\FramebufferManager.h" /> <ClInclude Include="Src\FramebufferManager.h" />
<ClInclude Include="Src\Globals.h" /> <ClInclude Include="Src\Globals.h" />
<ClInclude Include="Src\main.h" /> <ClInclude Include="Src\main.h" />
<ClInclude Include="Src\PerfQuery.h" />
<ClInclude Include="Src\PixelShaderCache.h" /> <ClInclude Include="Src\PixelShaderCache.h" />
<ClInclude Include="Src\Render.h" /> <ClInclude Include="Src\Render.h" />
<ClInclude Include="Src\stdafx.h" /> <ClInclude Include="Src\stdafx.h" />

View File

@ -39,6 +39,9 @@
<ClCompile Include="Src\TextureConverter.cpp"> <ClCompile Include="Src\TextureConverter.cpp">
<Filter>D3D</Filter> <Filter>D3D</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="Src\PerfQuery.cpp">
<Filter>Render</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="Src\Globals.h" /> <ClInclude Include="Src\Globals.h" />
@ -78,6 +81,9 @@
<ClInclude Include="Src\TextureConverter.h"> <ClInclude Include="Src\TextureConverter.h">
<Filter>D3D</Filter> <Filter>D3D</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="Src\PerfQuery.h">
<Filter>Render</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<Filter Include="D3D"> <Filter Include="D3D">

View File

@ -0,0 +1,146 @@
#include "RenderBase.h"
#include "D3DBase.h"
#include "PerfQuery.h"
namespace DX9 {
// Creates one D3D9 occlusion query object for every slot of the query ring
// buffer, then clears the pending-query count and the accumulated results.
PerfQuery::PerfQuery()
	: m_query_read_pos()
	, m_query_count()
{
	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		// Start from a known value so that a failed creation can never leave
		// an uninitialized pointer in the slot.
		m_query_buffer[i].query = NULL;

		const HRESULT hr = D3D::dev->CreateQuery(D3DQUERYTYPE_OCCLUSION, &m_query_buffer[i].query);
		if (FAILED(hr))
		{
			// Do not trust the out-parameter after a failure.
			m_query_buffer[i].query = NULL;
			ERROR_LOG(VIDEO, "Failed to create occlusion query %d (hr=0x%08x)", i, (unsigned int)hr);
		}
	}
	ResetQuery();
}
// Releases the D3D9 query object held by every slot of the ring buffer.
PerfQuery::~PerfQuery()
{
	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		// A slot may hold no query object; calling Release() through a null
		// pointer would crash during teardown.
		if (m_query_buffer[i].query != NULL)
		{
			m_query_buffer[i].query->Release();
			m_query_buffer[i].query = NULL;
		}
	}
}
// Begins an occlusion query for the given group in the next free ring-buffer
// slot. Only the two z-compare groups are tracked; any other group merely
// triggers the buffer housekeeping below.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
	const size_t capacity = ARRAYSIZE(m_query_buffer);

	// Once the buffer is more than half full, try to retire finished queries.
	// (The threshold is a heuristic; the original author questioned it too.)
	if (m_query_count > capacity / 2)
		WeakFlush();

	// TODO: buffer is completely full - retire the oldest query to make room.
	if (m_query_count == capacity)
	{
		FlushOne();
		ERROR_LOG(VIDEO, "Flushed query buffer early!");
	}

	const bool is_zcomp_group = (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP);
	if (!is_zcomp_group)
		return;

	// The write position sits m_query_count slots past the read position.
	const size_t write_pos = (m_query_read_pos + m_query_count) % capacity;
	ActiveQuery& slot = m_query_buffer[write_pos];

	slot.query->Issue(D3DISSUE_BEGIN);
	slot.query_type = type;
	++m_query_count;
}
// Ends the most recently started occlusion query. Groups other than the two
// z-compare groups are ignored, mirroring EnableQuery.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
	if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
		return;

	const size_t capacity = ARRAYSIZE(m_query_buffer);

	// The last started slot is one behind the write position; adding the
	// capacity before subtracting keeps the index from going negative.
	const size_t last_pos = (m_query_read_pos + m_query_count + capacity - 1) % capacity;

	m_query_buffer[last_pos].query->Issue(D3DISSUE_END);
}
void PerfQuery::ResetQuery()
{
m_query_count = 0;
std::fill_n(m_results, ARRAYSIZE(m_results), 0);
}
// Maps a requested metric onto the accumulated per-group counters and returns
// the selected total divided by 4. Metrics without a matching group yield 0.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
	u32 total = 0;

	switch (type)
	{
	case PQ_ZCOMP_INPUT_ZCOMPLOC:
	case PQ_ZCOMP_OUTPUT_ZCOMPLOC:
		total = m_results[PQG_ZCOMP_ZCOMPLOC];
		break;

	case PQ_ZCOMP_INPUT:
	case PQ_ZCOMP_OUTPUT:
		total = m_results[PQG_ZCOMP];
		break;

	case PQ_BLEND_INPUT:
		// Blend input is the sum of both z-compare groups.
		total = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
		break;

	case PQ_EFB_COPY_CLOCKS:
		total = m_results[PQG_EFB_COPY_CLOCKS];
		break;

	default:
		break;
	}

	return total / 4;
}
void PerfQuery::FlushOne()
{
auto& entry = m_query_buffer[m_query_read_pos];
UINT64 result = 0;
HRESULT hr = S_FALSE;
while (hr != S_OK && hr != D3DERR_DEVICELOST)
{
// TODO: Might cause us to be stuck in an infinite loop!
hr = entry.query->GetData(&result, sizeof(result), D3DGETDATA_FLUSH);
}
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}
// Retires every pending query, one at a time, until none are left.
// TODO: could selectively flush things, but I don't think that will do much
void PerfQuery::FlushResults()
{
	for (;;)
	{
		if (IsFlushed())
			return;

		FlushOne();
	}
}
void PerfQuery::WeakFlush()
{
while (!IsFlushed())
{
auto& entry = m_query_buffer[m_query_read_pos];
UINT64 result = 0;
HRESULT hr = entry.query->GetData(&result, sizeof(result), 0);
if (hr == S_OK)
{
// NOTE: Reported pixel metrics should be referenced to native resolution
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
--m_query_count;
}
else
{
break;
}
}
}
// True when no query is waiting in the ring buffer.
bool PerfQuery::IsFlushed() const
{
	const bool nothing_pending = (m_query_count == 0);
	return nothing_pending;
}
} // namespace

View File

@ -0,0 +1,46 @@
#ifndef _PERFQUERY_H_
#define _PERFQUERY_H_
#include "PerfQueryBase.h"
namespace DX9 {
// Direct3D 9 implementation of PerfQueryBase. Pixel metrics are gathered with
// D3D9 occlusion queries kept in a fixed-size ring buffer; finished queries
// are folded into one running counter per PerfQueryGroup.
class PerfQuery : public PerfQueryBase
{
public:
	// Creates the occlusion query objects / releases them again.
	PerfQuery();
	~PerfQuery();

	// Begin / end a query for the given group.
	void EnableQuery(PerfQueryGroup type);
	void DisableQuery(PerfQueryGroup type);

	// Drops all pending queries and zeroes the accumulated counters.
	void ResetQuery();

	// Returns the accumulated value for the requested metric.
	u32 GetQueryResult(PerfQueryType type);

	// Retires every pending query.
	void FlushResults();

	// True when no query is pending.
	bool IsFlushed() const;

private:
	// Not copyable: every slot holds a COM reference that the destructor
	// releases, so a copy would release each query twice.
	// Declared but intentionally left undefined.
	PerfQuery(const PerfQuery&);
	PerfQuery& operator=(const PerfQuery&);

	// One ring-buffer slot: the query object and the group it counts for.
	struct ActiveQuery
	{
		IDirect3DQuery9* query;
		PerfQueryGroup query_type;
	};

	// Retires only the queries whose results are already available.
	void WeakFlush();

	// Only use when non-empty
	void FlushOne();

	// when testing in SMS: 64 was too small, 128 was ok
	static const int PERF_QUERY_BUFFER_SIZE = 512;

	ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];

	// Index of the oldest pending query in m_query_buffer.
	int m_query_read_pos;

	// Number of pending queries, starting at m_query_read_pos.
	// TODO: sloppy
	// NOTE(review): volatile does not provide thread synchronization; confirm
	// whether these members are really accessed from more than one thread.
	volatile int m_query_count;

	// Accumulated results, indexed by PerfQueryGroup.
	volatile u32 m_results[PQG_NUM_MEMBERS];
};
} // namespace
#endif // _PERFQUERY_H_

View File

@ -55,7 +55,7 @@
#include "BPFunctions.h" #include "BPFunctions.h"
#include "FPSCounter.h" #include "FPSCounter.h"
#include "ConfigManager.h" #include "ConfigManager.h"
#include "PerfQuery.h"
#include <strsafe.h> #include <strsafe.h>
@ -88,6 +88,7 @@ void SetupDeviceObjects()
VertexShaderCache::Init(); VertexShaderCache::Init();
PixelShaderCache::Init(); PixelShaderCache::Init();
g_vertex_manager->CreateDeviceObjects(); g_vertex_manager->CreateDeviceObjects();
g_perf_query = new PerfQuery;
// Texture cache will recreate themselves over time. // Texture cache will recreate themselves over time.
} }
@ -100,6 +101,7 @@ void TeardownDeviceObjects()
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface()); D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
D3D::dev->SetDepthStencilSurface(D3D::GetBackBufferDepthSurface()); D3D::dev->SetDepthStencilSurface(D3D::GetBackBufferDepthSurface());
delete g_framebuffer_manager; delete g_framebuffer_manager;
delete g_perf_query;
D3D::font.Shutdown(); D3D::font.Shutdown();
TextureCache::Invalidate(); TextureCache::Invalidate();
VertexLoaderManager::Shutdown(); VertexLoaderManager::Shutdown();

View File

@ -379,6 +379,7 @@ void VertexManager::vFlush()
} }
PrepareDrawBuffers(stride); PrepareDrawBuffers(stride);
g_nativeVertexFmt->SetupVertexPointers(); g_nativeVertexFmt->SetupVertexPointers();
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
if(m_buffers_count) if(m_buffers_count)
{ {
DrawVertexBuffer(stride); DrawVertexBuffer(stride);
@ -387,7 +388,7 @@ void VertexManager::vFlush()
{ {
DrawVertexArray(stride); DrawVertexArray(stride);
} }
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
if (useDstAlpha && !useDualSource) if (useDstAlpha && !useDualSource)
{ {
if (!PixelShaderCache::SetShader(DSTALPHA_ALPHA_PASS, g_nativeVertexFmt->m_components)) if (!PixelShaderCache::SetShader(DSTALPHA_ALPHA_PASS, g_nativeVertexFmt->m_components))