Adds support for PE performance metrics in the D3D9 backend
This commit is contained in:
parent
d06379fc59
commit
c4bc20b4d9
|
@ -199,6 +199,7 @@
|
||||||
<ClCompile Include="Src\FramebufferManager.cpp" />
|
<ClCompile Include="Src\FramebufferManager.cpp" />
|
||||||
<ClCompile Include="Src\main.cpp" />
|
<ClCompile Include="Src\main.cpp" />
|
||||||
<ClCompile Include="Src\NativeVertexFormat.cpp" />
|
<ClCompile Include="Src\NativeVertexFormat.cpp" />
|
||||||
|
<ClCompile Include="Src\PerfQuery.cpp" />
|
||||||
<ClCompile Include="Src\PixelShaderCache.cpp" />
|
<ClCompile Include="Src\PixelShaderCache.cpp" />
|
||||||
<ClCompile Include="Src\Render.cpp" />
|
<ClCompile Include="Src\Render.cpp" />
|
||||||
<ClCompile Include="Src\stdafx.cpp">
|
<ClCompile Include="Src\stdafx.cpp">
|
||||||
|
@ -222,6 +223,7 @@
|
||||||
<ClInclude Include="Src\FramebufferManager.h" />
|
<ClInclude Include="Src\FramebufferManager.h" />
|
||||||
<ClInclude Include="Src\Globals.h" />
|
<ClInclude Include="Src\Globals.h" />
|
||||||
<ClInclude Include="Src\main.h" />
|
<ClInclude Include="Src\main.h" />
|
||||||
|
<ClInclude Include="Src\PerfQuery.h" />
|
||||||
<ClInclude Include="Src\PixelShaderCache.h" />
|
<ClInclude Include="Src\PixelShaderCache.h" />
|
||||||
<ClInclude Include="Src\Render.h" />
|
<ClInclude Include="Src\Render.h" />
|
||||||
<ClInclude Include="Src\stdafx.h" />
|
<ClInclude Include="Src\stdafx.h" />
|
||||||
|
|
|
@ -39,6 +39,9 @@
|
||||||
<ClCompile Include="Src\TextureConverter.cpp">
|
<ClCompile Include="Src\TextureConverter.cpp">
|
||||||
<Filter>D3D</Filter>
|
<Filter>D3D</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
|
<ClCompile Include="Src\PerfQuery.cpp">
|
||||||
|
<Filter>Render</Filter>
|
||||||
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClInclude Include="Src\Globals.h" />
|
<ClInclude Include="Src\Globals.h" />
|
||||||
|
@ -78,6 +81,9 @@
|
||||||
<ClInclude Include="Src\TextureConverter.h">
|
<ClInclude Include="Src\TextureConverter.h">
|
||||||
<Filter>D3D</Filter>
|
<Filter>D3D</Filter>
|
||||||
</ClInclude>
|
</ClInclude>
|
||||||
|
<ClInclude Include="Src\PerfQuery.h">
|
||||||
|
<Filter>Render</Filter>
|
||||||
|
</ClInclude>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Filter Include="D3D">
|
<Filter Include="D3D">
|
||||||
|
|
|
@ -0,0 +1,146 @@
|
||||||
|
#include "RenderBase.h"
|
||||||
|
|
||||||
|
#include "D3DBase.h"
|
||||||
|
#include "PerfQuery.h"
|
||||||
|
|
||||||
|
namespace DX9 {
|
||||||
|
|
||||||
|
// Creates one D3D9 occlusion query object per ring-buffer slot and clears the
// accumulated results.
//
// CreateQuery can fail (for example when the device does not support
// occlusion queries). The slot pointer is nulled before the call so that a
// failure never leaves an indeterminate pointer behind, and the failure is
// reported through the log instead of being silently dropped.
PerfQuery::PerfQuery()
	: m_query_read_pos()
	, m_query_count()
{
	for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i)
	{
		m_query_buffer[i].query = NULL;

		const HRESULT hr = D3D::dev->CreateQuery(D3DQUERYTYPE_OCCLUSION, &m_query_buffer[i].query);
		if (FAILED(hr))
		{
			// Make sure the slot is in a well-defined state whatever the
			// runtime wrote to the out parameter.
			m_query_buffer[i].query = NULL;
			ERROR_LOG(VIDEO, "Failed to create occlusion query!");
		}
	}
	ResetQuery();
}
|
||||||
|
|
||||||
|
// Releases the query object held by every slot of the ring buffer.
PerfQuery::~PerfQuery()
{
	ActiveQuery* const last = m_query_buffer + ARRAYSIZE(m_query_buffer);
	for (ActiveQuery* slot = m_query_buffer; slot != last; ++slot)
		slot->query->Release();
}
|
||||||
|
|
||||||
|
// Begins a new occlusion query for the given group.
//
// Before starting, finished queries are retired opportunistically once the
// ring buffer is more than half full; if it is completely full the oldest
// query is flushed (blocking) to free a slot. Only PQG_ZCOMP_ZCOMPLOC and
// PQG_ZCOMP actually start a query; other groups are ignored after the
// housekeeping above.
void PerfQuery::EnableQuery(PerfQueryGroup type)
{
	const size_t capacity = ARRAYSIZE(m_query_buffer);

	// Is this sane?
	if (m_query_count > capacity / 2)
		WeakFlush();

	if (capacity == m_query_count)
	{
		// TODO
		FlushOne();
		ERROR_LOG(VIDEO, "Flushed query buffer early!");
	}

	// Nothing to start for groups that are not z-comparison counters.
	if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
		return;

	// The next free slot sits right behind the newest pending query.
	const size_t write_pos = (m_query_read_pos + m_query_count) % capacity;
	ActiveQuery& slot = m_query_buffer[write_pos];

	slot.query->Issue(D3DISSUE_BEGIN);
	slot.query_type = type;
	++m_query_count;
}
|
||||||
|
|
||||||
|
// Ends the most recently started occlusion query.
//
// Only PQG_ZCOMP_ZCOMPLOC and PQG_ZCOMP are handled; other groups are ignored.
void PerfQuery::DisableQuery(PerfQueryGroup type)
{
	if (type != PQG_ZCOMP_ZCOMPLOC && type != PQG_ZCOMP)
		return;

	// Index of the newest pending slot; the buffer size is added before the
	// "- 1" so the sum cannot go below zero ahead of the modulo.
	const size_t capacity = ARRAYSIZE(m_query_buffer);
	const size_t newest = (m_query_read_pos + m_query_count + capacity - 1) % capacity;

	ActiveQuery& slot = m_query_buffer[newest];
	slot.query->Issue(D3DISSUE_END);
}
|
||||||
|
|
||||||
|
void PerfQuery::ResetQuery()
|
||||||
|
{
|
||||||
|
m_query_count = 0;
|
||||||
|
std::fill_n(m_results, ARRAYSIZE(m_results), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Maps a requested metric onto the accumulated per-group counters.
//
// Returns the selected counter divided by 4, or 0 for metric types that are
// not handled here.
// NOTE(review): the reason for the division by 4 is not visible in this file;
// confirm against the PerfQueryBase contract.
u32 PerfQuery::GetQueryResult(PerfQueryType type)
{
	const bool wants_zcomploc = (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC);
	const bool wants_zcomp = (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT);

	u32 total = 0;

	if (wants_zcomploc)
		total = m_results[PQG_ZCOMP_ZCOMPLOC];
	else if (wants_zcomp)
		total = m_results[PQG_ZCOMP];
	else if (type == PQ_BLEND_INPUT)
		total = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC];
	else if (type == PQ_EFB_COPY_CLOCKS)
		total = m_results[PQG_EFB_COPY_CLOCKS];

	return total / 4;
}
|
||||||
|
|
||||||
|
void PerfQuery::FlushOne()
|
||||||
|
{
|
||||||
|
auto& entry = m_query_buffer[m_query_read_pos];
|
||||||
|
|
||||||
|
UINT64 result = 0;
|
||||||
|
HRESULT hr = S_FALSE;
|
||||||
|
while (hr != S_OK && hr != D3DERR_DEVICELOST)
|
||||||
|
{
|
||||||
|
// TODO: Might cause us to be stuck in an infinite loop!
|
||||||
|
hr = entry.query->GetData(&result, sizeof(result), D3DGETDATA_FLUSH);
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
|
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
|
||||||
|
|
||||||
|
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
|
||||||
|
--m_query_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: could selectively flush things, but I don't think that will do much
|
||||||
|
void PerfQuery::FlushResults()
|
||||||
|
{
|
||||||
|
while (!IsFlushed())
|
||||||
|
FlushOne();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PerfQuery::WeakFlush()
|
||||||
|
{
|
||||||
|
while (!IsFlushed())
|
||||||
|
{
|
||||||
|
auto& entry = m_query_buffer[m_query_read_pos];
|
||||||
|
|
||||||
|
UINT64 result = 0;
|
||||||
|
HRESULT hr = entry.query->GetData(&result, sizeof(result), 0);
|
||||||
|
|
||||||
|
if (hr == S_OK)
|
||||||
|
{
|
||||||
|
// NOTE: Reported pixel metrics should be referenced to native resolution
|
||||||
|
m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight();
|
||||||
|
|
||||||
|
m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer);
|
||||||
|
--m_query_count;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PerfQuery::IsFlushed() const
|
||||||
|
{
|
||||||
|
return 0 == m_query_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace
|
|
@ -0,0 +1,46 @@
|
||||||
|
#ifndef _PERFQUERY_H_
|
||||||
|
#define _PERFQUERY_H_
|
||||||
|
|
||||||
|
#include "PerfQueryBase.h"
|
||||||
|
|
||||||
|
namespace DX9 {
|
||||||
|
|
||||||
|
class PerfQuery : public PerfQueryBase
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
PerfQuery();
|
||||||
|
~PerfQuery();
|
||||||
|
|
||||||
|
void EnableQuery(PerfQueryGroup type);
|
||||||
|
void DisableQuery(PerfQueryGroup type);
|
||||||
|
void ResetQuery();
|
||||||
|
u32 GetQueryResult(PerfQueryType type);
|
||||||
|
void FlushResults();
|
||||||
|
bool IsFlushed() const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct ActiveQuery
|
||||||
|
{
|
||||||
|
IDirect3DQuery9* query;
|
||||||
|
PerfQueryGroup query_type;
|
||||||
|
};
|
||||||
|
|
||||||
|
void WeakFlush();
|
||||||
|
|
||||||
|
// Only use when non-empty
|
||||||
|
void FlushOne();
|
||||||
|
|
||||||
|
// when testing in SMS: 64 was too small, 128 was ok
|
||||||
|
static const int PERF_QUERY_BUFFER_SIZE = 512;
|
||||||
|
|
||||||
|
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE];
|
||||||
|
int m_query_read_pos;
|
||||||
|
|
||||||
|
// TODO: sloppy
|
||||||
|
volatile int m_query_count;
|
||||||
|
volatile u32 m_results[PQG_NUM_MEMBERS];
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
#endif // _PERFQUERY_H_
|
|
@ -55,7 +55,7 @@
|
||||||
#include "BPFunctions.h"
|
#include "BPFunctions.h"
|
||||||
#include "FPSCounter.h"
|
#include "FPSCounter.h"
|
||||||
#include "ConfigManager.h"
|
#include "ConfigManager.h"
|
||||||
|
#include "PerfQuery.h"
|
||||||
#include <strsafe.h>
|
#include <strsafe.h>
|
||||||
|
|
||||||
|
|
||||||
|
@ -88,6 +88,7 @@ void SetupDeviceObjects()
|
||||||
VertexShaderCache::Init();
|
VertexShaderCache::Init();
|
||||||
PixelShaderCache::Init();
|
PixelShaderCache::Init();
|
||||||
g_vertex_manager->CreateDeviceObjects();
|
g_vertex_manager->CreateDeviceObjects();
|
||||||
|
g_perf_query = new PerfQuery;
|
||||||
// Texture cache will recreate themselves over time.
|
// Texture cache will recreate themselves over time.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,6 +101,7 @@ void TeardownDeviceObjects()
|
||||||
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
|
D3D::dev->SetRenderTarget(0, D3D::GetBackBufferSurface());
|
||||||
D3D::dev->SetDepthStencilSurface(D3D::GetBackBufferDepthSurface());
|
D3D::dev->SetDepthStencilSurface(D3D::GetBackBufferDepthSurface());
|
||||||
delete g_framebuffer_manager;
|
delete g_framebuffer_manager;
|
||||||
|
delete g_perf_query;
|
||||||
D3D::font.Shutdown();
|
D3D::font.Shutdown();
|
||||||
TextureCache::Invalidate();
|
TextureCache::Invalidate();
|
||||||
VertexLoaderManager::Shutdown();
|
VertexLoaderManager::Shutdown();
|
||||||
|
|
|
@ -378,7 +378,8 @@ void VertexManager::vFlush()
|
||||||
|
|
||||||
}
|
}
|
||||||
PrepareDrawBuffers(stride);
|
PrepareDrawBuffers(stride);
|
||||||
g_nativeVertexFmt->SetupVertexPointers();
|
g_nativeVertexFmt->SetupVertexPointers();
|
||||||
|
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||||
if(m_buffers_count)
|
if(m_buffers_count)
|
||||||
{
|
{
|
||||||
DrawVertexBuffer(stride);
|
DrawVertexBuffer(stride);
|
||||||
|
@ -387,7 +388,7 @@ void VertexManager::vFlush()
|
||||||
{
|
{
|
||||||
DrawVertexArray(stride);
|
DrawVertexArray(stride);
|
||||||
}
|
}
|
||||||
|
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
|
||||||
if (useDstAlpha && !useDualSource)
|
if (useDstAlpha && !useDualSource)
|
||||||
{
|
{
|
||||||
if (!PixelShaderCache::SetShader(DSTALPHA_ALPHA_PASS, g_nativeVertexFmt->m_components))
|
if (!PixelShaderCache::SetShader(DSTALPHA_ALPHA_PASS, g_nativeVertexFmt->m_components))
|
||||||
|
|
Loading…
Reference in New Issue