From 4d8d86bd6a27e9eea5bbf1440cbf317f02320519 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 29 May 2012 13:54:20 +0200 Subject: [PATCH 01/20] D3D11: Implement some PE pixel performance metrics. Super Mario Sunshine is using a cool trick: To determine how much goop has been cleaned in ep. 6 of Sirena Beach, it counts the number of pixels that are input to the blending stage. For that it's using the PE performance registers ;) Fixes issue 1498. --- Source/Core/VideoCommon/Src/BPMemory.h | 2 +- Source/Core/VideoCommon/Src/BPStructs.cpp | 13 +- Source/Core/VideoCommon/Src/PixelEngine.cpp | 71 +++++++--- Source/Core/VideoCommon/Src/PixelEngine.h | 27 ++-- Source/Core/VideoCommon/Src/RenderBase.h | 14 ++ .../VideoCommon/Src/VertexManagerBase.cpp | 2 + Source/Core/VideoCommon/Src/VideoConfig.cpp | 5 + Source/Core/VideoCommon/Src/VideoConfig.h | 2 + .../Plugins/Plugin_VideoDX11/Src/Render.cpp | 133 ++++++++++++++++++ Source/Plugins/Plugin_VideoDX11/Src/Render.h | 6 + .../Plugin_VideoDX11/Src/VertexManager.cpp | 3 +- Source/Plugins/Plugin_VideoDX11/Src/main.cpp | 1 + Source/Plugins/Plugin_VideoDX9/Src/main.cpp | 1 + Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 1 + 14 files changed, 245 insertions(+), 36 deletions(-) diff --git a/Source/Core/VideoCommon/Src/BPMemory.h b/Source/Core/VideoCommon/Src/BPMemory.h index 9284674aa1..ce65bcb824 100644 --- a/Source/Core/VideoCommon/Src/BPMemory.h +++ b/Source/Core/VideoCommon/Src/BPMemory.h @@ -62,7 +62,7 @@ #define BPMEM_COPYFILTER1 0x54 #define BPMEM_CLEARBBOX1 0x55 #define BPMEM_CLEARBBOX2 0x56 -#define BPMEM_UNKNOWN_57 0x57 +#define BPMEM_CLEAR_PIXEL_PERF 0x57 #define BPMEM_REVBITS 0x58 #define BPMEM_SCISSOROFFSET 0x59 #define BPMEM_PRELOAD_ADDR 0x60 diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index 179196e412..bc6de89bf2 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -62,7 +62,6 @@ void RenderToXFB(const BPCmd 
&bp, const EFBRectangle &rc, float yScale, float xf { Renderer::RenderToXFB(xfbAddr, dstWidth, dstHeight, rc, gamma); } - void BPWritten(const BPCmd& bp) { /* @@ -141,7 +140,8 @@ void BPWritten(const BPCmd& bp) || bp.address == BPMEM_LOADTLUT0 || bp.address == BPMEM_LOADTLUT1 || bp.address == BPMEM_TEXINVALIDATE - || bp.address == BPMEM_PRELOAD_MODE)) + || bp.address == BPMEM_PRELOAD_MODE + || bp.address == BPMEM_CLEAR_PIXEL_PERF)) { return; } @@ -265,6 +265,8 @@ void BPWritten(const BPCmd& bp) UPE_Copy PE_copy = bpmem.triggerEFBCopy; + g_renderer->ResumePixelPerf(true); + // Check if we are to copy from the EFB or draw to the XFB if (PE_copy.copy_to_xfb == 0) { @@ -303,6 +305,8 @@ void BPWritten(const BPCmd& bp) s_gammaLUT[PE_copy.gamma]); } + g_renderer->PausePixelPerf(true); + // Clear the rectangular region after copying it. if (PE_copy.clear) { @@ -481,8 +485,9 @@ void BPWritten(const BPCmd& bp) case BPMEM_REVBITS: // Always set to 0x0F when GX_InitRevBits() is called. break; - case BPMEM_UNKNOWN_57: // Sunshine alternates this register between values 0x000 and 0xAAA - DEBUG_LOG(VIDEO, "Unknown BP Reg 0x57: %08x", bp.newvalue); + case BPMEM_CLEAR_PIXEL_PERF: + // GXClearPixMetric writes 0xAAA here, Sunshine alternates this register between values 0x000 and 0xAAA + g_renderer->ResetPixelPerf(); break; case BPMEM_PRELOAD_ADDR: diff --git a/Source/Core/VideoCommon/Src/PixelEngine.cpp b/Source/Core/VideoCommon/Src/PixelEngine.cpp index a488e52b42..4aac3c2456 100644 --- a/Source/Core/VideoCommon/Src/PixelEngine.cpp +++ b/Source/Core/VideoCommon/Src/PixelEngine.cpp @@ -28,6 +28,7 @@ #include "ConfigManager.h" #include "PixelEngine.h" +#include "RenderBase.h" #include "CommandProcessor.h" #include "HW/ProcessorInterface.h" #include "DLCache.h" @@ -255,23 +256,59 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) break; } - case PE_PERF_0L: - case PE_PERF_0H: - case PE_PERF_1L: - case PE_PERF_1H: - case PE_PERF_2L: - case PE_PERF_2H: - case PE_PERF_3L: - case 
PE_PERF_3H: - case PE_PERF_4L: - case PE_PERF_4H: - case PE_PERF_5L: - case PE_PERF_5H: - INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress); - // git r90a2096a24f4 (svn r3663) added the PE_PERF cases, without setting - // _uReturnValue to anything, this reverts to the previous behaviour which allows - // The timer in SMS:Scrubbing Serena Beach to countdown correctly - _uReturnValue = 1; + // NOTE(neobrain): only PE_PERF_ZCOMP_OUTPUT is implemented in D3D11, but the other values shouldn't be contradictionary to the value of that register (i.e. INPUT registers should always be greater or equal to their corresponding OUTPUT registers). + case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF; + break; + + case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT_ZCOMPLOC) >> 16; + break; + + case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF; + break; + + case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT_ZCOMPLOC) >> 16; + break; + + case PE_PERF_ZCOMP_INPUT_L: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT) & 0xFFFF; + break; + + case PE_PERF_ZCOMP_INPUT_H: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT) >> 16; + break; + + case PE_PERF_ZCOMP_OUTPUT_L: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT) & 0xFFFF; + break; + + case PE_PERF_ZCOMP_OUTPUT_H: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT) >> 16; + break; + + case PE_PERF_BLEND_INPUT_L: + // Super Mario Sunshine uses this register in episode 6 of Sirena Beach: + // The amount of remaining goop is determined by checking how many pixels reach the blending stage. 
+ // Once this register falls below a particular value (around 0x90), the game regards the challenge finished. + // In very old builds, Dolphin only returned 0. That caused the challenge to be immediately finished without any goop being cleaned (the timer just didn't even start counting from 3:00:00). + // Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all. + // Note that currently this functionality is only implemented in the D3D11 backend. + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_BLEND_INPUT) & 0xFFFF; + break; + + case PE_PERF_BLEND_INPUT_H: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_BLEND_INPUT) >> 16; + break; + + case PE_PERF_EFB_COPY_CLOCKS_L: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_EFB_COPY_CLOCKS) & 0xFFFF; + break; + + case PE_PERF_EFB_COPY_CLOCKS_H: + _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_EFB_COPY_CLOCKS) >> 16; break; default: diff --git a/Source/Core/VideoCommon/Src/PixelEngine.h b/Source/Core/VideoCommon/Src/PixelEngine.h index 64f959009f..eaf55f0031 100644 --- a/Source/Core/VideoCommon/Src/PixelEngine.h +++ b/Source/Core/VideoCommon/Src/PixelEngine.h @@ -36,19 +36,20 @@ enum PE_BBOX_TOP = 0x14, // Flip Top PE_BBOX_BOTTOM = 0x16, // Flip Bottom - // These have not yet been RE:d. They are the perf counters. 
- PE_PERF_0L = 0x18, - PE_PERF_0H = 0x1a, - PE_PERF_1L = 0x1c, - PE_PERF_1H = 0x1e, - PE_PERF_2L = 0x20, - PE_PERF_2H = 0x22, - PE_PERF_3L = 0x24, - PE_PERF_3H = 0x26, - PE_PERF_4L = 0x28, - PE_PERF_4H = 0x2a, - PE_PERF_5L = 0x2c, - PE_PERF_5H = 0x2e, + // NOTE: Order not verified + // These indicate the number of quads that are being used as input/output for each particular stage + PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L = 0x18, + PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H = 0x1a, + PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L = 0x1c, + PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H = 0x1e, + PE_PERF_ZCOMP_INPUT_L = 0x20, + PE_PERF_ZCOMP_INPUT_H = 0x22, + PE_PERF_ZCOMP_OUTPUT_L = 0x24, + PE_PERF_ZCOMP_OUTPUT_H = 0x26, + PE_PERF_BLEND_INPUT_L = 0x28, + PE_PERF_BLEND_INPUT_H = 0x2a, + PE_PERF_EFB_COPY_CLOCKS_L = 0x2c, + PE_PERF_EFB_COPY_CLOCKS_H = 0x2e, }; namespace PixelEngine diff --git a/Source/Core/VideoCommon/Src/RenderBase.h b/Source/Core/VideoCommon/Src/RenderBase.h index e8d4c55a20..4d288143c0 100644 --- a/Source/Core/VideoCommon/Src/RenderBase.h +++ b/Source/Core/VideoCommon/Src/RenderBase.h @@ -52,6 +52,15 @@ public: Renderer(); virtual ~Renderer(); + enum PixelPerfQuery { + PP_ZCOMP_INPUT_ZCOMPLOC, + PP_ZCOMP_OUTPUT_ZCOMPLOC, + PP_ZCOMP_INPUT, + PP_ZCOMP_OUTPUT, + PP_BLEND_INPUT, + PP_EFB_COPY_CLOCKS + }; + virtual void SetColorMask() = 0; virtual void SetBlendMode(bool forceUpdate) = 0; virtual void SetScissorRect(const TargetRectangle& rc) = 0; @@ -119,6 +128,11 @@ public: static unsigned int GetPrevPixelFormat() { return prev_efb_format; } static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; } + virtual void ResetPixelPerf() {}; + virtual void ResumePixelPerf(bool efb_copies) {}; + virtual void PausePixelPerf(bool efb_copies) {}; + virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; }; + // TODO: doesn't belong here virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0; virtual void SetPSConstant4fv(unsigned 
int const_number, const float *f) = 0; diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index 4118e3dcbd..86dd891386 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -253,7 +253,9 @@ void VertexManager::Flush() //if (g_nativeVertexFmt) g_nativeVertexFmt->SetupVertexPointers(); + g_renderer->ResumePixelPerf(false); g_vertex_manager->Draw(stride, false); + g_renderer->PausePixelPerf(false); // run through vertex groups again to set alpha if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index c5628c19a1..5f464c3322 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -104,6 +104,7 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Hacks", "EFBScaledCopy", &bCopyEFBScaled, true); iniFile.Get("Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable, false); iniFile.Get("Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false); + iniFile.Get("Hacks", "DisablePixelPerf", &bDisablePixelPerf, true); iniFile.Get("Hardware", "Adapter", &iAdapter, 0); @@ -153,6 +154,7 @@ void VideoConfig::GameIniLoad(const char *ini_file) iniFile.GetIfExists("Video_Hacks", "EFBScaledCopy", &bCopyEFBScaled); iniFile.GetIfExists("Video_Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable); iniFile.GetIfExists("Video_Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges); + iniFile.GetIfExists("Video_Hacks", "DisablePixelPerf", &bDisablePixelPerf); iniFile.GetIfExists("Video", "ProjectionHack", &iPhackvalue[0]); iniFile.GetIfExists("Video", "PH_SZNear", &iPhackvalue[1]); @@ -172,6 +174,7 @@ void VideoConfig::VerifyValidity() if (!backend_info.bSupports3DVision) b3DVision = false; if (!backend_info.bSupportsFormatReinterpretation) 
bEFBEmulateFormatChanges = false; if (!backend_info.bSupportsPixelLighting) bEnablePixelLighting = false; + if (!backend_info.bSupportsPixelPerfQuery) bDisablePixelPerf = true; } void VideoConfig::Save(const char *ini_file) @@ -231,6 +234,7 @@ void VideoConfig::Save(const char *ini_file) iniFile.Set("Hacks", "EFBScaledCopy", bCopyEFBScaled); iniFile.Set("Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable); iniFile.Set("Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges); + iniFile.Set("Hacks", "DisablePixelPerf", bDisablePixelPerf); iniFile.Set("Hardware", "Adapter", iAdapter); @@ -287,6 +291,7 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini) SET_IF_DIFFERS("Video_Hacks", "EFBScaledCopy", bCopyEFBScaled); SET_IF_DIFFERS("Video_Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable); SET_IF_DIFFERS("Video_Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges); + SET_IF_DIFFERS("Video_Hacks", "DisablePixelPerf", bDisablePixelPerf); iniFile.Save(game_ini); } diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 7653972ec8..8593de4fd7 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -133,6 +133,7 @@ struct VideoConfig bool bUseBBox; bool bEnablePixelLighting; bool bEnablePerPixelDepth; + bool bDisablePixelPerf; int iLog; // CONF_ bits int iSaveTargetId; // TODO: Should be dropped @@ -161,6 +162,7 @@ struct VideoConfig bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL bool bSupportsFormatReinterpretation; bool bSupportsPixelLighting; + bool bSupportsPixelPerfQuery; } backend_info; }; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp index 3a725452e7..3f88d7f99d 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp @@ -65,6 +65,19 @@ ID3D11RasterizerState* resetraststate = NULL; static 
ID3D11Texture2D* s_screenshot_texture = NULL; +// Using a vector of query objects to avoid flushing the gpu pipeline all the time +// TODO: Could probably optimized further by using a ring buffer or something +#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess +std::vector<ID3D11Query*> pixel_perf_queries; +static int pixel_perf_query_index = 0; + +static u64 pixel_perf = 0; +static bool pixel_perf_active = false; +static bool pixel_perf_dirty = false; + +ID3D11Query* gpu_finished_query = NULL; + + // GX pipeline state struct { @@ -302,6 +315,9 @@ void SetupDeviceObjects() D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState"); s_screenshot_texture = NULL; + + D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0); + D3D::device->CreateQuery(&qdesc, &gpu_finished_query); } // Kill off all device objects @@ -309,6 +325,12 @@ void TeardownDeviceObjects() { delete g_framebuffer_manager; + while (!pixel_perf_queries.empty()) + { + SAFE_RELEASE(pixel_perf_queries.back()); + pixel_perf_queries.pop_back(); + } + SAFE_RELEASE(gpu_finished_query); SAFE_RELEASE(access_efb_cbuf); SAFE_RELEASE(clearblendstates[0]); SAFE_RELEASE(clearblendstates[1]); @@ -357,6 +379,11 @@ Renderer::Renderer() s_LastEFBScale = g_ActiveConfig.iEFBScale; CalculateTargetSize(); + pixel_perf_query_index = 0; + pixel_perf = 0; + pixel_perf_active = false; + pixel_perf_dirty = false; + SetupDeviceObjects(); @@ -777,6 +804,112 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); } +void Renderer::ResetPixelPerf() +{ + if (g_ActiveConfig.bDisablePixelPerf) + return; + + if (pixel_perf_active) + PausePixelPerf(false); + + pixel_perf_query_index = 0; + pixel_perf = 0; +} + +void Renderer::ResumePixelPerf(bool efb_copies) +{ + if (g_ActiveConfig.bDisablePixelPerf) + return; + + if (efb_copies) +
return; + + if(pixel_perf_active) + return; + + if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES) + { + D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0); + ID3D11Query* tmpquery = NULL; + D3D::device->CreateQuery(&qdesc, &tmpquery); + pixel_perf_queries.push_back(tmpquery); + pixel_perf_query_index = pixel_perf_queries.size() - 1; + } + else if (pixel_perf_queries.size() < pixel_perf_query_index+1) + { + StorePixelPerfResult(PP_ZCOMP_OUTPUT); + pixel_perf_query_index = 0; + } + // This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output. + D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]); + pixel_perf_active = true; + pixel_perf_dirty = true; +} + +void Renderer::PausePixelPerf(bool efb_copies) +{ + if (g_ActiveConfig.bDisablePixelPerf) + return; + + if(!pixel_perf_active) + return; + + D3D::context->End(pixel_perf_queries[pixel_perf_query_index]); + pixel_perf_query_index++; + pixel_perf_active = false; +} + +void Renderer::StorePixelPerfResult(PixelPerfQuery type) +{ + // First, make sure the GPU has finished rendering so that query results are valid + D3D::context->End(gpu_finished_query); + BOOL gpu_finished = FALSE; + while (!gpu_finished) + { + // If nothing goes horribly wrong here, this should complete in finite time... 
+ D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0); + } + + for(int i = 0; i < pixel_perf_query_index; ++i) + { + UINT64 buf = 0; + D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0); + + // Reported pixel metrics should be referenced to native resolution: + pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight(); + } + pixel_perf_dirty = false; +} + +u32 Renderer::GetPixelPerfResult(PixelPerfQuery type) +{ + if (g_ActiveConfig.bDisablePixelPerf) + return 0; + + if (type == PP_EFB_COPY_CLOCKS) + { + // not implemented + return 0; + } + + if (type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L || + type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H || + type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L || + type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H) + { + // return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT + // TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers. + return 0; + } + + // Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway + if (pixel_perf_dirty) + StorePixelPerfResult(PP_ZCOMP_OUTPUT); + + // Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels + return std::min(pixel_perf / 4, (u64)0xFFFFFFFF); +} + void SetSrcBlend(D3D11_BLEND val) { // Colors should blend against SRC_ALPHA diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.h b/Source/Plugins/Plugin_VideoDX11/Src/Render.h index 8f6c78fae1..6db829c205 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.h @@ -46,6 +46,12 @@ public: void ReinterpretPixelData(unsigned int convtype); + void ResetPixelPerf(); + void ResumePixelPerf(bool efb_copies); + void PausePixelPerf(bool efb_copies); + u32 GetPixelPerfResult(PixelPerfQuery type); + void StorePixelPerfResult(PixelPerfQuery type); // internal + void UpdateViewport(Matrix44& 
vpCorrection); bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index 58bad98cc6..4bb04bc302 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -198,7 +198,6 @@ void VertexManager::Draw(UINT stride) if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0) ((DX11::Renderer*)g_renderer)->RestoreCull(); } - void VertexManager::vFlush() { if (LocalVBuffer == s_pCurBufferPointer) return; @@ -266,7 +265,9 @@ void VertexManager::vFlush() g_renderer->ApplyState(useDstAlpha); LoadBuffers(); + g_renderer->ResumePixelPerf(false); Draw(stride); + g_renderer->PausePixelPerf(false); GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index cb348b8684..161321cf08 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -94,6 +94,7 @@ void InitBackendInfo() g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsPixelLighting = true; + g_Config.backend_info.bSupportsPixelPerfQuery = true; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index 5aeca6751d..1d1a2db112 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -96,6 +96,7 @@ void InitBackendInfo() g_Config.backend_info.bSupports3DVision = true; g_Config.backend_info.bSupportsDualSourceBlend = false; g_Config.backend_info.bSupportsFormatReinterpretation = true; + g_Config.backend_info.bSupportsPixelPerfQuery = false; g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants 
&& C_PMATERIALS + 4 <= maxConstants; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 543fb9824d..2c8f6d4716 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -135,6 +135,7 @@ void InitBackendInfo() g_Config.backend_info.bSupportsDualSourceBlend = false; // supported, but broken g_Config.backend_info.bSupportsFormatReinterpretation = false; g_Config.backend_info.bSupportsPixelLighting = true; + g_Config.backend_info.bSupportsPixelPerfQuery = false; // aamodes const char* caamodes[] = {"None", "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA"}; From cf8744cf2c1de2231d180348c4b55ebc2b35240f Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 17 Jun 2012 13:58:29 +0200 Subject: [PATCH 02/20] OGL: Implement pixel metrics (untested) --- Source/Core/VideoCommon/CMakeLists.txt | 1 + Source/Core/VideoCommon/Src/BPStructs.cpp | 3 +- Source/Core/VideoCommon/Src/PerfQueryBase.cpp | 3 + Source/Core/VideoCommon/Src/PerfQueryBase.h | 39 ++++++++++ Source/Core/VideoCommon/Src/PixelEngine.cpp | 26 ++++--- Source/Plugins/Plugin_VideoOGL/CMakeLists.txt | 1 + .../Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp | 78 +++++++++++++++++++ .../Plugins/Plugin_VideoOGL/Src/PerfQuery.h | 22 ++++++ .../Plugin_VideoOGL/Src/VertexManager.cpp | 4 + Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 2 + 10 files changed, 166 insertions(+), 13 deletions(-) create mode 100644 Source/Core/VideoCommon/Src/PerfQueryBase.cpp create mode 100644 Source/Core/VideoCommon/Src/PerfQueryBase.h create mode 100644 Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp create mode 100644 Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h diff --git a/Source/Core/VideoCommon/CMakeLists.txt b/Source/Core/VideoCommon/CMakeLists.txt index d0e28f571c..5a4ed87c1d 100644 --- a/Source/Core/VideoCommon/CMakeLists.txt +++ b/Source/Core/VideoCommon/CMakeLists.txt @@ -16,6 +16,7 @@ set(SRCS Src/BPFunctions.cpp 
Src/OpcodeDecoding.cpp Src/OpenCL.cpp Src/OpenCL/OCLTextureDecoder.cpp + Src/PerfQueryBase.cpp Src/PixelEngine.cpp Src/PixelShaderGen.cpp Src/PixelShaderManager.cpp diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index bc6de89bf2..cda0f7c7c0 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -31,6 +31,7 @@ #include "VertexShaderManager.h" #include "Thread.h" #include "HW/Memmap.h" +#include "PerfQueryBase.h" using namespace BPFunctions; @@ -487,7 +488,7 @@ void BPWritten(const BPCmd& bp) case BPMEM_CLEAR_PIXEL_PERF: // GXClearPixMetric writes 0xAAA here, Sunshine alternates this register between values 0x000 and 0xAAA - g_renderer->ResetPixelPerf(); + g_perf_query->ResetQuery(); break; case BPMEM_PRELOAD_ADDR: diff --git a/Source/Core/VideoCommon/Src/PerfQueryBase.cpp b/Source/Core/VideoCommon/Src/PerfQueryBase.cpp new file mode 100644 index 0000000000..c537d176f6 --- /dev/null +++ b/Source/Core/VideoCommon/Src/PerfQueryBase.cpp @@ -0,0 +1,3 @@ +#include "PerfQueryBase.h" + +PerfQueryBase* g_perf_query = 0; diff --git a/Source/Core/VideoCommon/Src/PerfQueryBase.h b/Source/Core/VideoCommon/Src/PerfQueryBase.h new file mode 100644 index 0000000000..0520e9244c --- /dev/null +++ b/Source/Core/VideoCommon/Src/PerfQueryBase.h @@ -0,0 +1,39 @@ +#ifndef _PERFQUERY_BASE_H_ +#define _PERFQUERY_BASE_H_ + +#include "CommonTypes.h" + +enum PerfQueryType +{ + PQ_ZCOMP_INPUT_ZCOMPLOC = 0, + PQ_ZCOMP_OUTPUT_ZCOMPLOC, + PQ_ZCOMP_INPUT, + PQ_ZCOMP_OUTPUT, + PQ_BLEND_INPUT, + PQ_EFB_COPY_CLOCKS, + PQ_NUM_MEMBERS +}; + +enum PerfQueryGroup +{ + PQG_ZCOMP_ZCOMPLOC, + PQG_ZCOMP, + PQG_EFB_COPY_CLOCKS, + PQG_NUM_MEMBERS, +}; + +class PerfQueryBase +{ +public: + PerfQueryBase() {}; + virtual ~PerfQueryBase() {} + + virtual void EnableQuery(PerfQueryGroup type) {} + virtual void DisableQuery(PerfQueryGroup type) {} + virtual void ResetQuery() {} + virtual u32 
GetQueryResult(PerfQueryType type) { return 0; } +}; + +extern PerfQueryBase* g_perf_query; + +#endif // _PERFQUERY_H_ diff --git a/Source/Core/VideoCommon/Src/PixelEngine.cpp b/Source/Core/VideoCommon/Src/PixelEngine.cpp index 4aac3c2456..005468f63c 100644 --- a/Source/Core/VideoCommon/Src/PixelEngine.cpp +++ b/Source/Core/VideoCommon/Src/PixelEngine.cpp @@ -33,6 +33,8 @@ #include "HW/ProcessorInterface.h" #include "DLCache.h" #include "State.h" +#include "PerfQueryBase.h" + namespace PixelEngine { @@ -258,35 +260,35 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) // NOTE(neobrain): only PE_PERF_ZCOMP_OUTPUT is implemented in D3D11, but the other values shouldn't be contradictionary to the value of that register (i.e. INPUT registers should always be greater or equal to their corresponding OUTPUT registers). case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF; break; case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT_ZCOMPLOC) >> 16; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) >> 16; break; case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF; break; case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT_ZCOMPLOC) >> 16; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) >> 16; break; case PE_PERF_ZCOMP_INPUT_L: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT) & 0xFFFF; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT) & 0xFFFF; break; case PE_PERF_ZCOMP_INPUT_H: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT) 
>> 16; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT) >> 16; break; case PE_PERF_ZCOMP_OUTPUT_L: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT) & 0xFFFF; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT) & 0xFFFF; break; case PE_PERF_ZCOMP_OUTPUT_H: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT) >> 16; + _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT) >> 16; break; case PE_PERF_BLEND_INPUT_L: @@ -296,19 +298,19 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) // In very old builds, Dolphin only returned 0. That caused the challenge to be immediately finished without any goop being cleaned (the timer just didn't even start counting from 3:00:00). // Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all. // Note that currently this functionality is only implemented in the D3D11 backend. 
- _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_BLEND_INPUT) & 0xFFFF; + _uReturnValue = g_perf_query->GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF; break; case PE_PERF_BLEND_INPUT_H: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_BLEND_INPUT) >> 16; + _uReturnValue = g_perf_query->GetQueryResult(PQ_BLEND_INPUT) >> 16; break; case PE_PERF_EFB_COPY_CLOCKS_L: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_EFB_COPY_CLOCKS) & 0xFFFF; + _uReturnValue = g_perf_query->GetQueryResult(PQ_EFB_COPY_CLOCKS) & 0xFFFF; break; case PE_PERF_EFB_COPY_CLOCKS_H: - _uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_EFB_COPY_CLOCKS) >> 16; + _uReturnValue = g_perf_query->GetQueryResult(PQ_EFB_COPY_CLOCKS) >> 16; break; default: diff --git a/Source/Plugins/Plugin_VideoOGL/CMakeLists.txt b/Source/Plugins/Plugin_VideoOGL/CMakeLists.txt index b506087106..834e905faa 100644 --- a/Source/Plugins/Plugin_VideoOGL/CMakeLists.txt +++ b/Source/Plugins/Plugin_VideoOGL/CMakeLists.txt @@ -2,6 +2,7 @@ set(SRCS Src/FramebufferManager.cpp Src/GLUtil.cpp Src/main.cpp Src/NativeVertexFormat.cpp + Src/PerfQuery.cpp Src/PixelShaderCache.cpp Src/PostProcessing.cpp Src/RasterFont.cpp diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp new file mode 100644 index 0000000000..cb659e6921 --- /dev/null +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp @@ -0,0 +1,78 @@ +#include "GLUtil.h" +#include "PerfQuery.h" + +namespace OGL { + +u32 results[PQG_NUM_MEMBERS] = { 0 }; +GLuint query_id; + +PerfQueryGroup active_query; + +PerfQuery::PerfQuery() +{ + glGenQueries(1, &query_id); +} + +PerfQuery::~PerfQuery() +{ + glDeleteQueries(1, &query_id); +} + +void PerfQuery::EnableQuery(PerfQueryGroup type) +{ + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + glBeginQuery(GL_SAMPLES_PASSED, query_id); + } + active_query = type; +} + +void PerfQuery::DisableQuery(PerfQueryGroup type) +{ 
+ // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + glEndQuery(GL_SAMPLES_PASSED); + + GLuint query_result = GL_FALSE; + while (query_result != GL_TRUE) + { + glGetQueryObjectuiv(query_id, GL_QUERY_RESULT_AVAILABLE, &query_result); + } + + glGetQueryObjectuiv(query_id, GL_QUERY_RESULT, &query_result); + + results[active_query] += query_result; + } +} + +void PerfQuery::ResetQuery() +{ + memset(results, 0, sizeof(results)); +} + +u32 PerfQuery::GetQueryResult(PerfQueryType type) +{ + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC || type == PQ_BLEND_INPUT) + { + + } + if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT || type == PQ_BLEND_INPUT) + { + + } + if (type == PQ_BLEND_INPUT) + { + results[PQ_BLEND_INPUT] = results[PQ_ZCOMP_OUTPUT] + results[PQ_ZCOMP_OUTPUT_ZCOMPLOC]; + } + + if (type == PQ_EFB_COPY_CLOCKS) + { + // TODO + } + + return results[type]; +} + +} // namespace diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h new file mode 100644 index 0000000000..776c576e2d --- /dev/null +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h @@ -0,0 +1,22 @@ +#ifndef _PERFQUERY_H_ +#define _PERFQUERY_H_ + +#include "PerfQueryBase.h" + +namespace OGL { + +class PerfQuery : public PerfQueryBase +{ +public: + PerfQuery(); + ~PerfQuery(); + + void EnableQuery(PerfQueryGroup type); + void DisableQuery(PerfQueryGroup type); + void ResetQuery(); + u32 GetQueryResult(PerfQueryType type); +}; + +} // namespace + +#endif // _PERFQUERY_H_ diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 0a37b0e654..b2f55adec5 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -40,6 +40,7 @@ #include "OpcodeDecoding.h" #include "FileUtil.h" #include "Debugger.h" +#include "PerfQueryBase.h" #include "main.h" @@ -207,7 +208,10 @@ 
void VertexManager::vFlush() if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); // Lego Star Wars crashes here. if (vs) VertexShaderCache::SetCurrentShader(vs->glprogid); + g_perf_query->EnableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); Draw(); + g_perf_query->DisableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT)); // run through vertex groups again to set alpha if (useDstAlpha && !dualSourcePossible) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 2c8f6d4716..329d131ded 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -93,6 +93,7 @@ Make AA apply instantly during gameplay if possible #include "FramebufferManager.h" #include "Core.h" #include "Host.h" +#include "PerfQuery.h" #include "VideoState.h" #include "VideoBackend.h" @@ -194,6 +195,7 @@ void VideoBackend::Video_Prepare() BPInit(); g_vertex_manager = new VertexManager; + g_perf_query = new PerfQuery; Fifo_Init(); // must be done before OpcodeDecoder_Init() OpcodeDecoder_Init(); VertexShaderCache::Init(); From 26de63a8cd3e0b05c1ca01bd19e25ace5631943f Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 17 Jun 2012 19:49:48 +0200 Subject: [PATCH 03/20] Video_Software: Implement PE pixel metrics --- .../Plugin_VideoSoftware/Src/BPMemLoader.cpp | 9 ++++++++ .../Plugin_VideoSoftware/Src/Rasterizer.cpp | 15 ++++++++---- .../Plugin_VideoSoftware/Src/SWPixelEngine.h | 23 +++++++++++++++++++ .../Plugins/Plugin_VideoSoftware/Src/Tev.cpp | 18 +++++++++++---- 4 files changed, 56 insertions(+), 9 deletions(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp index 95ec555181..ad730a485c 100644 --- 
a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp @@ -90,6 +90,15 @@ void SWBPWritten(int address, int newvalue) SWPixelEngine::pereg.boxBottom = newvalue >> 10; SWPixelEngine::pereg.boxTop = newvalue & 0x3ff; break; + case BPMEM_CLEAR_PIXEL_PERF: + // TODO: Parameter? + SWPixelEngine::pereg.perfZcompInputZcomploc = 0; + SWPixelEngine::pereg.perfZcompOutputZcomploc = 0; + SWPixelEngine::pereg.perfZcompInput = 0; + SWPixelEngine::pereg.perfZcompOutput = 0; + SWPixelEngine::pereg.perfBlendInput = 0; + SWPixelEngine::pereg.perfEfbCopyClocks = 0; + break; case BPMEM_LOADTLUT0: // This one updates bpmem.tlutXferSrc, no need to do anything here. break; case BPMEM_LOADTLUT1: // Load a Texture Look Up Table diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp index c212c93601..a23d85f27f 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp @@ -23,6 +23,7 @@ #include "BPMemLoader.h" #include "XFMemLoader.h" #include "Tev.h" +#include "SWPixelEngine.h" #include "SWStatistics.h" #include "SWVideoConfig.h" @@ -125,11 +126,17 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi) if (z < 0 || z > 0x00ffffff) return; - if (bpmem.zcontrol.zcomploc && bpmem.zmode.testenable) + if (bpmem.zcontrol.zcomploc) { - // early z - if (!EfbInterface::ZCompare(x, y, z)) - return; + // TODO: Verify that perf regs are being incremented even if test is disabled + SWPixelEngine::pereg.perfZcompInputZcomploc++; + if (bpmem.zmode.testenable) + { + // early z + if (!EfbInterface::ZCompare(x, y, z)) + return; + } + SWPixelEngine::pereg.perfZcompOutputZcomploc++; } RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h index 6a87143e8c..764488c32e 100644 --- 
a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h @@ -38,6 +38,21 @@ namespace SWPixelEngine PE_BBOX_RIGHT = 0x012, // Flip Right PE_BBOX_TOP = 0x014, // Flip Top PE_BBOX_BOTTOM = 0x016, // Flip Bottom + + // NOTE: Order not verified + // These indicate the number of quads that are being used as input/output for each particular stage + PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L = 0x18, + PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H = 0x1a, + PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L = 0x1c, + PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H = 0x1e, + PE_PERF_ZCOMP_INPUT_L = 0x20, + PE_PERF_ZCOMP_INPUT_H = 0x22, + PE_PERF_ZCOMP_OUTPUT_L = 0x24, + PE_PERF_ZCOMP_OUTPUT_H = 0x26, + PE_PERF_BLEND_INPUT_L = 0x28, + PE_PERF_BLEND_INPUT_H = 0x2a, + PE_PERF_EFB_COPY_CLOCKS_L = 0x2c, + PE_PERF_EFB_COPY_CLOCKS_H = 0x2e, }; union UPEZConfReg @@ -125,10 +140,18 @@ namespace SWPixelEngine UPECtrlReg ctrl; u16 unk0; u16 token; + u16 boxLeft; u16 boxRight; u16 boxTop; u16 boxBottom; + + u16 perfZcompInputZcomploc; + u16 perfZcompOutputZcomploc; + u16 perfZcompInput; + u16 perfZcompOutput; + u16 perfBlendInput; + u16 perfEfbCopyClocks; }; extern PEReg pereg; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp index c67ee53837..83d9de2784 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp @@ -20,6 +20,7 @@ #include "Tev.h" #include "EfbInterface.h" #include "TextureSampler.h" +#include "SWPixelEngine.h" #include "SWStatistics.h" #include "SWVideoConfig.h" #include "DebugUtil.h" @@ -784,11 +785,16 @@ void Tev::Draw() output[BLU_C] = (output[BLU_C] * invFog + fogInt * bpmem.fog.color.b) >> 8; } - if (!bpmem.zcontrol.zcomploc && bpmem.zmode.testenable) - { - if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) - return; - } + if (!bpmem.zcontrol.zcomploc) + { + SWPixelEngine::pereg.perfZcompInput++; + if (bpmem.zmode.testenable) + { 
+ if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) + return; + } + SWPixelEngine::pereg.perfZcompOutput++; + } #if ALLOW_TEV_DUMPS if (g_SWVideoConfig.bDumpTevStages) @@ -812,6 +818,8 @@ void Tev::Draw() INCSTAT(swstats.thisFrame.tevPixelsOut); + SWPixelEngine::pereg.perfBlendInput++; + EfbInterface::BlendTev(Position[0], Position[1], output); } From 1c1ae63b697fa5589772c92a78a635ef68f67f83 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Tue, 19 Jun 2012 23:05:39 +0200 Subject: [PATCH 04/20] Windows build fix. --- Source/Core/VideoCommon/VideoCommon.vcxproj | 2 ++ Source/Core/VideoCommon/VideoCommon.vcxproj.filters | 6 ++++++ Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj | 2 ++ .../Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters | 6 ++++++ 4 files changed, 16 insertions(+) diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj b/Source/Core/VideoCommon/VideoCommon.vcxproj index f53c18cb37..e458a8d87c 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj @@ -194,6 +194,7 @@ + @@ -240,6 +241,7 @@ + diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index c933fbc939..0a61595c45 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -119,6 +119,9 @@ Shader Generators + + Base + @@ -246,6 +249,9 @@ Shader Generators + + Base + diff --git a/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj b/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj index 18b8ac4d84..8d73bfda05 100644 --- a/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj +++ b/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj @@ -200,6 +200,7 @@ + @@ -222,6 +223,7 @@ + diff --git a/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters b/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters index cd170691da..f423a77f0b 100644 --- 
a/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters +++ b/Source/Plugins/Plugin_VideoOGL/Plugin_VideoOGL.vcxproj.filters @@ -36,6 +36,9 @@ Render + + Render + @@ -72,6 +75,9 @@ Render + + Render + From 4607ebea2ad5fb472a53d5772af0c395c6f73810 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 26 Aug 2012 20:21:51 +0200 Subject: [PATCH 05/20] Video_Software: Fix a small issue that prevented perf queries from working at all. --- Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp index 33a6164b01..525b39b0b6 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp @@ -77,7 +77,7 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) u16 address = _iAddress & 0xFFF; - if (address <= 0x16) + if (address <= 0x2e) _uReturnValue = ((u16*)&pereg)[address >> 1]; } @@ -109,7 +109,7 @@ void Write16(const u16 _iValue, const u32 _iAddress) } break; default: - if (address <= 0x16) + if (address <= 0x2e) ((u16*)&pereg)[address >> 1] = _iValue; break; } From 87d8feb53df8499e61096e4df022d00e25edb3c3 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Sun, 26 Aug 2012 20:25:00 +0200 Subject: [PATCH 06/20] Video_Software: Add some debugging logs for PE perf queries --- Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp index 525b39b0b6..9ae13d4e3b 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp @@ -79,6 +79,11 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) if (address <= 0x2e) _uReturnValue = ((u16*)&pereg)[address >> 1]; + + if 
(address > 0x16) + { + ERROR_LOG(PIXELENGINE, "Read from address %#08x, returning %#08x\n", address, _uReturnValue); + } } void Write32(const u32 _iValue, const u32 _iAddress) From d734a5b486059d724f55e6662cf019fd71995c2b Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 27 Aug 2012 00:57:17 +0200 Subject: [PATCH 07/20] More debugging logs for RDilux --- Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp index 9ae13d4e3b..63885513f5 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp @@ -82,7 +82,7 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) if (address > 0x16) { - ERROR_LOG(PIXELENGINE, "Read from address %#08x, returning %#08x\n", address, _uReturnValue); + ERROR_LOG(PIXELENGINE, "addr %#08x, ret %#08x; %#08x, %#08x, %#08x, %#08x, %#08x, %#08x\n", address, _uReturnValue, pereg.perfZcompInputZcomploc, pereg.perfZcompOutputZcomploc, pereg.perfZcompInput, pereg.perfZcompOutput, pereg.perfBlendInput, pereg.perfEfbCopyClocks); } } From 49d1da5e7e0e9ba3aecc8fc02285ca537623c3ea Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 27 Aug 2012 23:51:35 +0200 Subject: [PATCH 08/20] Video_Software: Fix stuff. 
--- .../Plugin_VideoSoftware/Src/BPMemLoader.cpp | 18 ++++++++++++------ .../Plugin_VideoSoftware/Src/Rasterizer.cpp | 7 +++++-- .../Plugin_VideoSoftware/Src/SWPixelEngine.cpp | 2 +- .../Plugin_VideoSoftware/Src/SWPixelEngine.h | 18 ++++++++++++------ .../Plugins/Plugin_VideoSoftware/Src/Tev.cpp | 9 ++++++--- 5 files changed, 36 insertions(+), 18 deletions(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp index ad730a485c..4de9f435fa 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp @@ -92,12 +92,18 @@ void SWBPWritten(int address, int newvalue) break; case BPMEM_CLEAR_PIXEL_PERF: // TODO: Parameter? - SWPixelEngine::pereg.perfZcompInputZcomploc = 0; - SWPixelEngine::pereg.perfZcompOutputZcomploc = 0; - SWPixelEngine::pereg.perfZcompInput = 0; - SWPixelEngine::pereg.perfZcompOutput = 0; - SWPixelEngine::pereg.perfBlendInput = 0; - SWPixelEngine::pereg.perfEfbCopyClocks = 0; + SWPixelEngine::pereg.perfZcompInputZcomplocLo = 0; + SWPixelEngine::pereg.perfZcompInputZcomplocHi = 0; + SWPixelEngine::pereg.perfZcompOutputZcomplocLo = 0; + SWPixelEngine::pereg.perfZcompOutputZcomplocHi = 0; + SWPixelEngine::pereg.perfZcompInputLo = 0; + SWPixelEngine::pereg.perfZcompInputHi = 0; + SWPixelEngine::pereg.perfZcompOutputLo = 0; + SWPixelEngine::pereg.perfZcompOutputHi = 0; + SWPixelEngine::pereg.perfBlendInputLo = 0; + SWPixelEngine::pereg.perfBlendInputHi = 0; + SWPixelEngine::pereg.perfEfbCopyClocksLo = 0; + SWPixelEngine::pereg.perfEfbCopyClocksHi = 0; break; case BPMEM_LOADTLUT0: // This one updates bpmem.tlutXferSrc, no need to do anything here. 
break; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp index a23d85f27f..c9c3b8c26c 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp @@ -129,14 +129,17 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi) if (bpmem.zcontrol.zcomploc) { // TODO: Verify that perf regs are being incremented even if test is disabled - SWPixelEngine::pereg.perfZcompInputZcomploc++; + if (++SWPixelEngine::pereg.perfZcompInputZcomplocLo == 0) + SWPixelEngine::pereg.perfZcompInputZcomplocHi++; + if (bpmem.zmode.testenable) { // early z if (!EfbInterface::ZCompare(x, y, z)) return; } - SWPixelEngine::pereg.perfZcompOutputZcomploc++; + if (++SWPixelEngine::pereg.perfZcompOutputZcomplocLo == 0) + SWPixelEngine::pereg.perfZcompOutputZcomplocHi++; } RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp index 63885513f5..d489d033ae 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp @@ -82,7 +82,7 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) if (address > 0x16) { - ERROR_LOG(PIXELENGINE, "addr %#08x, ret %#08x; %#08x, %#08x, %#08x, %#08x, %#08x, %#08x\n", address, _uReturnValue, pereg.perfZcompInputZcomploc, pereg.perfZcompOutputZcomploc, pereg.perfZcompInput, pereg.perfZcompOutput, pereg.perfBlendInput, pereg.perfEfbCopyClocks); + ERROR_LOG(PIXELENGINE, "addr %#08x, ret %#08x\n", address, _uReturnValue); } } diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h index 764488c32e..7deb69a164 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h @@ -146,12 +146,18 @@ namespace 
SWPixelEngine u16 boxTop; u16 boxBottom; - u16 perfZcompInputZcomploc; - u16 perfZcompOutputZcomploc; - u16 perfZcompInput; - u16 perfZcompOutput; - u16 perfBlendInput; - u16 perfEfbCopyClocks; + u16 perfZcompInputZcomplocLo; + u16 perfZcompInputZcomplocHi; + u16 perfZcompOutputZcomplocLo; + u16 perfZcompOutputZcomplocHi; + u16 perfZcompInputLo; + u16 perfZcompInputHi; + u16 perfZcompOutputLo; + u16 perfZcompOutputHi; + u16 perfBlendInputLo; + u16 perfBlendInputHi; + u16 perfEfbCopyClocksLo; + u16 perfEfbCopyClocksHi; }; extern PEReg pereg; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp index 83d9de2784..fad8806a9f 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp @@ -787,13 +787,15 @@ void Tev::Draw() if (!bpmem.zcontrol.zcomploc) { - SWPixelEngine::pereg.perfZcompInput++; + if (++SWPixelEngine::pereg.perfZcompInputLo == 0) + SWPixelEngine::pereg.perfZcompInputHi++; if (bpmem.zmode.testenable) { if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) return; } - SWPixelEngine::pereg.perfZcompOutput++; + if (++SWPixelEngine::pereg.perfZcompOutputLo == 0) + SWPixelEngine::pereg.perfZcompOutputHi++; } #if ALLOW_TEV_DUMPS @@ -818,7 +820,8 @@ void Tev::Draw() INCSTAT(swstats.thisFrame.tevPixelsOut); - SWPixelEngine::pereg.perfBlendInput++; + if (++SWPixelEngine::pereg.perfBlendInputLo) + SWPixelEngine::pereg.perfBlendInputHi++; EfbInterface::BlendTev(Position[0], Position[1], output); } From 3d56ce18fb60a5617ba037d988f7918f7d29aae5 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 31 Aug 2012 20:49:59 +0200 Subject: [PATCH 09/20] Video_Software: Fix moar stuff --- Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp index fad8806a9f..80d68833b6 100644 --- 
a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp @@ -820,7 +820,7 @@ void Tev::Draw() INCSTAT(swstats.thisFrame.tevPixelsOut); - if (++SWPixelEngine::pereg.perfBlendInputLo) + if (++SWPixelEngine::pereg.perfBlendInputLo == 0) SWPixelEngine::pereg.perfBlendInputHi++; EfbInterface::BlendTev(Position[0], Position[1], output); From 54947b1e2246804ddf5ec0378f8111905a6d04bd Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 31 Aug 2012 21:30:12 +0200 Subject: [PATCH 10/20] Video_Software: Some more debugging logs. --- Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp index d489d033ae..ad19eb87ee 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp @@ -82,7 +82,13 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) if (address > 0x16) { - ERROR_LOG(PIXELENGINE, "addr %#08x, ret %#08x\n", address, _uReturnValue); + ERROR_LOG(PIXELENGINE, "addr %#08x, ret %#04x; %#04x%04x, %#04x%04x, %#04x%04x, %#04x%04x, %#04x%04x, %#04x%04x\n", address, _uReturnValue, + pereg.perfZcompInputZcomplocHi, pereg.perfZcompInputZcomplocLo, + pereg.perfZcompOutputZcomplocHi, pereg.perfZcompOutputZcomplocLo, + pereg.perfZcompInputHi, pereg.perfZcompInputLo, + pereg.perfZcompOutputHi, pereg.perfZcompOutputLo, + pereg.perfBlendInputHi, pereg.perfBlendInputLo, + pereg.perfEfbCopyClocksHi, pereg.perfEfbCopyClocksLo); } } From 53aec6c476b0ec9db4d301eaafedbb26eb3697f4 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Sat, 16 Feb 2013 17:50:40 -0600 Subject: [PATCH 11/20] Fix OGL perf queries and make them not slow! 
--- Source/Core/Common/Src/VideoBackendBase.h | 6 +- Source/Core/VideoCommon/Src/MainBase.cpp | 43 +++++++ Source/Core/VideoCommon/Src/PerfQueryBase.h | 2 + Source/Core/VideoCommon/Src/PixelEngine.cpp | 25 ++-- .../Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp | 111 +++++++++++++----- .../Plugins/Plugin_VideoOGL/Src/PerfQuery.h | 24 ++++ .../Plugin_VideoOGL/Src/VertexManager.cpp | 2 +- .../Plugin_VideoSoftware/Src/SWmain.cpp | 6 + .../Plugin_VideoSoftware/Src/VideoBackend.h | 2 + 9 files changed, 178 insertions(+), 43 deletions(-) diff --git a/Source/Core/Common/Src/VideoBackendBase.h b/Source/Core/Common/Src/VideoBackendBase.h index d09288ebc7..4e602526d2 100644 --- a/Source/Core/Common/Src/VideoBackendBase.h +++ b/Source/Core/Common/Src/VideoBackendBase.h @@ -22,6 +22,7 @@ #include #include "ChunkFile.h" +#include "PerfQueryBase.h" typedef void (*writeFn16)(const u16,const u32); typedef void (*writeFn32)(const u32,const u32); @@ -107,6 +108,7 @@ public: virtual void Video_EndField() = 0; virtual u32 Video_AccessEFB(EFBAccessType, u32, u32, u32) = 0; + virtual u32 Video_GetQueryResult(PerfQueryType type) = 0; virtual void Video_AddMessage(const char* pstr, unsigned int milliseconds) = 0; virtual void Video_ClearMessages() = 0; @@ -154,8 +156,10 @@ class VideoBackendHardware : public VideoBackend void Video_ExitLoop(); void Video_BeginField(u32, FieldType, u32, u32); void Video_EndField(); - u32 Video_AccessEFB(EFBAccessType, u32, u32, u32); + u32 Video_AccessEFB(EFBAccessType, u32, u32, u32); + u32 Video_GetQueryResult(PerfQueryType type); + void Video_AddMessage(const char* pstr, unsigned int milliseconds); void Video_ClearMessages(); bool Video_Screenshot(const char* filename); diff --git a/Source/Core/VideoCommon/Src/MainBase.cpp b/Source/Core/VideoCommon/Src/MainBase.cpp index cb6dc7ae5b..e8de52addb 100644 --- a/Source/Core/VideoCommon/Src/MainBase.cpp +++ b/Source/Core/VideoCommon/Src/MainBase.cpp @@ -21,6 +21,10 @@ volatile u32 s_swapRequested = false; u32 
s_efbAccessRequested = false; volatile u32 s_FifoShuttingDown = false; +std::condition_variable s_perf_query_cond; +std::mutex s_perf_query_lock; +static volatile bool s_perf_query_requested; + static volatile struct { u32 xfbAddr; @@ -169,6 +173,43 @@ u32 VideoBackendHardware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 return 0; } +static bool QueryResultIsReady() +{ + return !s_perf_query_requested || s_FifoShuttingDown; +} + +void VideoFifo_CheckPerfQueryRequest() +{ + if (s_perf_query_requested) + { + g_perf_query->FlushResults(); + + { + std::lock_guard lk(s_perf_query_lock); + s_perf_query_requested = false; + } + + s_perf_query_cond.notify_one(); + } +} + +u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type) +{ + // Is this check sane? + if (!g_perf_query->IsFlushed()) + { + if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) + { + s_perf_query_requested = true; + std::unique_lock lk(s_perf_query_lock); + s_perf_query_cond.wait(lk, QueryResultIsReady); + } + else + g_perf_query->FlushResults(); + } + + return g_perf_query->GetQueryResult(type); +} void VideoBackendHardware::InitializeShared() { @@ -176,6 +217,7 @@ void VideoBackendHardware::InitializeShared() s_swapRequested = 0; s_efbAccessRequested = 0; + s_perf_query_requested = false; s_FifoShuttingDown = 0; memset((void*)&s_beginFieldArgs, 0, sizeof(s_beginFieldArgs)); memset(&s_accessEFBArgs, 0, sizeof(s_accessEFBArgs)); @@ -223,6 +265,7 @@ void VideoFifo_CheckAsyncRequest() { VideoFifo_CheckSwapRequest(); VideoFifo_CheckEFBAccess(); + VideoFifo_CheckPerfQueryRequest(); } void VideoBackendHardware::Video_GatherPipeBursted() diff --git a/Source/Core/VideoCommon/Src/PerfQueryBase.h b/Source/Core/VideoCommon/Src/PerfQueryBase.h index 0520e9244c..2643482379 100644 --- a/Source/Core/VideoCommon/Src/PerfQueryBase.h +++ b/Source/Core/VideoCommon/Src/PerfQueryBase.h @@ -32,6 +32,8 @@ public: virtual void DisableQuery(PerfQueryGroup type) {} virtual void ResetQuery() {} 
virtual u32 GetQueryResult(PerfQueryType type) { return 0; } + virtual void FlushResults() {} + virtual bool IsFlushed() const { return true; } }; extern PerfQueryBase* g_perf_query; diff --git a/Source/Core/VideoCommon/Src/PixelEngine.cpp b/Source/Core/VideoCommon/Src/PixelEngine.cpp index 005468f63c..2d6275096c 100644 --- a/Source/Core/VideoCommon/Src/PixelEngine.cpp +++ b/Source/Core/VideoCommon/Src/PixelEngine.cpp @@ -260,35 +260,35 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) // NOTE(neobrain): only PE_PERF_ZCOMP_OUTPUT is implemented in D3D11, but the other values shouldn't be contradictionary to the value of that register (i.e. INPUT registers should always be greater or equal to their corresponding OUTPUT registers). case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF; break; case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) >> 16; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT_ZCOMPLOC) >> 16; break; case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF; break; case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) >> 16; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT_ZCOMPLOC) >> 16; break; case PE_PERF_ZCOMP_INPUT_L: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT) & 0xFFFF; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT) & 0xFFFF; break; case PE_PERF_ZCOMP_INPUT_H: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_INPUT) >> 16; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_INPUT) >> 16; break; case 
PE_PERF_ZCOMP_OUTPUT_L: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT) & 0xFFFF; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT) & 0xFFFF; break; case PE_PERF_ZCOMP_OUTPUT_H: - _uReturnValue = g_perf_query->GetQueryResult(PQ_ZCOMP_OUTPUT) >> 16; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_ZCOMP_OUTPUT) >> 16; break; case PE_PERF_BLEND_INPUT_L: @@ -298,19 +298,20 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) // In very old builds, Dolphin only returned 0. That caused the challenge to be immediately finished without any goop being cleaned (the timer just didn't even start counting from 3:00:00). // Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all. // Note that currently this functionality is only implemented in the D3D11 backend. - _uReturnValue = g_perf_query->GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF; + //ERROR_LOG(VIDEO, "PQ_BLEND_INPUT: %d", g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT)); break; case PE_PERF_BLEND_INPUT_H: - _uReturnValue = g_perf_query->GetQueryResult(PQ_BLEND_INPUT) >> 16; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) >> 16; break; case PE_PERF_EFB_COPY_CLOCKS_L: - _uReturnValue = g_perf_query->GetQueryResult(PQ_EFB_COPY_CLOCKS) & 0xFFFF; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_EFB_COPY_CLOCKS) & 0xFFFF; break; case PE_PERF_EFB_COPY_CLOCKS_H: - _uReturnValue = g_perf_query->GetQueryResult(PQ_EFB_COPY_CLOCKS) >> 16; + _uReturnValue = g_video_backend->Video_GetQueryResult(PQ_EFB_COPY_CLOCKS) >> 16; break; default: diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp index cb659e6921..8cee426e98 100644 --- 
a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp @@ -1,31 +1,47 @@ #include "GLUtil.h" #include "PerfQuery.h" -namespace OGL { - -u32 results[PQG_NUM_MEMBERS] = { 0 }; -GLuint query_id; - -PerfQueryGroup active_query; +namespace OGL +{ PerfQuery::PerfQuery() + : m_query_read_pos() + , m_query_count() { - glGenQueries(1, &query_id); + for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i) + glGenQueries(1, &m_query_buffer[i].query_id); + + ResetQuery(); } PerfQuery::~PerfQuery() { - glDeleteQueries(1, &query_id); + for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i) + glDeleteQueries(1, &m_query_buffer[i].query_id); } void PerfQuery::EnableQuery(PerfQueryGroup type) { + // Is this sane? + if (m_query_count > ARRAYSIZE(m_query_buffer) / 2) + WeakFlush(); + + if (ARRAYSIZE(m_query_buffer) == m_query_count) + { + FlushOne(); + //ERROR_LOG(VIDEO, "flushed query buffer early!"); + } + // start query if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) { - glBeginQuery(GL_SAMPLES_PASSED, query_id); + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % ARRAYSIZE(m_query_buffer)]; + + glBeginQuery(GL_SAMPLES_PASSED, entry.query_id); + entry.query_type = type; + + ++m_query_count; } - active_query = type; } void PerfQuery::DisableQuery(PerfQueryGroup type) @@ -34,45 +50,82 @@ void PerfQuery::DisableQuery(PerfQueryGroup type) if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) { glEndQuery(GL_SAMPLES_PASSED); + } +} - GLuint query_result = GL_FALSE; - while (query_result != GL_TRUE) +bool PerfQuery::IsFlushed() const +{ + return 0 == m_query_count; +} + +void PerfQuery::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = 0; + glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); + + m_results[entry.query_type] += result; + + m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); + --m_query_count; +} + +// TODO: could selectively flush things, but I 
don't think that will do much +void PerfQuery::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); +} + +void PerfQuery::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + GLuint result = GL_FALSE; + glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT_AVAILABLE, &result); + + if (GL_TRUE == result) { - glGetQueryObjectuiv(query_id, GL_QUERY_RESULT_AVAILABLE, &query_result); + FlushOne(); + } + else + { + break; } - - glGetQueryObjectuiv(query_id, GL_QUERY_RESULT, &query_result); - - results[active_query] += query_result; } } void PerfQuery::ResetQuery() { - memset(results, 0, sizeof(results)); + m_query_count = 0; + std::fill_n(m_results, ARRAYSIZE(m_results), 0); } u32 PerfQuery::GetQueryResult(PerfQueryType type) { - if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC || type == PQ_BLEND_INPUT) + u32 result = 0; + + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) { - + result = m_results[PQG_ZCOMP_ZCOMPLOC]; } - if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT || type == PQ_BLEND_INPUT) + else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) { - + result = m_results[PQG_ZCOMP]; } - if (type == PQ_BLEND_INPUT) + else if (type == PQ_BLEND_INPUT) { - results[PQ_BLEND_INPUT] = results[PQ_ZCOMP_OUTPUT] + results[PQ_ZCOMP_OUTPUT_ZCOMPLOC]; + result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]; } - - if (type == PQ_EFB_COPY_CLOCKS) + else if (type == PQ_EFB_COPY_CLOCKS) { - // TODO + result = m_results[PQG_EFB_COPY_CLOCKS]; } - - return results[type]; + + return result / 4; } } // namespace diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h index 776c576e2d..76040272e3 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h @@ -15,6 +15,30 @@ public: void DisableQuery(PerfQueryGroup type); void ResetQuery(); u32 GetQueryResult(PerfQueryType 
type); + void FlushResults(); + bool IsFlushed() const; + +private: + struct ActiveQuery + { + GLuint query_id; + PerfQueryGroup query_type; + }; + + // when testing in SMS: 64 was too small, 128 was ok + static const int PERF_QUERY_BUFFER_SIZE = 512; + + void WeakFlush(); + // Only use when non-empty + void FlushOne(); + + // This contains gl query objects with unretrieved results. + ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE]; + int m_query_read_pos; + + // TODO: sloppy + volatile int m_query_count; + volatile u32 m_results[PQG_NUM_MEMBERS]; }; } // namespace diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index b2f55adec5..85a23e1c1d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -211,7 +211,7 @@ void VertexManager::vFlush() g_perf_query->EnableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); Draw(); g_perf_query->DisableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); - ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT)); + //ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? 
PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT)); // run through vertex groups again to set alpha if (useDstAlpha && !dualSourcePossible) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp index 3c6fdcca38..100ff55bf4 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWmain.cpp @@ -176,6 +176,12 @@ u32 VideoSoftware::Video_AccessEFB(EFBAccessType type, u32 x, u32 y, u32 InputDa return value; } +u32 VideoSoftware::Video_GetQueryResult(PerfQueryType type) +{ + // TODO: + return 0; +} + bool VideoSoftware::Video_Screenshot(const char *_szFilename) { return false; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/VideoBackend.h b/Source/Plugins/Plugin_VideoSoftware/Src/VideoBackend.h index c0309c95c1..2ba282788b 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/VideoBackend.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/VideoBackend.h @@ -26,7 +26,9 @@ class VideoSoftware : public VideoBackend void Video_ExitLoop(); void Video_BeginField(u32, FieldType, u32, u32); void Video_EndField(); + u32 Video_AccessEFB(EFBAccessType, u32, u32, u32); + u32 Video_GetQueryResult(PerfQueryType type); void Video_AddMessage(const char* pstr, unsigned int milliseconds); void Video_ClearMessages(); From 0f617183a8a74f066d1d01a3ed50042cc9824bf8 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Mon, 18 Feb 2013 14:54:24 +0000 Subject: [PATCH 12/20] Add a possible TODO. Dunno if the hardware behaves like this, but it likely does. 
--- Source/Plugins/Plugin_VideoOGL/Src/Render.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp index f10634ba18..19f18ae855 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/Render.cpp @@ -1464,6 +1464,7 @@ void Renderer::SetDepthMode() else { // if the test is disabled write is disabled too + // TODO: When PE performance metrics are being emulated via occlusion queries, we should (probably?) enable depth test with depth function ALWAYS here glDisable(GL_DEPTH_TEST); glDepthMask(GL_FALSE); } From d0dbcc6369b1482237521308982baf84cd5989d5 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 00:52:15 +0100 Subject: [PATCH 13/20] VideoSoftware: Cleanup PE perf metrics; returning the proper value now. --- .../Plugin_VideoSoftware/Src/BPMemLoader.cpp | 2 +- .../Plugin_VideoSoftware/Src/Rasterizer.cpp | 9 ++-- .../Src/SWPixelEngine.cpp | 11 ----- .../Plugin_VideoSoftware/Src/SWPixelEngine.h | 48 +++++++++++++++++++ .../Plugins/Plugin_VideoSoftware/Src/Tev.cpp | 14 ++---- 5 files changed, 57 insertions(+), 27 deletions(-) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp index dde31ab7d3..0d7c77c767 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/BPMemLoader.cpp @@ -91,7 +91,7 @@ void SWBPWritten(int address, int newvalue) SWPixelEngine::pereg.boxTop = newvalue & 0x3ff; break; case BPMEM_CLEAR_PIXEL_PERF: - // TODO: Parameter? 
+ // TODO: I didn't test if the value written to this register affects the amount of cleared registers SWPixelEngine::pereg.perfZcompInputZcomplocLo = 0; SWPixelEngine::pereg.perfZcompInputZcomplocHi = 0; SWPixelEngine::pereg.perfZcompOutputZcomplocLo = 0; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp index d30c3d7033..badb123fa0 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Rasterizer.cpp @@ -150,18 +150,15 @@ inline void Draw(s32 x, s32 y, s32 xi, s32 yi) if (bpmem.zcontrol.early_ztest && bpmem.zmode.testenable && g_SWVideoConfig.bZComploc) { - // TODO: Verify that perf regs are being incremented even if test is disabled - if (++SWPixelEngine::pereg.perfZcompInputZcomplocLo == 0) - SWPixelEngine::pereg.perfZcompInputZcomplocHi++; - + // TODO: Test if perf regs are incremented even if test is disabled + SWPixelEngine::pereg.IncZInputQuadCount(true); if (bpmem.zmode.testenable) { // early z if (!EfbInterface::ZCompare(x, y, z)) return; } - if (++SWPixelEngine::pereg.perfZcompOutputZcomplocLo == 0) - SWPixelEngine::pereg.perfZcompOutputZcomplocHi++; + SWPixelEngine::pereg.IncZOutputQuadCount(true); } RasterBlockPixel& pixel = rasterBlock.Pixel[xi][yi]; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp index a621542ea0..118e59629e 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.cpp @@ -81,17 +81,6 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) if (address <= 0x2e) _uReturnValue = ((u16*)&pereg)[address >> 1]; - - if (address > 0x16) - { - ERROR_LOG(PIXELENGINE, "addr %#08x, ret %#04x; %#04x%04x, %#04x%04x, %#04x%04x, %#04x%04x, %#04x%04x, %#04x%04x\n", address, _uReturnValue, - pereg.perfZcompInputZcomplocHi, pereg.perfZcompInputZcomplocLo, - 
pereg.perfZcompOutputZcomplocHi, pereg.perfZcompOutputZcomplocLo, - pereg.perfZcompInputHi, pereg.perfZcompInputLo, - pereg.perfZcompOutputHi, pereg.perfZcompOutputLo, - pereg.perfBlendInputHi, pereg.perfBlendInputLo, - pereg.perfEfbCopyClocksHi, pereg.perfEfbCopyClocksLo); - } } void Write32(const u32 _iValue, const u32 _iAddress) diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h index 7deb69a164..351e53456d 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h +++ b/Source/Plugins/Plugin_VideoSoftware/Src/SWPixelEngine.h @@ -158,6 +158,54 @@ namespace SWPixelEngine u16 perfBlendInputHi; u16 perfEfbCopyClocksLo; u16 perfEfbCopyClocksHi; + + // NOTE: hardware doesn't process individual pixels but quads instead. Current software renderer architecture works on pixels though, so we have this "quad" hack here to only increment the registers on every fourth rendered pixel (each helper below counts calls and bumps its Lo/Hi register pair once per four calls) + void IncZInputQuadCount(bool early_ztest) + { + static int quad = 0; + if (++quad != 4) + return; + quad = 0; + + if (early_ztest) + { + if (++perfZcompInputZcomplocLo == 0) + perfZcompInputZcomplocHi++; + } + else + { + if (++perfZcompInputLo == 0) + perfZcompInputHi++; + } + } + void IncZOutputQuadCount(bool early_ztest) + { + static int quad = 0; + if (++quad != 4) + return; + quad = 0; + + if (early_ztest) + { + if (++perfZcompOutputZcomplocLo == 0) + perfZcompOutputZcomplocHi++; + } + else + { + if (++perfZcompOutputLo == 0) + perfZcompOutputHi++; + } + } + void IncBlendInputQuadCount() + { + static int quad = 0; + if (++quad != 4) + return; + quad = 0; + + if (++perfBlendInputLo == 0) + perfBlendInputHi++; + } }; extern PEReg pereg; diff --git a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp index c9c408a623..3d5c0d7724 100644 --- a/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp +++ b/Source/Plugins/Plugin_VideoSoftware/Src/Tev.cpp @@ -789,14 +789,12 @@
void Tev::Draw() if (late_ztest && bpmem.zmode.testenable) { // TODO: Check against hw if these values get incremented even if depth testing is disabled - if (++SWPixelEngine::pereg.perfZcompInputLo == 0) - SWPixelEngine::pereg.perfZcompInputHi++; + SWPixelEngine::pereg.IncZInputQuadCount(false); - if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) - return; + if (!EfbInterface::ZCompare(Position[0], Position[1], Position[2])) + return; - if (++SWPixelEngine::pereg.perfZcompOutputLo == 0) - SWPixelEngine::pereg.perfZcompOutputHi++; + SWPixelEngine::pereg.IncZOutputQuadCount(false); } #if ALLOW_TEV_DUMPS @@ -820,9 +818,7 @@ void Tev::Draw() #endif INCSTAT(swstats.thisFrame.tevPixelsOut); - - if (++SWPixelEngine::pereg.perfBlendInputLo == 0) - SWPixelEngine::pereg.perfBlendInputHi++; + SWPixelEngine::pereg.IncBlendInputQuadCount(); EfbInterface::BlendTev(Position[0], Position[1], output); } From 1c9860246c1773ede5c35ba15d774cdb566fae18 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 01:14:10 +0100 Subject: [PATCH 14/20] Build fixes for everyone! 
--- Source/Core/Common/Src/VideoBackendBase.h | 2 +- Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Source/Core/Common/Src/VideoBackendBase.h b/Source/Core/Common/Src/VideoBackendBase.h index f502bf42b6..f1c8b0a131 100644 --- a/Source/Core/Common/Src/VideoBackendBase.h +++ b/Source/Core/Common/Src/VideoBackendBase.h @@ -22,7 +22,7 @@ #include #include "ChunkFile.h" -#include "PerfQueryBase.h" +#include "../../VideoCommon/Src/PerfQueryBase.h" typedef void (*writeFn16)(const u16,const u32); typedef void (*writeFn32)(const u32,const u32); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 87d79fdb38..87257f15da 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -218,10 +218,10 @@ void VertexManager::vFlush() if (ps) PixelShaderCache::SetCurrentShader(ps->glprogid); // Lego Star Wars crashes here. if (vs) VertexShaderCache::SetCurrentShader(vs->glprogid); - g_perf_query->EnableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); Draw(); - g_perf_query->DisableQuery(bpmem.zcontrol.zcomploc ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); - //ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.zcomploc ? PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT)); + g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); + //ERROR_LOG(VIDEO, "PerfQuery result: %d", g_perf_query->GetQueryResult(bpmem.zcontrol.early_ztest ? 
PQ_ZCOMP_OUTPUT_ZCOMPLOC : PQ_ZCOMP_OUTPUT)); // run through vertex groups again to set alpha if (useDstAlpha && !dualSourcePossible) From cbf5efe19108a677987242f5ba6a71b38e129c05 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 01:31:57 +0100 Subject: [PATCH 15/20] Some cleanups. PE perf metrics officially declared unsupported for the D3D9 project (out of pure laziness, anyone who cares can implement them :P). --- Source/Core/VideoCommon/Src/OnScreenDisplay.h | 2 +- Source/Core/VideoCommon/Src/VideoConfig.cpp | 1 - Source/Core/VideoCommon/Src/VideoConfig.h | 1 - Source/Plugins/Plugin_VideoDX11/Src/main.cpp | 1 - Source/Plugins/Plugin_VideoDX9/Src/main.cpp | 9 ++++++--- Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 1 - 6 files changed, 7 insertions(+), 8 deletions(-) diff --git a/Source/Core/VideoCommon/Src/OnScreenDisplay.h b/Source/Core/VideoCommon/Src/OnScreenDisplay.h index 3777e2b5d3..80187b8ac3 100644 --- a/Source/Core/VideoCommon/Src/OnScreenDisplay.h +++ b/Source/Core/VideoCommon/Src/OnScreenDisplay.h @@ -22,7 +22,7 @@ namespace OSD { // On-screen message display -void AddMessage(const char* str, u32 ms); +void AddMessage(const char* str, u32 ms = 2000); void DrawMessages(); // draw the current messages on the screen. Only call once per frame. 
void ClearMessages(); diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index 74484efb01..bd5c6a7acb 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -174,7 +174,6 @@ void VideoConfig::VerifyValidity() if (!backend_info.bSupports3DVision) b3DVision = false; if (!backend_info.bSupportsFormatReinterpretation) bEFBEmulateFormatChanges = false; if (!backend_info.bSupportsPixelLighting) bEnablePixelLighting = false; - if (!backend_info.bSupportsPixelPerfQuery) bDisablePixelPerf = true; } void VideoConfig::Save(const char *ini_file) diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index 29ad6ec721..c9d2eef217 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -163,7 +163,6 @@ struct VideoConfig bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL bool bSupportsFormatReinterpretation; bool bSupportsPixelLighting; - bool bSupportsPixelPerfQuery; } backend_info; // Utility diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index f858021e77..b7dd9101d3 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -95,7 +95,6 @@ void InitBackendInfo() g_Config.backend_info.bSupportsDualSourceBlend = true; g_Config.backend_info.bSupportsFormatReinterpretation = true; g_Config.backend_info.bSupportsPixelLighting = true; - g_Config.backend_info.bSupportsPixelPerfQuery = true; IDXGIFactory* factory; IDXGIAdapter* ad; diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index 8085ae4991..bf7bf16989 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -97,9 +97,6 @@ void InitBackendInfo() g_Config.backend_info.bSupports3DVision = true; 
g_Config.backend_info.bSupportsDualSourceBlend = false; g_Config.backend_info.bSupportsFormatReinterpretation = true; - g_Config.backend_info.bSupportsPixelPerfQuery = false; - - g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants; // adapters @@ -159,6 +156,12 @@ bool VideoBackend::Initialize(void *&window_handle) s_BackendInitialized = true; + if (!g_Config.bDisablePixelPerf) + { + OSD::AddMessage("PE perf metrics enabled although the D3D9 backend doesn't support them!"); + OSD::AddMessage("Try a different backend when issues arise."); + } + return true; } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index 266ac2a8ae..854554d968 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -136,7 +136,6 @@ void InitBackendInfo() g_Config.backend_info.bSupportsDualSourceBlend = false; // supported, but broken g_Config.backend_info.bSupportsFormatReinterpretation = false; g_Config.backend_info.bSupportsPixelLighting = true; - g_Config.backend_info.bSupportsPixelPerfQuery = false; // aamodes const char* caamodes[] = {"None", "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA"}; From b94f65b66608148450c22615ea896d90bbc27ed4 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 01:37:47 +0100 Subject: [PATCH 16/20] Remove two incorrect lines. How did those even get there?! 
--- Source/Core/VideoCommon/Src/BPStructs.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index a8d081666f..a79af48370 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -269,8 +269,6 @@ void BPWritten(const BPCmd& bp) UPE_Copy PE_copy = bpmem.triggerEFBCopy; - g_renderer->ResumePixelPerf(true); - // Check if we are to copy from the EFB or draw to the XFB if (PE_copy.copy_to_xfb == 0) { @@ -309,8 +307,6 @@ void BPWritten(const BPCmd& bp) s_gammaLUT[PE_copy.gamma]); } - g_renderer->PausePixelPerf(true); - // Clear the rectangular region after copying it. if (PE_copy.clear) { From 5a7bb2abfae1d37d01bef16d3113a0b54813b81f Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 19:30:37 +0100 Subject: [PATCH 17/20] D3D11: Port perf queries code to the PerfQueryBase interface. Remove deprecated PerfQuery methods from RenderBase. Windows build fix. 
--- Source/Core/VideoCommon/Src/RenderBase.h | 5 - .../VideoCommon/VideoCommon.vcxproj.filters | 10 +- .../Plugin_VideoDX11/Plugin_VideoDX11.vcxproj | 2 + .../Plugin_VideoDX11.vcxproj.filters | 6 + .../Plugin_VideoDX11/Src/PerfQuery.cpp | 148 ++++++++++++++++++ .../Plugins/Plugin_VideoDX11/Src/PerfQuery.h | 46 ++++++ .../Plugins/Plugin_VideoDX11/Src/Render.cpp | 132 ---------------- Source/Plugins/Plugin_VideoDX11/Src/Render.h | 6 - .../Plugin_VideoDX11/Src/VertexManager.cpp | 4 +- Source/Plugins/Plugin_VideoDX11/Src/main.cpp | 3 + .../Plugins/Plugin_VideoOGL/Src/PerfQuery.h | 10 +- 11 files changed, 218 insertions(+), 154 deletions(-) create mode 100644 Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp create mode 100644 Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h diff --git a/Source/Core/VideoCommon/Src/RenderBase.h b/Source/Core/VideoCommon/Src/RenderBase.h index 7f2853bcc3..55678f3f5a 100644 --- a/Source/Core/VideoCommon/Src/RenderBase.h +++ b/Source/Core/VideoCommon/Src/RenderBase.h @@ -128,11 +128,6 @@ public: static unsigned int GetPrevPixelFormat() { return prev_efb_format; } static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; } - virtual void ResetPixelPerf() {}; - virtual void ResumePixelPerf(bool efb_copies) {}; - virtual void PausePixelPerf(bool efb_copies) {}; - virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; }; - // TODO: doesn't belong here virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0; virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0; diff --git a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters index 86b2e03221..330b23d370 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcxproj.filters +++ b/Source/Core/VideoCommon/VideoCommon.vcxproj.filters @@ -101,6 +101,9 @@ Base + + Base + Base @@ -113,8 +116,6 @@ Shader Generators - - Base Util @@ -239,6 +240,9 @@ Base 
+ + Base + Base @@ -251,8 +255,6 @@ Shader Generators - - Base Util diff --git a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj index 52d1c37aed..aab9345ef7 100644 --- a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj +++ b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj @@ -199,6 +199,7 @@ + @@ -228,6 +229,7 @@ + diff --git a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters index 6492e887ca..4b8efac92b 100644 --- a/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters +++ b/Source/Plugins/Plugin_VideoDX11/Plugin_VideoDX11.vcxproj.filters @@ -57,6 +57,9 @@ Render + + Render + @@ -117,6 +120,9 @@ Render + + Render + diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp new file mode 100644 index 0000000000..6ab91fed27 --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp @@ -0,0 +1,148 @@ +#include "RenderBase.h" + +#include "D3DBase.h" +#include "PerfQuery.h" + +namespace DX11 { + +PerfQuery::PerfQuery() + : m_query_read_pos() + , m_query_count() +{ + for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i) + { + D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0); + D3D::device->CreateQuery(&qdesc, &m_query_buffer[i].query); + } + ResetQuery(); +} + +PerfQuery::~PerfQuery() +{ + for (int i = 0; i != ARRAYSIZE(m_query_buffer); ++i) + { + // TODO: EndQuery? + m_query_buffer[i].query->Release(); + } +} + +void PerfQuery::EnableQuery(PerfQueryGroup type) +{ + // Is this sane? 
+ if (m_query_count > ARRAYSIZE(m_query_buffer) / 2) + WeakFlush(); + + if (ARRAYSIZE(m_query_buffer) == m_query_count) + { + // TODO + FlushOne(); + ERROR_LOG(VIDEO, "flushed query buffer early!"); + } + + // start query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count) % ARRAYSIZE(m_query_buffer)]; + + D3D::context->Begin(entry.query); + entry.query_type = type; + + ++m_query_count; + } +} + +void PerfQuery::DisableQuery(PerfQueryGroup type) +{ + // stop query + if (type == PQG_ZCOMP_ZCOMPLOC || type == PQG_ZCOMP) + { + auto& entry = m_query_buffer[(m_query_read_pos + m_query_count + ARRAYSIZE(m_query_buffer)-1) % ARRAYSIZE(m_query_buffer)]; + D3D::context->End(entry.query); + } +} + +void PerfQuery::ResetQuery() +{ + m_query_count = 0; + std::fill_n(m_results, ARRAYSIZE(m_results), 0); +} + +u32 PerfQuery::GetQueryResult(PerfQueryType type) +{ + u32 result = 0; + + if (type == PQ_ZCOMP_INPUT_ZCOMPLOC || type == PQ_ZCOMP_OUTPUT_ZCOMPLOC) + { + result = m_results[PQG_ZCOMP_ZCOMPLOC]; + } + else if (type == PQ_ZCOMP_INPUT || type == PQ_ZCOMP_OUTPUT) + { + result = m_results[PQG_ZCOMP]; + } + else if (type == PQ_BLEND_INPUT) + { + result = m_results[PQG_ZCOMP] + m_results[PQG_ZCOMP_ZCOMPLOC]; + } + else if (type == PQ_EFB_COPY_CLOCKS) + { + result = m_results[PQG_EFB_COPY_CLOCKS]; + } + + return result / 4; +} + +void PerfQuery::FlushOne() +{ + auto& entry = m_query_buffer[m_query_read_pos]; + + UINT64 result = 0; + HRESULT hr = S_FALSE; + while (hr == S_FALSE) + { + // TODO: Still spins if the query never completes; any HRESULT other than S_FALSE (S_OK or an error) ends the loop, and result stays 0 unless GetData filled it in
+ hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0); + } + + m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); + --m_query_count; +} + +// TODO: could selectively flush things, but I don't think that will do much +void PerfQuery::FlushResults() +{ + while (!IsFlushed()) + FlushOne(); +} + +void PerfQuery::WeakFlush() +{ + while (!IsFlushed()) + { + auto& entry = m_query_buffer[m_query_read_pos]; + + UINT64 result = 0; + HRESULT hr = D3D::context->GetData(entry.query, &result, sizeof(result), D3D11_ASYNC_GETDATA_DONOTFLUSH); + + if (hr == S_OK) + { + m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + + m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); + --m_query_count; + } + else + { + break; + } + } +} + +bool PerfQuery::IsFlushed() const +{ + return 0 == m_query_count; +} + + +} // namespace diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h new file mode 100644 index 0000000000..b3709d1013 --- /dev/null +++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.h @@ -0,0 +1,46 @@ +#ifndef _PERFQUERY_H_ +#define _PERFQUERY_H_ + +#include "PerfQueryBase.h" + +namespace DX11 { + +class PerfQuery : public PerfQueryBase +{ +public: + PerfQuery(); + ~PerfQuery(); + + void EnableQuery(PerfQueryGroup type); + void DisableQuery(PerfQueryGroup type); + void ResetQuery(); + u32 GetQueryResult(PerfQueryType type); + void FlushResults(); + bool IsFlushed() const; + +private: + struct ActiveQuery + { + ID3D11Query* query; + PerfQueryGroup query_type; + }; + + void WeakFlush(); + + // Only use when non-empty + void FlushOne(); + + // when testing in SMS: 64 was too small, 128 was ok + static const int PERF_QUERY_BUFFER_SIZE = 512; + + ActiveQuery 
m_query_buffer[PERF_QUERY_BUFFER_SIZE]; + int m_query_read_pos; + + // TODO: sloppy + volatile int m_query_count; + volatile u32 m_results[PQG_NUM_MEMBERS]; +}; + +} // namespace + +#endif // _PERFQUERY_H_ diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp index 882d20f8e6..2dcfcd041c 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.cpp @@ -65,18 +65,6 @@ ID3D11RasterizerState* resetraststate = NULL; static ID3D11Texture2D* s_screenshot_texture = NULL; -// Using a vector of query objects to avoid flushing the gpu pipeline all the time -// TODO: Could probably optimized further by using a ring buffer or something -#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess -std::vector<ID3D11Query*> pixel_perf_queries; -static int pixel_perf_query_index = 0; - -static u64 pixel_perf = 0; -static bool pixel_perf_active = false; -static bool pixel_perf_dirty = false; - -ID3D11Query* gpu_finished_query = NULL; - // GX pipeline state struct @@ -170,9 +158,6 @@ void SetupDeviceObjects() D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState"); s_screenshot_texture = NULL; - - D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0); - D3D::device->CreateQuery(&qdesc, &gpu_finished_query); } // Kill off all device objects @@ -180,12 +165,6 @@ void TeardownDeviceObjects() { delete g_framebuffer_manager; - while (!pixel_perf_queries.empty()) - { - SAFE_RELEASE(pixel_perf_queries.back()); - pixel_perf_queries.pop_back(); - } - SAFE_RELEASE(gpu_finished_query); SAFE_RELEASE(access_efb_cbuf); SAFE_RELEASE(clearblendstates[0]); SAFE_RELEASE(clearblendstates[1]); @@ -232,11 +211,6 @@ Renderer::Renderer() s_LastEFBScale = g_ActiveConfig.iEFBScale; CalculateTargetSize(s_backbuffer_width, s_backbuffer_height); - pixel_perf_query_index = 0; - pixel_perf = 0; - pixel_perf_active = false; - pixel_perf_dirty = false; -
SetupDeviceObjects(); @@ -660,112 +634,6 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV()); } -void Renderer::ResetPixelPerf() -{ - if (g_ActiveConfig.bDisablePixelPerf) - return; - - if (pixel_perf_active) - PausePixelPerf(false); - - pixel_perf_query_index = 0; - pixel_perf = 0; -} - -void Renderer::ResumePixelPerf(bool efb_copies) -{ - if (g_ActiveConfig.bDisablePixelPerf) - return; - - if (efb_copies) - return; - - if(pixel_perf_active) - return; - - if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES) - { - D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0); - ID3D11Query* tmpquery = NULL; - D3D::device->CreateQuery(&qdesc, &tmpquery); - pixel_perf_queries.push_back(tmpquery); - pixel_perf_query_index = pixel_perf_queries.size() - 1; - } - else if (pixel_perf_queries.size() < pixel_perf_query_index+1) - { - StorePixelPerfResult(PP_ZCOMP_OUTPUT); - pixel_perf_query_index = 0; - } - // This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output. 
- D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]); - pixel_perf_active = true; - pixel_perf_dirty = true; -} - -void Renderer::PausePixelPerf(bool efb_copies) -{ - if (g_ActiveConfig.bDisablePixelPerf) - return; - - if(!pixel_perf_active) - return; - - D3D::context->End(pixel_perf_queries[pixel_perf_query_index]); - pixel_perf_query_index++; - pixel_perf_active = false; -} - -void Renderer::StorePixelPerfResult(PixelPerfQuery type) -{ - // First, make sure the GPU has finished rendering so that query results are valid - D3D::context->End(gpu_finished_query); - BOOL gpu_finished = FALSE; - while (!gpu_finished) - { - // If nothing goes horribly wrong here, this should complete in finite time... - D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0); - } - - for(int i = 0; i < pixel_perf_query_index; ++i) - { - UINT64 buf = 0; - D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0); - - // Reported pixel metrics should be referenced to native resolution: - pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight(); - } - pixel_perf_dirty = false; -} - -u32 Renderer::GetPixelPerfResult(PixelPerfQuery type) -{ - if (g_ActiveConfig.bDisablePixelPerf) - return 0; - - if (type == PP_EFB_COPY_CLOCKS) - { - // not implemented - return 0; - } - - if (type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L || - type == PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H || - type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L || - type == PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H) - { - // return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT - // TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers. 
- return 0; - } - - // Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway - if (pixel_perf_dirty) - StorePixelPerfResult(PP_ZCOMP_OUTPUT); - - // Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels - return std::min(pixel_perf / 4, (u64)0xFFFFFFFF); -} - void SetSrcBlend(D3D11_BLEND val) { // Colors should blend against SRC_ALPHA diff --git a/Source/Plugins/Plugin_VideoDX11/Src/Render.h b/Source/Plugins/Plugin_VideoDX11/Src/Render.h index 6db829c205..8f6c78fae1 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/Render.h +++ b/Source/Plugins/Plugin_VideoDX11/Src/Render.h @@ -46,12 +46,6 @@ public: void ReinterpretPixelData(unsigned int convtype); - void ResetPixelPerf(); - void ResumePixelPerf(bool efb_copies); - void PausePixelPerf(bool efb_copies); - u32 GetPixelPerfResult(PixelPerfQuery type); - void StorePixelPerfResult(PixelPerfQuery type); // internal - void UpdateViewport(Matrix44& vpCorrection); bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index 8137e1a39f..6991b11690 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -274,9 +274,9 @@ void VertexManager::vFlush() g_nativeVertexFmt->SetupVertexPointers(); g_renderer->ApplyState(useDstAlpha); - g_renderer->ResumePixelPerf(false); + g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); Draw(stride); - g_renderer->PausePixelPerf(false); + g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? 
PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP); GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); diff --git a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp index b7dd9101d3..af4d57dbf9 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/main.cpp @@ -42,6 +42,7 @@ #include "D3DUtil.h" #include "D3DBase.h" +#include "PerfQuery.h" #include "PixelShaderCache.h" #include "TextureCache.h" #include "VertexManager.h" @@ -185,6 +186,7 @@ void VideoBackend::Video_Prepare() g_renderer = new Renderer; g_texture_cache = new TextureCache; g_vertex_manager = new VertexManager; + g_perf_query = new PerfQuery; VertexShaderCache::Init(); PixelShaderCache::Init(); D3D::InitUtils(); @@ -227,6 +229,7 @@ void VideoBackend::Shutdown() D3D::ShutdownUtils(); PixelShaderCache::Shutdown(); VertexShaderCache::Shutdown(); + delete g_perf_query; delete g_vertex_manager; delete g_texture_cache; delete g_renderer; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h index 76040272e3..34c64e43a1 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.h @@ -17,25 +17,25 @@ public: u32 GetQueryResult(PerfQueryType type); void FlushResults(); bool IsFlushed() const; - + private: struct ActiveQuery { GLuint query_id; PerfQueryGroup query_type; }; - + // when testing in SMS: 64 was too small, 128 was ok static const int PERF_QUERY_BUFFER_SIZE = 512; - + void WeakFlush(); // Only use when non-empty void FlushOne(); - + // This contains gl query objects with unretrieved results. 
ActiveQuery m_query_buffer[PERF_QUERY_BUFFER_SIZE]; int m_query_read_pos; - + // TODO: sloppy volatile int m_query_count; volatile u32 m_results[PQG_NUM_MEMBERS]; From 4058b4c38a1d591253e1b1f84a44c95161607249 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 23:02:11 +0100 Subject: [PATCH 18/20] Add documentation to PerfQueryBase interface. Remove the config field for perf queries (wasn't used for the new interface anyway). Few other cleanups. --- Source/Core/VideoCommon/Src/BPStructs.cpp | 2 +- Source/Core/VideoCommon/Src/MainBase.cpp | 4 ++-- Source/Core/VideoCommon/Src/PerfQueryBase.h | 13 +++++++++++++ Source/Core/VideoCommon/Src/PixelEngine.cpp | 1 - Source/Core/VideoCommon/Src/VideoConfig.cpp | 4 ---- Source/Core/VideoCommon/Src/VideoConfig.h | 3 +-- Source/Plugins/Plugin_VideoDX9/Src/main.cpp | 9 +++------ 7 files changed, 20 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/Src/BPStructs.cpp b/Source/Core/VideoCommon/Src/BPStructs.cpp index a79af48370..fa6bc08966 100644 --- a/Source/Core/VideoCommon/Src/BPStructs.cpp +++ b/Source/Core/VideoCommon/Src/BPStructs.cpp @@ -485,7 +485,7 @@ void BPWritten(const BPCmd& bp) case BPMEM_IND_IMASK: // Index Mask ? case BPMEM_REVBITS: // Always set to 0x0F when GX_InitRevBits() is called. break; - + case BPMEM_CLEAR_PIXEL_PERF: // GXClearPixMetric writes 0xAAA here, Sunshine alternates this register between values 0x000 and 0xAAA g_perf_query->ResetQuery(); diff --git a/Source/Core/VideoCommon/Src/MainBase.cpp b/Source/Core/VideoCommon/Src/MainBase.cpp index 0d357a4f80..1472367f21 100644 --- a/Source/Core/VideoCommon/Src/MainBase.cpp +++ b/Source/Core/VideoCommon/Src/MainBase.cpp @@ -195,7 +195,7 @@ void VideoFifo_CheckPerfQueryRequest() u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type) { - // Is this check sane? + // TODO: Is this check sane? 
if (!g_perf_query->IsFlushed()) { if (SConfig::GetInstance().m_LocalCoreStartupParameter.bCPUThread) @@ -207,7 +207,7 @@ u32 VideoBackendHardware::Video_GetQueryResult(PerfQueryType type) else g_perf_query->FlushResults(); } - + return g_perf_query->GetQueryResult(type); } diff --git a/Source/Core/VideoCommon/Src/PerfQueryBase.h b/Source/Core/VideoCommon/Src/PerfQueryBase.h index 2643482379..b979449edb 100644 --- a/Source/Core/VideoCommon/Src/PerfQueryBase.h +++ b/Source/Core/VideoCommon/Src/PerfQueryBase.h @@ -28,11 +28,24 @@ public: PerfQueryBase() {}; virtual ~PerfQueryBase() {} + // Begin querying the specified value for the following host GPU commands virtual void EnableQuery(PerfQueryGroup type) {} + + // Stop querying the specified value for the following host GPU commands virtual void DisableQuery(PerfQueryGroup type) {} + + // Reset query counters to zero and drop any pending queries virtual void ResetQuery() {} + + // Return the measured value for the specified query type + // NOTE: Called from CPU thread virtual u32 GetQueryResult(PerfQueryType type) { return 0; } + + // Request the value of any pending queries - causes a pipeline flush and thus should be used carefully! virtual void FlushResults() {} + + // True if there are no further pending query results + // NOTE: Called from CPU thread virtual bool IsFlushed() const { return true; } }; diff --git a/Source/Core/VideoCommon/Src/PixelEngine.cpp b/Source/Core/VideoCommon/Src/PixelEngine.cpp index 2d6275096c..e5ba554678 100644 --- a/Source/Core/VideoCommon/Src/PixelEngine.cpp +++ b/Source/Core/VideoCommon/Src/PixelEngine.cpp @@ -299,7 +299,6 @@ void Read16(u16& _uReturnValue, const u32 _iAddress) // Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all. // Note that currently this functionality is only implemented in the D3D11 backend. 
_uReturnValue = g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT) & 0xFFFF; - //ERROR_LOG(VIDEO, "PQ_BLEND_INPUT: %d", g_video_backend->Video_GetQueryResult(PQ_BLEND_INPUT)); break; case PE_PERF_BLEND_INPUT_H: diff --git a/Source/Core/VideoCommon/Src/VideoConfig.cpp b/Source/Core/VideoCommon/Src/VideoConfig.cpp index bd5c6a7acb..a76514a10c 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.cpp +++ b/Source/Core/VideoCommon/Src/VideoConfig.cpp @@ -105,7 +105,6 @@ void VideoConfig::Load(const char *ini_file) iniFile.Get("Hacks", "EFBScaledCopy", &bCopyEFBScaled, true); iniFile.Get("Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable, false); iniFile.Get("Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false); - iniFile.Get("Hacks", "DisablePixelPerf", &bDisablePixelPerf, true); iniFile.Get("Hardware", "Adapter", &iAdapter, 0); @@ -154,7 +153,6 @@ void VideoConfig::GameIniLoad(const char *ini_file) iniFile.GetIfExists("Video_Hacks", "EFBScaledCopy", &bCopyEFBScaled); iniFile.GetIfExists("Video_Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable); iniFile.GetIfExists("Video_Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges); - iniFile.GetIfExists("Video_Hacks", "DisablePixelPerf", &bDisablePixelPerf); iniFile.GetIfExists("Video", "ProjectionHack", &iPhackvalue[0]); iniFile.GetIfExists("Video", "PH_SZNear", &iPhackvalue[1]); @@ -233,7 +231,6 @@ void VideoConfig::Save(const char *ini_file) iniFile.Set("Hacks", "EFBScaledCopy", bCopyEFBScaled); iniFile.Set("Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable); iniFile.Set("Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges); - iniFile.Set("Hacks", "DisablePixelPerf", bDisablePixelPerf); iniFile.Set("Hardware", "Adapter", iAdapter); @@ -289,7 +286,6 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini) SET_IF_DIFFERS("Video_Hacks", "EFBScaledCopy", bCopyEFBScaled); SET_IF_DIFFERS("Video_Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable); 
SET_IF_DIFFERS("Video_Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges); - SET_IF_DIFFERS("Video_Hacks", "DisablePixelPerf", bDisablePixelPerf); iniFile.Save(game_ini); } diff --git a/Source/Core/VideoCommon/Src/VideoConfig.h b/Source/Core/VideoCommon/Src/VideoConfig.h index c9d2eef217..0531918183 100644 --- a/Source/Core/VideoCommon/Src/VideoConfig.h +++ b/Source/Core/VideoCommon/Src/VideoConfig.h @@ -115,7 +115,7 @@ struct VideoConfig int iAnaglyphStereoSeparation; int iAnaglyphFocalAngle; bool b3DVision; - + // Hacks bool bEFBAccessEnable; bool bDlistCachingEnable; @@ -133,7 +133,6 @@ struct VideoConfig bool bZTPSpeedHack; // The Legend of Zelda: Twilight Princess bool bUseBBox; bool bEnablePixelLighting; - bool bDisablePixelPerf; int iLog; // CONF_ bits int iSaveTargetId; // TODO: Should be dropped diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index bf7bf16989..970d4c7085 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -57,6 +57,7 @@ #include "ConfigManager.h" #include "VideoBackend.h" +#include "PerfQueryBase.h" namespace DX9 { @@ -156,12 +157,6 @@ bool VideoBackend::Initialize(void *&window_handle) s_BackendInitialized = true; - if (!g_Config.bDisablePixelPerf) - { - OSD::AddMessage("PE perf metrics enabled although the D3D9 backend doesn't support them!"); - OSD::AddMessage("Try a different backend when issues arise."); - } - return true; } @@ -176,6 +171,7 @@ void VideoBackend::Video_Prepare() g_vertex_manager = new VertexManager; g_renderer = new Renderer; g_texture_cache = new TextureCache; + g_perf_query = new PerfQueryBase; // VideoCommon BPInit(); Fifo_Init(); @@ -213,6 +209,7 @@ void VideoBackend::Shutdown() // internal interfaces PixelShaderCache::Shutdown(); VertexShaderCache::Shutdown(); + delete g_perf_query; delete g_texture_cache; delete g_renderer; delete g_vertex_manager; From 
d1acb0a937d842abaf55844d63298971e73ecee7 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 23:12:41 +0100 Subject: [PATCH 19/20] OGL: Fix perf metrics being overcounted when using a non-native internal resolution. --- Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp | 1 + Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp index 6ab91fed27..d2a76d7d4b 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp @@ -103,6 +103,7 @@ void PerfQuery::FlushOne() hr = D3D::context->GetData(entry.query, &result, sizeof(result), 0); } + // NOTE: Reported pixel metrics should be referenced to native resolution m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp index 8cee426e98..42ff918942 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp @@ -1,3 +1,4 @@ +#include "RenderBase.h" #include "GLUtil.h" #include "PerfQuery.h" @@ -61,12 +62,13 @@ bool PerfQuery::IsFlushed() const void PerfQuery::FlushOne() { auto& entry = m_query_buffer[m_query_read_pos]; - + GLuint result = 0; glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); - - m_results[entry.query_type] += result; - + + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); --m_query_count; } From 
cb1d21c032b92412d5d795705642189657db37e3 Mon Sep 17 00:00:00 2001 From: NeoBrainX Date: Fri, 1 Mar 2013 23:57:56 +0100 Subject: [PATCH 20/20] PerfQueries: Fix an integer overflow. --- Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp | 5 +++-- Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp index d2a76d7d4b..b859d50ec6 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/PerfQuery.cpp @@ -104,7 +104,7 @@ void PerfQuery::FlushOne() } // NOTE: Reported pixel metrics should be referenced to native resolution - m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); --m_query_count; @@ -128,7 +128,8 @@ void PerfQuery::WeakFlush() if (hr == S_OK) { - m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + // NOTE: Reported pixel metrics should be referenced to native resolution + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); --m_query_count; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp index 42ff918942..95311eb98d 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/PerfQuery.cpp @@ -67,7 +67,7 @@ void PerfQuery::FlushOne() glGetQueryObjectuiv(entry.query_id, GL_QUERY_RESULT, &result); // NOTE: Reported pixel metrics should be referenced 
to native resolution - m_results[entry.query_type] += result * EFB_WIDTH * EFB_HEIGHT / g_renderer->GetTargetWidth() / g_renderer->GetTargetHeight(); + m_results[entry.query_type] += (u64)result * EFB_WIDTH / g_renderer->GetTargetWidth() * EFB_HEIGHT / g_renderer->GetTargetHeight(); m_query_read_pos = (m_query_read_pos + 1) % ARRAYSIZE(m_query_buffer); --m_query_count;