D3D11: Implement some PE pixel performance metrics.
Super Mario Sunshine is using a cool trick: To determine how much goop has been cleaned in ep. 6 of Sirena Beach, it counts the number of pixels that are input to the blending stage. For that it's using the PE performance registers ;) Fixes issue 1498.
This commit is contained in:
parent
90af798d3d
commit
4d8d86bd6a
|
@ -62,7 +62,7 @@
|
||||||
#define BPMEM_COPYFILTER1 0x54
|
#define BPMEM_COPYFILTER1 0x54
|
||||||
#define BPMEM_CLEARBBOX1 0x55
|
#define BPMEM_CLEARBBOX1 0x55
|
||||||
#define BPMEM_CLEARBBOX2 0x56
|
#define BPMEM_CLEARBBOX2 0x56
|
||||||
#define BPMEM_UNKNOWN_57 0x57
|
#define BPMEM_CLEAR_PIXEL_PERF 0x57
|
||||||
#define BPMEM_REVBITS 0x58
|
#define BPMEM_REVBITS 0x58
|
||||||
#define BPMEM_SCISSOROFFSET 0x59
|
#define BPMEM_SCISSOROFFSET 0x59
|
||||||
#define BPMEM_PRELOAD_ADDR 0x60
|
#define BPMEM_PRELOAD_ADDR 0x60
|
||||||
|
|
|
@ -62,7 +62,6 @@ void RenderToXFB(const BPCmd &bp, const EFBRectangle &rc, float yScale, float xf
|
||||||
{
|
{
|
||||||
Renderer::RenderToXFB(xfbAddr, dstWidth, dstHeight, rc, gamma);
|
Renderer::RenderToXFB(xfbAddr, dstWidth, dstHeight, rc, gamma);
|
||||||
}
|
}
|
||||||
|
|
||||||
void BPWritten(const BPCmd& bp)
|
void BPWritten(const BPCmd& bp)
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
|
@ -141,7 +140,8 @@ void BPWritten(const BPCmd& bp)
|
||||||
|| bp.address == BPMEM_LOADTLUT0
|
|| bp.address == BPMEM_LOADTLUT0
|
||||||
|| bp.address == BPMEM_LOADTLUT1
|
|| bp.address == BPMEM_LOADTLUT1
|
||||||
|| bp.address == BPMEM_TEXINVALIDATE
|
|| bp.address == BPMEM_TEXINVALIDATE
|
||||||
|| bp.address == BPMEM_PRELOAD_MODE))
|
|| bp.address == BPMEM_PRELOAD_MODE
|
||||||
|
|| bp.address == BPMEM_CLEAR_PIXEL_PERF))
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -265,6 +265,8 @@ void BPWritten(const BPCmd& bp)
|
||||||
|
|
||||||
UPE_Copy PE_copy = bpmem.triggerEFBCopy;
|
UPE_Copy PE_copy = bpmem.triggerEFBCopy;
|
||||||
|
|
||||||
|
g_renderer->ResumePixelPerf(true);
|
||||||
|
|
||||||
// Check if we are to copy from the EFB or draw to the XFB
|
// Check if we are to copy from the EFB or draw to the XFB
|
||||||
if (PE_copy.copy_to_xfb == 0)
|
if (PE_copy.copy_to_xfb == 0)
|
||||||
{
|
{
|
||||||
|
@ -303,6 +305,8 @@ void BPWritten(const BPCmd& bp)
|
||||||
s_gammaLUT[PE_copy.gamma]);
|
s_gammaLUT[PE_copy.gamma]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
g_renderer->PausePixelPerf(true);
|
||||||
|
|
||||||
// Clear the rectangular region after copying it.
|
// Clear the rectangular region after copying it.
|
||||||
if (PE_copy.clear)
|
if (PE_copy.clear)
|
||||||
{
|
{
|
||||||
|
@ -481,8 +485,9 @@ void BPWritten(const BPCmd& bp)
|
||||||
case BPMEM_REVBITS: // Always set to 0x0F when GX_InitRevBits() is called.
|
case BPMEM_REVBITS: // Always set to 0x0F when GX_InitRevBits() is called.
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BPMEM_UNKNOWN_57: // Sunshine alternates this register between values 0x000 and 0xAAA
|
case BPMEM_CLEAR_PIXEL_PERF:
|
||||||
DEBUG_LOG(VIDEO, "Unknown BP Reg 0x57: %08x", bp.newvalue);
|
// GXClearPixMetric writes 0xAAA here, Sunshine alternates this register between values 0x000 and 0xAAA
|
||||||
|
g_renderer->ResetPixelPerf();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case BPMEM_PRELOAD_ADDR:
|
case BPMEM_PRELOAD_ADDR:
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
#include "ConfigManager.h"
|
#include "ConfigManager.h"
|
||||||
|
|
||||||
#include "PixelEngine.h"
|
#include "PixelEngine.h"
|
||||||
|
#include "RenderBase.h"
|
||||||
#include "CommandProcessor.h"
|
#include "CommandProcessor.h"
|
||||||
#include "HW/ProcessorInterface.h"
|
#include "HW/ProcessorInterface.h"
|
||||||
#include "DLCache.h"
|
#include "DLCache.h"
|
||||||
|
@ -255,23 +256,59 @@ void Read16(u16& _uReturnValue, const u32 _iAddress)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case PE_PERF_0L:
|
// NOTE(neobrain): only PE_PERF_ZCOMP_OUTPUT is implemented in D3D11, but the other values shouldn't contradict the value of that register (i.e. INPUT registers should always be greater than or equal to their corresponding OUTPUT registers).
|
||||||
case PE_PERF_0H:
|
case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L:
|
||||||
case PE_PERF_1L:
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT_ZCOMPLOC) & 0xFFFF;
|
||||||
case PE_PERF_1H:
|
break;
|
||||||
case PE_PERF_2L:
|
|
||||||
case PE_PERF_2H:
|
case PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H:
|
||||||
case PE_PERF_3L:
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT_ZCOMPLOC) >> 16;
|
||||||
case PE_PERF_3H:
|
break;
|
||||||
case PE_PERF_4L:
|
|
||||||
case PE_PERF_4H:
|
case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L:
|
||||||
case PE_PERF_5L:
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT_ZCOMPLOC) & 0xFFFF;
|
||||||
case PE_PERF_5H:
|
break;
|
||||||
INFO_LOG(PIXELENGINE, "(r16) perf counter @ %08x", _iAddress);
|
|
||||||
// git r90a2096a24f4 (svn r3663) added the PE_PERF cases, without setting
|
case PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H:
|
||||||
// _uReturnValue to anything, this reverts to the previous behaviour which allows
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT_ZCOMPLOC) >> 16;
|
||||||
// The timer in SMS:Scrubbing Serena Beach to countdown correctly
|
break;
|
||||||
_uReturnValue = 1;
|
|
||||||
|
case PE_PERF_ZCOMP_INPUT_L:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT) & 0xFFFF;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_ZCOMP_INPUT_H:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_INPUT) >> 16;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_ZCOMP_OUTPUT_L:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT) & 0xFFFF;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_ZCOMP_OUTPUT_H:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_ZCOMP_OUTPUT) >> 16;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_BLEND_INPUT_L:
|
||||||
|
// Super Mario Sunshine uses this register in episode 6 of Sirena Beach:
|
||||||
|
// The amount of remaining goop is determined by checking how many pixels reach the blending stage.
|
||||||
|
// Once this register falls below a particular value (around 0x90), the game regards the challenge finished.
|
||||||
|
// In very old builds, Dolphin only returned 0. That caused the challenge to be immediately finished without any goop being cleaned (the timer just didn't even start counting from 3:00:00).
|
||||||
|
// Later builds returned 1 for the high register. That caused the timer to actually count down, but made the challenge unbeatable because the game always thought you didn't clear any goop at all.
|
||||||
|
// Note that currently this functionality is only implemented in the D3D11 backend.
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_BLEND_INPUT) & 0xFFFF;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_BLEND_INPUT_H:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_BLEND_INPUT) >> 16;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_EFB_COPY_CLOCKS_L:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_EFB_COPY_CLOCKS) & 0xFFFF;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case PE_PERF_EFB_COPY_CLOCKS_H:
|
||||||
|
_uReturnValue = g_renderer->GetPixelPerfResult(Renderer::PP_EFB_COPY_CLOCKS) >> 16;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -36,19 +36,20 @@ enum
|
||||||
PE_BBOX_TOP = 0x14, // Flip Top
|
PE_BBOX_TOP = 0x14, // Flip Top
|
||||||
PE_BBOX_BOTTOM = 0x16, // Flip Bottom
|
PE_BBOX_BOTTOM = 0x16, // Flip Bottom
|
||||||
|
|
||||||
// These have not yet been RE:d. They are the perf counters.
|
// NOTE: Order not verified
|
||||||
PE_PERF_0L = 0x18,
|
// These indicate the number of quads that are being used as input/output for each particular stage
|
||||||
PE_PERF_0H = 0x1a,
|
PE_PERF_ZCOMP_INPUT_ZCOMPLOC_L = 0x18,
|
||||||
PE_PERF_1L = 0x1c,
|
PE_PERF_ZCOMP_INPUT_ZCOMPLOC_H = 0x1a,
|
||||||
PE_PERF_1H = 0x1e,
|
PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_L = 0x1c,
|
||||||
PE_PERF_2L = 0x20,
|
PE_PERF_ZCOMP_OUTPUT_ZCOMPLOC_H = 0x1e,
|
||||||
PE_PERF_2H = 0x22,
|
PE_PERF_ZCOMP_INPUT_L = 0x20,
|
||||||
PE_PERF_3L = 0x24,
|
PE_PERF_ZCOMP_INPUT_H = 0x22,
|
||||||
PE_PERF_3H = 0x26,
|
PE_PERF_ZCOMP_OUTPUT_L = 0x24,
|
||||||
PE_PERF_4L = 0x28,
|
PE_PERF_ZCOMP_OUTPUT_H = 0x26,
|
||||||
PE_PERF_4H = 0x2a,
|
PE_PERF_BLEND_INPUT_L = 0x28,
|
||||||
PE_PERF_5L = 0x2c,
|
PE_PERF_BLEND_INPUT_H = 0x2a,
|
||||||
PE_PERF_5H = 0x2e,
|
PE_PERF_EFB_COPY_CLOCKS_L = 0x2c,
|
||||||
|
PE_PERF_EFB_COPY_CLOCKS_H = 0x2e,
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace PixelEngine
|
namespace PixelEngine
|
||||||
|
|
|
@ -52,6 +52,15 @@ public:
|
||||||
Renderer();
|
Renderer();
|
||||||
virtual ~Renderer();
|
virtual ~Renderer();
|
||||||
|
|
||||||
|
enum PixelPerfQuery {
|
||||||
|
PP_ZCOMP_INPUT_ZCOMPLOC,
|
||||||
|
PP_ZCOMP_OUTPUT_ZCOMPLOC,
|
||||||
|
PP_ZCOMP_INPUT,
|
||||||
|
PP_ZCOMP_OUTPUT,
|
||||||
|
PP_BLEND_INPUT,
|
||||||
|
PP_EFB_COPY_CLOCKS
|
||||||
|
};
|
||||||
|
|
||||||
virtual void SetColorMask() = 0;
|
virtual void SetColorMask() = 0;
|
||||||
virtual void SetBlendMode(bool forceUpdate) = 0;
|
virtual void SetBlendMode(bool forceUpdate) = 0;
|
||||||
virtual void SetScissorRect(const TargetRectangle& rc) = 0;
|
virtual void SetScissorRect(const TargetRectangle& rc) = 0;
|
||||||
|
@ -119,6 +128,11 @@ public:
|
||||||
static unsigned int GetPrevPixelFormat() { return prev_efb_format; }
|
static unsigned int GetPrevPixelFormat() { return prev_efb_format; }
|
||||||
static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; }
|
static void StorePixelFormat(unsigned int new_format) { prev_efb_format = new_format; }
|
||||||
|
|
||||||
|
virtual void ResetPixelPerf() {};
|
||||||
|
virtual void ResumePixelPerf(bool efb_copies) {};
|
||||||
|
virtual void PausePixelPerf(bool efb_copies) {};
|
||||||
|
virtual u32 GetPixelPerfResult(PixelPerfQuery type) { return 0; };
|
||||||
|
|
||||||
// TODO: doesn't belong here
|
// TODO: doesn't belong here
|
||||||
virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0;
|
virtual void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4) = 0;
|
||||||
virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0;
|
virtual void SetPSConstant4fv(unsigned int const_number, const float *f) = 0;
|
||||||
|
|
|
@ -253,7 +253,9 @@ void VertexManager::Flush()
|
||||||
//if (g_nativeVertexFmt)
|
//if (g_nativeVertexFmt)
|
||||||
g_nativeVertexFmt->SetupVertexPointers();
|
g_nativeVertexFmt->SetupVertexPointers();
|
||||||
|
|
||||||
|
g_renderer->ResumePixelPerf(false);
|
||||||
g_vertex_manager->Draw(stride, false);
|
g_vertex_manager->Draw(stride, false);
|
||||||
|
g_renderer->PausePixelPerf(false);
|
||||||
|
|
||||||
// run through vertex groups again to set alpha
|
// run through vertex groups again to set alpha
|
||||||
if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
|
if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
|
||||||
|
|
|
@ -104,6 +104,7 @@ void VideoConfig::Load(const char *ini_file)
|
||||||
iniFile.Get("Hacks", "EFBScaledCopy", &bCopyEFBScaled, true);
|
iniFile.Get("Hacks", "EFBScaledCopy", &bCopyEFBScaled, true);
|
||||||
iniFile.Get("Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable, false);
|
iniFile.Get("Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable, false);
|
||||||
iniFile.Get("Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false);
|
iniFile.Get("Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges, false);
|
||||||
|
iniFile.Get("Hacks", "DisablePixelPerf", &bDisablePixelPerf, true);
|
||||||
|
|
||||||
iniFile.Get("Hardware", "Adapter", &iAdapter, 0);
|
iniFile.Get("Hardware", "Adapter", &iAdapter, 0);
|
||||||
|
|
||||||
|
@ -153,6 +154,7 @@ void VideoConfig::GameIniLoad(const char *ini_file)
|
||||||
iniFile.GetIfExists("Video_Hacks", "EFBScaledCopy", &bCopyEFBScaled);
|
iniFile.GetIfExists("Video_Hacks", "EFBScaledCopy", &bCopyEFBScaled);
|
||||||
iniFile.GetIfExists("Video_Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable);
|
iniFile.GetIfExists("Video_Hacks", "EFBCopyCacheEnable", &bEFBCopyCacheEnable);
|
||||||
iniFile.GetIfExists("Video_Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges);
|
iniFile.GetIfExists("Video_Hacks", "EFBEmulateFormatChanges", &bEFBEmulateFormatChanges);
|
||||||
|
iniFile.GetIfExists("Video_Hacks", "DisablePixelPerf", &bDisablePixelPerf);
|
||||||
|
|
||||||
iniFile.GetIfExists("Video", "ProjectionHack", &iPhackvalue[0]);
|
iniFile.GetIfExists("Video", "ProjectionHack", &iPhackvalue[0]);
|
||||||
iniFile.GetIfExists("Video", "PH_SZNear", &iPhackvalue[1]);
|
iniFile.GetIfExists("Video", "PH_SZNear", &iPhackvalue[1]);
|
||||||
|
@ -172,6 +174,7 @@ void VideoConfig::VerifyValidity()
|
||||||
if (!backend_info.bSupports3DVision) b3DVision = false;
|
if (!backend_info.bSupports3DVision) b3DVision = false;
|
||||||
if (!backend_info.bSupportsFormatReinterpretation) bEFBEmulateFormatChanges = false;
|
if (!backend_info.bSupportsFormatReinterpretation) bEFBEmulateFormatChanges = false;
|
||||||
if (!backend_info.bSupportsPixelLighting) bEnablePixelLighting = false;
|
if (!backend_info.bSupportsPixelLighting) bEnablePixelLighting = false;
|
||||||
|
if (!backend_info.bSupportsPixelPerfQuery) bDisablePixelPerf = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VideoConfig::Save(const char *ini_file)
|
void VideoConfig::Save(const char *ini_file)
|
||||||
|
@ -231,6 +234,7 @@ void VideoConfig::Save(const char *ini_file)
|
||||||
iniFile.Set("Hacks", "EFBScaledCopy", bCopyEFBScaled);
|
iniFile.Set("Hacks", "EFBScaledCopy", bCopyEFBScaled);
|
||||||
iniFile.Set("Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable);
|
iniFile.Set("Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable);
|
||||||
iniFile.Set("Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
|
iniFile.Set("Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
|
||||||
|
iniFile.Set("Hacks", "DisablePixelPerf", bDisablePixelPerf);
|
||||||
|
|
||||||
iniFile.Set("Hardware", "Adapter", iAdapter);
|
iniFile.Set("Hardware", "Adapter", iAdapter);
|
||||||
|
|
||||||
|
@ -287,6 +291,7 @@ void VideoConfig::GameIniSave(const char* default_ini, const char* game_ini)
|
||||||
SET_IF_DIFFERS("Video_Hacks", "EFBScaledCopy", bCopyEFBScaled);
|
SET_IF_DIFFERS("Video_Hacks", "EFBScaledCopy", bCopyEFBScaled);
|
||||||
SET_IF_DIFFERS("Video_Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable);
|
SET_IF_DIFFERS("Video_Hacks", "EFBCopyCacheEnable", bEFBCopyCacheEnable);
|
||||||
SET_IF_DIFFERS("Video_Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
|
SET_IF_DIFFERS("Video_Hacks", "EFBEmulateFormatChanges", bEFBEmulateFormatChanges);
|
||||||
|
SET_IF_DIFFERS("Video_Hacks", "DisablePixelPerf", bDisablePixelPerf);
|
||||||
|
|
||||||
iniFile.Save(game_ini);
|
iniFile.Save(game_ini);
|
||||||
}
|
}
|
||||||
|
|
|
@ -133,6 +133,7 @@ struct VideoConfig
|
||||||
bool bUseBBox;
|
bool bUseBBox;
|
||||||
bool bEnablePixelLighting;
|
bool bEnablePixelLighting;
|
||||||
bool bEnablePerPixelDepth;
|
bool bEnablePerPixelDepth;
|
||||||
|
bool bDisablePixelPerf;
|
||||||
|
|
||||||
int iLog; // CONF_ bits
|
int iLog; // CONF_ bits
|
||||||
int iSaveTargetId; // TODO: Should be dropped
|
int iSaveTargetId; // TODO: Should be dropped
|
||||||
|
@ -161,6 +162,7 @@ struct VideoConfig
|
||||||
bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL
|
bool bSupportsDualSourceBlend; // only supported by D3D11 and OpenGL
|
||||||
bool bSupportsFormatReinterpretation;
|
bool bSupportsFormatReinterpretation;
|
||||||
bool bSupportsPixelLighting;
|
bool bSupportsPixelLighting;
|
||||||
|
bool bSupportsPixelPerfQuery;
|
||||||
} backend_info;
|
} backend_info;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,19 @@ ID3D11RasterizerState* resetraststate = NULL;
|
||||||
|
|
||||||
static ID3D11Texture2D* s_screenshot_texture = NULL;
|
static ID3D11Texture2D* s_screenshot_texture = NULL;
|
||||||
|
|
||||||
|
// Using a vector of query objects to avoid flushing the gpu pipeline all the time
|
||||||
|
// TODO: Could probably be optimized further by using a ring buffer or something
|
||||||
|
#define MAX_PIXEL_PERF_QUERIES 20 // 20 is an arbitrary guess
|
||||||
|
std::vector<ID3D11Query*> pixel_perf_queries;
|
||||||
|
static int pixel_perf_query_index = 0;
|
||||||
|
|
||||||
|
static u64 pixel_perf = 0;
|
||||||
|
static bool pixel_perf_active = false;
|
||||||
|
static bool pixel_perf_dirty = false;
|
||||||
|
|
||||||
|
ID3D11Query* gpu_finished_query = NULL;
|
||||||
|
|
||||||
|
|
||||||
// GX pipeline state
|
// GX pipeline state
|
||||||
struct
|
struct
|
||||||
{
|
{
|
||||||
|
@ -302,6 +315,9 @@ void SetupDeviceObjects()
|
||||||
D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState");
|
D3D::SetDebugObjectName((ID3D11DeviceChild*)resetraststate, "rasterizer state for Renderer::ResetAPIState");
|
||||||
|
|
||||||
s_screenshot_texture = NULL;
|
s_screenshot_texture = NULL;
|
||||||
|
|
||||||
|
D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_EVENT, 0);
|
||||||
|
D3D::device->CreateQuery(&qdesc, &gpu_finished_query);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Kill off all device objects
|
// Kill off all device objects
|
||||||
|
@ -309,6 +325,12 @@ void TeardownDeviceObjects()
|
||||||
{
|
{
|
||||||
delete g_framebuffer_manager;
|
delete g_framebuffer_manager;
|
||||||
|
|
||||||
|
while (!pixel_perf_queries.empty())
|
||||||
|
{
|
||||||
|
SAFE_RELEASE(pixel_perf_queries.back());
|
||||||
|
pixel_perf_queries.pop_back();
|
||||||
|
}
|
||||||
|
SAFE_RELEASE(gpu_finished_query);
|
||||||
SAFE_RELEASE(access_efb_cbuf);
|
SAFE_RELEASE(access_efb_cbuf);
|
||||||
SAFE_RELEASE(clearblendstates[0]);
|
SAFE_RELEASE(clearblendstates[0]);
|
||||||
SAFE_RELEASE(clearblendstates[1]);
|
SAFE_RELEASE(clearblendstates[1]);
|
||||||
|
@ -357,6 +379,11 @@ Renderer::Renderer()
|
||||||
s_LastEFBScale = g_ActiveConfig.iEFBScale;
|
s_LastEFBScale = g_ActiveConfig.iEFBScale;
|
||||||
CalculateTargetSize();
|
CalculateTargetSize();
|
||||||
|
|
||||||
|
pixel_perf_query_index = 0;
|
||||||
|
pixel_perf = 0;
|
||||||
|
pixel_perf_active = false;
|
||||||
|
pixel_perf_dirty = false;
|
||||||
|
|
||||||
SetupDeviceObjects();
|
SetupDeviceObjects();
|
||||||
|
|
||||||
|
|
||||||
|
@ -777,6 +804,112 @@ void Renderer::ReinterpretPixelData(unsigned int convtype)
|
||||||
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
|
D3D::context->OMSetRenderTargets(1, &FramebufferManager::GetEFBColorTexture()->GetRTV(), FramebufferManager::GetEFBDepthTexture()->GetDSV());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Renderer::ResetPixelPerf()
|
||||||
|
{
|
||||||
|
if (g_ActiveConfig.bDisablePixelPerf)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (pixel_perf_active)
|
||||||
|
PausePixelPerf(false);
|
||||||
|
|
||||||
|
pixel_perf_query_index = 0;
|
||||||
|
pixel_perf = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Renderer::ResumePixelPerf(bool efb_copies)
|
||||||
|
{
|
||||||
|
if (g_ActiveConfig.bDisablePixelPerf)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (efb_copies)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if(pixel_perf_active)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (pixel_perf_queries.size() < pixel_perf_query_index+1 && pixel_perf_query_index < MAX_PIXEL_PERF_QUERIES)
|
||||||
|
{
|
||||||
|
D3D11_QUERY_DESC qdesc = CD3D11_QUERY_DESC(D3D11_QUERY_OCCLUSION, 0);
|
||||||
|
ID3D11Query* tmpquery = NULL;
|
||||||
|
D3D::device->CreateQuery(&qdesc, &tmpquery);
|
||||||
|
pixel_perf_queries.push_back(tmpquery);
|
||||||
|
pixel_perf_query_index = pixel_perf_queries.size() - 1;
|
||||||
|
}
|
||||||
|
else if (pixel_perf_queries.size() < pixel_perf_query_index+1)
|
||||||
|
{
|
||||||
|
StorePixelPerfResult(PP_ZCOMP_OUTPUT);
|
||||||
|
pixel_perf_query_index = 0;
|
||||||
|
}
|
||||||
|
// This will spam the D3D11 debug runtime output with QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS warnings which safely can be ignored. Mute them in the DX control panel if you need to read the debug runtime output.
|
||||||
|
D3D::context->Begin(pixel_perf_queries[pixel_perf_query_index]);
|
||||||
|
pixel_perf_active = true;
|
||||||
|
pixel_perf_dirty = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Renderer::PausePixelPerf(bool efb_copies)
|
||||||
|
{
|
||||||
|
if (g_ActiveConfig.bDisablePixelPerf)
|
||||||
|
return;
|
||||||
|
|
||||||
|
if(!pixel_perf_active)
|
||||||
|
return;
|
||||||
|
|
||||||
|
D3D::context->End(pixel_perf_queries[pixel_perf_query_index]);
|
||||||
|
pixel_perf_query_index++;
|
||||||
|
pixel_perf_active = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reads back every pending occlusion query (slots 0 .. pixel_perf_query_index-1)
// and adds the results to the running pixel_perf total. The type argument is
// currently unused; only a single counter is maintained.
void Renderer::StorePixelPerfResult(PixelPerfQuery type)
{
	// First, make sure the GPU has finished rendering so that query results are valid
	D3D::context->End(gpu_finished_query);
	BOOL gpu_finished = FALSE;
	while (!gpu_finished)
	{
		// GetData reports "not ready yet" with a non-negative HRESULT (S_FALSE)
		// and real failures, such as a removed device, with a negative one.
		// On failure the flag would never become TRUE, so stop waiting instead
		// of spinning forever; the counters are left untouched in that case.
		if (D3D::context->GetData(gpu_finished_query, &gpu_finished, sizeof(gpu_finished), 0) < 0)
			return;
	}

	for (int i = 0; i < pixel_perf_query_index; ++i)
	{
		UINT64 buf = 0;
		D3D::context->GetData(pixel_perf_queries[i], &buf, sizeof(buf), 0);

		// Reported pixel metrics should be referenced to native resolution:
		pixel_perf += buf * EFB_WIDTH * EFB_HEIGHT / GetTargetWidth() / GetTargetHeight();
	}

	pixel_perf_dirty = false;
}
|
||||||
|
|
||||||
|
// Returns the value of the requested pixel performance counter, expressed in
// 2x2 quads and clamped to 32 bits. Returns 0 when pixel perf emulation is
// disabled and for the counters that are not implemented.
u32 Renderer::GetPixelPerfResult(PixelPerfQuery type)
{
	if (g_ActiveConfig.bDisablePixelPerf)
		return 0;

	switch (type)
	{
	case PP_EFB_COPY_CLOCKS:
		// not implemented
		return 0;

	// 'type' is a PixelPerfQuery, so it has to be matched against the PP_*
	// values. Comparing it with the PE_PERF_* register offsets never matches.
	case PP_ZCOMP_INPUT_ZCOMPLOC:
	case PP_ZCOMP_OUTPUT_ZCOMPLOC:
		// return zero for now because ZCOMP_OUTPUT_ZCOMPLOC + ZCOMP_OUTPUT should equal BLEND_INPUT
		// TODO: Instead, should keep separate counters for zcomploc and non-zcomploc registers.
		return 0;

	default:
		break;
	}

	// Basically we only implement PP_ZCOMP_OUTPUT, but we're returning the same value for PP_ZCOMP_INPUT and PP_BLEND_INPUT anyway
	if (pixel_perf_dirty)
		StorePixelPerfResult(PP_ZCOMP_OUTPUT);

	// Dividing by 4 because we're expected to return the number of 2x2 quads instead of pixels
	return static_cast<u32>(std::min(pixel_perf / 4, (u64)0xFFFFFFFF));
}
|
||||||
|
|
||||||
void SetSrcBlend(D3D11_BLEND val)
|
void SetSrcBlend(D3D11_BLEND val)
|
||||||
{
|
{
|
||||||
// Colors should blend against SRC_ALPHA
|
// Colors should blend against SRC_ALPHA
|
||||||
|
|
|
@ -46,6 +46,12 @@ public:
|
||||||
|
|
||||||
void ReinterpretPixelData(unsigned int convtype);
|
void ReinterpretPixelData(unsigned int convtype);
|
||||||
|
|
||||||
|
void ResetPixelPerf();
|
||||||
|
void ResumePixelPerf(bool efb_copies);
|
||||||
|
void PausePixelPerf(bool efb_copies);
|
||||||
|
u32 GetPixelPerfResult(PixelPerfQuery type);
|
||||||
|
void StorePixelPerfResult(PixelPerfQuery type); // internal
|
||||||
|
|
||||||
void UpdateViewport(Matrix44& vpCorrection);
|
void UpdateViewport(Matrix44& vpCorrection);
|
||||||
|
|
||||||
bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc);
|
bool SaveScreenshot(const std::string &filename, const TargetRectangle &rc);
|
||||||
|
|
|
@ -198,7 +198,6 @@ void VertexManager::Draw(UINT stride)
|
||||||
if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
|
if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
|
||||||
((DX11::Renderer*)g_renderer)->RestoreCull();
|
((DX11::Renderer*)g_renderer)->RestoreCull();
|
||||||
}
|
}
|
||||||
|
|
||||||
void VertexManager::vFlush()
|
void VertexManager::vFlush()
|
||||||
{
|
{
|
||||||
if (LocalVBuffer == s_pCurBufferPointer) return;
|
if (LocalVBuffer == s_pCurBufferPointer) return;
|
||||||
|
@ -266,7 +265,9 @@ void VertexManager::vFlush()
|
||||||
|
|
||||||
g_renderer->ApplyState(useDstAlpha);
|
g_renderer->ApplyState(useDstAlpha);
|
||||||
LoadBuffers();
|
LoadBuffers();
|
||||||
|
g_renderer->ResumePixelPerf(false);
|
||||||
Draw(stride);
|
Draw(stride);
|
||||||
|
g_renderer->PausePixelPerf(false);
|
||||||
|
|
||||||
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
|
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
|
||||||
|
|
||||||
|
|
|
@ -94,6 +94,7 @@ void InitBackendInfo()
|
||||||
g_Config.backend_info.bSupportsDualSourceBlend = true;
|
g_Config.backend_info.bSupportsDualSourceBlend = true;
|
||||||
g_Config.backend_info.bSupportsFormatReinterpretation = true;
|
g_Config.backend_info.bSupportsFormatReinterpretation = true;
|
||||||
g_Config.backend_info.bSupportsPixelLighting = true;
|
g_Config.backend_info.bSupportsPixelLighting = true;
|
||||||
|
g_Config.backend_info.bSupportsPixelPerfQuery = true;
|
||||||
|
|
||||||
IDXGIFactory* factory;
|
IDXGIFactory* factory;
|
||||||
IDXGIAdapter* ad;
|
IDXGIAdapter* ad;
|
||||||
|
|
|
@ -96,6 +96,7 @@ void InitBackendInfo()
|
||||||
g_Config.backend_info.bSupports3DVision = true;
|
g_Config.backend_info.bSupports3DVision = true;
|
||||||
g_Config.backend_info.bSupportsDualSourceBlend = false;
|
g_Config.backend_info.bSupportsDualSourceBlend = false;
|
||||||
g_Config.backend_info.bSupportsFormatReinterpretation = true;
|
g_Config.backend_info.bSupportsFormatReinterpretation = true;
|
||||||
|
g_Config.backend_info.bSupportsPixelPerfQuery = false;
|
||||||
|
|
||||||
|
|
||||||
g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;
|
g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;
|
||||||
|
|
|
@ -135,6 +135,7 @@ void InitBackendInfo()
|
||||||
g_Config.backend_info.bSupportsDualSourceBlend = false; // supported, but broken
|
g_Config.backend_info.bSupportsDualSourceBlend = false; // supported, but broken
|
||||||
g_Config.backend_info.bSupportsFormatReinterpretation = false;
|
g_Config.backend_info.bSupportsFormatReinterpretation = false;
|
||||||
g_Config.backend_info.bSupportsPixelLighting = true;
|
g_Config.backend_info.bSupportsPixelLighting = true;
|
||||||
|
g_Config.backend_info.bSupportsPixelPerfQuery = false;
|
||||||
|
|
||||||
// aamodes
|
// aamodes
|
||||||
const char* caamodes[] = {"None", "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA"};
|
const char* caamodes[] = {"None", "2x", "4x", "8x", "8x CSAA", "8xQ CSAA", "16x CSAA", "16xQ CSAA"};
|
||||||
|
|
Loading…
Reference in New Issue