Merge pull request #2123 from NZJenkins/optimize-cnk2

Reduce the amount of work done per draw
This commit is contained in:
PatrickvL 2021-01-20 09:55:25 +01:00 committed by GitHub
commit b3f1e610ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 94 additions and 132 deletions

View File

@ -183,24 +183,6 @@ PopupReturn PopupCustomEx(const void* hwnd, const CXBXR_MODULE cxbxr_module, con
// For LOG_TEST_CASE
extern inline void EmuLogOutputEx(const CXBXR_MODULE cxbxr_module, const LOG_LEVEL level, const char *szWarningMessage, ...);
// LOG_TEST_CASE(message) : reports, once per expansion site, that the running title reached a code path of interest.
// - A block-scope static flag (bTestCaseLogged) turns every later pass through the same site into a no-op.
// - When g_CurrentLogPopupTestCase is set and LOG_CHECK_ENABLED(LOG_LEVEL::INFO) passes, the report is shown
//   through PopupInfo; the `continue` that follows re-tests the `while (0)` condition and leaves the block,
//   so EmuLogOutputEx is not called as well.
//   NOTE(review): this relies on LOG_CHECK_ENABLED expanding to a plain `if` - confirm.
// - Otherwise the report is handed to EmuLogOutputEx with LOG_LEVEL::INFO. EmuLogOutputEx is used here so that
//   any triggered test case is dumped to the log directly, which lets developers quickly see which applications
//   triggered it without requiring any individual log to be enabled.
// The report holds the title (CxbxKrnl_Xbe->m_szAsciiTitle), the message, and __func__ / __FILE__ / __LINE__ of the site.
#define LOG_TEST_CASE(message) do { \
static bool bTestCaseLogged = false; \
if (bTestCaseLogged) break; \
bTestCaseLogged = true; \
if (g_CurrentLogPopupTestCase) { \
LOG_CHECK_ENABLED(LOG_LEVEL::INFO) { \
PopupInfo(nullptr, "Please report that %s shows the following message:\nLOG_TEST_CASE: %s\nIn %s (%s line %d)", \
CxbxKrnl_Xbe->m_szAsciiTitle, message, __func__, __FILE__, __LINE__); \
continue; \
} \
} \
EmuLogOutputEx(LOG_PREFIX, LOG_LEVEL::INFO, "Please report that %s shows the following message:\nLOG_TEST_CASE: %s\nIn %s (%s line %d)", \
CxbxKrnl_Xbe->m_szAsciiTitle, message, __func__, __FILE__, __LINE__); \
} while (0)
// was g_pCertificate->wszTitleName
//
// __FILENAME__
//

View File

@ -175,6 +175,8 @@ xbox::X_D3DVIEWPORT8 g_Xbox_Viewport = { 0 };
float g_Xbox_BackbufferScaleX = 1;
float g_Xbox_BackbufferScaleY = 1;
static constexpr size_t INDEX_BUFFER_CACHE_SIZE = 10000;
/* Unused :
static xbox::dword_xt *g_Xbox_D3DDevice; // TODO: This should be a D3DDevice structure
*/
@ -2641,8 +2643,8 @@ public:
}
};
std::unordered_map<uint32_t, ConvertedIndexBuffer> g_IndexBufferCache;
std::unordered_map<uint32_t, ConvertedIndexBuffer> g_IndexBufferCache;
void CxbxRemoveIndexBuffer(PWORD pData)
{
// HACK: Never Free
@ -2702,7 +2704,7 @@ ConvertedIndexBuffer& CxbxUpdateActiveIndexBuffer
}
// Poor man's eviction policy : when exceeding the threshold, clear the entire cache :
if (g_IndexBufferCache.size() > 256) {
if (g_IndexBufferCache.size() > INDEX_BUFFER_CACHE_SIZE) {
g_IndexBufferCache.clear(); // Note : ConvertedIndexBuffer destructor will release any assigned pHostIndexBuffer
}

View File

@ -34,6 +34,7 @@
#include "core/hle/Intercept.hpp"
#include "RenderStates.h"
#include "core/hle/D3D8/Direct3D9/Direct3D9.h" // For g_pD3DDevice
#include <optional>
typedef struct {
char* S; // String representation.
@ -176,11 +177,19 @@ void XboxTextureStateConverter::Apply()
for (int State = xbox::X_D3DTSS_FIRST; State <= xbox::X_D3DTSS_LAST; State++) {
// Read the value of the current stage/state from the Xbox data structure
DWORD Value = Get(XboxStage, State); // OR D3D__TextureState[(XboxStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[State]];
DWORD XboxValue = Get(XboxStage, State); // OR D3D__TextureState[(XboxStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[State]];
DWORD PcValue = XboxValue;
// If the state hasn't changed, skip setting it
auto lastState = &PreviousStates[XboxStage][State];
if (*lastState == XboxValue) {
continue;
}
switch (State) {
// These types map 1:1 but have some unsupported values
case xbox::X_D3DTSS_ADDRESSU: case xbox::X_D3DTSS_ADDRESSV: case xbox::X_D3DTSS_ADDRESSW:
switch (Value) {
switch (XboxValue) {
case 0: // Let's ignore zero (its no known X_D3DTADDRESS_ mode, but logging this seems useless)
case xbox::X_D3DTADDRESS_WRAP: // = 1 = D3DTADDRESS_WRAP = 1,
case xbox::X_D3DTADDRESS_MIRROR: // = 2 = D3DTADDRESS_MIRROR = 2,
@ -192,16 +201,16 @@ void XboxTextureStateConverter::Apply()
LOG_TEST_CASE("X_D3DTADDRESS_CLAMPTOEDGE unsupported, falling back to D3DTADDRESS_BORDER");
// D3DTADDRESS_BORDER is the closest host match, CLAMPTOEDGE is identical
// Except it has additional restrictions.
Value = D3DTADDRESS_BORDER;
PcValue = D3DTADDRESS_BORDER;
break;
default:
EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_ADDRESS? value %x", Value);
Value = D3DTADDRESS_WRAP;
EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_ADDRESS? value %x", XboxValue);
PcValue = D3DTADDRESS_WRAP;
break;
}
break;
case xbox::X_D3DTSS_MAGFILTER: case xbox::X_D3DTSS_MINFILTER: case xbox::X_D3DTSS_MIPFILTER:
switch (Value) {
switch (XboxValue) {
case xbox::X_D3DTEXF_NONE: // = 0 = D3DTEXF_NONE = 0, // filtering disabled (valid for mip filter only)
case xbox::X_D3DTEXF_POINT: // = 1 = D3DTEXF_POINT = 1, // nearest
case xbox::X_D3DTEXF_LINEAR: // = 2 = D3DTEXF_LINEAR = 2, // linear interpolation
@ -210,7 +219,7 @@ void XboxTextureStateConverter::Apply()
break;
case xbox::X_D3DTEXF_QUINCUNX: // = 4; // quincunx kernel (Xbox extension), also known as "flat cubic"
LOG_TEST_CASE("X_D3DTEXF_QUINCUNX unsupported, falling back to D3DTEXF_ANISOTROPIC");
Value = D3DTEXF_ANISOTROPIC;
PcValue = D3DTEXF_ANISOTROPIC;
break;
case xbox::X_D3DTEXF_GAUSSIANCUBIC: // = 5 // Xbox extension, different cubic kernel
// Direct3D 9 alternatives :
@ -218,21 +227,21 @@ void XboxTextureStateConverter::Apply()
// D3DTEXF_GAUSSIANQUAD = 7, // 4-sample gaussian
// D3DTEXF_CONVOLUTIONMONO = 8, // Convolution filter for monochrome textures
LOG_TEST_CASE("X_D3DTEXF_QUINCUNX unsupported, falling back to D3DTEXF_GAUSSIANQUAD");
Value = D3DTEXF_GAUSSIANQUAD;
PcValue = D3DTEXF_GAUSSIANQUAD;
break;
default:
EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_M??FILTER value %x", Value);
Value = D3DTEXF_NONE;
EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_M??FILTER value %x", XboxValue);
PcValue = D3DTEXF_NONE;
break;
}
break;
case xbox::X_D3DTSS_TEXCOORDINDEX: {
int texCoordIndex = Value & 0x0000FFFF;
int texCoordIndex = XboxValue & 0x0000FFFF;
if (texCoordIndex > 3) {
LOG_TEST_CASE("TEXCOORDINDEX out of bounds, masking to lowest 2 bits");
texCoordIndex = Value & 3;
texCoordIndex = XboxValue & 3;
}
switch (Value & 0xFFFF0000) {
switch (XboxValue & 0xFFFF0000) {
case X_D3DTSS_TCI_PASSTHRU: // = 0x00000000
case X_D3DTSS_TCI_CAMERASPACENORMAL: // = 0x00010000
case X_D3DTSS_TCI_CAMERASPACEPOSITION: // = 0x00020000
@ -245,22 +254,22 @@ void XboxTextureStateConverter::Apply()
// It probably means "TexGen ObjectLinear", or '(untransformed) object space identity mapping'
LOG_TEST_CASE("Xbox D3DTSS_TCI_OBJECT unsupported on host");
// Test-case : Terrain XDK sample
Value = texCoordIndex;
PcValue = texCoordIndex;
break;
case X_D3DTSS_TCI_SPHEREMAP: // = 0x00050000
// Convert Xbox sphere mapping bit to host Direct3D 9 (which uses a different bit)
Value = D3DTSS_TCI_SPHEREMAP | texCoordIndex;
PcValue = D3DTSS_TCI_SPHEREMAP | texCoordIndex;
break;
default:
EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_TEXCOORDINDEX value %x", Value);
Value = texCoordIndex;
EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_TEXCOORDINDEX value %x", XboxValue);
PcValue = texCoordIndex;
break;
}
break;
}
// These types require value remapping for all supported values
case xbox::X_D3DTSS_COLOROP: case xbox::X_D3DTSS_ALPHAOP:
Value = GetHostTextureOpValue(Value);
PcValue = GetHostTextureOpValue(XboxValue);
break;
// These types require no conversion, so we just pass through as-is
case xbox::X_D3DTSS_COLORARG0: case xbox::X_D3DTSS_COLORARG1: case xbox::X_D3DTSS_COLORARG2:
@ -275,7 +284,7 @@ void XboxTextureStateConverter::Apply()
default:
// Only log missing state if it has a PC counterpart
if (CxbxTextureStateInfo[State].PC != 0) {
EmuLog(LOG_LEVEL::WARNING, "XboxTextureStateConverter::Apply(%s, 0x%.08X) is unimplemented!", CxbxTextureStateInfo[State].S, Value);
EmuLog(LOG_LEVEL::WARNING, "XboxTextureStateConverter::Apply(%s, 0x%.08X) is unimplemented!", CxbxTextureStateInfo[State].S, XboxValue);
}
break;
}
@ -286,10 +295,13 @@ void XboxTextureStateConverter::Apply()
}
if (CxbxTextureStateInfo[State].IsSamplerState) {
g_pD3DDevice->SetSamplerState(HostStage, (D3DSAMPLERSTATETYPE)CxbxTextureStateInfo[State].PC, Value);
g_pD3DDevice->SetSamplerState(HostStage, (D3DSAMPLERSTATETYPE)CxbxTextureStateInfo[State].PC, PcValue);
} else {
g_pD3DDevice->SetTextureStageState(HostStage, (D3DTEXTURESTAGESTATETYPE)CxbxTextureStateInfo[State].PC, Value);
g_pD3DDevice->SetTextureStageState(HostStage, (D3DTEXTURESTAGESTATETYPE)CxbxTextureStateInfo[State].PC, PcValue);
}
// Record we set a state
lastState->emplace(XboxValue);
}
// Make sure we only do this once

View File

@ -28,6 +28,7 @@
#include <cstdint>
#include <array>
#include "core\hle\D3D8\XbD3D8Types.h"
#include <optional>
#define CXBX_D3DRS_UNSUPPORTED (xbox::X_D3DRS_LAST + 1)
@ -47,4 +48,6 @@ private:
uint32_t* D3D__TextureState = nullptr;
std::array<int, xbox::X_D3DTSS_LAST + 1> XboxTextureStateOffsets;
XboxRenderStateConverter* pXboxRenderStates;
// Holds the last state that was set, so we don't set it again
std::optional<DWORD> PreviousStates[xbox::X_D3DTS_STAGECOUNT][xbox::X_D3DTSS_LAST + 1] = {};
};

View File

@ -194,32 +194,6 @@ std::string DebugPrependLineNumbers(std::string shaderString) {
return debugShader.str();
}
// Classifies an intermediate vertex shader before any attempt to compile it.
// Returns ShaderType::Empty when the shader holds no instructions, ShaderType::Unsupported
// for VERSION_XVSW and for unrecognised header versions, and ShaderType::Compilable otherwise.
extern ShaderType EmuGetShaderInfo(IntermediateVertexShader* pIntermediateShader) {
    // A shader without instructions is declaration-only : there is no function to compile
    if (pIntermediateShader->Instructions.empty()) {
        return ShaderType::Empty;
    }

    const auto shaderVersion = pIntermediateShader->Header.Version;

    if (shaderVersion == VERSION_XVS) {
        return ShaderType::Compilable;
    }

    if (shaderVersion == VERSION_XVSS) {
        // Still treated as compilable, but flagged so that the title gets reported
        LOG_TEST_CASE("Might not support vertex state shaders?");
        return ShaderType::Compilable;
    }

    if (shaderVersion == VERSION_XVSW) {
        EmuLog(LOG_LEVEL::WARNING, "Might not support vertex read/write shaders?");
        return ShaderType::Unsupported;
    }

    // Anything else is a version this code does not recognise
    EmuLog(LOG_LEVEL::WARNING, "Unknown vertex shader version 0x%02X", shaderVersion);
    return ShaderType::Unsupported;
}
HRESULT CompileHlsl(const std::string& hlsl, ID3DBlob** ppHostShader, const char* pSourceName)
{
// TODO include header in vertex shader

View File

@ -14,8 +14,6 @@ static const char* vs_model_2_a = "vs_2_a";
static const char* vs_model_3_0 = "vs_3_0";
extern const char* g_vs_model;
extern ShaderType EmuGetShaderInfo(IntermediateVertexShader* pIntermediateShader);
extern HRESULT EmuCompileShader
(
IntermediateVertexShader* pIntermediateShader,

View File

@ -2,7 +2,7 @@
#include "VertexShaderSource.h"
#include "Logging.h"
#include "core/kernel/init/CxbxKrnl.h"
#include "util/hasher.h"
#include "core/kernel/support/Emu.h"
@ -46,12 +46,8 @@ ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, S
ShaderKey VertexShaderSource::CreateShader(const xbox::dword_xt* pXboxFunction, DWORD *pXboxFunctionSize) {
IntermediateVertexShader intermediateShader;
// Parse into intermediate format
EmuParseVshFunction((DWORD*)pXboxFunction,
pXboxFunctionSize,
&intermediateShader);
*pXboxFunctionSize = GetVshFunctionSize(pXboxFunction);
// FIXME ignore shader header when creating key
ShaderKey key = ComputeHash((void*)pXboxFunction, *pXboxFunctionSize);
// Check if we need to create the shader
@ -64,13 +60,14 @@ ShaderKey VertexShaderSource::CreateShader(const xbox::dword_xt* pXboxFunction,
return key;
}
// Parse into intermediate format
EmuParseVshFunction((DWORD*)pXboxFunction, &intermediateShader);
// We're going to create a new shader
auto newShader = LazyVertexShader();
newShader.referenceCount = 1;
auto shaderType = EmuGetShaderInfo(&intermediateShader);
if (shaderType == ShaderType::Compilable)
if (!intermediateShader.Instructions.empty())
{
// Start compiling the shader in the background
// TODO proper threading / threadpool.
@ -80,6 +77,8 @@ ShaderKey VertexShaderSource::CreateShader(const xbox::dword_xt* pXboxFunction,
}
else {
// We can't do anything with this shader
// Test case: ???
LOG_TEST_CASE("Empty vertex shader");
newShader.isReady = true;
newShader.pHostVertexShader = nullptr;
}

View File

@ -85,7 +85,7 @@ class CxbxVertexBufferConverter
ULONG m_TotalCacheHits = 0;
ULONG m_TotalCacheMisses = 0;
UINT m_MaxCacheSize = 2000; // Maximum number of entries in the cache
UINT m_MaxCacheSize = 10000; // Maximum number of entries in the cache
UINT m_CacheElasticity = 200; // Cache is allowed to grow this much more than maximum before being purged to maximum
std::unordered_map<uint64_t, std::list<CxbxPatchedStream>::iterator> m_PatchedStreams; // Stores references to patched streams for fast lookup
std::list<CxbxPatchedStream> m_PatchedStreamUsageList; // Linked list of vertex streams, least recently used is last in the list

View File

@ -371,9 +371,8 @@ xbox::X_STREAMINPUT& GetXboxVertexStreamInput(unsigned XboxStreamNumber)
// * Vertex shader function recompiler
// ****************************************************************************
class XboxVertexShaderDecoder
namespace XboxVertexShaderDecoder
{
private:
// Xbox Vertex Shader microcode types
enum VSH_OUTPUT_TYPE {
@ -539,7 +538,7 @@ private:
Param.Swizzle[3] = (VSH_SWIZZLE)VshGetField(pShaderToken, (VSH_FIELD_NAME)(d + FLD_A_SWZ_W));
}
void VshAddIntermediateInstruction(
static void VshAddIntermediateInstruction(
uint32_t* pShaderToken,
IntermediateVertexShader* pShader,
VSH_MAC MAC,
@ -592,8 +591,7 @@ private:
pShader->Instructions.push_back(intermediate);
}
public:
bool VshConvertToIntermediate(uint32_t* pShaderToken, IntermediateVertexShader* pShader)
static bool VshConvertToIntermediate(uint32_t* pShaderToken, IntermediateVertexShader* pShader)
{
// First get the instruction(s).
VSH_ILU ILU = (VSH_ILU)VshGetField(pShaderToken, FLD_ILU);
@ -648,9 +646,21 @@ public:
return VshGetField(pShaderToken, FLD_FINAL) == 0;
}
};
// Measures an Xbox vertex shader function, in bytes.
// Walks the tokens at pXboxFunction one instruction (X_VSH_INSTRUCTION_SIZE tokens) at a time
// until it has stepped over the instruction whose FLD_FINAL field is non-zero; that final
// instruction is included in the returned size.
// NOTE(review): the walk has no upper bound of its own, so it depends on the token stream
// containing an instruction marked final - confirm that every caller guarantees this.
size_t GetVshFunctionSize(const xbox::dword_xt* pXboxFunction) {
    auto pToken = (uint32_t*)pXboxFunction;

    for (;;) {
        const bool isFinal = XboxVertexShaderDecoder::VshGetField(pToken, XboxVertexShaderDecoder::FLD_FINAL) != 0;
        pToken += X_VSH_INSTRUCTION_SIZE; // TODO use a struct to represent these instructions
        if (isFinal) {
            break; // The final instruction has been stepped over, so it is counted too
        }
    }

    return (pToken - pXboxFunction) * sizeof(xbox::dword_xt);
}
// ****************************************************************************
// * Vertex shader declaration recompiler
// ****************************************************************************
@ -1547,54 +1557,18 @@ void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD C
// parse xbox vertex shader function into an intermediate format
extern void EmuParseVshFunction
(
// Pointer to raw Xbox vertex shader instruction slots
DWORD* pXboxFunction,
DWORD* pXboxFunctionSize,
IntermediateVertexShader* pShader
)
{
auto VshDecoder = XboxVertexShaderDecoder();
*pXboxFunctionSize = 0;
// FIXME tidy handling of the header vs headerless cases
// Normally, pXboxFunction has a shader header before the shader tokens
// But we can also load shader tokens directly from the Xbox vertex shader slots too
bool headerless = pXboxFunction[0] == 0; // if its a token instead of a header, first DWORD is unused
auto headerSize = headerless ? 0 : sizeof(xbox::X_VSH_SHADER_HEADER);
// Decode the vertex shader program tokens into an intermediate representation
uint32_t* pCurToken = (uint32_t*)((uintptr_t)pXboxFunction + headerSize);
auto pCurToken = (uint32_t*)pXboxFunction;
if (headerless) {
// We've been fed shader slots. Make up a header...
pShader->Header.Version = VERSION_XVS;
pShader->Header.NumInst = (uint16_t)pShader->Instructions.size();
// Decode until we hit a token marked final
// Note : CxbxSetVertexShaderSlots makes sure this always stops
// after X_VSH_MAX_INSTRUCTION_COUNT, by setting FLD_FINAL in there.
while (VshDecoder.VshConvertToIntermediate(pCurToken, pShader)) {
pCurToken += X_VSH_INSTRUCTION_SIZE;
}
// Decode until we hit a token marked final
// Note : CxbxSetVertexShaderSlots makes sure this always stops
// after X_VSH_MAX_INSTRUCTION_COUNT, by setting FLD_FINAL in there.
while (XboxVertexShaderDecoder::VshConvertToIntermediate(pCurToken, pShader)) {
pCurToken += X_VSH_INSTRUCTION_SIZE;
}
else {
pShader->Header = *(xbox::X_VSH_SHADER_HEADER*)pXboxFunction;
// Decode only up to the number of instructions in the header
// The last instruction may not be marked final:
// Test case: Multiple Vertex Shaders sample
for (int i = 0; i < pShader->Header.NumInst; i++) {
if (!VshDecoder.VshConvertToIntermediate(pCurToken, pShader)) {
if (i < pShader->Header.NumInst - 1) {
LOG_TEST_CASE("Shader instructions after final instruction");
}
break;
}
pCurToken += X_VSH_INSTRUCTION_SIZE;
}
}
// The size of the shader is
pCurToken += X_VSH_INSTRUCTION_SIZE; // always at least one token
*pXboxFunctionSize = (intptr_t)pCurToken - (intptr_t)pXboxFunction;
}

View File

@ -185,7 +185,6 @@ typedef struct _VSH_INTERMEDIATE_FORMAT {
} VSH_INTERMEDIATE_FORMAT;
typedef struct _IntermediateVertexShader {
xbox::X_VSH_SHADER_HEADER Header;
std::vector<VSH_INTERMEDIATE_FORMAT> Instructions;
} IntermediateVertexShader;
@ -193,10 +192,11 @@ typedef struct _IntermediateVertexShader {
extern void EmuParseVshFunction
(
DWORD* pXboxFunction,
DWORD* pXboxFunctionSize,
IntermediateVertexShader* pShader
);
extern size_t GetVshFunctionSize(const xbox::dword_xt* pXboxFunction);
// Returns TRUE when Handle has the X_D3DFVF_RESERVED0 bit set, FALSE otherwise.
// NOTE(review): presumably that bit is what tells a vertex shader handle apart from an FVF code - confirm.
inline boolean VshHandleIsVertexShader(DWORD Handle)
{
    if ((Handle & X_D3DFVF_RESERVED0) != 0) {
        return TRUE;
    }
    return FALSE;
}
// Converts Handle to an xbox::X_D3DVertexShader pointer by clearing the X_D3DFVF_RESERVED0 bit
// and casting what remains. No check is made here that Handle actually has that bit set.
inline xbox::X_D3DVertexShader *VshHandleToXboxVertexShader(DWORD Handle)
{
    const auto handleWithoutFlag = Handle & ~X_D3DFVF_RESERVED0;
    return (xbox::X_D3DVertexShader *)handleWithoutFlag;
}
@ -214,5 +214,4 @@ extern void CxbxImpl_SetVertexShaderInput(DWORD Handle, UINT StreamCount, xbox::
extern void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD ConstantCount);
extern void CxbxImpl_DeleteVertexShader(DWORD Handle);
extern void CxbxVertexShaderSetFlags();
#endif

View File

@ -235,4 +235,23 @@ extern char szFilePath_Xbe[MAX_PATH*2];
// Returns the last Win32 error, in string format. Returns an empty string if there is no error.
extern std::string CxbxGetLastErrorString(char * lpszFunction);
// LOG_TEST_CASE(message) : reports, once per expansion site, that the running title reached a code path of interest.
// - A block-scope static flag (bTestCaseLogged) turns every later pass through the same site into a no-op.
// - When g_CurrentLogPopupTestCase is set and LOG_CHECK_ENABLED(LOG_LEVEL::INFO) passes, the report is shown
//   through PopupInfo; the `continue` that follows re-tests the `while (0)` condition and leaves the block,
//   so EmuLogOutputEx is not called as well.
//   NOTE(review): this relies on LOG_CHECK_ENABLED expanding to a plain `if` - confirm.
// - Otherwise the report is handed to EmuLogOutputEx with LOG_LEVEL::INFO. EmuLogOutputEx is used here so that
//   any triggered test case is dumped to the log directly, which lets developers quickly see which applications
//   triggered it without requiring any individual log to be enabled.
// The report holds the title (CxbxKrnl_Xbe->m_szAsciiTitle), the message, and __func__ / __FILE__ / __LINE__ of the site.
// NOTE: This #define is here rather than Logging.h, because it has a dependency on CxbxKrnl_Xbe
#define LOG_TEST_CASE(message) do { \
static bool bTestCaseLogged = false; \
if (bTestCaseLogged) break; \
bTestCaseLogged = true; \
if (g_CurrentLogPopupTestCase) { \
LOG_CHECK_ENABLED(LOG_LEVEL::INFO) { \
PopupInfo(nullptr, "Please report that %s shows the following message:\nLOG_TEST_CASE: %s\nIn %s (%s line %d)", \
CxbxKrnl_Xbe->m_szAsciiTitle, message, __func__, __FILE__, __LINE__); \
continue; \
} \
} \
EmuLogOutputEx(LOG_PREFIX, LOG_LEVEL::INFO, "Please report that %s shows the following message:\nLOG_TEST_CASE: %s\nIn %s (%s line %d)", \
CxbxKrnl_Xbe->m_szAsciiTitle, message, __func__, __FILE__, __LINE__); \
} while (0)
// was g_pCertificate->wszTitleName
#endif