Improve shader slot emulation

This commit is contained in:
Anthony Miles 2020-04-27 11:07:35 +12:00
parent eaa095e8a2
commit a2b5d2c466
3 changed files with 83 additions and 165 deletions

View File

@ -195,7 +195,7 @@ static XTL::DWORD *g_Xbox_D3DDevice; // TODO: This should be a
static DWORD g_dwVertexShaderUsage = 0; // Unused. If needed, move to XbVertexShader.cpp
*/
static XTL::DWORD g_VertexShaderSlots[X_VSH_MAX_INSTRUCTION_COUNT];
static std::array<DWORD[X_VSH_INSTRUCTION_SIZE], X_VSH_MAX_INSTRUCTION_COUNT> g_VertexShaderSlots = { 0 };
XTL::DWORD g_Xbox_VertexShader_Handle = 0;
// Static Function(s)
@ -3542,15 +3542,28 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_LoadVertexShader)
// Handle is always address of an X_D3DVertexShader struct, thus always or-ed with 1 (X_D3DFVF_RESERVED0)
// Address is the slot (offset) from which the program must be written onwards (as whole DWORDS)
// D3DDevice_LoadVertexShader pushes the program contained in the Xbox VertexShader struct to the NV2A
if(Address < 136) {
CxbxVertexShader * pCxbxVertexShader = GetCxbxVertexShader(Handle);
if(Address < g_VertexShaderSlots.size()) {
CxbxVertexShader * pCxbxVertexShader = GetCxbxVertexShader(Handle);
if (pCxbxVertexShader) {
for (DWORD i = Address; i < pCxbxVertexShader->XboxNrAddressSlots; i++) {
// TODO: This seems very fishy
g_VertexShaderSlots[i] = Handle;
int upToSlot = Address + pCxbxVertexShader->XboxNrAddressSlots;
if (upToSlot > g_VertexShaderSlots.size()) {
LOG_TEST_CASE("Shader does not fit in vertex shader slots");
return;
}
// Skip the header DWORD at the beginning
auto pTokens = &pCxbxVertexShader->pXboxFunctionCopy[1];
for (DWORD i = 0; i < pCxbxVertexShader->XboxNrAddressSlots * X_VSH_INSTRUCTION_SIZE; i++) {
g_VertexShaderSlots[Address][i] = pTokens[i];
}
}
}
else {
LOG_TEST_CASE("LoadVertexShader called with unrecognized handle %d", Handle);
}
}
else {
LOG_TEST_CASE("LoadVertexShader address %d out of range", Address);
}
}
// LTCG specific D3DDevice_SelectVertexShader function...
@ -3610,38 +3623,28 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SelectVertexShader)
g_Xbox_VertexShader_Handle = Handle;
CxbxVertexShader *pCxbxVertexShader = nullptr;
DWORD HostFVF = 0;
if(VshHandleIsVertexShader(Handle))
{
pCxbxVertexShader = GetCxbxVertexShader(Handle);
if (pCxbxVertexShader == nullptr) {
LOG_TEST_CASE("Shader handle has not been created");
}
// TODO we should only set the vertex declaration here?
SetCxbxVertexShader(pCxbxVertexShader);
}
else if(Handle == xbnull)
{
HostFVF = D3DFVF_XYZ | D3DFVF_TEX0;
// Clear any vertex shader that may be set
hRet = g_pD3DDevice->SetVertexShader(nullptr);
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader()");
// Set the FVF
hRet = g_pD3DDevice->SetFVF(HostFVF);
DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetFVF(D3DFVF_XYZ | D3DFVF_TEX0)");
}
else if(Address < 136)
{
X_D3DVertexShader *pXboxVertexShader = (X_D3DVertexShader*)g_VertexShaderSlots[Address];
if(pXboxVertexShader != nullptr)
{
LOG_TEST_CASE("Assigned g_VertexShaderSlots");
// Was : pCxbxVertexShader = (CxbxVertexShader *)(pXboxVertexShader->CxbxVertexShaderHandle);
// However, the CxbxVertexShaderHandle union is never set, so could lead to using undefined data!
}
else
{
EmuLog(LOG_LEVEL::WARNING, "g_VertexShaderSlots[%d] = 0", Address);
}
}
if (Address < g_VertexShaderSlots.size()) {
// Create a vertex shader from the tokens
auto pTokens = &g_VertexShaderSlots[Address][0];
DWORD shaderSize;
auto shaderKey = g_VertexShaderSource.CreateShader(pTokens, &shaderSize);
g_pD3DDevice->SetVertexShader(g_VertexShaderSource.GetShader(shaderKey));
}
else {
LOG_TEST_CASE("SelectVertexShader address %d out of range", Address);
}
if (FAILED(hRet))
{
@ -8135,19 +8138,14 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_RunVertexStateShader)
LOG_UNIMPLEMENTED();
}
// Maps pFunction definitions to pre-compiled shaders
// to reduce the speed impact of LoadVertexShaderProgram
typedef uint64_t load_shader_program_key_t;
std::unordered_map<load_shader_program_key_t, DWORD> g_LoadVertexShaderProgramCache;
// ******************************************************************
// * patch: D3DDevice_LoadVertexShaderProgram
// ******************************************************************
VOID WINAPI XTL::EMUPATCH(D3DDevice_LoadVertexShaderProgram)
(
CONST DWORD *pFunction,
DWORD Address
)
CONST DWORD *pFunction,
DWORD Address
)
{
LOG_FUNC_BEGIN
LOG_FUNC_ARG(pFunction)
@ -8156,119 +8154,12 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_LoadVertexShaderProgram)
// D3DDevice_LoadVertexShaderProgram splits the given function buffer into batch-wise pushes to the NV2A
load_shader_program_key_t shaderCacheKey = ((load_shader_program_key_t)g_Xbox_VertexShader_Handle << 32) | (DWORD)pFunction;
// If the shader key was located in the cache, use the cached shader
// TODO: When do we clear the cache? In this approach, shaders are
// never freed...
auto it = g_LoadVertexShaderProgramCache.find(shaderCacheKey);
if (it != g_LoadVertexShaderProgramCache.end()) {
EMUPATCH(D3DDevice_LoadVertexShader)(it->second, Address);
EMUPATCH(D3DDevice_SelectVertexShader)(it->second, Address);
return;
// Copy shader instructions to shader slots
auto shaderHeader = *((XTL::X_VSH_SHADER_HEADER*) pFunction);
auto tokens = &pFunction[1];
for (int i = 0; i < shaderHeader.NumInst * X_VSH_INSTRUCTION_SIZE; i++) {
g_VertexShaderSlots[Address][i] = tokens[i];
}
DWORD *pXboxVertexDeclaration = nullptr;
if (VshHandleIsVertexShader(g_Xbox_VertexShader_Handle)) {
CxbxVertexShader *pCxbxVertexShader = GetCxbxVertexShader(g_Xbox_VertexShader_Handle);
// If we failed to fetch the active vertex shader, log and do nothing
if (pCxbxVertexShader == nullptr) {
LOG_TEST_CASE("D3DDevice_LoadVertexShaderProgram: Failed to locate original shader");
return;
}
// Simply retrieve the contents of the existing vertex shader program
pXboxVertexDeclaration = pCxbxVertexShader->Declaration.pXboxDeclarationCopy;
} else {
// This is an unusual scenario in which an FVF-based shader is being replaced with an actual shader
// But without calling CreateVertexShader: This means we need to parse the current FVF and generate
// our own Xbox-like declaration to use when converting/setting this new shader
// Define a large enough definition to contain all possible FVF types
// 20 is maximum possible size
DWORD CxbxXboxVertexDeclaration[20] = { 0 };
int index = 0;
// Write the Stream Number (always 0 for FVF)
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_STREAM(0);
// Write Position
DWORD position = (g_Xbox_VertexShader_Handle & X_D3DFVF_POSITION_MASK);
if (position == X_D3DFVF_XYZRHW) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_POSITION, X_D3DVSDT_FLOAT4);
} else {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_POSITION, X_D3DVSDT_FLOAT3);
}
// Write Blend Weights
if (position == X_D3DFVF_XYZB1) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_BLENDWEIGHT, X_D3DVSDT_FLOAT1);
}
if (position == X_D3DFVF_XYZB2) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_BLENDWEIGHT, X_D3DVSDT_FLOAT2);
}
if (position == X_D3DFVF_XYZB3) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_BLENDWEIGHT, X_D3DVSDT_FLOAT3);
}
if (position == X_D3DFVF_XYZB4) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_BLENDWEIGHT, X_D3DVSDT_FLOAT4);
}
// Write Normal, Diffuse, and Specular
if (g_Xbox_VertexShader_Handle & X_D3DFVF_NORMAL) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_NORMAL, X_D3DVSDT_FLOAT3);
}
if (g_Xbox_VertexShader_Handle & X_D3DFVF_DIFFUSE) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_DIFFUSE, X_D3DVSDT_D3DCOLOR);
}
if (g_Xbox_VertexShader_Handle & X_D3DFVF_SPECULAR) {
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_SPECULAR, X_D3DVSDT_D3DCOLOR);
}
// Write Texture Coordinates
int textureCount = (g_Xbox_VertexShader_Handle & X_D3DFVF_TEXCOUNT_MASK) >> X_D3DFVF_TEXCOUNT_SHIFT;
assert(textureCount <= 4); // Safeguard, since the X_D3DFVF_TEXCOUNT bitfield could contain invalid values (5 up to 15)
for (int i = 0; i < textureCount; i++) {
int numberOfCoordinates = 0;
if ((g_Xbox_VertexShader_Handle & X_D3DFVF_TEXCOORDSIZE1(i)) == (DWORD)X_D3DFVF_TEXCOORDSIZE1(i)) {
numberOfCoordinates = X_D3DVSDT_FLOAT1;
}
if ((g_Xbox_VertexShader_Handle & X_D3DFVF_TEXCOORDSIZE2(i)) == (DWORD)X_D3DFVF_TEXCOORDSIZE2(i)) {
numberOfCoordinates = X_D3DVSDT_FLOAT2;
}
if ((g_Xbox_VertexShader_Handle & X_D3DFVF_TEXCOORDSIZE3(i)) == (DWORD)X_D3DFVF_TEXCOORDSIZE3(i)) {
numberOfCoordinates = X_D3DVSDT_FLOAT3;
}
if ((g_Xbox_VertexShader_Handle & X_D3DFVF_TEXCOORDSIZE4(i)) == (DWORD)X_D3DFVF_TEXCOORDSIZE4(i)) {
numberOfCoordinates = X_D3DVSDT_FLOAT4;
}
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_REG(X_D3DVSDE_TEXCOORD0 + i, numberOfCoordinates);
}
// Write Declaration End
CxbxXboxVertexDeclaration[index++] = X_D3DVSD_END();
pXboxVertexDeclaration = CxbxXboxVertexDeclaration;
// Now we can fall through and create a new vertex shader
}
// Create a vertex shader with the new vertex program data
DWORD hNewXboxShader = 0;
HRESULT hr = EMUPATCH(D3DDevice_CreateVertexShader)(pXboxVertexDeclaration, pFunction, &hNewXboxShader, 0);
if( FAILED( hr ) )
CxbxKrnlCleanup("Error creating new vertex shader!" );
EMUPATCH(D3DDevice_LoadVertexShader)(hNewXboxShader, Address);
EMUPATCH(D3DDevice_SelectVertexShader)(hNewXboxShader, Address);
g_LoadVertexShaderProgramCache[shaderCacheKey] = hNewXboxShader;
EmuLog(LOG_LEVEL::WARNING, "Vertex Shader Cache Size: %d", g_LoadVertexShaderProgramCache.size());
}
// ******************************************************************

View File

@ -40,13 +40,14 @@ ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, S
// Create a new shader
// If the shader was already created, just increase its reference count
ShaderKey VertexShaderSource::CreateShader(const DWORD* pXboxFunction, DWORD *pXboxFunctionSize) {
IntermediateVertexShader intermediateShader;
IntermediateVertexShader intermediateShader;
// Parse into intermediate format
EmuParseVshFunction((DWORD*)pXboxFunction,
pXboxFunctionSize,
&intermediateShader);
// FIXME ignore shader header when creating key
ShaderKey key = ComputeHash((void*)pXboxFunction, *pXboxFunctionSize);
// Check if we need to create the shader

View File

@ -1323,21 +1323,47 @@ extern void EmuParseVshFunction
IntermediateVertexShader* pShader
)
{
uint32_t* pToken;
auto VshDecoder = XboxVertexShaderDecoder();
*pXboxFunctionSize = 0;
// Just copy the header for now
pShader->Header = *(XTL::X_VSH_SHADER_HEADER*)pXboxFunction;
// FIXME tidy handling of the header vs headerless cases
// Normally, pXboxFunction has a shader header before the shader tokens
// But we can also load shader tokens directly from the Xbox vertex shader slots too
bool headerless = pXboxFunction[0] == 0; // if its a token instead of a header, first DWORD is unused
auto headerSize = headerless ? 0 : sizeof(XTL::X_VSH_SHADER_HEADER);
// Decode the vertex shader program tokens into an intermediate representation
pToken = (uint32_t*)((uintptr_t)pXboxFunction + sizeof(XTL::X_VSH_SHADER_HEADER));
while (VshDecoder.VshConvertToIntermediate(pToken, pShader)) {
pToken += X_VSH_INSTRUCTION_SIZE;
}
uint32_t* pCurToken = (uint32_t*)((uintptr_t)pXboxFunction + headerSize);
if (headerless) {
// We've been fed shader slots. Make up a header...
pShader->Header.Version = VERSION_XVS;
pShader->Header.NumInst = pShader->Instructions.size();
// Decode until we hit a token marked final
while (VshDecoder.VshConvertToIntermediate(pCurToken, pShader)) {
pCurToken += X_VSH_INSTRUCTION_SIZE;
}
}
else {
pShader->Header = *(XTL::X_VSH_SHADER_HEADER*)pXboxFunction;
// Decode only up to the number of instructions in the header
// The last instruction may not be marked final:
// Test case: Multiple Vertex Shaders sample
for (int i = 0; i < pShader->Header.NumInst; i++) {
if (!VshDecoder.VshConvertToIntermediate(pCurToken, pShader)) {
if (i < pShader->Header.NumInst - 1) {
LOG_TEST_CASE("Shader instructions after final instruction");
}
break;
}
pCurToken += X_VSH_INSTRUCTION_SIZE;
}
}
// The size of the shader is the byte distance from pXboxFunction to just past the last token
pToken += X_VSH_INSTRUCTION_SIZE; // always at least one token
*pXboxFunctionSize = (intptr_t)pToken - (intptr_t)pXboxFunction;
pCurToken += X_VSH_INSTRUCTION_SIZE; // always at least one token
*pXboxFunctionSize = (intptr_t)pCurToken - (intptr_t)pXboxFunction;
}