From 1e6845c940b25a53613c5580c980ebbd364b5a77 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Wed, 9 Dec 2020 22:55:47 +0100 Subject: [PATCH 01/47] Generalized HLSL shader compilation --- CMakeLists.txt | 2 + src/core/hle/D3D8/Direct3D9/Shader.cpp | 142 ++++++++++++++++ src/core/hle/D3D8/Direct3D9/Shader.h | 12 ++ src/core/hle/D3D8/Direct3D9/VertexShader.cpp | 155 ++---------------- src/core/hle/D3D8/Direct3D9/VertexShader.h | 7 +- .../hle/D3D8/Direct3D9/VertexShaderSource.cpp | 2 +- src/core/hle/D3D8/XbVertexShader.cpp | 3 +- src/core/hle/D3D8/XbVertexShader.h | 5 +- 8 files changed, 183 insertions(+), 145 deletions(-) create mode 100644 src/core/hle/D3D8/Direct3D9/Shader.cpp create mode 100644 src/core/hle/D3D8/Direct3D9/Shader.h diff --git a/CMakeLists.txt b/CMakeLists.txt index d764aa57a..bc97c8157 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,6 +135,7 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/WalkIndexBuffer.h" @@ -295,6 +296,7 @@ file (GLOB CXBXR_SOURCE_EMU "${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/RenderStates.cpp" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp" diff --git a/src/core/hle/D3D8/Direct3D9/Shader.cpp b/src/core/hle/D3D8/Direct3D9/Shader.cpp new file mode 100644 
index 000000000..382fc0aab --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/Shader.cpp @@ -0,0 +1,142 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check it. +// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com +// ****************************************************************** +// * +// * This file is part of the Cxbx project. +// * +// * Cxbx and Cxbe are free software; you can redistribute them +// * and/or modify them under the terms of the GNU General Public +// * License as published by the Free Software Foundation; either +// * version 2 of the license, or (at your option) any later version. +// * +// * This program is distributed in the hope that it will be useful, +// * but WITHOUT ANY WARRANTY; without even the implied warranty of +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// * GNU General Public License for more details. +// * +// * You should have received a copy of the GNU General Public License +// * along with this program; see the file COPYING. +// * If not, write to the Free Software Foundation, Inc., +// * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
+// * +// * 2020 PatrickvL +// * +// * All rights reserved +// * +// ****************************************************************** + +#define LOG_PREFIX CXBXR_MODULE::VTXSH // TODO : Introduce generic HLSL logging + +#include +#include "Shader.h" +#include "core\kernel\init\CxbxKrnl.h" // LOG_TEST_CASE +#include "core\kernel\support\Emu.h" // EmuLog +//#include + +std::string DebugPrependLineNumbers(std::string shaderString) { // Returns shaderString with every line prefixed by a "/* NNN */ " line-number comment (debug logging aid) + std::stringstream shader(shaderString); + auto debugShader = std::stringstream(); + + int i = 1; + for (std::string line; std::getline(shader, line); ) { + auto lineNumber = std::to_string(i++); + auto paddedLineNumber = lineNumber.insert(0, (lineNumber.size() < 3) ? (3 - lineNumber.size()) : 0, ' '); // Left-pad to 3 columns; the guard stops the unsigned pad count from wrapping around once line 1000 is reached + debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; + } + + return debugShader.str(); +} + +extern HRESULT EmuCompileShader // Compiles hlsl_str (entry point "main") for shader_profile via D3DCompile; on failure retries once with backwards-compatibility flags. Returns the HRESULT of the last compile attempt +( + std::string hlsl_str, + const char* shader_profile, + ID3DBlob** ppHostShader, + const char* pSourceName +) +{ + ID3DBlob* pErrors = nullptr; + ID3DBlob* pErrorsCompatibility = nullptr; + HRESULT hRet = 0; + + EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---"); + EmuLog(LOG_LEVEL::DEBUG, "%s", DebugPrependLineNumbers(hlsl_str).c_str()); // Pass the HLSL as an argument, never as the format string (it may contain '%') + EmuLog(LOG_LEVEL::DEBUG, "-----------------------"); + + + UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3; + + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + pSourceName, + nullptr, // pDefines + D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? + "main", // shader entry point + shader_profile, + flags1, // flags1 + 0, // flags2 + ppHostShader, // out + &pErrors // ppErrorMsgs out + ); + if (FAILED(hRet)) { + EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. 
Recompiling in compatibility mode"); + // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile + // Test Case: Spy vs Spy + flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL; + hRet = D3DCompile( + hlsl_str.c_str(), + hlsl_str.length(), + pSourceName, + nullptr, // pDefines + D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? + "main", // shader entry point + shader_profile, + flags1, // flags1 + 0, // flags2 + ppHostShader, // out + &pErrorsCompatibility // ppErrorMsgs out + ); + + if (FAILED(hRet)) { + LOG_TEST_CASE("Couldn't assemble recompiled shader"); + //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled shader"); + } + } + + // Determine the log level + auto hlslErrorLogLevel = FAILED(hRet) ? LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; + if (pErrors) { + // Log errors from the initial compilation + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + pErrors = nullptr; + } + + // Failure to recompile in compatibility mode ignored for now + if (pErrorsCompatibility != nullptr) { + pErrorsCompatibility->Release(); + pErrorsCompatibility = nullptr; + } + + LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) { + if (g_bPrintfOn) { + if (!FAILED(hRet)) { + // Log disassembly + (void)D3DDisassemble( // Best-effort debug aid : its result must not overwrite the compile result held in hRet + (*ppHostShader)->GetBufferPointer(), + (*ppHostShader)->GetBufferSize(), + D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, + NULL, + &pErrors + ); + if (pErrors) { + EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); + pErrors->Release(); + } + } + } + } + + return hRet; +} diff --git a/src/core/hle/D3D8/Direct3D9/Shader.h b/src/core/hle/D3D8/Direct3D9/Shader.h new file mode 100644 index 000000000..1a89d14a5 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/Shader.h @@ -0,0 +1,12 @@ +#pragma once + +#include // std::string +#include // ID3DBlob (via d3d9.h > d3d11shader.h > 
d3dcommon.h) + +extern HRESULT EmuCompileShader +( + std::string hlsl_str, + const char* shader_profile, + ID3DBlob** ppHostShader, + const char* pSourceName = nullptr +); diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index d6b8d1047..21bb9582f 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -1,11 +1,12 @@ #define LOG_PREFIX CXBXR_MODULE::VTXSH -#include "VertexShader.h" -#include "core\kernel\init\CxbxKrnl.h" -#include "core\kernel\support\Emu.h" +#include "Shader.h" // EmuCompileShader +#include "VertexShader.h" // EmuCompileVertexShader +#include "core\kernel\init\CxbxKrnl.h" // implicit CxbxKrnl_Xbe used in LOG_TEST_CASE +#include "core\kernel\support\Emu.h" // LOG_TEST_CASE (via Logging.h) #include -#include +#include // std::stringstream extern const char* g_vs_model = vs_model_2_a; @@ -181,133 +182,8 @@ void BuildShader(IntermediateVertexShader* pShader, std::stringstream& hlsl) } } -std::string DebugPrependLineNumbers(std::string shaderString) { - std::stringstream shader(shaderString); - auto debugShader = std::stringstream(); - - int i = 1; - for (std::string line; std::getline(shader, line); ) { - auto lineNumber = std::to_string(i++); - auto paddedLineNumber = lineNumber.insert(0, 3 - lineNumber.size(), ' '); - debugShader << "/* " << paddedLineNumber << " */ " << line << "\n"; - } - - return debugShader.str(); -} - -HRESULT CompileHlsl(const std::string& hlsl, ID3DBlob** ppHostShader, const char* pSourceName) -{ - // TODO include header in vertex shader - //xbox::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (xbox::X_VSH_SHADER_HEADER*)pXboxFunction; - ID3DBlob* pErrors = nullptr; - ID3DBlob* pErrorsCompatibility = nullptr; - HRESULT hRet = 0; - auto hlslErrorLogLevel = FAILED(hRet) ? 
LOG_LEVEL::ERROR2 : LOG_LEVEL::DEBUG; - - UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3; - hRet = D3DCompile( - hlsl.c_str(), - hlsl.length(), - pSourceName, // pSourceName - nullptr, // pDefines - D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? - "main", // shader entry poiint - g_vs_model, // shader profile - flags1, // flags1 - 0, // flags2 - ppHostShader, // out - &pErrors // ppErrorMsgs out - ); - - // If the shader failed in the default vertex shader model, retry in vs_model_3_0 - // This allows shaders too large for 2_a to be compiled (Test Case: Shenmue 2) - if (FAILED(hRet)) { - if (pErrors) { - // Log HLSL compiler errors - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - pErrors = nullptr; - } - - EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0"); - hRet = D3DCompile( - hlsl.c_str(), - hlsl.length(), - pSourceName, // pSourceName - nullptr, // pDefines - D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? - "main", // shader entry poiint - vs_model_3_0, // shader profile - flags1, // flags1 - 0, // flags2 - ppHostShader, // out - &pErrors // ppErrorMsgs out - ); - } - - // If the shader failed again, retry in compatibility mode - if (FAILED(hRet)) { - EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Recompiling in compatibility mode"); - // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile - // Test Case: Spy vs Spy - flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL; - hRet = D3DCompile( - hlsl.c_str(), - hlsl.length(), - pSourceName, // pSourceName - nullptr, // pDefines - D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? 
- "main", // shader entry poiint - g_vs_model, // shader profile - flags1, // flags1 - 0, // flags2 - ppHostShader, // out - &pErrorsCompatibility // ppErrorMsgs out - ); - - if (FAILED(hRet)) { - LOG_TEST_CASE("Couldn't assemble vertex shader"); - } - } - - // Determine the log level - if (pErrors) { - // Log errors from the initial compilation - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - pErrors = nullptr; - } - - // Failure to recompile in compatibility mode ignored for now - if (pErrorsCompatibility != nullptr) { - pErrorsCompatibility->Release(); - pErrorsCompatibility = nullptr; - } - - LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) { - if (g_bPrintfOn) { - if (!FAILED(hRet)) { - // Log disassembly - hRet = D3DDisassemble( - (*ppHostShader)->GetBufferPointer(), - (*ppHostShader)->GetBufferSize(), - D3D_DISASM_ENABLE_DEFAULT_VALUE_PRINTS | D3D_DISASM_ENABLE_INSTRUCTION_NUMBERING, - NULL, - &pErrors - ); - if (pErrors) { - EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); - pErrors->Release(); - } - } - } - } - - return hRet; -} - // recompile xbox vertex shader function -extern HRESULT EmuCompileShader +extern HRESULT EmuCompileVertexShader ( IntermediateVertexShader* pIntermediateShader, ID3DBlob** ppHostShader @@ -326,11 +202,16 @@ extern HRESULT EmuCompileShader hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer std::string hlsl_str = hlsl_stream.str(); - EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---"); - EmuLog(LOG_LEVEL::DEBUG, DebugPrependLineNumbers(hlsl_str).c_str()); - EmuLog(LOG_LEVEL::DEBUG, "-----------------------"); - - return CompileHlsl(hlsl_str, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + HRESULT hRet = EmuCompileShader(hlsl_str, g_vs_model, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + + if (FAILED(hRet) && (g_vs_model != vs_model_3_0)) { + // If the shader failed in the default vertex shader model, retry in vs_model_3_0 + // This allows shaders 
too large for 2_a to be compiled (Test Case: Shenmue 2) + EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Retrying with shader model 3.0"); + hRet = EmuCompileShader(hlsl_str, vs_model_3_0, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); + } + + return hRet; } extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader) @@ -352,7 +233,7 @@ extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader) hlsl << hlslStream.rdbuf(); // Compile the shader - CompileHlsl(hlsl.str(), &pShader, sourceFile.c_str()); + EmuCompileShader(hlsl.str(), g_vs_model, &pShader, sourceFile.c_str()); } *ppHostShader = pShader; @@ -473,7 +354,7 @@ VS_OUTPUT main(const VS_INPUT xIn) } )"; - CompileHlsl(hlsl, &pPassthroughShader, "passthrough.hlsl"); + EmuCompileShader(hlsl, g_vs_model, &pPassthroughShader, "passthrough.hlsl"); } *ppHostShader = pPassthroughShader; diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.h b/src/core/hle/D3D8/Direct3D9/VertexShader.h index 29d8cc57c..c2ef913b2 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.h +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.h @@ -1,6 +1,4 @@ - -#ifndef DIRECT3D9VERTEXSHADER_H -#define DIRECT3D9VERTEXSHADER_H +#pragma once #include "core\hle\D3D8\XbVertexShader.h" #include "FixedFunctionVertexShaderState.hlsli" @@ -15,7 +13,7 @@ static const char* vs_model_2_a = "vs_2_a"; static const char* vs_model_3_0 = "vs_3_0"; extern const char* g_vs_model; -extern HRESULT EmuCompileShader +extern HRESULT EmuCompileVertexShader ( IntermediateVertexShader* pIntermediateShader, ID3DBlob** ppHostShader @@ -25,4 +23,3 @@ extern void EmuCompileFixedFunction(ID3DBlob** ppHostShader); extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader); -#endif diff --git a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp index a00fa252f..1748651ea 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp @@ -13,7 +13,7 @@ 
VertexShaderSource g_VertexShaderSource = VertexShaderSource(); ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, ShaderKey key) { ID3DBlob* pCompiledShader; - auto hRet = EmuCompileShader( + auto hRet = EmuCompileVertexShader( &intermediateShader, &pCompiledShader ); diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index fd47d2cbf..baa682fb1 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -38,6 +38,7 @@ #include "core\hle\D3D8\XbVertexBuffer.h" // For CxbxImpl_SetVertexData4f #include "core\hle\D3D8\XbVertexShader.h" #include "core\hle\D3D8\XbD3D8Logging.h" // For DEBUG_D3DRESULT +#include "devices\xbox.h" #include "core\hle\D3D8\XbConvert.h" // For NV2A_VP_UPLOAD_INST, NV2A_VP_UPLOAD_CONST_ID, NV2A_VP_UPLOAD_CONST #include "devices\video\nv2a.h" // For D3DPUSH_DECODE #include "common\Logging.h" // For LOG_INIT @@ -98,7 +99,7 @@ void CxbxVertexShaderSetFlags() // Note : Temporary, until we reliably locate the Xbox internal state for this // See D3DXDeclaratorFromFVF docs https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dxdeclaratorfromfvf // and https://github.com/reactos/wine/blob/2e8dfbb1ad71f24c41e8485a39df01bb9304127f/dlls/d3dx9_36/mesh.c#L2041 -static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) +static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) // TODO : Rename CxbxFVFToXboxVertexAttributeFormat? { using namespace xbox; diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index 5b82c28a3..aed15821c 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -209,6 +209,10 @@ extern void EmuParseVshFunction extern size_t GetVshFunctionSize(const xbox::dword_xt* pXboxFunction); inline boolean VshHandleIsVertexShader(DWORD Handle) { return (Handle & X_D3DFVF_RESERVED0) ? 
TRUE : FALSE; } +inline boolean VshHandleIsFVF(DWORD Handle) { return !VshHandleIsVertexShader(Handle); } +inline boolean VshHandleIsPassthrough(DWORD Handle) { + return VshHandleIsFVF(Handle) && ((Handle & X_D3DFVF_POSITION_MASK) == X_D3DFVF_XYZRHW); +} inline xbox::X_D3DVertexShader *VshHandleToXboxVertexShader(DWORD Handle) { return (xbox::X_D3DVertexShader *)(Handle & ~X_D3DFVF_RESERVED0);} // Get the number of components represented by the given xbox vertex data type @@ -228,5 +232,4 @@ extern void CxbxImpl_SetVertexShaderInput(DWORD Handle, UINT StreamCount, xbox:: extern void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD ConstantCount); extern void CxbxImpl_DeleteVertexShader(DWORD Handle); extern void CxbxVertexShaderSetFlags(); -extern HRESULT SetVertexShader(IDirect3DVertexShader* pShader); #endif From 609a4d327680cf7b85c1b93b55865663dc907529 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 17 Nov 2020 13:23:19 +0100 Subject: [PATCH 02/47] [WIP] Xbox register combiner to HLSL pixel shader --- CMakeLists.txt | 2 + .../Direct3D9/CxbxPixelShaderTemplate.hlsl | 342 + .../Direct3D9/CxbxVertexShaderTemplate.hlsl | 4 +- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 6 +- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 387 + src/core/hle/D3D8/Direct3D9/PixelShader.h | 14 + src/core/hle/D3D8/XbPixelShader.cpp | 8514 ++--------------- src/core/hle/D3D8/XbPixelShader.h | 478 +- src/core/hle/D3D8/XbVertexShader.cpp | 39 +- 9 files changed, 1819 insertions(+), 7967 deletions(-) create mode 100644 src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl create mode 100644 src/core/hle/D3D8/Direct3D9/PixelShader.cpp create mode 100644 src/core/hle/D3D8/Direct3D9/PixelShader.h diff --git a/CMakeLists.txt b/CMakeLists.txt index bc97c8157..010f89ce1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -135,6 +135,7 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" 
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShader.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h" @@ -295,6 +296,7 @@ file (GLOB CXBXR_SOURCE_EMU "${CXBXR_ROOT_DIR}/src/core/common/imgui/video.cpp" "${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/RenderStates.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Shader.cpp" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/TextureStates.cpp" diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl new file mode 100644 index 000000000..2baf6abf2 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -0,0 +1,342 @@ +// This starts the raw string (comment to get syntax highlighting, UNCOMMENT to compile) : +R"DELIMITER( + +struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT) +{ + float2 iPos : VPOS; // Screen space x,y pixel location + float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1) + float iFog : FOG; + float iPts : PSIZE; + float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1) + float4 iT0 : TEXCOORD0; // Texture Coord 0 + float4 iT1 : TEXCOORD1; // Texture Coord 1 + float4 iT2 : TEXCOORD2; // Texture Coord 2 + float4 iT3 : TEXCOORD3; // Texture Coord 3 + 
float iFF : VFACE; // Front facing if > 0 +}; + +struct PS_OUTPUT +{ + float4 oR0 : COLOR; +}; + +// Source register modifier macro's, based on enum PS_INPUTMAPPING (each expansion is fully parenthesized, since it gets substituted into larger expressions such as s0 * s1) : +// TODO : Should all these 'max(0, x)' actually be 'saturate(x)'? This, because the operation may actually clamp the register value to the range [0..1] +#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps negative x to 0 // Was : max(0, x) +#define s_comp(x) (1 - saturate(x)) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // OK for final combiner // Complements x (1-x) // Was : 1- min(max(0, x), 1) +#define s_bx2(x) (( 2 * max(0, x)) - 1) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] +#define s_negbx2(x) ((-2 * max(0, x)) + 1) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates +#define s_bias(x) (max(0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 +#define s_negbias(x) (-max(0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates +#define s_ident(x) (x) // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // invalid for final combiner // No modifier, x is passed without alteration +#define s_neg(x) (-(x)) // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // invalid for final combiner // Negate + +// Destination register modifier macro's, based on enum PS_COMBINEROUTPUT : +#define d_ident(x) x // PS_COMBINEROUTPUT_IDENTITY= 0x00L, // +#define d_bias(x) (x - 0.5) // PS_COMBINEROUTPUT_BIAS= 0x08L, // Subtracts 0.5 from outputs +#define d_x2(x) ( x * 2) // PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // Scales outputs by 2 +#define d_bx2(x) ((x - 0.5) * 2) // PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // Subtracts 0.5 from outputs and scales by 2 +#define d_x4(x) ( x * 4) // 
PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // Scales outputs by 4 +#define d_bx4(x) ((x - 0.5) * 4) // PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS= 0x28L, // Subtracts 0.5 from outputs and scales by 4 +#define d_d2(x) ( x / 2) // PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // Divides outputs by 2 +#define d_bd2(x) ((x - 0.5) / 2) // PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS= 0x38L, // Subtracts 0.5 from outputs and divides by 2 + +// Constant registers +uniform const float4 c0_[8] : register(c0); +uniform const float4 c1_[8] : register(c8); +uniform const float4 c_fog : register(c16); // Note : Maps to PSH_XBOX_CONSTANT_FOG, assigned to fog.rgb + +// Constant registers used only in final combiner stage (xfc 'opcode') : +uniform const float4 FC0 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC0, must be generated as argument to xfc instead of C0 +uniform const float4 FC1 : register(c18); // Note : Maps to PSH_XBOX_CONSTANT_FC1, must be generated as argument to xfc instead of C1 +uniform const float4 BEM[4] : register(c19); // Note : PSH_XBOX_CONSTANT_BEM for 4 texture stages +uniform const float4 LUM[4] : register(c23); // Note : PSH_XBOX_CONSTANT_LUM for 4 texture stages + + +#define CM_LT(c) if(c < 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_LT +#define CM_GE(c) if(c >= 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_GE + +#if 0 + // Compiler-defines/symbols which must be defined when their bit/value is set in the corresponding register : + + // Bits from PSCombinerCount (a.k.a. 
PSCombinerCountFlags) : + #define PS_COMBINERCOUNT 2 + #define PS_COMBINERCOUNT_UNIQUE_C0 + #define PS_COMBINERCOUNT_UNIQUE_C1 + #define PS_COMBINERCOUNT_MUX_MSB + + // Generate defines like this, based on actual values : + #define PS_COMPAREMODE_0(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + #define PS_COMPAREMODE_1(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + #define PS_COMPAREMODE_2(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + #define PS_COMPAREMODE_3(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) + + // Input texture register mappings for stage 1, 2 and 3 (stage 0 has no input-texture) + static const int PS_INPUTTEXTURE_[4] = { -1, 0, 0, 0 }; + + // Dot mappings for stage 1, 2 and 3 (stage 0 performs no dot product) + #define PS_DOTMAPPING_1 PS_DOTMAPPING_MINUS1_TO_1_D3D + #define PS_DOTMAPPING_2 PS_DOTMAPPING_MINUS1_TO_1_D3D + #define PS_DOTMAPPING_3 PS_DOTMAPPING_MINUS1_TO_1_D3D + + // Bits from FinalCombinerFlags (the 4th byte in PSFinalCombinerInputsEFG) : + #define PS_FINALCOMBINERSETTING_COMPLEMENT_V1 + #define PS_FINALCOMBINERSETTING_COMPLEMENT_R0 + #define PS_FINALCOMBINERSETTING_CLAMP_SUM +#endif + +)DELIMITER", /* This terminates the 1st raw string within the 16380 single-byte characters limit. // */ +// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019 +// Second raw string : +R"DELIMITER( + +// Define defaults when their inverses are not defined (handy while compiler isn't yet providing these) : +#ifndef PS_COMBINERCOUNT_SAME_C0 + #define PS_COMBINERCOUNT_UNIQUE_C0 +#endif +#ifndef PS_COMBINERCOUNT_SAME_C1 + #define PS_COMBINERCOUNT_UNIQUE_C1 +#endif +#ifndef PS_COMBINERCOUNT_MUX_LSB + #define PS_COMBINERCOUNT_MUX_MSB +#endif + +// PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use stage-specific constants c0_0 .. 
c0_7, or c0_0 for all stages +#ifdef PS_COMBINERCOUNT_UNIQUE_C0 + #define C0 c0_[stage] // concatenate stage to form c0_0 .. c0_7 +#else // PS_COMBINERCOUNT_SAME_C0 + #define C0 c0_[0] // always resolve to c0_0 +#endif + +// PS_COMBINERCOUNT_UNIQUE_C1 steers whether for C1 to use stage-specific constants c1_0 .. c1_7, or c1_0 for all stages +#ifdef PS_COMBINERCOUNT_UNIQUE_C1 + #define C1 c1_[stage] // concatenate stage to form c1_0 .. c1_7 +#else // PS_COMBINERCOUNT_SAME_C1 + #define C1 c1_[0] // always resolve to c1_0 +#endif + +// PS_COMBINERCOUNT_MUX_MSB steers the 'muxing' operation in the XMMC opcode, +// checking either the Most Significant Bit (MSB) or Least (LSB) of the r0 register. +// (In practice, LSB is seldom encountered, we have zero known test-cases.) +#ifdef PS_COMBINERCOUNT_MUX_MSB + #define FCS_MUX (r0.a >= 0.5) // Check r0.a MSB; Having range upto 1 this should be equal to : (((r0.a * 255) /*mod 256*/) >= 128) +#else // PS_COMBINERCOUNT_MUX_LSB + #define FCS_MUX (fmod(r0.a * 255, 2) >= 1) // Check r0.a LSB; Get LSB by converting 1 into 255 (highest 8-bit value) and using modulo 2 (fmod, since HLSL has no 'mod' operator). TODO : Verify correctness +#endif + +// PS_FINALCOMBINERSETTING_COMPLEMENT_V1, when defined, applies a modifier to the v1 input when calculating the sum register +#ifdef PS_FINALCOMBINERSETTING_COMPLEMENT_V1 + #define FCS_V1 s_comp // making it use 1-complement, +#else + #define FCS_V1 s_ident // otherwise identity mapping. +#endif + +// PS_FINALCOMBINERSETTING_COMPLEMENT_R0, when defined, applies a modifier to the r0 input when calculating the sum register +#ifdef PS_FINALCOMBINERSETTING_COMPLEMENT_R0 + #define FCS_R0 s_comp // making it use 1-complement, +#else + #define FCS_R0 s_ident // otherwise identity mapping. 
+#endif + +// PS_FINALCOMBINERSETTING_CLAMP_SUM, when defined, applies a modifier to the sum register +#ifdef PS_FINALCOMBINERSETTING_CLAMP_SUM + #define FCS_SUM s_sat // making it clamp negative to zero, +#else + #define FCS_SUM s_ident // otherwise identity mapping. TODO : Confirm correctness +#endif + +// Xbox supports only one 'pixel shader' opcode, but bit flags tunes it's function; +// Here, effective all 5 Xbox opcodes, extended with a variable macro {xop_m(m,...)} for destination modifier : +// Note : Since both d0 AND d1 could be the same output register, calculation of d2 can re-use only one (d0 or d1) +#define xmma(d0, d1, d2, s0, s1, s2, s3, m, tmp) tmp = d0 = m(s0 * s1); d1 = m(s2 * s3); d2 = d1 + tmp // PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD +#define xmmc(d0, d1, d2, s0, s1, s2, s3, m, tmp) tmp = d0 = m(s0 * s1); d1 = m(s2 * s3); d2 = FCS_MUX ? d1 : tmp // PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a + +#define xdm(d0, d1, s0, s1, s2, s3, m) d0 = m(dot(s0 , s1)); d1 = m( s2 * s3 ) // PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only // PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, +#define xdd(d0, d1, s0, s1, s2, s3, m) d0 = m(dot(s0 , s1)); d1 = m(dot(s2 , s3)) // PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only // PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, +#define xmd(d0, d1, s0, s1, s2, s3, m) d0 = m( s0 * s1 ); d1 = m(dot(s2 , s3)) // PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only // PS_COMBINEROUTPUT_CD_MULTIPLY= 0x01L, + +// After the register combiner stages, there's one (optional) final combiner step, consisting of 4 parts; +// All the 7 final combiner inputs operate on rgb only and clamp negative input to zero: +#define fcin(r) saturate(r) +// Special purpose registers prod and sum operate on rgb only, and have alpha set to zero +#define xfc_sum sum = FCS_SUM(float4(FCS_V1(fcin(v1.rgb)) + FCS_R0(fcin(r0.rgb)), 0)) // Note : perform sum first, so prod can use its result +#define xfc_prod(e, 
f) prod = float4(fcin(e) * fcin(f), 0) // Note : prod can't have modifiers +// Color and Alpha calculations are performed, potentially using sum and/or prod and/or fog registers +#define xfc_rgb(a, b, c, d) r0.rgb = lerp(fcin(c), fcin(b), fcin(a)) + fcin(d) // Note : perform rgb and alpha last, so prod and sum can be used as inputs +#define xfc_alpha(g) r0.a = fcin(g) + +// Glue them all together, so we can generate a one-liner closing off the stages : +#define xfc(a, b, c, d, e, f, g) xfc_sum; xfc_prod(e, f); xfc_rgb(a, b, c, d); xfc_alpha(g) +// Note : If xfc is not generated (when PSFinalCombinerInputsABCD and PSFinalCombinerEFG are both 0), r0.rgba is still returned as pixel shader output + +// GLSL : https://www.khronos.org/registry/OpenGL-Refpages/gl4/html/mix.xhtml +// mix(x, y, a ) x*(1-a ) + y*a +// +// HLSL : https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx-graphics-hlsl-lerp +// lerp(x, y, s ) x*(1-s ) + y*s == x + s(y-x) +// lerp(s2, s1, s0) s2*(1-s0) + s1*s0 +)DELIMITER", /* This terminates the 2nd raw string within the 16380 single-byte characters limit. 
// */ +// See https://docs.microsoft.com/en-us/cpp/error-messages/compiler-errors-1/compiler-error-c2026?f1url=%3FappId%3DDev15IDEF1%26l%3DEN-US%26k%3Dk(C2026)%26rd%3Dtrue&view=vs-2019 +// Second raw string : +R"DELIMITER( + +float m21d(const float input) +{ + int tmp = (int)(input * 255); // Convert float 0..1 into byte 0..255 + tmp -= 128; // 0 lowers to -128, 128 lowers to 0, 255 lowers to 127 + return (float)tmp / 127; // -128 scales to -1.007874016, 0 scales to 0.0, 127 scales to 1.0 +} + +float m21g(const float input) +{ + int tmp = (int)(input * 255); // Convert float 0..1 into byte 0..255 + if (tmp >= 128) { + tmp -= 256; // 128 lowers to -128, 255 lowers to -1 + } // 0 stays 0, 127 stays 127 + + return ((float)tmp + 0.5) / 127.5; +} + +float m21(const float input) +{ + int tmp = (int)(input * 255); // Convert float 0..1 into byte 0..255 + if (tmp >= 128) { + tmp -= 256; // 128 lowers to -128, 255 lowers to -1 + } // 0 stays 0, 127 stays 127 + + return (float)tmp / 127; // -128 scales to -1.007874016, 0 scales to 0.0, 127 scales to 1.0 +} + +// Note : each component seems already in range [0..1], but two must be combined into one +#define TwoIntoOne(a,b) (((a * 255) * 256) + (b * 255)) / 255 // TODO : Verify whether this works at all ! +#define CalcHiLo(in) H = TwoIntoOne(in.x, in.y); L = TwoIntoOne(in.z, in.w) // TODO : Verify whether this works at all ! 
+ +// Dot mappings over the output value of a (4 component 8 bit unsigned) texture stage register into a (3 component float) vector value, for use in a dot product calculation: +#define PS_DOTMAPPING_ZERO_TO_ONE(in) dm = in.rgb // :r8g8b8a8->(r,g,b): 0x00=>0, 0xff=>1 thus : output = (input / 0xff ) +#define PS_DOTMAPPING_MINUS1_TO_1_D3D(in) dm = float3(m21d(in.x), m21d(in.y), m21d(in.z)) // :r8g8b8a8->(r,g,b): 0x00=>-128/127, 0x01=>-1, 0x80=>0, 0xff=>1 thus : output = ((input - 0x80 ) / 0x7f ) +#define PS_DOTMAPPING_MINUS1_TO_1_GL(in) dm = float3(m21g(in.x), m21g(in.y), m21g(in.z)) // :r8g8b8a8->(r,g,b): 0x80=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x80 ) (see https://en.wikipedia.org/wiki/Two's_complement) +#define PS_DOTMAPPING_MINUS1_TO_1(in) dm = float3(m21(in.x), m21(in.y), m21(in.z)) // :r8g8b8a8->(r,g,b): 0x80=>-128/127, ?0x81=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x7f ) (see https://en.wikipedia.org/wiki/Two's_complement) + +#define PS_DOTMAPPING_HILO_1(in) CalcHiLo(in); dm = float3(H, L, 1) // :H16L16 ->(H,L,1): 0x0000=>0, 0xffff=>1 thus : output = (input / 0xffff) +#define PS_DOTMAPPING_HILO_HEMISPHERE_D3D(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)):? 0x8000=>-1, 0x0000=>0, 0x7fff=32767/32768 thus : output = ((input - 0x10000) / 0x7fff) +#define PS_DOTMAPPING_HILO_HEMISPHERE_GL(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)):? 0x8000=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x8000) +#define PS_DOTMAPPING_HILO_HEMISPHERE(in) CalcHiLo(in); dm = float3(H, L, sqrt(1-(H*H)-(L*L))) // :H16L16 ->(H,L,sqrt(1-H^2-L^2)): 0x8000=>-32768/32767, 0x8001=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? 
(input / 0x7fff) : ((input - 0x10000) / 0x7fff) + +// Declare one sampler per each {Sampler Type, Texture Stage} combination +// TODO : Generate sampler status? +sampler2D _sampler2D_0; +sampler2D _sampler2D_1; +sampler2D _sampler2D_2; +sampler2D _sampler2D_3; + +sampler3D _sampler3D_0; +sampler3D _sampler3D_1; +sampler3D _sampler3D_2; +sampler3D _sampler3D_3; + +samplerCUBE _sampler6F_0; +samplerCUBE _sampler6F_1; +samplerCUBE _sampler6F_2; +samplerCUBE _sampler6F_3; + +// Actual texture sampling per stage (always uses the s sampling vector variable as input) +// abstracting away the specifics of accessing above sampler declarations (useful for future Direct3D 10+ sampler arrays) +#define Sample2D(st) tex2D(_sampler2D_ ## st, s.xy) // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) +#define Sample3D(st) tex3D(_sampler3D_ ## st, s.xyz) +#define Sample6F(st) texCUBE(_sampler6F_ ## st, s.xyz) + +// Map texture registers to their array elements. Having texture registers in an array allows indexed access to them +#define t0 t[0] +#define t1 t[1] +#define t2 t[2] +#define t3 t[3] + +// Resolve a stage number via 'input texture (index) mapping' to its corresponding output texture register (rgba?) +#define src(st) t[PS_INPUTTEXTURE_[st]] + +// Calculate the dot result for a given stage. Since any given stage is input-mapped to always be less than or equal the stage it appears in, this won't cause read-ahead issues +#define CalcDot(st) PS_DOTMAPPING_ ## st(src(st)); dot_[st] = dot(iT[st].xyz, dm) + +// Addressing operations +#define Passthru(st) float4(saturate(iT[st].xyz), 1) // Clamps input texture coordinates to the range [0..1] +#define Brdf(st) float3(t[st-2].y, t[st-1].y, t[st-2].x - t[st-1].x) // Reads the two preceding stages' texture registers. TODO : Complete 16 bit phi/sigma retrieval from float4 texture register. Perhaps use CalcHiLo? +#define Normal2(st) float3(dot_[st-1], dot_[st], 0) // Preceding and current stage dot result. Will be input for Sample2D. 
+#define Normal3(st) float3(dot_[st-2], dot_[st-1], dot_[st]) // Two preceding and current stage dot result. +#define Eye float3(iT[1].w, iT[2].w, iT[3].w) // 4th (q) component of input texture coordinates 1, 2 and 3. Only used by texm3x3vspec/PS_TEXTUREMODES_DOT_RFLCT_SPEC, always at stage 3. TODO : Map iT[1/2/3] through PS_INPUTTEXTURE_[]? +#define Reflect(n, e) (((2 * n * dot(n, e)) / dot(n, n)) - (e)) // Reflection of e around n : r = 2 * n * (n.e) / (n.n) - e. TODO : Prevent division by zero when n == 0? +#define BumpEnv(st) float3(iT[st].x + (BEM[st].x * src(st).r) + (BEM[st].y * src(st).g), iT[st].y + (BEM[st].z * src(st).r) + (BEM[st].w * src(st).g), 0) // Will be input for Sample2D. TODO : Compact into a regular 2x2 matrix multiplication. +#define LSO(st) (LUM[st].x * src(st).b) + LUM[st].y // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET + +// Implementations for all possible texture modes, with stage as argument (prefixed with valid stages and corresponding pixel shader 1.3 assembly texture addressing instructions) +// For ease of understanding, all follow this plan : Optional specifics, or dot calculation (some with normal selection) and sampling vector determination. All end by deriving a value and assigning this to the stage's texture register. +/*0123 tex */ #define PS_TEXTUREMODES_NONE(st) v = black; t[st] = v // Seems to work +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(st) s = iT[st].xyz; v = Sample2D(st); t[st] = v // Seems to work (are x/w and y/w implicit?) [1] +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(st) s = iT[st].xyz; v = Sample3D(st); t[st] = v // Seems to work (is z/w implicit?) 
+/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(st) s = iT[st].xyz; v = Sample6F(st); t[st] = v // TODO : Test +/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(st) v = Passthru(st); t[st] = v // Seems to work +/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(st) PS_COMPAREMODE_ ## st(iT[st]); v = black; t[st] = v // Seems to work (setting black to texture register, in case it gets read) +/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(st) s = BumpEnv(st); v = Sample2D(st); t[st] = v // Seems to work +/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(st) PS_TEXTUREMODES_BUMPENVMAP(st); v.rgb *= LSO(st); t[st] = v // TODO : Test +/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(st) s = Brdf(st); v = Sample3D(st); t[st] = v // TODO : Test (t[st-2] is 16 bit eyePhi,eyeSigma; t[st-1] is lightPhi,lightSigma) +/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(st) CalcDot(st); n = Normal2(st); s = n; v = Sample2D(st); t[st] = v // TODO : Test +/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(st) CalcDot(st); n = Normal2(st); if (n.y==0) v=1;else v = n.x / n.y; t[st] = v // TODO : Make depth-check use result of division, but how? 
+/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st); t[st] = v // TODO : Test +/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(st) CalcDot(st); n = Normal3(st); s = Reflect(n, Eye); v = Sample6F(st); t[st] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(st) CalcDot(st); n = Normal3(st); s = n; v = Sample3D(st); t[st] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st); t[st] = v // TODO : Test +/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(st) s = src(st).arg; v = Sample2D(st); t[st] = v // TODO : Test [1] +/*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(st) s = src(st).gba; v = Sample2D(st); t[st] = v // TODO : Test [1] +/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(st) CalcDot(st); v = float4(dm,0); t[st] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) +/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(st) CalcDot(st); n = Normal3(st); s = Reflect(n, c0); v = Sample6F(st); t[st] = v // TODO : Test +// [1] Note : 3rd component set to s.z is just an (ignored) placeholder to produce a float3 (made unique, to avoid the potential complexity of repeated components) + +PS_OUTPUT main(const PS_INPUT xIn) +{ + // Local constants + const float4 zero = 0; + const float4 half = 0.5; // = s_negbias(zero) + const float4 one = 1; // = s_comp(zero) + const float4 black = float4(0, 0, 0, 1); // opaque black + const float4 iT[4] = { xIn.iT0, xIn.iT1, xIn.iT2, xIn.iT3 }; // Map input texture coordinates to an array, for indexing purposes + + // Xbox register variables + float4 r0, r1; // Temporary registers + float4 t[4]; // Texture coordinate registers + float4 v0, v1; // Vertex color registers + float4 _discard; // Write-only discard 'register' (we assume the HLSL compilers' optimization 
pass will remove assignments to this) + float4 fog; // Read-only fog register, reading alpha is only allowed in final combiner + float4 sum, prod; // Special purpose registers for xfc (final combiner) operation + + // Helper variables + int stage; // Write-only variable, generated prefixing each 'opcode', for use in C0 and C1 macro's (and should thus get optimized away) + float4 tmp; + float H, L; // HILO (high/low) temps + float dot_[4]; + float3 dm; // Dot mapping temporary + float3 n; // Normal vector (based on preceding dot_[] values) + float3 s; // Actual texture coordinate sampling coordinates (temporary) + float4 v; // Texture value (temporary) + + // Initialize variables + r0 = r1 = black; // Note : r0.a/r1.a will be overwritten by t0.a/t1.a (opaque_black will be retained for PS_TEXTUREMODES_NONE) + // Note : VFACE/FrontFace has been unreliable, investigate again if some test-case shows bland colors + v0 = xIn.iFF > 0 ? xIn.iD0 : xIn.iB0; // Diffuse front/back + v1 = xIn.iFF > 0 ? xIn.iD1 : xIn.iB1; // Specular front/back + fog = float4(c_fog.rgb, xIn.iFog); // color from PSH_XBOX_CONSTANT_FOG, alpha from vertex shader output / pixel shader input + + // Xbox shader program +)DELIMITER", /* This terminates the 2nd raw string within the 16380 single-byte characters limit. 
// */ +// Third and last raw string, the footer : +R"DELIMITER( + + // Copy r0.rgba to output + PS_OUTPUT xOut; + + xOut.oR0 = r0; + + return xOut; +} + +// End of pixel shader footer)DELIMITER" /* This terminates the footer raw string" // */ diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 6163a39d5..9b0e326c1 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -7,13 +7,13 @@ struct VS_INPUT }; // Output registers -struct VS_OUTPUT +struct VS_OUTPUT // Declared identical to pixel shader input (see PS_INPUT) { float4 oPos : POSITION; // Homogeneous clip space position float4 oD0 : COLOR0; // Primary color (front-facing) float4 oD1 : COLOR1; // Secondary color (front-facing) float oFog : FOG; // Fog coordinate - float oPts : PSIZE; // Point size + float oPts : PSIZE; // Point size float4 oB0 : TEXCOORD4; // Back-facing primary color float4 oB1 : TEXCOORD5; // Back-facing secondary color float4 oT0 : TEXCOORD0; // Texture coordinate set 0 diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index adaa7eb70..16b19e6d6 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -7848,9 +7848,13 @@ xbox::void_xt CxbxImpl_SetPixelShader(xbox::dword_xt Handle) // Cache the active shader handle g_pXbox_PixelShader = (xbox::X_PixelShader*)Handle; - // Copy the Pixel Shader data to our RenderState handler + // Copy the Pixel Shader data to our RenderState handler (this includes values for pixel shader constants) // This mirrors the fact that unpatched SetPixelShader does the same thing! 
// This shouldn't be necessary anymore, but shaders still break if we don't do this + // This breakage might be caused by our push-buffer processing could be "trailing behind" what our patches do; + // By writing to render state during this patch, we avoid missing out on updates that push buffer commands would perform. + // However, any updates that occur mid-way can overwrite what we store here, and still cause problems! + // The only viable solution for that would be to draw entirely based on push-buffer handling (which might require removing possibly all D3D patches!) if (g_pXbox_PixelShader != nullptr) { // TODO : If D3DDevice_SetPixelShader() in XDKs don't overwrite the X_D3DRS_PS_RESERVED slot with PSDef.PSTextureModes, // store it here and restore after memcpy, or alternatively, perform two separate memcpy's (the halves before, and after the reserved slot). diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp new file mode 100644 index 000000000..71d8394e1 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -0,0 +1,387 @@ +// This is an open source non-commercial project. Dear PVS-Studio, please check it. +// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com +// ****************************************************************** +// * +// * This file is part of the Cxbx project. +// * +// * Cxbx and Cxbe are free software; you can redistribute them +// * and/or modify them under the terms of the GNU General Public +// * License as published by the Free Software Foundation; either +// * version 2 of the license, or (at your option) any later version. +// * +// * This program is distributed in the hope that it will be useful, +// * but WITHOUT ANY WARRANTY; without even the implied warranty of +// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// * GNU General Public License for more details. 
+// * +// * You should have recieved a copy of the GNU General Public License +// * along with this program; see the file COPYING. +// * If not, write to the Free Software Foundation, Inc., +// * 59 Temple Place - Suite 330, Bostom, MA 02111-1307, USA. +// * +// * 2020 PatrickvL +// * +// * All rights reserved +// * +// ****************************************************************** + +#define LOG_PREFIX CXBXR_MODULE::PXSH + +#include <sstream> // std::stringstream +#include "Shader.h" // EmuCompileShader +#include "PixelShader.h" // EmuCompilePixelShader +//#include "core\kernel\init\CxbxKrnl.h" +//#include "core\kernel\support\Emu.h" + +extern const char* g_ps_model = ps_model_3_0; + +// HLSL pixel shader generation + +static const std::string register_str[16+2] = { + "_discard", // PS_REGISTER_DISCARD = 0x00L, // w + "C0", // PS_REGISTER_C0 = 0x01L, // r + "C1", // PS_REGISTER_C1 = 0x02L, // r + "fog", // PS_REGISTER_FOG = 0x03L, // r + "v0", // PS_REGISTER_V0 = 0x04L, // r/w + "v1", // PS_REGISTER_V1 = 0x05L, // r/w + "?r6?", + "?r7?", + "t0", // PS_REGISTER_T0 = 0x08L, // r/w + "t1", // PS_REGISTER_T1 = 0x09L, // r/w + "t2", // PS_REGISTER_T2 = 0x0aL, // r/w + "t3", // PS_REGISTER_T3 = 0x0bL, // r/w + "r0", // PS_REGISTER_R0 = 0x0cL, // r/w + "r1", // PS_REGISTER_R1 = 0x0dL, // r/w + "sum", // PS_REGISTER_V1R0_SUM = 0x0eL, // r + "prod", // PS_REGISTER_EF_PROD = 0x0fL, // r + + // Cxbx extension; Separate final combiner constant registers : + "FC0", // PS_REGISTER_FC0 = 0x10L, // r + "FC1", // PS_REGISTER_FC1 = 0x11L, // r +}; + +static const unsigned channel_index_Alpha = 0; +static const unsigned channel_index_RGB = 1; +static const unsigned channel_index_BlueToAlpha = 2; // Note : RGB pipeline (sometimes referred to as "portion") can (besides reading .rgb) expand blue to alpha as well + +void InputRegisterHLSL(std::stringstream& hlsl, RPSInputRegister &input, unsigned channel_index, bool isLast = false, int isFinalCombiner = 0) +{ + static const std::string 
pipeline_channel_str[3][2] = { + ".b", ".a", // [0][*] dest Alpha : [0] = PS_CHANNEL_BLUE, [1] = PS_CHANNEL_ALPHA >> 4 + ".rgb", ".aaa", // [1][*] dest RGB : [0] = PS_CHANNEL_RGB, [1] = PS_CHANNEL_ALPHA >> 4 + ".rgbb", ".aaaa", // [2][*] dest RGB+BlueToAlpha : [0] = PS_CHANNEL_RGB, [1] = PS_CHANNEL_ALPHA >> 4 (test-case : TechCertGame) TODO : Verify .aaaa is indeed unreachable (BlueToAlpha being forbidden for Alpha channel + }; + + static const std::string input_mapping_str[8][3] = { + // [*][0] = PS_REGISTER_ZERO-derived constants, based on enum PS_INPUTMAPPING : + // [*][1] = Source register modifier macro's, based on enum PS_INPUTMAPPING : + // [*][2] = Final combiner source register modifier macro's, based on enum PS_INPUTMAPPING : + "zero", "s_sat", "abs", // saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, OK for final combiner // Clamps negative x to 0 + "one", "s_comp", "", // ( 1.0 - saturate(x) ) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, OK for final combiner // Complements x (1-x) + "-one", "s_bx2", "N/A", // ( 2.0 * max(0.0, x) - 1.0) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, invalid for final combiner // Shifts range from [0..1] to [-1..1] + "one", "s_negbx2", "N/A", // (-2.0 * max(0.0, x) + 1.0) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates + "-half", "s_bias", "N/A", // (max(0.0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 + "half", "s_negbias", "N/A", // (-max(0.0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates + "zero", "s_ident", "N/A", // x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, invalid for final combiner // No modifier, x is passed without alteration + "zero", "s_neg", "N/A" // -x // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, invalid for final combiner // Negate + }; + + // Generate 
channel selector + std::string channel_str = pipeline_channel_str[channel_index][input.Channel >> 4]; + + unsigned input_mapping_index = (input.InputMapping >> 5) & 0x07; // Converts PS_INPUTMAPPING to an index into input_mapping_str + if (input.Reg == PS_REGISTER_ZERO) { // = PS_REGISTER_DISCARD + // Generate a constant per input mapping (instead of applying that on register_str[PS_REGISTER_DISCARD]) + hlsl << input_mapping_str[input_mapping_index][0] << channel_str; + } + else { + // Or an actual register (with an input mapping function applied) + switch (input_mapping_index) { + case PS_INPUTMAPPING_SIGNED_IDENTITY >> 5: + // Note : signed identity doesn't alter the argument, so avoid cluttering the output by leaving it out + hlsl << register_str[input.Reg] << channel_str; + break; + case PS_INPUTMAPPING_SIGNED_NEGATE >> 5: + // Note : signed negate can be written in short-hand using a minus sign + hlsl << '-' << register_str[input.Reg] << channel_str; + break; + default: + hlsl << input_mapping_str[input_mapping_index][1 + isFinalCombiner] << '(' << register_str[input.Reg] << channel_str << ')'; + break; + } + } + + if (!isLast) + hlsl << ','; +} + +static const std::string opcode_comment[6][2] = { + "xdd", "d0=s0 dot s1, d1=s2 dot s3", // dot/dot/discard > calculating AB=A.B and CD=C.D + "xdm", "d0=s0 dot s1, d1=s2*s3", // dot/mul/discard > calculating AB=A.B and CD=C*D + "xmd", "d0=s0*s1, d1=s2 dot s3", // mul/dot/discard > calculating AB=A*B and CD=C.D + "xmma", "d0=s0*s1, d1=s2*s3, d2={s2*s3}+{s0*s1}", // mul/mul/sum > calculating AB=A*B and CD=C*D and Sum=CD+AB + "xmmc", "d0=s0*s1, d1=s2*s3, d2={r0.a>0.5}?{s2*s3}:{s0*s1}", // mul/mul/mux > calculating AB=A*B and CD=C*D and Mux=CD?AB + "xfc", "sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b" +}; + +void CombinerStageHlsl(std::stringstream& hlsl, RPSCombinerStageChannel& stage, unsigned channel_index) +{ + // Determine which opcode we're dealing with (xdd, xdm, xmd, xmma or xmmc) + // Note : 
DotProduct can only be set for RGB (see RPSCombinerStageChannel::Decode) + unsigned opcode; + if (stage.OutputAB.DotProduct) { + if (stage.OutputCD.DotProduct) + opcode = 0; // xdd + else + opcode = 1; // xdm + } else { + if (stage.OutputCD.DotProduct) + opcode = 2; // xmd TODO : Verify + else + if (!stage.AB_CD_MUX) + opcode = 3; // xmma + else + opcode = 4; // xmmc + } + + // Early exit when all outputs are discarded + if ((stage.OutputAB.Reg == PS_REGISTER_DISCARD) && (stage.OutputCD.Reg == PS_REGISTER_DISCARD)) { + // xdd, xdm and xmd have just 2 outputs, but xmma and xmmc must also check their 3rd output + if ((opcode <= 2) || (stage.OutputMUX_SUM.Reg == PS_REGISTER_DISCARD)) { + hlsl << "// discarded"; + return; + } + } + + // Determine output channels (only channel_index_RGB can increase to channel_index_BlueToAlpha) : + static const std::string dst_channels[3] = { ".a", ".rgb", ".rgba" }; + unsigned AB_channel_index = channel_index + stage.OutputAB.BlueToAlpha; + unsigned CD_channel_index = channel_index + stage.OutputCD.BlueToAlpha; + + // Generate 2 (or 3 for xmma/xmmc) output arguments + // Note : BlueToAlpha can only be set for RGB (see RPSCombinerStageChannel::Decode) + std::stringstream arguments; + arguments << register_str[stage.OutputAB.Reg] << dst_channels[AB_channel_index]; + arguments << ',' << register_str[stage.OutputCD.Reg] << dst_channels[CD_channel_index]; + // xmma and xmmc have a 3rd output (which doesn't support the BlueToAlpha flag) + if (opcode >= 3) { + // TODO : Figure out how to support BlueToAlpha source to MUX_SUM.rgb scenario + // If the xmma_m and xmmc_m macro's can't handle this, we may need to drop + // those macro's, and generate the HLSL here (alas, as we try to avoid that). 
+ arguments << ',' << register_str[stage.OutputMUX_SUM.Reg] << dst_channels[channel_index]; + } + // Insert a visual separation between the output arguments, and the 4 input arguments + arguments << ", "; + // Generate 4 input arguments + InputRegisterHLSL(arguments, stage.OutputAB.Input[0], AB_channel_index); + InputRegisterHLSL(arguments, stage.OutputAB.Input[1], AB_channel_index); + InputRegisterHLSL(arguments, stage.OutputCD.Input[0], CD_channel_index); + InputRegisterHLSL(arguments, stage.OutputCD.Input[1], CD_channel_index); + + + // Generate combiner output modifier + static const std::string output_modifier_str[8] = { + "d_ident", // y = x // PS_COMBINEROUTPUT_OUTPUTMAPPING_IDENTITY= 0x00L + "d_bias", // y = (x - 0.5) // PS_COMBINEROUTPUT_OUTPUTMAPPING_BIAS= 0x08L // Subtracts 0.5 from outputs + "d_x2", // y = x * 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1= 0x10L // Scales outputs by 2 + "d_bx2", // y = (x - 0.5) * 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1_BIAS= 0x18L // Subtracts 0.5 from outputs and scales by 2 + "d_x4", // y = x * 4 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2= 0x20L // Scales outputs by 4 + "d_bx4", // y = (x - 0.5) * 4 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS= 0x28L // Subtracts 0.5 from outputs and scales by 4 + "d_d2", // y = x / 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1= 0x30L // Divides outputs by 2 + "d_bd2" // y = (x - 0.5) / 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L // Subtracts 0.5 from outputs and divides by 2 + }; + + std::string output_modifier = output_modifier_str[(stage.CombinerOutputMapping & 0x38) >> 3]; + + // Concatenate it all together into an opcode 'call' (which resolves into macro expressions) + hlsl << opcode_comment[opcode][0] << '(' << arguments.str() << ' ' << output_modifier; + + // xmma and xmmc require a temporary register with channel designation + if (opcode >= 3) + hlsl << ",tmp" << dst_channels[AB_channel_index]; // TODO : + + hlsl << "); // " << 
opcode_comment[opcode][1]; +} + +void FinalCombinerStageHlsl(std::stringstream& hlsl, RPSFinalCombiner& fc) +{ + std::stringstream arguments; + + for (unsigned i = 0; i < 7; i++) { // Generate A, B, C, D, E, F, G input arguments + // Note : Most final combiner inputs are treated as RGB, but G is single-channel (.a or .b) + bool isLast = (i == 6); + unsigned channel_index = isLast ? channel_index_Alpha : channel_index_RGB; + InputRegisterHLSL(arguments, fc.Input[i], channel_index, isLast, /*isFinalCombiner=*/1); + } + + // Concatenate it all together into the xfc opcode 'call' (which resolves into macro expressions) + // Note : The xfc opcode macro does not have an output modifier argument + hlsl << "\n " << opcode_comment[5][0] << "(" << arguments.str() << "); // " << opcode_comment[5][1]; +} + +void OutputDefine(std::stringstream& hlsl, std::string define_str, bool enabled) +{ + if (enabled) + hlsl << "\n#define " << define_str; + else + hlsl << "\n#undef " << define_str; +} + +/* Disabled, until BumpDemo is fixed (which with this code, inadvertedly skips stage 1 and 2 dotproducts) : +bool IsTextureSampled(DecodedRegisterCombiner* pShader, int reg) +{ + // TODO : Instead searching like this afterwards, simply set a boolean for each texture-read detected during decoding + // TODO : Extend detection if textures can also be used indirectly thru PSInputTexture (without mention in actual combiner stages) + for (unsigned i = 0; i < pShader->NumberOfCombiners; i++) { + // Is an output calculated, and does any of the inputs read from the given (texture) register? 
+ if (pShader->Combiners[i].RGB.OutputAB.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].RGB.OutputAB.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].RGB.OutputAB.Input[1].Reg == reg) return true; + } + if (pShader->Combiners[i].RGB.OutputCD.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].RGB.OutputCD.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].RGB.OutputCD.Input[1].Reg == reg) return true; + } + if (pShader->Combiners[i].Alpha.OutputAB.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].Alpha.OutputAB.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].Alpha.OutputAB.Input[1].Reg == reg) return true; + } + if (pShader->Combiners[i].Alpha.OutputCD.Reg != PS_REGISTER_DISCARD) { + if (pShader->Combiners[i].Alpha.OutputCD.Input[0].Reg == reg) return true; + if (pShader->Combiners[i].Alpha.OutputCD.Input[1].Reg == reg) return true; + } + // Is the given register writen to? Then no sampling took place + if (pShader->Combiners[i].RGB.OutputAB.Reg == reg) return false; + if (pShader->Combiners[i].RGB.OutputCD.Reg == reg) return false; + if (pShader->Combiners[i].RGB.OutputMUX_SUM.Reg == reg) return false; + if (pShader->Combiners[i].Alpha.OutputAB.Reg == reg) return false; + if (pShader->Combiners[i].Alpha.OutputCD.Reg == reg) return false; + if (pShader->Combiners[i].Alpha.OutputMUX_SUM.Reg == reg) return false; + } + + if (pShader->hasFinalCombiner) { + for (unsigned i = 0; i < 7; i++) { + if (pShader->FinalCombiner.Input[i].Reg == reg) return true; + } + } + + return false; +} */ + +void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) +{ + // Include HLSL header and footer as raw strings : + static const std::string hlsl_template[4] = { + #include "core\hle\D3D8\Direct3D9\CxbxPixelShaderTemplate.hlsl" + }; + + hlsl << hlsl_template[0]; // Start with the HLSL template header + + hlsl << "\n#define PS_COMBINERCOUNT " << pShader->NumberOfCombiners; + if (pShader->NumberOfCombiners > 
0) { + OutputDefine(hlsl, "PS_COMBINERCOUNT_UNIQUE_C0", pShader->CombinerHasUniqueC0); + OutputDefine(hlsl, "PS_COMBINERCOUNT_UNIQUE_C1", pShader->CombinerHasUniqueC1); + OutputDefine(hlsl, "PS_COMBINERCOUNT_MUX_MSB", pShader->CombinerMuxesOnMsb); + } + + for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { + hlsl << "\n#define PS_COMPAREMODE_" << i << "(in)" + << (pShader->PSCompareMode[i][0] ? " CM_GE(in.x)" : " CM_LT(in.x)") // PS_COMPAREMODE_S_[GE|LT] + << (pShader->PSCompareMode[i][1] ? " CM_GE(in.y)" : " CM_LT(in.y)") // PS_COMPAREMODE_T_[GE|LT] + << (pShader->PSCompareMode[i][2] ? " CM_GE(in.z)" : " CM_LT(in.z)") // PS_COMPAREMODE_R_[GE|LT] + << (pShader->PSCompareMode[i][3] ? " CM_GE(in.w)" : " CM_LT(in.w)");// PS_COMPAREMODE_Q_[GE|LT] + } + + hlsl << "\nstatic const int PS_INPUTTEXTURE_[4] = { -1, " + << pShader->PSInputTexture[1] << ", " + << pShader->PSInputTexture[2] << ", " + << pShader->PSInputTexture[3] << " };"; + + // Generate #defines required by CxbxPixelShaderTemplate.hlsl : + for (unsigned i = 1; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { + static const std::string dotmapping_str[8] = { + "PS_DOTMAPPING_ZERO_TO_ONE", // = 0x00L, // - * * * + "PS_DOTMAPPING_MINUS1_TO_1_D3D", // = 0x01L, // - * * * + "PS_DOTMAPPING_MINUS1_TO_1_GL", // = 0x02L, // - * * * + "PS_DOTMAPPING_MINUS1_TO_1", // = 0x03L, // - * * * + "PS_DOTMAPPING_HILO_1", // = 0x04L, // - * * * + "PS_DOTMAPPING_HILO_HEMISPHERE_D3D", // = 0x05L, // - * * * + "PS_DOTMAPPING_HILO_HEMISPHERE_GL", // = 0x06L, // - * * * + "PS_DOTMAPPING_HILO_HEMISPHERE" // = 0x07L, // - * * * + }; + + hlsl << "\n#define PS_DOTMAPPING_" << i << " " << dotmapping_str[(unsigned)pShader->PSDotMapping[i]]; + } + + if (pShader->hasFinalCombiner) { + OutputDefine(hlsl, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1", pShader->FinalCombiner.ComplementV1); + OutputDefine(hlsl, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0", pShader->FinalCombiner.ComplementR0); + OutputDefine(hlsl, "PS_FINALCOMBINERSETTING_CLAMP_SUM", 
pShader->FinalCombiner.ClampSum); + } + + hlsl << hlsl_template[1]; + hlsl << hlsl_template[2]; + + // Generate all four texture stages + for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { + static const std::string texturemode_str[19] = { + "PS_TEXTUREMODES_NONE", // = 0x00L, // * * * * + "PS_TEXTUREMODES_PROJECT2D", // = 0x01L, // * * * * + "PS_TEXTUREMODES_PROJECT3D", // = 0x02L, // * * * * + "PS_TEXTUREMODES_CUBEMAP", // = 0x03L, // * * * * + "PS_TEXTUREMODES_PASSTHRU", // = 0x04L, // * * * * + "PS_TEXTUREMODES_CLIPPLANE", // = 0x05L, // * * * * + "PS_TEXTUREMODES_BUMPENVMAP", // = 0x06L, // - * * * + "PS_TEXTUREMODES_BUMPENVMAP_LUM", // = 0x07L, // - * * * + "PS_TEXTUREMODES_BRDF", // = 0x08L, // - - * * + "PS_TEXTUREMODES_DOT_ST", // = 0x09L, // - - * * + "PS_TEXTUREMODES_DOT_ZW", // = 0x0aL, // - - * * + "PS_TEXTUREMODES_DOT_RFLCT_DIFF", // = 0x0bL, // - - * - + "PS_TEXTUREMODES_DOT_RFLCT_SPEC", // = 0x0cL, // - - - * + "PS_TEXTUREMODES_DOT_STR_3D", // = 0x0dL, // - - - * + "PS_TEXTUREMODES_DOT_STR_CUBE", // = 0x0eL, // - - - * + "PS_TEXTUREMODES_DPNDNT_AR", // = 0x0fL, // - * * * + "PS_TEXTUREMODES_DPNDNT_GB", // = 0x10L, // - * * * + "PS_TEXTUREMODES_DOTPRODUCT", // = 0x11L, // - * * - + "PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST", // = 0x12L, // - - - * + }; + + /* Disabled, until BumpDemo is fixed (which with this code, inadvertedly skips stage 1 and 2 dotproducts) : + // Skip stages never read, to avoid compilation overhead + if (!IsTextureSampled(pShader, PS_REGISTER_T0 + i)) + continue; */ + + hlsl << "\n " << texturemode_str[pShader->PSTextureModes[i]] << "(" << i << ");"; + // On Xbox, r0.a is initialized to t0.a (and r1.a to t1.a ?) 
: + if (i == 0) hlsl << " r0.a = t0.a;"; + if (i == 1) hlsl << " r1.a = t1.a;"; + } + + // Generate all combiners (rgb and alpha) + for (unsigned i = 0; i < pShader->NumberOfCombiners; i++) { + hlsl << "\n stage = " << i << "; "; + CombinerStageHlsl(hlsl, pShader->Combiners[i].RGB, channel_index_RGB); + hlsl << "\n /* + */ "; + CombinerStageHlsl(hlsl, pShader->Combiners[i].Alpha, channel_index_Alpha); + } + + if (pShader->hasFinalCombiner) { + FinalCombinerStageHlsl(hlsl, pShader->FinalCombiner); + } + + hlsl << hlsl_template[3]; // Finish with the HLSL template footer +} + +// recompile xbox pixel shader function +extern HRESULT EmuCompilePixelShader +( + DecodedRegisterCombiner* pIntermediateShader, + ID3DBlob** ppHostShader +) +{ + auto hlsl_stream = std::stringstream(); + BuildShader(pIntermediateShader, hlsl_stream); + std::string hlsl_str = hlsl_stream.str(); + + return EmuCompileShader(hlsl_str, g_ps_model, ppHostShader); +} diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.h b/src/core/hle/D3D8/Direct3D9/PixelShader.h new file mode 100644 index 000000000..d092139a0 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.h @@ -0,0 +1,14 @@ +#pragma once + +#include "Shader.h" // ID3DBlob (via d3dcompiler.h > d3d11shader.h > d3dcommon.h) +#include "core\hle\D3D8\XbPixelShader.h" // DecodedRegisterCombiner + +static const char* ps_model_2_a = "ps_2_a"; +static const char* ps_model_3_0 = "ps_3_0"; +extern const char* g_ps_model; + +extern HRESULT EmuCompilePixelShader +( + DecodedRegisterCombiner* pIntermediateShader, + ID3DBlob** ppHostShader +); diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 9d0af8b27..ca79697bf 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -20,6 +20,7 @@ // * 59 Temple Place - Suite 330, Bostom, MA 02111-1307, USA. 
// * // * (c) 2002-2003 kingofc +// * 2020 PatrickvL // * // * All rights reserved // * @@ -32,25 +33,6 @@ With the help of this parser it is possible to generate Direct3D pixel shader assembly code. - TODO: - - fix BumpDemo - (after second recompilation the shader does not work, - can also be something in CxbxKrnl because it looks like no - textures are set. Check cubemap loading from resourcesd!!!) - => seems to work now, the problem is that I don't know - how it must look on a real xbox - - - add reference counting constants which were added as c variables - if they are compiled away (optimization of the command, etc.) - decrement the reference count and when it reaches 0 remove - the constant (to save the num of vars) - - - add _sat feature - * Support as instruction modifier, - if necessary as mov_sat x, y - - - When porting to DirectX 9, expand this to pixel shader model 2.0 or up - - Alternatively, translate to HLSL and let D3DXCompileShader/D3DCompile figure it out */ #define LOG_PREFIX CXBXR_MODULE::PXSH @@ -58,6 +40,7 @@ #include "core\kernel\support\Emu.h" #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For g_pD3DDevice, g_pXbox_PixelShader #include "core\hle\D3D8\XbPixelShader.h" +#include "core\hle\D3D8\Direct3D9\PixelShader.h" // EmuCompilePixelShader #include "core\hle\D3D8\XbD3D8Logging.h" // For D3DErrorString() #include "core\kernel\init\CxbxKrnl.h" // For CxbxKrnlCleanup() @@ -76,5608 +59,6 @@ extern XboxRenderStateConverter XboxRenderStates; // Declared in Direct3D9.cpp LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ if(g_bPrintfOn) printf - -/*---------------------------------------------------------------------------*/ -/* Texture configuration - The following members of the D3DPixelShaderDef */ -/* structure define the addressing modes of each of the four texture stages:*/ -/* PSTextureModes */ -/* PSDotMapping */ -/* PSInputTexture */ -/* PSCompareMode */ -/*---------------------------------------------------------------------------*/ - -// 
========================================================================================================= -// PSTextureModes -// --------.--------.--------.---xxxxx stage0 -// --------.--------.------xx.xxx----- stage1 -// --------.--------.-xxxxx--.-------- stage2 -// --------.----xxxx.x-------.-------- stage3 - -#define PS_TEXTUREMODES(t0,t1,t2,t3) (((t3)<<15)|((t2)<<10)|((t1)<<5)|(t0)) - -/* -Texture modes: -NONE :stage inactive -PROJECT2D :argb = texture(s/q, t/q) -PROJECT3D :argb = texture(s/q, t/q, r/q) -CUBEMAP :argb = cubemap(s,t,r) -PASSTHRU :argb = s,t,r,q -CLIPPLANE :pixel not drawn if s,t,r, or q < 0. PSCompareMode affects comparison -BUMPENVMAP :argb=texture(s+mat00*src.r+mat01*src.g, - t+mat10*src.r+mat11*src.g) - mat00 set via D3DTSS_BUMPENVMAT00, etc. -BUMPENVMAP_LUM :argb=texture(s+mat00*src.r+mat01*src.g, - t+mat10*src.r+mat11*src.g); - rgb *= (lum_scale*src.b + lum_bias); (a is not affected) - lum_scale set by D3DTSS_BUMPENVLSCALE - lum_bias set by D3DTSS_BUMPENVLOFFSET - mat00 set via D3DTSS_BUMPENVMAT00, etc. 
-BRDF :argb = texture(eyeSigma, lightSigma, dPhi) - eyeSigma = Sigma of eye vector in spherical coordinates - lightSigma = Sigma of light vector in spherical coordinates - dPhi = Phi of eye - Phi of light -DOT_ST :argb = texture(, (s,t,r).(src.r,src.g,src.b)) -DOT_ZW :frag depth = (/((s,t,r).(src.r,src.g,src.b)) -DOT_RFLCT_DIFF :n = (,(s,t,r).(src.r,src.g,src.b),) - argb = cubemap(n) -DOT_RFLCT_SPEC :n = (,,(s,t,r).(src.r,src.g,src.b)) - r = 2*n*(n.e)/(n.n) - e where e is eye vector built from q coord of each stage - argb = cubemap(r) -DOT_STR_3D :argb=texture((,,(s,t,r).(src.r,src.g,src.b))) -DOT_STR_CUBE :argb=cubemap((,,(s,t,r).(src.r,src.g,src.b))) -DEPENDENT_AR :argb = texture(src.a, src.r) -DEPENDENT_GB :argb = texture(src.g, src.b) -DOTPRODUCT :argb = (s,t,r).(src.r,src.g,src.b) -DOT_RFLCT_SPEC_CONST :n = (,,(s,t,r).(src.r,src.g,src.b)) - r = 2*n*(n.e)/(n.n) - e where e is eye vector set via SetEyeVector() - argb = cubemap(r) -*/ - -enum PS_TEXTUREMODES -{ // valid in stage 0 1 2 3 - PS_TEXTUREMODES_NONE= 0x00L, // * * * * - PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * * - PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * * - PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * * - PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * * - PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * * - PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * * - PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * * - PS_TEXTUREMODES_BRDF= 0x08L, // - - * * - PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * * - PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * * - PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * - - PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - * - PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - * - PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - * - PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * * - PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * * - PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - - PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * - // 0x13-0x1f reserved -}; - -// 
========================================================================================================= -// PSDotMapping -// --------.--------.--------.-----xxx // stage1 -// --------.--------.--------.-xxx---- // stage2 -// --------.--------.-----xxx.-------- // stage3 - -#define PS_DOTMAPPING(t0,t1,t2,t3) (((t3)<<8)|((t2)<<4)|(t1)) - -// Mappings: -// ZERO_TO_ONE :rgb->(r,g,b): 0x0=>0.0, 0xff=>1.0 -// MINUS1_TO_1_D3D :rgb->(r,g,b): 0x0=>-128/127, 0x01=>-1.0, 0x80=>0.0, 0xff=>1.0 -// MINUS1_TO_1_GL :rgb->(r,g,b): 0x80=>-1.0, 0x0=>0.0, 0x7f=>1.0 -// MINUS1_TO_1 :rgb->(r,g,b): 0x80=>-128/127, 0x81=>-1.0, 0x0=>0.0, 0x7f=>1.0 -// HILO_1 :HL->(H,L,1.0): 0x0000=>0.0, 0xffff=>1.0 -// HILO_HEMISPHERE :HL->(H,L,sqrt(1-H*H-L*L)): 0x8001=>-1.0, 0x0=>0.0, 0x7fff=>1.0, 0x8000=>-32768/32767 - -enum PS_DOTMAPPING -{ // valid in stage 0 1 2 3 - PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * * - PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * * - PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * * - PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * * - PS_DOTMAPPING_HILO_1= 0x04L, // - * * * - // ? 0x05L ? - // ? 0x06L ? 
- PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * * -}; - -// ========================================================================================================= -// PSCompareMode -// --------.--------.--------.----xxxx // stage0 -// --------.--------.--------.xxxx---- // stage1 -// --------.--------.----xxxx.-------- // stage2 -// --------.--------.xxxx----.-------- // stage3 - -#define PS_COMPAREMODE(t0,t1,t2,t3) (((t3)<<12)|((t2)<<8)|((t1)<<4)|(t0)) - -enum PS_COMPAREMODE -{ - PS_COMPAREMODE_S_LT= 0x00L, - PS_COMPAREMODE_S_GE= 0x01L, - - PS_COMPAREMODE_T_LT= 0x00L, - PS_COMPAREMODE_T_GE= 0x02L, - - PS_COMPAREMODE_R_LT= 0x00L, - PS_COMPAREMODE_R_GE= 0x04L, - - PS_COMPAREMODE_Q_LT= 0x00L, - PS_COMPAREMODE_Q_GE= 0x08L, -}; - -// ========================================================================================================= -// PSInputTexture -// --------.-------x.--------.-------- // stage2 -// --------.--xx----.--------.-------- // stage3 -// -// Selects the other texture to use as an input in the following texture modes: -// DOT_ST, DOT_STR_3D, DOT_STR_CUBE, DOT_ZW, DOT_RFLCT_SPEC, -// DOT_RFLCT_DIFF, DPNDNT_AR, DPNDNT_GB, BUMPENVMAP, -// BUMPENVMAP_LUM, DOT_PRODUCT - -#define PS_INPUTTEXTURE(t0,t1,t2,t3) (((t3)<<20)|((t2)<<16)) - - -/*---------------------------------------------------------------------------------*/ -/* Color combiners - The following members of the D3DPixelShaderDef structure */ -/* define the state for the eight stages of color combiners: */ -/* PSCombinerCount - Number of stages */ -/* PSAlphaInputs[8] - Inputs for alpha portion of each stage */ -/* PSRGBInputs[8] - Inputs for RGB portion of each stage */ -/* PSConstant0[8] - Constant 0 for each stage */ -/* PSConstant1[8] - Constant 1 for each stage */ -/* PSFinalCombinerConstant0 - Constant 0 for final combiner */ -/* PSFinalCombinerConstant1 - Constant 1 for final combiner */ -/* PSAlphaOutputs[8] - Outputs for alpha portion of each stage */ -/* PSRGBOutputs[8] - Outputs 
for RGB portion of each stage */ -/*---------------------------------------------------------------------------------*/ - - -// ========================================================================================================= -// PSCombinerCount -// --------.--------.--------.----xxxx // number of combiners (1-8) -// --------.--------.-------x.-------- // mux bit (0= LSB, 1= MSB) -// --------.--------.---x----.-------- // separate C0 -// --------.-------x.--------.-------- // separate C1 - -#define PS_COMBINERCOUNT(count, flags) (((flags)<<8)|(count)) -// count is 1-8, flags contains one or more values from PS_COMBINERCOUNTFLAGS - -enum PS_COMBINERCOUNTFLAGS -{ - PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb - PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb - - PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage - PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage - - PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage - PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage -}; - -// ========================================================================================================= -// PSRGBInputs[0-7] -// PSAlphaInputs[0-7] -// PSFinalCombinerInputsABCD -// PSFinalCombinerInputsEFG -// --------.--------.--------.----xxxx // D register -// --------.--------.--------.---x---- // D channel (0= RGB/BLUE, 1= ALPHA) -// --------.--------.--------.xxx----- // D input mapping -// --------.--------.----xxxx.-------- // C register -// --------.--------.---x----.-------- // C channel (0= RGB/BLUE, 1= ALPHA) -// --------.--------.xxx-----.-------- // C input mapping -// --------.----xxxx.--------.-------- // B register -// --------.---x----.--------.-------- // B channel (0= RGB/BLUE, 1= ALPHA) -// --------.xxx-----.--------.-------- // B input mapping -// ----xxxx.--------.--------.-------- // A register -// ---x----.--------.--------.-------- // A channel (0= RGB/BLUE, 1= ALPHA) -// 
xxx-----.--------.--------.-------- // A input mapping - -// examples: -// -// shader.PSRGBInputs[3]= PS_COMBINERINPUTS( -// PS_REGISTER_T0 | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, -// PS_REGISTER_C0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, -// PS_REGISTER_ZERO, -// PS_REGISTER_ZERO); -// -// shader.PSFinalCombinerInputsABCD= PS_COMBINERINPUTS( -// PS_REGISTER_T0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, -// PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, -// PS_REGISTER_EFPROD | PS_INPUTMAPPING_UNSIGNED_INVERT | PS_CHANNEL_RGB, -// PS_REGISTER_ZERO); -// -// PS_FINALCOMBINERSETTING is set in 4th field of PSFinalCombinerInputsEFG with PS_COMBINERINPUTS -// example: -// -// shader.PSFinalCombinerInputsEFG= PS_COMBINERINPUTS( -// PS_REGISTER_R0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, -// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, -// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_BLUE, -// PS_FINALCOMBINERSETTING_CLAMP_SUM | PS_FINALCOMBINERSETTING_COMPLEMENT_R0); - -#define PS_COMBINERINPUTS(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d)) -// For PSFinalCombinerInputsEFG, -// a,b,c contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING for input E,F, and G -// d contains values from PS_FINALCOMBINERSETTING -// For all other inputs, -// a,b,c,d each contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING - -// The input can have the following mappings applied : -// -// PS_INPUTMAPPING_UNSIGNED_IDENTITY : y = max(0,x) = 1*max(0,x) + 0.0 -// PS_INPUTMAPPING_UNSIGNED_INVERT : y = 1 - max(0,x) = -1*max(0,x) + 1.0 -// PS_INPUTMAPPING_EXPAND_NORMAL : y = 2*max(0,x) - 1 = 2*max(0,x) - 1.0 -// PS_INPUTMAPPING_EXPAND_NEGATE : y = 1 - 2*max(0,x) = -2*max(0,x) + 1.0 -// PS_INPUTMAPPING_HALFBIAS_NORMAL : y = max(0,x) - 1/2 = 1*max(0,x) - 0.5 -// PS_INPUTMAPPING_HALFBIAS_NEGATE : y = 1/2 - max(0,x) = -1*max(0,x) + 0.5 -// 
PS_INPUTMAPPING_SIGNED_IDENTITY : y = x = 1* x + 0.0 -// PS_INPUTMAPPING_SIGNED_NEGATE : y = -x = -1* x + 0.0 -// -// (Note : I don't know for sure if the max() operation mentioned above is indeed what happens, -// as there's no further documentation available on this. Native Direct3D can clamp with the -// '_sat' instruction modifier, but that's not really the same as these Xbox1 input mappings.) -// -// When the input register is PS_ZERO, the above mappings result in the following constants: -// -// PS_REGISTER_NEGATIVE_ONE (PS_INPUTMAPPING_EXPAND_NORMAL on zero) : y = -1.0 -// PS_REGISTER_NEGATIVE_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NORMAL on zero) : y = -0.5 -// PS_REGISTER_ZERO itself : y = 0.0 -// PS_REGISTER_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NEGATE on zero) : y = 0.5 -// PS_REGISTER_ONE (PS_INPUTMAPPING_UNSIGNED_INVERT on zero) : y = 1.0 -// (Note : It has no define, but PS_INPUTMAPPING_EXPAND_NEGATE on zero results in ONE too!) - -enum PS_INPUTMAPPING -{ - PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) OK for final combiner: y = abs(x) - PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) OK for final combiner: y = 1 - x - PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2*max(0,x) - 1 invalid for final combiner - PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2*max(0,x) invalid for final combiner - PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 invalid for final combiner - PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) invalid for final combiner - PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x invalid for final combiner - PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x invalid for final combiner -}; - -enum PS_REGISTER -{ - PS_REGISTER_ZERO= 0x00L, // r - PS_REGISTER_DISCARD= 0x00L, // w - PS_REGISTER_C0= 0x01L, // r - PS_REGISTER_C1= 0x02L, // r - PS_REGISTER_FOG= 0x03L, // r - PS_REGISTER_V0= 0x04L, // r/w - PS_REGISTER_V1= 0x05L, // r/w - PS_REGISTER_T0= 0x08L, // r/w - PS_REGISTER_T1= 0x09L, // r/w - PS_REGISTER_T2= 0x0aL, // r/w - 
PS_REGISTER_T3= 0x0bL, // r/w - PS_REGISTER_R0= 0x0cL, // r/w - PS_REGISTER_R1= 0x0dL, // r/w - PS_REGISTER_V1R0_SUM= 0x0eL, // r - PS_REGISTER_EF_PROD= 0x0fL, // r - - PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 OK for final combiner - PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 invalid for final combiner - PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 invalid for final combiner - PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 invalid for final combiner - - PS_REGISTER_CXBX_PROD = PS_REGISTER_ZERO | PS_INPUTMAPPING_SIGNED_IDENTITY, // Cxbx internal use -}; - -// FOG ALPHA is only available in final combiner -// V1R0_SUM and EF_PROD are only available in final combiner (A,B,C,D inputs only) -// V1R0_SUM_ALPHA and EF_PROD_ALPHA are not available -// R0_ALPHA is initialized to T0_ALPHA in stage0 - -enum PS_CHANNEL -{ - PS_CHANNEL_RGB= 0x00, // used as RGB source - PS_CHANNEL_BLUE= 0x00, // used as ALPHA source - PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source -}; - -constexpr DWORD PS_ChannelMask = (DWORD)PS_CHANNEL_ALPHA; -constexpr DWORD PS_NoChannelMask = (DWORD)(~PS_ChannelMask); -constexpr DWORD PS_AlphaChannelsMask = (DWORD)(PS_ChannelMask | (PS_ChannelMask << 8) | (PS_ChannelMask << 16) | (PS_ChannelMask << 24)); -constexpr DWORD PS_NoChannelsMask = (DWORD)(~PS_AlphaChannelsMask); - -enum PS_FINALCOMBINERSETTING -{ - PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1] - PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping (1 - v1) is used as an input to the sum rather than v1 - PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping (1 - r0) is used as an input to the sum rather than r0 -}; - -// ========================================================================================================= -// PSRGBOutputs[0-7] -// PSAlphaOutputs[0-7] -// 
--------.--------.--------.----xxxx // CD register -// --------.--------.--------.xxxx---- // AB register -// --------.--------.----xxxx.-------- // SUM register -// --------.--------.---x----.-------- // CD output (0= multiply, 1= dot product) -// --------.--------.--x-----.-------- // AB output (0= multiply, 1= dot product) -// --------.--------.-x------.-------- // AB_CD mux/sum select (0= sum, 1= mux) -// --------.------xx.x-------.-------- // Output mapping -// --------.-----x--.--------.-------- // CD blue to alpha -// --------.----x---.--------.-------- // AB blue to alpha - -#define PS_COMBINEROUTPUTS(ab,cd,mux_sum,flags) (((flags)<<12)|((mux_sum)<<8)|((ab)<<4)|(cd)) -// ab,cd,mux_sum contain a value from PS_REGISTER -// flags contains values from PS_COMBINEROUTPUT - -enum PS_COMBINEROUTPUT -{ - PS_COMBINEROUTPUT_IDENTITY= 0x00L, // y = x - PS_COMBINEROUTPUT_BIAS= 0x08L, // y = x - 0.5 - PS_COMBINEROUTPUT_SHIFTLEFT_1= 0x10L, // y = x*2 - PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5)*2 - PS_COMBINEROUTPUT_SHIFTLEFT_2= 0x20L, // y = x*4 - // PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS= 0x28L, // y = (x - 0.5)*4 - PS_COMBINEROUTPUT_SHIFTRIGHT_1= 0x30L, // y = x/2 - // PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS= 0x38L, // y = (x - 0.5)/2 - - PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only - - PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only - - PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, - PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only - - PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, - PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only - - PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD - PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a -}; - -// AB_CD register output must be DISCARD if either AB_DOT_PRODUCT or CD_DOT_PRODUCT are set - -// ========================================================================================================= -// PSC0Mapping -// PSC1Mapping -// --------.--------.--------.----xxxx 
// offset of D3D constant for stage 0 -// --------.--------.--------.xxxx---- // offset of D3D constant for stage 1 -// --------.--------.----xxxx.-------- // offset of D3D constant for stage 2 -// --------.--------.xxxx----.-------- // offset of D3D constant for stage 3 -// --------.----xxxx.--------.-------- // offset of D3D constant for stage 4 -// --------.xxxx----.--------.-------- // offset of D3D constant for stage 5 -// ----xxxx.--------.--------.-------- // offset of D3D constant for stage 6 -// xxxx----.--------.--------.-------- // offset of D3D constant for stage 7 - -#define PS_CONSTANTMAPPING(s0,s1,s2,s3,s4,s5,s6,s7) \ - (((DWORD)(s0)&0xf)<< 0) | (((DWORD)(s1)&0xf)<< 4) | \ - (((DWORD)(s2)&0xf)<< 8) | (((DWORD)(s3)&0xf)<<12) | \ - (((DWORD)(s4)&0xf)<<16) | (((DWORD)(s5)&0xf)<<20) | \ - (((DWORD)(s6)&0xf)<<24) | (((DWORD)(s7)&0xf)<<28) -// s0-s7 contain the offset of the D3D constant that corresponds to the -// c0 or c1 constant in stages 0 through 7. These mappings are only used in -// SetPixelShaderConstant(). - -// ========================================================================================================= -// PSFinalCombinerConstants -// --------.--------.--------.----xxxx // offset of D3D constant for C0 -// --------.--------.--------.xxxx---- // offset of D3D constant for C1 -// --------.--------.-------x.-------- // Adjust texture flag - -#define PS_FINALCOMBINERCONSTANTS(c0,c1,flags) (((DWORD)(flags) << 8) | ((DWORD)(c0)&0xf)<< 0) | (((DWORD)(c1)&0xf)<< 4) -// c0 and c1 contain the offset of the D3D constant that corresponds to the -// constants in the final combiner. These mappings are only used in -// SetPixelShaderConstant(). 
Flags contains values from PS_GLOBALFLAGS - -enum PS_GLOBALFLAGS -{ - // if this flag is set, the texture mode for each texture stage is adjusted as follows: - // if set texture is a cubemap, - // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_CUBEMAP - // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_CUBEMAP - // change PS_TEXTUREMODES_DOT_STR_3D to PS_TEXTUREMODES_DOT_STR_CUBE - // if set texture is a volume texture, - // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_PROJECT3D - // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT3D - // change PS_TEXTUREMODES_DOT_STR_CUBE to PS_TEXTUREMODES_DOT_STR_3D - // if set texture is neither cubemap or volume texture, - // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_PROJECT2D - // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT2D - - PS_GLOBALFLAGS_NO_TEXMODE_ADJUST= 0x0000L, // don"t adjust texture modes - PS_GLOBALFLAGS_TEXMODE_ADJUST= 0x0001L, // adjust texture modes according to set texture -}; - -enum PSH_OPCODE -{ - PO_COMMENT, - PO_PS, - PO_DEF, - PO_DCL, // Note : ps.2.0 and up only - PO_DCL_2D, // Note : ps.2.0 and up only - PO_DCL_CUBE, // Note : ps.2.0 and up only - PO_DCL_VOLUME, // Note : ps.2.0 and up only - PO_TEX, - PO_TEXLD, // Note : ps.1.4 only - PO_TEXLD2, // Note : ps.2.0 and up only - PO_TEXBEM, - PO_TEXBEML, - PO_TEXBRDF, // Xbox ext. - PO_TEXCOORD, - PO_TEXCRD, // Note: ps.1.4 only - PO_TEXKILL, - PO_TEXREG2AR, - PO_TEXREG2GB, - PO_TEXDP3, // Note : ps.1.3 only - PO_TEXDP3TEX, // Note : ps.1.3 only - PO_TEXM3X2TEX, - PO_TEXM3X2DEPTH, // Note : requires ps.1.3 and a preceding texm3x2pad - PO_TEXM3X3DIFF, // Xbox ext. - PO_TEXM3X3VSPEC, - PO_TEXM3X3TEX, // Note : Uses a cube texture - PO_TEXM3X2PAD, // Note : Must be combined with texm3x2tex or texm3x2depth - PO_TEXM3X3PAD, - PO_TEXM3X3SPEC, // NOTE : NEEDS 3 ARGUMENTS! 
- // Direct3D8 arithmetic instructions : - PO_ADD, - PO_CMP, - PO_CND, - PO_DP3, // dp3 d, s1,s2 : d=s0 dot s1 (replicated to all channels, .rgb=color only, .a=color+alpha) - PO_DP4, // dp3 d, s1,s2 : d.r=d.g=d.b=d.a=(s1.r*s2.r)+(s1.g*s2.g)+(s1.b*s2.b)+(s1.a*s2.a) - PO_LRP, - PO_MAD, - PO_MOV, - PO_MUL, - PO_NOP, - PO_SUB, - PO_RCP, // Note: ps.2.0 and up only - // Xbox1 opcodes : - PO_XMMA, - PO_XMMC, - PO_XDM, - PO_XDD, - PO_XFC, - PO_XPS, -}; - -const struct { char *mn; int _Out; int _In; char *note; } PSH_OPCODE_DEFS[/*PSH_OPCODE*/] = { - // Pixel shader header opcodes (must be specified in this order) : - {/* PO_COMMENT */ /*mn:*/";", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, // - {/* PO_PS */ /*mn:*/"ps", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, // Must occur once - {/* PO_DEF */ /*mn:*/"def", /*_Out: */ 1, /*_In: */ 4, /*note:*/"" }, // Output must be a PARAM_C, arguments must be 4 floats [0.00f .. 1.00f] - {/* PO_DCL */ /*mn:*/"dcl", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_DCL_2D */ /*mn:*/"dcl_2d", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_DCL_CUBE */ /*mn:*/"dcl_cube", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_DCL_VOLUME */ /*mn:*/"dcl_volume", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, // Note : ps.2.0 and up only - {/* PO_TEX */ /*mn:*/"tex", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, - {/* PO_TEXLD */ /*mn:*/"texld", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // Note : ps.1.4 and up only - {/* PO_TEXLD2 */ /*mn:*/"texld", /*_Out: */ 1, /*_In: */ 2, /*note:*/"" }, // Note : ps.1.4 and up only - {/* PO_TEXBEM */ /*mn:*/"texbem", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXBEML */ /*mn:*/"texbeml", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXBRDF */ /*mn:*/"texbrdf", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ Not supported by Direct3D8 ? 
- {/* PO_TEXCOORD */ /*mn:*/"texcoord", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, - {/* PO_TEXCRD */ /*mn:*/"texcrd", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // Note: ps.1.4 only - {/* PO_TEXKILL */ /*mn:*/"texkill", /*_Out: */ 1, /*_In: */ 0, /*note:*/"" }, - {/* PO_TEXDP3 */ /*mn:*/"texdp3", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXDP3TEX */ /*mn:*/"texdp3tex", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X2TEX */ /*mn:*/"texm3x2tex", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X2DEPTH */ /*mn:*/"texm3x2depth", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ requires ps.1.3 and a preceding texm3x2pad - {/* PO_TEXM3X3DIFF */ /*mn:*/"texm3x3diff", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ Not supported by Direct3D8 ? - {/* PO_TEXM3X3VSPEC */ /*mn:*/"texm3x3vspec", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X3TEX */ /*mn:*/"texm3x3tex", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, // /*note: */ Uses a cube texture - {/* PO_TEXREG2AR */ /*mn:*/"texreg2ar", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXREG2GB */ /*mn:*/"texreg2gb", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X2PAD */ /*mn:*/"texm3x2pad", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X3PAD */ /*mn:*/"texm3x3pad", /*_Out: */ 1, /*_In: */ 1, /*note:*/"" }, - {/* PO_TEXM3X3SPEC */ /*mn:*/"texm3x3spec", /*_Out: */ 1, /*_In: */ 2, /*note:*/"" }, - // Arithmetic opcodes : - {/* PO_ADD */ /*mn:*/"add", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0+s1" }, - {/* PO_CMP */ /*mn:*/"cmp", /*_Out: */ 1, /*_In: */ 3, /*note:*/"d0={s0>=0?s1:s2}" }, - {/* PO_CND */ /*mn:*/"cnd", /*_Out: */ 1, /*_In: */ 3, /*note:*/"d0={s0.a>0.5?s1:s2}" }, // 1st input must be "r0.a" - {/* PO_DP3 */ /*mn:*/"dp3", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0 dot3 s1" }, - {/* PO_DP4 */ /*mn:*/"dp4", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0 dot4 s1" }, - {/* PO_LRP */ /*mn:*/"lrp", /*_Out: */ 1, /*_In: */ 3, 
/*note:*/"d0=s0*s1+{1-s0}*s2=s0*{s1-s2}+s2" }, - {/* PO_MAD */ /*mn:*/"mad", /*_Out: */ 1, /*_In: */ 3, /*note:*/"d0=s0*s1+s2" }, - {/* PO_MOV */ /*mn:*/"mov", /*_Out: */ 1, /*_In: */ 1, /*note:*/"d0=s0" }, - {/* PO_MUL */ /*mn:*/"mul", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0*s1" }, - {/* PO_NOP */ /*mn:*/"nop", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, - {/* PO_SUB */ /*mn:*/"sub", /*_Out: */ 1, /*_In: */ 2, /*note:*/"d0=s0-s1" }, - {/* PO_RCP */ /*mn:*/"rcp", /*_Out: */ 1, /*_In: */ 1, /*note:*/"d0=1/s0" }, // Note: ps.2.0 and up only - // Xbox-only {NV2A} opcodes : - {/* PO_XMMA */ /*mn:*/"xmma", /*_Out: */ 3, /*_In: */ 4, /*note:*/"d0=s0*s1, d1=s2*s3, d2={s0*s1}+{s2*s3}" }, - {/* PO_XMMC */ /*mn:*/"xmmc", /*_Out: */ 3, /*_In: */ 4, /*note:*/"d0=s0*s1, d1=s2*s3, d2={r0.a>0.5}?{s0*s1}:{s2*s3}" }, - {/* PO_XDM */ /*mn:*/"xdm", /*_Out: */ 2, /*_In: */ 4, /*note:*/"d0=s0 dot s1, d1=s2*s3" }, - {/* PO_XDD */ /*mn:*/"xdd", /*_Out: */ 2, /*_In: */ 4, /*note:*/"d0=s0 dot s1, d1=s2 dot s3" }, - {/* PO_XFC */ /*mn:*/"xfc", /*_Out: */ 0, /*_In: */ 7, /*note:*/"r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.a, prod=s4*s5, sum=r0+v1" }, - {/* PO_XPS */ /*mn:*/"xps", /*_Out: */ 0, /*_In: */ 0, /*note:*/"" }, // Must occur once -}; - -enum PSH_ARGUMENT_TYPE -{ - PARAM_VALUE, // Xbox only; Numberic constants used in Xbox-only opcodes - PARAM_DISCARD, // Xbox only; - PARAM_FOG, // Final combiner only; Read-only register fog register - PARAM_V1R0_SUM, // Final combiner only; Read-only register that contains the result of V1+R0 - PARAM_EF_PROD, // Final combiner only; Read-only register that contains the result of final combiner parameters E * F - PARAM_oDepth, // Output depth register - PARAM_R, // Temporary registers (unassigned except r0.a, which on NV2A is initially set to t0.a) - PARAM_T, // Textures - PARAM_V, // Vertex colors - PARAM_C, // Constant registers, set by def opcodes or SetPixelShaderConstant - PARAM_S, // Sampler registers - PARAM_oC, // Output color registers -}; - 
-const char *PSH_ARGUMENT_TYPE_Str[/*PSH_ARGUMENT_TYPE*/] = { -// Prefix # r/w Input? Output? Note - "", // * r No No Used for numeric constants like -1, 0, 1 - "discard", // * w No Yes Only for xbox opcodes (native opcodes have single output - discards must be removed) - "fog", // 1 r Yes No Only for final combiner parameter - "sum", // 1 r Yes No Only for final combiner parameter - "prod", // 1 r Yes No Only for final combiner parameter - "oDepth", // - "r", // 2 r/w Yes Yes We fake a few extra registers and resolve them in FixupPixelShader - "t", // 4 r/w Yes Yes D3D9 cannot write to these! - "v", // 2 r Yes Yes - "c", // 16 r Yes No Xbox has 8*c0,c1=16, while PC D3D8 has only 8, we try to reduce that in FixupPixelShader - "s", // 16 - No Yes - "oC", // -}; - -constexpr int XFC_COMBINERSTAGENR = xbox::X_PSH_COMBINECOUNT; // Always call XFC 'stage 9', 1 after the 8th combiner - -constexpr int PSH_XBOX_MAX_C_REGISTER_COUNT = 16; -constexpr int PSH_XBOX_MAX_R_REGISTER_COUNT = 2; -constexpr int PSH_XBOX_MAX_T_REGISTER_COUNT = 4; -constexpr int PSH_XBOX_MAX_V_REGISTER_COUNT = 2; - -// Mapping indices of Xbox register combiner constants to host pixel shader constants; -// The first 16 are identity-mapped (C0_1 .. C0_7 are C0 .. C7 on host, C1_0 .. C1_7 are C8 .. 
C15 on host) : -constexpr int PSH_XBOX_CONSTANT_C0 = 0; // = 0..15 -// Then two final combiner constants : -constexpr int PSH_XBOX_CONSTANT_FC0 = PSH_XBOX_CONSTANT_C0 + PSH_XBOX_MAX_C_REGISTER_COUNT; // = 16 -constexpr int PSH_XBOX_CONSTANT_FC1 = PSH_XBOX_CONSTANT_FC0 + 1; // = 17 -// Fog requires a constant (as host PS1.4 doesn't support the FOG register) -constexpr int PSH_XBOX_CONSTANT_FOG = PSH_XBOX_CONSTANT_FC1 + 1; // = 18 -// Bump Environment Material registers -constexpr int PSH_XBOX_CONSTANT_BEM = PSH_XBOX_CONSTANT_FOG + 1; // = 19..22 -// Bump map Luminance registers -constexpr int PSH_XBOX_CONSTANT_LUM = PSH_XBOX_CONSTANT_BEM + 4; // = 23..26 -// This concludes the set of constants that need to be set on host : -constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_LUM + 4; // = 27 -// After those, we need two constants for literal values, which we DEF'ine in ConvertConstantsToNative : -constexpr int PSH_XBOX_CONSTANT_MUL0 = PSH_XBOX_CONSTANT_MAX; // = 27 -constexpr int PSH_XBOX_CONSTANT_MUL1 = PSH_XBOX_CONSTANT_MUL0 + 1; // = 28 - -constexpr int FakeRegNr_Sum = PSH_XBOX_MAX_T_REGISTER_COUNT + 0; -constexpr int FakeRegNr_Prod = PSH_XBOX_MAX_T_REGISTER_COUNT + 1; -constexpr int FakeRegNr_Xmm1 = PSH_XBOX_MAX_T_REGISTER_COUNT + 2; -constexpr int FakeRegNr_Xmm2 = PSH_XBOX_MAX_T_REGISTER_COUNT + 3; - -enum PSH_INST_MODIFIER { - INSMOD_NONE, // y = x - INSMOD_BIAS, // y = x - 0.5 // Xbox only : TODO : Fixup occurrances! - INSMOD_X2, // y = x * 2 - INSMOD_BX2, // y = (x - 0.5) * 2 // Xbox only : TODO : Fixup occurrances! 
- INSMOD_X4, // y = x * 4 - INSMOD_D2, // y = x * 0.5 - INSMOD_SAT, // Xbox doesn"t support this, but has ARGMOD_SATURATE instead - INSMOD_X8, // y = x * 8 // ps 1.4 only - INSMOD_D4, // y = x * 0.25 // ps 1.4 only - INSMOD_D8, // y = x * 0.125 // ps 1.4 only -}; - -const char *PSH_INST_MODIFIER_Str[/*PSH_INST_MODIFIER*/] = { - "", - "_bias", - "_x2", - "_bx2", - "_x4", - "_d2", - "_sat", - "_x8", - "_d4", - "_d8", -}; - -// Four argument modifiers (applied in this order) : -// 1: Inversion (invert or negate : "1-" or "-") -// 2: Apply bias ("_bias") -// 3: Apply scale ("_x2", "_bx2", "_x4", or "_d2") -// 4: Apply clamp ("_sat") -enum PSH_ARG_MODIFIER { - ARGMOD_IDENTITY, // y = x - - ARGMOD_INVERT, // y = 1-x -> 0..1 > 1..0 - ARGMOD_NEGATE, // y = -x -> 0..1 > 0..-1 - - ARGMOD_BIAS, // y = x-0.5 -> 0..1 > -0.5..0.5 - - ARGMOD_SCALE_X2, // y = x*2 -> 0..1 > 0..2 - ARGMOD_SCALE_BX2, // y = (x*2)-1 -> 0..1 > -1..1 - ARGMOD_SCALE_X4, // y = x*4 -> 0..1 > 0..4 - ARGMOD_SCALE_D2, // y = x/2 -> 0..1 > 0..0.5 - - ARGMOD_SATURATE, // Xbox - not available in PS1.3 (can be done on output instead) - - ARGMOD_ALPHA_REPLICATE, - ARGMOD_BLUE_REPLICATE // PS1.1-PS1.3 only allow this if destination writemask = .a -}; - -typedef DWORD PSH_ARG_MODIFIERs; // = set of PSH_ARG_MODIFIER; - -const char *PSH_ARG_MODIFIER_Str[/*PSH_ARG_MODIFIER*/] = { - "%s", - - "1-%s", - "-%s", - - "%s_bias", - - "%s_x2", - "%s_bx2", - "%s_x4", - "%s_d2", - - "%s_sat", - - "%s", // .a is added via Mask - "%s" // .b idem -}; - -struct RPSRegisterObject { - bool IsAlpha; - PS_REGISTER Reg; - - void Decode(uint8_t Value, bool aIsAlpha); - std::string DecodedToString(); -}; - -struct RPSInputRegister : RPSRegisterObject { - PS_CHANNEL Channel; - PS_INPUTMAPPING InputMapping; - - void Decode(uint8_t Value, bool aIsAlpha); - std::string DecodedToString(); -}; - -struct RPSCombinerOutput : RPSRegisterObject { - RPSInputRegister Input1; // Called InputA or InputC (depending if it's inside the AB or CD combiner) 
- RPSInputRegister Input2; // Called InputC or InputD (depending if it's inside the AB or CD combiner) - bool DotProduct; // False=Multiply, True=DotProduct - bool BlueToAlpha; // False=Alpha-to-Alpha, True=Blue-to-Alpha - - void Decode(uint8_t Value, DWORD PSInputs, bool aIsAlpha); -}; - -struct RPSCombinerOutputMuxSum : RPSRegisterObject { - RPSCombinerOutput OutputAB; // Contains InputA and InputB (as Input1 and Input2) - RPSCombinerOutput OutputCD; // Contains InputC and InputD (as Input1 and Input2) -}; - -struct RPSCombinerStageChannel { - RPSCombinerOutputMuxSum OutputSUM; // Contains OutputAB, OutputCD - PS_COMBINEROUTPUT CombinerOutputFlags; - bool AB_CD_SUM; // True=AB+CD, False=MUX(AB;CD) based on R0.a - - void Decode(DWORD PSInputs, DWORD PSOutputs, bool aIsAlpha = false); -}; - -struct RPSCombinerStage { - RPSCombinerStageChannel RGB; - RPSCombinerStageChannel Alpha; -}; - -struct RPSFinalCombiner { - RPSInputRegister InputA; - RPSInputRegister InputB; - RPSInputRegister InputC; - RPSInputRegister InputD; - RPSInputRegister InputE; - RPSInputRegister InputF; - RPSInputRegister InputG; - - PS_FINALCOMBINERSETTING FinalCombinerFlags; - - uint8_t FinalCombinerC0Mapping; - uint8_t FinalCombinerC1Mapping; - - DWORD dwPS_GLOBALFLAGS; - - void Decode(const DWORD PSFinalCombinerInputsABCD, const DWORD PSFinalCombinerInputsEFG, const DWORD PSFinalCombinerConstants); -}; - -constexpr DWORD MASK_NONE = 0x000; -constexpr DWORD MASK_R = 0x001; -constexpr DWORD MASK_G = 0x002; -constexpr DWORD MASK_B = 0x004; -constexpr DWORD MASK_A = 0x008; -constexpr DWORD MASK_RGB = MASK_R | MASK_G | MASK_B; -constexpr DWORD MASK_RGBA = MASK_R | MASK_G | MASK_B | MASK_A; - -enum - TArgumentType { - atInput, atOutput, atFinalCombiner -}; - -typedef struct _PSH_RECOMPILED_SHADER { - xbox::X_D3DPIXELSHADERDEF PSDef; - std::string NewShaderStr; - IDirect3DPixelShader* ConvertedHandle; -} PSH_RECOMPILED_SHADER, -*PPSH_RECOMPILED_SHADER; - -typedef struct _PSH_IMD_ARGUMENT { - 
PSH_ARGUMENT_TYPE Type; // For parameters: R, T, V or C For output : Discard, R, T or V - int16_t Address; // Register address - DWORD Mask; - PSH_ARG_MODIFIERs Modifiers; - float Multiplier; - - void SetConstValue(float Value); - float GetConstValue(); - bool UsesRegister(); - bool IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); // overload; - bool IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask); // overload; - void SetRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask); - bool HasModifier(PSH_ARG_MODIFIER modifier); - bool SetScaleConstRegister(float factor, const PSH_RECOMPILED_SHADER& pRecompiled); - bool SetScaleBemLumRegister(D3DTEXTURESTAGESTATETYPE factor, int stage, const PSH_RECOMPILED_SHADER& pRecompiled); - std::string ToString(); - bool Decode(const DWORD Value, DWORD aMask, TArgumentType ArgumentType); - void Invert(); - void Negate(); -} PSH_IMD_ARGUMENT, -*PPSH_IMD_ARGUMENT; - -//TPSH_IMD_ARGUMENTArray = array[0..(MaxInt div SizeOf(PSH_IMD_ARGUMENT)) - 1] of PSH_IMD_ARGUMENT; -//PPSH_IMD_ARGUMENTs = ^TPSH_IMD_ARGUMENTArray; - -typedef struct _PSH_INTERMEDIATE_FORMAT { - int CombinerStageNr; - bool IsCombined; - PSH_OPCODE Opcode; - std::string CommentString; - PSH_INST_MODIFIER Modifier; - PSH_IMD_ARGUMENT Output[3]; // 3 = xmm* output count - PSH_IMD_ARGUMENT Parameters[7]; // 7 = xfc parameter count - - _PSH_INTERMEDIATE_FORMAT *Initialize(const PSH_OPCODE aOpcode); - std::string ToString(); - bool IsArithmetic(); - void ScaleOutput(float aFactor); - bool ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress); // overload; - bool ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask); // overload; - bool ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, int& addressCount, int& total); // overload; - bool WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress); // overload; - bool WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask); // overload; 
- void SwapParameter(const int Index1, const int Index2); - void XSwapOutput(); - bool MoveRemovableParametersRight(const int Index1, const int Index2); - bool XMoveNonRegisterOutputsRight(); - void XCopySecondOpcodeToFirst(const PSH_OPCODE aOpcode); - bool Decode(DWORD aCombinerStageNr, DWORD PSInputs, DWORD PSOutputs, DWORD aMask); - bool DecodeFinalCombiner(DWORD aPSFinalCombinerInputsABCD, DWORD aPSFinalCombinerInputsEFG); -} PSH_INTERMEDIATE_FORMAT, -*PPSH_INTERMEDIATE_FORMAT; - -struct PSH_XBOX_SHADER { - uint32_t m_PSVersion; // see D3DPS_VERSION - https://msdn.microsoft.com/en-us/library/windows/desktop/bb172592(v=vs.85).aspx - int MaxConstantFloatRegisters; - int MaxTemporaryRegisters; - int MaxSamplerRegisters; // Sampler (Direct3D 9 asm-ps) - int MaxTextureCoordinateRegisters; - int MaxInputColorRegisters; - int PSH_PC_MAX_REGISTER_COUNT; - - // Reserve enough slots for all shaders, so we need space for 2 constants, 5 lines per texture addressing codes and 10 lines per opcode : : - PSH_INTERMEDIATE_FORMAT Intermediate[2 + (xbox::X_D3DTS_STAGECOUNT * 5) + (xbox::X_PSH_COMBINECOUNT * 10) + 1]; - int IntermediateCount; - - PS_TEXTUREMODES PSTextureModes[xbox::X_D3DTS_STAGECOUNT]; - PS_DOTMAPPING PSDotMapping[xbox::X_D3DTS_STAGECOUNT]; - DWORD PSCompareMode[xbox::X_D3DTS_STAGECOUNT]; - int PSInputTexture[xbox::X_D3DTS_STAGECOUNT]; - - PS_FINALCOMBINERSETTING FinalCombinerFlags; - // Note : The following constants are only needed for PSH_XBOX_SHADER::DecodedToString, - // they are not involved in the actual pixel shader recompilation anymore : - RPSFinalCombiner FinalCombiner; - RPSCombinerStage Combiners[xbox::X_PSH_COMBINECOUNT]; - int NumberOfCombiners; - DWORD CombinerCountFlags; // For PS_COMBINERCOUNTFLAGS - // Read from CombinerCountFlags : - bool CombinerMuxesOnMsb; - bool CombinerHasUniqueC0; - bool CombinerHasUniqueC1; - - int StartPos; - - PSH_RECOMPILED_SHADER Recompiled = {}; - - void SetPSVersion(const uint32_t PSVersion); - - std::string 
ToString(); - void Log(const char *PhaseStr); - PPSH_INTERMEDIATE_FORMAT NewIntermediate(); - void InsertIntermediate(PPSH_INTERMEDIATE_FORMAT pIntermediate, int Index); - void DeleteIntermediate(int Index); - void DeleteLastIntermediate(); - std::string static OriginalToString(xbox::X_D3DPIXELSHADERDEF *pPSDef); - void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef); - PSH_RECOMPILED_SHADER Convert(xbox::X_D3DPIXELSHADERDEF *pPSDef); - std::string DecodedToString(xbox::X_D3DPIXELSHADERDEF *pPSDef); - bool _NextIs2D(int Stage); - bool DecodeTextureModes(xbox::X_D3DPIXELSHADERDEF *pPSDef); - int GetTextureStageModifiers(int Stage); - void InsertTex3x2Instructions(int Stage, int inputStage, std::vector& InsertIns); - void InsertTex3x3Instructions(int Stage, int inputStage, std::vector& InsertIns); - bool InsertTextureModeInstruction(xbox::X_D3DPIXELSHADERDEF *pPSDef, int Stage, PSH_OPCODE opcode, std::vector& InsertIns, int& InsertPos); - bool MoveRemovableParametersRight(); - void ConvertXboxOpcodesToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef); - void _SetColor(/*var OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLOR ConstColor); - void _SetColor(/*var OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLORVALUE ConstColor); - bool ConvertConstantsToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef, /*var OUT*/PSH_RECOMPILED_SHADER *Recompiled); - bool RemoveUselessWrites(); - int MaxRegisterCount(PSH_ARGUMENT_TYPE aRegType); - bool IsValidNativeOutputRegister(PSH_ARGUMENT_TYPE aRegType, int index = -1); - int RegisterIsFreeFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); - int RegisterIsUsedFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); - int NextFreeRegisterFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int bIndex = -1, int startAddress = 0, int excludeAddress = -1); - bool IsRegisterFreeFromIndexOnwards(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress); - void ReplaceInputRegisterFromIndexOnwards(int aIndex, - 
PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex = -1); - void ReplaceOutputRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex = -1); - void ReplaceRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex = -1, bool replaceInput = true, bool replaceOutput = true); - bool ConvertXMMToNative_Except3RdOutput(int i); - void ConvertXPSToNative(int i); - void ConvertXMMAToNative(int i); - void ConvertXMMCToNative(int i); - void ConvertXDMToNative(int i); - void ConvertXDDToNative(int i); - void ConvertXFCToNative(int i); - bool FixArgumentModifiers(); - bool CombineInstructions(); - bool RemoveNops(); - bool SimplifyMOV(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyADD(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyMAD(PPSH_INTERMEDIATE_FORMAT Cur, int index); - bool SimplifySUB(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyMUL(PPSH_INTERMEDIATE_FORMAT Cur); - bool SimplifyLRP(PPSH_INTERMEDIATE_FORMAT Cur, int index); - bool FixupCND(PPSH_INTERMEDIATE_FORMAT Cur, int index); - bool FixupPixelShader(); - bool FixInvalidSrcSwizzle(); - bool FixMissingR0a(); - bool FixMissingR1a(); - bool FixCoIssuedOpcodes(); - bool FixInvalidDstRegister(); - bool FixConstantParameters(); - bool FixInstructionModifiers(); - bool FixUninitializedReads(); - bool FixOverusedRegisters(); - bool FinalizeShader(); - - static void GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]); - static void GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]); - static void GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, DWORD psCompareModes[xbox::X_D3DTS_STAGECOUNT]); - static void 
GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]); -}; - -/* -* Blueshogun's code (useful for debugging the PixelShader binary format) -*/ - -// PS Texture Modes -char* PS_TextureModesStr[/*PS_TEXTUREMODES*/] = -{ - "PS_TEXTUREMODES_NONE", // 0x00 - "PS_TEXTUREMODES_PROJECT2D", // 0x01 - "PS_TEXTUREMODES_PROJECT3D", // 0x02 - "PS_TEXTUREMODES_CUBEMAP", // 0x03 - "PS_TEXTUREMODES_PASSTHRU", // 0x04 - "PS_TEXTUREMODES_CLIPPLANE", // 0x05 - "PS_TEXTUREMODES_BUMPENVMAP", // 0x06 - "PS_TEXTUREMODES_BUMPENVMAP_LUM", // 0x07 - "PS_TEXTUREMODES_BRDF", // 0x08 - "PS_TEXTUREMODES_DOT_ST", // 0x09 - "PS_TEXTUREMODES_DOT_ZW", // 0x0A - "PS_TEXTUREMODES_DOT_RFLCT_DIFF", // 0x0B - "PS_TEXTUREMODES_DOT_RFLCT_SPEC", // 0x0C - "PS_TEXTUREMODES_DOT_STR_3D", // 0x0D - "PS_TEXTUREMODES_DOT_STR_CUBE", // 0x0E - "PS_TEXTUREMODES_DPNDNT_AR", // 0x0F - "PS_TEXTUREMODES_DPNDNT_GB", // 0x10 - "PS_TEXTUREMODES_DOTPRODUCT", // 0x11 - "PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST", // 0x12 - "???", // 0x13 - "???", // 0x14 - "???", // 0x15 - "???", // 0x16 - "???", // 0x17 - "???", // 0x18 - "???", // 0x19 - "???", // 0x1A - "???", // 0x1B - "???", // 0x1C - "???", // 0x1D - "???", // 0x1E - "???", // 0x1F -}; - -// PS DotMapping -char* PS_DotMappingStr[/*PS_DOTMAPPING*/] = -{ - "PS_DOTMAPPING_ZERO_TO_ONE", // 0x00 - "PS_DOTMAPPING_MINUS1_TO_1_D3D", // 0x01 - "PS_DOTMAPPING_MINUS1_TO_1_GL", // 0x02 - "PS_DOTMAPPING_MINUS1_TO_1", // 0x03 - "PS_DOTMAPPING_HILO_1", // 0x04 - "???", // 0x05 - "???", // 0x06 - "PS_DOTMAPPING_HILO_HEMISPHERE", // 0x07 -}; - -#if 1 // array unusable for bitflags -// PS CompareMode -char* PS_CompareModeStr[/*PS_COMPAREMODE*/] = -{ - "PS_COMPAREMODE_S_LT", // 0x00L - "PS_COMPAREMODE_S_GE", // 0x01L - - "PS_COMPAREMODE_T_LT", // 0x00L - "PS_COMPAREMODE_T_GE", // 0x02L - - "???", - - "PS_COMPAREMODE_R_LT", // 0x00L - "PS_COMPAREMODE_R_GE", // 0x04L - - "???", - "???", - "???", - - "PS_COMPAREMODE_Q_LT", // 0x00L - 
"PS_COMPAREMODE_Q_GE", // 0x08L -}; -#endif - -#if 1 // array unfit for bitflags -// PS CombinerCountFlags -char* PS_CombinerCountFlagsStr[/*PS_COMBINERCOUNTFLAGS*/] = -{ - "PS_COMBINERCOUNT_MUX_LSB", // 0x0000L, // mux on r0.a lsb - "PS_COMBINERCOUNT_MUX_MSB", // 0x0001L, // mux on r0.a msb - - "PS_COMBINERCOUNT_SAME_C0", // 0x0000L, // c0 same in each stage - "PS_COMBINERCOUNT_UNIQUE_C0", // 0x0010L, // c0 unique in each stage - - "PS_COMBINERCOUNT_SAME_C1", // 0x0000L, // c1 same in each stage - "PS_COMBINERCOUNT_UNIQUE_C1", // 0x0100L // c1 unique in each stage -}; -#endif - -// PS InputMapping -std::string PS_InputMappingStr[/*PS_INPUTMAPPING*/] = -{ - "PS_INPUTMAPPING_UNSIGNED_IDENTITY", // 0x00L, // max(0,x) OK for final combiner: y = abs(x) - "PS_INPUTMAPPING_UNSIGNED_INVERT", // 0x20L, // 1 - max(0,x) OK for final combiner: y = 1 - x - "PS_INPUTMAPPING_EXPAND_NORMAL", // 0x40L, // 2*max(0,x) - 1 invalid for final combiner - "PS_INPUTMAPPING_EXPAND_NEGATE", // 0x60L, // 1 - 2*max(0,x) invalid for final combiner - "PS_INPUTMAPPING_HALFBIAS_NORMAL", // 0x80L, // max(0,x) - 1/2 invalid for final combiner - "PS_INPUTMAPPING_HALFBIAS_NEGATE", // 0xa0L, // 1/2 - max(0,x) invalid for final combiner - "PS_INPUTMAPPING_SIGNED_IDENTITY", // 0xc0L, // x invalid for final combiner - "PS_INPUTMAPPING_SIGNED_NEGATE", // 0xe0L, // -x invalid for final combiner -}; - -// PS Register (note, a few have one space, to line up the output a little) -std::string PS_RegisterStr[/*PS_REGISTER*/] = -{ - "PS_REGISTER_ZERO", // 0x00L, // r - "PS_REGISTER_DISCARD", // 0x00L, // w - "PS_REGISTER_C0 ", // 0x01L, // r - "PS_REGISTER_C1 ", // 0x02L, // r - "PS_REGISTER_FOG", // 0x03L, // r - "PS_REGISTER_V0 ", // 0x04L, // r/w - "PS_REGISTER_V1 ", // 0x05L, // r/w - "??", // 0x06 - "??", // 0x07 - "PS_REGISTER_T0 ", // 0x08L, // r/w - "PS_REGISTER_T1 ", // 0x09L, // r/w - "PS_REGISTER_T2 ", // 0x0aL, // r/w - "PS_REGISTER_T3 ", // 0x0bL, // r/w - "PS_REGISTER_R0 ", // 0x0cL, // r/w - 
"PS_REGISTER_R1 ", // 0x0dL, // r/w - "PS_REGISTER_V1R0_SUM", // 0x0eL, // r - "PS_REGISTER_EF_PROD", // 0x0fL, // r - - "PS_REGISTER_ONE", // PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // OK for final combiner - "PS_REGISTER_NEGATIVE_ONE", // PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // invalid for final combiner - "PS_REGISTER_ONE_HALF", // PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // invalid for final combiner - "PS_REGISTER_NEGATIVE_ONE_HALF" // PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // invalid for final combiner -}; - -// PS Channel -char* PS_ChannelStr[/*PS_CHANNEL*/] = -{ - "PS_CHANNEL_RGB", // 0x00, // used as RGB source - "PS_CHANNEL_BLUE", // 0x00, // used as ALPHA source - "PS_CHANNEL_ALPHA", // 0x10, // used as RGB or ALPHA source -}; - -// PS FinalCombinerSetting -char* PS_FinalCombinerSettingStr[/*PS_FINALCOMBINERSETTING*/] = -{ - "PS_FINALCOMBINERSETTING_CLAMP_SUM", // 0x80, // V1+R0 sum clamped to [0,1] - "PS_FINALCOMBINERSETTING_COMPLEMENT_V1", // 0x40, // unsigned invert mapping - "PS_FINALCOMBINERSETTING_COMPLEMENT_R0", // 0x20, // unsigned invert mapping -}; - -// PS CombineOutput -char* PS_CombineOutputStr[/*PS_COMBINEROUTPUT*/] = -{ - "PS_COMBINEROUTPUT_IDENTITY", // 0x00L, // y = x - "PS_COMBINEROUTPUT_BIAS", // 0x08L, // y = x - 0.5 - "PS_COMBINEROUTPUT_SHIFTLEFT_1", // 0x10L, // y = x*2 - "PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS", // 0x18L, // y = (x - 0.5)*2 = x*2 - 1.0 - "PS_COMBINEROUTPUT_SHIFTLEFT_2", // 0x20L, // y = x*4 - "PS_COMBINEROUTPUT_SHIFTRIGHT_1", // 0x30L, // y = x/2 = x*0.5 - - "PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA", // 0x80L, // RGB only - - "PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA", // 0x40L, // RGB only - - "PS_COMBINEROUTPUT_AB_MULTIPLY", // 0x00L, - "PS_COMBINEROUTPUT_AB_DOT_PRODUCT", // 0x02L, // RGB only - - "PS_COMBINEROUTPUT_CD_MULTIPLY", // 0x00L, - "PS_COMBINEROUTPUT_CD_DOT_PRODUCT", // 0x01L, // RGB only - - "PS_COMBINEROUTPUT_AB_CD_SUM", // 0x00L, // 3rd output is AB+CD - 
"PS_COMBINEROUTPUT_AB_CD_MUX", // 0x04L, // 3rd output is MUX(AB,CD) based on R0.a -}; - -// PS GlobalFlags -char* PS_GlobalFlagsStr[/*PS_GLOBALFLAGS*/] = -{ - "PS_GLOBALFLAGS_NO_TEXMODE_ADJUST", // 0x0000L, // don't adjust texture modes - "PS_GLOBALFLAGS_TEXMODE_ADJUST", // 0x0001L, // adjust texture modes according to set texture -}; - -const int CONST_NEG_ONE = -2; -const int CONST_NEG_HALF = -1; -const int CONST_ZERO = 0; -const int CONST_POS_HALF = 1; // Note : Instead of 0.5 we use 1 (so we can keep using integers) -const int CONST_POS_ONE = 2; - -/// - -std::string PSCombinerOutputFlagsToStr(const DWORD dwFlags, bool aIsAlpha = false) -{ - std::string Result = PS_CombineOutputStr[0 + ((dwFlags & 0x38) >> 3)]; - Result = Result + " | " + PS_CombineOutputStr[8 + ((dwFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) >> 1)]; - Result = Result + " | " + PS_CombineOutputStr[10 + ((dwFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) >> 0)]; - Result = Result + " | " + PS_CombineOutputStr[12 + ((dwFlags & PS_COMBINEROUTPUT_AB_CD_MUX) >> 2)]; - - if (!aIsAlpha) { - if (dwFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) - Result = Result + " | " + PS_CombineOutputStr[6]; - - if (dwFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) - Result = Result + " | " + PS_CombineOutputStr[7]; - } - - return Result; -} - -std::string PSFinalCombinerSettingToStr(const DWORD dwPS_FINALCOMBINERSETTING) -{ - std::string Result = ""; - if (dwPS_FINALCOMBINERSETTING & PS_FINALCOMBINERSETTING_CLAMP_SUM) - Result = Result + " | " + PS_FinalCombinerSettingStr[0]; - - if (dwPS_FINALCOMBINERSETTING & PS_FINALCOMBINERSETTING_COMPLEMENT_V1) - Result = Result + " | " + PS_FinalCombinerSettingStr[1]; - - if (dwPS_FINALCOMBINERSETTING & PS_FINALCOMBINERSETTING_COMPLEMENT_R0) - Result = Result + " | " + PS_FinalCombinerSettingStr[2]; - - if (!Result.empty()) - Result.erase(0, 3); - - return Result; -} - -/* PSH_IMD_ARGUMENT */ - -void PSH_IMD_ARGUMENT::SetConstValue(float Value) -{ - Type = PARAM_VALUE; - Address = 
CONST_ZERO; - Multiplier = Value; - Modifiers = 0; -} - -float PSH_IMD_ARGUMENT::GetConstValue() -{ - if (Type != PARAM_VALUE) { - // Anything other than a value-parameter returns a value never checked for : - return INFINITY; - } - - float Result = Multiplier; - - // y = 1-x -> 0..1 > 1..0 - if (HasModifier(ARGMOD_INVERT)) Result = 1.0f-Result; - - // y = -x -> 0..1 > 0..-1 - if (HasModifier(ARGMOD_NEGATE)) Result = -Result; - - // y = x-0.5 -> 0..1 > -0.5..0.5 - if (HasModifier(ARGMOD_BIAS)) Result = Result-0.5f; - - // y = x*2 -> 0..1 > 0..2 - if (HasModifier(ARGMOD_SCALE_X2)) Result = Result*2.0f; - - // y = (x*2)-1 -> 0..1 > -1..1 - if (HasModifier(ARGMOD_SCALE_BX2)) Result = (Result*2.0f)-1.0f; - - // y = x*4 -> 0..1 > 0..4 - if (HasModifier(ARGMOD_SCALE_X4)) Result = Result*4.0f; - - // y = x/2 -> 0..1 > 0..0.5 - if (HasModifier(ARGMOD_SCALE_D2)) Result = Result/2.0f; - - return Result; -} // GetConstValue - -bool PSH_IMD_ARGUMENT::UsesRegister() -{ - return (Type > PARAM_DISCARD); -} - -bool PSH_IMD_ARGUMENT::IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - return (Type == aRegType) - && (Address == aAddress || aAddress == -1); -} - -bool PSH_IMD_ARGUMENT::IsRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask) -{ - return IsRegister(aRegType, aAddress) - // Check the mask itself, but also 'mask-less' : - && (((Mask & aMask) == aMask) || (Mask == 0)); -} - -void PSH_IMD_ARGUMENT::SetRegister(PSH_ARGUMENT_TYPE aRegType, int16_t aAddress, DWORD aMask) -{ - Type = aRegType; - Address = aAddress; - Mask = aMask; -} - -bool PSH_IMD_ARGUMENT::HasModifier(PSH_ARG_MODIFIER modifier) -{ - return (Modifiers & (1 << modifier)) != 0; -} - -bool PSH_IMD_ARGUMENT::SetScaleConstRegister(float factor, const PSH_RECOMPILED_SHADER& pRecompiled) -{ - PSH_ARG_MODIFIERs modifiers = 0; - DWORD mask = Mask; - int address = Address; - - const int mappedConstant0 = PSH_XBOX_CONSTANT_MUL0; - const int mappedConstant1 = PSH_XBOX_CONSTANT_MUL1; - - if 
(factor < 0.0f) - { - factor = -factor; - modifiers = (1 << ARGMOD_NEGATE); - // This inversion is here to support negative scales, but it's not an actual match yet. - } - - // Note : 'switch(factor)' can't be used here, since that requires an ordinal value (and factor is a float) - if (factor == 1.0f) - { - address = mappedConstant0; - mask = MASK_R; - } - - else if (factor == 2.0f) - { - address = mappedConstant0; - mask = MASK_G; - } - - else if (factor == 4.0f) - { - address = mappedConstant0; - mask = MASK_B; - } - - else if (factor == 8.0f) - { - address = mappedConstant0; - mask = MASK_A; - } - - else if (factor == 0.0f) - { - address = mappedConstant1; - mask = MASK_R; - } - - else if (factor == 1.0f / 2.0f) - { - address = mappedConstant1; - mask = MASK_G; - } - - else if (factor == 1.0f / 4.0f) - { - address = mappedConstant1; - mask = MASK_B; - } - - else if (factor == 1.0f / 8.0f) - { - address = mappedConstant1; - mask = MASK_A; - } - else return false; - - Type = PARAM_C; - Address = address; - Mask = mask; - Modifiers = modifiers; - Multiplier = 1.0f; - - return true; -} - -bool PSH_IMD_ARGUMENT::SetScaleBemLumRegister(D3DTEXTURESTAGESTATETYPE factor, int stage, const PSH_RECOMPILED_SHADER& pRecompiled) -{ - const PSH_ARG_MODIFIERs modifiers = 0; - DWORD mask = Mask; - int address = Address; - - const int mappedConstant0 = PSH_XBOX_CONSTANT_BEM + stage; - const int mappedConstant1 = PSH_XBOX_CONSTANT_LUM + stage; - - switch (factor) - { - case D3DTSS_BUMPENVMAT00: - { - address = mappedConstant0; - mask = MASK_R; - break; - } - case D3DTSS_BUMPENVMAT01: - { - address = mappedConstant0; - mask = MASK_G; - break; - } - case D3DTSS_BUMPENVMAT11: - { - address = mappedConstant0; - mask = MASK_B; - break; - } - case D3DTSS_BUMPENVMAT10: - { - address = mappedConstant0; - mask = MASK_A; - break; - } - case D3DTSS_BUMPENVLSCALE: - { - address = mappedConstant1; - mask = MASK_R; - break; - } - case D3DTSS_BUMPENVLOFFSET: - { - address = mappedConstant1; - 
mask = MASK_G; - break; - } - default: return false; - } - - Type = PARAM_C; - Address = address; - Mask = mask; - Modifiers = modifiers; - Multiplier = 1.0f; - - return true; -} - -std::string PSH_IMD_ARGUMENT::ToString() -{ - std::string Result; - - if (Type == PARAM_VALUE) - { - Result = std::to_string(GetConstValue()); - if (Result.find(".") > 0) - Result = Result + 'f'; - - return Result; - } - - Result = PSH_ARGUMENT_TYPE_Str[Type]; - - if (Type >= PARAM_R) - Result = Result + std::to_string(Address); - - if (UsesRegister()) - { - for (DWORD Modifier = ARGMOD_IDENTITY; Modifier < ARGMOD_BLUE_REPLICATE; Modifier++) - if (HasModifier((PSH_ARG_MODIFIER)Modifier)) { - char buffer[256]; - Result = std::string(buffer, sprintf(buffer, PSH_ARG_MODIFIER_Str[Modifier], Result.c_str())); - } - - if ((Mask > 0) && (Mask != MASK_RGBA)) - { - Result = Result + '.'; - if ((Mask & MASK_R) > 0) Result = Result + 'r'; - if ((Mask & MASK_G) > 0) Result = Result + 'g'; - if ((Mask & MASK_B) > 0) Result = Result + 'b'; - if ((Mask & MASK_A) > 0) Result = Result + 'a'; - } - } - return Result; -} // ToString - -bool PSH_IMD_ARGUMENT::Decode(const DWORD Value, DWORD aMask, TArgumentType ArgumentType) -{ - PS_REGISTER Reg; - PS_INPUTMAPPING InputMapping; - PS_CHANNEL Channel; - - bool Result = true; - Address = 0; - Mask = aMask; - Modifiers = (1 << ARGMOD_IDENTITY); - Multiplier = 1.0; - - // Determine PS_REGISTER for this argument type : - { - Reg = (PS_REGISTER)(Value & 0xF); - if (ArgumentType == atOutput) - { - // Output arguments may not write to C0 or C1, prevent that : - if ((Reg == PS_REGISTER_C0) || (Reg == PS_REGISTER_C1)) - Reg = PS_REGISTER_CXBX_PROD; // unhandled case - will reach "invalid" else-block - } - else - { - // Input arguments (normal or final combiners) can use the extended PS_REGISTER values : - if (Reg == PS_REGISTER_ZERO) - Reg = (PS_REGISTER)(Value & 0xE0); - - // 'Signed Identity' flag on PS_REGISTER_ZERO has no meaning, treat as zero : - if (Reg == 
PS_REGISTER_CXBX_PROD) - Reg = PS_REGISTER_ZERO; - - // Prevent decoding final combiner registers outside that mode : - if (ArgumentType != atFinalCombiner) - if ((Reg == PS_REGISTER_FOG) || (Reg == PS_REGISTER_V1R0_SUM) || (Reg == PS_REGISTER_EF_PROD)) - Reg = PS_REGISTER_CXBX_PROD; // unhandled case - will reach "invalid" else-block - } - } - - switch (Reg) { - case PS_REGISTER_ZERO: - { - if (ArgumentType == atOutput) - { - // Mark output arguments as 'discard' and return that fact : - Type = PARAM_DISCARD; - Result = false; - } - else - Type = PARAM_VALUE; - - Address = CONST_ZERO; - Multiplier = 0.0f; - break; - } - case PS_REGISTER_C0: - Type = PARAM_C; - break; - case PS_REGISTER_C1: - { - Type = PARAM_C; - Address = 1; - break; - } - case PS_REGISTER_V0: - Type = PARAM_V; - break; - case PS_REGISTER_V1: - { - Type = PARAM_V; - Address = 1; - break; - } - case PS_REGISTER_T0: - Type = PARAM_T; - break; - case PS_REGISTER_T1: - { - Type = PARAM_T; - Address = 1; - break; - } - case PS_REGISTER_T2: - { - Type = PARAM_T; - Address = 2; - break; - } - case PS_REGISTER_T3: - { - Type = PARAM_T; - Address = 3; - break; - } - case PS_REGISTER_R0: - Type = PARAM_R; - break; - case PS_REGISTER_R1: - { - Type = PARAM_R; - Address = 1; - break; - } - // Registers only available when ArgumentType != atOutput (Reg is capped otherwise) : - case PS_REGISTER_ONE: - { - Type = PARAM_VALUE; - Address = CONST_POS_ONE; - Multiplier = 1.0f; - break; - } - case PS_REGISTER_NEGATIVE_ONE: - { - Type = PARAM_VALUE; - Address = CONST_NEG_ONE; - Multiplier = -1.0f; - break; - } - case PS_REGISTER_ONE_HALF: - { - Type = PARAM_VALUE; - Address = CONST_POS_HALF; - Multiplier = 0.5f; - break; - } - case PS_REGISTER_NEGATIVE_ONE_HALF: - { - Type = PARAM_VALUE; - Address = CONST_NEG_HALF; - Multiplier = -0.5f; - break; - } - // Registers only available when ArgumentType == atFinalCombiner (Reg is capped otherwise) : - case PS_REGISTER_FOG: - Type = PARAM_FOG; - break; - case 
PS_REGISTER_V1R0_SUM: - Type = PARAM_V1R0_SUM; - break; - case PS_REGISTER_EF_PROD: - Type = PARAM_EF_PROD; - break; - default : - EmuLog(LOG_LEVEL::DEBUG, "INVALID ARGUMENT!"); - - Result = false; - } - - // We're done if this decoding is meant for output parameters, - // or when the input is a value-parameter (already read above) : - if ((ArgumentType == atOutput) - || (Type == PARAM_VALUE) ) - return Result; - - // Handle the Channel Designator : - { - Channel = (PS_CHANNEL)(Value & PS_CHANNEL_ALPHA); - if (Channel == PS_CHANNEL_ALPHA) - // Input comes from alpha portion of input register (valid for both RGB and alpha portions) : - Mask = MASK_A; - else // = PS_CHANNEL_BLUE (for Alpha step) = PS_CHANNEL_BLUE (for RGB step) : - if (aMask == MASK_A) - // Input comes from b portion of input register (valid for alpha portion only) : - Mask = MASK_B; // Note : This is not the same as ARGMOD_BLUE_REPLICATE! - else - // Input comes from the RGB portion of the input register (valid for RGB portion only) : - Mask = aMask; // Note : Is already put here, but makes this code clearer - } - - InputMapping = (PS_INPUTMAPPING)(Value & 0xe0); - -// ARGMOD_BIAS, -// -// ARGMOD_SCALE_X2, ARGMOD_SCALE_BX2, ARGMOD_SCALE_X4, ARGMOD_SCALE_D2, -// -// ARGMOD_SATURATE, -// -// ARGMOD_ALPHA_REPLICATE, ARGMOD_BLUE_REPLICATE]; - - switch (InputMapping) { - case PS_INPUTMAPPING_UNSIGNED_IDENTITY: - Modifiers = (1 << ARGMOD_IDENTITY); - break; - case PS_INPUTMAPPING_UNSIGNED_INVERT: - Modifiers = (1 << ARGMOD_INVERT); - break; - case PS_INPUTMAPPING_EXPAND_NORMAL: - { - Modifiers = (1 << ARGMOD_SCALE_BX2); - Multiplier = 2.0f * Multiplier; - break; - } - case PS_INPUTMAPPING_EXPAND_NEGATE: - { - Modifiers = (1 << ARGMOD_NEGATE); - Multiplier = -Multiplier; - break; - } - case PS_INPUTMAPPING_HALFBIAS_NORMAL: - Modifiers = (1 << ARGMOD_BIAS); - break; -// case PS_INPUTMAPPING_HALFBIAS_NEGATE: -// Modifiers = (1 << ARGMOD_IDENTITY); ??? 
-// break; - case PS_INPUTMAPPING_SIGNED_IDENTITY: - Modifiers = (1 << ARGMOD_IDENTITY); - break; - case PS_INPUTMAPPING_SIGNED_NEGATE: - { - Modifiers = (1 << ARGMOD_NEGATE); - Multiplier = -Multiplier; - break; - } - } - return Result; -} // Decode - -void PSH_IMD_ARGUMENT::Invert() -{ - if (!HasModifier(ARGMOD_INVERT)) - Modifiers = Modifiers | (1 << ARGMOD_INVERT); - else - Modifiers = Modifiers & ~(1 << ARGMOD_INVERT); -} - -void PSH_IMD_ARGUMENT::Negate() -{ - if (!HasModifier(ARGMOD_NEGATE)) - Modifiers = Modifiers | (1 << ARGMOD_NEGATE); - else - Modifiers = Modifiers & ~(1 << ARGMOD_NEGATE); -} - -/* PSH_INTERMEDIATE_FORMAT */ - -_PSH_INTERMEDIATE_FORMAT *PSH_INTERMEDIATE_FORMAT::Initialize(const PSH_OPCODE aOpcode) -{ - int i; - - Opcode = aOpcode; - Modifier = INSMOD_NONE; - for (i = 0; i < 3; i++) - { - Output[i] = {}; - Output[i].Multiplier = 1.0f; - } - for (i = 0; i < 7; i++) - { - Parameters[i] = {}; - Parameters[i].Multiplier = 1.0f; - } - - return this; -} - -std::string PSH_INTERMEDIATE_FORMAT::ToString() -{ - std::string Result = {}; - int i; - char SeparatorChar; - - switch (Opcode) { - case PO_COMMENT: - { - Result = "; " + CommentString; - return Result; - } - case PO_PS: { - // 1.1 allows reading from 2 textures (which we use in 'cnd') and reading from the .b (blue) channel - // 1.3 allows the use of texm3x2depth (which can occur sometimes) - // 2.0 allows up to r12, c32, t8 and s16 (requires Direct3D9) - // 3.0 allows up to r32, c224, v10 (instead of t via dcl), s16 and vFace (which can do two-sided lighting) - - // Use supplied pixel shader version (if any is given) - DWORD PSVersion = Parameters[6].Mask; - - Result = "ps_" + std::to_string(D3DSHADER_VERSION_MAJOR(PSVersion)) - + "_" + std::to_string(D3DSHADER_VERSION_MINOR(PSVersion)); - return Result; - } - case PO_XPS: { - Result = "xps.1.1"; - return Result; - } - } - - if (IsCombined) - Result = "+"; - else - Result = ""; - - Result = Result + PSH_OPCODE_DEFS[Opcode].mn + 
PSH_INST_MODIFIER_Str[Modifier]; - - // Output a comma-separated list of output registers : - SeparatorChar = ' '; - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._Out; i++) - { - Result = Result + SeparatorChar + Output[i].ToString(); - SeparatorChar = ','; - } - - // If this opcode has both output and input, put a space between them : - if ((PSH_OPCODE_DEFS[Opcode]._Out > 0) && (PSH_OPCODE_DEFS[Opcode]._In > 0)) - { - Result = Result + ","; - SeparatorChar = ' '; - } - - // Output a comma-separated list of parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - Result = Result + SeparatorChar + Parameters[i].ToString(); - SeparatorChar = ','; - } - - if ((!CommentString.empty()) - || (PSH_OPCODE_DEFS[Opcode].note != "")) - Result = Result + " ; " + PSH_OPCODE_DEFS[Opcode].note + " " + CommentString; - - return Result; -} // ToString - -bool PSH_INTERMEDIATE_FORMAT::IsArithmetic() -{ - return (Opcode >= PO_ADD); -} - -void PSH_INTERMEDIATE_FORMAT::ScaleOutput(float aFactor) -{ - assert(aFactor > 0.0f); - - if (aFactor == 1.0f) - return; - - if (aFactor == 0.5f) - { - // Half the output modifier : - switch (Modifier) { - case INSMOD_X8: - Modifier = INSMOD_X4; - break; - case INSMOD_X4: - Modifier = INSMOD_X2; - break; - case INSMOD_X2: - Modifier = INSMOD_NONE; - break; - case INSMOD_NONE: - Modifier = INSMOD_D2; - break; - case INSMOD_D2: - Modifier = INSMOD_D4; - break; - case INSMOD_D4: - Modifier = INSMOD_D8; - break; - } - - return; - } - - if (aFactor == 2.0f) - { - // Double the output modifier : - switch (Modifier) { - case INSMOD_D8: - Modifier = INSMOD_D4; - break; - case INSMOD_D4: - Modifier = INSMOD_D2; - break; - case INSMOD_D2: - Modifier = INSMOD_NONE; - break; - case INSMOD_NONE: - Modifier = INSMOD_X2; - break; - case INSMOD_X2: - Modifier = INSMOD_X4; - break; - case INSMOD_X4: - Modifier = INSMOD_X8; - break; - } - - return; - } -} - -bool PSH_INTERMEDIATE_FORMAT::ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress) // overload; 
-{ - int i; - bool Result; - - // Check all parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - // Check if one of them reads from the given register : - Result = Parameters[i].IsRegister(aRegType, aAddress); - if (Result) - return true; - } - - return false; -} - -bool PSH_INTERMEDIATE_FORMAT::ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask) // overload; -{ - int i; - bool Result; - - // Check all parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - // Check if one of them reads from the given register : - Result = Parameters[i].IsRegister(aRegType, aAddress, aMask); - if (Result) - return true; - } - - return false; -} - -// Used to determine the number of accesses to a register type within an instruction -// For use when determining register access limitations on certain instructions -// addressCount = the number of different registers read of the specified type -// total = the number of accesses to the spcified register type -bool PSH_INTERMEDIATE_FORMAT::ReadsFromRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, int& addressCount, int& total) // overload; -{ - int i; - bool Result; - bool RegisterUsage[256] = { false }; - - addressCount = 0; - total = 0; - - // Check all parameters : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - { - // Check if one of them reads from the given register : - Result = Parameters[i].IsRegister(aRegType, aAddress, 0); - if (Result) - { - ++total; - if (!RegisterUsage[Parameters[i].Address]) - { - RegisterUsage[Parameters[i].Address] = true; - ++addressCount; - } - } - } - - return total > 0; -} - -bool PSH_INTERMEDIATE_FORMAT::WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress) // overload; -{ - int i; - bool Result; - - // Check the output : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._Out; i++) - { - // Check if one of them writes to the given register : - Result = Output[i].IsRegister(aRegType, aAddress); - if (Result) - return true; - } - - return false; 
-} - -bool PSH_INTERMEDIATE_FORMAT::WritesToRegister(PSH_ARGUMENT_TYPE aRegType, int aAddress, DWORD aMask) // overload; -{ - int i; - bool Result; - - // Check the output : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._Out; i++) - { - // Check if one of them writes to the given register : - Result = Output[i].IsRegister(aRegType, aAddress, aMask); - if (Result) - return true; - } - - return false; -} - -void PSH_INTERMEDIATE_FORMAT::SwapParameter(const int Index1, const int Index2) -// Swaps two parameters. -{ - PSH_IMD_ARGUMENT TmpParameters; - - TmpParameters = Parameters[Index1]; - Parameters[Index1] = Parameters[Index2]; - Parameters[Index2] = TmpParameters; -} - -void PSH_INTERMEDIATE_FORMAT::XSwapOutput() -// Swaps the two outputs, along with their arguments. Applies only to Xbox opcodes. -{ - PSH_IMD_ARGUMENT TmpOutput; - - // Swap output 0 with 1 : - TmpOutput = Output[0]; - Output[0] = Output[1]; - Output[1] = TmpOutput; - - // Swap parameters 0 with 2 and 1 with 3 : - SwapParameter(0, 2); - SwapParameter(1, 3); -} - -bool PSH_INTERMEDIATE_FORMAT::MoveRemovableParametersRight(const int Index1, const int Index2) -// Swaps discarded (and const) parameters to the right position, to ease later conversions. -{ - bool Result = false; - - if ( (!Parameters[Index1].UsesRegister()) - && (Parameters[Index2].UsesRegister())) - { - SwapParameter(Index1, Index2); - Result = true; - } - return Result; -} - -bool PSH_INTERMEDIATE_FORMAT::XMoveNonRegisterOutputsRight() -// Swap discards and constants to the right position, to ease later conversions. Applies only to Xbox opcodes. 
-{ - bool Result = false; - - // First, check if the left output is discarded, while the second isn't : - if ( (!Output[0].UsesRegister()) - && (Output[1].UsesRegister())) - { - // Swap the outputs, so the discarded version is positioned rightmost : - XSwapOutput(); - Result = true; - } - - // Also try to swap the parameters to the first operation : - if (MoveRemovableParametersRight(0, 1)) - Result = true; - - // Idem for the parameters to second operation : - if (MoveRemovableParametersRight(2, 3)) - Result = true; - return Result; -} - -void PSH_INTERMEDIATE_FORMAT::XCopySecondOpcodeToFirst(const PSH_OPCODE aOpcode) -// Copies second opcode to first position, changing the opcode type on the fly. -{ - Opcode = aOpcode; - Output[0] = Output[1]; - Parameters[0] = Parameters[2]; - Parameters[1] = Parameters[3]; -} - -bool PSH_INTERMEDIATE_FORMAT::Decode(DWORD aCombinerStageNr, DWORD PSInputs, DWORD PSOutputs, DWORD aMask) -{ - DWORD CombinerOutputFlags; - int i; - - bool Result = false; - CombinerStageNr = aCombinerStageNr; - IsCombined = aMask == MASK_A; - - // Decode first two outputs : - if (Output[0].Decode((PSOutputs >> 4) & 0xF, aMask, atOutput)) - Result = true; - if (Output[1].Decode((PSOutputs >> 0) & 0xF, aMask, atOutput)) - Result = true; - - // Get the combiner output flags : - CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); - - // Use that to choose between the four possible operations : - // - xdd (dot/dot/discard) > calculating AB=A.B and CD=C.D - // - xdm (dot/mul/discard) > calculating AB=A.B and CD=C*D - // - xmmc (mul/mul/mux) > calculating AB=A*B and CD=C*D and Mux=AB?CD - // - xmma (mul/mul/sum) > calculating AB=A*B and CD=C*D and Sum=AB+CD - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) > 0) // false=Multiply, true=DotProduct - { - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0) // false=Multiply, true=DotProduct - Opcode = PO_XDD; - else - Opcode = PO_XDM; - - // Note : All arguments are already 
in-place for these two opcodes. - - // No 3rd output; Assert that (PSOutputs >> 8) & 0xF == PS_REGISTER_DISCARD ? - } - else - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0) // false=Multiply, true=DotProduct - { - // The first operation is a multiply, but the second is a dot-product; - // There's no opcode for that, but we can reverse the two and still use XDM : - Opcode = PO_XDM; - XSwapOutput(); - - // No 3rd output; Assert that (PSOutputs >> 8) & 0xF == PS_REGISTER_DISCARD ? - } - else - { - if (/*AB_CD_SUM=*/(CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) == 0) // true=AB+CD, false=MUX(AB,CD) based on R0.a - Opcode = PO_XMMA; - else - Opcode = PO_XMMC; - - // This has a 3rd output, set that already : - if (Output[2].Decode((PSOutputs >> 8) & 0xF, aMask, atOutput)) - Result = true; - } - - if (Result) - { - // Handle the Output Mapping : - switch (CombinerOutputFlags & 0x38) { - case PS_COMBINEROUTPUT_BIAS: Modifier = INSMOD_BIAS; break; // TODO : Fixup occurrances! - case PS_COMBINEROUTPUT_SHIFTLEFT_1: Modifier = INSMOD_X2; break; - case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS: Modifier = INSMOD_BX2; break; // TODO : Fixup occurrances! - case PS_COMBINEROUTPUT_SHIFTLEFT_2: Modifier = INSMOD_X4; break; - case PS_COMBINEROUTPUT_SHIFTRIGHT_1: Modifier = INSMOD_D2; break; - default /*PS_COMBINEROUTPUT_IDENTITY*/: Modifier = INSMOD_NONE; break; - } - - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) > 0) // false=Alpha-to-Alpha, true=Blue-to-Alpha - { - // Note : The effect of this flag is not entirely clear - blue to alpha itself is an easy to understand operation, - // but on what output does it operate? AB? or the mux_sum destination register (which doesn't occur when a dot - // operation is executed)? What if AB is discarded, but AB+CD is registered? Also, what happens to the other - // color channels (R,G and A) in that register? 
The docs seem to imply that AB itself is not changed (as they - // state that the alpha portion is not necessarily discarded), which would mean that only the mux_sum output - // is influenced, but that would imply that this flag has no effect for dot-products (XDD or XDM)... - // And if this is true, how do the blue-to-alpha flags behave if present on both AB and CD? - - // TODO : Rayman does this in some shaders, requires a fixup (as output.b is incorrect and not allowed) - // TODO: Above may not be valid anymore, needs testing - Output[0].Modifiers = Output[0].Modifiers | (1 << ARGMOD_BLUE_REPLICATE); - CommentString += ", d0.a=d0.b"; - } - - if ((CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) > 0) // false=Alpha-to-Alpha, true=Blue-to-Alpha - { - Output[1].Modifiers = Output[1].Modifiers | (1 << ARGMOD_BLUE_REPLICATE); - CommentString += ", d1.a=d1.b"; - } - - // Decode all four inputs : - for (i = 0; i < PSH_OPCODE_DEFS[Opcode]._In; i++) - Parameters[i].Decode((PSInputs >> ((3-i) * 8)) & 0xFF, aMask, atInput); - } - return Result; -} // Decode - -bool PSH_INTERMEDIATE_FORMAT::DecodeFinalCombiner(DWORD aPSFinalCombinerInputsABCD, DWORD aPSFinalCombinerInputsEFG) -{ - int i; -// Note : The sign bit is lost upon input to the final combiner! 
- -// The final combiner performs the following operations : -// -// prod register = E*F // PS_REGISTER_EF_PROD, useable in A,B,C,D,G -// -// rgbout = A*B + (1-A)*C + D // lrp tmp.rgb, A, B, C // Note : tmp can be r0 if [A,B,C,D] * r0 = [] -// // add r0.rgb, tmp.rgb, D.rgb // Otherwise use a writable register from A;B or C -// -// alphaout = G.a // mov r0.a, G.a // Not necessary if G = r0 -// -// (also the final combiner can read PS_REGISTER_V1R0_SUM, which is equal to v1 + r0) -// Normal optimizations apply, like when A = PS_REGISTER_ZERO, all we have left is C + D (add r0.rgb, C.rgb, D.rgb) -// Also, if D = PS_REGISTER_ZERO, the add can be changed into a mov (if the result isn't already in r0.rgb) - - // Note : Previously, XSokoban lost it's font rendering when the final combiner was emitted, - // when disabled, the font reappeared (in various colors). This was because constants where - // not properly set locally. - - Opcode = PO_XFC; - CombinerStageNr = XFC_COMBINERSTAGENR; - - // Decode A,B,C and D : - for (i = 0; i < 4; i++) - Parameters[i].Decode((aPSFinalCombinerInputsABCD >> ((3-i) * 8)) & 0xFF, MASK_RGB/*?*/, atFinalCombiner); - - // Decode E,F and G : - for (i = 0; i < 3; i++) - Parameters[4+i].Decode((aPSFinalCombinerInputsEFG >> ((3-i) * 8)) & 0xFF, MASK_RGB/*?*/, atFinalCombiner); - - return true; -} - -/* PSH_XBOX_SHADER */ - -void PSH_XBOX_SHADER::SetPSVersion(const uint32_t PSVersion) -{ - m_PSVersion = PSVersion; - - // Source : https://en.wikipedia.org/wiki/High-Level_Shading_Language#Pixel_shader_comparison - if (m_PSVersion >= D3DPS_VERSION(4, 0)) { - MaxInputColorRegisters = 32; - MaxTemporaryRegisters = 4096; - MaxConstantFloatRegisters = 16*4096; - MaxSamplerRegisters = 16; - MaxTextureCoordinateRegisters = 0; // In shader model 4 and up, Dependent texture limit (T) is unlimited - // Note : Input Registers (v#) are now fully floating point and the Texture Coordinate Registers (t#) have been consolidated into it. 
- - PSH_PC_MAX_REGISTER_COUNT = 16 * 4096; - } - else if (m_PSVersion >= D3DPS_VERSION(3, 0)) { - // Source https://msdn.microsoft.com/en-us/library/windows/desktop/bb172920(v=vs.85).aspx - MaxInputColorRegisters = 10; - MaxTemporaryRegisters = 32; - MaxConstantFloatRegisters = 224; - MaxSamplerRegisters = 16; - MaxTextureCoordinateRegisters = 0; // In shader model 3 and up, Dependent texture limit (T) is unlimited - - PSH_PC_MAX_REGISTER_COUNT = 224; - } - else if (m_PSVersion >= D3DPS_VERSION(2, 0)) { - // Source https://msdn.microsoft.com/en-us/library/windows/desktop/bb172918(v=vs.85).aspx - MaxInputColorRegisters = 2; - MaxTemporaryRegisters = 12; // 12 min/32 max: The number of r# registers is determined by D3DCAPS9.D3DPSHADERCAPS2_0.NumTemps (which ranges from 12 to 32). - MaxConstantFloatRegisters = 32; - MaxSamplerRegisters = 16; - MaxTextureCoordinateRegisters = 8; - - PSH_PC_MAX_REGISTER_COUNT = 32; - } - else - assert(false); // We no longer support less than Direct3D 9 - /* For documentation purposes, keep the below information around : - else if (m_PSVersion >= D3DPS_VERSION(1, 4)) { - // Source https://msdn.microsoft.com/en-us/library/windows/desktop/bb172917(v=vs.85).aspx - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 6; - MaxTextureCoordinateRegisters = 4; - MaxInputColorRegisters = 2; // 2 in phase 2 - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } - else if (m_PSVersion >= D3DPS_VERSION(1, 3)) { - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 2; - MaxTextureCoordinateRegisters = 4; - MaxInputColorRegisters = 2; - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } - else if (m_PSVersion >= D3DPS_VERSION(1, 2)) { - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 2; - MaxTextureCoordinateRegisters = 4; - MaxInputColorRegisters = 2; - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } - else 
{ - // m_PSVersion >= D3DPS_VERSION(1, 1) - MaxConstantFloatRegisters = 8; - MaxTemporaryRegisters = 2; - MaxTextureCoordinateRegisters = 4; // Some sources say 2? - MaxInputColorRegisters = 2; - MaxSamplerRegisters = 0; // Not yet in shader model 1 - - PSH_PC_MAX_REGISTER_COUNT = 8; - } */ -} - -std::string PSH_XBOX_SHADER::ToString() -{ - std::string Result; - int i; - - for (i = 0; i < IntermediateCount; i++) - Result = Result + Intermediate[i].ToString() + "\n"; - - return Result; -} - -void PSH_XBOX_SHADER::Log(const char *PhaseStr) -{ - //if (MayLog(lfUnit)) - { - EmuLog(LOG_LEVEL::DEBUG, "New decoding - %s :", PhaseStr); - EmuLog(LOG_LEVEL::DEBUG, "%s", ToString().c_str()); - } -} - -PPSH_INTERMEDIATE_FORMAT PSH_XBOX_SHADER::NewIntermediate() -{ - PPSH_INTERMEDIATE_FORMAT Result = &Intermediate[IntermediateCount]; - Result->Initialize(PO_COMMENT); - ++IntermediateCount; - return Result; -} - -void PSH_XBOX_SHADER::InsertIntermediate(PPSH_INTERMEDIATE_FORMAT pIntermediate, int Index) -{ - int i; - i = IntermediateCount - 1; - while (i >= Index) - { - Intermediate[i + 1] = Intermediate[i]; - --i; - } - - Intermediate[Index] = *pIntermediate; - ++IntermediateCount; -} - -void PSH_XBOX_SHADER::DeleteIntermediate(int Index) -{ - int i; - for (i = Index; i < IntermediateCount - 1; i++) - Intermediate[i] = Intermediate[i + 1]; - - --IntermediateCount; -} - -void PSH_XBOX_SHADER::DeleteLastIntermediate() -{ - if (IntermediateCount > 0) - DeleteIntermediate(IntermediateCount - 1); -} - -std::string PSH_XBOX_SHADER::OriginalToString(xbox::X_D3DPIXELSHADERDEF *pPSDef) // static -{ - char buffer[4096]; - return std::string(buffer, sprintf(buffer, "PSAphaInputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSFinalCombinerInputsABCD = 0x%.08X\n" - "PSFinalCombinerInputsEFG = 0x%.08X\n" - "PSConstant0[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSConstant1[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 
0x%.08X 0x%.08X\n" - "PSAlphaOutputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSRGBInputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSCompareMode = 0x%.08X\n" - "PSFinalCombinerConstant0 = 0x%.08X\n" - "PSFinalCombinerConstant1 = 0x%.08X\n" - "PSRGBOutputs[8] = 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X 0x%.08X\n" - "PSCombinerCount = 0x%.08X\n" - "PSTextureModes = 0x%.08X\n" - "PSDotMapping = 0x%.08X\n" - "PSInputTexture = 0x%.08X\n" - "PSC0Mapping = 0x%.08X\n" - "PSC1Mapping = 0x%.08X\n" - "PSFinalCombinerConstants = 0x%.08X\n", - pPSDef->PSAlphaInputs[0], pPSDef->PSAlphaInputs[1], pPSDef->PSAlphaInputs[2], pPSDef->PSAlphaInputs[3], - pPSDef->PSAlphaInputs[4], pPSDef->PSAlphaInputs[5], pPSDef->PSAlphaInputs[6], pPSDef->PSAlphaInputs[7], - pPSDef->PSFinalCombinerInputsABCD, - pPSDef->PSFinalCombinerInputsEFG, - pPSDef->PSConstant0[0], pPSDef->PSConstant0[1], pPSDef->PSConstant0[2], pPSDef->PSConstant0[3], - pPSDef->PSConstant0[4], pPSDef->PSConstant0[5], pPSDef->PSConstant0[6], pPSDef->PSConstant0[7], - pPSDef->PSConstant1[0], pPSDef->PSConstant1[1], pPSDef->PSConstant1[2], pPSDef->PSConstant1[3], - pPSDef->PSConstant1[4], pPSDef->PSConstant1[5], pPSDef->PSConstant1[6], pPSDef->PSConstant1[7], - pPSDef->PSAlphaOutputs[0], pPSDef->PSAlphaOutputs[1], pPSDef->PSAlphaOutputs[2], pPSDef->PSAlphaOutputs[3], - pPSDef->PSAlphaOutputs[4], pPSDef->PSAlphaOutputs[5], pPSDef->PSAlphaOutputs[6], pPSDef->PSAlphaOutputs[7], - pPSDef->PSRGBInputs[0], pPSDef->PSRGBInputs[1], pPSDef->PSRGBInputs[2], pPSDef->PSRGBInputs[3], - pPSDef->PSRGBInputs[4], pPSDef->PSRGBInputs[5], pPSDef->PSRGBInputs[6], pPSDef->PSRGBInputs[7], - pPSDef->PSCompareMode, - pPSDef->PSFinalCombinerConstant0, - pPSDef->PSFinalCombinerConstant1, - pPSDef->PSRGBOutputs[0], pPSDef->PSRGBOutputs[1], pPSDef->PSRGBOutputs[2], pPSDef->PSRGBOutputs[3], - pPSDef->PSRGBOutputs[4], pPSDef->PSRGBOutputs[5], pPSDef->PSRGBOutputs[6], 
pPSDef->PSRGBOutputs[7], - pPSDef->PSCombinerCount, - XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES), /* pPSDef->PSTextureModes is stored in a different place than pPSDef*/ - pPSDef->PSDotMapping, - pPSDef->PSInputTexture, - pPSDef->PSC0Mapping, - pPSDef->PSC1Mapping, - pPSDef->PSFinalCombinerConstants)); -} - -void PSH_XBOX_SHADER::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]) -{ - for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - { - psTextureModes[i] = (PS_TEXTUREMODES)((XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> (i * 5)) & 0x1F); - } -} - -void PSH_XBOX_SHADER::GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]) -{ - psDotMapping[0] = (PS_DOTMAPPING)(0); - psDotMapping[1] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> 0) & 0x7); - psDotMapping[2] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> 4) & 0x7); - psDotMapping[3] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> 8) & 0x7); -} - -void PSH_XBOX_SHADER::GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, DWORD psCompareModes[xbox::X_D3DTS_STAGECOUNT]) -{ - for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - { - psCompareModes[i] = (pPSDef->PSCompareMode >> (i * 4)) & 0xF; - } -} - -void PSH_XBOX_SHADER::GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]) -{ - psInputTexture[0] = -1; // Stage 0 has no predecessors - psInputTexture[1] = 0; // Stage 1 can only use stage 0 - psInputTexture[2] = (pPSDef->PSInputTexture >> 16) & 0x1; // Stage 2 can use stage 0 or 1 - psInputTexture[3] = (pPSDef->PSInputTexture >> 20) & 0x3; // Stage 3 can only use stage 0, 1 or 2 -} - -void PSH_XBOX_SHADER::Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int i; - - /* Azurik likes to create and destroy the same shader every frame! 
O_o - LogFlags = lfUnit; - if (IsRunning(TITLEID_AZURIK)) - LogFlags = LogFlags | lfExtreme;*/ - - GetPSTextureModes(pPSDef, PSTextureModes); - GetPSCompareModes(pPSDef, PSCompareMode); - GetPSDotMapping(pPSDef, PSDotMapping); - GetPSInputTexture(pPSDef, PSInputTexture); - - NumberOfCombiners = (pPSDef->PSCombinerCount >> 0) & 0xF; - CombinerCountFlags = (pPSDef->PSCombinerCount >> 8); - - CombinerMuxesOnMsb = (CombinerCountFlags & PS_COMBINERCOUNT_MUX_MSB) > 0; - CombinerHasUniqueC0 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) > 0; - CombinerHasUniqueC1 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C1) > 0; - - // Backwards compatible decoding (purely for logging) : - { - for (i = 0; i < xbox::X_PSH_COMBINECOUNT; i++) { - Combiners[i].RGB.Decode(pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i]); - Combiners[i].Alpha.Decode(pPSDef->PSAlphaInputs[i], pPSDef->PSAlphaOutputs[i], /*aIsAlpha=*/true); - } - - FinalCombiner.Decode(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG, pPSDef->PSFinalCombinerConstants); - } -} - -PSH_RECOMPILED_SHADER PSH_XBOX_SHADER::Convert(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int i; - Recompiled = {}; - Recompiled.PSDef = *pPSDef; - - // Use a fluent interface to start with a pixel shader version opcode that knowns the host version - NewIntermediate()->Initialize(PO_XPS)->Parameters[6].Mask = m_PSVersion; - - for (i = 0; i < NumberOfCombiners; i++) - { - // Check that the RGB and Alpha inputs do the same operation : - if ( ((pPSDef->PSRGBInputs[i] & PS_NoChannelsMask) == (pPSDef->PSAlphaInputs[i] & PS_NoChannelsMask)) - // Check if all RGB channels are set to read from PS_CHANNEL_RGB : - && ((pPSDef->PSRGBInputs[i] & PS_AlphaChannelsMask) == 0) - // Check if all Alpha channels are set to read from PS_CHANNEL_ALPHA : - && ((pPSDef->PSAlphaInputs[i] & PS_AlphaChannelsMask) == PS_AlphaChannelsMask) - // Check that RGB and Alpha output to the same register(s) : - && (pPSDef->PSRGBOutputs[i] == 
pPSDef->PSAlphaOutputs[i])) - { - // In this case, we can convert RGB and Alpha together : - if (!NewIntermediate()->Decode(i, pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i], MASK_RGBA)) - DeleteLastIntermediate(); - } - else - { - // Otherwise, we need to convert RGB and Alpha separately : - if (!NewIntermediate()->Decode(i, pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i], MASK_RGB)) - DeleteLastIntermediate(); - - if (!NewIntermediate()->Decode(i, pPSDef->PSAlphaInputs[i], pPSDef->PSAlphaOutputs[i], MASK_A)) - DeleteLastIntermediate(); - } - } - - if ((pPSDef->PSFinalCombinerInputsABCD > 0) - || (pPSDef->PSFinalCombinerInputsEFG > 0)) { - if (NewIntermediate()->DecodeFinalCombiner(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG)) - { - FinalCombinerFlags = (PS_FINALCOMBINERSETTING)((pPSDef->PSFinalCombinerInputsEFG >> 0) & 0xFF); -// dwPS_GLOBALFLAGS = (pPSDef->PSFinalCombinerConstants >> 8) & 0x1; - } - else - DeleteLastIntermediate(); - } - // Dump the contents of the PixelShader def - //if (MayLog(LogFlags)) - // dump pixel shader definition to string - // TODO : Reinstate : XTL_DumpPixelShaderToFile(pPSDef); - - //if (MayLog(LogFlags)) - { - // print relevant contents to the debug console - EmuLog(LOG_LEVEL::DEBUG, "%s", DecodedToString(pPSDef).c_str()); - } - - // TODO: - // - Insert tex* and def instructions - - Log("Parse result"); - - if (MoveRemovableParametersRight()) - Log("MoveRemovableParametersRight"); - - if (RemoveNops()) - Log("RemoveNops"); - - while (RemoveUselessWrites()) { - Log("RemoveUselessWrites"); - if (RemoveNops()) - Log("RemoveNops"); - } - - if (ConvertConstantsToNative(pPSDef, /*Recompiled=*/&Recompiled)) - Log("ConvertConstantsToNative"); - - // Handle Texture declarations : - if (DecodeTextureModes(pPSDef)) - Log("DecodeTextureModes"); - - ConvertXboxOpcodesToNative(pPSDef); - Log("ConvertXboxOpcodesToNative"); - - while (RemoveUselessWrites()) { // again - Log("RemoveUselessWrites"); - if (RemoveNops()) - 
Log("RemoveNops"); - } - - // Resolve all differences : - if (FixupPixelShader()) - Log("FixupPixelShader"); - - if (FixInvalidDstRegister()) - Log("FixInvalidDstRegister"); - - if (FixConstantParameters()) - Log("FixConstantParameters"); - - if (FixArgumentModifiers()) - Log("FixArgumentModifiers"); - - if (FixInstructionModifiers()) - Log("FixInstructionModifiers"); - - if (FixInvalidSrcSwizzle()) - Log("FixInvalidSrcSwizzle"); - - if (FixMissingR0a()) - Log("FixMissingR0a"); - - if (FixMissingR1a()) - Log("FixMissingR1a"); - - if (FixCoIssuedOpcodes()) - Log("FixCoIssuedOpcodes"); - - if (FixOverusedRegisters()) - Log("FixOverusedRegisters"); - - if (FixUninitializedReads()) - Log("FixUninitializedReads"); - - if (FinalizeShader()) - Log("FinalizeShader"); - - Log("End result"); - - Recompiled.NewShaderStr = ToString(); - return Recompiled; -} - -std::string PSH_XBOX_SHADER::DecodedToString(xbox::X_D3DPIXELSHADERDEF *pPSDef) -// print relevant contents to the debug console - - #define _AddStr1(aStr) \ - \ - Result = Result + aStr + "\n"; - - #define _AddStr(aStr, ...) 
\ - {\ - _AddStr1(std::string(buf, sprintf(buf, aStr, __VA_ARGS__))); \ - } -{ - char buf[256]; - int i; - - std::string Result = ""; - // Show the contents to the user - _AddStr1("\n-----PixelShader Definition Contents-----"); - _AddStr1(OriginalToString(pPSDef)); - - if (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) > 0) - { - _AddStr1("\nPSTextureModes ->"); // Texture addressing modes - _AddStr("Stage 0: %s", PS_TextureModesStr[PSTextureModes[0]]); - _AddStr("Stage 1: %s", PS_TextureModesStr[PSTextureModes[1]]); - _AddStr("Stage 2: %s", PS_TextureModesStr[PSTextureModes[2]]); - _AddStr("Stage 3: %s", PS_TextureModesStr[PSTextureModes[3]]); - } - - if (pPSDef->PSDotMapping > 0) // Input mapping for dot product modes - { - _AddStr1("\nPSDotMapping ->"); - _AddStr("Stage 1: %s", PS_DotMappingStr[PSDotMapping[1]]); - _AddStr("Stage 2: %s", PS_DotMappingStr[PSDotMapping[2]]); - _AddStr("Stage 3: %s", PS_DotMappingStr[PSDotMapping[3]]); - } - - if (pPSDef->PSCompareMode > 0) // Compare modes for clipplane texture mode - { - _AddStr1("\nPSCompareMode ->"); - _AddStr("Stage 0: %s", PS_CompareModeStr[(PSCompareMode[0] == 0) ? 0 : 1]); - _AddStr("Stage 1: %s", PS_CompareModeStr[(PSCompareMode[1] == 0) ? 2 : 3]); - _AddStr("Stage 2: %s", PS_CompareModeStr[(PSCompareMode[2] == 0) ? 4 : 5]); - _AddStr("Stage 3: %s", PS_CompareModeStr[(PSCompareMode[3] == 0) ? 6 : 7]); - } - - if (pPSDef->PSInputTexture > 0) // Texture source for some texture modes - { - _AddStr1("\nPSInputTexture ->"); - _AddStr("Stage 1: %d", PSInputTexture[1]); - _AddStr("Stage 2: %d", PSInputTexture[2]); - _AddStr("Stage 3: %d", PSInputTexture[3]); - } - - if (pPSDef->PSCombinerCount > 0) // Active combiner count (Stages 0-7) - { - _AddStr1("\nPSCombinerCount ->"); - _AddStr("Combiners: %d", NumberOfCombiners); - _AddStr("Mux: %s", PS_CombinerCountFlagsStr[(CombinerCountFlags & PS_COMBINERCOUNT_MUX_MSB) == 0 ? 
0 : 1]); - _AddStr("C0: %s", PS_CombinerCountFlagsStr[(CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) == 0 ? 2 : 3]); - _AddStr("C1: %s", PS_CombinerCountFlagsStr[(CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C1) == 0 ? 4 : 5]); - } - - // Dxbx additions from here onwards : - - for (i = 0; i < NumberOfCombiners; i++) // Loop over all combiner stages - { - _AddStr1("\n"); - - _AddStr("PSRGBOutputs[%d] AB: %s", i, Combiners[i].RGB.OutputSUM.OutputAB.DecodedToString().c_str()); - _AddStr("PSRGBOutputs[%d] CD: %s", i, Combiners[i].RGB.OutputSUM.OutputCD.DecodedToString().c_str()); - _AddStr("PSRGBOutputs[%d] SUM: %s", i, Combiners[i].RGB.OutputSUM.DecodedToString().c_str()); - _AddStr("PSRGBOutputs[%d] flags: %s", i, PSCombinerOutputFlagsToStr(Combiners[i].RGB.CombinerOutputFlags, /*aIsAlpha=*/false).c_str()); - - _AddStr1("\n"); - _AddStr("PSRGBInputs[%d] A: %s", i, Combiners[i].RGB.OutputSUM.OutputAB.Input1.DecodedToString().c_str()); - _AddStr("PSRGBInputs[%d] B: %s", i, Combiners[i].RGB.OutputSUM.OutputAB.Input2.DecodedToString().c_str()); - _AddStr("PSRGBInputs[%d] C: %s", i, Combiners[i].RGB.OutputSUM.OutputCD.Input1.DecodedToString().c_str()); - _AddStr("PSRGBInputs[%d] D: %s", i, Combiners[i].RGB.OutputSUM.OutputCD.Input2.DecodedToString().c_str()); - - _AddStr1("\n"); - _AddStr("PSAlphaOutputs[%d] AB: %s", i, Combiners[i].Alpha.OutputSUM.OutputAB.DecodedToString().c_str()); - _AddStr("PSAlphaOutputs[%d] CD: %s", i, Combiners[i].Alpha.OutputSUM.OutputCD.DecodedToString().c_str()); - _AddStr("PSAlphaOutputs[%d] SUM: %s", i, Combiners[i].Alpha.OutputSUM.DecodedToString().c_str()); - _AddStr("PSAlphaOutputs[%d] flags: %s", i, PSCombinerOutputFlagsToStr(Combiners[i].Alpha.CombinerOutputFlags, /*aIsAlpha=*/true).c_str()); - - _AddStr1("\n"); - _AddStr("PSAlphaInputs[%d] A: %s", i, Combiners[i].Alpha.OutputSUM.OutputAB.Input1.DecodedToString().c_str()); - _AddStr("PSAlphaInputs[%d] B: %s", i, Combiners[i].Alpha.OutputSUM.OutputAB.Input2.DecodedToString().c_str()); 
- _AddStr("PSAlphaInputs[%d] C: %s", i, Combiners[i].Alpha.OutputSUM.OutputCD.Input1.DecodedToString().c_str()); - _AddStr("PSAlphaInputs[%d] D: %s", i, Combiners[i].Alpha.OutputSUM.OutputCD.Input2.DecodedToString().c_str()); - - _AddStr1("\n"); - _AddStr("PSConstant0[%d] : %x", i, pPSDef->PSConstant0[i]); // C0 for each stage - _AddStr("PSConstant1[%d] : %x", i, pPSDef->PSConstant1[i]); // C1 for each stage - } - - if ((pPSDef->PSFinalCombinerInputsABCD > 0) - || (pPSDef->PSFinalCombinerInputsEFG > 0)) // Final combiner inputs - { - _AddStr("\nPSFinalCombinerConstant0 : %x", pPSDef->PSFinalCombinerConstant0); // C0 in final combiner - _AddStr("PSFinalCombinerConstant1 : %x", pPSDef->PSFinalCombinerConstant1); // C1 in final combiner - - _AddStr1("\nPSFinalCombinerInputsABCD ->"); - _AddStr("Input A: %s", FinalCombiner.InputA.DecodedToString().c_str()); - _AddStr("Input B: %s", FinalCombiner.InputB.DecodedToString().c_str()); - _AddStr("Input C: %s", FinalCombiner.InputC.DecodedToString().c_str()); - _AddStr("Input D: %s", FinalCombiner.InputD.DecodedToString().c_str()); - - _AddStr1("\nPSFinalCombinerInputsEFG ->"); - _AddStr("Input E: %s", FinalCombiner.InputE.DecodedToString().c_str()); - _AddStr("Input F: %s", FinalCombiner.InputF.DecodedToString().c_str()); - _AddStr("Input G: %s", FinalCombiner.InputG.DecodedToString().c_str()); - _AddStr("Final combiner setting: %s", PSFinalCombinerSettingToStr((DWORD)(FinalCombiner.FinalCombinerFlags)).c_str()); - - _AddStr1("\nPSFinalCombinerConstants ->"); // Final combiner constant mapping - _AddStr("Offset of D3D constant for (C0: %d", FinalCombiner.FinalCombinerC0Mapping); - _AddStr("Offset of D3D constant for (C1: %d", FinalCombiner.FinalCombinerC1Mapping); - _AddStr("Adjust texture flag: %s", PS_GlobalFlagsStr[PS_GLOBALFLAGS(FinalCombiner.dwPS_GLOBALFLAGS)]); - } - - _AddStr1("\n"); - return Result; -} - - bool _OpcodeMustStayBeforeTextureMode(PSH_OPCODE Opcode, int i) - { - if (Opcode == PO_XPS) - return true; - - 
// Before texture modes, only keep the first comment (the one mentioning "xps" got converted into "ps") - if (Opcode == PO_COMMENT) - return (i == 0); - - if (Opcode == PO_PS) - return true; - - if (Opcode == PO_DEF) - return true; - - if (Opcode >= PO_DCL && Opcode <= PO_DCL_VOLUME) - return true; - - return false; - } - - bool PSH_XBOX_SHADER::_NextIs2D(int Stage) - { - if (Stage < xbox::X_D3DTS_STAGECOUNT-1) - return (PSTextureModes[Stage + 1] == PS_TEXTUREMODES_DOT_ST) || (PSTextureModes[Stage + 1] == PS_TEXTUREMODES_DOT_ZW); - else - return false; - } - -bool PSH_XBOX_SHADER::DecodeTextureModes(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int InsertPos; - PSH_INTERMEDIATE_FORMAT Ins = {}; - std::vector InsertIns; - int Stage; - - InsertIns.reserve(32); // arbitrary allotment of instructions - InsertIns.resize(xbox::X_D3DTS_STAGECOUNT); // default initialized to PO_COMMENT instructions - - bool Result = false; - - InsertPos = -1; - do { - ++InsertPos; - } while (_OpcodeMustStayBeforeTextureMode(Intermediate[InsertPos].Opcode, InsertPos)); - - Ins.Initialize(PO_DCL); - for (Stage = 0; Stage < xbox::X_D3DTS_STAGECOUNT; Stage++) - { - if (PSTextureModes[Stage] != PS_TEXTUREMODES_NONE || Stage < PSH_XBOX_MAX_T_REGISTER_COUNT) - { - switch (PSTextureModes[Stage]) - { - case PS_TEXTUREMODES_PROJECT2D: // argb = texture(r/q, s/q) TODO : Apply the division via D3DTOP_BUMPENVMAP ? 
- case PS_TEXTUREMODES_BUMPENVMAP: - case PS_TEXTUREMODES_BUMPENVMAP_LUM: - case PS_TEXTUREMODES_DOT_ST: - case PS_TEXTUREMODES_DPNDNT_AR: - case PS_TEXTUREMODES_DPNDNT_GB: - { - Ins.Opcode = PO_DCL_2D; - Ins.Output[0].SetRegister(PARAM_S, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - break; - } - case PS_TEXTUREMODES_PROJECT3D: // argb = texture(r/q, s/q, t/q) Note : 3d textures are sampled using PS_TEXTUREMODES_CUBEMAP - case PS_TEXTUREMODES_BRDF: - case PS_TEXTUREMODES_DOT_STR_3D: - { - Ins.Opcode = PO_DCL_VOLUME; - Ins.Output[0].SetRegister(PARAM_S, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - break; - } - case PS_TEXTUREMODES_CUBEMAP: // argb = cubemap(r/q, s/q, t/q) - case PS_TEXTUREMODES_DOT_RFLCT_DIFF: - case PS_TEXTUREMODES_DOT_RFLCT_SPEC: - case PS_TEXTUREMODES_DOT_STR_CUBE: - case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST: - { - Ins.Opcode = PO_DCL_CUBE; - Ins.Output[0].SetRegister(PARAM_S, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - break; - } - } - - Ins.Opcode = PO_DCL; - Ins.Output[0].SetRegister(PARAM_T, Stage, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - } - } - - for (int j = 0; j < PSH_XBOX_MAX_V_REGISTER_COUNT; ++j) - { - Ins.Opcode = PO_DCL; - Ins.Output[0].SetRegister(PARAM_V, j, MASK_RGBA); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - } - - PSH_OPCODE Opcode; - - Opcode = PO_TEXLD2; - - for (Stage = 0; Stage < xbox::X_D3DTS_STAGECOUNT; Stage++) - { - // TODO : Apply conversions when PS_GLOBALFLAGS_TEXMODE_ADJUST is set (but ... how to check the texture type? read D3DRS_PSTEXTUREMODES?) 
- - // Convert the texture mode to a texture addressing instruction : - switch (PSTextureModes[Stage]) { // input = q,s,t,r (same layout as a,r,g,b, also known as w,x,y,z) - case PS_TEXTUREMODES_PROJECT2D: // argb = texture(r/q, s/q) TODO : Apply the division via D3DTOP_BUMPENVMAP ? - case PS_TEXTUREMODES_PROJECT3D: // argb = texture(r/q, s/q, t/q) Note : 3d textures are sampled using PS_TEXTUREMODES_CUBEMAP - case PS_TEXTUREMODES_CUBEMAP: { // argb = cubemap(r/q, s/q, t/q) - Opcode = PO_TEXLD2; - - if (m_PSVersion >= D3DPS_VERSION(3, 0)) - continue; - break; - } - case PS_TEXTUREMODES_NONE: - case PS_TEXTUREMODES_PASSTHRU: - Opcode = PO_MOV; - break; - case PS_TEXTUREMODES_CLIPPLANE: Opcode = PO_TEXKILL; break; - case PS_TEXTUREMODES_BUMPENVMAP: Opcode = PO_TEXBEM; break; - case PS_TEXTUREMODES_BUMPENVMAP_LUM: Opcode = PO_TEXBEML; break; - case PS_TEXTUREMODES_BRDF: Opcode = PO_TEXBRDF; break; // Note : Not supported by Direct3D8 ? - case PS_TEXTUREMODES_DOT_ST: Opcode = PO_TEXM3X2TEX; break; - case PS_TEXTUREMODES_DOT_ZW: Opcode = PO_TEXM3X2DEPTH; break; // Note : requires ps.1.3 and a preceding texm3x2pad - case PS_TEXTUREMODES_DOT_RFLCT_DIFF: Opcode = PO_TEXM3X3DIFF; break; // Note : Not supported by Direct3D8 ? - case PS_TEXTUREMODES_DOT_RFLCT_SPEC: Opcode = PO_TEXM3X3VSPEC; break; - case PS_TEXTUREMODES_DOT_STR_3D: Opcode = PO_TEXM3X3TEX; break; // Note : Uses a 3d texture - case PS_TEXTUREMODES_DOT_STR_CUBE: Opcode = PO_TEXM3X3TEX; break; // Note : Uses a cube texture - case PS_TEXTUREMODES_DPNDNT_AR: Opcode = PO_TEXREG2AR; break; - case PS_TEXTUREMODES_DPNDNT_GB: Opcode = PO_TEXREG2GB; break; - case PS_TEXTUREMODES_DOTPRODUCT: - if (_NextIs2D(Stage)) - Opcode = PO_TEXM3X2PAD; - else - Opcode = PO_TEXM3X3PAD; - break; - case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST: Opcode = PO_TEXM3X3SPEC; break; // Note : Needs 3 arguments! 
- default: - continue; - } - - InsertTextureModeInstruction(pPSDef, Stage, Opcode, InsertIns, InsertPos); - Result = true; - } - if (Result) - { - for (unsigned i = 0; i < InsertIns.size(); ++i) - { - if (i >= xbox::X_D3DTS_STAGECOUNT || InsertIns[i].Opcode != PO_COMMENT) - { - InsertIntermediate(&InsertIns[i], InsertPos); - ++InsertPos; - } - } - } - StartPos = InsertPos + 1; - return Result; -} - -int PSH_XBOX_SHADER::GetTextureStageModifiers(int Stage) -{ - int modifiers = 0; - switch (PSDotMapping[Stage]) - { - case PS_DOTMAPPING_ZERO_TO_ONE: - break; - case PS_DOTMAPPING_MINUS1_TO_1_D3D: - modifiers = (1 << ARGMOD_SCALE_BX2); - break; - case PS_DOTMAPPING_MINUS1_TO_1_GL: - break; - case PS_DOTMAPPING_MINUS1_TO_1: - break; - case PS_DOTMAPPING_HILO_1: - break; - case PS_DOTMAPPING_HILO_HEMISPHERE: - break; - default: - break; - } - - return modifiers; -} - -void PSH_XBOX_SHADER::InsertTex3x2Instructions(int Stage, int inputStage, std::vector& InsertIns) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - const int modifiers = GetTextureStageModifiers(Stage); - - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 0, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); -} - -void PSH_XBOX_SHADER::InsertTex3x3Instructions(int Stage, int inputStage, std::vector& InsertIns) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - const int modifiers = GetTextureStageModifiers(Stage); - - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, 
MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 2, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage - 0, 0); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, 0); - Ins.Parameters[1].Modifiers = modifiers; - InsertIns.emplace_back(Ins); -} - -bool PSH_XBOX_SHADER::InsertTextureModeInstruction(xbox::X_D3DPIXELSHADERDEF *pPSDef, int Stage, PSH_OPCODE opcode, std::vector& InsertIns, int& InsertPos) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - bool Result = false; - - PSH_ARGUMENT_TYPE type = PARAM_T; - int inputStage = Stage; - int mask = 0; - - // TODO: Refactor and optimize - // TODO: Update handling to support 1.4? - bool needsInitialization = false; - switch (opcode) - { - case PO_TEXBEM: - case PO_TEXBEML: - { - inputStage = PSInputTexture[Stage]; - - // If the bump-map texture format is X_D3DFMT_X8L8V8U8 or X_D3DFMT_L6V5U5 we need to apply a bias - // This happens because these formats are an alias of unsigned texture formats. - // Fixes an issue with the JSRF boost-dash effect - // NOTE: This assumes that this shader will only ever be used for the input bumpmap texture - // If this causes regressions in other titles, we'll need to be smarter about this - // and include the texture formats in the shader hash, somehow. 
- bool bias = false; - auto biasModifier = (1 << ARGMOD_SCALE_BX2); - auto pXboxTexture = g_pXbox_SetTexture[inputStage]; - if (pXboxTexture != nullptr) { - extern xbox::X_D3DFORMAT GetXboxPixelContainerFormat(const xbox::X_D3DPixelContainer *pXboxPixelContainer); // TODO : Move to XTL-independent header file - - switch (GetXboxPixelContainerFormat(pXboxTexture)) { - case xbox::X_D3DFMT_L6V5U5: { - extern xbox::X_D3DRESOURCETYPE GetXboxD3DResourceType(const xbox::X_D3DResource *pXboxResource); // TODO : Move to XTL-independent header file - extern bool IsSupportedFormat(xbox::X_D3DFORMAT X_Format, xbox::X_D3DRESOURCETYPE XboxResourceType, DWORD D3DUsage); // TODO : Move to XTL-independent header file - - // L6V5U5 format is converted incorrectly if not supported by the device - xbox::X_D3DRESOURCETYPE XboxResourceType = GetXboxD3DResourceType(pXboxTexture); - DWORD D3DUsage = 0; // TODO : Since it's not yet know how to determine D3DUsage in this case, 'hack' it by using no specific D3DUSAGE_* flags. 
- - bias = !IsSupportedFormat(/*XboxFormat=*/xbox::X_D3DFMT_L6V5U5, XboxResourceType, D3DUsage); - break; - } - case xbox::X_D3DFMT_X8L8V8U8: { - bias = true; - break; - } - } - } - - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT00, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_R); - - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - - Ins.Parameters[2].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, MASK_R); - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT10, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_G); - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - Ins.Parameters[2].SetRegister(PARAM_R, 1, MASK_R); - InsertIns.emplace_back(Ins); - // - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT01, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_R); - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - Ins.Parameters[2].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, MASK_G); - InsertIns.emplace_back(Ins); - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVMAT11, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_G); - if (bias) { - Ins.Parameters[1].Modifiers = biasModifier; - } - Ins.Parameters[2].SetRegister(PARAM_R, 1, MASK_G); - InsertIns.emplace_back(Ins); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 
0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - Ins.Parameters[1].Modifiers = 0; - InsertIns.emplace_back(Ins); - - if (opcode == PO_TEXBEML) - { - // - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetScaleBemLumRegister(D3DTSS_BUMPENVLSCALE, Stage, Recompiled); - Ins.Parameters[1].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + inputStage, MASK_B); - Ins.Parameters[2].SetScaleBemLumRegister(D3DTSS_BUMPENVLOFFSET, Stage, Recompiled); - InsertIns.emplace_back(Ins); - // - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[1].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - } - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - - break; - } - case PO_TEXBRDF: - inputStage = PSInputTexture[Stage]; - break; - case PO_TEXM3X2TEX: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x2Instructions(Stage, inputStage, InsertIns); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X3TEX: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x3Instructions(Stage, inputStage, InsertIns); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X2DEPTH: - 
{ - inputStage = PSInputTexture[Stage]; - - InsertTex3x2Instructions(Stage, inputStage, InsertIns); - - Ins.CommentString = ""; - Ins.Initialize(PO_RCP); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_G); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_MUL); - Ins.Modifier = INSMOD_SAT; - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[1].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_CMP); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - Ins.Parameters[1].SetScaleConstRegister(1.0, Recompiled); - Ins.Parameters[2].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_oDepth, 0, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, MASK_B); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X3DIFF: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x3Instructions(Stage, inputStage, InsertIns); - - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X3VSPEC: - case PO_TEXM3X3SPEC: - { - inputStage = PSInputTexture[Stage]; - - InsertTex3x3Instructions(Stage, inputStage, InsertIns); - - int baseRegister = PSH_XBOX_MAX_R_REGISTER_COUNT + PSH_XBOX_MAX_T_REGISTER_COUNT; - - // get eye-ray vector - Ins.Initialize(PO_COMMENT); - Ins.CommentString = "; get eye-ray vector"; - InsertIns.emplace_back(Ins); - if (opcode == PO_TEXM3X3VSPEC) - { - // E.x - Ins.Initialize(PO_MOV); - 
Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_T, Stage - 2, MASK_A); - InsertIns.emplace_back(Ins); - // E.y - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_T, Stage - 1, MASK_A); - InsertIns.emplace_back(Ins); - // E.z - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_B); - Ins.Parameters[0].SetRegister(PARAM_T, Stage - 0, MASK_A); - InsertIns.emplace_back(Ins); - // E.w - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, MASK_A); - Ins.Parameters[0].SetScaleConstRegister(0.0, Recompiled); - InsertIns.emplace_back(Ins); - } - else - { - // E - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 0, 0); - Ins.Parameters[0].SetRegister(PARAM_C, 0, 0); - InsertIns.emplace_back(Ins); - } - - // compute reflection vector - Ins.Initialize(PO_COMMENT); - Ins.CommentString = "; compute reflection vector"; - InsertIns.emplace_back(Ins); - // N.E - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, baseRegister + 0, 0); - InsertIns.emplace_back(Ins); - // 2 * (N.E) - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[1].SetScaleConstRegister(2.0, Recompiled); - InsertIns.emplace_back(Ins); - // N.N - Ins.Initialize(PO_DP3); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, 1, 0); - InsertIns.emplace_back(Ins); - // 1 / (N.N) - Ins.Initialize(PO_RCP); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - InsertIns.emplace_back(Ins); - 
// 2 * N.E / N.N - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[1].SetRegister(PARAM_R, baseRegister + 1, MASK_G); - InsertIns.emplace_back(Ins); - // 2 * N.E / N.N * N - E - Ins.Initialize(PO_MAD); - Ins.Output[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_R, baseRegister + 1, MASK_R); - Ins.Parameters[2].SetRegister(PARAM_R, baseRegister + 0, 0); - Ins.Parameters[2].Modifiers = (1 << ARGMOD_NEGATE); - InsertIns.emplace_back(Ins); - - Ins.CommentString = ""; - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXREG2AR: - { - inputStage = PSInputTexture[Stage]; - - // E.x - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_A); - InsertIns.emplace_back(Ins); - // E.y - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_R); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXREG2GB: - { - inputStage = PSInputTexture[Stage]; - - // E.x - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 1, MASK_R); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_G); - InsertIns.emplace_back(Ins); - // E.y - Ins.Initialize(PO_MOV); - 
Ins.Output[0].SetRegister(PARAM_R, 1, MASK_G); - Ins.Parameters[0].SetRegister(PARAM_T, Stage, MASK_B); - InsertIns.emplace_back(Ins); - - Ins.Initialize(PO_TEXLD2); - Ins.Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - Ins.Parameters[0].SetRegister(PARAM_R, 1, 0); - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - InsertIns.emplace_back(Ins); - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - case PO_TEXM3X2PAD: - case PO_TEXM3X3PAD: - { - inputStage = PSInputTexture[Stage]; - - opcode = PO_MOV; - inputStage = Stage; - needsInitialization = true; - break; - } - - case PO_TEXLD: - case PO_TEXLD2: - case PO_TEXCRD: - case PO_MOV: - needsInitialization = true; - break; - default: - break; - } - - Ins.Initialize(opcode); - - if (needsInitialization) - { - type = PARAM_R; - - // Insert move instructions in reverse order to prevent overwriting wrong register - // Create instructions to move loaded temporary registers into extra temporary registers - InsertIns[xbox::X_D3DTS_STAGECOUNT - Stage - 1].Initialize(PO_MOV); - InsertIns[xbox::X_D3DTS_STAGECOUNT - Stage - 1].Output[0].SetRegister(PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, 0); - InsertIns[xbox::X_D3DTS_STAGECOUNT - Stage - 1].Parameters[0].SetRegister(PARAM_R, Stage, 0); - - if (Ins.Opcode == PO_TEXCRD) - { - mask = MASK_RGB; - } - else - { - } - - // Replace texture coordinate register usage up until first usage as output - int lastUsed = RegisterIsUsedFromIndexUntil(InsertPos, PARAM_T, Stage); - - if (lastUsed >= 0) - { - ReplaceInputRegisterFromIndexOnwards(InsertPos, PARAM_T, Stage, PARAM_R, PSH_XBOX_MAX_R_REGISTER_COUNT + Stage, lastUsed); - } - } - Ins.Output[0].SetRegister(type, Stage, mask); - - // For those texture modes that need it, add the source stage as argument : - if (PSH_OPCODE_DEFS[Ins.Opcode]._In >= 1) - { - Ins.Parameters[0].SetRegister(PARAM_T, inputStage, 0); - - if (Ins.Opcode >= PO_TEXDP3TEX && Ins.Opcode <= 
PO_TEXM3X3SPEC) - { - Ins.Parameters[0].Modifiers = GetTextureStageModifiers(Stage); - } - } - - if (PSH_OPCODE_DEFS[Ins.Opcode]._In >= 2) - { - if (Ins.Opcode == PO_TEXLD2) - { - Ins.Parameters[1].SetRegister(PARAM_S, Stage, 0); - } - - // Add the third argument : - switch (PSTextureModes[Stage]) { - case PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST: - { - Ins.Parameters[1].SetRegister(PARAM_C, 0, 0); - Ins.CommentString = "Dxbx guess"; // TODO : Where do we get the 3rd argument to this? - break; - } - } - } - -// // Warn about unprocessed flag : -// if ((dwPS_GLOBALFLAGS & PS_GLOBALFLAGS_TEXMODE_ADJUST) > 0) -// Ins.CommentString = Ins.CommentString + " PS_GLOBALFLAGS_TEXMODE_ADJUST unhandled!"; - - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - Result = true; - - return Result; -} - -bool PSH_XBOX_SHADER::MoveRemovableParametersRight() -{ - int i; - - bool Result = false; - - // For all opcodes, try to put constant and discarded arguments in the rightmost slot, to ease following analysis : - i = IntermediateCount; - while (i > StartPos) - { - --i; - - switch (Intermediate[i].Opcode) { -// case PO_SUB: // 1-x is not the same as x-1, but can still be reduced - see SimplifySUB - case PO_ADD: - case PO_DP3: - case PO_DP4: - case PO_MUL: // All these opcodes have two swappable parameters, so try that : - if (Intermediate[i].MoveRemovableParametersRight(0, 1)) - Result = true; - break; - - case PO_XMMA: - case PO_XMMC: - case PO_XDD: - if (Intermediate[i].XMoveNonRegisterOutputsRight()) - Result = true; - break; - - case PO_XDM: - { - // Parameters may be swapped for both dot and mul, - // but the opcodes themselves may not, as we handle - // both XDM operations separately below : - if (Intermediate[i].MoveRemovableParametersRight(0, 1)) - Result = true; - - if (Intermediate[i].MoveRemovableParametersRight(2, 3)) - Result = true; - break; - } - } - } - return Result; -} // MoveRemovableParametersRight - - void PSH_XBOX_SHADER::_SetColor(/*var 
OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLOR ConstColor) - { - D3DXCOLOR XColor; - - // Colors are defined in RGBA format, and range 0.0 - 1.0 (negative values - // can be obtained by supplying PS_INPUTMAPPING_SIGNED_NEGATE to the combiner - // that reads from these constants). - XColor = ConstColor; - NewIns.Parameters[0].SetConstValue(XColor.r); - NewIns.Parameters[1].SetConstValue(XColor.g); - NewIns.Parameters[2].SetConstValue(XColor.b); - NewIns.Parameters[3].SetConstValue(XColor.a); - } - - void PSH_XBOX_SHADER::_SetColor(/*var OUT*/PSH_INTERMEDIATE_FORMAT &NewIns, D3DCOLORVALUE ConstColor) - { - NewIns.Parameters[0].SetConstValue(ConstColor.r); - NewIns.Parameters[1].SetConstValue(ConstColor.g); - NewIns.Parameters[2].SetConstValue(ConstColor.b); - NewIns.Parameters[3].SetConstValue(ConstColor.a); - } - -bool PSH_XBOX_SHADER::ConvertConstantsToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef, /*var OUT*/PSH_RECOMPILED_SHADER *Recompiled) -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - - NewIns.Initialize(PO_DEF); - - // Add constants used to represent common powers of 2 used by instruction and argument modifiers - // Represent constant 0.0 and common powers of 2 divisions - NewIns.Output[0].SetRegister(PARAM_C, PSH_XBOX_CONSTANT_MUL1, MASK_RGBA); - _SetColor(NewIns, { 0.0, 1.0 / 2.0, 1.0 / 4.0, 1.0 / 8.0 }); - InsertIntermediate(&NewIns, 1); - - // Represent common powers of 2 constants, also used as multipliers - NewIns.Output[0].SetRegister(PARAM_C, PSH_XBOX_CONSTANT_MUL0, MASK_RGBA); - _SetColor(NewIns, {1.0, 2.0, 4.0, 8.0}); - InsertIntermediate(&NewIns, 1); - - // Loop over all opcodes to update the constant-indexes (Xbox uses C0 and C1 in each combiner) : - for (i = 0; i < IntermediateCount; i++) - { - // Loop over this opcodes' input arguments : - Cur = &(Intermediate[i]); - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - { - // Only handle arguments that address a constant 
register : - CurArg = &(Cur->Parameters[j]); - - // The Fog register is not supported on PC so we convert it to a constant too : - // (But only if the MASK is not solely accessing the alpha-channel - we don't support that) - if (CurArg->Type == PARAM_FOG) - { - if (CurArg->Mask != MASK_A) - { - CurArg->Type = PARAM_C; - CurArg->Address = PSH_XBOX_CONSTANT_FOG; - CurArg->Mask = CurArg->Mask & (!MASK_A); - } - else - { - // Until we can get Alpha fog from the vertex shader somehow, - // set it to a constant value, so these shaders (like appearing - // in Dolphin samples) still compile and give reasonable output : - CurArg->SetConstValue(1.0); - Cur->CommentString = "FOG.a not emulated, using 1."; - } - - continue; - } - - if (CurArg->Type != PARAM_C) - continue; - - // For each constant being addressed, we find out which Xbox constant it is, - // and map it to a native constant (as far as we have space for them) : - switch (CurArg->Address) { - case 0: // Handle C0 (if present) : - { - // The final combiner has a separate C0 constant : - if (Cur->CombinerStageNr == XFC_COMBINERSTAGENR) - CurArg->Address = PSH_XBOX_CONSTANT_FC0; - else - { - // See if C0 has a unique index per combiner stage : - if (CombinerHasUniqueC0) - // C0 actually ranges from c0 to c7, one for each possible combiner stage (X_D3DRS_PSCONSTANT0_0..X_D3DRS_PSCONSTANT0_7) : - CurArg->Address = Cur->CombinerStageNr; - else - // Non-unique just reads the same C0 in every stage : - CurArg->Address = 0; - } - break; - } - - case 1: // Handle C1 (if present) : - { - // The final combiner has a separate C1 constant : - if (Cur->CombinerStageNr == XFC_COMBINERSTAGENR) - CurArg->Address = PSH_XBOX_CONSTANT_FC1; - else - { - // See if C1 has a unique index per combiner stage : - if (CombinerHasUniqueC1) - // C1 actually ranges from c8 to c15, one for each possible combiner stage (X_D3DRS_PSCONSTANT1_0..X_D3DRS_PSCONSTANT1_7) : - CurArg->Address = Cur->CombinerStageNr + 8; - else - // Non-unique just reads 
the same C1 in every stage : - CurArg->Address = 1; - } - break; - } - } // switch - } // for arguments - } // for opcodes - - return true; -} // ConvertConstantsToNative - -bool PSH_XBOX_SHADER::RemoveUselessWrites() -// Note : Xbox allows writing to V0 (diffuse color) and V1 (specular color), but native ps.1.3 doesn't! -// Some examples of this behaviour can be seen when running RayMan Arena. -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - DWORD RegUsage[/*PSH_ARGUMENT_TYPE*/PARAM_C - PARAM_VALUE + 1][224] = {}; // 224 = highest possible PSH_PC_MAX_REGISTER_COUNT - - // TODO : In Polynomial Texture Maps, one extra opcode could be deleted (sub r1.rgb, v0,v0), why doesn't it? - bool Result = false; - - // Mark only R0 (and discard) as initially 'read', as these may not result in a removal : - RegUsage[PARAM_R][0] = MASK_RGBA; - for (i = 0; i < PSH_PC_MAX_REGISTER_COUNT; i++) - RegUsage[PARAM_DISCARD][i] = MASK_RGBA; - - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - if (!Cur->IsArithmetic()) - continue; - - // Loop over the output arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - { - CurArg = &(Cur->Output[j]); - - // Remove useless flag, to ease up later comparisions : - CurArg->Modifiers = CurArg->Modifiers & ~(1 << ARGMOD_IDENTITY); - - // Discard useless writes : - if ( (CurArg->Address < MaxTemporaryRegisters) - && ((RegUsage[CurArg->Type][CurArg->Address] & CurArg->Mask) == 0)) - { - EmuLog(LOG_LEVEL::DEBUG, "; Removed useless assignment to register %s", CurArg->ToString().c_str()); - CurArg->Type = PARAM_DISCARD; - Result = true; - } - } - - // Loop over the input arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - { - CurArg = &(Cur->Parameters[j]); - // Skip non-register parameters : - if (!CurArg->UsesRegister()) - continue; - - // Remove useless flag, to ease up later comparisions : - CurArg->Modifiers = CurArg->Modifiers & ~(1 << 
ARGMOD_IDENTITY); - - // Keep track of all register reads, so that we can discard useless writes : - if (CurArg->Address < MaxTemporaryRegisters) - RegUsage[CurArg->Type][CurArg->Address] = RegUsage[CurArg->Type][CurArg->Address] | CurArg->Mask; - } - } - return Result; -} // RemoveUselessWrites - -void PSH_XBOX_SHADER::ConvertXboxOpcodesToNative(xbox::X_D3DPIXELSHADERDEF *pPSDef) -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - std::string CommentString; - - // Do a bottom-to-top pass, converting all xbox opcodes into a native set of opcodes : - i = IntermediateCount; - while (i > 0) - { - --i; - Cur = &(Intermediate[i]); - - // Convert all Xbox opcodes into native opcodes : - CommentString = Cur->ToString(); - switch (Cur->Opcode) { - case PO_XPS: ConvertXPSToNative(i); break; - case PO_XMMA: ConvertXMMAToNative(i); break; - case PO_XMMC: ConvertXMMCToNative(i); break; - case PO_XDM: ConvertXDMToNative(i); break; - case PO_XDD: ConvertXDDToNative(i); break; - case PO_XFC: ConvertXFCToNative(i); break; // Can only occur once, as the last instruction - default: - CommentString = ""; break; - } - - if (!CommentString.empty()) { - PSH_INTERMEDIATE_FORMAT NewIns = {}; - NewIns.Initialize(PO_COMMENT)->CommentString = CommentString; - InsertIntermediate(&NewIns, i); - } - } -} // ConvertXboxOpcodesToNative - -void PSH_XBOX_SHADER::ConvertXPSToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - - Cur = &(Intermediate[i]); - Cur->Opcode = PO_PS; -} - -bool PSH_XBOX_SHADER::ConvertXMMToNative_Except3RdOutput(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - int InsertPos; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - bool Result = false; - Cur = &(Intermediate[i]); - InsertPos = i; - - // This block is meant for cases where XMMA/XMMC discards the 3rd output : - if (Cur->Output[2].Type == PARAM_DISCARD) - { - // Mark that this XMMA/XMMC opcode is already handled here : - Result = true; - - // The opcode must unconditionally change into a MUL (or two) : - Cur->Opcode = PO_MUL; - - // Is the 
second output ignored? - if (Cur->Output[1].Type == PARAM_DISCARD) - { - // If the first output is also ignored : - if (Cur->Output[0].Type == PARAM_DISCARD) - // The complete opcode can already be removed early on : - DeleteIntermediate(i); - else - ;// The first output is just a MUL, it's output (and first two parameters) are already in-place, so we're done - - return Result; - } - ++InsertPos; - - // Create a second MUL opcode for the second result : - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_MUL); - InsertIntermediate(&Ins, InsertPos); - return Result; - } - - // The third output is needed, but what about the first and second output ? - - if (Cur->Output[0].Type == PARAM_DISCARD) - { - Cur->Output[0].Type = PARAM_T; - Cur->Output[0].Address = FakeRegNr_Xmm1; // 'r4' - } - - if (Cur->Output[1].Type == PARAM_DISCARD) - { - Cur->Output[1].Type = PARAM_T; - Cur->Output[1].Address = FakeRegNr_Xmm2; // 'r5' - } - - // Generate a MUL for the 1st output : - Ins = *Cur; - Ins.Opcode = PO_MUL; - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - // Generate a MUL for the 2nd output : - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_MUL); - InsertIntermediate(&Ins, InsertPos); - - // Note : If XMMA or XMMC writes to the third argument, we now have - // the first and second stored already (if they where not ignored). - // IF one (or both) are ignored, the intermediate result might be - // needed, but let XMMA/XMMC figure that out first - the resulting - // opcode(s) will probably require the initial opcode's removal! 
- return Result; -} // ConvertXMMToNative_Except3RdOutput - -void PSH_XBOX_SHADER::ConvertXMMAToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - - // Handle the generic case of XMM, and check if the 3rd (Add) argument is ignored : - if (!ConvertXMMToNative_Except3RdOutput(i)) - { - // Add needs to be stored, we already have 2 MULs, so change the XMMA into an ADD : - Cur = &(Intermediate[i+2]); - Cur->Opcode = PO_ADD; - Cur->Modifier = INSMOD_NONE; - Cur->Parameters[0] = Cur->Output[0]; - Cur->Parameters[1] = Cur->Output[1]; - Cur->Output[0] = Cur->Output[2]; - } -} - -void PSH_XBOX_SHADER::ConvertXMMCToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - - // Handle the generic case of XMM, and check if the 3rd (Compare) argument is ignored : - if (!ConvertXMMToNative_Except3RdOutput(i)) - { - // Add needs to be stored, we already have 2 MULs, so change the XMMC into an CND : - Cur = &(Intermediate[i+2]); - // TODO : If CombinerMuxesOnMsb is False, we should compare to the LeastSignificantBit of r0.a - but how? 
- Cur->Opcode = PO_CND; - Cur->Modifier = INSMOD_NONE; - // Begin the input of CND with the required r0.a parameter : - Cur->Parameters[0].SetRegister(PARAM_R, 0, MASK_A); - Cur->Parameters[0].Modifiers = (1 << ARGMOD_IDENTITY); - Cur->Parameters[0].Multiplier = 1.0; - // Follow that with the 2 selection registers : - Cur->Parameters[1] = Cur->Output[0]; - Cur->Parameters[2] = Cur->Output[1]; - // And put the result it in the final register : - Cur->Output[0] = Cur->Output[2]; - } -} - -void PSH_XBOX_SHADER::ConvertXDMToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Cur = &(Intermediate[i]); - - // XDM does two operations : - - // a multiply : - if (Cur->Output[1].Type != PARAM_DISCARD) - { - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_MUL); - InsertIntermediate(&Ins, i+1); - } - - // and a dot product : - if (Cur->Output[0].Type == PARAM_DISCARD) - DeleteIntermediate(i); - else - Cur->Opcode = PO_DP3; -} - -void PSH_XBOX_SHADER::ConvertXDDToNative(int i) -{ - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Cur = &(Intermediate[i]); - - // XDD does two operations : - - // ...a dot product : - Cur->Opcode = PO_DP3; - - // and another dot product : - if (Cur->Output[1].Type != PARAM_DISCARD) - { - Ins = *Cur; - Ins.XCopySecondOpcodeToFirst(PO_DP3); - InsertIntermediate(&Ins, i+1); - } -} - -void PSH_XBOX_SHADER::ConvertXFCToNative(int i) -{ - PSH_INTERMEDIATE_FORMAT Cur = {}; - int InsertPos; - bool NeedsProd; - bool NeedsSum; - PPSH_IMD_ARGUMENT CurArg; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - // Get a copy of XFC and remove it already, new instructions will replace it : - Cur = Intermediate[i]; - DeleteIntermediate(i); - InsertPos = i; - // 'final combiner - r0 = A*B + (1-A)*C + D'; - - // See if the final combiner uses the prod or sum input parameters : - NeedsProd = false; - NeedsSum = false; - for (i = 0; i < PSH_OPCODE_DEFS[Cur.Opcode]._In; i++) - { - CurArg = &(Cur.Parameters[i]); - - // Check 
for the three final-combiner-specific argument types : - switch (CurArg->Type) { - case PARAM_V1R0_SUM: - { - // Change SUM into a fake register, which will be resolved later : - CurArg->Type = PARAM_T; - CurArg->Address = FakeRegNr_Sum; // 'r2' - NeedsSum = true; - break; - } - - case PARAM_EF_PROD: - { - // Change PROD into a fake register, which will be resolved later : - CurArg->Type = PARAM_T; - CurArg->Address = FakeRegNr_Prod; // 'r3' - NeedsProd = true; - break; - } - - case PARAM_FOG: - { - // Change FOG into a constant of 1.0, as we can't simulate it otherwise : -// CurArg->SetConstValue(1.0); -// Cur->CommentString = "final combiner - FOG not emulated, using 1."; - break; - } - } - } // for input - - if (NeedsSum) - { - // Add a new opcode that calculates r0+v1 : - Ins.Initialize(PO_ADD); - Ins.Output[0].SetRegister(PARAM_T, FakeRegNr_Sum, MASK_RGBA); // 'r2' - - Ins.Parameters[0].SetRegister(PARAM_R, 0, MASK_RGB); - Ins.Parameters[1].SetRegister(PARAM_V, 1, MASK_RGB); - - // Take the FinalCombinerFlags that influence this result into account : - if ((FinalCombinerFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0) > 0) - Ins.Parameters[0].Modifiers = (1 << ARGMOD_INVERT); // (1-r0) is used as an input to the sum rather than r0 - if ((FinalCombinerFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1) > 0) - Ins.Parameters[1].Modifiers = (1 << ARGMOD_INVERT); // (1-v1) is used as an input to the sum rather than v1 - if ((FinalCombinerFlags & PS_FINALCOMBINERSETTING_CLAMP_SUM) > 0) - Ins.Modifier = INSMOD_SAT; // V1+R0 sum clamped to [0,1] - - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted final combiner calculation of V1R0_sum register"); - } - - if (NeedsProd) - { - // Add a new opcode that calculates E*F : - Ins.Initialize(PO_MUL); - Ins.Output[0].SetRegister(PARAM_T, FakeRegNr_Prod, MASK_RGBA); // 'r3' - Ins.Parameters[0] = Cur.Parameters[4]; // E - Ins.Parameters[1] = Cur.Parameters[5]; // F - InsertIntermediate(&Ins, 
InsertPos); - ++InsertPos; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted final combiner calculation of EF_prod register"); - } - - // The final combiner calculates : r0.rgb=s0*s1 + (1-s0)*s2 + s3 - // Change that into a LRP + ADD, and let the optimizer reduce it; - - // Add a new opcode that calculates r0.rgb=s0*s1 + (1-s0)*s2 via a LRP : - // Set the output to r0.rgb (as r0.a is determined via s6.a) : - - // Watch out! If s3=r0.rgb, then the LRP cannot use r0, but must use r1 as temp! - if (Cur.Parameters[3].IsRegister(PARAM_R, 0, 0)) - Cur.Output[0].SetRegister(PARAM_R, 1, MASK_RGB); - else - Cur.Output[0].SetRegister(PARAM_R, 0, MASK_RGB); - - Ins = Cur; - Ins.Opcode = PO_LRP; - Ins.Modifier = INSMOD_NONE; - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - // Add a new opcode that calculates r0.rgb=r0.rgb+s3 : - Ins.Opcode = PO_ADD; - Ins.Modifier = Cur.Modifier; - Ins.Output[0] = Cur.Output[0]; // = r0.rgb - Ins.Parameters[0] = Cur.Output[0]; // = r0.rgb - Ins.Parameters[1] = Cur.Parameters[3]; // =s3 from XFC - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - // See if s6 is something else than "r0.a" : - if (Cur.Parameters[6].ToString() != "r0.a") - { - // Add a new opcode that moves s6 over to r0.a : - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, 0, MASK_A); - Ins.Parameters[0] = Cur.Parameters[6]; - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - } -} - -bool PSH_XBOX_SHADER::RemoveNops() -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - bool HasOutput; - - bool Result = false; - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - - // Skip opcodes that have no output, but should stay anyway : - if (PSH_OPCODE_DEFS[Cur->Opcode]._Out == 0) - if (Cur->Opcode != PO_NOP) - continue; - - // See if this opcode writes to any of it's outputs : - { - HasOutput = false; - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - if (Cur->Output[j].Type != PARAM_DISCARD) - { - HasOutput = true; - break; - 
} - - if (!HasOutput) - { - // Remove the opcode (as it doesn't change anything) : - // This applies to PO_NOP and opcodes that discard all their results : - DeleteIntermediate(i); - Result = true; - continue; - } - } - } - return Result; -} - -int PSH_XBOX_SHADER::MaxRegisterCount(PSH_ARGUMENT_TYPE aRegType) -{ - switch (aRegType) - { - case PARAM_R: - return MaxTemporaryRegisters; - case PARAM_T: - return MaxTextureCoordinateRegisters; - case PARAM_V: - return MaxInputColorRegisters; - case PARAM_C: - return MaxConstantFloatRegisters; - case PARAM_S: - return MaxSamplerRegisters; - } - - return 0; -} - -bool PSH_XBOX_SHADER::IsValidNativeOutputRegister(PSH_ARGUMENT_TYPE aRegType, int index /*= -1*/) -{ - bool valid = (PARAM_R == aRegType) && (MaxRegisterCount(PARAM_R) > index); - - return valid; -} - -int PSH_XBOX_SHADER::RegisterIsFreeFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - // Detect a read : - if (Cur->ReadsFromRegister(aRegType, aAddress)) - { - return -1; - } - // Detect a write : - if (Cur->WritesToRegister(aRegType, aAddress)) - { - break; - } - } - - return i; -} - -int PSH_XBOX_SHADER::RegisterIsUsedFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - int result = -1; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - // Detect a read : - if (Cur->ReadsFromRegister(aRegType, aAddress)) - { - result = i; - } - // Detect a write : - if (Cur->WritesToRegister(aRegType, aAddress)) - { - break; - } - } - - return result; -} - -int PSH_XBOX_SHADER::NextFreeRegisterFromIndexUntil(int aIndex, PSH_ARGUMENT_TYPE aRegType, int bIndex /*= -1*/, int startAddress /*= 0*/, int excludeAddress /*= -1*/) -{ - const int registerCount = MaxRegisterCount(aRegType); - - if (bIndex < 0 || bIndex < aIndex) - bIndex = 
IntermediateCount; - - if (startAddress < 0) - startAddress = 0; - - int i; - - for (i = startAddress; i < registerCount; i++) - { - if (i == excludeAddress) - continue; - - if (RegisterIsFreeFromIndexUntil(aIndex, aRegType, i) >= bIndex) - { - return i; - } - } - - return -1; -} - -bool PSH_XBOX_SHADER::IsRegisterFreeFromIndexOnwards(int aIndex, PSH_ARGUMENT_TYPE aRegType, int16_t aAddress) -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - // Detect a write or read : - if (Cur->WritesToRegister(aRegType, aAddress) - || Cur->ReadsFromRegister(aRegType, aAddress)) - { - return false; - } - } - - return true; -} - -void PSH_XBOX_SHADER::ReplaceInputRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex /*= -1*/) -{ - ReplaceRegisterFromIndexOnwards(aIndex, aSrcRegType, aSrcAddress, aDstRegType, aDstAddress, endIndex, true, false); -} - -void PSH_XBOX_SHADER::ReplaceOutputRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex /*= -1*/) -{ - ReplaceRegisterFromIndexOnwards(aIndex, aSrcRegType, aSrcAddress, aDstRegType, aDstAddress, endIndex, false, true); -} - -void PSH_XBOX_SHADER::ReplaceRegisterFromIndexOnwards(int aIndex, - PSH_ARGUMENT_TYPE aSrcRegType, int16_t aSrcAddress, - PSH_ARGUMENT_TYPE aDstRegType, int16_t aDstAddress, int endIndex /*= -1*/, bool replaceInput /*= true*/, bool replaceOutput /*= true*/) -{ - int i; - int j; - PPSH_INTERMEDIATE_FORMAT Cur; - - for (i = aIndex; i < IntermediateCount && (i <= endIndex || endIndex == -1); i++) - { - Cur = &(Intermediate[i]); - - if (replaceOutput) - { - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - if (Cur->Output[j].IsRegister(aSrcRegType, aSrcAddress)) - Cur->Output[j].SetRegister(aDstRegType, aDstAddress, 
Cur->Output[j].Mask); - } - - if (replaceInput) - { - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - if (Cur->Parameters[j].IsRegister(aSrcRegType, aSrcAddress)) - Cur->Parameters[j].SetRegister(aDstRegType, aDstAddress, Cur->Parameters[j].Mask); - } - } -} - -bool PSH_XBOX_SHADER::FixArgumentModifiers() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - bool Result = false; - - // Do a bottom-to-top pass, preventing constant-modifiers via additional MOV's: - i = IntermediateCount; - while (i > 0) - { - --i; - Cur = &(Intermediate[i]); - if (Cur->Opcode < PO_TEX) // TODO : Check explicitly which instruction types are handled below - continue; - - int InsertPos = i; - // Detect modifiers on constant and arguments - for (int p = 0; p < 7 && p < PSH_OPCODE_DEFS[Cur->Opcode]._In; p++) { - if ((Cur->Parameters[p].Type == PARAM_C || Cur->Parameters[p].UsesRegister()) - && ((Cur->Parameters[p].Modifiers & ~(1 << ARGMOD_NEGATE)) != 0)) { - - PSH_INTERMEDIATE_FORMAT Ins = {}; - PSH_IMD_ARGUMENT Arg = {}; - - Arg = Cur->Parameters[p]; - - int excludeAddress = Cur->Output[0].Type == PARAM_R ? 
Cur->Output[0].Address : -1; - - PSH_ARGUMENT_TYPE type = PARAM_R; - int address = NextFreeRegisterFromIndexUntil(InsertPos, PARAM_R, InsertPos, 0, excludeAddress); - - if (IsValidNativeOutputRegister(Arg.Type, Arg.Address) && RegisterIsFreeFromIndexUntil(InsertPos + 1, Arg.Type, Arg.Address) > InsertPos) - { - type = Arg.Type; - address = Arg.Address; - } - - for (int modifier = ARGMOD_INVERT; modifier < ARGMOD_SATURATE; ++modifier) - { - Arg = Cur->Parameters[p]; - - if (!Arg.HasModifier((PSH_ARG_MODIFIER)modifier)) - continue; - - bool needInsert = false; - switch ((PSH_ARG_MODIFIER)modifier) - { - case ARGMOD_INVERT: - { - if (Arg.HasModifier(ARGMOD_NEGATE)) - { - Ins.Initialize(PO_SUB); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(1.0f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'invert' with 'negate' argument modifier (register - 1)"; - ++modifier; - } - else - { - Ins.Initialize(PO_SUB); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[0].SetScaleConstRegister(1.0f, Recompiled); - Ins.Parameters[1] = Cur->Parameters[p]; - Ins.Parameters[1].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'invert' argument modifier (1 - register)"; - } - needInsert = true; - - break; - } - case ARGMOD_NEGATE: - { - // Skip as this modifier is still supported in current shader models - // Included here for completeness - break; - Ins.Initialize(PO_MOV); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[0] = Cur->Parameters[p]; - 
Ins.Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - Ins.CommentString = "Inserted to replace 'negate' argument modifier (-register)"; - needInsert = true; - - break; - } - case ARGMOD_BIAS: - { - Ins.Initialize(PO_SUB); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'bias' argument modifier (register - 0.5)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_X2: - { - Ins.Initialize(PO_MUL); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'x2' argument modifier (2 * register)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_BX2: - { - Ins.Initialize(PO_MAD); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[2].SetScaleConstRegister(-1.0f, Recompiled); - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'bx2' argument modifier (2 * register - 1)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_X4: - { - Ins.Initialize(PO_MUL); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(4.0f, Recompiled); - Ins.Parameters[0] = 
Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'x4' argument modifier (4 * register)"; - needInsert = true; - - break; - } - case ARGMOD_SCALE_D2: - { - Ins.Initialize(PO_MUL); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace 'd2' argument modifier (0.5 * register)"; - needInsert = true; - - break; - } - default: - { - Ins.Initialize(PO_MOV); - // No need to check if output is a constant - those cannot be assigned to anyway - Ins.Output[0].SetRegister(type, address, Arg.Mask); - // Move constant into register - Ins.Parameters[0] = Cur->Parameters[p]; - Ins.Parameters[0].Modifiers = 0; - Ins.CommentString = "Inserted to replace argument with modifier"; - needInsert = true; - - break; - } - } - - if (needInsert == true) - { - for (int q = p; q < PSH_OPCODE_DEFS[Cur->Opcode]._In; q++) - { - // overwrite all matching parameters to avoid duplicate instructions - if (Arg.Type == Cur->Parameters[q].Type - && Arg.Address == Cur->Parameters[q].Address - && Arg.Mask == Cur->Parameters[q].Mask - && Arg.Modifiers == Cur->Parameters[q].Modifiers - && Arg.Multiplier == Cur->Parameters[q].Multiplier) - { - Cur->Parameters[q] = Ins.Output[0]; - // Apply modifier to register instead of constant - Cur->Parameters[q].Modifiers = (Arg.Modifiers & (1 << ARGMOD_NEGATE)) | (Arg.Modifiers & (~0 << (modifier + 1))); - } - } - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - ++Cur; - EmuLog(LOG_LEVEL::DEBUG, "; Used intermediate move to avoid argument modifier"); - Result = true; - } - } - } - } - } - return Result; -} // FixArgumentModifiers - -bool PSH_XBOX_SHADER::FixConstantParameters() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - 
bool Result = false; - - // Do a bottom-to-top pass, preventing constant-modifiers via additional MOV's: - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - - if (!Cur->IsArithmetic()) - continue; - - for (int p = 0; p < PSH_OPCODE_DEFS[Cur->Opcode]._In; ++p) - { - if (Cur->Parameters[p].Type != PARAM_VALUE) - continue; - - if (Cur->Parameters[p].SetScaleConstRegister(Cur->Parameters[p].GetConstValue(), Recompiled)) - { - EmuLog(LOG_LEVEL::DEBUG, "; Replaced constant value with constant register"); - Result = true; - } - } - } - return Result; -} // FixConstantParameters - -bool PSH_XBOX_SHADER::FixInstructionModifiers() -{ - int i; - int InsertPos; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT Ins = {}; - - bool Result = false; - - // Do a bottom-to-top pass, preventing constant-modifiers via additional MOV's: - i = IntermediateCount; - while (i > StartPos) - { - InsertPos = i; - --i; - Cur = &(Intermediate[i]); - - if (!Cur->IsArithmetic()) - continue; - - bool insert = true; - switch (Cur->Modifier) - { - case INSMOD_BIAS: // y = x - 0.5 // Xbox only : TODO : Fixup occurrances! - { - Ins.Initialize(PO_SUB); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_bias"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_bias"); - break; - } - case INSMOD_X2: // y = x * 2 - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_x2"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_x2"); - break; - } - case INSMOD_BX2: // y = (x - 0.5) * 2 // Xbox only : TODO : Fixup occurrances! 
- { - Ins.Initialize(PO_MAD); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(2.0f, Recompiled); - Ins.Parameters[2].SetScaleConstRegister(-1.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_bx2"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_bx2"); - break; - } - case INSMOD_X4: // y = x * 4 - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(4.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_x4"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_x4"); - break; - } - case INSMOD_D2: // y = x * 0.5 - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_d2"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_d2"); - break; - } - case INSMOD_X8: // y = x * 8 // ps 1.4 only - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(8.0f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_x8"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_x8"); - break; - } - case INSMOD_D4: // y = x * 0.25 // ps 1.4 only - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - Ins.Parameters[1].SetScaleConstRegister(0.25f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_d4"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_d4"); - break; - } - case INSMOD_D8: // y = x * 0.125 // ps 1.4 only - { - Ins.Initialize(PO_MUL); - Ins.Output[0] = Ins.Parameters[0] = Cur->Output[0]; - 
Ins.Parameters[1].SetScaleConstRegister(0.125f, Recompiled); - Ins.CommentString = "; Inserted adjustment by constant register for INST_d8"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted adjustment by constant register for INST_d8"); - break; - } - case INSMOD_SAT: // Xbox doesn"t support this, but has ARGMOD_SATURATE instead - case INSMOD_NONE: // y = x - default: - insert = false; - break; - } - - if (insert) - { - Cur->Modifier = INSMOD_NONE; - InsertIntermediate(&Ins, InsertPos++); - Result = true; - } - - // Handle blue-to-alpha which is technically an instruction modifier, but operates on arguments - for (int i = 0; i < PSH_OPCODE_DEFS[Cur->Opcode]._Out; i++) { - auto& output = Cur->Output[i]; - - if (output.UsesRegister() && output.HasModifier(ARGMOD_BLUE_REPLICATE)) { - Ins.Initialize(PO_MOV); - Ins.Output[0].Type = Ins.Parameters[0].Type = output.Type; - Ins.Output[0].Address = Ins.Parameters[0].Address = output.Address; - Ins.Output[0].Mask = MASK_A; - Ins.Parameters[0].Mask = MASK_B; - Ins.CommentString = "; Inserted Blue-to-Alpha"; - EmuLog(LOG_LEVEL::DEBUG, "; Inserted Blue-to-Alpha"); - - InsertIntermediate(&Ins, InsertPos); - Result = true; - } - } - } - return Result; -} // FixInstructionModifiers - -bool PSH_XBOX_SHADER::FinalizeShader() -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_oC, 0, MASK_RGBA); - Ins.Parameters[0].SetRegister(PARAM_R, 0, MASK_RGBA); - InsertIntermediate(&Ins, IntermediateCount); - - return true; -} // FinalizeShader - -//bool PSH_XBOX_SHADER::CombineInstructions() - - bool _CanLerp(PPSH_INTERMEDIATE_FORMAT Mul1, PPSH_INTERMEDIATE_FORMAT Mul2, PPSH_INTERMEDIATE_FORMAT AddOpcode, int Left, int Right) - { - PPSH_IMD_ARGUMENT ParamLeft, ParamRight; - - // Check if Left and Right are the same register : - ParamLeft = &(Mul1->Parameters[Left]); - ParamRight = &(Mul2->Parameters[Right]); - if ((ParamLeft->Type != ParamRight->Type) - || (ParamLeft->Address != ParamRight->Address) - || 
(ParamLeft->Mask != ParamRight->Mask)) - return false; - - // Is the left argument inverted and the right not (or the other way around) ? - if (ParamLeft->HasModifier(ARGMOD_INVERT) != ParamRight->HasModifier(ARGMOD_INVERT)) - { - // In that case, already move the arguments over to AddOpcode so we create a LRP : - AddOpcode->Parameters[0] = *ParamLeft; - AddOpcode->Parameters[1] = Mul1->Parameters[1-Left]; - AddOpcode->Parameters[2] = Mul2->Parameters[3-Right]; - return true; - } - return false; - } - - bool _CanMad(int ConstOne, PPSH_INTERMEDIATE_FORMAT Mul1, PPSH_INTERMEDIATE_FORMAT Mul2, PPSH_INTERMEDIATE_FORMAT AddOpcode) - { - // Check if the given parameter is 1 : - bool Result = Mul1->Parameters[ConstOne].GetConstValue() == 1.0; - if (Result) - { - // Put the other 3 parameters int the resulting opcode, so we can make it a MAD : - AddOpcode->Parameters[0] = Mul2->Parameters[0]; - AddOpcode->Parameters[1] = Mul2->Parameters[1]; - AddOpcode->Parameters[2] = Mul1->Parameters[1-ConstOne]; - } - return Result; - } - -bool PSH_XBOX_SHADER::CombineInstructions() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Op0; - PPSH_INTERMEDIATE_FORMAT Op1; - PPSH_INTERMEDIATE_FORMAT Op2; - bool CanOptimize; - int j; - int k; - - bool Result = false; - - i = IntermediateCount - 1; - while (i > StartPos) - { - --i; - Op0 = &(Intermediate[i+0]); - Op1 = &(Intermediate[i+1]); - Op2 = &(Intermediate[i+2]); - - // Check if there are two consecutive opcodes reading from a fake R register; - // We outputted these ourselves, in order to ease the conversion and profit - // from having generic optimizations in one place : - if ( (Op0->Output[0].Type == PARAM_T) - && (Op0->Output[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT) - && (Op1->Output[0].Type == PARAM_T) - && (Op1->Output[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT)) - { - // Did we output those from a CND opcode (originally XMMC) ? 
- if (Op2->Opcode == PO_CND) - { - if ( (Op0->Opcode == PO_MOV) - && (Op1->Opcode == PO_MOV) - && (Op1->Modifier == Op0->Modifier)) - { - Op2->Modifier = Op0->Modifier; - Op2->Parameters[1] = Op0->Parameters[0]; - Op2->Parameters[2] = Op1->Parameters[0]; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,CND via MOV,MOV,CND into a single CND"); - Result = true; - continue; - } - } - - // Did we output those from a ADD opcode (originally XMMA) ? - if (Op2->Opcode == PO_ADD) - { - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_MUL) - && (Op1->Modifier == Op0->Modifier)) - { - // Check if we can lerp - we just need the same register on both sides that's inverted on the other : - if (_CanLerp(Op0, Op1, Op2, 0, 2) - || _CanLerp(Op0, Op1, Op2, 1, 2) - || _CanLerp(Op0, Op1, Op2, 0, 3) - || _CanLerp(Op0, Op1, Op2, 1, 3)) - { - // The lerp can be done, and the correct parameters are already set to Op2, - // so all we need to do now, it fixup the rest and remove the two MOV's : - Op2->Opcode = PO_LRP; - Op2->Modifier = Op0->Modifier; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a single LRP"); - Result = true; - continue; - } - - // Check if we can mad - we just need a constant 1 in one argument : - if (_CanMad(0, Op0, Op1, Op2) - || _CanMad(1, Op0, Op1, Op2) - || _CanMad(0, Op1, Op0, Op2) - || _CanMad(1, Op1, Op0, Op2)) - { - // The mad can be done, and the correct parameters are already set to Op2, - // so all we need to do now, it fixup the rest and remove the two MOV's : - Op2->Opcode = PO_MAD; - Op2->Modifier = Op0->Modifier; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a single MAD"); - Result = true; - continue; - } - - // No single opcode possible, so change it into a MUL + MAD : - // The first mul may write to the last output register (without a modifier) : - 
Op0->Modifier = INSMOD_NONE; - Op0->Output[0] = Op2->Output[0]; - // Change the second MUL into a MAD : - Op1->Opcode = PO_MAD; - Op1->Output[0] = Op2->Output[0]; - Op1->Parameters[2] = Op0->Output[0]; - // Remove the trailing ADD : - DeleteIntermediate(i+2); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a MUL,MAD"); - Result = true; - continue; - } - - // Was it a MUL,MUL,ADD? - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_MUL) - && (Op0->Parameters[1].GetConstValue() == 1.0) - && (Op1->Parameters[1].GetConstValue() == 1.0)) - { - // Remove the two MOV's and fold their arguments into a MUL : - Op2->Opcode = PO_MUL; - Op2->Parameters[0] = Op0->Parameters[0]; - Op2->Parameters[1] = Op1->Parameters[0]; - DeleteIntermediate(i); - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MUL,ADD into a MUL"); - Result = true; - continue; - } - } - } - - // Do two neighbouring opcodes output to the same register (without a modifier) ? - if ( (Op0->Output[0].ToString() == Op1->Output[0].ToString()) - && (Op0->Modifier == INSMOD_NONE) - && (Op1->Modifier == INSMOD_NONE)) - { - // Is it MUL,ADD ? - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_ADD)) - { - // Is the output of the MUL input to the ADD ? 
- if ( (Op0->Output[0].Type == Op1->Parameters[0].Type) - && (Op0->Output[0].Address == Op1->Parameters[0].Address) - && (Op0->Output[0].Modifiers == Op1->Parameters[0].Modifiers)) - // Mask and Multiplier are not important here - { - Op0->Opcode = PO_MAD; - Op0->Parameters[2] = Op1->Parameters[1]; - DeleteIntermediate(i+1); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MUL,ADD into a single MAD"); - Result = true; - continue; - } - } - } - -/* - // Combinations that can be made if their intermediate result is not read again or overwritten later: - - MOV+ADD > ADD (if MOV.Output[0] was only read by ADD.Parameter[0] or ADD.Parameter[1]) - MOV+SUB > SUB (if MOV.Output[0] was only read by SUB.Parameter[0] or SUB.Parameter[1]) - MOV+MUL > MUL (if MOV.Output[0] was only read by MOV.Parameter[0] or MOV.Parameter[1]) - - MUL+MOV > MUL (if MUL.Output[0] was only read by MOV.Parameter[0]) - MUL+ADD > MAD (if MUL.Output[0] was only read by ADD.Parameter[0] or ADD.Parameter[1]) - MUL+SUB > MAD (if MUL.Output[0] was only read by SUB.Parameter[0] - Do invert MAD.Parameter[2]) -*/ - - // We can remove a MOV entirely if the input is not changed while - // the output is read, up until the output is re-written; We can change all - // these occurances into a read from the input of this MOV instead : - // This fixes some shaders in Turok, that are reduced to 8 instead of 9 opcodes. - if ( (Op0->Opcode == PO_MOV) - && (Op0->Modifier == INSMOD_NONE) - && (Op0->Output[0].Mask == MASK_RGBA)) - { - CanOptimize = false; - j = i + 1; - while (j < IntermediateCount) - { - // Don't optimize if the output is needed for CND or CMP (which must read from r0) : - // This fixes : "(Validation Error) First source for cnd instruction must be 'r0.a'" in Modify Pixel Shader XDK sample. 
- if ( ((Intermediate[j].Opcode == PO_CND) || (Intermediate[j].Opcode == PO_CMP)) - && (Op0->Output[0].IsRegister(PARAM_R, 0))) - break; - - // TODO : Add other prevention rules here (like too many texture-reads, and other scases) - - // We can optimize if the MOV-output is written to again before the end of the shader : - CanOptimize = true; - - // ensure this is not "constant with modifier" optimization pattern to prevent infinite loop - for (int p = 0; p < PSH_OPCODE_DEFS[Intermediate[j].Opcode]._In; p++) - { - if ((Op0->Parameters[0].Type == PARAM_C) - && (Intermediate[j].Parameters[p].Type == Op0->Output[0].Type) - && (Intermediate[j].Parameters[p].Address == Op0->Output[0].Address) - && (Intermediate[j].Parameters[p].Modifiers != 0)) - { - CanOptimize = false; - break; - } - }; - - if (Intermediate[j].WritesToRegister(Op0->Output[0].Type, Op0->Output[0].Address, MASK_RGBA)) - break; - - CanOptimize = false; - ++j; - } - - if (CanOptimize) - { - // Loop over all instructions in between, and try to replace reads : - CanOptimize = false; - while (j > i) - { - // For Intermediate[j].Parameters, change all occurrances of Op0.Output[0] into Op0.Parameters[0] : - for (k = 0; k < PSH_OPCODE_DEFS[Intermediate[j].Opcode]._In; k++) - if ( (Intermediate[j].Parameters[k].Type == Op0->Output[0].Type) - && (Intermediate[j].Parameters[k].Address == Op0->Output[0].Address)) - { - Intermediate[j].Parameters[k].Type = Op0->Parameters[0].Type; - Intermediate[j].Parameters[k].Address = Op0->Parameters[0].Address; - // Signal that a replacement is actually done : - CanOptimize = true; - } - - --j; - } - - if (CanOptimize) - { - DeleteIntermediate(i); - EmuLog(LOG_LEVEL::DEBUG, "; Moved MOV input into following instructions"); - Result = true; - } - } - } - - // Fix Dolphin : - // mul r3, r0,t0 ; d0=s0*s1 - // mov r0.rgb, r3 ; d0=s0 final combiner - FOG not emulated, using 1. 
- if ( (Op0->Output[0].Type == PARAM_T) - && (Op0->Output[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT) - && (Op1->Parameters[0].Type == PARAM_T) - && (Op1->Parameters[0].Address >= PSH_XBOX_MAX_T_REGISTER_COUNT)) - { - if ( (Op0->Opcode == PO_MUL) - && (Op1->Opcode == PO_MOV)) - { - // > mul r0.rgb, r0,t0 - Op0->Output[0] = Op1->Output[0]; - DeleteIntermediate(i+1); - EmuLog(LOG_LEVEL::DEBUG, "; Changed temporary MUL,MOV into a MUL"); - Result = true; - continue; - } - } - - // Fix Crash bandicoot xfc leftover r3 : - if (Op0->Output[0].IsRegister(PARAM_T, FakeRegNr_Prod)) // 'r3' - { - // The final combiner uses r3, try to use r1 instead : - if (IsRegisterFreeFromIndexOnwards(i, PARAM_R, 1)) - { - ReplaceRegisterFromIndexOnwards(i, Op0->Output[0].Type, Op0->Output[0].Address, PARAM_R, 1); - EmuLog(LOG_LEVEL::DEBUG, "; Changed fake register by r1"); - Result = true; - continue; - } - } - } // while - return Result; -} // CombineInstructions - -bool PSH_XBOX_SHADER::SimplifyMOV(PPSH_INTERMEDIATE_FORMAT Cur) -{ - bool CanSimplify; - float Factor; - - // NOP-out MOV's that read and write to the same register : - if ( (Cur->Output[0].Type == Cur->Parameters[0].Type) - && (Cur->Output[0].Address == Cur->Parameters[0].Address) - && (Cur->Output[0].Mask == Cur->Parameters[0].Mask)) - { - if (Cur->Output[0].Type == PARAM_VALUE) - CanSimplify = Cur->Output[0].GetConstValue() == Cur->Parameters[0].GetConstValue(); - else - CanSimplify = (Cur->Output[0].Modifiers == Cur->Parameters[0].Modifiers) - && (Cur->Output[0].Multiplier == Cur->Parameters[0].Multiplier); - - if (CanSimplify) - { - Cur->Opcode = PO_NOP; // This nop will be removed in a recursive fixup - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV into a NOP"); - return true; - } - } - - // Does this MOV put a 0 (zero) in the output? - if (Cur->Parameters[0].GetConstValue() == 0.0) - { - // Attempt to find a constant with the value 0, and use that if present. 
- if (!Cur->Parameters[0].SetScaleConstRegister(0.0f, Recompiled)) - { - // Simulate 0 by subtracting a (guaranteed) register from itself : - // Fixup via "sub d0=v0,v0" : - Cur->Opcode = PO_SUB; - Cur->Parameters[0].Type = PARAM_V; - Cur->Parameters[0].Address = 0; - Cur->Parameters[0].Modifiers = 0; - Cur->Parameters[1] = Cur->Parameters[0]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV 0 into a SUB v0,v0"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV 0 into a MOV c0"); - } - - return true; - } - - // Does this MOV put a constant in the output? - if (Cur->Parameters[0].Type == PARAM_VALUE) - { - // TODO : If there's a constant equal to GetConstValue(), use that. - Factor = Cur->Parameters[0].GetConstValue(); - - if (!Cur->Parameters[0].SetScaleConstRegister(Factor, Recompiled)) - { - // Fixup via a SUB (which can calculate a constant value) : - Cur->Opcode = PO_SUB; - Cur->Parameters[0].Type = PARAM_V; - Cur->Parameters[0].Address = 0; - - if (Factor < 0.0) - { - // Simulate -1 by calculating it via a (guaranteed) register : - // We follow this : (-v0) - (1-v0) = -v0 - 1 + v0 = -1 - Cur->Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - Cur->Parameters[1] = Cur->Parameters[0]; - Cur->Parameters[1].Modifiers = (1 << ARGMOD_INVERT); - // Go on with a positive factor, to ease the scaling : - Factor = -Factor; - } - else - { - // Simulate 1 by calculating it via a (guaranteed) register : - // We follow this : (1-v0) - (-v0) = (1-v0) + v0 = 1 - Cur->Parameters[0].Modifiers = (1 << ARGMOD_INVERT); - Cur->Parameters[1] = Cur->Parameters[0]; - Cur->Parameters[1].Modifiers = (1 << ARGMOD_NEGATE); - } - - // Try to simulate all factors (0.5, 1.0 and 2.0) using an output modifier : - Cur->ScaleOutput(Factor); - - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV {const} into a SUB_factor 1-v0,-v0"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MOV {const} into a MOV c#"); - } - return true; - } - return false; -} - -bool 
PSH_XBOX_SHADER::SimplifyADD(PPSH_INTERMEDIATE_FORMAT Cur) -{ - // Is this an addition of s0+0 ? - if (Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into a MOV (the first argument is already in-place) - Cur->Opcode = PO_MOV; - EmuLog(LOG_LEVEL::DEBUG, "; Changed ADD s0,0 into a MOV s0"); - return true; - } - return false; -} - -bool PSH_XBOX_SHADER::SimplifyMAD(PPSH_INTERMEDIATE_FORMAT Cur, int index) -{ - // Is this 0*s1+s2 or s0*0+s2 ? - if (Cur->Parameters[0].GetConstValue() == 0.0 - || Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into s2 : - Cur->Opcode = PO_MOV; - Cur->Parameters[0] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,0 into a MOV s0"); - return true; - } - - // Is this s0*s1+0 ? - if (Cur->Parameters[2].GetConstValue() == 0.0) - { - // Change it into s0*s1 : - Cur->Opcode = PO_MUL; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0, s1,0 into a MUL s0, s1"); - return true; - } - - // Is this s0*1+s2 ? - if (Cur->Parameters[1].GetConstValue() == 1.0) - { - // Change it into s0+s2 : - Cur->Opcode = PO_ADD; - Cur->Parameters[1] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,1,s2 into a ADD s0,s2"); - return true; - } - - // Is this s0*-1+s2 ? - if (Cur->Parameters[1].GetConstValue() == -1.0) - { - // Change it into s2-s0 : - Cur->Opcode = PO_SUB; - Cur->Parameters[1] = Cur->Parameters[0]; - Cur->Parameters[0] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,-1,s2 into a SUB s2,s0"); - return true; - } - - PSH_INTERMEDIATE_FORMAT Ins = {}; - - // Is this 0.5*s1+s2 ? 
- if (Cur->Parameters[0].GetConstValue() == 0.5f && Cur->Parameters[1].UsesRegister()) - { - if (!Cur->Parameters[0].SetScaleConstRegister(0.5f, Recompiled)) - { - // Change it into s2 : - Cur->Opcode = PO_ADD; - Cur->Parameters[0] = Cur->Parameters[1]; - Cur->Parameters[1] = Cur->Parameters[2]; - - Ins.Initialize(PO_MOV); - Ins.Modifier = INSMOD_D2; - Ins.Output[0] = Ins.Parameters[0] = Cur->Parameters[1]; - Ins.CommentString = "; Inserted to perform division by 2"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD 0.5,s1,s2 into a MOV_d2 s1, s1 ADD s1, s2"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD 0.5,s1,s2 into a MAD c#,s1,s2"); - } - return true; - } - - // Is this s0*0.5+s2 ? - if (Cur->Parameters[1].GetConstValue() == 0.5f && Cur->Parameters[0].UsesRegister()) - { - if (!Cur->Parameters[1].SetScaleConstRegister(0.5f, Recompiled)) - { - // Change it into s2 : - Cur->Opcode = PO_ADD; - Cur->Parameters[0] = Cur->Parameters[0]; - Cur->Parameters[1] = Cur->Parameters[2]; - - Ins.Initialize(PO_MOV); - Ins.Modifier = INSMOD_D2; - Ins.Output[0] = Ins.Parameters[0] = Cur->Parameters[0]; - Ins.CommentString = "; Inserted to perform division by 2"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,0.5,s2 into a MOV_d2 s0, s0 ADD s0, s2"); - } - else - { - EmuLog(LOG_LEVEL::DEBUG, "; Changed MAD s0,0.5,s2 into a MAD s0,c#,s2"); - } - return true; - } - return false; -} - -bool PSH_XBOX_SHADER::SimplifySUB(PPSH_INTERMEDIATE_FORMAT Cur) -{ - // Is this an subtraction of s0-0 ? - if (Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into a MOV (the first argument is already in-place) - Cur->Opcode = PO_MOV; - EmuLog(LOG_LEVEL::DEBUG, "; Changed SUB x, 0 into a MOV x"); - return true; - } - return false; -} - -bool PSH_XBOX_SHADER::SimplifyMUL(PPSH_INTERMEDIATE_FORMAT Cur) -{ - // Is the result of this multiplication zero ? 
- if (Cur->Parameters[1].GetConstValue() == 0.0) - { - // Change it into a MOV (the 0 argument will be resolve in a recursive MOV fixup) : - Cur->Opcode = PO_MOV; - Cur->Parameters[0].SetConstValue(0.0); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MUL s0,0 into a MOV 0"); - return true; - } - - // Is this a multiply-by-const ? - if (Cur->Parameters[1].Type == PARAM_VALUE) - { - // Change it into a simple MOV and scale the output instead : - Cur->Opcode = PO_MOV; - Cur->ScaleOutput(Cur->Parameters[1].GetConstValue()); - EmuLog(LOG_LEVEL::DEBUG, "; Changed MUL s0,{const} into a MOV_factor s0"); - return true; - } - return false; -} // SimplifyMUL - -bool PSH_XBOX_SHADER::SimplifyLRP(PPSH_INTERMEDIATE_FORMAT Cur, int index) -{ - // LRP calculates : d0=s0*s1+(1-s0)*s2 which can also be read as : d0=s0*(s1-s2)+s2 - - // Is the right part ((1-s0)*s2) zero? - if ((Cur->Parameters[0].GetConstValue() == 1.0) || (Cur->Parameters[2].GetConstValue() == 0.0)) - { - // Change it into a MUL (calculating the left part : s0*s1 : - Cur->Opcode = PO_MUL; - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,s1,s2 (where (1-s0)*s2=0) into a MUL s0,s1"); - return true; - } - - // Is the left part (s0*s1) zero? - if ((Cur->Parameters[0].GetConstValue() == 0.0) || (Cur->Parameters[1].GetConstValue() == 0.0)) - { - // Change it into a MUL (calculating the right part : (1-s0)*s2) : - Cur->Opcode = PO_MUL; - Cur->Parameters[0].Invert(); - Cur->Parameters[1] = Cur->Parameters[2]; - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,s1,s2 (where s0*s1=0) into a MUL (1-s0),s2"); - return true; - } - - // Is it d0=s0*s1+(1-s0)*1 ? - if (Cur->Parameters[2].GetConstValue() == 1.0) - { - // Change it into a d0=s0*s1+(1-s0) - Cur->Opcode = PO_MAD; - Cur->Parameters[2] = Cur->Parameters[0]; - Cur->Parameters[2].Invert(); - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,s1,1 into a MAD s0,s1,1-s0"); - return true; - } - - // Is it d0=s0*(1-s2)+s2 ? 
- if (Cur->Parameters[1].GetConstValue() == 1.0) - { - // Change it into a d0=s0*(1-s2)+s2 - Cur->Opcode = PO_MAD; - Cur->Parameters[1] = Cur->Parameters[2]; - Cur->Parameters[1].Invert(); - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,1,s2 into a MAD s0,1-s2,s2"); - return true; - } - - int output = NextFreeRegisterFromIndexUntil(index, PARAM_R, index, 0, Cur->Output[0].Address); - - if (output >= 0) - { - bool insert = false; - for (int p = 0; p < PSH_OPCODE_DEFS[Cur->Opcode]._In; ++p) - { - if (Cur->Output[0].Type == Cur->Parameters[p].Type - && Cur->Output[0].Address == Cur->Parameters[p].Address) - { - insert = true; - Cur->Parameters[p].Address = output; - Cur->Parameters[p].Type = PARAM_R; - } - } - if (insert) - { - PSH_INTERMEDIATE_FORMAT Ins = {}; - - Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(PARAM_R, output, 0); - Ins.Parameters[0].SetRegister(Cur->Output[0].Type, Cur->Output[0].Address, 0); - Ins.CommentString = "; Inserted to avoid LRP parameters referencing the output register"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed LRP s0,1,s2 into a MAD s0,1-s2,s2"); - return true; - } - } - - return false; -} // SimplifyLRP - -bool PSH_XBOX_SHADER::FixupCND(PPSH_INTERMEDIATE_FORMAT Cur, int index) -{ - PSH_INTERMEDIATE_FORMAT Ins = {}; - - // TODO: Look into using predicate register - Cur->Opcode = PO_CMP; - - int output = NextFreeRegisterFromIndexUntil(index, PARAM_R, index); - Ins.Initialize(PO_SUB); - Ins.Output[0].SetRegister(PARAM_R, output, Cur->Parameters[0].Mask); - Ins.Parameters[0] = Cur->Parameters[0]; - Ins.Parameters[1].SetScaleConstRegister(0.5f, Recompiled); - Cur->Parameters[0] = Ins.Output[0]; - Cur->Parameters[0].Modifiers = (1 << ARGMOD_NEGATE); - std::swap(Cur->Parameters[1], Cur->Parameters[2]); - Ins.CommentString = Cur->CommentString = "; Changed CND into SUB CMP"; - InsertIntermediate(&Ins, index); - EmuLog(LOG_LEVEL::DEBUG, "; Changed CND into SUB CMP"); - return true; -} - -bool 
PSH_XBOX_SHADER::FixupPixelShader() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - - bool Result = RemoveNops(); - - // TODO : Fixup writes to read-only registers (V0, V1) via another free register (if possible) - // TODO : Fixup the usage of non-existent register numbers (like FakeRegNr_Sum and FakeRegNr_Prod) - // TODO : Fixup the usage of the unsupported INSMOD_BIAS and INSMOD_BX2 instruction modifiers - // TODO : Use the INSMOD_SAT instruction modifier instead of the ARGMOD_SATURATE argument modifier - // TODO : Convert numeric arguments (-2, -1, 0, 1, 2) into modifiers on the other argument - - if (CombineInstructions()) - Result = true; - - if (MoveRemovableParametersRight()) - Result = true; - - // Simplify instructions, which can help to compress the result : - i = IntermediateCount; - while (i > StartPos) - { - --i; - Cur = &(Intermediate[i]); - - switch (Cur->Opcode) { - case PO_MOV: - if (SimplifyMOV(Cur)) - Result = true; - break; - - case PO_ADD: - if (SimplifyADD(Cur)) - Result = true; - break; - - case PO_MAD: - if (SimplifyMAD(Cur, i)) - Result = true; - break; - - case PO_SUB: - if (SimplifySUB(Cur)) - Result = true; - break; - - case PO_MUL: - if (SimplifyMUL(Cur)) - Result = true; - break; - - case PO_LRP: - if (SimplifyLRP(Cur, i)) - Result = true; - break; - - case PO_CND: - if (FixupCND(Cur, i)) - Result = true; - break; - } // case - } // for - - // If the above code made any alteration, repeat it as some changes require a followup (like MUL>MOV>NOP) : - if (Result) - { - Log("Fixup intermediate result"); - FixupPixelShader(); - } - return Result; -} // FixupPixelShader - -bool PSH_XBOX_SHADER::FixInvalidSrcSwizzle() -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - - bool Result = false; - for (i = StartPos; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->IsArithmetic()) - { - // Loop over the input arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._In; j++) - { - CurArg = 
&(Cur->Parameters[j]); - - // Fix "Invalid src swizzle" : - if (CurArg->Mask == MASK_RGB) - { - CurArg->Mask = MASK_RGBA; - Result = true; - } - } - } - } - return Result; -} - -bool PSH_XBOX_SHADER::FixMissingR0a() -// On the Xbox, the alpha portion of the R0 register is initialized to -// the alpha component of texture 0 if texturing is enabled for texture 0 : -{ - int R0aDefaultInsertPos; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - - // Detect a read of r0.a without a write, as we need to insert a "MOV r0.a, t0.a" as default (like the xbox has) : - R0aDefaultInsertPos = -1; - for (i = 0; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->Opcode < PO_TEX) // TODO : Check explicitly which instruction types are handled below - continue; - - // Make sure if we insert at all, it'll be after the DEF's : - if (R0aDefaultInsertPos < 0) - R0aDefaultInsertPos = i; - - // First, check if r0.a is read by this opcode : - if (Cur->ReadsFromRegister(PARAM_R, 0, MASK_A)) - { - R0aDefaultInsertPos = i; - break; - } - - // If this opcode writes to r0.a, we're done : - if (Cur->WritesToRegister(PARAM_R, 0, MASK_A)) - return false; - } - - if (R0aDefaultInsertPos >= 0) - { - // Insert a new opcode : MOV r0.a, t0.a - NewIns.Initialize(PO_MOV); - NewIns.Output[0].SetRegister(PARAM_R, 0, MASK_A); - NewIns.Parameters[0] = NewIns.Output[0]; - NewIns.Parameters[0].Type = PARAM_T; - NewIns.CommentString = "Inserted r0.a default"; - InsertIntermediate(&NewIns, R0aDefaultInsertPos); - return true; - } - return false; -} // FixMissingR0a - -bool PSH_XBOX_SHADER::FixMissingR1a() -// On the Xbox, the alpha portion of the R1 register is initialized to -// the alpha component of texture 1 if texturing is enabled for texture 1 : -{ - int R1aDefaultInsertPos; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - - // Detect a read of r1.a without a write, as we need to insert a "MOV r1.a, t1.a" as default (like 
the xbox has) : - R1aDefaultInsertPos = -1; - for (i = 0; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->Opcode < PO_TEX) // TODO : Check explicitly which instruction types are handled below - continue; - - // First, check if r1.a is read by this opcode : - if (Cur->ReadsFromRegister(PARAM_R, 1, MASK_A)) - { - // Make sure if we insert at all, it'll be after the DEF's : - if (R1aDefaultInsertPos < 0) - R1aDefaultInsertPos = i; - - R1aDefaultInsertPos = i; - break; - } - - // If this opcode writes to r1.a, we're done : - if (Cur->WritesToRegister(PARAM_R, 1, MASK_A)) - return false; - } - - if (R1aDefaultInsertPos >= 0) - { - // Insert a new opcode : MOV r1.a, t1.a - NewIns.Initialize(PO_MOV); - NewIns.Output[0].SetRegister(PARAM_R, 1, MASK_A); - NewIns.Parameters[0] = NewIns.Output[0]; - NewIns.Parameters[0].Type = PARAM_T; - NewIns.CommentString = "Inserted r1.a default"; - InsertIntermediate(&NewIns, R1aDefaultInsertPos); - return true; - } - - return false; -} // FixMissingR1a - -bool PSH_XBOX_SHADER::FixUninitializedReads() -// On the Xbox, the alpha portion of the R1 register is initialized to -// the alpha component of texture 1 if texturing is enabled for texture 1 : -{ - int R1aDefaultInsertPos; - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - PSH_INTERMEDIATE_FORMAT NewIns = {}; - bool Result = false; - - int readPositions[32][4] = {}; - int writePositions[32][4] = {}; - int initPositions[32] = {}; - int initMasks[32] = {}; - - // Detect a read of r1.a without a write, as we need to insert a "MOV r1.a, t1.a" as default (like the xbox has) : - R1aDefaultInsertPos = -1; - for (i = 0; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - - for (int j = 0; j < MaxRegisterCount(PARAM_R); ++j) - { - // check reads - if (readPositions[j][0] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_R)) - { - readPositions[j][0] = i; - } - if (readPositions[j][1] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_G)) - { - readPositions[j][1] = i; - } 
- if (readPositions[j][2] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_B)) - { - readPositions[j][2] = i; - } - if (readPositions[j][3] == 0 && Cur->ReadsFromRegister(PARAM_R, j, MASK_A)) - { - readPositions[j][3] = i; - } - - // check writes - if (writePositions[j][0] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_R)) - { - writePositions[j][0] = i; - } - if (writePositions[j][1] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_G)) - { - writePositions[j][1] = i; - } - if (writePositions[j][2] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_B)) - { - writePositions[j][2] = i; - } - if (writePositions[j][3] == 0 && Cur->WritesToRegister(PARAM_R, j, MASK_A)) - { - writePositions[j][3] = i; - } - } - } - - for (int j = 0; j < MaxRegisterCount(PARAM_R); ++j) - { - int mask = 0; - int pos = IntermediateCount; - for (int i = 0; i < 4; ++i) - { - if (readPositions[j][i] <= writePositions[j][i] && readPositions[j][i] != 0) - { - mask |= (1 << i); - pos = std::min(pos, readPositions[j][i]); - } - } - - initPositions[j] = pos; - initMasks[j] = mask; - } - - NewIns.Initialize(PO_MOV); - NewIns.CommentString = "; Inserted to initialize register"; - for (int j = 0; j < MaxRegisterCount(PARAM_R); ++j) - { - int mask = initMasks[j]; - if (mask) - { - // Insert a new opcode : MOV r#.???, c0.??? 
- NewIns.Output[0].SetRegister(PARAM_R, j, mask); - NewIns.Parameters[0].SetScaleConstRegister(0.0f, Recompiled); - // r0 and r1 take their alpha from the respective texture coordinate - if (j < PSH_XBOX_MAX_R_REGISTER_COUNT) - { - mask &= MASK_RGB; - } - - InsertIntermediate(&NewIns, std::min(StartPos, initPositions[j])); - Result = true; - } - } - - return Result; -} // FixUninitializedReads - - -bool PSH_XBOX_SHADER::FixCoIssuedOpcodes() -{ - int i; - PPSH_INTERMEDIATE_FORMAT Cur; - bool Result = false; - // Since we're targetting m_PSVersion >= D3DPS_VERSION(2, 0), co-issued instructions are no longer supported, thus reset all IsCombined flags : - for (i = StartPos; i < IntermediateCount; i++) - { - Cur = &(Intermediate[i]); - if (Cur->IsArithmetic()) - { - if (Cur->IsCombined) - { - Cur->IsCombined = false; - Result = true; - } - } - } - return Result; -} - -bool PSH_XBOX_SHADER::FixInvalidDstRegister() -{ - int i, j; - PPSH_INTERMEDIATE_FORMAT Cur; - PPSH_IMD_ARGUMENT CurArg; - - bool Result = false; - for (i = IntermediateCount - 1; i >= StartPos; --i) - { - Cur = &(Intermediate[i]); - // Skip non-arithmetic opcodes - if (!Cur->IsArithmetic()) - continue; - - // Loop over the output arguments : - for (j = 0; j < PSH_OPCODE_DEFS[Cur->Opcode]._Out; j++) - { - CurArg = &(Cur->Output[j]); - - if (IsValidNativeOutputRegister(CurArg->Type, CurArg->Address)) - continue; - - int lastUsed = RegisterIsUsedFromIndexUntil(i + 1, CurArg->Type, CurArg->Address); - - PSH_ARGUMENT_TYPE dstType = PARAM_R; - int dstIndex = -1; - - if (IsValidNativeOutputRegister(PARAM_T)) - { - dstType = PARAM_T; - dstIndex = NextFreeRegisterFromIndexUntil(i + 1, PARAM_T, lastUsed); - } - - if (dstIndex == -1) - { - dstType = PARAM_R; - dstIndex = NextFreeRegisterFromIndexUntil(i + 1, PARAM_R, lastUsed); - } - - if (dstIndex != -1) - { - Result = true; - - if (Cur->ReadsFromRegister(CurArg->Type, CurArg->Address)) - { - if (lastUsed >= 0) ++lastUsed; - - PSH_INTERMEDIATE_FORMAT Ins = {}; - - 
Ins.Initialize(PO_MOV); - Ins.Output[0].SetRegister(dstType, dstIndex, 0); - Ins.Parameters[0].SetRegister(CurArg->Type, CurArg->Address, 0); - InsertIntermediate(&Ins, i); - ++Cur; - CurArg = &(Cur->Output[j]); - } - - ReplaceInputRegisterFromIndexOnwards(i + 1, CurArg->Type, CurArg->Address, dstType, dstIndex, lastUsed); - - CurArg->Type = dstType; - CurArg->Address = dstIndex; - } - } - } - return Result; -} - -// TODO: Refactor and optimize -bool PSH_XBOX_SHADER::FixOverusedRegisters() -{ - int i; - - bool Result = false; - - PSH_INTERMEDIATE_FORMAT Ins = {}; - Ins.Initialize(PO_MOV); - - // For all opcodes, try to put constant and discarded arguments in the rightmost slot, to ease following analysis : - i = IntermediateCount; - while (i > StartPos) - { - --i; - - int InsertPos = i; - - // Skip this operation on LRP instructions - // This prevents "error X5765: Dest register for LRP cannot be the same as first or third source register" in WWE RAW2 - if (Intermediate[i].Opcode == PO_LRP) { - continue; - } - - // Handle PARAM_C, PARAM_V and PARAM_T (in that order) : - for (int t = PARAM_C; t >= PARAM_T; t--) { - enum PSH_ARGUMENT_TYPE param_t = (enum PSH_ARGUMENT_TYPE)t; - int max_total = (t == PARAM_C) ? 2 : (t == PARAM_V) ? 
999 : 1; - int addressCount = 0; - int total = 0; - while (Intermediate[i].ReadsFromRegister(param_t, -1, addressCount, total) && (addressCount > 1 || total > max_total)) - { - for (int p = 0; p < PSH_OPCODE_DEFS[Intermediate[i].Opcode]._In; ++p) - { - if (Intermediate[i].Parameters[p].Type == param_t) - { - int output = NextFreeRegisterFromIndexUntil(i, PARAM_R, i); - - // This inserts a MOV opcode that writes to R, reading from a C, V or T register - Ins.Output[0].SetRegister(PARAM_R, output, 0); - Ins.Parameters[0].SetRegister(Intermediate[i].Parameters[p].Type, Intermediate[i].Parameters[p].Address, 0); - InsertIntermediate(&Ins, InsertPos); - ++InsertPos; - - ReplaceInputRegisterFromIndexOnwards(InsertPos, Intermediate[InsertPos].Parameters[p].Type, Intermediate[InsertPos].Parameters[p].Address, PARAM_R, output, InsertPos); - Result = true; - break; - } - } - } - } - } - return Result; -} // FixOverusedRegisters - -// TODO : FocusBlur sample needs a zero in 'cnd' opcode - -/* RPSRegisterObject */ - -void RPSRegisterObject::Decode(uint8_t Value, bool aIsAlpha) -{ - IsAlpha = aIsAlpha; - Reg = (PS_REGISTER)(Value); -} - -std::string RPSRegisterObject::DecodedToString() -{ - assert((PS_REGISTER_DISCARD <= Reg) && (Reg <= PS_REGISTER_EF_PROD)); - - return PS_RegisterStr[Reg + 1]; -} - -/* RPSInputRegister */ - -void RPSInputRegister::Decode(uint8_t Value, bool aIsAlpha) -{ - RPSRegisterObject::Decode(Value & PS_NoChannelMask, aIsAlpha); - - Channel = (PS_CHANNEL)(Value & PS_CHANNEL_ALPHA); - InputMapping = (PS_INPUTMAPPING)(Value & 0xe0); - - // Remove the above flags from the register : - Reg = (PS_REGISTER)(Reg & 0xf); - - // Check if the input Register is ZERO, in which case we want to allow the extended registers : - if (Reg == PS_REGISTER_ZERO) - { - switch (InputMapping) { - case PS_REGISTER_ONE: case PS_REGISTER_NEGATIVE_ONE: case PS_REGISTER_ONE_HALF: case PS_REGISTER_NEGATIVE_ONE_HALF: - // These input mapping have their own register - keep these in 
'Reg', so we can check for them : - Reg = (PS_REGISTER)(InputMapping); - break; - - case PS_INPUTMAPPING_EXPAND_NEGATE: - // This case has no separate PS_REGISTER define, but when applied to zero, also results in one : - Reg = PS_REGISTER_ONE; - break; - } - } -} - -std::string RPSInputRegister::DecodedToString() -{ - std::string Result; - std::string InputMappingStr = ""; - switch (Reg) { - case PS_REGISTER_ZERO: - { - Result = PS_RegisterStr[0]; - return Result; - } - case PS_REGISTER_ONE: - Result = PS_RegisterStr[0x11]; - break; - case PS_REGISTER_NEGATIVE_ONE: - Result = PS_RegisterStr[0x12]; - break; - case PS_REGISTER_ONE_HALF: - Result = PS_RegisterStr[0x13]; - break; - case PS_REGISTER_NEGATIVE_ONE_HALF: - Result = PS_RegisterStr[0x14]; - break; - default: - Result = RPSRegisterObject::DecodedToString(); - InputMappingStr = " | " + PS_InputMappingStr[(InputMapping >> 5) & 7]; - } - - // Render the channel as a string : - Result = Result + " | " + PS_ChannelStr[(Channel > 0) ? /*Alpha*/2 : (IsAlpha ? 
/*Blue*/1 : /*RGB*/0)] + InputMappingStr; - return Result; -} - -/* RPSCombinerOutput */ - -void RPSCombinerOutput::Decode(uint8_t Value, DWORD PSInputs, bool aIsAlpha) -{ - RPSRegisterObject::Decode(Value, aIsAlpha); - - // Decode PSAlphaInputs / PSRGBInputs : - Input1.Decode((PSInputs >> 8) & 0xFF, aIsAlpha); - Input2.Decode((PSInputs >> 0) & 0xFF, aIsAlpha); -} - -/* RPSCombinerStageChannel */ - -void RPSCombinerStageChannel::Decode(DWORD PSInputs, DWORD PSOutputs, bool aIsAlpha/* = false*/) -{ - // Get the combiner output flags : - CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); - - // Decompose the combiner output flags : - OutputSUM.OutputAB.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) > 0; // false=Multiply, true=DotProduct - OutputSUM.OutputCD.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0; // false=Multiply, true=DotProduct - - if (!aIsAlpha) - { - OutputSUM.OutputAB.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) > 0; // false=Alpha-to-Alpha, true=Blue-to-Alpha - OutputSUM.OutputCD.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) > 0; // false=Alpha-to-Alpha, true=Blue-to-Alpha - } - - // Decode PSAlphaOutputs / PSRGBOutputs and PSAlphaInputs / PSRGBInputs : - OutputSUM.OutputAB.Decode((PSOutputs >> 4) & 0xF, (PSInputs >> 16) & 0xFFFF, aIsAlpha); - OutputSUM.OutputCD.Decode((PSOutputs >> 0) & 0xF, (PSInputs >> 0) & 0xFFFF, aIsAlpha); - OutputSUM.Decode((PSOutputs >> 8) & 0xF, aIsAlpha); - - AB_CD_SUM = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) == 0; // true=AB+CD, false=MUX(AB,CD) based on R0.a -} - // Note : On a hardware level, there are only 4 pixel shaders instructions present in the Nvidia NV2A GPU : // - xdd (dot/dot/discard) > calculating AB=A.B and CD=C.D // - xdm (dot/mul/discard) > calculating AB=A.B and CD=C*D @@ -5710,185 +91,537 @@ void RPSCombinerStageChannel::Decode(DWORD PSInputs, DWORD PSOutputs, bool aIsAl // 
"-C0_bias_x2" shifts range from [ 0..1] to [-1..1] // "-V0_bias_d2" shifts range from [-1..1] to [ 0..1] +/* RPSRegisterObject */ + +void RPSRegisterObject::Decode(uint8_t Value) +{ + Reg = (PS_REGISTER)(Value & PS_REGISTER_EF_PROD); // = mask = 0x0f + + // Validate correctness + if (Reg == 6) LOG_TEST_CASE("Unknown PS_REGISTER : 6"); + if (Reg == 7) LOG_TEST_CASE("Unknown PS_REGISTER : 7"); +} + +/* RPSInputRegister */ + +void RPSInputRegister::Decode(uint8_t Value, unsigned stage_nr, bool isRGB) +{ + RPSRegisterObject::Decode(Value); + + Channel = (PS_CHANNEL)(Value & PS_CHANNEL_ALPHA); // = mask = 0x10 + InputMapping = (PS_INPUTMAPPING)(Value & PS_INPUTMAPPING_SIGNED_NEGATE); // = mask = 0xe0 + + if (stage_nr == 9) { + // In final combiner stage, convert C0 into FC0, and C1 into FC1, to discern them as separate registers + if (Reg == PS_REGISTER_C0) Reg = PS_REGISTER_FC0; + if (Reg == PS_REGISTER_C1) Reg = PS_REGISTER_FC1; + } + + // Validate correctness + if (stage_nr <= xbox::X_PSH_COMBINECOUNT) { + if (Reg == PS_REGISTER_FOG) { + if (!isRGB) LOG_TEST_CASE("PS_REGISTER_FOG input not allowed in Alpha register combiner"); + else if (Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_REGISTER_FOG.a input not allowed in RGB register combiner"); + } + if (Reg == PS_REGISTER_V1R0_SUM) LOG_TEST_CASE("PS_REGISTER_V1R0_SUM input only allowed in final combiner"); + if (Reg == PS_REGISTER_EF_PROD) LOG_TEST_CASE("PS_REGISTER_EF_PROD input only allowed in final combiner"); + } + else { // final combiner + if (InputMapping == PS_INPUTMAPPING_EXPAND_NORMAL) LOG_TEST_CASE("PS_INPUTMAPPING_EXPAND_NORMAL not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_EXPAND_NEGATE) LOG_TEST_CASE("PS_INPUTMAPPING_EXPAND_NEGATE not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_HALFBIAS_NORMAL) LOG_TEST_CASE("PS_INPUTMAPPING_HALFBIAS_NORMAL not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_HALFBIAS_NEGATE) 
LOG_TEST_CASE("PS_INPUTMAPPING_HALFBIAS_NEGATE not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_SIGNED_IDENTITY) LOG_TEST_CASE("PS_INPUTMAPPING_SIGNED_IDENTITY not allowed in final combiner"); + if (InputMapping == PS_INPUTMAPPING_SIGNED_NEGATE) LOG_TEST_CASE("PS_INPUTMAPPING_SIGNED_NEGATE not allowed in final combiner"); + } +} + +/* RPSCombinerOutput */ + +void RPSCombinerOutput::Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_nr, bool isRGB) +{ + RPSRegisterObject::Decode(Value); + + // Decode PSAlphaInputs / PSRGBInputs : + Input[0].Decode((PSInputs >> 8) & 0xFF, stage_nr, isRGB); + Input[1].Decode((PSInputs >> 0) & 0xFF, stage_nr, isRGB); + + // Validate correctness + if (Reg == PS_REGISTER_C0) LOG_TEST_CASE("PS_REGISTER_C0 not allowed as output"); + if (Reg == PS_REGISTER_C1) LOG_TEST_CASE("PS_REGISTER_C1 not allowed as output"); + if (Reg == PS_REGISTER_FOG) LOG_TEST_CASE("PS_REGISTER_FOG not allowed as output"); + if (Reg == PS_REGISTER_V1R0_SUM) LOG_TEST_CASE("PS_REGISTER_V1R0_SUM not allowed as output"); + if (Reg == PS_REGISTER_EF_PROD) LOG_TEST_CASE("PS_REGISTER_EF_PROD not allowed as output"); +} + +/* RPSCombinerStageChannel */ + +void RPSCombinerStageChannel::Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB) +{ + // Decode PSAlphaOutputs / PSRGBOutputs and PSAlphaInputs / PSRGBInputs : + OutputCD.Decode((PSOutputs >> 0) & 0xF, (PSInputs >> 0 ) & 0xFFFF, stage_nr, isRGB); + OutputAB.Decode((PSOutputs >> 4) & 0xF, (PSInputs >> 16) & 0xFFFF, stage_nr, isRGB); + OutputMUX_SUM.Decode((PSOutputs >> 8) & 0xF); + + // Get the combiner output flags : + PS_COMBINEROUTPUT CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); + + // Decompose the combiner output flags : + OutputCD.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0; // False=Multiply, True=DotProduct + OutputAB.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) > 0; // False=Multiply, 
True=DotProduct + AB_CD_MUX = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) > 0; // False=AB+CD, True=MUX(AB,CD) based on R0.a + CombinerOutputMapping = (PS_COMBINEROUTPUT)(CombinerOutputFlags & PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS); // = mask = 0x38 + OutputCD.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) >> 6; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha + OutputAB.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) >> 7; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha + + // Discover test-cases + // Check for 'discard-all-outputs' + if (OutputAB.DotProduct || OutputCD.DotProduct) { + if ((OutputAB.Reg == PS_REGISTER_DISCARD) && (OutputCD.Reg == PS_REGISTER_DISCARD)) LOG_TEST_CASE("All two outputs discarded"); + } else { + // if ((OutputAB.Reg == PS_REGISTER_DISCARD) && (OutputCD.Reg == PS_REGISTER_DISCARD) && (OutputMUX_SUM.Reg == PS_REGISTER_DISCARD)) LOG_TEST_CASE("All three outputs discarded"); // Test-case : XDK sample : Minnaert (on Stage2.Alpha) + } + + // Validate correctness + if ((PSOutputs & ~0x000FFFFF) > 0) LOG_TEST_CASE("Unknown PS_COMBINEROUTPUT flag bits detected"); + if (CombinerOutputMapping == PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS) LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS unsupported on NV2A?"); + if (CombinerOutputMapping == PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS) LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS unsupported on NV2A?"); + if (isRGB) { + if (OutputMUX_SUM.Reg != PS_REGISTER_DISCARD) { + if (OutputCD.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_DOT_PRODUCT detected without PS_REGISTER_DISCARD in MUX_SUM"); + if (OutputAB.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_AB_DOT_PRODUCT detected without PS_REGISTER_DISCARD in MUX_SUM"); + } + if (OutputCD.DotProduct) if (!OutputAB.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_DOT_PRODUCT detected without PS_COMBINEROUTPUT_AB_DOT_PRODUCT (so, xmd 'opcode')"); // Need test-cases Note : Undefined xmd 
(mul/dot) *is* supported in CxbxPixelShaderTemplate.hlsl + } else { // DotProduct and BlueToAlpha are not valid for Alpha + if (OutputCD.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_DOT_PRODUCT shouldn't be set for Alpha"); + if (OutputAB.DotProduct) LOG_TEST_CASE("PS_COMBINEROUTPUT_AB_DOT_PRODUCT shouldn't be set for Alpha"); + if (OutputCD.BlueToAlpha) LOG_TEST_CASE("PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA shouldn't be set for Alpha"); + if (OutputAB.BlueToAlpha) LOG_TEST_CASE("PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA shouldn't be set for Alpha"); + } +} + /* RPSFinalCombiner */ -void RPSFinalCombiner::Decode(const DWORD PSFinalCombinerInputsABCD, const DWORD PSFinalCombinerInputsEFG, const DWORD PSFinalCombinerConstants) +void RPSFinalCombiner::Decode(const uint32_t PSFinalCombinerInputsABCD, const uint32_t PSFinalCombinerInputsEFG) { - InputA.Decode((PSFinalCombinerInputsABCD >> 24) & 0xFF, /*aIsAlpha=*/false); - InputB.Decode((PSFinalCombinerInputsABCD >> 16) & 0xFF, /*aIsAlpha=*/false); - InputC.Decode((PSFinalCombinerInputsABCD >> 8) & 0xFF, /*aIsAlpha=*/false); - InputD.Decode((PSFinalCombinerInputsABCD >> 0) & 0xFF, /*aIsAlpha=*/false); + Input[0].Decode((PSFinalCombinerInputsABCD >> 24) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); + Input[1].Decode((PSFinalCombinerInputsABCD >> 16) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); + Input[2].Decode((PSFinalCombinerInputsABCD >> 8) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); + Input[3].Decode((PSFinalCombinerInputsABCD >> 0) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); + Input[4].Decode((PSFinalCombinerInputsEFG >> 24) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); + Input[5].Decode((PSFinalCombinerInputsEFG >> 16) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); + Input[6].Decode((PSFinalCombinerInputsEFG >> 8) & 0xFF, /*stage_nr=*/9, /*isRGB=*/false); // Note : Final combiner input G must be a single component, and must thus be decoded as Alpha + PS_FINALCOMBINERSETTING FinalCombinerSettingFlags = 
(PS_FINALCOMBINERSETTING)((PSFinalCombinerInputsEFG >> 0) & 0xFF); - InputE.Decode((PSFinalCombinerInputsEFG >> 24) & 0xFF, /*aIsAlpha=*/false); - InputF.Decode((PSFinalCombinerInputsEFG >> 16) & 0xFF, /*aIsAlpha=*/false); - InputG.Decode((PSFinalCombinerInputsEFG >> 8) & 0xFF, /*aIsAlpha=*/false); - FinalCombinerFlags = (PS_FINALCOMBINERSETTING)((PSFinalCombinerInputsEFG >> 0) & 0xFF); + ComplementV1 = FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1; + ComplementR0 = FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0; + ClampSum = FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_CLAMP_SUM; - FinalCombinerC0Mapping = (PSFinalCombinerConstants >> 0) & 0xF; - FinalCombinerC1Mapping = (PSFinalCombinerConstants >> 4) & 0xF; - dwPS_GLOBALFLAGS = (PSFinalCombinerConstants >> 8) & 0x1; + // Discover test-cases + // if (Input[0].Channel != PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_RGB/PS_CHANNEL_BLUE detected on final combiner A input"); // Note : test-case ModifyPixelShader uses PS_REGISTER_FOG.rgb and seems to expect .rgb handling (not PS_CHANNEL_BLUE's .b) + if (Input[4].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner E input"); // Need test-case to determine how this should behave (calculating EF_PROD) : .aaa instead of .rgb? + if (Input[5].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner F input"); // Need test-case to determine how this should behave (calculating EF_PROD) : .aaa instead of .rgb? 
+ // if (Input[6].Channel == PS_CHANNEL_BLUE) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner G input"); // PS_CHANNEL_BLUE (==0==PS_CHANNEL_RGB) uses G.b + // if (Input[6].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner G input"); // PS_CHANNEL_ALPHA (==1) uses .a Test-case : XDK samples BumpDemo,BumpEarth,BumpLens,Explosion + + // Validate correctness + if ((FinalCombinerSettingFlags & ~0xE0) > 0) LOG_TEST_CASE("Unknown FinalCombinerSetting bits detected"); } -void XTL_DumpPixelShaderToFile(xbox::X_D3DPIXELSHADERDEF *pPSDef) +/* DecodedRegisterCombiner */ + +void DecodedRegisterCombiner::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]) { - static int PshNumber = 0; // Keep track of how many pixel shaders we've attempted to convert. - // Don't dump more than 100 shaders, to prevent cluttering the filesystem : - if (PshNumber >= 100) - return; + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + psTextureModes[i] = (PS_TEXTUREMODES)((pPSDef->PSTextureModes >> (i * 5)) & 0x1F); - char szPSDef[32]; + // Discover test-cases + // if (psTextureModes[i] == PS_TEXTUREMODES_NONE) LOG_TEST_CASE("PS_TEXTUREMODES_NONE"); + // if (psTextureModes[i] == PS_TEXTUREMODES_PROJECT2D) LOG_TEST_CASE("PS_TEXTUREMODES_PROJECT2D"); + if (psTextureModes[i] == PS_TEXTUREMODES_PROJECT3D) LOG_TEST_CASE("PS_TEXTUREMODES_PROJECT3D"); // Test-case: XDK sample TechCertGame,NoSortAlphaBlend,VolumeLight + if (psTextureModes[i] == PS_TEXTUREMODES_CUBEMAP) LOG_TEST_CASE("PS_TEXTUREMODES_CUBEMAP"); // Test-case : XDK sample TechCertGame,Minnaert TODO : More test cases needed + if (psTextureModes[i] == PS_TEXTUREMODES_PASSTHRU) LOG_TEST_CASE("PS_TEXTUREMODES_PASSTHRU"); // Test-case : XDK sample BumpDemo TODO : More test cases needed + if (psTextureModes[i] == PS_TEXTUREMODES_CLIPPLANE) LOG_TEST_CASE("PS_TEXTUREMODES_CLIPPLANE"); // Test-case : XDK sample UserClipPlane TODO : More test 
cases needed + // if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP"); // Test-case : XDK sample BumpEarth, BumpLens + if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP_LUM) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP_LUM"); // Test-case : XDK sample BumpEarth TODO : More test cases needed + if (psTextureModes[i] == PS_TEXTUREMODES_BRDF) LOG_TEST_CASE("PS_TEXTUREMODES_BRDF"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ST) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ST"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ZW) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ZW"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_DIFF) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_DIFF"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_3D) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_3D"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_CUBE) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_CUBE"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_AR) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_AR"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_GB) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_GB"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOTPRODUCT) LOG_TEST_CASE("PS_TEXTUREMODES_DOTPRODUCT"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST"); - sprintf(szPSDef, "PSDef%.03d.txt", PshNumber++); - FILE* out = fopen(szPSDef, "w"); - if (out) - { - fprintf(out, PSH_XBOX_SHADER::OriginalToString(pPSDef).c_str()); - fclose(out); - } + // Validate correctness + if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP_LUM) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP_LUM only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_BRDF) 
if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_BRDF only allowed in stage 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ST) { + if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ST only allowed in stage 2 or 3"); + if (psTextureModes[i - 1] != PS_TEXTUREMODES_DOTPRODUCT) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ST only allowed after PS_TEXTUREMODES_DOTPRODUCT"); + } + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_ZW) { + if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ZW only allowed in stage 2 or 3"); + if (psTextureModes[i - 1] != PS_TEXTUREMODES_DOTPRODUCT) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_ZW only allowed after PS_TEXTUREMODES_DOTPRODUCT"); + } + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_DIFF) if (i != 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_DIFF only allowed in stage 2"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC only allowed in stage 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_3D) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_3D only allowed in stage 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_STR_CUBE) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_STR_CUBE only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_AR) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_AR only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DPNDNT_GB) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_DPNDNT_GB only allowed in stage 1, 2 or 3"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOTPRODUCT) if (i < 1 || i > 2) LOG_TEST_CASE("PS_TEXTUREMODES_DOTPRODUCT only allowed in stage 1 or 2"); + if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) if (i < 3) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST only allowed in 3"); + if (psTextureModes[i] > PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) LOG_TEST_CASE("Invalid PS_TEXTUREMODES in stage?"); + } + + // Validate correctness + if ((pPSDef->PSTextureModes & ~0x000FFFFF) > 0) 
LOG_TEST_CASE("Unknown PSTextureModes bits detected"); } -PSH_RECOMPILED_SHADER XTL_EmuRecompilePshDef(xbox::X_D3DPIXELSHADERDEF *pPSDef) +void DecodedRegisterCombiner::GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]) { - uint32_t PSVersion = D3DPS_VERSION(2, 0); // Use pixel shader model 2.0 by default + psDotMapping[0] = (PS_DOTMAPPING)(0); + for (int i = 1; i < xbox::X_D3DTS_STAGECOUNT; i++) { + psDotMapping[i] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> ((i - 1) * 4)) & 0x7); - extern D3DCAPS g_D3DCaps; - - if (g_D3DCaps.PixelShaderVersion > D3DPS_VERSION(3, 0)) { - // TODO : Test PSVersion = D3DPS_VERSION(3, 0); // g_D3DCaps.PixelShaderVersion; - // TODO : Make the pixel shader version configurable + // Discover test-cases + // if (psDotMapping[i] == PS_DOTMAPPING_ZERO_TO_ONE) LOG_TEST_CASE("PS_DOTMAPPING_ZERO_TO_ONE"); // Note : Most common scenario, no need for test-cases + if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1_D3D) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1_D3D"); // Test-case : XDK samples BumpDemo, Minnaert + if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1_GL) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1_GL"); + if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_1) LOG_TEST_CASE("PS_DOTMAPPING_HILO_1"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE_D3D) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE_D3D"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE_GL) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE_GL"); + if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE"); } - PSH_XBOX_SHADER PSH = {}; - PSH.SetPSVersion(PSVersion); - PSH.Decode(pPSDef); - return PSH.Convert(pPSDef); + // Validate correctness + if ((pPSDef->PSDotMapping & ~0x00000777) > 0) LOG_TEST_CASE("Unknown PSDotMapping bits detected"); } -// From Dxbx uState.pas : - 
-PSH_RECOMPILED_SHADER DxbxRecompilePixelShader(xbox::X_D3DPIXELSHADERDEF *pPSDef) +void DecodedRegisterCombiner::GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, bool psCompareModes[xbox::X_D3DTS_STAGECOUNT][4]) { -static const - char *szDiffusePixelShader = - "ps_2_x\n" - "dcl_2d s0\n" - "dcl t0.xy\n" - "texld r0, t0, s0\n" - "mov oC0, r0\n"; - std::string ConvertedPixelShaderStr; - DWORD hRet; - LPD3DXBUFFER pShader; - LPD3DXBUFFER pErrors; - DWORD *pFunction; - - // Attempt to recompile PixelShader - PSH_RECOMPILED_SHADER Result = XTL_EmuRecompilePshDef(pPSDef); - ConvertedPixelShaderStr = Result.NewShaderStr; - - // assemble the shader - pShader = nullptr; - pErrors = nullptr; - hRet = D3DXAssembleShader( - ConvertedPixelShaderStr.c_str(), - ConvertedPixelShaderStr.length(), - /*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // D3DXASM_DEBUG, - /*ppCompiledShader=*/&pShader, - /*ppCompilationErrors*/&pErrors); - - if (hRet != D3D_OK) - { - EmuLog(LOG_LEVEL::WARNING, "Could not create pixel shader"); - EmuLog(LOG_LEVEL::WARNING, std::string((char*)pErrors->GetBufferPointer(), pErrors->GetBufferSize()).c_str()); - - printf(ConvertedPixelShaderStr.c_str()); - - hRet = D3DXAssembleShader( - szDiffusePixelShader, - strlen(szDiffusePixelShader), - /*pDefines=*/nullptr, - /*pInclude=*/nullptr, - /*Flags=*/0, // Was D3DXASM_SKIPVALIDATION, - /*ppCompiledShader=*/&pShader, - /*ppCompilationErrors*/&pErrors); - - if (hRet != D3D_OK) { - EmuLog(LOG_LEVEL::WARNING, "Could not create pixel shader"); - EmuLog(LOG_LEVEL::WARNING, std::string((char*)pErrors->GetBufferPointer(), pErrors->GetBufferSize()).c_str()); - CxbxKrnlCleanup("Cannot fall back to the most simple pixel shader!"); + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + uint32_t CompareMode = (pPSDef->PSCompareMode >> (i * 4)) & 0xF; + psCompareModes[i][0] = (CompareMode & PS_COMPAREMODE_S_GE) > 0; + psCompareModes[i][1] = (CompareMode & PS_COMPAREMODE_T_GE) > 0; + psCompareModes[i][2] = 
(CompareMode & PS_COMPAREMODE_R_GE) > 0; + psCompareModes[i][3] = (CompareMode & PS_COMPAREMODE_Q_GE) > 0; } - EmuLog(LOG_LEVEL::WARNING, "We're lying about the creation of a pixel shader!"); - } + // Validate correctness + if ((pPSDef->PSCompareMode & ~0x0000FFFF) > 0) LOG_TEST_CASE("Unknown PSCompareMode bits detected"); +} - if (pShader) - { - pFunction = (DWORD*)(pShader->GetBufferPointer()); - if (hRet == D3D_OK) { - // redirect to windows d3d - hRet = g_pD3DDevice->CreatePixelShader - ( - pFunction, - &Result.ConvertedHandle - ); +void DecodedRegisterCombiner::GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]) +{ + psInputTexture[0] = -1; // Stage 0 has no predecessors (should not be used) + psInputTexture[1] = 0; // Stage 1 can only use stage 0 + psInputTexture[2] = (pPSDef->PSInputTexture >> 16) & 0x1; // Stage 2 can use stage 0 or 1 + psInputTexture[3] = (pPSDef->PSInputTexture >> 20) & 0x3; // Stage 3 can only use stage 0, 1 or 2 - if (hRet != D3D_OK) { - printf(D3DErrorString(hRet)); - } + // Discover test-cases + // if (psInputTexture[2] == 1) LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 1"); // Test-case : XDK sample BumpEarth,Explosion,ZSprite + if (psInputTexture[2] == 2) LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 2"); + // if (psInputTexture[3] == 1) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 1"); // Test-case : XDK sample Explosion,ZSprite + if (psInputTexture[3] == 2) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 2"); + if (psInputTexture[3] == 3) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 3"); + + // Validate correctness + if ((pPSDef->PSInputTexture & ~0x00310000) > 0) LOG_TEST_CASE("Unknown PSInputTexture bits detected"); +} + +void DecodedRegisterCombiner::Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef) +{ + NumberOfCombiners = (pPSDef->PSCombinerCount >> 0) & 0xF; + uint32_t CombinerCountFlags = (pPSDef->PSCombinerCount >> 8); + + CombinerMuxesOnMsb = (CombinerCountFlags & 
PS_COMBINERCOUNT_MUX_MSB) > 0; + CombinerHasUniqueC0 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) > 0; + CombinerHasUniqueC1 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C1) > 0; + + hasFinalCombiner = (pPSDef->PSFinalCombinerInputsABCD > 0) || (pPSDef->PSFinalCombinerInputsEFG > 0); + + GetPSTextureModes(pPSDef, PSTextureModes); + GetPSCompareModes(pPSDef, PSCompareMode); + GetPSDotMapping(pPSDef, PSDotMapping); + GetPSInputTexture(pPSDef, PSInputTexture); + + for (unsigned i = 0; i < NumberOfCombiners; i++) { + Combiners[i].RGB.Decode(pPSDef->PSRGBInputs[i], pPSDef->PSRGBOutputs[i], /*stage_nr=*/i, /*isRGB=*/true); + Combiners[i].Alpha.Decode(pPSDef->PSAlphaInputs[i], pPSDef->PSAlphaOutputs[i], /*stage_nr=*/i, /*isRGB=*/false); } - // Dxbx note : We must release pShader here, else we would have a resource leak! - pShader->Release(); - pShader = nullptr; - } + if (hasFinalCombiner) { + FinalCombiner.Decode(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG); + } - // Dxbx addition : We release pErrors here (or it would become a resource leak!) 
- if (pErrors) - { - pErrors->Release(); - pErrors = nullptr; - } - return Result; -} // DxbxRecompilePixelShader + TexModeAdjust = (pPSDef->PSFinalCombinerConstants >> 8) & PS_GLOBALFLAGS_TEXMODE_ADJUST; + + // Discover test-cases + if (NumberOfCombiners == 0) LOG_TEST_CASE("NumberOfCombiners is zero"); + if (!CombinerMuxesOnMsb) LOG_TEST_CASE("PS_COMBINERCOUNT_MUX_LSB detected"); // Test case required for how to implement the FCS_MUX check on LSB (see PS_COMBINERCOUNT_MUX_LSB in CxbxPixelShaderTemplate.hlsl) Note : test-case ModifyPixelShader hits this by mistake + if (TexModeAdjust) LOG_TEST_CASE("PS_GLOBALFLAGS_TEXMODE_ADJUST detected"); + + // Validate correctness + if (NumberOfCombiners > 8) LOG_TEST_CASE("NumberOfCombiners bigger than maximum (of 8)"); + if ((pPSDef->PSCombinerCount & ~0x0001110F) > 0) LOG_TEST_CASE("Unknown PSCombinerCount bits detected"); +} + +/* PSH_RECOMPILED_SHADER */ + +typedef struct s_CxbxPSDef { + xbox::X_D3DPIXELSHADERDEF PSDef; + xbox::X_D3DRESOURCETYPE ActiveTextureTypes[xbox::X_D3DTS_STAGECOUNT]; + bool DecodedTexModeAdjust; + bool DecodedHasFinalCombiner; + bool RenderStateFogEnable; + bool RenderStateSpecularEnable; + + bool IsEquivalent(s_CxbxPSDef &Another) + { + // Only compare the [*]-marked members, which forms the unique shader declaration (ignore the constants and most Xbox Direct3D8 run-time fields) : + // [*] DWORD PSAlphaInputs[8]; // X_D3DRS_PSALPHAINPUTS0..X_D3DRS_PSALPHAINPUTS7 : Alpha inputs for each stage + // [*] DWORD PSFinalCombinerInputsABCD; // X_D3DRS_PSFINALCOMBINERINPUTSABCD : Final combiner inputs + // [*] DWORD PSFinalCombinerInputsEFG; // X_D3DRS_PSFINALCOMBINERINPUTSEFG : Final combiner inputs (continued) + if (memcmp(&(PSDef.PSAlphaInputs[0]), &(Another.PSDef.PSAlphaInputs[0]), (8 + 1 + 1) * sizeof(DWORD)) != 0) + return false; + + // [-] DWORD PSConstant0[8]; // X_D3DRS_PSCONSTANT0_0..X_D3DRS_PSCONSTANT0_7 : C0 for each stage + // [-] DWORD PSConstant1[8]; // 
X_D3DRS_PSCONSTANT1_0..X_D3DRS_PSCONSTANT1_7 : C1 for each stage + // [*] DWORD PSAlphaOutputs[8]; // X_D3DRS_PSALPHAOUTPUTS0..X_D3DRS_PSALPHAOUTPUTS7 : Alpha output for each stage + // [*] DWORD PSRGBInputs[8]; // X_D3DRS_PSRGBINPUTS0..X_D3DRS_PSRGBINPUTS7 : RGB inputs for each stage + // [*] DWORD PSCompareMode; // X_D3DRS_PSCOMPAREMODE : Compare modes for clipplane texture mode + if (memcmp(&(PSDef.PSAlphaOutputs[0]), &(Another.PSDef.PSAlphaOutputs[0]), (8 + 8 + 1) * sizeof(DWORD)) != 0) + return false; + + // [-] DWORD PSFinalCombinerConstant0; // X_D3DRS_PSFINALCOMBINERCONSTANT0 : C0 in final combiner + // [-] DWORD PSFinalCombinerConstant1; // X_D3DRS_PSFINALCOMBINERCONSTANT1 : C1 in final combiner + // [*] DWORD PSRGBOutputs[8]; // X_D3DRS_PSRGBOUTPUTS0..X_D3DRS_PSRGBOUTPUTS7 : Stage 0 RGB outputs + // [*] DWORD PSCombinerCount; // X_D3DRS_PSCOMBINERCOUNT : Active combiner count (Stages 0-7) + // [*] DWORD PSTextureModes; // X_D3DRS_PS_RESERVED (copied from out-of-range X_D3DRS_PSTEXTUREMODES) : Texture addressing modes + // [*] DWORD PSDotMapping; // X_D3DRS_PSDOTMAPPING : Input mapping for dot product modes + // [*] DWORD PSInputTexture; // X_D3DRS_PSINPUTTEXTURE : Texture source for some texture modes + if (memcmp(&(PSDef.PSRGBOutputs[0]), &(Another.PSDef.PSRGBOutputs[0]), (8 + 1 + 1 + 1 + 1) * sizeof(DWORD)) != 0) + return false; + + // [-] DWORD PSC0Mapping; // Mapping of c0 regs to D3D constants + // [-] DWORD PSC1Mapping; // Mapping of c1 regs to D3D constants + // [*] DWORD PSFinalCombinerConstants; // Final combiner constant mapping + // Note : From PSFinalCombinerConstants, only the PS_GLOBALFLAGS_TEXMODE_ADJUST flag must correspond + if (DecodedTexModeAdjust != Another.DecodedTexModeAdjust) + return false; + + // All ActiveTextureTypes must correspond as well (otherwise the recompiled shader would sample incorrectly) : + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) + if (ActiveTextureTypes[i] != Another.ActiveTextureTypes[i]) + return 
false; + + // Fail if they don't correspond in their use of the final combiner unit + if (DecodedHasFinalCombiner != Another.DecodedHasFinalCombiner) + return false; + + // If they don't use the final combiner unit + if (!DecodedHasFinalCombiner) { + // Fail if they don't correspond on the render states that impact AdjustFinalCombiner + if (RenderStateFogEnable != Another.RenderStateFogEnable) + return false; + + if (RenderStateSpecularEnable != Another.RenderStateSpecularEnable) + return false; + } + + return true; + } + + void SnapshotRuntimeVariables() + { + // These values are checked in IsEquivalent to see if a cached pixel shader matches this declaration + + // Fetch currently active texture types, which impact AdjustTextureModes + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + extern xbox::X_D3DRESOURCETYPE GetXboxD3DResourceType(const xbox::X_D3DResource *pXboxResource); + + if (g_pXbox_SetTexture[i]) + ActiveTextureTypes[i] = GetXboxD3DResourceType(g_pXbox_SetTexture[i]); + else + ActiveTextureTypes[i] = xbox::X_D3DRTYPE_NONE; + } + + // Pre-decode TexModeAdjust, which impacts AdjustTextureModes + DecodedTexModeAdjust = (PSDef.PSFinalCombinerConstants >> 8) & PS_GLOBALFLAGS_TEXMODE_ADJUST; + + // Pre-decode hasFinalCombiner, which impacts AdjustFinalCombiner + DecodedHasFinalCombiner = (PSDef.PSFinalCombinerInputsABCD > 0) || (PSDef.PSFinalCombinerInputsEFG > 0); + + // Fetch all render states that impact AdjustFinalCombiner + RenderStateFogEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGENABLE) > 0; + RenderStateSpecularEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE) > 0; + } + + void AdjustTextureModes(DecodedRegisterCombiner &RC) + { + // if this flag is set, the texture mode for each texture stage is adjusted as follows: + if (!RC.TexModeAdjust) return; + + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + // First, disable not-assigned textures + if (ActiveTextureTypes[i] == 
xbox::X_D3DRTYPE_NONE) { + RC.PSTextureModes[i] = PS_TEXTUREMODES_NONE; + continue; + } + + // Then adjust some texture mode according to the currently active textures, so that the shader will use the appropriate sampling method + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT2D: + case PS_TEXTUREMODES_PROJECT3D: + case PS_TEXTUREMODES_CUBEMAP: + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_CUBETEXTURE) + RC.PSTextureModes[i] = PS_TEXTUREMODES_CUBEMAP; + else + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_VOLUMETEXTURE) + // TODO : Also do this for DepthBuffers (but not EmuXBFormatIsLinear!) : + // || EmuXBFormatIsDepthBuffer(GetXboxPixelContainerFormat(g_pXbox_SetTexture[i])) in { X_D3DFMT_D24S8, X_D3DFMT_F24S8, X_D3DFMT_D16, X_D3DFMT_F16} + RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT3D; + else + RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT2D; + break; + case PS_TEXTUREMODES_DOT_STR_3D: + case PS_TEXTUREMODES_DOT_STR_CUBE: + if (ActiveTextureTypes[i] == xbox::X_D3DRTYPE_CUBETEXTURE) + RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_CUBE; + else + RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_3D; + break; + } +/* Was : + switch (ActiveTextureTypes[i]) { + case xbox::X_D3DRTYPE_CUBETEXTURE: + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT2D: RC.PSTextureModes[i] = PS_TEXTUREMODES_CUBEMAP; break; + case PS_TEXTUREMODES_PROJECT3D: RC.PSTextureModes[i] = PS_TEXTUREMODES_CUBEMAP; break; + case PS_TEXTUREMODES_DOT_STR_3D: RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_CUBE; break; + } break; + case xbox::X_D3DRTYPE_VOLUMETEXTURE: + switch (RC.PSTextureModes[i]) { + case PS_TEXTUREMODES_PROJECT2D: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT3D; break; + case PS_TEXTUREMODES_CUBEMAP: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT3D; break; + case PS_TEXTUREMODES_DOT_STR_CUBE: RC.PSTextureModes[i] = PS_TEXTUREMODES_DOT_STR_3D; break; + } break; + case xbox::X_D3DRTYPE_TEXTURE: + switch (RC.PSTextureModes[i]) { + case 
PS_TEXTUREMODES_PROJECT3D: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT2D; break; + case PS_TEXTUREMODES_CUBEMAP: RC.PSTextureModes[i] = PS_TEXTUREMODES_PROJECT2D; break; + } break; + case xbox::X_D3DRTYPE_NONE: + RC.PSTextureModes[i] = PS_TEXTUREMODES_NONE; + break; + } +*/ + } + } + + void AdjustFinalCombiner(DecodedRegisterCombiner &RC) + { + if (RC.hasFinalCombiner) return; + + // Since we're HLE'ing Xbox D3D, mimick how it configures the final combiner when PSDef doesn't : + // TODO : Use the same final combiner when no pixel shader is set! Possibly by generating a DecodedRegisterCombiner with PSCombinerCount zero? + if (RenderStateFogEnable) { + // Configure final combiner to perform this operation : + // if (X_D3DRS_SPECULARENABLE) r0.rgb = lerp(fog.rgb, r0.rgb + v1.rgb, fog.a); + // else r0.rgb = lerp(fog.rgb, r0.rgb , fog.a); + // r0.a = abs(r0.a); + RC.FinalCombiner.Input[0/*A*/].Reg = PS_REGISTER_FOG; + RC.FinalCombiner.Input[0/*A*/].Channel = PS_CHANNEL_ALPHA; + RC.FinalCombiner.Input[1/*B*/].Reg = RenderStateSpecularEnable ? PS_REGISTER_V1R0_SUM : PS_REGISTER_R0; + RC.FinalCombiner.Input[1/*B*/].Channel = PS_CHANNEL_RGB; // Note : Not really needed, should be 0 already + RC.FinalCombiner.Input[2/*C*/].Reg = PS_REGISTER_FOG; + RC.FinalCombiner.Input[2/*C*/].Channel = PS_CHANNEL_RGB; // Note : Not really needed, should be 0 already + RC.FinalCombiner.Input[6/*G*/].Reg = PS_REGISTER_R0; + RC.FinalCombiner.Input[6/*G*/].InputMapping = PS_INPUTMAPPING_UNSIGNED_IDENTITY; + RC.FinalCombiner.Input[6/*G*/].Channel = PS_CHANNEL_ALPHA; + } + else { + // if (X_D3DRS_SPECULARENABLE) r0.rgb = r0.rgb + v1.rgb; + // else r0.rgb = r0.rgb; + RC.FinalCombiner.Input[3/*D*/].Reg = RenderStateSpecularEnable ? 
PS_REGISTER_V1R0_SUM : PS_REGISTER_R0; + RC.FinalCombiner.Input[3/*D*/].Channel = PS_CHANNEL_RGB; // Note : Not really needed, should be 0 already + } + } + + void PerformRuntimeAdjustments(DecodedRegisterCombiner &RC) + { + AdjustTextureModes(RC); + AdjustFinalCombiner(RC); + } +} +CxbxPSDef; + +typedef struct _PSH_RECOMPILED_SHADER { + CxbxPSDef CompletePSDef; + IDirect3DPixelShader* ConvertedPixelShader; +} PSH_RECOMPILED_SHADER; + +PSH_RECOMPILED_SHADER CxbxRecompilePixelShader(CxbxPSDef &CompletePSDef) +{ + DecodedRegisterCombiner RC; + RC.Decode(&(CompletePSDef.PSDef)); + CompletePSDef.PerformRuntimeAdjustments(RC); + + ID3DBlob *pShader = nullptr; + EmuCompilePixelShader(&RC, &pShader); + + PSH_RECOMPILED_SHADER Result; + Result.CompletePSDef = CompletePSDef; + Result.ConvertedPixelShader = nullptr; + if (pShader) { + DWORD *pFunction = (DWORD*)pShader->GetBufferPointer(); + if (pFunction) { + DWORD hRet = g_pD3DDevice->CreatePixelShader(pFunction, &(Result.ConvertedPixelShader)); + if (hRet != D3D_OK) { + printf(D3DErrorString(hRet)); + } + } + pShader->Release(); + } + + return Result; +} // CxbxRecompilePixelShader std::vector g_RecompiledPixelShaders; -bool ArePSDefsIdentical(const xbox::X_D3DPIXELSHADERDEF &PSDef1, const xbox::X_D3DPIXELSHADERDEF &PSDef2) -{ - // Only compare the [*]-marked members, which forms the unique shader declaration (ignore the constants and Xbox Direct3D8 run-time fields) : - // [*] DWORD PSAlphaInputs[8]; // X_D3DRS_PSALPHAINPUTS0..X_D3DRS_PSALPHAINPUTS7 : Alpha inputs for each stage - // [*] DWORD PSFinalCombinerInputsABCD; // X_D3DRS_PSFINALCOMBINERINPUTSABCD : Final combiner inputs - // [*] DWORD PSFinalCombinerInputsEFG; // X_D3DRS_PSFINALCOMBINERINPUTSEFG : Final combiner inputs (continued) - if (memcmp(&(PSDef1.PSAlphaInputs[0]), &(PSDef2.PSAlphaInputs[0]), (8 + 1 + 1) * sizeof(DWORD)) != 0) - return false; - - // [-] DWORD PSConstant0[8]; // X_D3DRS_PSCONSTANT0_0..X_D3DRS_PSCONSTANT0_7 : C0 for each stage - // [-] DWORD 
PSConstant1[8]; // X_D3DRS_PSCONSTANT1_0..X_D3DRS_PSCONSTANT1_7 : C1 for each stage - // [*] DWORD PSAlphaOutputs[8]; // X_D3DRS_PSALPHAOUTPUTS0..X_D3DRS_PSALPHAOUTPUTS7 : Alpha output for each stage - // [*] DWORD PSRGBInputs[8]; // X_D3DRS_PSRGBINPUTS0..X_D3DRS_PSRGBINPUTS7 : RGB inputs for each stage - // [*] DWORD PSCompareMode; // X_D3DRS_PSCOMPAREMODE : Compare modes for clipplane texture mode - if (memcmp(&(PSDef1.PSAlphaOutputs[0]), &(PSDef2.PSAlphaOutputs[0]), (8 + 8 + 1) * sizeof(DWORD)) != 0) - return false; - - // [-] DWORD PSFinalCombinerConstant0; // X_D3DRS_PSFINALCOMBINERCONSTANT0 : C0 in final combiner - // [-] DWORD PSFinalCombinerConstant1; // X_D3DRS_PSFINALCOMBINERCONSTANT1 : C1 in final combiner - // [*] DWORD PSRGBOutputs[8]; // X_D3DRS_PSRGBOUTPUTS0..X_D3DRS_PSRGBOUTPUTS7 : Stage 0 RGB outputs - // [*] DWORD PSCombinerCount; // X_D3DRS_PSCOMBINERCOUNT : Active combiner count (Stages 0-7) - // [*] DWORD PSTextureModes; // X_D3DRS_PS_RESERVED (copied from out-of-range X_D3DRS_PSTEXTUREMODES) : Texture addressing modes - // [*] DWORD PSDotMapping; // X_D3DRS_PSDOTMAPPING : Input mapping for dot product modes - // [*] DWORD PSInputTexture; // X_D3DRS_PSINPUTTEXTURE : Texture source for some texture modes - if (memcmp(&(PSDef1.PSRGBOutputs[0]), &(PSDef2.PSRGBOutputs[0]), (8 + 1 + 1 + 1 + 1) * sizeof(DWORD)) != 0) - return false; - - // [-] DWORD PSC0Mapping; // Mapping of c0 regs to D3D constants - // [-] DWORD PSC1Mapping; // Mapping of c1 regs to D3D constants - // [-] DWORD PSFinalCombinerConstants; // Final combiner constant mapping - return true; -} +// Mapping indices of Xbox register combiner constants to host pixel shader constants; +// The first 16 are identity-mapped (C0_1 .. C0_7 are C0 .. C7 on host, C1_0 .. C1_7 are C8 .. 
C15 on host) : +constexpr int PSH_XBOX_CONSTANT_C0 = 0; // = 0..15 +// Then two final combiner constants : +constexpr int PSH_XBOX_CONSTANT_FC0 = PSH_XBOX_CONSTANT_C0 + PSH_XBOX_MAX_C_REGISTER_COUNT; // = 16 +constexpr int PSH_XBOX_CONSTANT_FC1 = PSH_XBOX_CONSTANT_FC0 + 1; // = 17 +// Fog requires a constant (as host PS1.4 doesn't support the FOG register) +constexpr int PSH_XBOX_CONSTANT_FOG = PSH_XBOX_CONSTANT_FC1 + 1; // = 18 +// Bump Environment Material registers +constexpr int PSH_XBOX_CONSTANT_BEM = PSH_XBOX_CONSTANT_FOG + 1; // = 19..22 +// Bump map Luminance registers +constexpr int PSH_XBOX_CONSTANT_LUM = PSH_XBOX_CONSTANT_BEM + 4; // = 23..26 +// This concludes the set of constants that need to be set on host : +constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_LUM + 4; // = 27 void DxbxUpdateActivePixelShader() // NOPATCH { + xbox::X_D3DPIXELSHADERDEF *pPSDef; + PSH_RECOMPILED_SHADER *RecompiledPixelShader; + IDirect3DPixelShader *CurrentPixelShader = nullptr; + int i; + D3DCOLOR dwColor; + D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; + + HRESULT Result = D3D_OK; + // The first RenderState is PSAlpha, // The pixel shader is stored in pDevice->m_pPixelShader // For now, we still patch SetPixelShader and read from there... @@ -5901,50 +634,47 @@ void DxbxUpdateActivePixelShader() // NOPATCH // manually read from D3D__RenderState[X_D3DRS_PSTEXTUREMODES] for that one field. // See D3DDevice_SetPixelShaderCommon which implements this - const xbox::X_D3DPIXELSHADERDEF *pPSDef = g_pXbox_PixelShader != nullptr ? (xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr; - - if (pPSDef != nullptr) - { + pPSDef = g_pXbox_PixelShader != nullptr ? 
(xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr; + if (pPSDef == nullptr) { + g_pD3DDevice->SetPixelShader(nullptr); + return; + } + // Create a copy of the pixel shader definition, as it is residing in render state register slots : - xbox::X_D3DPIXELSHADERDEF PSDefCopy = *pPSDef; + CxbxPSDef CompletePSDef; + CompletePSDef.PSDef = *pPSDef; // Copy-in the PSTextureModes value which is stored outside the range of Xbox pixel shader render state slots : - PSDefCopy.PSTextureModes = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES); + CompletePSDef.PSDef.PSTextureModes = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES); + // Fetch all other values that are used in the IsEquivalent check : + CompletePSDef.SnapshotRuntimeVariables(); - const PSH_RECOMPILED_SHADER* RecompiledPixelShader = nullptr; - - // Now, see if we already have a shader compiled for this declaration : + // Now, see if we already have a shader compiled for this definition : // TODO : Change g_RecompiledPixelShaders into an unordered_map, hash just the identifying PSDef members, and add cache eviction (clearing host resources when pruning) - for (const auto& it : g_RecompiledPixelShaders) { - if (ArePSDefsIdentical(it.PSDef, PSDefCopy)) { - RecompiledPixelShader = ⁢ + RecompiledPixelShader = nullptr; + for (auto it = g_RecompiledPixelShaders.begin(); it != g_RecompiledPixelShaders.end(); ++it) { + if (CompletePSDef.IsEquivalent(it->CompletePSDef)) { + RecompiledPixelShader = &(*it); break; } } + // If none was found, recompile this shader and remember it : if (RecompiledPixelShader == nullptr) { // Recompile this pixel shader : - g_RecompiledPixelShaders.push_back(DxbxRecompilePixelShader(&PSDefCopy)); - RecompiledPixelShader = &g_RecompiledPixelShaders.back(); + g_RecompiledPixelShaders.push_back(CxbxRecompilePixelShader(CompletePSDef)); + RecompiledPixelShader = &g_RecompiledPixelShaders.back(); } // Switch to the converted 
pixel shader (if it's any different from our currently active // pixel shader, to avoid many unnecessary state changes on the local side). - IDirect3DPixelShader* ConvertedPixelShaderHandle = RecompiledPixelShader->ConvertedHandle; - - Microsoft::WRL::ComPtr CurrentPixelShader; - g_pD3DDevice->GetPixelShader(/*out*/CurrentPixelShader.GetAddressOf()); - if (CurrentPixelShader.Get() != ConvertedPixelShaderHandle) - g_pD3DDevice->SetPixelShader(ConvertedPixelShaderHandle); - - // TODO: Figure out a method to forward the vertex-shader oFog output to the pixel shader FOG input register : - // We could use the unused oT4.x to output fog from the vertex shader, and read it with 'texcoord t4' in pixel shader! - // For now, we still disable native fog if pixel shader is said to handle it, this prevents black screen issues in titles using pixel shader fog. - // NOTE: Disabled: This breaks fog in XDK samples such as DolphinClassic. -#if-0 - if ((RecompiledPixelShader->PSDef.PSFinalCombinerInputsABCD > 0) || (RecompiledPixelShader->PSDef.PSFinalCombinerInputsEFG > 0)) { - g_pD3DDevice->SetRenderState(D3DRS_FOGENABLE, FALSE); + g_pD3DDevice->GetPixelShader(/*out*/&CurrentPixelShader); + if (CurrentPixelShader != RecompiledPixelShader->ConvertedPixelShader) { + g_pD3DDevice->SetPixelShader(RecompiledPixelShader->ConvertedPixelShader); + } + if (CurrentPixelShader) { + CurrentPixelShader->Release(); + CurrentPixelShader = nullptr; } -#endif //PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]; //PSH_XBOX_SHADER::GetPSTextureModes(pPSDef, psTextureModes); @@ -5967,2155 +697,55 @@ void DxbxUpdateActivePixelShader() // NOPATCH // Set constants, not based on g_PixelShaderConstants, but based on // the render state slots containing the pixel shader constants, // as these could have been updated via SetRenderState or otherwise : - D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; - for (int i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) - { - // Assume all constants are in use (this is much 
easier than tracking them for no other purpose than to skip a few here) - // Read the color from the corresponding render state slot : - switch (i) { - case PSH_XBOX_CONSTANT_FOG: - // Note : FOG.RGB is correct like this, but FOG.a should be coming - // from the vertex shader (oFog) - however, D3D8 does not forward this... - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); - break; - case PSH_XBOX_CONSTANT_FC0: - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); - break; - case PSH_XBOX_CONSTANT_FC1: - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); - break; - case PSH_XBOX_CONSTANT_BEM + 0: - case PSH_XBOX_CONSTANT_BEM + 1: - case PSH_XBOX_CONSTANT_BEM + 2: - case PSH_XBOX_CONSTANT_BEM + 3: - { - int stage = i - PSH_XBOX_CONSTANT_BEM; - DWORD* value = (DWORD*)&fColor[i]; - - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT00, &value[0]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT01, &value[1]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT11, &value[2]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVMAT10, &value[3]); + for (i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) { + // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) + // Read the color from the corresponding render state slot : + switch (i) { + case PSH_XBOX_CONSTANT_FOG: + // Note : FOG.RGB is correct like this, but FOG.a should be coming + // from the vertex shader (oFog) - however, D3D8 does not forward this... 
+ fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); break; - } - case PSH_XBOX_CONSTANT_LUM + 0: - case PSH_XBOX_CONSTANT_LUM + 1: - case PSH_XBOX_CONSTANT_LUM + 2: - case PSH_XBOX_CONSTANT_LUM + 3: - { - int stage = i - PSH_XBOX_CONSTANT_LUM; - DWORD* value = (DWORD*)&fColor[i]; - - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVLSCALE, &value[0]); - g_pD3DDevice->GetTextureStageState(stage, D3DTSS_BUMPENVLOFFSET, &value[1]); - value[2] = 0; - value[3] = 0; + case PSH_XBOX_CONSTANT_FC0: + fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); break; + case PSH_XBOX_CONSTANT_FC1: + fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); + break; + case PSH_XBOX_CONSTANT_BEM + 0: + case PSH_XBOX_CONSTANT_BEM + 1: + case PSH_XBOX_CONSTANT_BEM + 2: + case PSH_XBOX_CONSTANT_BEM + 3: + { + int stage_nr = i - PSH_XBOX_CONSTANT_BEM; + DWORD* value = (DWORD*)&fColor[i]; + + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT00, &value[0]); // Maps to BEM[stage].x + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT01, &value[1]); // Maps to BEM[stage].y + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT10, &value[2]); // Maps to BEM[stage].z + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT11, &value[3]); // Maps to BEM[stage].w + break; + } + case PSH_XBOX_CONSTANT_LUM + 0: + case PSH_XBOX_CONSTANT_LUM + 1: + case PSH_XBOX_CONSTANT_LUM + 2: + case PSH_XBOX_CONSTANT_LUM + 3: + { + int stage_nr = i - PSH_XBOX_CONSTANT_LUM; + DWORD* value = (DWORD*)&fColor[i]; + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLSCALE, &value[0]); // Maps to LUM[stage].x + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLOFFSET, &value[1]); // Maps to LUM[stage].y + value[2] = 0; + value[3] = 0; + break; + } + default: // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read 
from) the Xbox render state pixel shader constant slots + unsigned constant_nr = i - PSH_XBOX_CONSTANT_C0; + fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + constant_nr); // Note : 0xAARRGGBB format + break; } - default: // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + i - PSH_XBOX_CONSTANT_C0); - break; - } } // Set all host constant values using a single call: - g_pD3DDevice->SetPixelShaderConstantF(0, reinterpret_cast(fColor), PSH_XBOX_CONSTANT_MAX); - // Note PSH_XBOX_CONSTANT_MUL0 and PSH_XBOX_CONSTANT_MUL1 fall outside PSH_XBOX_CONSTANT_MAX - // and have already been 'PO_DEF'ined at the start of ConvertConstantsToNative - } - else - { - g_pD3DDevice->SetPixelShader(nullptr); - } -} - -// End of Dxbx code - -#define REVEL8N_PIXEL_SHADER_CHANGES - -// help functions -char *pCodeBuffer=nullptr; - -void WriteCode(const char *str, ...) 
-{ - char szBuffer[256]; - va_list argp; - - va_start(argp, str); - vsprintf(szBuffer, str, argp); - va_end(argp); - - //printf("%s", szBuffer); - if(pCodeBuffer) - strcat(pCodeBuffer, szBuffer); -} - -void InsertString(char *szStr, int iOffset, char *szInsert, int iInsertLen, int iRemoveLen); - -inline void HandleInputOutput -( - DWORD dwInput, - DWORD dwOutput, - BOOL bAlpha, - int iCStage, - BOOL bUniqueC0, - BOOL bUniqueC1, - int *iPSC0, - int *iPSC1, - - BOOL bGlobalRGBA, - - BOOL bFinalCombiner -); - -inline void GetRegister -( - WORD wRegister, - char *szRegister, - BOOL bUniqueC0, - BOOL bUniqueC1, - int iCStage, - int *pPSC0, - int *pPSC1 -); - -inline void GetInputMapping(WORD wInputMapping, char *szInputMapping, char *szInputMappingAfter, char *szConst); -inline void GetChannel(WORD wInputChannel, char *szInput, BOOL bAlpha, BOOL bGlobalRGBA); - -inline void GetOutputFlags -( - WORD wOutputFlags, - char *szInstMod, - char *szABOp, - char *szCDOp, - char *szABCDOp, - - BOOL *bAB_BA, - BOOL *bCD_BA, - - BOOL *bShl1Bias, - BOOL *bBias -); - -//inline BOOL CheckOpForMov(char *szOp, char *szInputs1, char *szInput2, char *szRegInput); -inline BOOL OptimizeOperation -( - char *szOp, - char *szOp1, - - char *szOp2, - char *szMod, - - char *szInputAB1, - char *szInputAB2, - - char *szInputCD1, - char *szInputCD2, - - char *szConstRegAB1, - char *szConstRegAB2, - char *szConstRegCD1, - char *szConstRegCD2, - - char *szOutAB, - char *szOutCD, - char *szABCDOutput, - - char *szCommand -); - -inline void ClearConstRegVars(); -inline void CorrectConstToReg(char *szConst, int *pPSC0, int *pPSC1); - -int iPreRunLen=0; - -// This is set to true if an operation tries to read from r0 -// before r0 was written, in that case we do the same as the xbox -// we write the value of t0.a to r0 ;-) -BOOL bR0WAccess=FALSE; -BOOL bR0Written=FALSE; -BOOL bR0AWritten=FALSE; -/* -BOOL bR1WAccess=FALSE; -BOOL bR1AWAccess=FALSE; -BOOL bR1RGBWAccess=FALSE; - -BOOL bR1AWritten=FALSE; -BOOL 
bR1RGBWritten=FALSE; -BOOL bR1Written=FALSE; -*/ -BOOL bR0AlphaOutput = FALSE; - -BOOL bLastOpRGB = FALSE; - -BOOL bEFProduct = FALSE; -BOOL bV1R0Reg = FALSE; - -#define DEF_VAR_TABLE_LEN 7 -char szVar[][10] = -{ - "r0", - "r1", - "t0", - "t1", - "t2", - "t3", - "t4" -}; - -inline void HandleInputOutput -( - DWORD dwInput, - DWORD dwOutput, - BOOL bAlpha, - int iCStage, - BOOL bUniqueC0, - BOOL bUniqueC1, - int *iPSC0, - int *iPSC1, - - BOOL bGlobalRGBA, - - BOOL bFinalCombiner -) -{ - // INPUTS - if(bFinalCombiner) printf("\npPSD.PSFinalCombinerInputsABCD = PS_COMBINERINPUTS(\n"); - else if(bAlpha) printf("\npPSD.PSAlphaInputs[%d] = PS_COMBINERINPUTS(\n", iCStage); - else printf("\npPSD.PSRGBInputs[%d] = PS_COMBINERINPUTS(\n", iCStage); - - WORD wCombinerInputs[4]; // 0=a, 1=b, 2=c, 3=d - wCombinerInputs[0] = (WORD) ((dwInput>>24) & 0xFF); - wCombinerInputs[1] = (WORD) ((dwInput>>16) & 0xFF); - wCombinerInputs[2] = (WORD) ((dwInput>> 8) & 0xFF); - wCombinerInputs[3] = (WORD) ( dwInput & 0xFF); - - char szInput[4][20] = {0}; - char szConst[4][20] = {0}; - char szInputMapping[4][20] = {0}; - char szInputMappingAfter[4][20] = {0}; - char szChannels[4][5] = {0}; - - // Go through inputs - int i=0; - for(i=0; i<4; i++) - { - szInput[i][0]=0x00; // Fast way to zero a string ;-) - szConst[i][0]=0x00; - szInputMapping[i][0]=0x00; - szInputMappingAfter[i][0]=0x00; - szChannels[i][0]=0x00; - - GetRegister(wCombinerInputs[i] & 0xF, szInput[i], bUniqueC0, bUniqueC1, iCStage, iPSC0, iPSC1); - - if(strcmp(szInput[i], "r0")==0) - { - if(!bR0AWritten) - strcpy(szInput[i], "t0"); - - if(!bR0Written) { - strcpy(szInput[i], "t0"); - //bR0WAccess=TRUE; - } - } - - printf(" | "); - GetInputMapping(wCombinerInputs[i] & 0x1E0, szInputMapping[i], szInputMappingAfter[i], szConst[i]); - printf(" | "); - GetChannel(wCombinerInputs[i] & 0x10, szChannels[i], bAlpha, bGlobalRGBA); - printf(",\n"); - - if((wCombinerInputs[i] & 0xF)==0x00) - szInput[i][0]=0x00; - - // 6928: check this as I doubt 
whether it works really like that - /*if(strcmp(szInput[i], "r1")==0) - { - // EmuLog(LOG_LEVEL::DEBUG, "channel: %s", szChannels[i]); - // Sleep(3000); - - if((strcmp(szChannels[i], ".a")==0) && (!bR1AWritten)) { - bR1AWAccess=TRUE; - - strcpy(szInput[i], " t1"); - } else if((strcmp(szChannels[i], ".rgb")==0) && (!bR1RGBWritten)) { - bR1RGBWAccess=TRUE; - - strcpy(szInput[i], " t1"); - } else if(!bR1Written) { - bR1WAccess=TRUE; - - strcpy(szInput[i], " t1"); - } - - if(bR1AWAccess && bR1RGBWAccess) - bR1WAccess=TRUE; - - //if(bR1AWAccess || bR1RGBWAccess) - // strcpy(szInput[i], "t1"); - }*/ - - //printf("\n*** szInput[%d]: %s\n", i, szInput[i]); - } - - // Input stuff - BOOL bInput[4] = {0, 0, 0, 0}; - if(szInput[0][0]) bInput[0]=TRUE; - if(szInput[1][0]) bInput[1]=TRUE; - if(szInput[2][0]) bInput[2]=TRUE; - if(szInput[3][0]) bInput[3]=TRUE; - -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - // Correct param if a constant is used! - if(!bInput[0]) - CorrectConstToReg(szConst[0], iPSC0, iPSC1); - if(!bInput[1]) - CorrectConstToReg(szConst[1], iPSC0, iPSC1); - if(!bInput[2]) - CorrectConstToReg(szConst[2], iPSC0, iPSC1); - if(!bInput[3]) - CorrectConstToReg(szConst[3], iPSC0, iPSC1); - - bool bEmptyChannel = false; -#endif - - char szCompleteInput[4][20] = {0}; - for(i=0; i<4; i++) - { - strcpy(szCompleteInput[i], szInputMapping[i]); -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - if(bInput[i]) - { -#endif - strcat(szCompleteInput[i], szInput[i]); -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - bEmptyChannel = bEmptyChannel || (szChannels[i][0] == 0); - } - else - strcat(szCompleteInput[i], &szConst[i][4]); -#endif - strcat(szCompleteInput[i], szInputMappingAfter[i]); - strcat(szCompleteInput[i], szChannels[i]); - } - - printf(");\n"); - - if(!bFinalCombiner) - { - // OUTPUTS - if(bAlpha) printf("\npPSD.PSAlphaOutputs[%d] = PS_COMBINEROUTPUTS(\n", iCStage); - else printf("\npPSD.PSRGBOutputs[%d] = PS_COMBINEROUTPUTS(\n", iCStage); - - WORD wCombinerOutputs[3]; // 0=d0 (ab), 1=d1 (cd), 2=d2 
(mux_sum) - wCombinerOutputs[0] = (WORD) ((dwOutput>> 4) & 0xF); - wCombinerOutputs[1] = (WORD) ( dwOutput & 0xF); - wCombinerOutputs[2] = (WORD) ((dwOutput>> 8) & 0xF); - WORD wCombinerOutputFlags = (WORD) ((dwOutput>>12) & 0xFF); - - char szOutput[3][10] = {0}; - char szOutputMod[10]="\0"; - - char szABOp[10]="\0"; - char szCDOp[10]="\0"; - char szABCDOp[10]="\0"; - - BOOL bAB_B2A; - BOOL bCD_B2A; - - BOOL bR0Now = FALSE; - BOOL bR0ANow = FALSE; - BOOL bVAccess[3] = {0,0,0}; - - BOOL bOpRGB_Current = FALSE; - BOOL bCurrOpRealAlpha = FALSE; - - // Go through outputs - for(i=0; i<3; i++) - { - szOutput[i][0]=0x00; // Fast way to zero a string ;-) - - GetRegister(wCombinerOutputs[i], szOutput[i], bUniqueC0, bUniqueC1, iCStage, iPSC0, iPSC1); - if(strcmp(szOutput[i], "r0")==0) - { - bR0Now=TRUE; - - // this checks for output to r0.a - if(bGlobalRGBA || (!bGlobalRGBA && bAlpha)) - bR0AlphaOutput=TRUE; - } - - if((strcmp(szOutput[i], "v0")==0) || (strcmp(szOutput[i], "v1")==0)) { bVAccess[i] = TRUE; } - - /*BOOL bR1_Written = FALSE; - if(strcmp(szOutput[i], "r1")==0) - bR1_Written=TRUE;*/ - - // check channel! 
- if(!bGlobalRGBA && bAlpha) - { - strcat(szOutput[i], ".a"); - bCurrOpRealAlpha = TRUE; - - if(bR0Now) - bR0ANow=TRUE; - - /*if(bR1_Written) - bR1AWritten=TRUE;*/ - } - else if(!bGlobalRGBA && !bAlpha -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - && !bEmptyChannel -#endif - ) - { - strcat(szOutput[i], ".rgb"); - - if(wCombinerOutputs[i]) - bOpRGB_Current = TRUE; - - /*if(bR1_Written) - bR1RGBWritten=TRUE;*/ - } - else - { - /*if(bR1_Written) - bR1Written=TRUE;*/ - - if(bR0Now) - bR0ANow=TRUE; - } - - printf(",\n"); - - if(wCombinerOutputs[i]==0x00) - szOutput[i][0]=0x00; - - //printf("\n*** szOutput[%d]: %s\n", i, szOutput[i]); - } - - BOOL bBias=FALSE; - BOOL bSh1Bias=FALSE; - - GetOutputFlags( - wCombinerOutputFlags, - szOutputMod, - - szABOp, - szCDOp, - szABCDOp, - - &bAB_B2A, - &bCD_B2A, - - &bSh1Bias, - &bBias); - - if(bR0Now) - bR0Written=TRUE; - - if(bR0ANow) - bR0AWritten=TRUE; - - printf(");\n"); - - // Find output for the operations - char szOut[10]="\0"; - char szOut1[10]="\0"; - - //printf("|****| %s |****|\n", szOutput[1]); - - if(szOutput[0][0]) - strcpy(szOut, szOutput[0]); - if(szOutput[1][0]) - strcpy(szOut1, szOutput[1]); - -#ifndef REVEL8N_PIXEL_SHADER_CHANGES - if(szOutput[2][0]) - { - /* - //EmuWarningMsg("THIS IS WRONG, FIX ME!"); - //if(!szOutput[1][0]) - // strcpy(szOut1, szOutput[2]); - EmuLog(LOG_LEVEL::DEBUG, "(!szOutput[0][0] || !szOutput[1][0]) && szOutput[2][0] = TRUE!"); - - BOOL bUsable=TRUE; - for(i=2; i<4; i++) - { - if((strcmp(szOutput[2], szInput[i])==0) || (strcmp(szOutput[2], szOut1)==0)) { - bUsable=FALSE; - } - } - if(bUsable && !szOutput[0][0]) - { - - strcpy(szOut, szOutput[2]); - - EmuLog(LOG_LEVEL::DEBUG, "BUsable = TRUE, new output: %s", szOut); - - } - else { - printf("!WARNING!: The operation uses the output register also as input!" - "Trying to find a free output register. 
It is possible that the pixel shader " - "will generate garbage because the new free one contains data used " - "in an other comming operation!\n\n"); - - for(int j=0; j> 24) & 0xFF); - wEFG[1] = (WORD) ((dwOutput >> 16) & 0xFF); - wEFG[2] = (WORD) ((dwOutput >> 8) & 0xFF); - - BOOL bInputEFG[3] = {0, 0, 0}; - char szCompleteInputEFG[3][10]; - - char szInputEFG[3][10]; - char szInputMappingEFG[3][10]; - char szInputMappingAfterEFG[3][10]; - char szConstEFG[3][10]; - - for(i=0; i<3; i++) - { - szInputEFG[i][0]=0x00; - szInputMappingEFG[i][0]=0x00; - szInputMappingAfterEFG[i][0]=0x00; - szConstEFG[i][0]=0x00; - - GetRegister(wEFG[i] & 0xF, szInputEFG[i], bUniqueC0, bUniqueC1, 0, iPSC0, iPSC1); - printf(" | "); - GetInputMapping(wEFG[i] & 0x1E0, szInputMappingEFG[i], szInputMappingAfterEFG[i], szConstEFG[i]); - printf(" | "); - GetChannel(wEFG[i] & 0x10, szInputEFG[i], bAlpha, FALSE); - printf(", \n"); - - strcpy(szCompleteInputEFG[i], szInputMappingEFG[i]); - strcat(szCompleteInputEFG[i], szInputEFG[i]); - strcat(szCompleteInputEFG[i], szInputMappingAfterEFG[i]); - - if(szInputEFG[i][0]) - bInputEFG[i]=TRUE; - else - { - // add that constant as a reg - CorrectConstToReg(szConstEFG[i], iPSC0, iPSC1); - } - } - - if(dwV1R0_EFProd_Flags & 0x20) - printf("PS_FINALCOMBINERSETTINGS_COMPLEMENT_R0"); - else if(dwV1R0_EFProd_Flags & 0x40) - printf("PS_FINALCOMBINERSETTINGS_COMPLEMENT_V1"); - else if(dwV1R0_EFProd_Flags & 0x80) - printf("PS_FINALCOMBINERSETTINGS_CLAMP_SUM"); - else - printf("0"); - - printf(");\n"); - - if (bV1R0Reg) - { - char sMod[10] = {0}; - char sV1[10] = {0}; - char sR0[10] = {0}; - if(dwV1R0_EFProd_Flags & 0x20) - strcpy(sR0, "1-"); - else if(dwV1R0_EFProd_Flags & 0x40) - strcpy(sV1, "1-"); - else if(dwV1R0_EFProd_Flags & 0x80) - strcpy(sMod, "_sat"); - - if (bEFProduct) - { - EmuLog(LOG_LEVEL::WARNING, "EF Product and V1R0 register used at the same time!"); - } - else - { - WriteCode("; (v1 + r0)\nadd%s r0, %sr0, %sv1\n\n", sMod, sR0, sV1); - } - } - - 
// only we we will use this later in final combiner stuff!! - // all inputs are known now, so check: - if(bEFProduct) { - - // r0 = E * F (E or F must be the r0 calculated before otherwise the stage results - // are lost, problem??? - if(! - ((!bInputEFG[0] && szConstEFG[0][0]=='0') && - (!bInputEFG[1] && szConstEFG[1][0]=='0'))) { - WriteCode(";E * F\nmul r0, %s, %s\n\n", bInputEFG[0] ? szCompleteInputEFG[0] : &szConstEFG[0][4], - bInputEFG[1] ? szCompleteInputEFG[1] : &szConstEFG[1][4]); - } - - } - - // Now the result: - - // What is done by the final combiner: - // final color = s0*s1 + (1-s0)*s2 + s3 - - // lrp r0, s0, s1, s2 - // add r0, r0, s3 - // s0 = szInput[0] - // s1 = szInput[1] - // s2 = szInput[2] - // s3 = szInput[3] - - // Check whether it is a mov r0, r0 - // for example: lrp r0, 1, r0, 0 - // r0 = 1*r0 + (1-1)*r0 + 0 - // --> r0 = r0 - - for(i=0; i<4; i++) - { - if(!bInput[i]) - CorrectConstToReg(szConst[i], iPSC0, iPSC1); - } - - if(!((!bInput[0]) && (szConst[0][0] == '1') && (strncmp(szCompleteInput[1], "r0", 2)==0))) - { - // cases for s2 - // s2 == 0 --> final color = s0*s1 + s3 - if((!bInput[2]) && (szConst[2][0] == '0')) - { - WriteCode("mul r0.rgb, %s, %s\n", - bInput[0] ? szCompleteInput[0] : &szConst[0][4], - bInput[1] ? szCompleteInput[1] : &szConst[1][4]); - } - // s0 == 0 --> final color = s2 + s3 - else if((!bInput[0]) && (szConst[0][0] == '0')) { - // Check whether s2 is r0!!! - if(!(bInput[2] && (strncmp(szCompleteInput[2], "r0", 2)==0))) - WriteCode("mov r0.rgb, %s\n", - bInput[2] ? szCompleteInput[2] : &szConst[2][4]); - } - // s0 == 1 --> final color = s1 + s3 - else if((!bInput[0]) && (szConst[0][0] == '1')) { - // Check whether s1 is r0!!! - if(!(bInput[1] && (strncmp(szCompleteInput[1], "r0", 2)==0))) - WriteCode("mov r0.rgb, %s\n", - bInput[1] ? szCompleteInput[1] : &szConst[1][4]); - } - // no special cases - else if(bInput[2] || bInput[0]) - { - WriteCode("lrp r0.rgb, %s, %s, %s\n", - bInput[0] ? 
szCompleteInput[0] : &szConst[0][4], - bInput[1] ? szCompleteInput[1] : &szConst[1][4], - bInput[2] ? szCompleteInput[2] : &szConst[2][4]); - } - } - // case for s3 - if(bInput[3] || (szConst[3][0] != '0')) - WriteCode("add r0.rgb, r0, %s\n", bInput[3] ? szCompleteInput[3] : &szConst[3][4]); - - // Alpha ouput (G) - if(bInputEFG[2] && (strncmp(szInputEFG[2], "r0", 2)!=0)) - { - bR0AlphaOutput=TRUE; - - WriteCode("mov r0.a, %s\n", - bInputEFG[2] ? szCompleteInputEFG[2] : &szConstEFG[2][4]); - } - - //else - // WriteCode("mov r0.a, v0.a\n"); - //*/ - //Sleep(3000); - } -} - -inline void GetRegister(WORD wRegister, char *szRegister, BOOL bUniqueC0, BOOL bUniqueC1, int iCStage, int *iPSC0, int *iPSC1) -{ - // Determine register - switch(wRegister) - { - case 0x00: - printf("PS_REGISTER_ZERO"); - break; - case 0x01: // read - printf("PS_REGISTER_C0"); - if(bUniqueC0) - sprintf(szRegister, "c%d", iPSC0[iCStage]); - else - strcpy(szRegister, "c0"); - break; - case 0x02: // read - printf("PS_REGISTER_C1"); - if(bUniqueC0) - sprintf(szRegister, "c%d", iPSC1[iCStage]); - else - strcpy(szRegister, "c1"); - break; - case 0x03: // read - { - printf("PS_REGISTER_FOG"); - - char szOneHalf[40] = "0.5\0"; - CorrectConstToReg(szOneHalf, iPSC0, iPSC1); - - strcpy(szRegister, &szOneHalf[4]); // Unsupported - break; - } - case 0x04: // read/(write ???) - printf("PS_REGISTER_V0"); - strcpy(szRegister, "v0"); - break; - case 0x05: // read/(write ???) 
- printf("PS_REGISTER_V1"); - strcpy(szRegister, "v1"); - break; - case 0x08: // read/write - printf("PS_REGISTER_T0"); - strcpy(szRegister, "t0"); - //strcpy(szRegister, "r2"); - break; - case 0x09: // read/write - printf("PS_REGISTER_T1"); - strcpy(szRegister, "t1"); - //strcpy(szRegister, "r3"); - break; - case 0x0A: // read/write - printf("PS_REGISTER_T2"); - strcpy(szRegister, "t2"); - //strcpy(szRegister, "r4"); - break; - case 0x0B: // read/write - printf("PS_REGISTER_T3"); - strcpy(szRegister, "t3"); - //strcpy(szRegister, "r5"); - break; - case 0x0C: // read/write - printf("PS_REGISTER_R0"); - strcpy(szRegister, "r0"); - break; - case 0x0D: // read/write - printf("PS_REGISTER_R1"); - strcpy(szRegister, "r1"); - break; - case 0x0E: // read - printf("PS_REGISTER_V1R0_SUM"); - - bV1R0Reg = TRUE; - strcpy(szRegister, "r0"); //"V1R0");//(v1+r0)"); - break; - case 0x0F: - printf("PS_REGISTER_EF_PROD"); - - // we save it in r0 - bEFProduct = TRUE; - strcpy(szRegister, "r0");/* e * f --> combiner input */ - break; - default: - printf("/*Unknown register %d*/", wRegister); - break; - } -} - -inline void GetInputMapping(WORD wInputMapping, char *szInputMapping, char *szInputMappingAfter, char *szConst) -{ - strcpy(szConst, "0"); - switch(wInputMapping) - { - case 0x00: // max(0,x) [ok for final combiner] - printf("PS_INPUTMAPPING_UNSIGNED_IDENTITY"); - break; - case 0x20: // 1 - max(0,x) [ok for final combiner] - printf("PS_INPUTMAPPING_UNSIGNED_INVERT"); - strcpy(szInputMapping, "1-"); - strcpy(szConst, "1"); - break; - case 0x40: // 2*max(0,x) - 1 [invalid for final combiner] - printf("PS_INPUTMAPPING_EXPAND_NORMAL"); - strcpy(szInputMappingAfter, "_bx2"); // right??? 
- strcpy(szConst, "-1"); - break; - case 0x60: // 1 - 2*max(0,x) [invalid for final combiner] - printf("PS_INPUTMAPPING_EXPAND_NEGATE"); - - strcpy(szInputMapping, "-"); - strcpy(szInputMappingAfter, "_bx2"); - strcpy(szConst, "1"); - break; - case 0x80: // max(0,x) - 1/2 [invalid for final combiner] - printf("PS_INPUTMAPPING_HALFBIAS_NORMAL"); - strcpy(szInputMappingAfter, "_bias"); - - strcpy(szConst, "-0.5"); - break; - case 0xA0: // 1/2 - max(0,x) [invalid for final combiner] - printf("PS_INPUTMAPPING_HALFBIAS_NEGATE"); - - strcpy(szConst, "0.5"); - - // Negate is run last if combined with bias - strcpy(szInputMapping, "-"); - strcpy(szInputMappingAfter, "_bias"); - break; - case 0xC0: // x [invalid for final combiner] - printf("PS_INPUTMAPPING_SIGNED_IDENTITY"); - break; - case 0xE0: // -x [invalid for final combiner] - printf("PS_INPUTMAPPING_SIGNED_NEGATE"); - strcpy(szInputMapping, "-"); - break; - default: - printf("/*Unknown input mapping %d!*/", wInputMapping); - break; - } -} - -inline void GetChannel(WORD wInputChannel, char *szInput, BOOL bAlpha, BOOL bGlobalRGBA) -{ - switch(wInputChannel) - { - case 0x00: - if(bAlpha) { - printf("PS_CHANNEL_BLUE"); - strcat(szInput, ".b"); - } else { - printf("PS_CHANNEL_RGB"); - - //if (!bGlobalRGBA) - // strcat(szInput, ".rgb"); - } - break; - case 0x10: - printf("PS_CHANNEL_ALPHA"); - - // TODO: check this || !bAlpha, it should mean that alpha channel - // is detected in a RGB register, then it must be set also - // if both commands the same are (in that case it has to be RGB!) 
- if (!bGlobalRGBA || !bAlpha) - strcat(szInput, ".a"); - break; - default: - printf("/*Unknown channel %d!*/", wInputChannel); - break; - } -} - -inline void GetOutputFlags -( - WORD wOutputFlags, - char *szInstMod, - char *szABOp, - char *szCDOp, - char *szABCDOp, - - BOOL *bAB_BA, - BOOL *bCD_BA, - - BOOL *bShl1Bias, - BOOL *bBias -) -{ - // Output mapping - switch (wOutputFlags & 0x38) { - case PS_COMBINEROUTPUT_BIAS: - { - printf("PS_COMBINEROUTPUT_BIAS"); // y = x - 0.5 - //strcpy(szInstMod, "_bias"); - - // Only over this: - // mov y, y_bias - (*bBias)=TRUE; - break; - } - case PS_COMBINEROUTPUT_SHIFTLEFT_1: // 0x10L - { - printf("PS_COMBINEROUTPUT_SHIFTLEFT_1"); // y = x*2 - strcpy(szInstMod, "_x2"); - break; - } - case PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS: // 0x18L - { - LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS"); - printf("PS_COMBINEROUTPUT_SHIFTLEFT_1_BIAS"); // y = (x - 0.5)*2 - - //strcpy(szInstMod, "_x2"); - // what is missing is a subtraction of 1 - // --> 2 * (x - 0.5) = 2x - 1 - - // But this won't work because we would have to do 2 movs - // to subtract 1 - // Let's do this: mov_x2 y, y_bias - (*bShl1Bias)=TRUE; - break; - } - case PS_COMBINEROUTPUT_SHIFTLEFT_2: // 0x20L - { - LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_2"); - printf("PS_COMBINEROUTPUT_SHIFTLEFT_2"); // y = x*4 - strcpy(szInstMod, "_x4"); - break; - } - // case PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS: // 0x28L, // y = (x - 0.5)*4 - case PS_COMBINEROUTPUT_SHIFTRIGHT_1: // 0x30L - { - LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTRIGHT_1"); - printf("PS_COMBINEROUTPUT_SHIFTRIGHT_1"); // y = x/2 - strcpy(szInstMod, "_d2"); - break; - } - // case PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS: // 0x38L, // y = (x - 0.5)/2 - default: - printf("PS_COMBINEROUTPUT_IDENTITY"); - } - - printf(" | "); - - // MUX operation - if(wOutputFlags & 0x04) { - printf("PS_COMBINEROUTPUT_AB_CD_MUX"); - strcpy(szABCDOp, "cnd"); - - if((!bR0Written) || (!bR0AWritten)) - bR0WAccess=TRUE; - } - else - { - 
printf("PS_COMBINEROUTPUT_AB_CD_SUM"); // 3rd output is AB+CD - strcpy(szABCDOp, "add"); - } - - printf(" | "); - - // Function for ab side - if(wOutputFlags & 0x02) - { - printf("PS_COMBINEROUTPUT_AB_DOT_PRODUCT"); // RGB only - strcpy(szABOp, "dp3"); - } else { - printf("PS_COMBINEROUTPUT_AB_MULTIPLY"); - strcpy(szABOp, "mul"); - } - - printf(" | "); - - // Functiomn for cd side - if(wOutputFlags & 0x01) - { - printf("!!!PS_COMBINEROUTPUT_CD_DOT_PRODUCT!!!"); // RGB only - strcpy(szCDOp, "dp3"); - } else { - printf("PS_COMBINEROUTPUT_CD_MULTIPLY"); - strcpy(szCDOp, "mul"); - } - - // Blue to alpha for ab side - if(wOutputFlags & 0x80) { - printf(" | PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA"); // RGB only - (*bAB_BA)=TRUE; - } else (*bAB_BA)=FALSE; - - // Blue to alpha for cd side - if(wOutputFlags & 0x40) { - printf(" | PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA"); // RGB only - (*bCD_BA)=TRUE; - } else (*bCD_BA)=FALSE; -} - -enum OpType -{ - OPTYPE_NOP = -1, - OPTYPE_MOV = 0, - OPTYPE_ADD, - OPTYPE_MUL, - OPTYPE_DP3, - OPTYPE_CND, -}; - -inline BOOL OptimizeOperation -( - char *szOp, - char *szOp1, - - char *szOp2, - char *szMod, - - char *szInputAB1, - char *szInputAB2, - - char *szInputCD1, - char *szInputCD2, - - char *szConstRegAB1, - char *szConstRegAB2, - char *szConstRegCD1, - char *szConstRegCD2, - - char *szOutAB, - char *szOutCD, - char *szABCDOutput, - - char *szCommand) -{ - printf("----------\nszOp: |%s|\nszOp1: |%s|\nszOp2: |%s|\nszMod: |%s|\n" - "szInputAB1: |%s|\nszInputAB2: |%s|\nszInputCD1: |%s|\nszInputCD2: |%s|\n" - "szOutAB: |%s|\nszOutCD: |%s|\nszABCDOutput: |%s|\n", - szOp, szOp1, szOp2, szMod, szInputAB1, szInputAB2, szInputCD1, szInputCD2, - szOutAB, szOutCD, szABCDOutput); - - char szABCDInput[2][10]; - szABCDInput[0][0]=0x00; - szABCDInput[1][0]=0x00; - - szCommand[0]=0x00; - - char *szOps[3]; - szOps[0] = szOp; - szOps[1] = szOp1; - szOps[2] = szOp2; - - char *szInputs[4]; - szInputs[0] = szInputAB1; - szInputs[1] = szInputAB2; - szInputs[2] = 
szInputCD1; - szInputs[3] = szInputCD2; - - char *szRealInputs[4]; - szRealInputs[0] = szConstRegAB1; - szRealInputs[1] = szConstRegAB2; - szRealInputs[2] = szConstRegCD1; - szRealInputs[3] = szConstRegCD2; - -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - char *szOutputs[3]; - szOutputs[0] = szOutAB; - szOutputs[1] = szOutCD; - szOutputs[2] = szABCDOutput; -#endif - - // TODO: check mov: other operations like lrp - // are ignored because of a shitty mul with 1 - BOOL bMov[3]={0, 0, 0}; - - int i=0; - for(i=0; i<2; i++) - { - //printf("szOps[i]: %s\n", szOps[i]); - //printf("szInputs[i*2+1]: %s\n", szInputs[i*2+1]); - if(strcmp(szOps[i], "mul")==0) - { - // If it is a mul, it can also be only a mov - if(strcmp(szInputs[i*2], "1")==0) { - //strcpy(szABCDInput[i], szInputs[i*2+1]); -#ifndef REVEL8N_PIXEL_SHADER_CHANGES - strcpy(szABCDInput[i], szRealInputs[i*2+1]); -#endif - - strcpy(szOps[i], "mov"); - - strcpy(szInputs[i*2], szInputs[i*2+1]); - strcpy(szRealInputs[i*2], szRealInputs[i*2+1]); - - strcpy(szInputs[i*2+1], ""); - strcpy(szRealInputs[i*2+1], ""); - - bMov[i]=TRUE; - - } else if(strcmp(szInputs[i*2+1], "1")==0) { - //strcpy(szABCDInput[i], szInputs[i*2]); -#ifndef REVEL8N_PIXEL_SHADER_CHANGES - strcpy(szABCDInput[i], szRealInputs[i*2]); -#endif - - strcpy(szOps[i], "mov"); - - strcpy(szInputs[i*2+1], ""); - strcpy(szRealInputs[i*2+1], ""); - - bMov[i]=TRUE; - } - } - } - -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - OpType eOpTypes[3] = {OPTYPE_NOP, OPTYPE_NOP, OPTYPE_NOP}; - for (i = 0; i < 3; ++i) - { - if (strcmp(szOps[i], "mov") == 0) - eOpTypes[i] = OPTYPE_MOV; - else if (strcmp(szOps[i], "add") == 0) - eOpTypes[i] = OPTYPE_ADD; - else if (strcmp(szOps[i], "mul") == 0) - eOpTypes[i] = OPTYPE_MUL; - else if (strcmp(szOps[i], "dp3") == 0) - eOpTypes[i] = OPTYPE_DP3; - else if (strcmp(szOps[i], "cnd") == 0) - eOpTypes[i] = OPTYPE_CND; - else - eOpTypes[i] = OPTYPE_NOP; - } - - bool bHandled = false; - int iOffset = 0; - int iOpCount = 0; - if (szOps[2][0] && 
szOutputs[2][0] && szOutputs[2][0] != 'v') - { - if (!szOutputs[0][0] && - !szOutputs[1][0]) - { - if (szMod[0]) - { - EmuLog(LOG_LEVEL::WARNING, "Destination modifier present!"); - } - switch (eOpTypes[2]) - { - case OPTYPE_ADD: - { - if (eOpTypes[0] == OPTYPE_MOV && - eOpTypes[1] == OPTYPE_MOV) - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[0], szRealInputs[2]); - ++iOpCount; - bHandled = true; - } - else if (eOpTypes[0] == OPTYPE_MOV && - eOpTypes[1] == OPTYPE_MUL) - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[3], szRealInputs[0]); - bHandled = true; - ++iOpCount; - } - else if (eOpTypes[0] == OPTYPE_MUL && - eOpTypes[1] == OPTYPE_MOV) - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[0], szRealInputs[1], szRealInputs[2]); - bHandled = true; - ++iOpCount; - } - else if (eOpTypes[0] == OPTYPE_MUL && - eOpTypes[1] == OPTYPE_MUL) - { - // nice, mul, mul, add can be converted to lrp - // lrp r0, t0, t1, c2 - // --> r0 = t0 * t1 + (1-t0) * c2 - // or r0 = c2 + t0 * (t1 - c2), but that would mean we have to mul in the ABCD op - // and that is not possible - - for(i=0; i<2; i++) - { - // To match the first option, the first input of the AB/CD op must inverted - BOOL bInvert[2] = {0, 0}; - if((szRealInputs[2*i][0] == '1') && (szRealInputs[2*i][1] == '-')) - //if((szInputs[2*i][0] == '1') && (szInputs[2*i][1] == '-')) - bInvert[0]=TRUE; - - if((szRealInputs[2*i+1][0] == '1') && (szRealInputs[2*i+1][1] == '-')) - //if((szInputs[2*i+1][0] == '1') && (szInputs[2*i+1][1] == '-')) - bInvert[1]=TRUE; - - //printf("szInputs[2*i]: %s\nszInputs[2*i+1]: %s\n", szInputs[2*i], szInputs[2*i+1]); - //printf("bInvert[0]: %d\nbInvert[1]: %d\n", bInvert[0], bInvert[1]); - - if((bInvert[0] || bInvert[1]) && (!(bInvert[0] && bInvert[1]))) - { - char szParam[3][10] = {0}; - char szRealParam0[10] = 
{0}; - if(bInvert[0]) - { - // copy over the not inverted param - strcpy(szParam[i+1], /*szInputs*/szRealInputs[2*i+1]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i][2]); - strcpy(szRealParam0, &szRealInputs[2*i][2]); - } - else if(bInvert[1]) - { - // copy over the not inverted param - strcpy(szParam[i+1], /*szInputs*/szRealInputs[2*i]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i+1][2]); - strcpy(szRealParam0, &szRealInputs[2*i+1][2]); - } - int iOtherOp = i == 0 ? 1 : 0; - - bHandled = true; - if (strcmp(szRealInputs[2*iOtherOp], szRealParam0/*szParam[0]*/)==0) - strcpy(szParam[iOtherOp+1], /*szInputs*/szRealInputs[2*iOtherOp+1]); - else if (strcmp(szRealInputs[2*iOtherOp+1], szRealParam0/*szParam[0]*/)==0) - strcpy(szParam[iOtherOp+1], /*szInputs*/szRealInputs[2*iOtherOp]); - else - bHandled = false; - if (bHandled) - { - // ok, we have it - iOffset += sprintf(szCommand, "lrp%s %s, %s, %s, %s\n", - szMod, szABCDOutput, szRealParam0/*szParam[0]*/, szParam[1], szParam[2]); - ++iOpCount; - break; - } - } - } - - if (!bHandled) - { - iOffset += sprintf(szCommand + iOffset, "mul r1, %s, %s\n", - szRealInputs[0], szRealInputs[1]); - ++iOpCount; - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, r1\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[3]); - ++iOpCount; - - bHandled = true; - } - } - } - break; - case OPTYPE_CND: - { - if (eOpTypes[0] == OPTYPE_MOV && - eOpTypes[1] == OPTYPE_MOV) - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, %s\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[0]); - ++iOpCount; - bHandled = true; - } - else if (eOpTypes[0] == OPTYPE_MUL && - eOpTypes[1] == OPTYPE_MUL) - { - if (szOutputs[2][0] != 'r') - { - EmuLog(LOG_LEVEL::WARNING, "Destination not temporary register!"); - } - // ab input - iOffset += sprintf(szCommand + iOffset, "mul%s r1, %s, %s\n", - szMod, szRealInputs[0], szRealInputs[1]); - ++iOpCount; - // cd input - iOffset += sprintf(szCommand + 
iOffset, "mul%s r0, %s, %s\n", - szMod, szRealInputs[2], szRealInputs[3]); - ++iOpCount; - // abcd output - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, r0, r1\n", - szMod, szOutputs[2]); - ++iOpCount; - bHandled = true; - } - } - break; - } - if (!bHandled && strcmp(szOps[2], "add") == 0) - { - if ((strcmp(szOps[0], "mov")==0)) - { - if ((strcmp(szOps[1], "mul")==0)) - { - char szParam[10]="\0"; - - if(strcmp(szInputCD1, "-1")==0) - strcpy(szParam, szInputCD2); - else if(strcmp(szInputCD2, "-1")==0) - strcpy(szParam, szInputCD1); - - if(szParam[0] && szConstRegAB1[0] && szABCDOutput[0]) - { - iOffset += sprintf(szCommand, "sub%s %s, %s, %s\n", - szMod, szABCDOutput, szConstRegAB1, szParam); - bHandled = true; - ++iOpCount; - } -// else -// { -// iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", -// szMod, szOutputs[2], szRealInputs[2], szRealInputs[3], szRealInputs[0]); -// bHandled = true; -// ++iOpCount; -// } - } - } -// else if ((strcmp(szOps[0], "mul")==0)) -// { -// if ((strcmp(szOps[1], "mov")==0)) -// { -// iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", -// szMod, szOutputs[2], szRealInputs[0], szRealInputs[1], szRealInputs[2]); -// bHandled = true; -// ++iOpCount; -// } -// else if ((strcmp(szOps[1], "mul")==0)) -// { -// // nice, mul, mul, add can be converted to lrp -// // lrp r0, t0, t1, c2 -// // --> r0 = t0 * t1 + (1-t0) * c2 -// // or r0 = c2 + t0 * (t1 - c2), but that would mean we have to mul in the ABCD op -// // and that is not possible -// -// for(i=0; i<2; i++) -// { -// // To match the first option, the first input of the AB/CD op must inverted -// BOOL bInvert[2] = {0, 0}; -// if((szRealInputs[2*i][0] == '1') && (szRealInputs[2*i][1] == '-')) -// //if((szInputs[2*i][0] == '1') && (szInputs[2*i][1] == '-')) -// bInvert[0]=TRUE; -// -// if((szRealInputs[2*i+1][0] == '1') && (szRealInputs[2*i+1][1] == '-')) -// //if((szInputs[2*i+1][0] == '1') && (szInputs[2*i+1][1] == '-')) -// bInvert[1]=TRUE; 
-// -// //printf("szInputs[2*i]: %s\nszInputs[2*i+1]: %s\n", szInputs[2*i], szInputs[2*i+1]); -// //printf("bInvert[0]: %d\nbInvert[1]: %d\n", bInvert[0], bInvert[1]); -// -// if((bInvert[0] || bInvert[1]) && (!(bInvert[0] && bInvert[1]))) -// { -// char szParam[3][10]; -// char szRealParam0[10]; -// if(bInvert[0]) -// { -// // copy over the not inverted param -// strcpy(szParam[2], /*szInputs*/szRealInputs[2*i+1]); -// -// // and the inverted -// strcpy(szParam[0], &szInputs[2*i][2]); -// strcpy(szRealParam0, &szRealInputs[2*i][2]); -// } -// else if(bInvert[1]) -// { -// // copy over the not inverted param -// strcpy(szParam[2], /*szInputs*/szRealInputs[2*i]); -// -// // and the inverted -// strcpy(szParam[0], &szInputs[2*i+1][2]); -// strcpy(szRealParam0, &szRealInputs[2*i+1][2]); -// } -// int iOtherOp = i == 0 ? 1 : 0; -// -// bHandled = true; -// if (strcmp(szRealInputs[2*iOtherOp], szRealParam0/*szParam[0]*/)==0) -// strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp+1]); -// else if (strcmp(szRealInputs[2*iOtherOp+1], szRealParam0/*szParam[0]*/)==0) -// strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp]); -// else -// bHandled = false; -// if (bHandled) -// { -// // ok, we have it -// iOffset += sprintf(szCommand, "lrp%s %s, %s, %s, %s\n", -// szMod, szABCDOutput, szRealParam0/*szParam[0]*/, szParam[1], szParam[2]); -// ++iOpCount; -// break; -// } -// } -// } -// -// if (!bHandled) -// { -// iOffset += sprintf(szCommand + iOffset, "mul r1, %s, %s\n", -// szRealInputs[0], szRealInputs[1]); -// ++iOpCount; -// iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, r1\n", -// szMod, szOutputs[2], szRealInputs[2], szRealInputs[3]); -// ++iOpCount; -// -// bHandled = true; -// } -// } -// } - } - } - } - - if (!bHandled) - { - for (i = 0; i < 2; ++i) - { - if (szOps[i][0] && szOutputs[i][0] && szOutputs[i][0] != 'v') - { - ++iOpCount; - // copy output value to final input - strcpy(szABCDInput[i], szOutputs[i]); - // insert command - iOffset += 
sprintf(szCommand + iOffset, "%s%s %s, %s\n", szOps[i], szMod, szOutputs[i], szRealInputs[i * 2 + 0]); - - // if there are more parameters... - if (szRealInputs[i * 2 + 1][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[i * 2 + 1]); - } - bHandled = true; - } - } - -// if (szOutputs[2][0]) -// { -// if(!szOutputs[1][0]) -// strcpy(szOutputs[1], "r0"); -// if(!szOutputs[0][0]) -// strcpy(szOutputs[0], "r1"); -// } - - if (szOps[2][0] && szOutputs[2][0] && szOutputs[2][0] != 'v') - { - switch (eOpTypes[2]) - { - case OPTYPE_ADD: - { - if (szABCDInput[0][0] && - szABCDInput[1][0]) - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, %s\n", - szMod, szOutputs[2], szABCDInput[0], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - else if (szABCDInput[0][0] && - !szABCDInput[1][0]) - { - switch (eOpTypes[1]) - { - case OPTYPE_MUL: - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[2], szRealInputs[3], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - break; - case OPTYPE_DP3: - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[1], szMod, szRealInputs[2]); - - // if there are more parameters... 
- if (szRealInputs[3][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[3]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, r1\n", - szMod, szOutputs[2], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - } - break; - default: - break; - } - } - else if (!szABCDInput[0][0] && - szABCDInput[1][0]) - { - switch (eOpTypes[0]) - { - case OPTYPE_MUL: - { - iOffset += sprintf(szCommand + iOffset, "mad%s %s, %s, %s, %s\n", - szMod, szOutputs[2], szRealInputs[0], szRealInputs[1], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - break; - case OPTYPE_DP3: - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[0], szMod, szRealInputs[0]); - - // if there are more parameters... - if (szRealInputs[1][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[1]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "add%s %s, r1, %s\n", - szMod, szOutputs[2], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - } - break; - default: - break; - } - } - } - break; - case OPTYPE_CND: - { - if (szABCDInput[0][0] && - szABCDInput[1][0]) - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, %s\n", - szMod, szOutputs[2], szABCDInput[1], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - else if (szABCDInput[0][0] && - !szABCDInput[1][0]) - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[1], szMod, szRealInputs[2]); - - // if there are more parameters... 
- if (szRealInputs[3][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[3]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, r1, %s\n", - szMod, szOutputs[2], szABCDInput[0]); - ++iOpCount; - bHandled = true; - } - } - else if (!szABCDInput[0][0] && - szABCDInput[1][0]) - { - { - ++iOpCount; - // insert command - iOffset += sprintf(szCommand + iOffset, "%s%s r1, %s\n", szOps[0], szMod, szRealInputs[0]); - - // if there are more parameters... - if (szRealInputs[1][0]) - { - // backspace of the newline character - --iOffset; - // insert remaining parameters - iOffset += sprintf(szCommand + iOffset, ", %s\n", szRealInputs[1]); - } - } - { - iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, r1\n", - szMod, szOutputs[2], szABCDInput[1]); - ++iOpCount; - bHandled = true; - } - } - } - break; - } - if (!bHandled) - { - EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); - } -// if (strcmp(szOps[2], "add") == 0) -// { -// if (szABCDInput[0][0] && -// szABCDInput[1][0]) -// { -// iOffset += sprintf(szCommand + iOffset, "add%s %s, %s, %s\n", -// szMod, szOutputs[2], szABCDInput[1], szABCDInput[0]); -// ++iOpCount; -// bHandled = true; -// } -// else -// { -// EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); -// } -// } -// else if (strcmp(szOps[2], "cnd") == 0) -// { -// if (szABCDInput[0][0] && -// szABCDInput[1][0]) -// { -// iOffset += sprintf(szCommand + iOffset, "cnd%s %s, r0.a, %s, %s\n", -// szMod, szOutputs[2], szABCDInput[1], szABCDInput[0]); -// ++iOpCount; -// bHandled = true; -// } -// else -// { -// EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); -// } -// } -// else -// { -// EmuLog(LOG_LEVEL::WARNING, "Unhandled pixel shader instruction!"); -// } - } - } - - if(szCommand[0]) - printf("new command:\n%s\n", szCommand); - return (bHandled && (iOpCount == 1)) ? 
(TRUE) : (FALSE); -#endif - - if( - (strcmp(szOp, "mul")==0) && - (strcmp(szOp1, "mov")==0) && //bMov[1] && - (strcmp(szOp2, "add")==0) && - szABCDOutput[0]) - { - sprintf(szCommand, "mad%s %s, %s, %s, %s\n", - szMod, szABCDOutput, - /*szInput*/szConstRegAB1, - /*szInput*/szConstRegAB2, - /*szInput*/szConstRegCD1 /*because it's a mov now*/); - } - else if( - (strcmp(szOp, "mul")==0) && - (strcmp(szOp1, "mul")==0) && - (strcmp(szOp2, "add")==0) && - szABCDOutput[0]) // TODO: check that strange lrp/ABCDOutput[0]=0 case - { - // nice, mul, mul, add can be converted to lrp - // lrp r0, t0, t1, c2 - // --> r0 = t0 * t1 + (1-t0) * c2 - // or r0 = c2 + t0 * (t1 - c2), but that would mean we have to mul in the ABCD op - // and that is not possible - - for(i=0; i<2; i++) - { - // To match the first option, the first input of the AB/CD op must inverted - BOOL bInvert[2] = {0, 0}; - if((szInputs[2*i][0] == '1') && (szInputs[2*i][1] == '-')) - bInvert[0]=TRUE; - - if((szInputs[2*i+1][0] == '1') && (szInputs[2*i+1][1] == '-')) - bInvert[1]=TRUE; - - //printf("szInputs[2*i]: %s\nszInputs[2*i+1]: %s\n", szInputs[2*i], szInputs[2*i+1]); - //printf("bInvert[0]: %d\nbInvert[1]: %d\n", bInvert[0], bInvert[1]); - - if((bInvert[0] || bInvert[1]) && (!(bInvert[0] && bInvert[1]))) - { - char szParam[3][10]; - char szRealParam0[10]; - if(bInvert[0]) - { - // copy over the not inverted param - strcpy(szParam[2], /*szInputs*/szRealInputs[2*i+1]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i][2]); - strcpy(szRealParam0, &szRealInputs[2*i][2]); - } - else if(bInvert[1]) - { - // copy over the not inverted param - strcpy(szParam[2], /*szInputs*/szRealInputs[2*i]); - - // and the inverted - strcpy(szParam[0], &szInputs[2*i+1][2]); - strcpy(szRealParam0, &szRealInputs[2*i+1][2]); - } - int iOtherOp = i == 0 ? 
1 : 0; - - if(strcmp(szInputs[2*iOtherOp], szParam[0])==0) - strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp+1]); - else - strcpy(szParam[1], /*szInputs*/szRealInputs[2*iOtherOp]); - // ok, we have it - sprintf(szCommand, "lrp%s %s, %s, %s, %s\n", - szMod, szABCDOutput, szRealParam0/*szParam[0]*/, szParam[1], szParam[2]); - - break; - } - } - } else if(strcmp(szOp2, "cnd")==0) { -#ifdef REVEL8N_PIXEL_SHADER_CHANGES - iOffset = 0; - i = 0; - for (i = 0; i < 2; ++i) - { - if (strcmp(szOps[i], "mul")==0) - { - strcpy(szABCDInput[i], szOutputs[i]); - iOffset += sprintf(szCommand + iOffset, "mul %s, %s, %s\n", szOutputs[i], szRealInputs[i * 2 + 0], szRealInputs[i * 2 + 1]); - } - } - sprintf(szCommand + iOffset, "cnd%s %s, %s, %s, %s\n", - szMod, szABCDOutput, "r0.a", szABCDInput[1], szABCDInput[0]); -#else - sprintf(szCommand, "cnd%s %s, %s, %s, %s\n", - szMod, szABCDOutput, "r0.a", szABCDInput[1], szABCDInput[0]); -#endif - - bMov[1]=0; - bMov[0]=0; - } else if( - (strcmp(szOp, "mov")==0) && - (strcmp(szOp1, "mul")==0) && - (strcmp(szOp2, "add")==0)) - { - char szParam[10]="\0"; - - if(strcmp(szInputCD1, "-1")==0) - strcpy(szParam, szInputCD2); - else if(strcmp(szInputCD2, "-1")==0) - strcpy(szParam, szInputCD1); - - if(szParam[0] && szConstRegAB1[0] && szABCDOutput[0]) - { - sprintf(szCommand, "sub%s %s, %s, %s\n", - szMod, szABCDOutput, szConstRegAB1, szParam); - } - - } -//do_operation_with_new_input: - - if(bMov[0] && bMov[1] && szABCDOutput[0]) { - sprintf(szCommand, "%s%s %s, %s, %s\n", szOp2, szMod, szABCDOutput, szABCDInput[0], szABCDInput[1]); - } - - if(szCommand[0]) - printf("new command: %s", szCommand); - return TRUE; -} - -float fConstants[20] = {0.0f}; -int iConstants[20] = {0}; -int iConstCount=0; - -inline void ClearConstRegVars() -{ - iConstCount=0; - memset(fConstants, 0x00, 20*sizeof(float)); - memset(iConstants, 0x00, 20*sizeof(int)); -} - -inline void CorrectConstToReg(char *szConst, int *pPSC0, int *pPSC1) -{ - printf("Looking for %s\n", 
szConst); - float fConst = (float)atof(szConst); - - // check whether we already saved it - int i=0; - for(i=0; iPSAlphaInputs[0], pPSDef->PSAlphaInputs[1], pPSDef->PSAlphaInputs[2], pPSDef->PSAlphaInputs[3], - pPSDef->PSAlphaInputs[4], pPSDef->PSAlphaInputs[5], pPSDef->PSAlphaInputs[6], pPSDef->PSAlphaInputs[7], - pPSDef->PSFinalCombinerInputsABCD, - pPSDef->PSFinalCombinerInputsEFG, - pPSDef->PSConstant0[0], pPSDef->PSConstant0[1], pPSDef->PSConstant0[2], pPSDef->PSConstant0[3], - pPSDef->PSConstant0[4], pPSDef->PSConstant0[5], pPSDef->PSConstant0[6], pPSDef->PSConstant0[7], - pPSDef->PSConstant1[0], pPSDef->PSConstant1[1], pPSDef->PSConstant1[2], pPSDef->PSConstant1[3], - pPSDef->PSConstant1[4], pPSDef->PSConstant1[5], pPSDef->PSConstant1[6], pPSDef->PSConstant1[7], - pPSDef->PSAlphaOutputs[0], pPSDef->PSAlphaOutputs[1], pPSDef->PSAlphaOutputs[2], pPSDef->PSAlphaOutputs[3], - pPSDef->PSAlphaOutputs[4], pPSDef->PSAlphaOutputs[5], pPSDef->PSAlphaOutputs[6], pPSDef->PSAlphaOutputs[7], - pPSDef->PSRGBInputs[0], pPSDef->PSRGBInputs[1], pPSDef->PSRGBInputs[2], pPSDef->PSRGBInputs[3], - pPSDef->PSRGBInputs[4], pPSDef->PSRGBInputs[5], pPSDef->PSRGBInputs[6], pPSDef->PSRGBInputs[7], - pPSDef->PSCompareMode, - pPSDef->PSFinalCombinerConstant0, - pPSDef->PSFinalCombinerConstant1, - pPSDef->PSRGBOutputs[0], pPSDef->PSRGBOutputs[1], pPSDef->PSRGBOutputs[2], pPSDef->PSRGBOutputs[3], - pPSDef->PSRGBOutputs[4], pPSDef->PSRGBOutputs[5], pPSDef->PSRGBOutputs[6], pPSDef->PSRGBOutputs[7], - pPSDef->PSCombinerCount, - XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES), /* pPSDef->PSTextureModes is stored in a different place than pPSDef*/ - pPSDef->PSDotMapping, - pPSDef->PSInputTexture, - pPSDef->PSC0Mapping, - pPSDef->PSC1Mapping, - pPSDef->PSFinalCombinerConstants ); - if (pszCode) - { - fprintf(out, "\n\n%s\n", pszCode); - } - - fclose( out ); - } -} - -// print relevant contents to the debug console -void PrintPixelShaderDefContents(xbox::X_D3DPIXELSHADERDEF* 
pPSDef ) -{ - // Show the contents to the user - if( pPSDef ) - { - DbgPshPrintf( "\n-----PixelShader Def Contents-----\n" ); - - if(XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES)) - { - DWORD dwPSTexMode0 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 0 ) & 0x1F; - DWORD dwPSTexMode1 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 5 ) & 0x1F; - DWORD dwPSTexMode2 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 10 ) & 0x1F; - DWORD dwPSTexMode3 = (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES) >> 15 ) & 0x1F; - - DbgPshPrintf( "PSTextureModes ->\n" ); - DbgPshPrintf( "Stage 0: %s\n", PS_TextureModesStr[dwPSTexMode0] ); - DbgPshPrintf( "Stage 1: %s\n", PS_TextureModesStr[dwPSTexMode1] ); - DbgPshPrintf( "Stage 2: %s\n", PS_TextureModesStr[dwPSTexMode2] ); - DbgPshPrintf( "Stage 3: %s\n", PS_TextureModesStr[dwPSTexMode3] ); - } - - if( pPSDef->PSDotMapping ) - { - DWORD dwPSDMStage1 = ( pPSDef->PSDotMapping >> 0 ) & 0x7; - DWORD dwPSDMStage2 = ( pPSDef->PSDotMapping >> 4 ) & 0x7; - DWORD dwPSDMStage3 = ( pPSDef->PSDotMapping >> 8 ) & 0x7; - - DbgPshPrintf( "PSDotMapping ->\n" ); - DbgPshPrintf( "Stage 1: %s\n", PS_DotMappingStr[dwPSDMStage1] ); - DbgPshPrintf( "Stage 2: %s\n", PS_DotMappingStr[dwPSDMStage2] ); - DbgPshPrintf( "Stage 3: %s\n", PS_DotMappingStr[dwPSDMStage3] ); - } - - if( pPSDef->PSCompareMode ) - { - DWORD dwPSCMStage0 = ( pPSDef->PSCompareMode >> 0 ) & 0xF; - DWORD dwPSCMStage1 = ( pPSDef->PSCompareMode >> 4 ) & 0xF; - DWORD dwPSCMStage2 = ( pPSDef->PSCompareMode >> 8 ) & 0xF; - DWORD dwPSCMStage3 = ( pPSDef->PSCompareMode >> 12 ) & 0xF; - - DbgPshPrintf( "PSCompareMode ->\n" ); - DbgPshPrintf( "Stage 0: %s\n", PS_TextureModesStr[dwPSCMStage0 == 0 ? 0 : 1] ); - DbgPshPrintf( "Stage 1: %s\n", PS_TextureModesStr[dwPSCMStage1 == 0 ? 2 : 3] ); - DbgPshPrintf( "Stage 2: %s\n", PS_TextureModesStr[dwPSCMStage2 == 0 ? 
4 : 5] ); - DbgPshPrintf( "Stage 3: %s\n", PS_TextureModesStr[dwPSCMStage3 == 0 ? 6 : 7] ); - } - - if( pPSDef->PSInputTexture ) - { - DWORD dwPSITStage2 = ( pPSDef->PSInputTexture >> 16 ) & 0x1; - DWORD dwPSITStage3 = ( pPSDef->PSInputTexture >> 20 ) & 0x3; - - DbgPshPrintf( "PSInputTexture ->\n" ); - DbgPshPrintf( "Stage 2: %s\n", PS_TextureModesStr[dwPSITStage2] ); - DbgPshPrintf( "Stage 3: %s\n", PS_TextureModesStr[dwPSITStage3] ); - } - - if( pPSDef->PSCombinerCount ) - { - DWORD dwPSCCNumCombiners = ( pPSDef->PSCombinerCount >> 0 ) & 0xF; - DWORD dwPSCCMux = ( pPSDef->PSCombinerCount >> 8 ) & 0x1; - DWORD dwPSCCC0 = ( pPSDef->PSCombinerCount >> 12 ) & 0x1; - DWORD dwPSCCC1 = ( pPSDef->PSCombinerCount >> 16 ) & 0x1; - - DbgPshPrintf( "PSCombinerCount ->\n" ); - DbgPshPrintf( "Combiners: %d\n", dwPSCCNumCombiners ); - DbgPshPrintf( "Mux: %s\n", PS_CombinerCountFlagsStr[dwPSCCMux] ); - DbgPshPrintf( "C0: %s\n", PS_CombinerCountFlagsStr[dwPSCCC0 == 0 ? 2 : 3] ); - DbgPshPrintf( "C1: %s\n", PS_CombinerCountFlagsStr[dwPSCCC1 == 0 ? 
4 : 5] ); - } - - /*for( int i = 0; i > 7; i++ ) - { - if( pPSDef->PSRGBInputs[i] ) - {*/ - } + g_pD3DDevice->SetPixelShaderConstantF(0, (PixelShaderConstantType*)(&fColor[0]), PSH_XBOX_CONSTANT_MAX); } diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h index f5c0edc36..128369146 100644 --- a/src/core/hle/D3D8/XbPixelShader.h +++ b/src/core/hle/D3D8/XbPixelShader.h @@ -29,10 +29,480 @@ #include "core\hle\D3D8\XbD3D8Types.h" -// dump pixel shader definition to file -void DumpPixelShaderDefToFile( xbox::X_D3DPIXELSHADERDEF* pPSDef, const char* pszCode ); -// print relevant contents to the debug console -void PrintPixelShaderDefContents(xbox::X_D3DPIXELSHADERDEF* pDSDef ); +/*---------------------------------------------------------------------------*/ +/* Texture configuration - The following members of the D3DPixelShaderDef */ +/* structure define the addressing modes of each of the four texture stages:*/ +/* PSTextureModes */ +/* PSDotMapping */ +/* PSInputTexture */ +/* PSCompareMode */ +/*---------------------------------------------------------------------------*/ + +// ========================================================================================================= +// PSTextureModes +// --------.--------.--------.---xxxxx stage 0 PS_TEXTUREMODES +// --------.--------.------xx.xxx----- stage 1 PS_TEXTUREMODES +// --------.--------.-xxxxx--.-------- stage 2 PS_TEXTUREMODES +// --------.----xxxx.x-------.-------- stage 3 PS_TEXTUREMODES + +#define PS_TEXTUREMODES(t0,t1,t2,t3) (((t3)<<15)|((t2)<<10)|((t1)<<5)|(t0)) + +/* +Texture modes: +NONE :stage inactive +PROJECT2D :argb = texture(s/q, t/q) +PROJECT3D :argb = texture(s/q, t/q, r/q) +CUBEMAP :argb = cubemap(s,t,r) +PASSTHRU :argb = s,t,r,q +CLIPPLANE :pixel not drawn if s,t,r, or q < 0. PSCompareMode affects comparison +BUMPENVMAP :argb=texture(s+mat00*src.r+mat01*src.g, + t+mat10*src.r+mat11*src.g) + mat00 set via D3DTSS_BUMPENVMAT00, etc. 
+BUMPENVMAP_LUM :argb=texture(s+mat00*src.r+mat01*src.g, + t+mat10*src.r+mat11*src.g); + rgb *= (lum_scale*src.b + lum_bias); (a is not affected) + lum_scale set by D3DTSS_BUMPENVLSCALE + lum_bias set by D3DTSS_BUMPENVLOFFSET + mat00 set via D3DTSS_BUMPENVMAT00, etc. +BRDF :argb = texture(eyeSigma, lightSigma, dPhi) + eyeSigma = Sigma of eye vector in spherical coordinates, read from stage-2 as (16 bit phi,sigma) + lightSigma = Sigma of light vector in spherical coordinates, read from stage-1 as (16 bit phi,sigma) + dPhi = Phi of eye - Phi of light +DOT_ST :argb = texture(<DotResult of stage-1>, (s,t,r).(src.r,src.g,src.b)) +DOT_ZW :frag depth = (<DotResult of stage-1>/((s,t,r).(src.r,src.g,src.b)) +DOT_RFLCT_DIFF :n = (<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b),<DotResult of stage+1>) + argb = cubemap(n) +DOT_RFLCT_SPEC :n = (<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b)) + r = 2*n*(n.e)/(n.n) - e where e is eye vector built from q texture coordinate of each stage + argb = cubemap(r) +DOT_STR_3D :argb=texture((<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b))) +DOT_STR_CUBE :argb=cubemap((<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b))) +DEPENDENT_AR :argb = texture(src.a, src.r) +DEPENDENT_GB :argb = texture(src.g, src.b) +DOTPRODUCT :argb = (s,t,r).(src.r,src.g,src.b) +DOT_RFLCT_SPEC_CONST :n = (<DotResult of stage-2>,<DotResult of stage-1>,(s,t,r).(src.r,src.g,src.b)) + r = 2*n*(n.e)/(n.n) - e where e is eye vector set via SetEyeVector() into c0 + argb = cubemap(r) +*/ + +enum PS_TEXTUREMODES +{ // valid in stage 0 1 2 3 Uses + PS_TEXTUREMODES_NONE= 0x00L, // * * * * + PS_TEXTUREMODES_PROJECT2D= 0x01L, // * * * * Sample + PS_TEXTUREMODES_PROJECT3D= 0x02L, // * * * * Sample + PS_TEXTUREMODES_CUBEMAP= 0x03L, // * * * * Sample + PS_TEXTUREMODES_PASSTHRU= 0x04L, // * * * * + PS_TEXTUREMODES_CLIPPLANE= 0x05L, // * * * * PSCompareMode + PS_TEXTUREMODES_BUMPENVMAP= 0x06L, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_BUMPENVMAP_LUM= 0x07L, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_BRDF= 0x08L, // - - * * + PS_TEXTUREMODES_DOT_ST= 0x09L, // - - * * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_ZW= 0x0aL, // - - * 
* PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_RFLCT_DIFF= 0x0bL, // - - * - Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_RFLCT_SPEC= 0x0cL, // - - - * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_STR_3D= 0x0dL, // - - - * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DOT_STR_CUBE= 0x0eL, // - - - * Sample, PSInputTexture, PSDotMapping + PS_TEXTUREMODES_DPNDNT_AR= 0x0fL, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_DPNDNT_GB= 0x10L, // - * * * Sample, PSInputTexture + PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - PSInputTexture + PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * Sample, PSInputTexture, PSDotMapping + // 0x13-0x1f reserved +}; + +// ========================================================================================================= +// PSDotMapping +// --------.--------.--------.-----xxx // stage 1 PS_DOTMAPPING +// --------.--------.--------.-xxx---- // stage 2 PS_DOTMAPPING +// --------.--------.-----xxx.-------- // stage 3 PS_DOTMAPPING + +#define PS_DOTMAPPING(t0,t1,t2,t3) (((t3)<<8)|((t2)<<4)|(t1)) + +// Dot mappings over the output value of a (4 component 8 bit unsigned) texture stage register into a (3 component float) vector value, for use in a dot product calculation: +// PS_DOTMAPPING_ZERO_TO_ONE :r8g8b8a8->(r,g,b): 0x00=>0, 0xff=>1 thus : output = (input / 0xff ) +// PS_DOTMAPPING_MINUS1_TO_1_D3D :r8g8b8a8->(r,g,b): 0x00=>-128/127, 0x01=>-1, 0x80=>0, 0xff=>1 thus : output = ((input - 0x100 ) / 0x7f ) +// PS_DOTMAPPING_MINUS1_TO_1_GL :r8g8b8a8->(r,g,b): 0x80=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? (input / 0x7f ) : ((input - 0x100 ) / 0x80 ) (see https://en.wikipedia.org/wiki/Two's_complement) +// PS_DOTMAPPING_MINUS1_TO_1 :r8g8b8a8->(r,g,b): 0x80=>-128/127, ?0x81=>-1, 0x00=>0, 0x7f=>1 thus : output = (input < 0x80 ) ? 
(input / 0x7f ) : ((input - 0x100 ) / 0x7f ) (see https://en.wikipedia.org/wiki/Two's_complement) +// PS_DOTMAPPING_HILO_1 :H16L16 ->(H,L,1): 0x0000=>0, 0xffff=>1 thus : output = (input / 0xffff) +// PS_DOTMAPPING_HILO_HEMISPHERE_D3D :H16L16 ->(H,L,sqrt(1-H*H-L*L)):? 0x8000=>-1, 0x0000=>0, 0x7fff=32767/32768 thus : output = ((input - 0x10000) / 0x7fff) +// PS_DOTMAPPING_HILO_HEMISPHERE_GL :H16L16 ->(H,L,sqrt(1-H*H-L*L)):? 0x8000=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x8000) +// PS_DOTMAPPING_HILO_HEMISPHERE :H16L16 ->(H,L,sqrt(1-H*H-L*L)): 0x8000=>-32768/32767, 0x8001=>-1, 0x0000=>0, 0x7fff=>1 thus : output = (input < 0x8000) ? (input / 0x7fff) : ((input - 0x10000) / 0x7fff) + +enum PS_DOTMAPPING +{ // valid in stage 0 1 2 3 + PS_DOTMAPPING_ZERO_TO_ONE= 0x00L, // - * * * + PS_DOTMAPPING_MINUS1_TO_1_D3D= 0x01L, // - * * * + PS_DOTMAPPING_MINUS1_TO_1_GL= 0x02L, // - * * * + PS_DOTMAPPING_MINUS1_TO_1= 0x03L, // - * * * + PS_DOTMAPPING_HILO_1= 0x04L, // - * * * + PS_DOTMAPPING_HILO_HEMISPHERE_D3D= 0x05L, // - * * * + PS_DOTMAPPING_HILO_HEMISPHERE_GL= 0x06L, // - * * * + PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * * +}; + +// ========================================================================================================= +// PSCompareMode +// --------.--------.--------.----xxxx // stage 0 PS_COMPAREMODE +// --------.--------.--------.xxxx---- // stage 1 PS_COMPAREMODE +// --------.--------.----xxxx.-------- // stage 2 PS_COMPAREMODE +// --------.--------.xxxx----.-------- // stage 3 PS_COMPAREMODE + +#define PS_COMPAREMODE(t0,t1,t2,t3) (((t3)<<12)|((t2)<<8)|((t1)<<4)|(t0)) + +enum PS_COMPAREMODE +{ + PS_COMPAREMODE_S_LT= 0x00L, + PS_COMPAREMODE_S_GE= 0x01L, + + PS_COMPAREMODE_T_LT= 0x00L, + PS_COMPAREMODE_T_GE= 0x02L, + + PS_COMPAREMODE_R_LT= 0x00L, + PS_COMPAREMODE_R_GE= 0x04L, + + PS_COMPAREMODE_Q_LT= 0x00L, + PS_COMPAREMODE_Q_GE= 0x08L, +}; + +// 
========================================================================================================= +// PSInputTexture +// --------.-------x.--------.-------- // stage 2 +// --------.--xx----.--------.-------- // stage 3 +// +// Selects the other texture to use as an input in the following texture modes: +// DOT_ST, DOT_STR_3D, DOT_STR_CUBE, DOT_ZW, DOT_RFLCT_SPEC, +// DOT_RFLCT_DIFF, DPNDNT_AR, DPNDNT_GB, BUMPENVMAP, +// BUMPENVMAP_LUM, DOT_PRODUCT + +#define PS_INPUTTEXTURE(t0,t1,t2,t3) (((t3)<<20)|((t2)<<16)) + + +/*---------------------------------------------------------------------------------*/ +/* Color combiners - The following members of the D3DPixelShaderDef structure */ +/* define the state for the eight stages of color combiners: */ +/* PSCombinerCount - Number of stages */ +/* PSAlphaInputs[8] - Inputs for alpha portion of each stage */ +/* PSRGBInputs[8] - Inputs for RGB portion of each stage */ +/* PSConstant0[8] - Constant 0 for each stage */ +/* PSConstant1[8] - Constant 1 for each stage */ +/* PSFinalCombinerConstant0 - Constant 0 for final combiner */ +/* PSFinalCombinerConstant1 - Constant 1 for final combiner */ +/* PSAlphaOutputs[8] - Outputs for alpha portion of each stage */ +/* PSRGBOutputs[8] - Outputs for RGB portion of each stage */ +/*---------------------------------------------------------------------------------*/ + +// ========================================================================================================= +// PSCombinerCount +// --------.--------.--------.----xxxx // number of combiners (1-8) +// --------.--------.-------x.-------- // PS_COMBINERCOUNT_MUX_MSB bit (0= LSB, 1= MSB) +// --------.--------.---x----.-------- // PS_COMBINERCOUNT_UNIQUE_C0 +// --------.-------x.--------.-------- // PS_COMBINERCOUNT_UNIQUE_C1 + +#define PS_COMBINERCOUNT(count, flags) (((flags)<<8)|(count)) +// count is 1-8, flags contains one or more values from PS_COMBINERCOUNTFLAGS + +enum PS_COMBINERCOUNTFLAGS +{ + 
PS_COMBINERCOUNT_MUX_LSB= 0x0000L, // mux on r0.a lsb + PS_COMBINERCOUNT_MUX_MSB= 0x0001L, // mux on r0.a msb + + PS_COMBINERCOUNT_SAME_C0= 0x0000L, // c0 same in each stage + PS_COMBINERCOUNT_UNIQUE_C0= 0x0010L, // c0 unique in each stage + + PS_COMBINERCOUNT_SAME_C1= 0x0000L, // c1 same in each stage + PS_COMBINERCOUNT_UNIQUE_C1= 0x0100L // c1 unique in each stage +}; + +// ========================================================================================================= +// PSRGBInputs[0-7] +// PSAlphaInputs[0-7] +// PSFinalCombinerInputsABCD +// PSFinalCombinerInputsEFG +// --------.--------.--------.----xxxx // D PS_REGISTER +// --------.--------.--------.---x---- // D PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// --------.--------.--------.xxx----- // D PS_INPUTMAPPING +// --------.--------.----xxxx.-------- // C PS_REGISTER +// --------.--------.---x----.-------- // C PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// --------.--------.xxx-----.-------- // C PS_INPUTMAPPING +// --------.----xxxx.--------.-------- // B PS_REGISTER +// --------.---x----.--------.-------- // B PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// --------.xxx-----.--------.-------- // B PS_INPUTMAPPING +// ----xxxx.--------.--------.-------- // A PS_REGISTER +// ---x----.--------.--------.-------- // A PS_CHANNEL (0= RGB/BLUE, 1= ALPHA) +// xxx-----.--------.--------.-------- // A PS_INPUTMAPPING + +// examples: +// +// shader.PSRGBInputs[3]= PS_COMBINERINPUTS( +// PS_REGISTER_T0 | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, +// PS_REGISTER_C0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, +// PS_REGISTER_ZERO, +// PS_REGISTER_ZERO); +// +// shader.PSFinalCombinerInputsABCD= PS_COMBINERINPUTS( +// PS_REGISTER_T0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_ALPHA, +// PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL | PS_CHANNEL_RGB, +// PS_REGISTER_EFPROD | PS_INPUTMAPPING_UNSIGNED_INVERT | PS_CHANNEL_RGB, +// PS_REGISTER_ZERO); +// +// PS_FINALCOMBINERSETTING is set in 4th field of 
PSFinalCombinerInputsEFG with PS_COMBINERINPUTS +// example: +// +// shader.PSFinalCombinerInputsEFG= PS_COMBINERINPUTS( +// PS_REGISTER_R0 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, +// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_RGB, +// PS_REGISTER_R1 | PS_INPUTMAPPING_UNSIGNED_IDENTITY | PS_CHANNEL_BLUE, +// PS_FINALCOMBINERSETTING_CLAMP_SUM | PS_FINALCOMBINERSETTING_COMPLEMENT_R0); + +#define PS_COMBINERINPUTS(a,b,c,d) (((a)<<24)|((b)<<16)|((c)<<8)|(d)) +// For PSFinalCombinerInputsEFG, +// a,b,c contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING for input E,F, and G +// d contains values from PS_FINALCOMBINERSETTING +// For all other inputs, +// a,b,c,d each contain a value from PS_REGISTER, PS_CHANNEL, and PS_INPUTMAPPING + +// The input has PS_INPUTMAPPING applied +// (Note : I don't know for sure if the max() operation mentioned above is indeed what happens, +// as there's no further documentation available on this. Native Direct3D can clamp with the +// '_sat' instruction modifier, but that's not really the same as these Xbox1 input mappings.) +// +// When the input register is PS_ZERO, the above mappings result in the following constants: +// +// PS_REGISTER_NEGATIVE_ONE (PS_INPUTMAPPING_EXPAND_NORMAL on zero) : y = -1.0 +// PS_REGISTER_NEGATIVE_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NORMAL on zero) : y = -0.5 +// PS_REGISTER_ZERO itself : y = 0.0 +// PS_REGISTER_ONE_HALF (PS_INPUTMAPPING_HALFBIAS_NEGATE on zero) : y = 0.5 +// PS_REGISTER_ONE (PS_INPUTMAPPING_UNSIGNED_INVERT on zero) : y = 1.0 +// (Note : It has no define, but PS_INPUTMAPPING_EXPAND_NEGATE on zero results in ONE too!) 
+ +enum PS_INPUTMAPPING +{ + PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // max(0,x) = 1*max(0,x) + 0.0 OK for final combiner: y = abs(x) + PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // 1 - max(0,x) = -1*max(0,x) + 1.0 OK for final combiner: y = 1 - x + PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // 2 * max(0,x) - 1 = 2*max(0,x) - 1.0 invalid for final combiner + PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // 1 - 2 * max(0,x) = -2*max(0,x) + 1.0 invalid for final combiner + PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // max(0,x) - 1/2 = 1*max(0,x) - 0.5 invalid for final combiner + PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) = -1*max(0,x) + 0.5 invalid for final combiner + PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x = 1* x + 0.0 invalid for final combiner + PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x = -1* x + 0.0 invalid for final combiner +}; + +enum PS_REGISTER +{ + PS_REGISTER_ZERO= 0x00L, // r A.k.a. _REG_0 + PS_REGISTER_DISCARD= 0x00L, // w A.k.a. _REG_0 + PS_REGISTER_C0= 0x01L, // r A.k.a. _REG_1 + PS_REGISTER_C1= 0x02L, // r A.k.a. _REG_2 + PS_REGISTER_FOG= 0x03L, // r A.k.a. _REG_3 + PS_REGISTER_V0= 0x04L, // r/w A.k.a. _REG_4 + PS_REGISTER_V1= 0x05L, // r/w A.k.a. _REG_5 + PS_REGISTER_T0= 0x08L, // r/w A.k.a. _REG_8 + PS_REGISTER_T1= 0x09L, // r/w A.k.a. _REG_9 + PS_REGISTER_T2= 0x0aL, // r/w A.k.a. _REG_A + PS_REGISTER_T3= 0x0bL, // r/w A.k.a. _REG_B + PS_REGISTER_R0= 0x0cL, // r/w A.k.a. _REG_C + PS_REGISTER_R1= 0x0dL, // r/w A.k.a. _REG_D + PS_REGISTER_V1R0_SUM= 0x0eL, // r A.k.a. _REG_SPECLIT + PS_REGISTER_EF_PROD= 0x0fL, // r A.k.a. 
_REG_EF_PROD + + PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 r OK for final combiner + PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 r invalid for final combiner + PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 r invalid for final combiner + PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 r invalid for final combiner + + // Cxbx extension; Separate final combiner constant registers (values not encoded on NV2A, as outside of available bits range) : + PS_REGISTER_FC0= 0x10, + PS_REGISTER_FC1= 0x11, +}; + +// FOG ALPHA is only available in final combiner +// V1R0_SUM and EF_PROD are only available in final combiner (A,B,C,D inputs only) +// V1R0_SUM_ALPHA and EF_PROD_ALPHA are not available +// R0_ALPHA is initialized to T0_ALPHA in stage0 + +enum PS_CHANNEL +{ + PS_CHANNEL_RGB= 0x00, // used as RGB source + PS_CHANNEL_BLUE= 0x00, // used as ALPHA source + PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source +}; + +enum PS_FINALCOMBINERSETTING +{ + PS_FINALCOMBINERSETTING_CLAMP_SUM= 0x80, // V1+R0 sum clamped to [0,1] A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_CLAMP_TRUE + PS_FINALCOMBINERSETTING_COMPLEMENT_V1= 0x40, // unsigned invert mapping (1 - v1) is used as an input to the sum rather than v1 A.k.a. NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_ADD_INVERT_R5_TRUE + PS_FINALCOMBINERSETTING_COMPLEMENT_R0= 0x20, // unsigned invert mapping (1 - r0) is used as an input to the sum rather than r0 A.k.a. 
NV097_SET_COMBINER_SPECULAR_FOG_CW1_SPECULAR_ADD_INVERT_R12_TRUE +}; + +// ========================================================================================================= +// PSRGBOutputs[0-7] +// PSAlphaOutputs[0-7] +// --------.--------.--------.----xxxx // CD output PS_REGISTER +// --------.--------.--------.xxxx---- // AB output PS_REGISTER +// --------.--------.----xxxx.-------- // AB_CD output PS_REGISTER Note : Must be PS_REGISTER_DISCARD if either PS_COMBINEROUTPUT_AB_DOT_PRODUCT or PS_COMBINEROUTPUT_CD_DOT_PRODUCT are set +// --------.--------.---x----.-------- // PS_COMBINEROUTPUT_CD_DOT_PRODUCT (CD output 0= multiply, 1= dot product) +// --------.--------.--x-----.-------- // PS_COMBINEROUTPUT_AB_DOT_PRODUCT (AB output 0= multiply, 1= dot product) +// --------.--------.-x------.-------- // PS_COMBINEROUTPUT_AB_CD_MUX (AB_CD mux/sum select 0= sum, 1= mux) +// --------.------xx.x-------.-------- // PS_COMBINEROUTPUT_OUTPUTMAPPING +// --------.-----x--.--------.-------- // PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA +// --------.----x---.--------.-------- // PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA + +#define PS_COMBINEROUTPUTS(ab,cd,mux_sum,flags) (((flags)<<12)|((mux_sum)<<8)|((ab)<<4)|(cd)) +// ab,cd,mux_sum contain a value from PS_REGISTER +// flags contains values from PS_COMBINEROUTPUT + +enum PS_COMBINEROUTPUT_OUTPUTMAPPING +{ + PS_COMBINEROUTPUT_OUTPUTMAPPING_IDENTITY= 0x00L, // y = x + PS_COMBINEROUTPUT_OUTPUTMAPPING_BIAS= 0x08L, // y = (x - 0.5) + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1= 0x10L, // y = x * 2 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_1_BIAS= 0x18L, // y = (x - 0.5) * 2 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2= 0x20L, // y = x * 4 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS= 0x28L, // y = (x - 0.5) * 4 Note : Cxbx inferred method; May not be supported on NV2A + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1= 0x30L, // y = x / 2 + PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L, // y = (x - 0.5) / 2 Note : Cxbx 
inferred method; May not be supported on NV2A +}; + +enum PS_COMBINEROUTPUT +{ + PS_COMBINEROUTPUT_CD_MULTIPLY= 0x00L, + PS_COMBINEROUTPUT_CD_DOT_PRODUCT= 0x01L, // RGB only + + PS_COMBINEROUTPUT_AB_MULTIPLY= 0x00L, + PS_COMBINEROUTPUT_AB_DOT_PRODUCT= 0x02L, // RGB only + + PS_COMBINEROUTPUT_AB_CD_SUM= 0x00L, // 3rd output is AB+CD + PS_COMBINEROUTPUT_AB_CD_MUX= 0x04L, // 3rd output is MUX(AB,CD) based on R0.a + + PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA= 0x40L, // RGB only + + PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA= 0x80L, // RGB only +}; + +// ========================================================================================================= +// PSC0Mapping +// PSC1Mapping +// --------.--------.--------.----xxxx // offset of D3D constant for stage 0 +// --------.--------.--------.xxxx---- // offset of D3D constant for stage 1 +// --------.--------.----xxxx.-------- // offset of D3D constant for stage 2 +// --------.--------.xxxx----.-------- // offset of D3D constant for stage 3 +// --------.----xxxx.--------.-------- // offset of D3D constant for stage 4 +// --------.xxxx----.--------.-------- // offset of D3D constant for stage 5 +// ----xxxx.--------.--------.-------- // offset of D3D constant for stage 6 +// xxxx----.--------.--------.-------- // offset of D3D constant for stage 7 + +#define PS_CONSTANTMAPPING(s0,s1,s2,s3,s4,s5,s6,s7) \ + (((DWORD)(s0)&0xf)<< 0) | (((DWORD)(s1)&0xf)<< 4) | \ + (((DWORD)(s2)&0xf)<< 8) | (((DWORD)(s3)&0xf)<<12) | \ + (((DWORD)(s4)&0xf)<<16) | (((DWORD)(s5)&0xf)<<20) | \ + (((DWORD)(s6)&0xf)<<24) | (((DWORD)(s7)&0xf)<<28) +// s0-s7 contain the offset of the D3D constant that corresponds to the +// c0 or c1 constant in stages 0 through 7. These mappings are only used in +// SetPixelShaderConstant(). 
+ +// ========================================================================================================= +// PSFinalCombinerConstants +// --------.--------.--------.----xxxx // offset of D3D constant for C0 +// --------.--------.--------.xxxx---- // offset of D3D constant for C1 +// --------.--------.-------x.-------- // Adjust texture flag + +#define PS_FINALCOMBINERCONSTANTS(c0,c1,flags) (((DWORD)(flags) << 8) | ((DWORD)(c0)&0xf)<< 0) | (((DWORD)(c1)&0xf)<< 4) +// c0 and c1 contain the offset of the D3D constant that corresponds to the +// constants in the final combiner. These mappings are only used in +// SetPixelShaderConstant(). Flags contains values from PS_GLOBALFLAGS + +enum PS_GLOBALFLAGS +{ + // if this flag is set, the texture mode for each texture stage is adjusted as follows: + // if set texture is a cubemap, + // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_CUBEMAP + // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_CUBEMAP + // change PS_TEXTUREMODES_DOT_STR_3D to PS_TEXTUREMODES_DOT_STR_CUBE + // if set texture is a volume texture, + // change PS_TEXTUREMODES_PROJECT2D to PS_TEXTUREMODES_PROJECT3D + // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT3D + // change PS_TEXTUREMODES_DOT_STR_CUBE to PS_TEXTUREMODES_DOT_STR_3D + // if set texture is neither cubemap or volume texture, + // change PS_TEXTUREMODES_PROJECT3D to PS_TEXTUREMODES_PROJECT2D + // change PS_TEXTUREMODES_CUBEMAP to PS_TEXTUREMODES_PROJECT2D + + PS_GLOBALFLAGS_NO_TEXMODE_ADJUST= 0x0000L, // don't adjust texture modes + PS_GLOBALFLAGS_TEXMODE_ADJUST= 0x0001L, // adjust texture modes according to set texture +}; + + +constexpr int PSH_XBOX_MAX_C_REGISTER_COUNT = 16; +constexpr int PSH_XBOX_MAX_R_REGISTER_COUNT = 2; +constexpr int PSH_XBOX_MAX_T_REGISTER_COUNT = 4; +constexpr int PSH_XBOX_MAX_V_REGISTER_COUNT = 2; + + +struct RPSRegisterObject { + PS_REGISTER Reg; + + void Decode(uint8_t Value); +}; + +struct RPSInputRegister : RPSRegisterObject { + 
PS_CHANNEL Channel; + PS_INPUTMAPPING InputMapping; + + void Decode(uint8_t Value, unsigned stage_nr, bool isRGB); +}; + +struct RPSCombinerOutput : RPSRegisterObject { + RPSInputRegister Input[2]; // Called Input A and B, or C and D (depending if it's inside the AB or CD combiner) + bool DotProduct; // False=Multiply, True=DotProduct + unsigned BlueToAlpha; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha + + void Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_nr, bool isRGB); +}; + +struct RPSCombinerStageChannel { + RPSCombinerOutput OutputCD; // Contains InputC and InputD (as Input1 and Input2) + RPSCombinerOutput OutputAB; // Contains InputA and InputB (as Input1 and Input2) + RPSRegisterObject OutputMUX_SUM; + bool AB_CD_MUX; // False=AB+CD, True=MUX(AB,CD) based on R0.a + PS_COMBINEROUTPUT CombinerOutputMapping; + + void Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB); +}; + +struct RPSCombinerStage { + RPSCombinerStageChannel RGB; + RPSCombinerStageChannel Alpha; +}; + +struct RPSFinalCombiner { + RPSInputRegister Input[7]; + bool ComplementV1; + bool ComplementR0; + bool ClampSum; + + void Decode(const uint32_t PSFinalCombinerInputsABCD, const uint32_t PSFinalCombinerInputsEFG); +}; + +struct DecodedRegisterCombiner { + PS_TEXTUREMODES PSTextureModes[xbox::X_D3DTS_STAGECOUNT]; + PS_DOTMAPPING PSDotMapping[xbox::X_D3DTS_STAGECOUNT]; + bool PSCompareMode[xbox::X_D3DTS_STAGECOUNT][4]; // True in [0] = PS_COMPAREMODE_S_GE, [1] = PS_COMPAREMODE_T_GE, [2] = PS_COMPAREMODE_R_GE, [3] PS_COMPAREMODE_Q_GE (so, STRQ>0, otherwise <0) + int PSInputTexture[xbox::X_D3DTS_STAGECOUNT]; + + bool CombinerMuxesOnMsb; + bool CombinerHasUniqueC0; + bool CombinerHasUniqueC1; + unsigned NumberOfCombiners; + RPSCombinerStage Combiners[xbox::X_PSH_COMBINECOUNT]; + bool hasFinalCombiner; + RPSFinalCombiner FinalCombiner; + bool TexModeAdjust; + + static void GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES 
psTextureModes[xbox::X_D3DTS_STAGECOUNT]); + static void GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]); + static void GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, bool psCompareModes[xbox::X_D3DTS_STAGECOUNT][4]); + static void GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]); + void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef); +}; + // PatrickvL's Dxbx pixel shader translation void DxbxUpdateActivePixelShader(); // NOPATCH diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index baa682fb1..9adc3e3b7 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -128,17 +128,14 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) // TODO : unsigned offset = 0; DWORD position = (xboxFvf & X_D3DFVF_POSITION_MASK); switch (position) { - case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often - case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break; - case X_D3DFVF_XYZRHW: - g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH; - nrPositionFloats = 4; - break; - case X_D3DFVF_XYZB1: nrBlendWeights = 1; break; - case X_D3DFVF_XYZB2: nrBlendWeights = 2; break; - case X_D3DFVF_XYZB3: nrBlendWeights = 3; break; - case X_D3DFVF_XYZB4: nrBlendWeights = 4; break; - case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break; + case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often + case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break; + case X_D3DFVF_XYZRHW: nrPositionFloats = 4; g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH; break; + case X_D3DFVF_XYZB1: nrBlendWeights = 1; break; + case X_D3DFVF_XYZB2: 
nrBlendWeights = 2; break; + case X_D3DFVF_XYZB3: nrBlendWeights = 3; break; + case X_D3DFVF_XYZB4: nrBlendWeights = 4; break; + case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break; DEFAULT_UNREACHABLE; } @@ -159,6 +156,7 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) // TODO : offset += sizeof(float) * nrBlendWeights; } } + else if (nrBlendWeights > 0) LOG_TEST_CASE("BlendWeights given without position?"); // Write Normal, Diffuse, and Specular if (xboxFvf & X_D3DFVF_NORMAL) { @@ -194,18 +192,23 @@ static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) // TODO : LOG_TEST_CASE("Limiting FVF to 4 textures"); textureCount = 4; // Safeguard, since the X_D3DFVF_TEXCOUNT bitfield could contain invalid values (5 up to 15) } + for (int i = 0; i < textureCount; i++) { - int numberOfCoordinates = 0; auto FVFTextureFormat = (xboxFvf >> X_D3DFVF_TEXCOORDSIZE_SHIFT(i)) & 0x003; - switch (FVFTextureFormat) { - case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break; - case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break; - case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break; - case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break; - DEFAULT_UNREACHABLE; +#if 1 + int numberOfCoordinates = ((FVFTextureFormat + 1) & 3) + 1; +#else + int numberOfCoordinates = 0; + switch (FVFTextureFormat) { // Note : Below enums are not ordered; In a math expression mapped as : + case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break; // input = 3 -> 4 -> 0 -> 1 = output + case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break; // input = 0 -> 1 -> 1 -> 2 = output + case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break; // input = 1 -> 2 -> 2 -> 3 = output + case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break; // input = 2 -> 3 -> 3 -> 4 = output + DEFAULT_UNREACHABLE; // ((input +1 ) &3 ) +1 ) = output } assert(numberOfCoordinates > 
0); +#endif pSlot = &declaration.Slots[X_D3DVSDE_TEXCOORD0 + i]; pSlot->Format = X_D3DVSDT_FLOAT[numberOfCoordinates]; pSlot->Offset = offset; From 4517f364563dfe58a58af58fa5d731d15850bcc0 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Wed, 25 Nov 2020 17:09:41 +0100 Subject: [PATCH 03/47] Unrelated to PS : UpdateFixedFunctionVertexShaderState now derives TexCoordComponentCount from GetXboxVertexAttributeFormat, which makes it honor overrides. This could fix corrupted textures when Fixed Function HLSL is used together with VertexStream override modes. --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 16b19e6d6..eda935d60 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -6413,6 +6413,7 @@ float AsFloat(uint32_t value) { void UpdateFixedFunctionVertexShaderState() { + extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue using namespace xbox; // Vertex blending @@ -6514,7 +6515,7 @@ void UpdateFixedFunctionVertexShaderState() } // Texture state - for (int i = 0; i < 4; i++) { + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { auto transformFlags = XboxTextureStates.Get(i, X_D3DTSS_TEXTURETRANSFORMFLAGS); ffShaderState.TextureStates[i].TextureTransformFlagsCount = (float)(transformFlags & ~D3DTTFF_PROJECTED); ffShaderState.TextureStates[i].TextureTransformFlagsProjected = (float)(transformFlags & D3DTTFF_PROJECTED); @@ -6524,9 +6525,14 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.TextureStates[i].TexCoordIndexGen = (float)(texCoordIndex >> 16); // D3DTSS_TCI flags } - // TexCoord component counts - extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue + // Read current TexCoord component counts xbox::X_VERTEXATTRIBUTEFORMAT* pXboxVertexAttributeFormat = 
GetXboxVertexAttributeFormat(); + // Note : There seem to be other ways to access this, but we can use only this one; + // This, because CxbxGetVertexDeclaration() can't be used, since it doesn't track VertexAttributes + // (plus, it contains the overhead of shader lookup). + // Another, GetXboxVertexShader(), can't be used, because it doesn't honor vertex attribute overrides + // like those that apply for g_InlineVertexBuffer_DeclarationOverride and active SetVertexShaderInput. + // Also, the xbox::X_D3DVertexShader.Dimensionality[] field contains somewhat strange values. for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { auto vertexDataFormat = pXboxVertexAttributeFormat->Slots[xbox::X_D3DVSDE_TEXCOORD0 + i].Format; ffShaderState.TexCoordComponentCount[i] = (float)GetXboxVertexDataComponentCount(vertexDataFormat); From b72b52a4a248c1b0584650164edb29978b310512 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Wed, 25 Nov 2020 17:10:05 +0100 Subject: [PATCH 04/47] RenderState mapping table review remarks (nothing functional) --- src/core/hle/D3D8/XbConvert.cpp | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/core/hle/D3D8/XbConvert.cpp b/src/core/hle/D3D8/XbConvert.cpp index 8c55acae4..36899b2d7 100644 --- a/src/core/hle/D3D8/XbConvert.cpp +++ b/src/core/hle/D3D8/XbConvert.cpp @@ -1339,14 +1339,23 @@ void EmuUnswizzleBox } // EmuUnswizzleBox NOPATCH // Notes : -// * most renderstates were introduced in the (lowest known) XDK version : 3424 -// * additional renderstates were introduced between 3434 and 4627 -// * we MUST list exact versions for each of those, since their inserts impacts mapping! 
-// * renderstates were finalized in 4627 (so no change after that version) -// * renderstates after D3DRS_MULTISAMPLEMASK have no host mapping, thus no impact -// * D3DRS_MULTISAMPLETYPE seems the only renderstate that got removed (after 3944, before 4039) -// * all renderstates marked 3424 are also verified present in 3944 -const RenderStateInfo DxbxRenderStateInfo[] = { +// * Most renderstates were introduced in the (lowest known) XDK version : 3424 +// * Some titles use XDK version 3911 +// * The lowest XDK version that has been verified is : 3944 +// * All renderstates marked 3424 are also verified to be present in 3944 +// * Twenty-three additional renderstates were introduced after 3944 and up to 4627; +// * D3DRS_DEPTHCLIPCONTROL, D3DRS_STIPPLEENABLE, D3DRS_SIMPLE_UNUSED8..D3DRS_SIMPLE_UNUSED1, +// * D3DRS_SWAPFILTER, D3DRS_PRESENTATIONINTERVAL, D3DRS_DEFERRED_UNUSED8..D3DRS_DEFERRED_UNUSED1, +// * D3DRS_MULTISAMPLEMODE, D3DRS_MULTISAMPLERENDERTARGETMODE, and D3DRS_SAMPLEALPHA +// * One renderstate, D3DRS_MULTISAMPLETYPE, was removed (after 3944, before 4039, perhaps even 4034) +// * Around when D3DRS_MULTISAMPLETYPE was removed, D3DRS_MULTISAMPLEMODE was introduced (after 3944, before or at 4039, perhaps even 4034) +// * We MUST list exact versions for all above mentioned renderstates, since their inserts impacts mapping! +// * Renderstates verified to be introduced at 4039 or earlier, may have been introduced at 4034 or earlier +// * Renderstates were finalized in 4627 (so no change after that version) +// * XDK versions that have been verified : 3944, 4039, 4134, 4242, 4361, 4432, 4531, 4627, 4721, 4831, 4928, 5028, 5120, 5233, 5344, 5455, 5558, 5659, 5788, 5849, 5933 +// * Renderstates with uncertain validity are marked "Verified absent in #XDK#" and/or "present in #XDK#". Some have "Might be introduced "... 
"in between" or "around #XDK#" +// * Renderstates after D3DRS_MULTISAMPLEMASK have no host DX9 D3DRS mapping, thus no impact +const RenderStateInfo DxbxRenderStateInfo[1+xbox::X_D3DRS_DONOTCULLUNCOMPRESSED] = { // String Ord Version Type Method Native { "D3DRS_PSALPHAINPUTS0" /*= 0*/, 3424, xtDWORD, NV2A_RC_IN_ALPHA(0) }, From 29905522307138438328943e8ba9e561ed44ffb8 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Sat, 28 Nov 2020 16:48:07 +0100 Subject: [PATCH 05/47] Fixes after rebase --- src/core/hle/D3D8/XbPixelShader.cpp | 213 +++++++++++++--------------- 1 file changed, 99 insertions(+), 114 deletions(-) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index ca79697bf..f8dd09154 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -383,7 +383,7 @@ typedef struct s_CxbxPSDef { bool RenderStateFogEnable; bool RenderStateSpecularEnable; - bool IsEquivalent(s_CxbxPSDef &Another) + bool IsEquivalent(const s_CxbxPSDef &Another) { // Only compare the [*]-marked members, which forms the unique shader declaration (ignore the constants and most Xbox Direct3D8 run-time fields) : // [*] DWORD PSAlphaInputs[8]; // X_D3DRS_PSALPHAINPUTS0..X_D3DRS_PSALPHAINPUTS7 : Alpha inputs for each stage @@ -613,15 +613,6 @@ constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_LUM + 4; // = 27 void DxbxUpdateActivePixelShader() // NOPATCH { - xbox::X_D3DPIXELSHADERDEF *pPSDef; - PSH_RECOMPILED_SHADER *RecompiledPixelShader; - IDirect3DPixelShader *CurrentPixelShader = nullptr; - int i; - D3DCOLOR dwColor; - D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; - - HRESULT Result = D3D_OK; - // The first RenderState is PSAlpha, // The pixel shader is stored in pDevice->m_pPixelShader // For now, we still patch SetPixelShader and read from there... @@ -634,118 +625,112 @@ void DxbxUpdateActivePixelShader() // NOPATCH // manually read from D3D__RenderState[X_D3DRS_PSTEXTUREMODES] for that one field. 
// See D3DDevice_SetPixelShaderCommon which implements this - pPSDef = g_pXbox_PixelShader != nullptr ? (xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr; + const xbox::X_D3DPIXELSHADERDEF *pPSDef = g_pXbox_PixelShader != nullptr ? (xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr; if (pPSDef == nullptr) { g_pD3DDevice->SetPixelShader(nullptr); return; } - // Create a copy of the pixel shader definition, as it is residing in render state register slots : - CxbxPSDef CompletePSDef; - CompletePSDef.PSDef = *pPSDef; - // Copy-in the PSTextureModes value which is stored outside the range of Xbox pixel shader render state slots : - CompletePSDef.PSDef.PSTextureModes = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES); - // Fetch all other values that are used in the IsEquivalent check : - CompletePSDef.SnapshotRuntimeVariables(); + // Create a copy of the pixel shader definition, as it is residing in render state register slots : + CxbxPSDef CompletePSDef; + CompletePSDef.PSDef = *pPSDef; + // Copy-in the PSTextureModes value which is stored outside the range of Xbox pixel shader render state slots : + CompletePSDef.PSDef.PSTextureModes = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSTEXTUREMODES); + // Fetch all other values that are used in the IsEquivalent check : + CompletePSDef.SnapshotRuntimeVariables(); - // Now, see if we already have a shader compiled for this definition : - // TODO : Change g_RecompiledPixelShaders into an unordered_map, hash just the identifying PSDef members, and add cache eviction (clearing host resources when pruning) - RecompiledPixelShader = nullptr; - for (auto it = g_RecompiledPixelShaders.begin(); it != g_RecompiledPixelShaders.end(); ++it) { - if (CompletePSDef.IsEquivalent(it->CompletePSDef)) { - RecompiledPixelShader = &(*it); - break; - } - } - - // If none was found, recompile this shader and remember it : - if 
(RecompiledPixelShader == nullptr) { - // Recompile this pixel shader : - g_RecompiledPixelShaders.push_back(CxbxRecompilePixelShader(CompletePSDef)); - RecompiledPixelShader = &g_RecompiledPixelShaders.back(); + // Now, see if we already have a shader compiled for this definition : + // TODO : Change g_RecompiledPixelShaders into an unordered_map, hash just the identifying PSDef members, and add cache eviction (clearing host resources when pruning) + const PSH_RECOMPILED_SHADER* RecompiledPixelShader = nullptr; + for (const auto& it : g_RecompiledPixelShaders) { + if (CompletePSDef.IsEquivalent(it.CompletePSDef)) { + RecompiledPixelShader = &it; + break; } + } - // Switch to the converted pixel shader (if it's any different from our currently active - // pixel shader, to avoid many unnecessary state changes on the local side). - g_pD3DDevice->GetPixelShader(/*out*/&CurrentPixelShader); - if (CurrentPixelShader != RecompiledPixelShader->ConvertedPixelShader) { - g_pD3DDevice->SetPixelShader(RecompiledPixelShader->ConvertedPixelShader); - } - if (CurrentPixelShader) { - CurrentPixelShader->Release(); - CurrentPixelShader = nullptr; + // If none was found, recompile this shader and remember it : + if (RecompiledPixelShader == nullptr) { + // Recompile this pixel shader : + g_RecompiledPixelShaders.push_back(CxbxRecompilePixelShader(CompletePSDef)); + RecompiledPixelShader = &g_RecompiledPixelShaders.back(); + } + + // Switch to the converted pixel shader (if it's any different from our currently active + // pixel shader, to avoid many unnecessary state changes on the local side). 
+ Microsoft::WRL::ComPtr<IDirect3DPixelShader> CurrentPixelShader; + g_pD3DDevice->GetPixelShader(/*out*/CurrentPixelShader.GetAddressOf()); + if (CurrentPixelShader.Get() != RecompiledPixelShader->ConvertedPixelShader) { + g_pD3DDevice->SetPixelShader(RecompiledPixelShader->ConvertedPixelShader); + } + + //PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]; + //PSH_XBOX_SHADER::GetPSTextureModes(pPSDef, psTextureModes); + // + //for (i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) + //{ + // switch (psTextureModes[i]) + // { + // default: + // break; + // } + //} + + // Set constants, not based on g_PixelShaderConstants, but based on + // the render state slots containing the pixel shader constants, + // as these could have been updated via SetRenderState or otherwise : + D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; + for (int i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) { + // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) + // Read the color from the corresponding render state slot : + switch (i) { + case PSH_XBOX_CONSTANT_FOG: + // Note : FOG.RGB is correct like this, but FOG.a should be coming + // from the vertex shader (oFog) - however, D3D8 does not forward this... 
+ fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); + break; + case PSH_XBOX_CONSTANT_FC0: + fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); + break; + case PSH_XBOX_CONSTANT_FC1: + fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); + break; + case PSH_XBOX_CONSTANT_BEM + 0: + case PSH_XBOX_CONSTANT_BEM + 1: + case PSH_XBOX_CONSTANT_BEM + 2: + case PSH_XBOX_CONSTANT_BEM + 3: + { + int stage_nr = i - PSH_XBOX_CONSTANT_BEM; + DWORD* value = (DWORD*)&fColor[i];; // Note : This overlays D3DXCOLOR's FLOAT r, g, b, a + + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT00, &value[0]); // Maps to BEM[stage].x + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT01, &value[1]); // Maps to BEM[stage].y + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT10, &value[2]); // Maps to BEM[stage].z + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT11, &value[3]); // Maps to BEM[stage].w + // Note : The TSS values being read here, have been transfered from Xbox to host in XboxTextureStateConverter::Apply() + break; + } + case PSH_XBOX_CONSTANT_LUM + 0: + case PSH_XBOX_CONSTANT_LUM + 1: + case PSH_XBOX_CONSTANT_LUM + 2: + case PSH_XBOX_CONSTANT_LUM + 3: + { + int stage_nr = i - PSH_XBOX_CONSTANT_LUM; + DWORD* value = (DWORD*)&fColor[i]; // Note : This overlays D3DXCOLOR's FLOAT r, g, b, a + + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLSCALE, &value[0]); // Maps to LUM[stage].x + g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLOFFSET, &value[1]); // Maps to LUM[stage].y + value[2] = 0; + value[3] = 0; + break; + } + default: // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots + unsigned constant_nr = i - PSH_XBOX_CONSTANT_C0; + fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + constant_nr); // Note : 
0xAARRGGBB format + break; } + } - //PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]; - //PSH_XBOX_SHADER::GetPSTextureModes(pPSDef, psTextureModes); - // - //for (i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - //{ - // switch (psTextureModes[i]) - // { - // case PS_TEXTUREMODES_BUMPENVMAP: - // g_pD3DDevice->SetTextureStageState(i, D3DTSS_COLOROP, D3DTOP_BUMPENVMAP); - // break; - // case PS_TEXTUREMODES_BUMPENVMAP_LUM: - // g_pD3DDevice->SetTextureStageState(i, D3DTSS_COLOROP, D3DTOP_BUMPENVMAPLUMINANCE); - // break; - // default: - // break; - // } - //} - - // Set constants, not based on g_PixelShaderConstants, but based on - // the render state slots containing the pixel shader constants, - // as these could have been updated via SetRenderState or otherwise : - for (i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) { - // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) - // Read the color from the corresponding render state slot : - switch (i) { - case PSH_XBOX_CONSTANT_FOG: - // Note : FOG.RGB is correct like this, but FOG.a should be coming - // from the vertex shader (oFog) - however, D3D8 does not forward this... 
- fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); - break; - case PSH_XBOX_CONSTANT_FC0: - fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); - break; - case PSH_XBOX_CONSTANT_FC1: - fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); - break; - case PSH_XBOX_CONSTANT_BEM + 0: - case PSH_XBOX_CONSTANT_BEM + 1: - case PSH_XBOX_CONSTANT_BEM + 2: - case PSH_XBOX_CONSTANT_BEM + 3: - { - int stage_nr = i - PSH_XBOX_CONSTANT_BEM; - DWORD* value = (DWORD*)&fColor[i]; - - g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT00, &value[0]); // Maps to BEM[stage].x - g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT01, &value[1]); // Maps to BEM[stage].y - g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT10, &value[2]); // Maps to BEM[stage].z - g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVMAT11, &value[3]); // Maps to BEM[stage].w - break; - } - case PSH_XBOX_CONSTANT_LUM + 0: - case PSH_XBOX_CONSTANT_LUM + 1: - case PSH_XBOX_CONSTANT_LUM + 2: - case PSH_XBOX_CONSTANT_LUM + 3: - { - int stage_nr = i - PSH_XBOX_CONSTANT_LUM; - DWORD* value = (DWORD*)&fColor[i]; - g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLSCALE, &value[0]); // Maps to LUM[stage].x - g_pD3DDevice->GetTextureStageState(stage_nr, D3DTSS_BUMPENVLOFFSET, &value[1]); // Maps to LUM[stage].y - value[2] = 0; - value[3] = 0; - break; - } - default: // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots - unsigned constant_nr = i - PSH_XBOX_CONSTANT_C0; - fColor[i] = dwColor = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + constant_nr); // Note : 0xAARRGGBB format - break; - } - } - - // Set all host constant values using a single call: - g_pD3DDevice->SetPixelShaderConstantF(0, (PixelShaderConstantType*)(&fColor[0]), 
PSH_XBOX_CONSTANT_MAX); + // Set all host constant values using a single call: + g_pD3DDevice->SetPixelShaderConstantF(0, reinterpret_cast(fColor), PSH_XBOX_CONSTANT_MAX); } From 29017db910d2c779061ff505adfa53ea1cc90732 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Sat, 28 Nov 2020 21:51:36 +0100 Subject: [PATCH 06/47] In DxbxUpdateActivePixelShader, transfer values step by step, instead of using a switch/case loop. Also, working towards removing host SetTextureStageState calls (doesn't work yet). --- src/core/hle/D3D8/Direct3D9/TextureStates.cpp | 5 ++ src/core/hle/D3D8/XbPixelShader.cpp | 54 +++++++++++++------ 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index 0a0a07f64..f75714d34 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -275,9 +275,13 @@ void XboxTextureStateConverter::Apply() case xbox::X_D3DTSS_COLORARG0: case xbox::X_D3DTSS_COLORARG1: case xbox::X_D3DTSS_COLORARG2: case xbox::X_D3DTSS_ALPHAARG0: case xbox::X_D3DTSS_ALPHAARG1: case xbox::X_D3DTSS_ALPHAARG2: case xbox::X_D3DTSS_RESULTARG: case xbox::X_D3DTSS_TEXTURETRANSFORMFLAGS: + break; case xbox::X_D3DTSS_BUMPENVMAT00: case xbox::X_D3DTSS_BUMPENVMAT01: case xbox::X_D3DTSS_BUMPENVMAT11: case xbox::X_D3DTSS_BUMPENVMAT10: case xbox::X_D3DTSS_BUMPENVLSCALE: case xbox::X_D3DTSS_BUMPENVLOFFSET: +#if 0 // New, doesn't work yet + continue; // Note : Since DxbxUpdateActivePixelShader() reads these too, you'd expect here we could skip, but alas. 
TODO: Fix PS HLSL to not depend on host D3D TSS +#endif case xbox::X_D3DTSS_BORDERCOLOR: case xbox::X_D3DTSS_MIPMAPLODBIAS: case xbox::X_D3DTSS_MAXMIPLEVEL: case xbox::X_D3DTSS_MAXANISOTROPY: break; @@ -336,5 +340,6 @@ uint32_t XboxTextureStateConverter::Get(int textureStage, DWORD xboxState) { if (xboxState < xbox::X_D3DTSS_FIRST || xboxState > xbox::X_D3DTSS_LAST) CxbxKrnlCleanup("Requested texture state was out of range: %d", xboxState); + // Read the value of the current stage/state from the Xbox data structure return D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]]; } diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index f8dd09154..e9c55143c 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -50,10 +50,13 @@ #include #include "Direct3D9\RenderStates.h" // For XboxRenderStateConverter +#include "Direct3D9\TextureStates.h" // For XboxTextureStateConverter #include extern XboxRenderStateConverter XboxRenderStates; // Declared in Direct3D9.cpp +extern XboxTextureStateConverter XboxTextureStates; // Declared in Direct3D9.cpp + #define DbgPshPrintf \ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ @@ -680,21 +683,39 @@ void DxbxUpdateActivePixelShader() // NOPATCH // the render state slots containing the pixel shader constants, // as these could have been updated via SetRenderState or otherwise : D3DXCOLOR fColor[PSH_XBOX_CONSTANT_MAX]; + + // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots + for (unsigned constant_nr = 0; constant_nr < 16; constant_nr++) { + fColor[PSH_XBOX_CONSTANT_C0 + constant_nr] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + constant_nr); // Note : 0xAARRGGBB format + } + + fColor[PSH_XBOX_CONSTANT_FC0] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); + fColor[PSH_XBOX_CONSTANT_FC1] = 
XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); + + // Fog requires a constant (as host PS1.4 doesn't support the FOG register) + // Note : FOG.RGB is correct like this, but FOG.a should be coming + // from the vertex shader (oFog) - however, D3D8 does not forward this... + fColor[PSH_XBOX_CONSTANT_FOG] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); +#if 0 // New, doesn't work yet + // Bump Environment Material registers + for (int stage_nr = 0; stage_nr < xbox::X_D3DTS_STAGECOUNT; stage_nr++) { + // Note : No loop, because X_D3DTSS_BUMPENVMAT11 and X_D3DTSS_BUMPENVMAT10 are swapped + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].r = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT00); // Maps to BEM[stage].x + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].g = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT01); // Maps to BEM[stage].y + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].b = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT10); // Maps to BEM[stage].z + fColor[PSH_XBOX_CONSTANT_BEM + stage_nr].a = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVMAT11); // Maps to BEM[stage].w + } + + // Bump map Luminance registers + for (int stage_nr = 0; stage_nr < xbox::X_D3DTS_STAGECOUNT; stage_nr++) { + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].r = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVLSCALE); // Maps to LUM[stage].x + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].g = XboxTextureStates.Get(stage_nr, xbox::X_D3DTSS_BUMPENVLOFFSET); // Maps to LUM[stage].y + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].b = 0; + fColor[PSH_XBOX_CONSTANT_LUM + stage_nr].a = 0; + } +#else for (int i = 0; i < PSH_XBOX_CONSTANT_MAX; i++) { - // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) - // Read the color from the corresponding render state slot : switch (i) { - case PSH_XBOX_CONSTANT_FOG: - // Note : FOG.RGB is correct like this, but FOG.a 
should be coming - // from the vertex shader (oFog) - however, D3D8 does not forward this... - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR); - break; - case PSH_XBOX_CONSTANT_FC0: - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT0); - break; - case PSH_XBOX_CONSTANT_FC1: - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSFINALCOMBINERCONSTANT1); - break; case PSH_XBOX_CONSTANT_BEM + 0: case PSH_XBOX_CONSTANT_BEM + 1: case PSH_XBOX_CONSTANT_BEM + 2: @@ -724,13 +745,12 @@ void DxbxUpdateActivePixelShader() // NOPATCH value[3] = 0; break; } - default: // PSH_XBOX_CONSTANT_C0..C15 are stored as-is in (and should thus be read from) the Xbox render state pixel shader constant slots - unsigned constant_nr = i - PSH_XBOX_CONSTANT_C0; - fColor[i] = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_PSCONSTANT0_0 + constant_nr); // Note : 0xAARRGGBB format - break; } } +#endif + // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) + // Read the color from the corresponding render state slot : // Set all host constant values using a single call: g_pD3DDevice->SetPixelShaderConstantF(0, reinterpret_cast(fColor), PSH_XBOX_CONSTANT_MAX); } From 4af986f0580b3c28b0bb23b50b7f63d52a196fdf Mon Sep 17 00:00:00 2001 From: patrickvl Date: Sun, 29 Nov 2020 18:08:13 +0100 Subject: [PATCH 07/47] No time to test, but here's alphakill support in PS HLSL --- .../Direct3D9/CxbxPixelShaderTemplate.hlsl | 88 +++++++++++-------- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 6 ++ src/core/hle/D3D8/XbPixelShader.cpp | 13 +++ src/core/hle/D3D8/XbPixelShader.h | 2 + 4 files changed, 74 insertions(+), 35 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index 2baf6abf2..718efd574 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ 
b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -226,26 +226,44 @@ float m21(const float input) // Declare one sampler per each {Sampler Type, Texture Stage} combination // TODO : Generate sampler status? -sampler2D _sampler2D_0; -sampler2D _sampler2D_1; -sampler2D _sampler2D_2; -sampler2D _sampler2D_3; +sampler2D _sampler2D[4]; +sampler3D _sampler3D[4]; +samplerCUBE _sampler6F[4]; -sampler3D _sampler3D_0; -sampler3D _sampler3D_1; -sampler3D _sampler3D_2; -sampler3D _sampler3D_3; - -samplerCUBE _sampler6F_0; -samplerCUBE _sampler6F_1; -samplerCUBE _sampler6F_2; -samplerCUBE _sampler6F_3; +// Generated alphakill contents are based on X_D3DTSS_ALPHAKILL (we avoid using a constant, to allow false's to be optimized away) +// bool alphakill[4] = {false, false, false, false}; // Generated by PixelShader.cpp::BuildShader() // Actual texture sampling per stage (always uses the s sampling vector variable as input) // abstracting away the specifics of accessing above sampler declarations (usefull for future Direct3D 10+ sampler arrays) -#define Sample2D(st) tex2D(_sampler2D_ ## st, s.xy) // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) -#define Sample3D(st) tex3D(_sampler3D_ ## st, s.xyz) -#define Sample6F(st) texCUBE(_sampler6F_ ## st, s.xyz) +float4 Sample2D(int st, float3 s) +{ + float4 result = tex2D(_sampler2D[st], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) + if (alphakill[st]) + if (result.a == 0) + discard; + + return result; +} + +float4 Sample3D(int st, float3 s) +{ + float4 result = tex3D(_sampler3D[st], s.xyz); + if (alphakill[st]) + if (result.a == 0) + discard; + + return result; +} + +float4 Sample6F(int st, float3 s) +{ + float4 result = texCUBE(_sampler6F[st], s.xyz); + if (alphakill[st]) + if (result.a == 0) + discard; + + return result; +} // Map texture registers to their array elements. 
Having texture registers in an array allows indexed access to them #define t0 t[0] @@ -271,25 +289,25 @@ samplerCUBE _sampler6F_3; // Implementations for all possible texture modes, with stage as argument (prefixed with valid stages and corresponding pixel shader 1.3 assembly texture addressing instructions) // For ease of understanding, all follow this plan : Optional specifics, or dot calculation (some with normal selection) and sampling vector determination. All end by deriving a value and assigning this to the stage's texture register. -/*0123 tex */ #define PS_TEXTUREMODES_NONE(st) v = black; t[st] = v // Seems to work -/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(st) s = iT[st].xyz; v = Sample2D(st); t[st] = v // Seems to work (are x/w and y/w implicit?) [1] -/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(st) s = iT[st].xyz; v = Sample3D(st); t[st] = v // Seems to work (is z/w implicit?) -/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(st) s = iT[st].xyz; v = Sample6F(st); t[st] = v // TODO : Test -/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(st) v = Passthru(st); t[st] = v // Seems to work -/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(st) PS_COMPAREMODE_ ## st(iT[st]); v = black; t[st] = v // Seems to work (setting black to texture register, in case it gets read) -/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(st) s = BumpEnv(st); v = Sample2D(st); t[st] = v // Seems to work -/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(st) PS_TEXTUREMODES_BUMPENVMAP(st); v.rgb *= LSO(st); t[st] = v // TODO : Test -/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(st) s = Brdf(st); v = Sample3D(st); t[st] = v // TODO : Test (t[st-2] is 16 bit eyePhi,eyeSigma; t[st-1] is lightPhi,lightSigma) -/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(st) CalcDot(st); n = Normal2(st); s = n; v = Sample2D(st); t[st] = v // TODO : Test -/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(st) CalcDot(st); n = Normal2(st); if (n.y==0) v=1;else v = n.x / 
n.y; t[st] = v // TODO : Make depth-check use result of division, but how? -/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st); t[st] = v // TODO : Test -/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(st) CalcDot(st); n = Normal3(st); s = Reflect(n, Eye); v = Sample6F(st); t[st] = v // TODO : Test -/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(st) CalcDot(st); n = Normal3(st); s = n; v = Sample3D(st); t[st] = v // TODO : Test -/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st); t[st] = v // TODO : Test -/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(st) s = src(st).arg; v = Sample2D(st); t[st] = v // TODO : Test [1] -/*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(st) s = src(st).gba; v = Sample2D(st); t[st] = v // TODO : Test [1] -/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(st) CalcDot(st); v = float4(dm,0); t[st] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) -/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(st) CalcDot(st); n = Normal3(st); s = Reflect(n, c0); v = Sample6F(st); t[st] = v // TODO : Test +/*0123 tex */ #define PS_TEXTUREMODES_NONE(st) v = black; t[st] = v // Seems to work +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(st) s = iT[st].xyz; v = Sample2D(st, s); t[st] = v // Seems to work (are x/w and y/w implicit?) [1] +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(st) s = iT[st].xyz; v = Sample3D(st, s); t[st] = v // Seems to work (is z/w implicit?) 
+/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(st) s = iT[st].xyz; v = Sample6F(st, s); t[st] = v // TODO : Test +/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(st) v = Passthru(st); t[st] = v // Seems to work +/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(st) PS_COMPAREMODE_ ## st(iT[st]); v = black; t[st] = v // Seems to work (setting black to texture register, in case it gets read) +/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(st) s = BumpEnv(st); v = Sample2D(st, s); t[st] = v // Seems to work +/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(st) PS_TEXTUREMODES_BUMPENVMAP(st); v.rgb *= LSO(st); t[st] = v // TODO : Test +/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(st) s = Brdf(st); v = Sample3D(st, s); t[st] = v // TODO : Test (t[st-2] is 16 bit eyePhi,eyeSigma; t[st-1] is lightPhi,lightSigma) +/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(st) CalcDot(st); n = Normal2(st); s = n; v = Sample2D(st, s); t[st] = v // TODO : Test +/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(st) CalcDot(st); n = Normal2(st); if (n.y==0) v=1;else v = n.x / n.y; t[st] = v // TODO : Make depth-check use result of division, but how? 
+/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st, s); t[st] = v // TODO : Test +/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(st) CalcDot(st); n = Normal3(st); s = Reflect(n, Eye); v = Sample6F(st, s); t[st] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(st) CalcDot(st); n = Normal3(st); s = n; v = Sample3D(st, s); t[st] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st, s); t[st] = v // TODO : Test +/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(st) s = src(st).arg; v = Sample2D(st, s); t[st] = v // TODO : Test [1] +/*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(st) s = src(st).gba; v = Sample2D(st, s); t[st] = v // TODO : Test [1] +/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(st) CalcDot(st); v = float4(dm, 0); t[st] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) +/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(st) CalcDot(st); n = Normal3(st); s = Reflect(n, c0); v = Sample6F(st, s); t[st] = v // TODO : Test // [1] Note : 3rd component set to s.z is just an (ignored) placeholder to produce a float3 (made unique, to avoid the potential complexity of repeated components) PS_OUTPUT main(const PS_INPUT xIn) diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 71d8394e1..4229470f5 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -277,6 +277,12 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) hlsl << hlsl_template[0]; // Start with the HLSL template header + hlsl << "\nbool alphakill[4] = {" + << (pShader->AlphaKill[0] ? "true, " : "false, ") + << (pShader->AlphaKill[1] ? "true, " : "false, ") + << (pShader->AlphaKill[2] ? 
"true, " : "false, ") + << (pShader->AlphaKill[3] ? "true};" : "false};"); + hlsl << "\n#define PS_COMBINERCOUNT " << pShader->NumberOfCombiners; if (pShader->NumberOfCombiners > 0) { OutputDefine(hlsl, "PS_COMBINERCOUNT_UNIQUE_C0", pShader->CombinerHasUniqueC0); diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index e9c55143c..fc186e97e 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -385,6 +385,7 @@ typedef struct s_CxbxPSDef { bool DecodedHasFinalCombiner; bool RenderStateFogEnable; bool RenderStateSpecularEnable; + bool AlphaKill[4]; // Read from XboxTextureStates.Get(stage, xbox::X_D3DTSS_ALPHAKILL); bool IsEquivalent(const s_CxbxPSDef &Another) { @@ -420,6 +421,10 @@ typedef struct s_CxbxPSDef { if (DecodedTexModeAdjust != Another.DecodedTexModeAdjust) return false; + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) + if (AlphaKill[i] != Another.AlphaKill[i]) + return false; + // All ActiveTextureTypes must correspond as well (otherwise the recompiled shader would sample incorrectly) : for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) if (ActiveTextureTypes[i] != Another.ActiveTextureTypes[i]) @@ -465,6 +470,10 @@ typedef struct s_CxbxPSDef { // Fetch all render states that impact AdjustFinalCombiner RenderStateFogEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGENABLE) > 0; RenderStateSpecularEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE) > 0; + + for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + AlphaKill[i] = XboxTextureStates.Get(/*stage=*/i, xbox::X_D3DTSS_ALPHAKILL) & 4; // D3DTALPHAKILL_ENABLE + } } void AdjustTextureModes(DecodedRegisterCombiner &RC) @@ -560,6 +569,10 @@ typedef struct s_CxbxPSDef { void PerformRuntimeAdjustments(DecodedRegisterCombiner &RC) { + RC.AlphaKill[0] = AlphaKill[0]; + RC.AlphaKill[1] = AlphaKill[1]; + RC.AlphaKill[2] = AlphaKill[2]; + RC.AlphaKill[3] = AlphaKill[3]; 
AdjustTextureModes(RC); AdjustFinalCombiner(RC); } diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h index 128369146..4eadf29a8 100644 --- a/src/core/hle/D3D8/XbPixelShader.h +++ b/src/core/hle/D3D8/XbPixelShader.h @@ -495,6 +495,8 @@ struct DecodedRegisterCombiner { bool hasFinalCombiner; RPSFinalCombiner FinalCombiner; bool TexModeAdjust; + // Variables + bool AlphaKill[4]; // X_D3DTSS_ALPHAKILL static void GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]); static void GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_DOTMAPPING psDotMapping[xbox::X_D3DTS_STAGECOUNT]); From c6fdfc7101927ab8f0bcd09771371dabceb53da1 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Wed, 9 Dec 2020 16:55:47 +0100 Subject: [PATCH 08/47] Code cleanup (comments and variable renames) --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 32 +++++++++++------------ src/core/hle/D3D8/XbPixelShader.cpp | 2 ++ 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index eda935d60..6d5a02faa 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -5012,15 +5012,15 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_Clear) // Scale the fill based on our scale factor and MSAA scale float aaX, aaY; GetMultiSampleScaleRaw(aaX, aaY); - aaX *= g_RenderUpscaleFactor; - aaY *= g_RenderUpscaleFactor; + float Xscale = aaX * g_RenderUpscaleFactor; + float Yscale = aaY * g_RenderUpscaleFactor; std::vector rects(Count); for (DWORD i = 0; i < Count; i++) { - rects[i].x1 = static_cast(pRects[i].x1 * aaX); - rects[i].x2 = static_cast(pRects[i].x2 * aaX); - rects[i].y1 = static_cast(pRects[i].y1 * aaY); - rects[i].y2 = static_cast(pRects[i].y2 * aaY); + rects[i].x1 = static_cast(pRects[i].x1 * Xscale); + rects[i].x2 = static_cast(pRects[i].x2 * Xscale); + rects[i].y1 = 
static_cast(pRects[i].y1 * Yscale); + rects[i].y2 = static_cast(pRects[i].y2 * Yscale); } hRet = g_pD3DDevice->Clear(Count, rects.data(), HostFlags, Color, Z, Stencil); } else { @@ -7634,16 +7634,16 @@ void CxbxUpdateHostViewport() { LOG_TEST_CASE("Could not get rendertarget dimensions while setting the viewport"); } - aaScaleX *= g_RenderUpscaleFactor; - aaScaleY *= g_RenderUpscaleFactor; + float Xscale = aaScaleX * g_RenderUpscaleFactor; + float Yscale = aaScaleY * g_RenderUpscaleFactor; if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction) { // Set viewport D3DVIEWPORT hostViewport = g_Xbox_Viewport; - hostViewport.X *= aaScaleX; - hostViewport.Y *= aaScaleY; - hostViewport.Width *= aaScaleX; - hostViewport.Height *= aaScaleY; + hostViewport.X *= Xscale; + hostViewport.Y *= Yscale; + hostViewport.Width *= Xscale; + hostViewport.Height *= Yscale; g_pD3DDevice->SetViewport(&hostViewport); // Reset scissor rect @@ -7675,10 +7675,10 @@ void CxbxUpdateHostViewport() { // Scissor to viewport g_pD3DDevice->SetRenderState(D3DRS_SCISSORTESTENABLE, TRUE); RECT viewportRect; - viewportRect.left = g_Xbox_Viewport.X * aaScaleX; - viewportRect.top = g_Xbox_Viewport.Y * aaScaleY; - viewportRect.right = viewportRect.left + g_Xbox_Viewport.Width * aaScaleX; - viewportRect.bottom = viewportRect.top + g_Xbox_Viewport.Height * aaScaleY; + viewportRect.left = g_Xbox_Viewport.X * Xscale; + viewportRect.top = g_Xbox_Viewport.Y * Yscale; + viewportRect.right = viewportRect.left + (g_Xbox_Viewport.Width * Xscale); + viewportRect.bottom = viewportRect.top + (g_Xbox_Viewport.Height * Yscale); g_pD3DDevice->SetScissorRect(&viewportRect); } } diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index fc186e97e..749618220 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -472,6 +472,8 @@ typedef struct s_CxbxPSDef { RenderStateSpecularEnable = 
XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE) > 0; for (unsigned i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + // Test-cases : XDK sample nosortalphablend, Xbmc-fork (https://github.com/superpea/xbmc-fork/blob/bba40d57db52d11dea7bbf9509c298f7c2b05f4b/xbmc/cores/VideoRenderers/XBoxRenderer.cpp#L134) + // Star Wars: Jedi Academy (https://github.com/RetailGameSourceCode/StarWars_JediAcademy/blob/5b8f0040b3177d8855f7d575ef49b23ed52ff42a/codemp/win32/win_lighteffects.cpp#L299) AlphaKill[i] = XboxTextureStates.Get(/*stage=*/i, xbox::X_D3DTSS_ALPHAKILL) & 4; // D3DTALPHAKILL_ENABLE } } From af0058ad61662af2d5417b88d5a25ef0123f4c24 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Tue, 22 Dec 2020 16:27:04 +0100 Subject: [PATCH 09/47] WIP pointsprite nuances --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 30 ++++++++++--------- .../Direct3D9/FixedFunctionVertexShader.hlsl | 12 ++++++-- src/core/hle/D3D8/Direct3D9/RenderStates.cpp | 10 +++++++ src/core/hle/D3D8/Direct3D9/RenderStates.h | 1 + src/core/hle/D3D8/Direct3D9/TextureStates.cpp | 10 +++++-- 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 6d5a02faa..b0c15023a 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -6469,21 +6469,23 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.Modes.BackEmissiveMaterialSource = (float)(ColorVertex ? 
XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKEMISSIVEMATERIALSOURCE) : D3DMCS_MATERIAL); // Point sprites - auto pointSize = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE); - auto pointSizeMin = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE_MIN); - auto pointSizeMax = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE_MAX); - ffShaderState.PointSprite.PointSize = *reinterpret_cast(&pointSize); - ffShaderState.PointSprite.PointSizeMin = *reinterpret_cast(&pointSizeMin); - ffShaderState.PointSprite.PointSizeMax = *reinterpret_cast(&pointSizeMax); - + bool PointSpriteEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSPRITEENABLE); + float pointSize = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE); + float pointSizeMin = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MIN); + float pointSizeMax = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MAX); bool PointScaleEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALEENABLE); - auto scaleA = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_A); - auto scaleB = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_B); - auto scaleC = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_C); - ffShaderState.PointSprite.ScaleABC.x = PointScaleEnable ? *reinterpret_cast(&scaleA) : 1.0f; - ffShaderState.PointSprite.ScaleABC.y = PointScaleEnable ? *reinterpret_cast(&scaleB) : 0.0f; - ffShaderState.PointSprite.ScaleABC.z = PointScaleEnable ? *reinterpret_cast(&scaleC) : 0.0f; - ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? 
(float)GetPixelContainerHeight(g_pXbox_RenderTarget) : 1.0f; + float scaleA = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_A); + float scaleB = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_B); + float scaleC = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_C); + float renderTargetHeight = (float)GetPixelContainerHeight(g_pXbox_RenderTarget); + PointScaleEnable &= PointSpriteEnable; + ffShaderState.PointSprite.PointSize = PointSpriteEnable ? pointSize : 1.0f; + ffShaderState.PointSprite.PointSizeMin = PointSpriteEnable ? pointSizeMin : 1.0f; + ffShaderState.PointSprite.PointSizeMax = PointSpriteEnable ? pointSizeMax : 1.0f; + ffShaderState.PointSprite.ScaleABC.x = PointScaleEnable ? scaleA : 1.0f; + ffShaderState.PointSprite.ScaleABC.y = PointScaleEnable ? scaleB : 0.0f; + ffShaderState.PointSprite.ScaleABC.z = PointScaleEnable ? scaleC : 0.0f; + ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? renderTargetHeight : 1.0f; ffShaderState.PointSprite.RenderUpscaleFactor = g_RenderUpscaleFactor; // Fog diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl index cdab93f0f..c5c76dd94 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl @@ -19,10 +19,13 @@ struct VS_INPUT #else float4 pos : POSITION; float4 bw : BLENDWEIGHT; - float4 color[2] : COLOR; + float4 normal : NORMAL; + float4 color[2] : COLOR; + float1 fogCoord : FOG; + float1 pointSize : PSIZE; float4 backColor[2] : TEXCOORD4; - float4 normal : NORMAL; float4 texcoord[4] : TEXCOORD; + float4 reserved[3] : TEXCOORD6; #endif }; @@ -59,12 +62,17 @@ float4 Get(const VS_INPUT xIn, const uint index) if(index == normal) return xIn.normal; if(index == diffuse) return xIn.color[0]; if(index == specular) return xIn.color[1]; + if(index == fogCoord) return xIn.fogCoord; + if(index == pointSize) 
return xIn.pointSize; if(index == backDiffuse) return xIn.backColor[0]; if(index == backSpecular) return xIn.backColor[1]; if(index == texcoord0) return xIn.texcoord[0]; if(index == texcoord1) return xIn.texcoord[1]; if(index == texcoord2) return xIn.texcoord[2]; if(index == texcoord3) return xIn.texcoord[3]; + if(index == reserved0) return xIn.reserved[0]; + if(index == reserved1) return xIn.reserved[1]; + if(index == reserved2) return xIn.reserved[2]; return 1; #endif } diff --git a/src/core/hle/D3D8/Direct3D9/RenderStates.cpp b/src/core/hle/D3D8/Direct3D9/RenderStates.cpp index f53eda030..e617eb30b 100644 --- a/src/core/hle/D3D8/Direct3D9/RenderStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/RenderStates.cpp @@ -186,6 +186,16 @@ uint32_t XboxRenderStateConverter::GetXboxRenderState(uint32_t State) return D3D__RenderState[XboxRenderStateOffsets[State]]; } +float XboxRenderStateConverter::GetXboxRenderStateAsFloat(uint32_t State) +{ + if (!XboxRenderStateExists(State)) { + EmuLog(LOG_LEVEL::WARNING, "Attempt to read a Renderstate (%s) that does not exist in the current D3D8 XDK Version (%d)", GetDxbxRenderStateInfo(State).S, g_LibVersion_D3D8); + return 0; + } + + return *reinterpret_cast(&(D3D__RenderState[XboxRenderStateOffsets[State]])); +} + void XboxRenderStateConverter::StoreInitialValues() { for (unsigned int RenderState = xbox::X_D3DRS_FIRST; RenderState <= xbox::X_D3DRS_LAST; RenderState++) { diff --git a/src/core/hle/D3D8/Direct3D9/RenderStates.h b/src/core/hle/D3D8/Direct3D9/RenderStates.h index 78bfab944..dbc161ddb 100644 --- a/src/core/hle/D3D8/Direct3D9/RenderStates.h +++ b/src/core/hle/D3D8/Direct3D9/RenderStates.h @@ -46,6 +46,7 @@ public: void SetWireFrameMode(int mode); void SetDirty(); uint32_t GetXboxRenderState(uint32_t State); + float GetXboxRenderStateAsFloat(uint32_t State); private: void VerifyAndFixDeferredRenderStateOffset(); void DeriveRenderStateOffsetFromDeferredRenderStateOffset(); diff --git 
a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index f75714d34..045b06235 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -33,6 +33,7 @@ #include "EmuShared.h" #include "core/hle/Intercept.hpp" #include "RenderStates.h" +#include "core/hle/D3D8/XbVertexShader.h" // For g_UseFixedFunctionVertexShader, g_Xbox_VertexShaderMode and VertexShaderMode::FixedFunction #include "core/hle/D3D8/Direct3D9/Direct3D9.h" // For g_pD3DDevice #include @@ -166,9 +167,12 @@ void XboxTextureStateConverter::Apply() // The Xbox NV2A uses only Stage 3 for point-sprites, so we emulate this // by mapping Stage 3 to Stage 0, and disabling all stages > 0 bool pointSpriteOverride = false; - bool pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); - if (pointSpritesEnabled) { - pointSpriteOverride = true; + bool pointSpritesEnabled = false; + if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction && g_UseFixedFunctionVertexShader) { + pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); + if (pointSpritesEnabled) { + pointSpriteOverride = true; + } } for (int XboxStage = 0; XboxStage < xbox::X_D3DTS_STAGECOUNT; XboxStage++) { From 845bc7ef06eae437813bdb3afb447543daaefc6e Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Wed, 6 Jan 2021 16:34:38 +0100 Subject: [PATCH 10/47] Renames, indenting, comments --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 32 ++++++++++--------- .../Direct3D9/FixedFunctionVertexShader.hlsl | 11 ++++--- .../FixedFunctionVertexShaderState.hlsli | 6 ++-- src/core/hle/D3D8/Direct3D9/TextureStates.cpp | 2 +- 4 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index b0c15023a..945cd7fa9 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ 
b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -297,7 +297,7 @@ g_EmuCDPD; #define XB_TRAMPOLINES(XB_MACRO) \ XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_CreateVertexShader, (CONST xbox::dword_xt*, CONST xbox::dword_xt*, xbox::dword_xt*, xbox::dword_xt) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader, (xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetBackBuffer, (xbox::int_xt, D3DBACKBUFFER_TYPE, xbox::X_D3DSurface**) ); \ XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetBackBuffer2, (xbox::int_xt) ); \ XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetDepthStencilSurface, (xbox::X_D3DSurface**) ); \ @@ -337,9 +337,9 @@ g_EmuCDPD; XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShader_0, () ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShaderInput, (xbox::dword_xt, xbox::uint_xt, xbox::X_STREAMINPUT*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetViewport, (CONST xbox::X_D3DVIEWPORT8*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform_0, () ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (xbox::X_D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource, (xbox::X_D3DResource*) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource__LTCG, (xbox::void_xt) ); \ XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::dword_xt, xbox::X_D3DPRESENT_PARAMETERS*, xbox::X_D3DDevice**)); \ @@ -348,7 +348,7 @@ g_EmuCDPD; 
XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_4, (xbox::X_D3DPRESENT_PARAMETERS*) ); \ XB_MACRO(xbox::void_xt, WINAPI, Lock2DSurface, (xbox::X_D3DPixelContainer*, D3DCUBEMAP_FACES, xbox::uint_xt, D3DLOCKED_RECT*, RECT*, xbox::dword_xt) ); \ XB_MACRO(xbox::void_xt, WINAPI, Lock3DSurface, (xbox::X_D3DPixelContainer*, xbox::uint_xt, D3DLOCKED_BOX*, D3DBOX*, xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \ XB_TRAMPOLINES(XB_trampoline_declare); @@ -6468,23 +6468,25 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.Modes.BackSpecularMaterialSource = (float)(ColorVertex ? XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKSPECULARMATERIALSOURCE) : D3DMCS_MATERIAL); ffShaderState.Modes.BackEmissiveMaterialSource = (float)(ColorVertex ? XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKEMISSIVEMATERIALSOURCE) : D3DMCS_MATERIAL); - // Point sprites + // Point sprites; Fetch required variables bool PointSpriteEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSPRITEENABLE); float pointSize = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE); - float pointSizeMin = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MIN); - float pointSizeMax = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MAX); + float pointSize_Min = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MIN); + float pointSize_Max = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MAX); bool PointScaleEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALEENABLE); - float scaleA = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_A); - float scaleB = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_B); - float scaleC = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_C); + float 
pointScale_A = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_A); + float pointScale_B = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_B); + float pointScale_C = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSCALE_C); float renderTargetHeight = (float)GetPixelContainerHeight(g_pXbox_RenderTarget); + // Make sure to disable point scaling when point sprites are not enabled PointScaleEnable &= PointSpriteEnable; + // Set variables in shader state ffShaderState.PointSprite.PointSize = PointSpriteEnable ? pointSize : 1.0f; - ffShaderState.PointSprite.PointSizeMin = PointSpriteEnable ? pointSizeMin : 1.0f; - ffShaderState.PointSprite.PointSizeMax = PointSpriteEnable ? pointSizeMax : 1.0f; - ffShaderState.PointSprite.ScaleABC.x = PointScaleEnable ? scaleA : 1.0f; - ffShaderState.PointSprite.ScaleABC.y = PointScaleEnable ? scaleB : 0.0f; - ffShaderState.PointSprite.ScaleABC.z = PointScaleEnable ? scaleC : 0.0f; + ffShaderState.PointSprite.PointSize_Min = PointSpriteEnable ? pointSize_Min : 1.0f; + ffShaderState.PointSprite.PointSize_Max = PointSpriteEnable ? pointSize_Max : 1.0f; + ffShaderState.PointSprite.PointScaleABC.x = PointScaleEnable ? pointScale_A : 1.0f; + ffShaderState.PointSprite.PointScaleABC.y = PointScaleEnable ? pointScale_B : 0.0f; + ffShaderState.PointSprite.PointScaleABC.z = PointScaleEnable ? pointScale_C : 0.0f; ffShaderState.PointSprite.XboxRenderTargetHeight = PointScaleEnable ? 
renderTargetHeight : 1.0f; ffShaderState.PointSprite.RenderUpscaleFactor = g_RenderUpscaleFactor; diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl index c5c76dd94..a894aba5c 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl @@ -397,12 +397,13 @@ float4 DoTexCoord(const uint stage, const VS_INPUT xIn) float DoPointSpriteSize() { const PointSprite ps = state.PointSprite; - float pointSize = ps.PointSize; - float A = ps.ScaleABC.x; - float B = ps.ScaleABC.y; - float C = ps.ScaleABC.z; - // Note : if (ps.PointScaleEnable) not required because when disabled, CPU sets RenderTargetHeight and ScaleA to 1, and ScaleB and ScaleC to 0 + const float pointSize = ps.PointSize; + const float A = ps.PointScaleABC.x; + const float B = ps.PointScaleABC.y; + const float C = ps.PointScaleABC.z; + + // Note : if (ps.PointScaleEnable) not required because when disabled, CPU sets RenderTargetHeight and PointScale _A to 1, and _B and _C to 0 { const float eyeDistance = length(View.Position); const float factor = A + (B * eyeDistance) + (C * (eyeDistance * eyeDistance)); diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli index b909b97e4..8961a5195 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli @@ -105,11 +105,11 @@ struct Modes { struct PointSprite { alignas(16) float PointSize; - alignas(16) float PointSizeMin; - alignas(16) float PointSizeMax; + alignas(16) float PointSize_Min; + alignas(16) float PointSize_Max; // alignas(16) float PointScaleEnable; alignas(16) float XboxRenderTargetHeight; - alignas(16) float3 ScaleABC; + alignas(16) float3 PointScaleABC; alignas(16) float RenderUpscaleFactor; }; diff --git 
a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index 045b06235..37fb14ded 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -169,7 +169,7 @@ void XboxTextureStateConverter::Apply() bool pointSpriteOverride = false; bool pointSpritesEnabled = false; if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction && g_UseFixedFunctionVertexShader) { - pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); + pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); if (pointSpritesEnabled) { pointSpriteOverride = true; } From 65734fac9d1b01c996c884f7d4d5e5cdd6f915db Mon Sep 17 00:00:00 2001 From: Anthony Date: Sun, 31 Jan 2021 17:25:45 +1300 Subject: [PATCH 11/47] Wrap PS_INPUTMAPPING ops in brackets for consistent order of operations (cherry picked from commit 001dca83fea387debd7e85893eb8b8631ea264fd) --- .../D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index 718efd574..b6a93fd3b 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -24,14 +24,14 @@ struct PS_OUTPUT // Source register modifier macro's, based on enum PS_INPUTMAPPING : // TODO : Should all these 'max(0, x)' actually be 'saturate(x)'? 
This, because the operation may actually clamp the register value to the range [0..1] -#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps negative x to 0 // Was : max(0, x) -#define s_comp(x) 1 - saturate(x) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // OK for final combiner // Complements x (1-x) // Was : 1- min(max(0, x), 1) -#define s_bx2(x) ( 2 * max(0, x)) - 1 // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] -#define s_negbx2(x) (-2 * max(0, x)) + 1 // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates -#define s_bias(x) max(0, x) - 0.5 // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 -#define s_negbias(x) -max(0, x) + 0.5 // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates -#define s_ident(x) x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // invalid for final combiner // No modifier, x is passed without alteration -#define s_neg(x) -x // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // invalid for final combiner // Negate +#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps negative x to 0 // Was : max(0, x) +#define s_comp(x) (1 - saturate(x)) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // OK for final combiner // Complements x (1-x) // Was : 1- min(max(0, x), 1) +#define s_bx2(x) (( 2 * max(0, x)) - 1) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] +#define s_negbx2(x) ((-2 * max(0, x)) + 1) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates +#define s_bias(x) (max(0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, // invalid for 
final combiner // Clamps negative x to 0 and then subtracts 0.5 +#define s_negbias(x) (-max(0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates +#define s_ident(x) x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // invalid for final combiner // No modifier, x is passed without alteration +#define s_neg(x) (-x) // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // invalid for final combiner // Negate // Destination register modifier macro's, based on enum PS_COMBINEROUTPUT : #define d_ident(x) x // PS_COMBINEROUTPUT_IDENTITY= 0x00L, // From 9480bea9c034467814e807dd30d54086930134a1 Mon Sep 17 00:00:00 2001 From: Anthony Date: Sun, 31 Jan 2021 18:34:51 +1300 Subject: [PATCH 12/47] Use sampler state (cherry picked from commit ab2064ef690179560761d29eed53fd5b756b5dc7) --- .../hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index b6a93fd3b..96346cb8e 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -226,9 +226,7 @@ float m21(const float input) // Declare one sampler per each {Sampler Type, Texture Stage} combination // TODO : Generate sampler status? 
-sampler2D _sampler2D[4]; -sampler3D _sampler3D[4]; -samplerCUBE _sampler6F[4]; +sampler samplers[4] : register(s0); // Generated alphakill contents are based on X_D3DTSS_ALPHAKILL (we avoid using a constant, to allow false's to be optimized away) // bool alphakill[4] = {false, false, false, false}; // Generated by PixelShader.cpp::BuildShader() @@ -237,7 +235,7 @@ samplerCUBE _sampler6F[4]; // abstracting away the specifics of accessing above sampler declarations (usefull for future Direct3D 10+ sampler arrays) float4 Sample2D(int st, float3 s) { - float4 result = tex2D(_sampler2D[st], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) + float4 result = tex2D(samplers[st], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) if (alphakill[st]) if (result.a == 0) discard; @@ -247,7 +245,7 @@ float4 Sample2D(int st, float3 s) float4 Sample3D(int st, float3 s) { - float4 result = tex3D(_sampler3D[st], s.xyz); + float4 result = tex3D(samplers[st], s.xyz); if (alphakill[st]) if (result.a == 0) discard; @@ -257,7 +255,7 @@ float4 Sample3D(int st, float3 s) float4 Sample6F(int st, float3 s) { - float4 result = texCUBE(_sampler6F[st], s.xyz); + float4 result = texCUBE(samplers[st], s.xyz); if (alphakill[st]) if (result.a == 0) discard; From 3216931da64a2f6a0a7a16cddc992f0a70f067ad Mon Sep 17 00:00:00 2001 From: Anthony Date: Sun, 31 Jan 2021 22:28:31 +1300 Subject: [PATCH 13/47] fixup fog colour constant register (cherry picked from commit b99e63d944d2ceb474761ae88c8c90246071f0bc) --- src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index 96346cb8e..ab1111bf9 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ 
b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -46,7 +46,7 @@ struct PS_OUTPUT // Constant registers uniform const float4 c0_[8] : register(c0); uniform const float4 c1_[8] : register(c8); -uniform const float4 c_fog : register(c16); // Note : Maps to PSH_XBOX_CONSTANT_FOG, assigned to fog.rgb +uniform const float4 c_fog : register(c18); // Note : Maps to PSH_XBOX_CONSTANT_FOG, assigned to fog.rgb // Constant registers used only in final combiner stage (xfc 'opcode') : uniform const float4 FC0 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC0, must be generated as argument to xfc instead of C0 From cb4bbc17ee8cfddf0cbe148eb248a3e4a548365d Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 1 Feb 2021 19:40:53 +1300 Subject: [PATCH 14/47] Calculate the fog factor in the vertex shader (cherry picked from commit 3c3ede07da1f0829697cc1635058b5dc4ec610d2) --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 20 +++++++++++++++++-- .../Direct3D9/FixedFunctionVertexShader.hlsl | 20 +++++++++++++++---- .../FixedFunctionVertexShaderState.hlsli | 11 ++++++++++ 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 945cd7fa9..722bd13dc 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -6491,9 +6491,18 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.PointSprite.RenderUpscaleFactor = g_RenderUpscaleFactor; // Fog + // Determine how the fog depth is transformed into the fog factor + auto fogEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGENABLE); + auto fogTableMode = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGTABLEMODE); + ffShaderState.Fog.Enable = fogEnable; + // FIXME remove when fixed function PS is implemented + // Note if we are using the fixed function pixel shader + // We only want to produce the fog depth value in the VS, not the fog factor + auto psIsFixedFunction = 
g_pXbox_PixelShader == nullptr; + ffShaderState.Fog.TableMode = psIsFixedFunction ? D3DFOG_NONE : fogTableMode; + // Determine how fog depth is calculated - if (XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGENABLE) && - XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGTABLEMODE) != D3DFOG_NONE) { + if (fogEnable && fogTableMode != D3DFOG_NONE) { auto proj = &ffShaderState.Transforms.Projection; if (XboxRenderStates.GetXboxRenderState(X_D3DRS_RANGEFOGENABLE)) { @@ -6513,6 +6522,13 @@ void UpdateFixedFunctionVertexShaderState() // JSRF (non-compliant projection matrix) ffShaderState.Fog.DepthMode = FixedFunctionVertexShader::FOG_DEPTH_W; } + + auto density = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGDENSITY); + auto fogStart = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGSTART); + auto fogEnd = XboxRenderStates.GetXboxRenderState(X_D3DRS_FOGEND); + ffShaderState.Fog.Density = *reinterpret_cast(&density); + ffShaderState.Fog.Start = *reinterpret_cast(&fogStart); + ffShaderState.Fog.End = *reinterpret_cast(&fogEnd); } else { ffShaderState.Fog.DepthMode = FixedFunctionVertexShader::FOG_DEPTH_NONE; diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl index a894aba5c..f4df70870 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl @@ -273,11 +273,11 @@ Material DoMaterial(const uint index, const uint diffuseReg, const uint specular float DoFog(const VS_INPUT xIn) { - // TODO implement properly - // Until we have pixel shader HLSL we are still leaning on D3D renderstates for fogging - // So we are not doing any fog density calculations here + if (!state.Fog.Enable) + return 1; // No fog! 
// http://developer.download.nvidia.com/assets/gamedev/docs/Fog2.pdf + // Obtain the fog depth value 'd' float fogDepth; if (state.Fog.DepthMode == FixedFunctionVertexShader::FOG_DEPTH_NONE) @@ -289,7 +289,19 @@ float DoFog(const VS_INPUT xIn) if (state.Fog.DepthMode == FixedFunctionVertexShader::FOG_DEPTH_W) fogDepth = Projection.Position.w; - return fogDepth; + // Calculate the fog factor + // Some of this might be better done in the pixel shader? + float fogFactor; + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_NONE) + fogFactor = fogDepth; + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_EXP) + fogFactor = 1 / exp(fogDepth * state.Fog.Density); // 1 / e^(d * density) + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_EXP2) + fogFactor = 1 / exp(pow(fogDepth * state.Fog.Density, 2)); // 1 / e^((d * density)^2) + if (state.Fog.TableMode == FixedFunctionVertexShader::FOG_TABLE_LINEAR) + fogFactor = (state.Fog.End - fogDepth) / (state.Fog.End - state.Fog.Start); // (end - d) / (end - start) + + return fogFactor; } float4 DoTexCoord(const uint stage, const VS_INPUT xIn) diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli index 8961a5195..5009fac06 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli @@ -29,6 +29,12 @@ namespace FixedFunctionVertexShader { const float FOG_DEPTH_W = 2; // Fog depth is based distance of the vertex from the eye position const float FOG_DEPTH_RANGE = 3; + + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/fog-formulas + const float FOG_TABLE_NONE = 0; + const float FOG_TABLE_EXP = 1; + const float FOG_TABLE_EXP2 = 2; + const float FOG_TABLE_LINEAR = 3; } // Shared HLSL structures @@ -121,7 +127,12 @@ struct TextureState { }; struct Fog { + alignas(16) float Enable; alignas(16) float 
DepthMode; + alignas(16) float TableMode; + alignas(16) float Density; // EXP fog density + alignas(16) float Start; // LINEAR fog start + alignas(16) float End; // LINEAR fog end }; // Vertex lighting From e3c0b7287c5e61719f0f0f15bf6295231195b225 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 1 Feb 2021 20:03:39 +1300 Subject: [PATCH 15/47] fixup FC0 and FC1 constant mapping (cherry picked from commit 45cd7f72f07a5528b83bdadb24b46a934a7bfc50) --- src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index ab1111bf9..a0c7db56a 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -49,8 +49,8 @@ uniform const float4 c1_[8] : register(c8); uniform const float4 c_fog : register(c18); // Note : Maps to PSH_XBOX_CONSTANT_FOG, assigned to fog.rgb // Constant registers used only in final combiner stage (xfc 'opcode') : -uniform const float4 FC0 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC0, must be generated as argument to xfc instead of C0 -uniform const float4 FC1 : register(c18); // Note : Maps to PSH_XBOX_CONSTANT_FC1, must be generated as argument to xfc instead of C1 +uniform const float4 FC0 : register(c16); // Note : Maps to PSH_XBOX_CONSTANT_FC0, must be generated as argument to xfc instead of C0 +uniform const float4 FC1 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC1, must be generated as argument to xfc instead of C1 uniform const float4 BEM[4] : register(c19); // Note : PSH_XBOX_CONSTANT_BEM for 4 texture stages uniform const float4 LUM[4] : register(c23); // Note : PSH_XBOX_CONSTANT_LUM for 4 texture stages From 7f89e72b2bec12b7aec07a1dd60c796108028c1a Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 1 Feb 2021 22:53:32 +1300 Subject: [PATCH 16/47] fixup enable 
complement in final combiner (cherry picked from commit 71df0a3a5af94f550c2ecc25a47f78e54e74ee4a) --- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 4229470f5..d3cd3244e 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -76,14 +76,14 @@ void InputRegisterHLSL(std::stringstream& hlsl, RPSInputRegister &input, unsigne // [*][0] = PS_REGISTER_ZERO-derived constants, based on enum PS_INPUTMAPPING : // [*][1] = Source register modifier macro's, based on enum PS_INPUTMAPPING : // [*][2] = Final combiner source register modifier macro's, based on enum PS_INPUTMAPPING : - "zero", "s_sat", "abs", // saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, OK for final combiner // Clamps negative x to 0 - "one", "s_comp", "", // ( 1.0 - saturate(x) ) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, OK for final combiner // Complements x (1-x) - "-one", "s_bx2", "N/A", // ( 2.0 * max(0.0, x) - 1.0) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, invalid for final combiner // Shifts range from [0..1] to [-1..1] - "one", "s_negbx2", "N/A", // (-2.0 * max(0.0, x) + 1.0) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates - "-half", "s_bias", "N/A", // (max(0.0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 - "half", "s_negbias", "N/A", // (-max(0.0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates - "zero", "s_ident", "N/A", // x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, invalid for final combiner // No modifier, x is passed without alteration - "zero", "s_neg", "N/A" // -x // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, 
invalid for final combiner // Negate + "zero", "s_sat", "abs", // saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, OK for final combiner // Clamps negative x to 0 + "one", "s_comp", "s_comp", // ( 1.0 - saturate(x) ) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, OK for final combiner // Complements x (1-x) + "-one", "s_bx2", "N/A", // ( 2.0 * max(0.0, x) - 1.0) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, invalid for final combiner // Shifts range from [0..1] to [-1..1] + "one", "s_negbx2", "N/A", // (-2.0 * max(0.0, x) + 1.0) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates + "-half", "s_bias", "N/A", // (max(0.0, x) - 0.5) // PS_INPUTMAPPING_HALFBIAS_NORMAL= 0x80L, invalid for final combiner // Clamps negative x to 0 and then subtracts 0.5 + "half", "s_negbias", "N/A", // (-max(0.0, x) + 0.5) // PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, invalid for final combiner // Clamps negative x to 0, subtracts 0.5, and then negates + "zero", "s_ident", "N/A", // x // PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, invalid for final combiner // No modifier, x is passed without alteration + "zero", "s_neg", "N/A" // -x // PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, invalid for final combiner // Negate }; // Generate channel selector From b5cc970d7ee1ecfebfdf46fc4c6255b3b893aa9d Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 1 Feb 2021 13:20:26 +0100 Subject: [PATCH 17/47] Corrected fixed-function formula's in final combiner --- src/core/hle/D3D8/XbPixelShader.cpp | 46 ++++++++++++++++------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 749618220..f092c9996 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -546,27 +546,31 @@ typedef struct s_CxbxPSDef { // Since we're HLE'ing Xbox D3D, mimick how it configures the final combiner when PSDef doesn't : // TODO : Use the 
same final combiner when no pixel shader is set! Possibly by generating a DecodedRegisterCombiner with PSCombinerCount zero? - if (RenderStateFogEnable) { - // Configure final combiner to perform this operation : - // if (X_D3DRS_SPECULARENABLE) r0.rgb = lerp(fog.rgb, r0.rgb + v1.rgb, fog.a); - // else r0.rgb = lerp(fog.rgb, r0.rgb , fog.a); - // r0.a = abs(r0.a); - RC.FinalCombiner.Input[0/*A*/].Reg = PS_REGISTER_FOG; - RC.FinalCombiner.Input[0/*A*/].Channel = PS_CHANNEL_ALPHA; - RC.FinalCombiner.Input[1/*B*/].Reg = RenderStateSpecularEnable ? PS_REGISTER_V1R0_SUM : PS_REGISTER_R0; - RC.FinalCombiner.Input[1/*B*/].Channel = PS_CHANNEL_RGB; // Note : Not really needed, should be 0 already - RC.FinalCombiner.Input[2/*C*/].Reg = PS_REGISTER_FOG; - RC.FinalCombiner.Input[2/*C*/].Channel = PS_CHANNEL_RGB; // Note : Not really needed, should be 0 already - RC.FinalCombiner.Input[6/*G*/].Reg = PS_REGISTER_R0; - RC.FinalCombiner.Input[6/*G*/].InputMapping = PS_INPUTMAPPING_UNSIGNED_IDENTITY; - RC.FinalCombiner.Input[6/*G*/].Channel = PS_CHANNEL_ALPHA; - } - else { - // if (X_D3DRS_SPECULARENABLE) r0.rgb = r0.rgb + v1.rgb; - // else r0.rgb = r0.rgb; - RC.FinalCombiner.Input[3/*D*/].Reg = RenderStateSpecularEnable ? PS_REGISTER_V1R0_SUM : PS_REGISTER_R0; - RC.FinalCombiner.Input[3/*D*/].Channel = PS_CHANNEL_RGB; // Note : Not really needed, should be 0 already - } + // (This forms the entire Xbox fixed function pixel pipeline, which uses only two renderstates : X_D3DRS_FOGENABLE and X_D3DRS_SPECULARENABLE.) 
+ // + // If X_D3DRS_FOGENABLE, configure final combiner to perform this operation : + // if (X_D3DRS_SPECULARENABLE) r0.rgb = lerp(fog.rgb, r0.rgb, fog.a) + v1.rgb; + // else r0.rgb = lerp(fog.rgb, r0.rgb, fog.a); + // r0.a = abs(r0.a); + // Otherwise, if not X_D3DRS_FOGENABLE, configure final combiner to perform this operation : + // if (X_D3DRS_SPECULARENABLE) r0.rgb = r0.rgb + v1.rgb; + // else r0.rgb = r0.rgb; + // Remember : + // xfc.rgb = lerp(C, B, A) + D + // xfc.a = G.b + // Whereby A, B, C and G can use the two xfc-special purpose registers : + // V1R0 = V1 + R0 + // EFPROD = E * F + // ( Or in shorthand : sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b ) + RC.FinalCombiner.Input[0/*A*/].Channel = PS_CHANNEL_ALPHA; + RC.FinalCombiner.Input[0/*A*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_ZERO; + RC.FinalCombiner.Input[1/*B*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_ZERO; + RC.FinalCombiner.Input[2/*C*/].Reg = PS_REGISTER_R0; + RC.FinalCombiner.Input[3/*D*/].Reg = RenderStateSpecularEnable ? 
PS_REGISTER_V1 : PS_REGISTER_ZERO; + RC.FinalCombiner.Input[4/*E*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already + RC.FinalCombiner.Input[5/*F*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already + RC.FinalCombiner.Input[6/*G*/].Reg = PS_REGISTER_R0; + RC.FinalCombiner.Input[6/*G*/].Channel = PS_CHANNEL_ALPHA; } void PerformRuntimeAdjustments(DecodedRegisterCombiner &RC) From 55f3c60ef263e055d70157f02becc9f1329b9fee Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Mon, 1 Feb 2021 18:17:42 +0100 Subject: [PATCH 18/47] Include Pixel Shader HLSL in output artifacts (and fixed a few typos in FixedFunction HLSL) --- CMakeLists.txt | 2 ++ .../hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 010f89ce1..d9e343970 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -131,6 +131,7 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/core/common/imgui/settings.h" "${CXBXR_ROOT_DIR}/src/core/common/imgui/video.hpp" "${CXBXR_ROOT_DIR}/src/core/common/video/RenderBase.hpp" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" @@ -446,6 +447,7 @@ install(FILES ${cxbxr_INSTALL_files} ) install(FILES + "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" DESTINATION bin/hlsl diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl index f4df70870..5ae1bdb88 100644 --- 
a/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl @@ -410,7 +410,7 @@ float DoPointSpriteSize() { const PointSprite ps = state.PointSprite; - const float pointSize = ps.PointSize; + float pointSize = ps.PointSize; const float A = ps.PointScaleABC.x; const float B = ps.PointScaleABC.y; const float C = ps.PointScaleABC.z; @@ -423,7 +423,7 @@ float DoPointSpriteSize() pointSize *= ps.XboxRenderTargetHeight * sqrt(1 / factor); } - return clamp(pointSize, ps.PointSizeMin, ps.PointSizeMax) * ps.RenderUpscaleFactor; + return clamp(pointSize, ps.PointSize_Min, ps.PointSize_Max) * ps.RenderUpscaleFactor; } VS_INPUT InitializeInputRegisters(const VS_INPUT xInput) @@ -444,16 +444,21 @@ VS_INPUT InitializeInputRegisters(const VS_INPUT xInput) if(i == normal) xIn.normal = value; if(i == diffuse) xIn.color[0] = value; if(i == specular) xIn.color[1] = value; + if(i == fogCoord) xIn.fogCoord = value.x; // Note : Untested + if(i == pointSize) xIn.pointSize = value.x; // Note : Untested if(i == backDiffuse) xIn.backColor[0] = value; if(i == backSpecular) xIn.backColor[1] = value; if(i == texcoord0) xIn.texcoord[0] = value; if(i == texcoord1) xIn.texcoord[1] = value; if(i == texcoord2) xIn.texcoord[2] = value; if(i == texcoord3) xIn.texcoord[3] = value; + if(i == reserved0) xIn.reserved[0] = value; // Note : Untested + if(i == reserved1) xIn.reserved[1] = value; // Note : Untested + if(i == reserved2) xIn.reserved[2] = value; // Note : Untested #endif } - return xIn; + return xIn; // Note : Untested setters are required to avoid "variable 'xIn' used without having been completely initialized" here } VS_OUTPUT main(const VS_INPUT xInput) From 7745515588f06a5492b48c9351c1c64b1621be33 Mon Sep 17 00:00:00 2001 From: Anthony Date: Thu, 4 Feb 2021 18:53:14 +1300 Subject: [PATCH 19/47] Fix front/back colour usage --- .../Direct3D9/CxbxPixelShaderTemplate.hlsl | 8 ++++++-- src/core/hle/D3D8/XbConvert.cpp | 4 ++-- 
src/core/hle/D3D8/XbPixelShader.cpp | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index a0c7db56a..b69b93393 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -53,6 +53,7 @@ uniform const float4 FC0 : register(c16); // Note : Maps to PSH_XBOX_CONSTANT_FC uniform const float4 FC1 : register(c17); // Note : Maps to PSH_XBOX_CONSTANT_FC1, must be generated as argument to xfc instead of C1 uniform const float4 BEM[4] : register(c19); // Note : PSH_XBOX_CONSTANT_BEM for 4 texture stages uniform const float4 LUM[4] : register(c23); // Note : PSH_XBOX_CONSTANT_LUM for 4 texture stages +uniform const float FRONTFACE_FACTOR : register(c27); // Note : PSH_XBOX_CONSTANT_FRONTFACE_FACTOR, selects which winding order counts as the front face #define CM_LT(c) if(c < 0) clip(-1); // = PS_COMPAREMODE_[RSTQ]_LT @@ -335,11 +336,14 @@ PS_OUTPUT main(const PS_INPUT xIn) float3 s; // Actual texture coordinate sampling coordinates (temporary) float4 v; // Texture value (temporary) + // Determine if this is a front face or backface + bool isFrontFace = (xIn.iFF * FRONTFACE_FACTOR) >= 0; + // Initialize variables r0 = r1 = black; // Note : r0.a/r1.a will be overwritten by t0.a/t1.a (opaque_black will be retained for PS_TEXTUREMODES_NONE) // Note : VFACE/FrontFace has been unreliable, investigate again if some test-case shows bland colors - v0 = xIn.iFF > 0 ? xIn.iD0 : xIn.iB0; // Diffuse front/back - v1 = xIn.iFF > 0 ? xIn.iD1 : xIn.iB1; // Specular front/back + v0 = isFrontFace ? xIn.iD0 : xIn.iB0; // Diffuse front/back + v1 = isFrontFace ? 
xIn.iD1 : xIn.iB1; // Specular front/back fog = float4(c_fog.rgb, xIn.iFog); // color from PSH_XBOX_CONSTANT_FOG, alpha from vertex shader output / pixel shader input // Xbox shader program diff --git a/src/core/hle/D3D8/XbConvert.cpp b/src/core/hle/D3D8/XbConvert.cpp index 36899b2d7..1ea57bd1b 100644 --- a/src/core/hle/D3D8/XbConvert.cpp +++ b/src/core/hle/D3D8/XbConvert.cpp @@ -1504,8 +1504,8 @@ const RenderStateInfo DxbxRenderStateInfo[1+xbox::X_D3DRS_DONOTCULLUNCOMPRESSED] { "D3DRS_VERTEXBLEND" /*= 137*/, 3424, xtD3DVERTEXBLENDFLAGS, NV2A_SKIN_MODE, D3DRS_VERTEXBLEND }, { "D3DRS_FOGCOLOR" /*= 138*/, 3424, xtD3DCOLOR, NV2A_FOG_COLOR, D3DRS_FOGCOLOR }, // SwapRgb { "D3DRS_FILLMODE" /*= 139*/, 3424, xtD3DFILLMODE, NV2A_POLYGON_MODE_FRONT, D3DRS_FILLMODE }, - { "D3DRS_BACKFILLMODE" /*= 140*/, 3424, xtD3DFILLMODE, 0 }, // nsp. - { "D3DRS_TWOSIDEDLIGHTING" /*= 141*/, 3424, xtBOOL, NV2A_POLYGON_MODE_BACK }, // nsp. + { "D3DRS_BACKFILLMODE" /*= 140*/, 3424, xtD3DFILLMODE, NV2A_POLYGON_MODE_BACK }, // nsp. + { "D3DRS_TWOSIDEDLIGHTING" /*= 141*/, 3424, xtBOOL, 0 }, // nsp. // FIXME map from NV2A_LIGHT_MODEL { "D3DRS_NORMALIZENORMALS" /*= 142*/, 3424, xtBOOL, NV2A_NORMALIZE_ENABLE, D3DRS_NORMALIZENORMALS }, { "D3DRS_ZENABLE" /*= 143*/, 3424, xtBOOL, NV2A_DEPTH_TEST_ENABLE, D3DRS_ZENABLE }, // D3DZBUFFERTYPE? 
{ "D3DRS_STENCILENABLE" /*= 144*/, 3424, xtBOOL, NV2A_STENCIL_ENABLE, D3DRS_STENCILENABLE }, diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index f092c9996..e9e68a1e6 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -630,8 +630,10 @@ constexpr int PSH_XBOX_CONSTANT_FOG = PSH_XBOX_CONSTANT_FC1 + 1; // = 18 constexpr int PSH_XBOX_CONSTANT_BEM = PSH_XBOX_CONSTANT_FOG + 1; // = 19..22 // Bump map Luminance registers constexpr int PSH_XBOX_CONSTANT_LUM = PSH_XBOX_CONSTANT_BEM + 4; // = 23..26 +// Which winding order to consider as the front face +constexpr int PSH_XBOX_CONSTANT_FRONTFACE_FACTOR = PSH_XBOX_CONSTANT_LUM + 4; // = 27 // This concludes the set of constants that need to be set on host : -constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_LUM + 4; // = 27 +constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_FRONTFACE_FACTOR + 1; // = 28 void DxbxUpdateActivePixelShader() // NOPATCH { @@ -768,6 +770,21 @@ void DxbxUpdateActivePixelShader() // NOPATCH } #endif + // Control whether to use front or back diffuse/specular colours + // This factor should be multipled with VFACE + // Test cases: + // Amped (snowboard trails should use front colours, but use both CW and CCW winding) + // TwoSidedLighting sample + float frontfaceFactor = 0; // 0 == always use the front colours + if (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_TWOSIDEDLIGHTING)) { + LOG_TEST_CASE("Two sided lighting"); + // VFACE is positive for clockwise faces + // If Xbox designates counter-clockwise as front-facing, we invert VFACE + auto cwFrontface = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FRONTFACE) == 0x900; // clockwise; + frontfaceFactor = cwFrontface ? 
1 : -1; + } + fColor[PSH_XBOX_CONSTANT_FRONTFACE_FACTOR].r = frontfaceFactor; + // Assume all constants are in use (this is much easier than tracking them for no other purpose than to skip a few here) // Read the color from the corresponding render state slot : // Set all host constant values using a single call: From b565d39ffe81f116905c6b58ba36c8efff83c056 Mon Sep 17 00:00:00 2001 From: Anthony Date: Thu, 4 Feb 2021 20:27:32 +1300 Subject: [PATCH 20/47] fix abs instead of clamp --- src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 2 +- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index b69b93393..84ea398b4 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -24,7 +24,7 @@ struct PS_OUTPUT // Source register modifier macro's, based on enum PS_INPUTMAPPING : // TODO : Should all these 'max(0, x)' actually be 'saturate(x)'? 
This, because the operation may actually clamp the register value to the range [0..1] -#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps negative x to 0 // Was : max(0, x) +#define s_sat(x) saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, // OK for final combiner // Clamps negative x to 0 // Was : max(0, x), then abs(x) (Test case: Scaler) #define s_comp(x) (1 - saturate(x)) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, // OK for final combiner // Complements x (1-x) // Was : 1- min(max(0, x), 1) #define s_bx2(x) (( 2 * max(0, x)) - 1) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] #define s_negbx2(x) ((-2 * max(0, x)) + 1) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, // invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index d3cd3244e..4a94c54fc 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -76,7 +76,7 @@ void InputRegisterHLSL(std::stringstream& hlsl, RPSInputRegister &input, unsigne // [*][0] = PS_REGISTER_ZERO-derived constants, based on enum PS_INPUTMAPPING : // [*][1] = Source register modifier macro's, based on enum PS_INPUTMAPPING : // [*][2] = Final combiner source register modifier macro's, based on enum PS_INPUTMAPPING : - "zero", "s_sat", "abs", // saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, OK for final combiner // Clamps negative x to 0 + "zero", "s_sat", "s_sat", // saturate(x) // PS_INPUTMAPPING_UNSIGNED_IDENTITY= 0x00L, OK for final combiner // Clamps negative x to 0 "one", "s_comp", "s_comp", // ( 1.0 - saturate(x) ) // PS_INPUTMAPPING_UNSIGNED_INVERT= 0x20L, OK for final combiner // Complements x (1-x) "-one", "s_bx2", "N/A", // ( 2.0 * max(0.0, x) - 1.0) // PS_INPUTMAPPING_EXPAND_NORMAL= 0x40L, invalid for final 
combiner // Shifts range from [0..1] to [-1..1] "one", "s_negbx2", "N/A", // (-2.0 * max(0.0, x) + 1.0) // PS_INPUTMAPPING_EXPAND_NEGATE= 0x60L, invalid for final combiner // Shifts range from [0..1] to [-1..1] and then negates From 3c96e3236be41c8dd48a0a507a6286d845ec4841 Mon Sep 17 00:00:00 2001 From: Anthony Date: Sat, 6 Feb 2021 23:27:21 +1300 Subject: [PATCH 21/47] use vs_3_0 --- src/core/hle/D3D8/Direct3D9/VertexShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index 21bb9582f..e4ebc6f75 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -8,7 +8,7 @@ #include #include // std::stringstream -extern const char* g_vs_model = vs_model_2_a; +extern const char* g_vs_model = vs_model_3_0; // HLSL generation void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) From 6373ba58bbe01a5678260878208756bd85dc6a09 Mon Sep 17 00:00:00 2001 From: Anthony Date: Sat, 6 Feb 2021 23:27:52 +1300 Subject: [PATCH 22/47] fixup typo VTXSH instead of PXSH --- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 4a94c54fc..ccee60037 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -25,7 +25,7 @@ // * // ****************************************************************** -#define LOG_PREFIX CXBXR_MODULE::VTXSH +#define LOG_PREFIX CXBXR_MODULE::PXSH #include // std::stringstream #include "Shader.h" // EmuCompileShader From fe6de7c6219d1458262ec42b2c6616a7ac5cade0 Mon Sep 17 00:00:00 2001 From: Anthony Date: Sun, 7 Feb 2021 00:49:39 +1300 Subject: [PATCH 23/47] Fix fog when xfc is undefined * flip fog factor lerp.
fog factor 1 = no fog * ensure RC is zero initialized --- src/core/hle/D3D8/XbPixelShader.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index e9e68a1e6..43902cab3 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -544,6 +544,8 @@ typedef struct s_CxbxPSDef { { if (RC.hasFinalCombiner) return; + RC.hasFinalCombiner = true; + // Since we're HLE'ing Xbox D3D, mimick how it configures the final combiner when PSDef doesn't : // TODO : Use the same final combiner when no pixel shader is set! Possibly by generating a DecodedRegisterCombiner with PSCombinerCount zero? // (This forms the entire Xbox fixed function pixel pipeline, which uses only two renderstates : X_D3DRS_SPECULARENABLE and X_D3DRS_SPECULARENABLE.) @@ -563,9 +565,9 @@ typedef struct s_CxbxPSDef { // EFPROD = E * F // ( Or in shorthand : sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b ) RC.FinalCombiner.Input[0/*A*/].Channel = PS_CHANNEL_ALPHA; - RC.FinalCombiner.Input[0/*A*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_ZERO; - RC.FinalCombiner.Input[1/*B*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_ZERO; - RC.FinalCombiner.Input[2/*C*/].Reg = PS_REGISTER_R0; + RC.FinalCombiner.Input[0/*A*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_ONE; + RC.FinalCombiner.Input[1/*B*/].Reg = PS_REGISTER_R0; + RC.FinalCombiner.Input[2/*C*/].Reg = PS_REGISTER_FOG; RC.FinalCombiner.Input[3/*D*/].Reg = RenderStateSpecularEnable ? 
PS_REGISTER_V1 : PS_REGISTER_ZERO; RC.FinalCombiner.Input[4/*E*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already RC.FinalCombiner.Input[5/*F*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already @@ -592,7 +594,7 @@ typedef struct _PSH_RECOMPILED_SHADER { PSH_RECOMPILED_SHADER CxbxRecompilePixelShader(CxbxPSDef &CompletePSDef) { - DecodedRegisterCombiner RC; + DecodedRegisterCombiner RC = {}; RC.Decode(&(CompletePSDef.PSDef)); CompletePSDef.PerformRuntimeAdjustments(RC); From ad1652f0505cdf76a9f14b7d3f165e1e62463a10 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 15 Feb 2021 21:53:11 +1300 Subject: [PATCH 24/47] Fix array index out-of-range crash Remove PS_REGISTER_ONE usage since it's not a register, but a combination of PS_REGISTER_ZERO and an INPUT_MAPPING --- src/core/hle/D3D8/XbPixelShader.cpp | 5 +++-- src/core/hle/D3D8/XbPixelShader.h | 10 ++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 43902cab3..44c40bc24 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -565,9 +565,10 @@ typedef struct s_CxbxPSDef { // EFPROD = E * F // ( Or in shorthand : sum=r0+v1, prod=s4*s5, r0.rgb=s0*s1+{1-s0}*s2+s3, r0.a=s6.b ) RC.FinalCombiner.Input[0/*A*/].Channel = PS_CHANNEL_ALPHA; - RC.FinalCombiner.Input[0/*A*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_ONE; + RC.FinalCombiner.Input[0/*A*/].Reg = PS_REGISTER_FOG; RC.FinalCombiner.Input[1/*B*/].Reg = PS_REGISTER_R0; - RC.FinalCombiner.Input[2/*C*/].Reg = PS_REGISTER_FOG; + // If fog is disabled, blend R0 with itself + RC.FinalCombiner.Input[2/*C*/].Reg = RenderStateFogEnable ? PS_REGISTER_FOG : PS_REGISTER_R0; RC.FinalCombiner.Input[3/*D*/].Reg = RenderStateSpecularEnable ?
PS_REGISTER_V1 : PS_REGISTER_ZERO; RC.FinalCombiner.Input[4/*E*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already RC.FinalCombiner.Input[5/*F*/].Reg = PS_REGISTER_ZERO; // Note : Not really needed, should be 0 already diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h index 4eadf29a8..3d9c2cd1b 100644 --- a/src/core/hle/D3D8/XbPixelShader.h +++ b/src/core/hle/D3D8/XbPixelShader.h @@ -304,10 +304,12 @@ enum PS_REGISTER PS_REGISTER_V1R0_SUM= 0x0eL, // r A.k.a. _REG_SPECLIT PS_REGISTER_EF_PROD= 0x0fL, // r A.k.a. _REG_EF_PROD - PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 r OK for final combiner - PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 r invalid for final combiner - PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 r invalid for final combiner - PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 r invalid for final combiner + // These constant values can be represented as a combination of 0, and an input modifier + // But they're not registers + // PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 r OK for final combiner + // PS_REGISTER_NEGATIVE_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_EXPAND_NORMAL, // 0x40 r invalid for final combiner + // PS_REGISTER_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NEGATE, // 0xa0 r invalid for final combiner + // PS_REGISTER_NEGATIVE_ONE_HALF= PS_REGISTER_ZERO | PS_INPUTMAPPING_HALFBIAS_NORMAL, // 0x80 r invalid for final combiner // Cxbx extension; Separate final combiner constant registers (values not encoded on NV2A, as outside of available bits range) : PS_REGISTER_FC0= 0x10, From 512502ce363512629288f294d5de09af0f2ce5f9 Mon Sep 17 00:00:00 2001 From: Anthony Date: Thu, 11 Mar 2021 21:53:42 +1300 Subject: [PATCH 25/47] Fix Azurik render issue Remove #ifndef blocks that were driven by the 
opposite define than they should have been. Also provide the opposite flag as a comment, next to where the 'driver' define is emitted. --- .../Direct3D9/CxbxPixelShaderTemplate.hlsl | 11 ------ src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 34 +++++++++++++------ 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index 84ea398b4..d29e59a89 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -93,17 +93,6 @@ uniform const float FRONTFACE_FACTOR : register(c27); // Note : PSH_XBOX_CONSTA // Second raw string : R"DELIMITER( -// Define defaults when their inverses are not defined (handy while compiler isn't yet providing these) : -#ifndef PS_COMBINERCOUNT_SAME_C0 - #define PS_COMBINERCOUNT_UNIQUE_C0 -#endif -#ifndef PS_COMBINERCOUNT_SAME_C1 - #define PS_COMBINERCOUNT_UNIQUE_C1 -#endif -#ifndef PS_COMBINERCOUNT_MUX_LSB - #define PS_COMBINERCOUNT_MUX_MSB -#endif - // PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use stage-specific constants c0_0 .. c0_7, or c0_0 for all stages #ifdef PS_COMBINERCOUNT_UNIQUE_C0 #define C0 c0_[stage] // concatenate stage to form c0_0 ..
c0_7 diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index ccee60037..60df135ad 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -219,12 +219,24 @@ void FinalCombinerStageHlsl(std::stringstream& hlsl, RPSFinalCombiner& fc) hlsl << "\n " << opcode_comment[5][0] << "(" << arguments.str() << "); // " << opcode_comment[5][1]; } -void OutputDefine(std::stringstream& hlsl, std::string define_str, bool enabled) +void OutputDefineFlag(std::stringstream& hlsl, bool enabled, std::string_view define_enabled, std::string_view define_disabled = "") { - if (enabled) - hlsl << "\n#define " << define_str; - else - hlsl << "\n#undef " << define_str; + if (define_disabled.length() > 0) { + if (enabled) { + hlsl << "\n#define " << define_enabled << " // not " << define_disabled; + } + else { + hlsl << "\n#define " << define_disabled << " // not " << define_enabled; + } + } + else { + if (enabled) { + hlsl << "\n#define " << define_enabled; + } + else { + hlsl << "\n#undef " << define_enabled; + } + } } /* Disabled, until BumpDemo is fixed (which with this code, inadvertedly skips stage 1 and 2 dotproducts) : @@ -285,9 +297,9 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) hlsl << "\n#define PS_COMBINERCOUNT " << pShader->NumberOfCombiners; if (pShader->NumberOfCombiners > 0) { - OutputDefine(hlsl, "PS_COMBINERCOUNT_UNIQUE_C0", pShader->CombinerHasUniqueC0); - OutputDefine(hlsl, "PS_COMBINERCOUNT_UNIQUE_C1", pShader->CombinerHasUniqueC1); - OutputDefine(hlsl, "PS_COMBINERCOUNT_MUX_MSB", pShader->CombinerMuxesOnMsb); + OutputDefineFlag(hlsl, pShader->CombinerHasUniqueC0, "PS_COMBINERCOUNT_UNIQUE_C0", "PS_COMBINERCOUNT_SAME_C0"); + OutputDefineFlag(hlsl, pShader->CombinerHasUniqueC1, "PS_COMBINERCOUNT_UNIQUE_C1", "PS_COMBINERCOUNT_SAME_C1"); + OutputDefineFlag(hlsl, pShader->CombinerMuxesOnMsb, "PS_COMBINERCOUNT_MUX_MSB", 
"PS_COMBINERCOUNT_MUX_LSB"); } for (unsigned i = 0; i < PSH_XBOX_MAX_T_REGISTER_COUNT; i++) { @@ -320,9 +332,9 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) } if (pShader->hasFinalCombiner) { - OutputDefine(hlsl, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1", pShader->FinalCombiner.ComplementV1); - OutputDefine(hlsl, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0", pShader->FinalCombiner.ComplementR0); - OutputDefine(hlsl, "PS_FINALCOMBINERSETTING_CLAMP_SUM", pShader->FinalCombiner.ClampSum); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementV1, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1"); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementR0, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0"); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ClampSum, "PS_FINALCOMBINERSETTING_CLAMP_SUM"); } hlsl << hlsl_template[1]; From 726d6ff4f31005b1e47fccf70afb6c522ea4678b Mon Sep 17 00:00:00 2001 From: Anthony Date: Sat, 13 Mar 2021 18:44:08 +1300 Subject: [PATCH 26/47] Improve scaling of depth buffer texture coordinates - GetZScale accepts a PixelContainer rather than a surface - Fix accidental call to CxbxGetPixelContainerDepth instead of GetZScale - Assume we should scale the z component for all depth buffers, not just linear ones --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 722bd13dc..a65d91eb4 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -4184,7 +4184,7 @@ void ValidateRenderTargetDimensions(DWORD HostRenderTarget_Width, DWORD HostRend } } -float GetZScaleForSurface(xbox::X_D3DSurface* pSurface) +float GetZScaleForPixelContainer(xbox::X_D3DPixelContainer* pSurface) { // If no surface was present, fallback to 1 if (pSurface == xbox::zeroptr) { @@ -7548,10 +7548,22 @@ void CxbxUpdateHostTextureScaling() 
*texCoordScale = { width, height, - (float)CxbxGetPixelContainerDepth(pXboxBaseTexture), + 1.0f, // TODO should this be mip levels for volume textures? 1.0f }; } + + // When a depth buffer is used as a texture + // We do 'Native Shadow Mapping' + // https://aras-p.info/texts/D3D9GPUHacks.html + // The z texture coordinate component holds a depth value, which needs to be normalized + // TODO implement handling for + // - X_D3DRS_SHADOWFUNC + // - X_D3DRS_POLYGONOFFSETZSLOPESCALE + // - X_D3DRS_POLYGONOFFSETZOFFSET + if (EmuXBFormatIsDepthBuffer(XboxFormat)) { + (*texCoordScale)[2] = (float)GetZScaleForPixelContainer(pXboxBaseTexture); + } } // Pass above determined texture scaling factors to our HLSL shader. // Note : CxbxVertexShaderTemplate.hlsl applies texture scaling on @@ -8427,7 +8439,7 @@ static void CxbxImpl_SetRenderTarget // The currenct depth stencil is always replaced by whats passed in here (even a null) g_pXbox_DepthStencil = pNewZStencil; - g_ZScale = GetZScaleForSurface(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags? + g_ZScale = GetZScaleForPixelContainer(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags? 
pHostDepthStencil = GetHostSurface(g_pXbox_DepthStencil, D3DUSAGE_DEPTHSTENCIL); HRESULT hRet; From 90bdeac9047f13ffc41b820c7fd6834bb362e541 Mon Sep 17 00:00:00 2001 From: medievil1 <38597905+medievil1@users.noreply.github.com> Date: Wed, 17 Mar 2021 18:54:18 -0400 Subject: [PATCH 27/47] fix up fog mode (#2163) fix up fog mode --- .../Direct3D9/CxbxVertexShaderTemplate.hlsl | 28 +++++++++++++++++-- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 8 ++++++ src/core/hle/D3D8/XbD3D8Types.h | 4 +-- 3 files changed, 36 insertions(+), 4 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 9b0e326c1..ebaca85f2 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -28,6 +28,9 @@ struct VS_OUTPUT // Declared identical to pixel shader input (see PS_INPUT) // Xbox constant registers uniform float4 C[X_D3DVS_CONSTREG_COUNT] : register(c0); +// Parameters for mapping the shader's fog output value to a fog factor +uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO + // Default values for vertex registers, and whether to use them uniform float4 vRegisterDefaultValues[16] : register(c192); uniform float4 vRegisterDefaultFlagsPacked[4] : register(c208); @@ -324,12 +327,33 @@ VS_OUTPUT main(const VS_INPUT xIn) R"DELIMITER( // Copy variables to output struct - VS_OUTPUT xOut; + VS_OUTPUT xOut; + const float fogDepth = abs(oFog.x); + const float fogTableMode = CxbxFogInfo.x; + const float fogDensity = CxbxFogInfo.y; + const float fogStart = CxbxFogInfo.z; + const float fogEnd = CxbxFogInfo.w; + + const float FOG_TABLE_NONE = 0; + const float FOG_TABLE_EXP = 1; + const float FOG_TABLE_EXP2 = 2; + const float FOG_TABLE_LINEAR = 3; + + float fogFactor; + if(fogTableMode == FOG_TABLE_NONE) + fogFactor = fogDepth; + if(fogTableMode == FOG_TABLE_EXP) + fogFactor = 1 / exp(fogDepth * 
fogDensity); /* / 1 / e^(d * density)*/ + if(fogTableMode == FOG_TABLE_EXP2) + fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/ + if(fogTableMode == FOG_TABLE_LINEAR) + fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart) ; + xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = saturate(oD0); xOut.oD1 = saturate(oD1); - xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader + xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader -> *NEEDS TESTING* /was oFog.x xOut.oPts = oPts.x; xOut.oB0 = saturate(oB0); xOut.oB1 = saturate(oB1); diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index a65d91eb4..982ce4861 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -7650,6 +7650,14 @@ void CxbxUpdateHostVertexShaderConstants() // Need for Speed: Hot Pursuit 2 (car select) CxbxUpdateHostViewPortOffsetAndScaleConstants(); } + + // Placed this here until we find a better place + const uint32_t fogTableMode = XboxRenderStates.GetXboxRenderState(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGTABLEMODE); + const float fogDensity = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGDENSITY); + const float fogStart = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGSTART); + const float fogEnd = XboxRenderStates.GetXboxRenderStateAsFloat(xbox::_X_D3DRENDERSTATETYPE::X_D3DRS_FOGEND); + float fogStuff[4] = { (float)fogTableMode, fogDensity, fogStart, fogEnd }; + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_FOGINFO, fogStuff, 1); } void CxbxUpdateHostViewport() { diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index c5d5dc88d..33040d5c0 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -1039,10 +1039,8 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; // TODO co-locate shader workaround 
constants with shader code #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE (X_D3DVS_CONSTREG_COUNT) #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE 16 - #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE) #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE 4 - #define CXBX_D3DVS_SCREENSPACE_SCALE_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE) #define CXBX_D3DVS_NORMALIZE_SCALE_SIZE 1 @@ -1052,6 +1050,8 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; #define CXBX_D3DVS_TEXTURES_SCALE_BASE (CXBX_D3DVS_SCREENSPACE_OFFSET_BASE + CXBX_D3DVS_NORMALIZE_OFFSET_SIZE) #define CXBX_D3DVS_TEXTURES_SCALE_SIZE 4 +#define CXBX_D3DVS_CONSTREG_FOGINFO (CXBX_D3DVS_TEXTURES_SCALE_BASE + CXBX_D3DVS_TEXTURES_SCALE_SIZE) + #define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION) #define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION) From 03200fdcd3e745e567abfd23acb474ba13e5fca4 Mon Sep 17 00:00:00 2001 From: Anthony Date: Sat, 20 Mar 2021 00:15:06 +1300 Subject: [PATCH 28/47] Re-enable texture stage swap hack --- src/core/hle/D3D8/Direct3D9/TextureStates.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index 37fb14ded..1aea580ef 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -166,13 +166,12 @@ void XboxTextureStateConverter::Apply() // Track if we need to overwrite state 0 with 3 because of Point Sprites // The Xbox NV2A uses only Stage 3 for point-sprites, so we emulate this // by mapping Stage 3 to Stage 0, and disabling all stages > 0 + // TODO use stage 3 when we roll our own point sprites after moving off D3D9 bool pointSpriteOverride = false; bool pointSpritesEnabled = 
false; - if (g_Xbox_VertexShaderMode == VertexShaderMode::FixedFunction && g_UseFixedFunctionVertexShader) { - pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); - if (pointSpritesEnabled) { - pointSpriteOverride = true; - } + pointSpritesEnabled = pXboxRenderStates->GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); + if (pointSpritesEnabled) { + pointSpriteOverride = true; } for (int XboxStage = 0; XboxStage < xbox::X_D3DTS_STAGECOUNT; XboxStage++) { From 6f2460c70a446c182ee88983e3949866e9a9c4aa Mon Sep 17 00:00:00 2001 From: Anthony Date: Sun, 21 Mar 2021 23:16:44 +1300 Subject: [PATCH 29/47] Fixed function pixel shader Placeholder shader until we are able to directly load the pixel shader program used on the Xbox A shader is generated for each texture op and argument combination --- CMakeLists.txt | 4 + projects/misc/batch.cmake | 2 + src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 10 +- .../Direct3D9/FixedFunctionPixelShader.hlsl | 288 ++++++++++++++++ .../Direct3D9/FixedFunctionPixelShader.hlsli | 141 ++++++++ src/core/hle/D3D8/XbPixelShader.cpp | 312 +++++++++++++++++- src/core/hle/D3D8/XbPixelShader.h | 1 + 7 files changed, 749 insertions(+), 9 deletions(-) create mode 100644 src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl create mode 100644 src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli diff --git a/CMakeLists.txt b/CMakeLists.txt index d9e343970..901dcbac3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -134,6 +134,8 @@ file (GLOB CXBXR_HEADER_EMU "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/Direct3D9.h" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" + "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" 
"${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" "${CXBXR_ROOT_DIR}/src/core/hle/D3D8/Direct3D9/PixelShader.h" @@ -448,6 +450,8 @@ install(FILES ${cxbxr_INSTALL_files} install(FILES "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl" + "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" + "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" DESTINATION bin/hlsl diff --git a/projects/misc/batch.cmake b/projects/misc/batch.cmake index e29fafe0d..9f1f15550 100644 --- a/projects/misc/batch.cmake +++ b/projects/misc/batch.cmake @@ -32,6 +32,8 @@ file(COPY ${CXBXR_GLEW_DLL} DESTINATION ${TargetRunTimeDir}) set(CXBXR_HLSL_FILES "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShaderState.hlsli" "${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionVertexShader.hlsl" +"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli" +"${CMAKE_SOURCE_DIR}/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl" ) set(HlslOutputDir ${TargetRunTimeDir}/hlsl) file(MAKE_DIRECTORY ${HlslOutputDir}) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 982ce4861..4b7a6a55d 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -1980,7 +1980,7 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar } else if (wParam == VK_F2) { - g_UseFixedFunctionVertexShader = !g_UseFixedFunctionVertexShader; + g_UseFixedFunctionPixelShader = !g_UseFixedFunctionPixelShader; } else if (wParam == VK_F3) { @@ -6406,11 +6406,6 @@ void UpdateFixedFunctionShaderLight(int 
d3dLightIndex, Light* pShaderLight, D3DX pShaderLight->SpotIntensityDivisor = cos(d3dLight->Theta / 2) - cos(d3dLight->Phi / 2); } -float AsFloat(uint32_t value) { - auto v = value; - return *(float*)&v; -} - void UpdateFixedFunctionVertexShaderState() { extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue @@ -6498,8 +6493,7 @@ void UpdateFixedFunctionVertexShaderState() // FIXME remove when fixed function PS is implemented // Note if we are using the fixed function pixel shader // We only want to produce the fog depth value in the VS, not the fog factor - auto psIsFixedFunction = g_pXbox_PixelShader == nullptr; - ffShaderState.Fog.TableMode = psIsFixedFunction ? D3DFOG_NONE : fogTableMode; + ffShaderState.Fog.TableMode = !g_UseFixedFunctionPixelShader ? D3DFOG_NONE : fogTableMode; // Determine how fog depth is calculated if (fogEnable && fogTableMode != D3DFOG_NONE) { diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl new file mode 100644 index 000000000..6e54a6966 --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -0,0 +1,288 @@ +#include "FixedFunctionPixelShader.hlsli" + +uniform FixedFunctionPixelShaderState state : register(c0); +sampler samplers[4] : register(s0); + +struct PS_INPUT // Declared identical to vertex shader output (see VS_OUTPUT) +{ + float2 iPos : VPOS; // Screen space x,y pixel location + float4 iD0 : COLOR0; // Front-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iD1 : COLOR1; // Front-facing secondary (specular) vertex color (clamped to 0..1) + float iFog : FOG; + float iPts : PSIZE; + float4 iB0 : TEXCOORD4; // Back-facing primary (diffuse) vertex color (clamped to 0..1) + float4 iB1 : TEXCOORD5; // Back-facing secondary (specular) vertex color (clamped to 0..1) + float4 iT[4] : TEXCOORD0; // Texture Coord 0 + float iFF : VFACE; // Front facing if > 0 +}; + +// These 'D3DTA' texture 
argument values +// may be used during each texture stage +struct TextureArgs { + float4 CURRENT; + float4 TEXTURE; + float4 DIFFUSE; + float4 SPECULAR; + float4 TEMP; + float4 TFACTOR; +}; + +static float4 TexCoords[4]; + +// When creating an instance of the fixed function shader +// we string-replace the assignment below with a value +// The define keeps the shader compilable without the replacement +#define TEXTURE_SAMPLE_TYPE {SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D}; +static int TextureSampleType[4] = TEXTURE_SAMPLE_TYPE; + +bool HasFlag(float value, float flag) { + // http://theinstructionlimit.com/encoding-boolean-flags-into-a-float-in-hlsl + return fmod(value, flag) >= flag / 2; +} + +float4 GetArg(float arg, TextureArgs ctx) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dta + bool alphaReplicate = HasFlag(arg, X_D3DTA_ALPHAREPLICATE); + bool complement = HasFlag(arg, X_D3DTA_COMPLEMENT); + arg = arg % 16; + + float4 o; + + if (arg == X_D3DTA_DIFFUSE) + o = ctx.DIFFUSE; + if (arg == X_D3DTA_CURRENT) + o = ctx.CURRENT; + if (arg == X_D3DTA_TEXTURE) + o = ctx.TEXTURE; + if (arg == X_D3DTA_TFACTOR) + o = ctx.TFACTOR; + if (arg == X_D3DTA_SPECULAR) + o = ctx.SPECULAR; + if (arg == X_D3DTA_TEMP) + o = ctx.TEMP; + + if (alphaReplicate) + return o.aaaa; + else if (complement) + return 1 - o; + else + return o; +} + +float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, TextureArgs ctx, PsTextureStageState stage) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtextureop + + // Note if we use ifs here instead of else if + // D3DCompile may stackoverflow at runtime + + // X_D3DTOP_DISABLE can only be reached by ALPHAOP + // It's documented as undefined behaviour + // Test case: DoA:Xtreme menu + if (op == X_D3DTOP_DISABLE) + return ctx.CURRENT; + else if (op == X_D3DTOP_SELECTARG1) + return arg1; + else if (op == X_D3DTOP_SELECTARG2) + return arg2; + else if (op == X_D3DTOP_MODULATE) + return arg1 * arg2; + 
else if (op == X_D3DTOP_MODULATE2X) + return 2 * (arg1 * arg2); + else if (op == X_D3DTOP_MODULATE4X) + return 4 * (arg1 * arg2); + else if (op == X_D3DTOP_ADD) + return arg1 + arg2; + else if (op == X_D3DTOP_ADDSIGNED) + return arg1 + arg2 - 0.5; + else if (op == X_D3DTOP_ADDSIGNED2X) + return 2 * (arg1 + arg2 - 0.5); + else if (op == X_D3DTOP_SUBTRACT) + return arg1 - arg2; + else if (op == X_D3DTOP_ADDSMOOTH) + return arg1 + arg2 * (1 - arg1); + else if (op == X_D3DTOP_BLENDDIFFUSEALPHA) + return arg1 * ctx.DIFFUSE.a + arg2 * (1 - ctx.DIFFUSE.a); + else if (op == X_D3DTOP_BLENDCURRENTALPHA) + return arg1 * ctx.CURRENT.a + arg2 * (1 - ctx.CURRENT.a); + else if (op == X_D3DTOP_BLENDTEXTUREALPHA) + return arg1 * ctx.TEXTURE.a + arg2 * (1 - ctx.TEXTURE.a); + else if (op == X_D3DTOP_BLENDFACTORALPHA) + return arg1 * ctx.TFACTOR.a + arg2 * (1 - ctx.TFACTOR.a); + else if (op == X_D3DTOP_BLENDTEXTUREALPHAPM) + return arg1 + arg2 * (1 - ctx.TEXTURE.a); + else if (op == X_D3DTOP_PREMODULATE) + return arg1; // Note this also multiplies the next stage's CURRENT by its texture + else if (op == X_D3DTOP_MODULATEALPHA_ADDCOLOR) + return float4(arg1.rgb + arg1.a * arg2.rgb, 1); + else if (op == X_D3DTOP_MODULATECOLOR_ADDALPHA) + return float4(arg1.rgb * arg2.rgb + arg1.a, 1); + else if (op == X_D3DTOP_MODULATEINVALPHA_ADDCOLOR) + return float4((1 - arg1.a) * arg2.rgb + arg1.rgb, 1); + else if (op == X_D3DTOP_MODULATEINVCOLOR_ADDALPHA) + return float4((1 - arg1.rgb) * arg2.rgb + arg1.a, 1); + else if (op == X_D3DTOP_DOTPRODUCT3) + return dot(arg1.rgb, arg2.rgb).rrrr; + // Note arg0 below is arg1 in D3D docs + // since it becomes the first argument for operations supporting 3 arguments... 
+ else if (op == X_D3DTOP_MULTIPLYADD) + return arg0 + arg1 * arg2; + else if (op == X_D3DTOP_LERP) + return arg0 * arg1 + (1 - arg0) * arg2; + else if (op == X_D3DTOP_BUMPENVMAP) + return float4( + arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10, + arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11, + 1, 1); + else if (op == X_D3DTOP_BUMPENVMAPLUMINANCE) + return float4( + arg1.x * stage.BUMPENVMAT00 + arg1.y * stage.BUMPENVMAT10, + arg1.x * stage.BUMPENVMAT01 + arg1.y * stage.BUMPENVMAT11, + arg1.z * stage.BUMPENVLSCALE + stage.BUMPENVLOFFSET, + 1); + + // Something is amiss... we should have returned by now! + // Return a bright colour + return float4(0, 1, 1, 1); +} + +TextureArgs ExecuteTextureStage( + int i, + TextureArgs ctx, + PsTextureHardcodedState s, + int previousOp +) +{ + // Early exit if this stage is disabled (and therefore all further stages are too) + if (s.COLOROP == X_D3DTOP_DISABLE) + return ctx; + + PsTextureStageState stage = state.stages[i]; + + // Determine the texture for this stage + float3 offset = float3(0, 0, 0); + float4 factor = float4(1, 1, 1, 1); + + // Bumpmap special case + if (previousOp == X_D3DTOP_BUMPENVMAP || + previousOp == X_D3DTOP_BUMPENVMAPLUMINANCE) { + // Assume U, V, L is in CURRENT + // Add U', V', to the texture coordinates + // And multiply by L' + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/bump-mapping-formulas + offset.xy = ctx.CURRENT.xy; + factor.rgb = ctx.CURRENT.z; + } + + // Sample the texture + float4 t = float4(1, 1, 1, 1); + if (stage.IsTextureSet) { + int type = TextureSampleType[i]; + if (type == SAMPLE_2D) + t = tex2D(samplers[i], TexCoords[i].xy + offset.xy); + else if (type == SAMPLE_3D) + t = tex3D(samplers[i], TexCoords[i].xyz + offset.xyz); + else if (type == SAMPLE_CUBE) + t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz); + } + + // Assign the final value for TEXTURE + ctx.TEXTURE = t * factor; + + // Premodulate special case + if (previousOp == 
X_D3DTOP_PREMODULATE) { + ctx.CURRENT *= ctx.TEXTURE; + } + + // Get arguments for the texture operation + // Almost all operate on 2 arguments, Arg1 and Arg2 + // Arg0 is a third argument that seems to have been tacked on + // for MULTIPLYADD and LERP + + // Colour operation arguments + float4 cArg1 = GetArg(s.COLORARG1, ctx); + float4 cArg2 = GetArg(s.COLORARG2, ctx); + float4 cArg0 = GetArg(s.COLORARG0, ctx); + + // Alpha operation arguments + float4 aArg1 = GetArg(s.ALPHAARG1, ctx); + float4 aArg2 = GetArg(s.ALPHAARG2, ctx); + float4 aArg0 = GetArg(s.ALPHAARG0, ctx); + + // Execute texture operation + float4 value; + value.rgb = ExecuteTextureOp(s.COLOROP, cArg1, cArg2, cArg0, ctx, stage).rgb; + value.a = ExecuteTextureOp(s.ALPHAOP, aArg1, aArg2, aArg0, ctx, stage).a; + + // Save the result + // Note RESULTARG should either be CURRENT or TEMP + // But some titles seem to set it to DIFFUSE + // Use CURRENT for anything other than TEMP + // Test case: DoA 3 + if (s.RESULTARG == X_D3DTA_TEMP) + ctx.TEMP = value; + else + ctx.CURRENT = value; + + return ctx; +} + +float4 main(const PS_INPUT input) : COLOR { + + TexCoords = input.iT; + + // Each stage is passed and returns + // a set of texture arguments + // And will usually update the CURRENT value + TextureArgs ctx; + + // The CURRENT register + // Default to the diffuse value + // TODO determine whether to use the front or back colours + // and set them here + ctx.CURRENT = input.iD0; + ctx.DIFFUSE = input.iD0; + ctx.SPECULAR = input.iD1; + // The TEMP register + // Default to 0 + ctx.TEMP = float4(0, 0, 0, 0); + ctx.TFACTOR = state.TextureFactor; + + PsTextureHardcodedState stages[4]; + stages[0].COLOROP = X_D3DTOP_DISABLE; + stages[1].COLOROP = X_D3DTOP_DISABLE; + stages[2].COLOROP = X_D3DTOP_DISABLE; + stages[3].COLOROP = X_D3DTOP_DISABLE; + + // Define stages + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype + // We'll find comment below and insert the definitions after 
it + // STAGE DEFINITIONS + // END STAGE DEFINITIONS + + // Run each stage + int previousOp = -1; + for (int i = 0; i < 4; i++) { + + ctx = ExecuteTextureStage( + i, + ctx, + stages[i], + previousOp + ); + + previousOp = stages[i].COLOROP; + } + + // Add fog if enabled + if (state.FogEnable) { + ctx.CURRENT.rgb = lerp(state.FogColor.rgb, ctx.CURRENT.rgb, saturate(input.iFog)); + } + + // Add specular if enabled + if (state.SpecularEnable) { + ctx.CURRENT.rgb += ctx.SPECULAR.rgb; + } + + // Output whatever is in current at the end + return ctx.CURRENT; +} diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli new file mode 100644 index 000000000..af86b040e --- /dev/null +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli @@ -0,0 +1,141 @@ +// C++ / HLSL shared state block for fixed function support +#ifdef __cplusplus +#pragma once + +#include +#include // for D3DFORMAT, D3DLIGHT9, etc +#include // for D3DXVECTOR4, etc +#include + +#define float4x4 D3DMATRIX +#define float4 D3DXVECTOR4 +#define float3 D3DVECTOR +#define float2 D3DXVECTOR2 +#define arr(name, type, length) std::array name + +#else +// HLSL +#define arr(name, type, length) type name[length] +#define alignas(x) +#define const static +#endif // __cplusplus + +#ifdef __cplusplus +namespace FixedFunctionPixelShader { +#endif + // From X_D3DTOP + const float X_D3DTOP_DISABLE = 1; + const float X_D3DTOP_SELECTARG1 = 2; + const float X_D3DTOP_SELECTARG2 = 3; + const float X_D3DTOP_MODULATE = 4; + const float X_D3DTOP_MODULATE2X = 5; + const float X_D3DTOP_MODULATE4X = 6; + const float X_D3DTOP_ADD = 7; + const float X_D3DTOP_ADDSIGNED = 8; + const float X_D3DTOP_ADDSIGNED2X = 9; + const float X_D3DTOP_SUBTRACT = 10; + const float X_D3DTOP_ADDSMOOTH = 11; + const float X_D3DTOP_BLENDDIFFUSEALPHA = 12; + const float X_D3DTOP_BLENDCURRENTALPHA = 13; + const float X_D3DTOP_BLENDTEXTUREALPHA = 14; + const float 
X_D3DTOP_BLENDFACTORALPHA = 15; + const float X_D3DTOP_BLENDTEXTUREALPHAPM = 16; + const float X_D3DTOP_PREMODULATE = 17; + const float X_D3DTOP_MODULATEALPHA_ADDCOLOR = 18; + const float X_D3DTOP_MODULATECOLOR_ADDALPHA = 19; + const float X_D3DTOP_MODULATEINVALPHA_ADDCOLOR = 20; + const float X_D3DTOP_MODULATEINVCOLOR_ADDALPHA = 21; + const float X_D3DTOP_DOTPRODUCT3 = 22; + const float X_D3DTOP_MULTIPLYADD = 23; + const float X_D3DTOP_LERP = 24; + const float X_D3DTOP_BUMPENVMAP = 25; + const float X_D3DTOP_BUMPENVMAPLUMINANCE = 26; + + // D3DTA taken from D3D9 - we don't have Xbox definitions + // for these so I guess they're the same? + const float X_D3DTA_DIFFUSE = 0x00000000; // select diffuse color (read only) + const float X_D3DTA_CURRENT = 0x00000001; // select stage destination register (read/write) + const float X_D3DTA_TEXTURE = 0x00000002; // select texture color (read only) + const float X_D3DTA_TFACTOR = 0x00000003; // select D3DRS_TEXTUREFACTOR (read only) + const float X_D3DTA_SPECULAR = 0x00000004; // select specular color (read only) + const float X_D3DTA_TEMP = 0x00000005; // select temporary register color (read/write) + const float X_D3DTA_CONSTANT = 0x00000006; // select texture stage constant + const float X_D3DTA_COMPLEMENT = 0x00000010; // take 1.0 - x (read modifier) + const float X_D3DTA_ALPHAREPLICATE = 0x00000020; // replicate alpha to color components (read modifier) + + const int SAMPLE_2D = 0; + const int SAMPLE_3D = 1; + const int SAMPLE_CUBE = 2; + + // This state is passed to the shader + struct PsTextureStageState { + // Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype + + /* Samplers for now are configured elsewhere already + constexpr DWORD X_D3DTSS_ADDRESSU = 0; + constexpr DWORD X_D3DTSS_ADDRESSV = 1; + constexpr DWORD X_D3DTSS_ADDRESSW = 2; + constexpr DWORD X_D3DTSS_MAGFILTER = 3; + constexpr DWORD X_D3DTSS_MINFILTER = 4; + 
constexpr DWORD X_D3DTSS_MIPFILTER = 5; + constexpr DWORD X_D3DTSS_MIPMAPLODBIAS = 6; + constexpr DWORD X_D3DTSS_MAXMIPLEVEL = 7; + constexpr DWORD X_D3DTSS_MAXANISOTROPY = 8; + */ + + alignas(16) float COLORKEYOP; // Unimplemented Xbox extension! + alignas(16) float COLORSIGN; // Unimplemented Xbox extension! + alignas(16) float ALPHAKILL; // Unimplemented Xbox extension! + // TEXTURETRANSFORMFLAGS handled by the VS + alignas(16) float BUMPENVMAT00; + alignas(16) float BUMPENVMAT01; + alignas(16) float BUMPENVMAT11; + alignas(16) float BUMPENVMAT10; + alignas(16) float BUMPENVLSCALE; + alignas(16) float BUMPENVLOFFSET; + // TEXCOORDINDEX handled by the VS + // BORDERCOLOR set on sampler + alignas(16) float COLORKEYCOLOR; // Unimplemented Xbox extension! + + // Misc properties + alignas(16) float IsTextureSet; + }; + + // This state is compiled into the shader + // Values correspond to XD3D8 version of D3DTEXTURESTAGESTATETYPE + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dtexturestagestatetype + struct PsTextureHardcodedState { + alignas(16) float COLOROP; + alignas(16) float COLORARG0; + alignas(16) float COLORARG1; + alignas(16) float COLORARG2; + alignas(16) float ALPHAOP; + alignas(16) float ALPHAARG0; + alignas(16) float ALPHAARG1; + alignas(16) float ALPHAARG2; + alignas(16) float RESULTARG; + }; + + struct FixedFunctionPixelShaderState { + alignas(16) arr(stages, PsTextureStageState, 4); + alignas(16) float4 TextureFactor; + alignas(16) float SpecularEnable; + alignas(16) float FogEnable; + alignas(16) float3 FogColor; + }; +#ifdef __cplusplus +} // FixedFunctionPixelShader namespace +#endif + +#ifdef __cplusplus +#undef float4x4 +#undef float4 +#undef float3 +#undef float2 +#undef arr +#else // HLSL +#undef arr +#undef alignas +#undef const +#endif // __cplusplus diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 44c40bc24..3d3727a58 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ 
b/src/core/hle/D3D8/XbPixelShader.cpp @@ -44,10 +44,14 @@ #include "core\hle\D3D8\XbD3D8Logging.h" // For D3DErrorString() #include "core\kernel\init\CxbxKrnl.h" // For CxbxKrnlCleanup() +#include "util\hasher.h" +#include "core\hle\D3D8\Direct3D9\FixedFunctionPixelShader.hlsli" #include // assert() #include #include +#include +#include #include "Direct3D9\RenderStates.h" // For XboxRenderStateConverter #include "Direct3D9\TextureStates.h" // For XboxTextureStateConverter @@ -638,6 +642,306 @@ constexpr int PSH_XBOX_CONSTANT_FRONTFACE_FACTOR = PSH_XBOX_CONSTANT_LUM + 4; // // This concludes the set of constants that need to be set on host : constexpr int PSH_XBOX_CONSTANT_MAX = PSH_XBOX_CONSTANT_FRONTFACE_FACTOR + 1; // = 28 +std::string GetFixedFunctionShaderTemplate() { + static bool loaded = false; + static std::string hlslString; + + // TODO does this need to be thread safe? + if (!loaded) { + loaded = true; + + // Determine the filename and directory for the fixed function shader + // TODO make this a relative path so we guarantee an LPCSTR for D3DCompile + auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe) + .parent_path() + .append("hlsl"); + + auto sourceFile = hlslDir.append("FixedFunctionPixelShader.hlsl").string(); + + // Load the shader into a string + std::ifstream hlslStream(sourceFile); + std::stringstream hlsl; + hlsl << hlslStream.rdbuf(); + + hlslString = hlsl.str(); + } + + return hlslString; +} + +std::string_view GetD3DTOPString(int d3dtop) { + static constexpr std::string_view opToString[] = { + "UNDEFINED", // 0 + "X_D3DTOP_DISABLE", // 1 + "X_D3DTOP_SELECTARG1", // 2 + "X_D3DTOP_SELECTARG2", // 3 + "X_D3DTOP_MODULATE", // 4 + "X_D3DTOP_MODULATE2X", // 5 + "X_D3DTOP_MODULATE4X", // 6 + "X_D3DTOP_ADD", // 7 + "X_D3DTOP_ADDSIGNED", // 8 + "X_D3DTOP_ADDSIGNED2X", // 9 + "X_D3DTOP_SUBTRACT", // 10 + "X_D3DTOP_ADDSMOOTH", // 11 + "X_D3DTOP_BLENDDIFFUSEALPHA", // 12 + "X_D3DTOP_BLENDCURRENTALPHA", // 13 + 
"X_D3DTOP_BLENDTEXTUREALPHA", // 14 + "X_D3DTOP_BLENDFACTORALPHA", // 15 + "X_D3DTOP_BLENDTEXTUREALPHAPM", // 16 + "X_D3DTOP_PREMODULATE", // 17 + "X_D3DTOP_MODULATEALPHA_ADDCOLOR", // 18 + "X_D3DTOP_MODULATECOLOR_ADDALPHA", // 19 + "X_D3DTOP_MODULATEINVALPHA_ADDCOLOR", // 20 + "X_D3DTOP_MODULATEINVCOLOR_ADDALPHA", // 21 + "X_D3DTOP_DOTPRODUCT3", // 22 + "X_D3DTOP_MULTIPLYADD", // 23 + "X_D3DTOP_LERP", // 24 + "X_D3DTOP_BUMPENVMAP", // 25 + "X_D3DTOP_BUMPENVMAPLUMINANCE", // 26 + }; + + if (d3dtop < 1 || d3dtop > 26) { + EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture operation %d", d3dtop); + d3dtop = 0; // undefined + } + + return opToString[d3dtop]; +} + +// Get a string equivalent of ' + ' +std::string GetD3DTASumString(int d3dta, bool allowModifier = true) { + using namespace FixedFunctionPixelShader; + + static const std::string argToString[] = { + "X_D3DTA_DIFFUSE", // 0 + "X_D3DTA_CURRENT", // 1 + "X_D3DTA_TEXTURE", // 2 + "X_D3DTA_TFACTOR", // 3 + "X_D3DTA_SPECULAR", // 4 + "X_D3DTA_TEMP", // 5 + "X_D3DTA_CONSTANT", // 6 + "UNDEFINED", // 7 + }; + + // Write a texture argument + const int flagMask = 0x30; + int iFlags = d3dta & flagMask; + int i = d3dta & ~flagMask; + + if (i < 0 || i > 6) { + EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture argument %d on texture arg", i); + i = 7; // undefined + } + + auto str = argToString[i]; + if (iFlags) { + if (!allowModifier) { + EmuLog(LOG_LEVEL::ERROR2, "Modifier not expected on texture argument"); + } + + if (iFlags == X_D3DTA_COMPLEMENT) + str += " + X_D3DTA_COMPLEMENT"; + else if (iFlags == X_D3DTA_ALPHAREPLICATE) + str += " + X_D3DTA_ALPHAREPLICATE"; + else { + EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture modifier %d", iFlags); + str += " /* + UNKNOWN MODIFIER */"; + } + } + + return str; +} + +// TODO we have to create and cache shaders over and over and over and over +// Deduplicate this resource management +IDirect3DPixelShader9* GetFixedFunctionShader() +{ + using namespace FixedFunctionPixelShader; + + // TODO 
move this cache elsewhere - and flush it when the device is released! + static std::unordered_map ffPsCache = {}; + + // Create a key from state that will be baked in to the shader + PsTextureHardcodedState states[4] = {}; + int sampleType[4] = { SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D }; + bool pointSpriteEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); + + bool previousStageDisabled = false; + for (int i = 0; i < 4; i++) { + // Determine the COLOROP + // Usually we execute stages up to the first disabled stage + // However, if point sprites are enabled, we just execute stage 3 + bool forceDisable = + (!pointSpriteEnable && previousStageDisabled) || + (pointSpriteEnable && i < 3); + auto colorOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP); + states[i].COLOROP = forceDisable ? X_D3DTOP_DISABLE : colorOp; + + // If the stage is disabled we don't want its configuration to affect the key + // Move on to the next stage + if (colorOp == X_D3DTOP_DISABLE) { + previousStageDisabled = true; + continue; + } + + // Get sample type + // TODO move XD3D8 resource query functions out of Direct3D9.cpp so we can use them here + if (g_pXbox_SetTexture[i]) { + auto format = g_pXbox_SetTexture[i]->Format; + // SampleType is initialized to SAMPLE_2D + if (format & X_D3DFORMAT_CUBEMAP) + sampleType[i] = SAMPLE_CUBE; + else if (((format & X_D3DFORMAT_DIMENSION_MASK) >> X_D3DFORMAT_DIMENSION_SHIFT) > 2) + sampleType[i] = SAMPLE_3D; + } + + states[i].COLORARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG0); + states[i].COLORARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1); + states[i].COLORARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG2); + + states[i].ALPHAOP = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAOP); + states[i].ALPHAARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG0); + states[i].ALPHAARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG1); + states[i].ALPHAARG2 = 
(float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG2); + + states[i].RESULTARG = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_RESULTARG); + } + + // Create a key from the shader state + // Note currently this is padded since it's what we send to the GPU + auto key = 3 * ComputeHash(states, sizeof(states)) + + ComputeHash(sampleType, sizeof(sampleType)); + + auto got = ffPsCache.find(key); + if (got != ffPsCache.end()) { + // We have a shader. Great! + return got->second; + } + + // Build and compile a new shader + auto hlslTemplate = GetFixedFunctionShaderTemplate(); + + // In D3D9 it seems we need to know hardcode if we're doing a 2D or 3D lookup + const std::string sampleTypePattern = "TEXTURE_SAMPLE_TYPE;"; + auto sampleTypeReplace = hlslTemplate.find(sampleTypePattern); + + static constexpr std::string_view typeToString[] = { + "SAMPLE_2D", + "SAMPLE_3D", + "SAMPLE_CUBE" + }; + + std::stringstream sampleTypeString; + sampleTypeString << "{" + << typeToString[sampleType[0]] << ", " + << typeToString[sampleType[1]] << ", " + << typeToString[sampleType[2]] << ", " + << typeToString[sampleType[3]] << "};"; + + auto finalShader = hlslTemplate.replace(sampleTypeReplace, sampleTypePattern.size(), sampleTypeString.str()); + + // Hardcode the texture stage operations and arguments + // So the shader handles exactly one combination of values + const std::string stageDef = "// STAGE DEFINITIONS"; + auto stageDefInsert = finalShader.find(stageDef) + stageDef.size(); + + std::stringstream stageSetup; + stageSetup << '\n'; + + for (int i = 0; i < 4; i++) { + // The stage is initialized to be disabled + // We don't have to output anything + if (states[i].COLOROP == X_D3DTOP_DISABLE) + continue; + + std::string target = "stages[" + std::to_string(i) + "]."; + + auto s = states[i]; + stageSetup << target << "COLOROP = " << GetD3DTOPString(s.COLOROP) << ";\n"; + + // TODO handle texture arg flags + stageSetup << target << "COLORARG0 = " << GetD3DTASumString(s.COLORARG0) << 
";\n"; + stageSetup << target << "COLORARG1 = " << GetD3DTASumString(s.COLORARG1) << ";\n"; + stageSetup << target << "COLORARG2 = " << GetD3DTASumString(s.COLORARG2) << ";\n"; + + stageSetup << target << "ALPHAOP = " << GetD3DTOPString(s.ALPHAOP) << ";\n"; + + if (states[i].ALPHAOP != X_D3DTOP_DISABLE) { + stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n"; + stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n"; + stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n"; + } + + stageSetup << target << "RESULTARG = " << GetD3DTASumString(s.RESULTARG, false) << ";\n"; + stageSetup << '\n'; + } + + finalShader = finalShader.insert(stageDefInsert, stageSetup.str()); + + // Compile the shader + ID3DBlob* pShaderBlob; + + auto hlslDir = std::filesystem::path(szFilePath_CxbxReloaded_Exe) + .parent_path() + .append("hlsl"); + + auto pseudoFileName = "FixedFunctionPixelShader-" + std::to_string(key) + ".hlsl"; + auto pseudoSourceFile = hlslDir.append(pseudoFileName).string(); + EmuCompileShader(finalShader, "ps_3_0", &pShaderBlob, pseudoSourceFile.c_str()); + + // Create shader object for the device + IDirect3DPixelShader9* pShader = nullptr; + auto hRet = g_pD3DDevice->CreatePixelShader((DWORD*)pShaderBlob->GetBufferPointer(), &pShader); + if (hRet != S_OK) + CxbxKrnlCleanup("Failed to compile fixed function pixel shader"); + pShaderBlob->Release(); + + // Insert the shader into the cache + ffPsCache[key] = pShader; + + return pShader; +}; + +float AsFloat(uint32_t value) { + auto v = value; + return *(float*)&v; +} + +// Set constant state for the fixed function pixel shader +void UpdateFixedFunctionPixelShaderState() +{ + using namespace FixedFunctionPixelShader; + + FixedFunctionPixelShaderState ffPsState; + ffPsState.TextureFactor = (D3DXVECTOR4)((D3DXCOLOR)(XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_TEXTUREFACTOR))); + ffPsState.SpecularEnable = 
XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_SPECULARENABLE); + ffPsState.FogEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGENABLE); + ffPsState.FogColor = (D3DXVECTOR3)((D3DXCOLOR)XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FOGCOLOR)); + + // Texture state + for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { + + auto stage = &ffPsState.stages[i]; + + stage->COLORKEYOP = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYOP); + stage->COLORSIGN = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORSIGN); + stage->ALPHAKILL = XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAKILL); + stage->BUMPENVMAT00 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT00)); + stage->BUMPENVMAT01 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT01)); + stage->BUMPENVMAT10 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT10)); + stage->BUMPENVMAT11 = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVMAT11)); + stage->BUMPENVLSCALE = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLSCALE)); + stage->BUMPENVLOFFSET = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLOFFSET)); + stage->COLORKEYCOLOR = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYCOLOR); + + stage->IsTextureSet = g_pXbox_SetTexture[i] != nullptr; + } + + const int size = (sizeof(FixedFunctionPixelShaderState) + 16 - 1) / 16; + g_pD3DDevice->SetPixelShaderConstantF(0, (float*)&ffPsState, size); +} + +bool g_UseFixedFunctionPixelShader = true; void DxbxUpdateActivePixelShader() // NOPATCH { // The first RenderState is PSAlpha, @@ -654,7 +958,13 @@ void DxbxUpdateActivePixelShader() // NOPATCH const xbox::X_D3DPIXELSHADERDEF *pPSDef = g_pXbox_PixelShader != nullptr ? 
(xbox::X_D3DPIXELSHADERDEF*)(XboxRenderStates.GetPixelShaderRenderStatePointer()) : nullptr; if (pPSDef == nullptr) { - g_pD3DDevice->SetPixelShader(nullptr); + IDirect3DPixelShader9* pShader = nullptr; + if (g_UseFixedFunctionPixelShader) { + pShader = GetFixedFunctionShader(); + UpdateFixedFunctionPixelShaderState(); + } + + g_pD3DDevice->SetPixelShader(pShader); return; } diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h index 3d9c2cd1b..49e812259 100644 --- a/src/core/hle/D3D8/XbPixelShader.h +++ b/src/core/hle/D3D8/XbPixelShader.h @@ -507,6 +507,7 @@ struct DecodedRegisterCombiner { void Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef); }; +extern bool g_UseFixedFunctionPixelShader; // PatrickvL's Dxbx pixel shader translation void DxbxUpdateActivePixelShader(); // NOPATCH From 3ea3cdc1a8e2ce6f579a0e1a1d9c5927591101fc Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 22 Mar 2021 22:30:12 +1300 Subject: [PATCH 30/47] Fix implicitly extern shader variable So the hardcoded values are used, as values were unintuitively being read from constant registers --- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 60df135ad..755648bd3 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -289,7 +289,7 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) hlsl << hlsl_template[0]; // Start with the HLSL template header - hlsl << "\nbool alphakill[4] = {" + hlsl << "\nstatic bool alphakill[4] = {" << (pShader->AlphaKill[0] ? "true, " : "false, ") << (pShader->AlphaKill[1] ? "true, " : "false, ") << (pShader->AlphaKill[2] ? 
"true, " : "false, ") From 5ea12636eedbf6f42cefc3176917be8dc0f146c8 Mon Sep 17 00:00:00 2001 From: Anthony Date: Wed, 31 Mar 2021 01:47:57 +1300 Subject: [PATCH 31/47] Passthrough fogging and generate fog factor for fixed function mode --- .../Direct3D9/CxbxVertexShaderTemplate.hlsl | 10 ++++--- src/core/hle/D3D8/Direct3D9/VertexShader.cpp | 28 ++++++++++++++++++- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index ebaca85f2..718694cd7 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -28,9 +28,6 @@ struct VS_OUTPUT // Declared identical to pixel shader input (see PS_INPUT) // Xbox constant registers uniform float4 C[X_D3DVS_CONSTREG_COUNT] : register(c0); -// Parameters for mapping the shader's fog output value to a fog factor -uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO - // Default values for vertex registers, and whether to use them uniform float4 vRegisterDefaultValues[16] : register(c192); uniform float4 vRegisterDefaultFlagsPacked[4] : register(c208); @@ -40,6 +37,9 @@ uniform float4 xboxScreenspaceOffset : register(c213); uniform float4 xboxTextureScale[4] : register(c214); +// Parameters for mapping the shader's fog output value to a fog factor +uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO + // Overloaded casts, assuring all inputs are treated as float4 float4 _tof4(float src) { return float4(src, src, src, src); } float4 _tof4(float2 src) { return src.xyyy; } @@ -329,6 +329,8 @@ R"DELIMITER( // Copy variables to output struct VS_OUTPUT xOut; + // Fogging + // TODO deduplicate const float fogDepth = abs(oFog.x); const float fogTableMode = CxbxFogInfo.x; const float fogDensity = CxbxFogInfo.y; @@ -348,7 +350,7 @@ R"DELIMITER( if(fogTableMode == FOG_TABLE_EXP2) fogFactor = 1 
/ exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/ if(fogTableMode == FOG_TABLE_LINEAR) - fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart) ; + fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart); xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = saturate(oD0); diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index e4ebc6f75..959fe0abd 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -259,6 +259,9 @@ uniform float4 xboxScreenspaceOffset : register(c213); uniform float4 xboxTextureScale[4] : register(c214); +// Parameters for mapping the shader's fog output value to a fog factor +uniform float4 CxbxFogInfo: register(c218); // = CXBX_D3DVS_CONSTREG_FOGINFO + struct VS_INPUT { float4 v[16] : TEXCOORD; @@ -337,10 +340,33 @@ VS_OUTPUT main(const VS_INPUT xIn) // Copy variables to output struct VS_OUTPUT xOut; + // Fogging + // TODO deduplicate + const float fogDepth = abs(oFog.x); + const float fogTableMode = CxbxFogInfo.x; + const float fogDensity = CxbxFogInfo.y; + const float fogStart = CxbxFogInfo.z; + const float fogEnd = CxbxFogInfo.w; + + const float FOG_TABLE_NONE = 0; + const float FOG_TABLE_EXP = 1; + const float FOG_TABLE_EXP2 = 2; + const float FOG_TABLE_LINEAR = 3; + + float fogFactor; + if(fogTableMode == FOG_TABLE_NONE) + fogFactor = fogDepth; + if(fogTableMode == FOG_TABLE_EXP) + fogFactor = 1 / exp(fogDepth * fogDensity); /* / 1 / e^(d * density)*/ + if(fogTableMode == FOG_TABLE_EXP2) + fogFactor = 1 / exp(pow(fogDepth * fogDensity, 2)); /* / 1 / e^((d * density)^2)*/ + if(fogTableMode == FOG_TABLE_LINEAR) + fogFactor = (fogEnd - fogDepth) / (fogEnd - fogStart); + xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = saturate(oD0); xOut.oD1 = saturate(oD1); - xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader + xOut.oFog = fogFactor; // Note : Xbox clamps fog in pixel shader xOut.oPts = 
oPts.x; xOut.oB0 = saturate(oB0); xOut.oB1 = saturate(oB1); From 657a8ef7c30abb98df8a7bfdce4586d4dd080377 Mon Sep 17 00:00:00 2001 From: Anthony Date: Sat, 3 Apr 2021 19:58:27 +1300 Subject: [PATCH 32/47] Disable lighting for point sprites --- src/core/hle/D3D8/Direct3D9/Direct3D9.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 4b7a6a55d..65b524a0a 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -6448,7 +6448,11 @@ void UpdateFixedFunctionVertexShaderState() } // Lighting - ffShaderState.Modes.Lighting = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_LIGHTING); + // Point sprites aren't lit - 'each point is always rendered with constant colors.' + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/point-sprites + bool PointSpriteEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSPRITEENABLE); + bool LightingEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_LIGHTING); + ffShaderState.Modes.Lighting = LightingEnable && !PointSpriteEnable; ffShaderState.Modes.TwoSidedLighting = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_TWOSIDEDLIGHTING); ffShaderState.Modes.LocalViewer = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_LOCALVIEWER); @@ -6464,7 +6468,6 @@ void UpdateFixedFunctionVertexShaderState() ffShaderState.Modes.BackEmissiveMaterialSource = (float)(ColorVertex ? 
XboxRenderStates.GetXboxRenderState(X_D3DRS_BACKEMISSIVEMATERIALSOURCE) : D3DMCS_MATERIAL); // Point sprites; Fetch required variables - bool PointSpriteEnable = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSPRITEENABLE); float pointSize = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE); float pointSize_Min = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MIN); float pointSize_Max = XboxRenderStates.GetXboxRenderStateAsFloat(X_D3DRS_POINTSIZE_MAX); From 814b040ff1379aac1bb5f61e50772f24f2dd9b89 Mon Sep 17 00:00:00 2001 From: Anthony Date: Tue, 6 Apr 2021 19:42:04 +1200 Subject: [PATCH 33/47] Normalize the values returned by the texture state converter --- src/core/hle/D3D8/Direct3D9/TextureStates.cpp | 38 +++++++++++++++---- src/core/hle/D3D8/Direct3D9/TextureStates.h | 2 + 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index 1aea580ef..15182a217 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -124,7 +124,6 @@ void XboxTextureStateConverter::BuildTextureStateMappingTable() DWORD XboxTextureStateConverter::GetHostTextureOpValue(DWORD Value) { - bool bOldOrder = g_LibVersion_D3D8 <= 3948; // Verified old order in 3944, new order in 4039 switch (Value) { case xbox::X_D3DTOP_DISABLE: return D3DTOP_DISABLE; case xbox::X_D3DTOP_SELECTARG1: return D3DTOP_SELECTARG1; @@ -138,10 +137,10 @@ DWORD XboxTextureStateConverter::GetHostTextureOpValue(DWORD Value) case xbox::X_D3DTOP_SUBTRACT: return D3DTOP_SUBTRACT; case xbox::X_D3DTOP_ADDSMOOTH: return D3DTOP_ADDSMOOTH; case xbox::X_D3DTOP_BLENDDIFFUSEALPHA: return D3DTOP_BLENDDIFFUSEALPHA; - case 0x0D/*xbox::X_D3DTOP_BLENDCURRENTALPHA */: return bOldOrder ? D3DTOP_BLENDTEXTUREALPHA : D3DTOP_BLENDCURRENTALPHA; - case 0x0E/*xbox::X_D3DTOP_BLENDTEXTUREALPHA */: return bOldOrder ? 
D3DTOP_BLENDFACTORALPHA : D3DTOP_BLENDTEXTUREALPHA; - case 0x0F/*xbox::X_D3DTOP_BLENDFACTORALPHA */: return bOldOrder ? D3DTOP_BLENDTEXTUREALPHAPM : D3DTOP_BLENDFACTORALPHA; - case 0x10/*xbox::X_D3DTOP_BLENDTEXTUREALPHAPM*/: return bOldOrder ? D3DTOP_BLENDCURRENTALPHA : D3DTOP_BLENDTEXTUREALPHAPM; + case xbox::X_D3DTOP_BLENDCURRENTALPHA: return D3DTOP_BLENDCURRENTALPHA; + case xbox::X_D3DTOP_BLENDTEXTUREALPHA: return D3DTOP_BLENDTEXTUREALPHA; + case xbox::X_D3DTOP_BLENDFACTORALPHA: return D3DTOP_BLENDFACTORALPHA; + case xbox::X_D3DTOP_BLENDTEXTUREALPHAPM: return D3DTOP_BLENDTEXTUREALPHAPM; case xbox::X_D3DTOP_PREMODULATE: return D3DTOP_PREMODULATE; case xbox::X_D3DTOP_MODULATEALPHA_ADDCOLOR: return D3DTOP_MODULATEALPHA_ADDCOLOR; case xbox::X_D3DTOP_MODULATECOLOR_ADDALPHA: return D3DTOP_MODULATECOLOR_ADDALPHA; @@ -180,7 +179,7 @@ void XboxTextureStateConverter::Apply() for (int State = xbox::X_D3DTSS_FIRST; State <= xbox::X_D3DTSS_LAST; State++) { // Read the value of the current stage/state from the Xbox data structure - DWORD XboxValue = Get(XboxStage, State); // OR D3D__TextureState[(XboxStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[State]]; + DWORD XboxValue = Get(XboxStage, State); DWORD PcValue = XboxValue; // If the state hasn't changed, skip setting it @@ -337,6 +336,29 @@ void XboxTextureStateConverter::Apply() } } +// Normalize values which may have different mappings per XDK version +DWORD NormalizeValue(DWORD xboxState, DWORD value) { + if (g_LibVersion_D3D8 <= 3948) { + // D3DTOP verified old order in 3948, new order in 4039 + switch (xboxState) { + case xbox::X_D3DTSS_COLOROP: + case xbox::X_D3DTSS_ALPHAOP: + switch (value) { + case 13: + return xbox::X_D3DTOP_BLENDTEXTUREALPHA; + case 14: + return xbox::X_D3DTOP_BLENDFACTORALPHA; + case 15: + return xbox::X_D3DTOP_BLENDTEXTUREALPHAPM; + case 16: + return xbox::X_D3DTOP_BLENDCURRENTALPHA; + } + } + } + + return value; +} + uint32_t XboxTextureStateConverter::Get(int textureStage, DWORD 
xboxState) { if (textureStage < 0 || textureStage > 3) CxbxKrnlCleanup("Requested texture stage was out of range: %d", textureStage); @@ -344,5 +366,7 @@ uint32_t XboxTextureStateConverter::Get(int textureStage, DWORD xboxState) { CxbxKrnlCleanup("Requested texture state was out of range: %d", xboxState); // Read the value of the current stage/state from the Xbox data structure - return D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]]; + DWORD rawValue = D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + XboxTextureStateOffsets[xboxState]]; + + return NormalizeValue(xboxState, rawValue); } diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.h b/src/core/hle/D3D8/Direct3D9/TextureStates.h index d9f6e3742..0859eb893 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.h +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.h @@ -45,6 +45,8 @@ private: void BuildTextureStateMappingTable(); DWORD GetHostTextureOpValue(DWORD XboxTextureOp); + // Pointer to Xbox texture states + // Note mappings may change between XDK versions uint32_t* D3D__TextureState = nullptr; std::array XboxTextureStateOffsets; XboxRenderStateConverter* pXboxRenderStates; From 2f9558d307e52712eaf53e21c4eb52fe1c0ed814 Mon Sep 17 00:00:00 2001 From: Anthony Date: Fri, 9 Apr 2021 22:38:33 +1200 Subject: [PATCH 34/47] Use SAMPLE_NONE instead of IsTextureSet --- .../Direct3D9/FixedFunctionPixelShader.hlsl | 20 +++++++++---------- .../Direct3D9/FixedFunctionPixelShader.hlsli | 10 ++++------ src/core/hle/D3D8/XbPixelShader.cpp | 8 ++++---- 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl index 6e54a6966..891677722 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -174,16 +174,16 @@ TextureArgs ExecuteTextureStage( } // Sample the 
texture - float4 t = float4(1, 1, 1, 1); - if (stage.IsTextureSet) { - int type = TextureSampleType[i]; - if (type == SAMPLE_2D) - t = tex2D(samplers[i], TexCoords[i].xy + offset.xy); - else if (type == SAMPLE_3D) - t = tex3D(samplers[i], TexCoords[i].xyz + offset.xyz); - else if (type == SAMPLE_CUBE) - t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz); - } + float4 t; + int type = TextureSampleType[i]; + if (type == SAMPLE_NONE) + t = 1; // Test case JSRF + else if (type == SAMPLE_2D) + t = tex2D(samplers[i], TexCoords[i].xy + offset.xy); + else if (type == SAMPLE_3D) + t = tex3D(samplers[i], TexCoords[i].xyz + offset.xyz); + else if (type == SAMPLE_CUBE) + t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz); // Assign the final value for TEXTURE ctx.TEXTURE = t * factor; diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli index af86b040e..79c432a60 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli @@ -63,9 +63,10 @@ namespace FixedFunctionPixelShader { const float X_D3DTA_COMPLEMENT = 0x00000010; // take 1.0 - x (read modifier) const float X_D3DTA_ALPHAREPLICATE = 0x00000020; // replicate alpha to color components (read modifier) - const int SAMPLE_2D = 0; - const int SAMPLE_3D = 1; - const int SAMPLE_CUBE = 2; + const int SAMPLE_NONE = 0; + const int SAMPLE_2D = 1; + const int SAMPLE_3D = 2; + const int SAMPLE_CUBE = 3; // This state is passed to the shader struct PsTextureStageState { @@ -97,9 +98,6 @@ namespace FixedFunctionPixelShader { // TEXCOORDINDEX handled by the VS // BORDERCOLOR set on sampler alignas(16) float COLORKEYCOLOR; // Unimplemented Xbox extension! 
- - // Misc properties - alignas(16) float IsTextureSet; }; // This state is compiled into the shader diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 3d3727a58..7ab1192bc 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -763,7 +763,7 @@ IDirect3DPixelShader9* GetFixedFunctionShader() // Create a key from state that will be baked in to the shader PsTextureHardcodedState states[4] = {}; - int sampleType[4] = { SAMPLE_2D, SAMPLE_2D, SAMPLE_2D, SAMPLE_2D }; + int sampleType[4] = { SAMPLE_NONE, SAMPLE_NONE, SAMPLE_NONE, SAMPLE_NONE }; bool pointSpriteEnable = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_POINTSPRITEENABLE); bool previousStageDisabled = false; @@ -788,11 +788,12 @@ IDirect3DPixelShader9* GetFixedFunctionShader() // TODO move XD3D8 resource query functions out of Direct3D9.cpp so we can use them here if (g_pXbox_SetTexture[i]) { auto format = g_pXbox_SetTexture[i]->Format; - // SampleType is initialized to SAMPLE_2D if (format & X_D3DFORMAT_CUBEMAP) sampleType[i] = SAMPLE_CUBE; else if (((format & X_D3DFORMAT_DIMENSION_MASK) >> X_D3DFORMAT_DIMENSION_SHIFT) > 2) sampleType[i] = SAMPLE_3D; + else + sampleType[i] = SAMPLE_2D; } states[i].COLORARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG0); @@ -826,6 +827,7 @@ IDirect3DPixelShader9* GetFixedFunctionShader() auto sampleTypeReplace = hlslTemplate.find(sampleTypePattern); static constexpr std::string_view typeToString[] = { + "SAMPLE_NONE", "SAMPLE_2D", "SAMPLE_3D", "SAMPLE_CUBE" @@ -933,8 +935,6 @@ void UpdateFixedFunctionPixelShaderState() stage->BUMPENVLSCALE = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLSCALE)); stage->BUMPENVLOFFSET = AsFloat(XboxTextureStates.Get(i, xbox::X_D3DTSS_BUMPENVLOFFSET)); stage->COLORKEYCOLOR = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORKEYCOLOR); - - stage->IsTextureSet = g_pXbox_SetTexture[i] != nullptr; } const int size = 
(sizeof(FixedFunctionPixelShaderState) + 16 - 1) / 16; From 85f792d2b2c7c9fec62ee94a592b327c5d6cb03d Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 12 Apr 2021 23:53:06 +1200 Subject: [PATCH 35/47] Fix DOTPRODUCT3 texture op Improves PerPixelLighting --- src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl index 891677722..acac60577 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -121,7 +121,11 @@ float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, Texture else if (op == X_D3DTOP_MODULATEINVCOLOR_ADDALPHA) return float4((1 - arg1.rgb) * arg2.rgb + arg1.a, 1); else if (op == X_D3DTOP_DOTPRODUCT3) - return dot(arg1.rgb, arg2.rgb).rrrr; + // Test case: PerPixelLighting + return saturate(dot( + (arg1.rgb - 0.5) * 2, + (arg2.rgb - 0.5) * 2 + )); // Note arg0 below is arg1 in D3D docs // since it becomes the first argument for operations supporting 3 arguments... else if (op == X_D3DTOP_MULTIPLYADD) From 990a24292daf308d78e91bb1ab884da113f8ea7c Mon Sep 17 00:00:00 2001 From: Anthony Date: Tue, 13 Apr 2021 22:20:17 +1200 Subject: [PATCH 36/47] Fix reflection equation Improve BumpDemo --- src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index d29e59a89..3ec0a0c71 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -263,6 +263,7 @@ float4 Sample6F(int st, float3 s) #define src(st) t[PS_INPUTTEXTURE_[st]] // Calculate the dot result for a given stage. 
Since any given stage is input-mapped to always be less than or equal the stage it appears in, this won't cause read-ahead issues +// Test case: BumpDemo demo #define CalcDot(st) PS_DOTMAPPING_ ## st(src(st)); dot_[st] = dot(iT[st].xyz, dm) // Addressing operations @@ -271,7 +272,7 @@ float4 Sample6F(int st, float3 s) #define Normal2(st) float3(dot_[st-1], dot_[st], 0) // Preceding and current stage dot result. Will be input for Sample2D. #define Normal3(st) float3(dot_[st-2], dot_[st-1], dot_[st]) // Two preceding and current stage dot result. #define Eye float3(iT[1].w, iT[2].w, iT[3].w) // 4th (q) component of input texture coordinates 1, 2 and 3. Only used by texm3x3vspec/PS_TEXTUREMODES_DOT_RFLCT_SPEC, always at stage 3. TODO : Map iT[1/2/3] through PS_INPUTTEXTURE_[]? -#define Reflect(n, e) (2 * n * dot(n, e)) / dot(n, n) // TODO : Prevent division by zero when n == 0? +#define Reflect(n, e) 2 * (dot(n, e) / dot(n, n)) * n - e // https://documentation.help/directx8_c/texm3x3vspec.htm #define BumpEnv(st) float3(iT[st].x + (BEM[st].x * src(st).r) + (BEM[st].y * src(st).g), iT[st].y + (BEM[st].z * src(st).r) + (BEM[st].w * src(st).g), 0) // Will be input for Sample2D. TODO : Compact into a regular 2x2 maxtrix multiplication. #define LSO(st) (LUM[st].x * src(st).b) + LUM[st].y // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET @@ -294,6 +295,7 @@ float4 Sample6F(int st, float3 s) /*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st, s); t[st] = v // TODO : Test /*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(st) s = src(st).arg; v = Sample2D(st, s); t[st] = v // TODO : Test [1] /*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(st) s = src(st).gba; v = Sample2D(st, s); t[st] = v // TODO : Test [1] +// TODO replace dm with dot_[st]? 
Confirm BumpDemo 'Cubemap only' modes /*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(st) CalcDot(st); v = float4(dm, 0); t[st] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) /*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(st) CalcDot(st); n = Normal3(st); s = Reflect(n, c0); v = Sample6F(st, s); t[st] = v // TODO : Test // [1] Note : 3rd component set to s.z is just an (ignored) placeholder to produce a float3 (made unique, to avoid the potential complexity of repeated components) From 5aaadd026cbb67061f271df9a23a7b743c8e4495 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 19 Apr 2021 19:10:31 +1200 Subject: [PATCH 37/47] Implement missing texture behaviour --- src/core/hle/D3D8/XbPixelShader.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 7ab1192bc..87c1aeeb7 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -774,6 +774,21 @@ IDirect3DPixelShader9* GetFixedFunctionShader() bool forceDisable = (!pointSpriteEnable && previousStageDisabled) || (pointSpriteEnable && i < 3); + + // When a texture stage has D3DTSS_COLORARG1 equal to D3DTA_TEXTURE + // and the texture pointer for the stage is NULL, this stage + // and all stages after it are not processed. + // Test cases: Red Dead Revolver, JSRF + // https://docs.microsoft.com/en-us/windows/win32/direct3d9/texture-blending + // Don't follow the D3D9 docs if SELECTARG2 is in use (PC D3D9 behaviour, nvidia quirk?) + // Test case: Crash Nitro Kart (engine speed UI) + if (!g_pXbox_SetTexture[i] + && XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1) == X_D3DTA_TEXTURE + && XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP) != xbox::X_D3DTOP_SELECTARG2) + { + forceDisable = true; + } + auto colorOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP); states[i].COLOROP = forceDisable ? 
X_D3DTOP_DISABLE : colorOp; From ce7a5f6e4012adebc4b03dd94e745de351c309a2 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 19 Apr 2021 20:29:27 +1200 Subject: [PATCH 38/47] Fix GTA III logos Depending on ALPHAOP == DISABLE --- src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl index acac60577..9f8784faf 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -77,9 +77,9 @@ float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, Texture // X_D3DTOP_DISABLE can only be reached by ALPHAOP // It's documented as undefined behaviour - // Test case: DoA:Xtreme menu + // Test case: DoA:Xtreme menu, GTA III logos if (op == X_D3DTOP_DISABLE) - return ctx.CURRENT; + return 1; else if (op == X_D3DTOP_SELECTARG1) return arg1; else if (op == X_D3DTOP_SELECTARG2) From 6bf2a1f44f77c582a0e2d4d5cdd941a644a1e58f Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 19 Apr 2021 20:45:01 +1200 Subject: [PATCH 39/47] Reuse colorOp variable And additional comments --- src/core/hle/D3D8/XbPixelShader.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 87c1aeeb7..aaa414651 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -768,7 +768,12 @@ IDirect3DPixelShader9* GetFixedFunctionShader() bool previousStageDisabled = false; for (int i = 0; i < 4; i++) { - // Determine the COLOROP + // Determine COLOROP + // This controls both the texture operation for the colour of the stage + // and when to stop processing + // Under certain circumstances we force it to be DISABLE + auto colorOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP); + // Usually we execute 
stages up to the first disabled stage // However, if point sprites are enabled, we just execute stage 3 bool forceDisable = @@ -784,12 +789,12 @@ IDirect3DPixelShader9* GetFixedFunctionShader() // Test case: Crash Nitro Kart (engine speed UI) if (!g_pXbox_SetTexture[i] && XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1) == X_D3DTA_TEXTURE - && XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP) != xbox::X_D3DTOP_SELECTARG2) + && colorOp != xbox::X_D3DTOP_SELECTARG2) { forceDisable = true; } - auto colorOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_COLOROP); + // Set the final COLOROP value states[i].COLOROP = forceDisable ? X_D3DTOP_DISABLE : colorOp; // If the stage is disabled we don't want its configuration to affect the key From d8c18cf268427f97b84500d6a43faea549502384 Mon Sep 17 00:00:00 2001 From: Anthony Date: Mon, 19 Apr 2021 23:08:06 +1200 Subject: [PATCH 40/47] Mask flags when detecting colorarg1 + missing texture case --- src/core/hle/D3D8/XbPixelShader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index aaa414651..b5f6c770b 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -788,7 +788,7 @@ IDirect3DPixelShader9* GetFixedFunctionShader() // Don't follow the D3D9 docs if SELECTARG2 is in use (PC D3D9 behaviour, nvidia quirk?) 
// Test case: Crash Nitro Kart (engine speed UI) if (!g_pXbox_SetTexture[i] - && XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1) == X_D3DTA_TEXTURE + && (XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1) & 0x7) == X_D3DTA_TEXTURE && colorOp != xbox::X_D3DTOP_SELECTARG2) { forceDisable = true; From a29c2f7d6261425478760394e1304e5ef656cff3 Mon Sep 17 00:00:00 2001 From: Anthony Date: Tue, 20 Apr 2021 23:17:08 +1200 Subject: [PATCH 41/47] Force VS to write to oFog.x Default fog factor to 1 (no fog) --- .../D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl | 3 ++- src/core/hle/D3D8/Direct3D9/VertexShader.cpp | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 718694cd7..d2d3cdc00 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -297,7 +297,8 @@ VS_OUTPUT main(const VS_INPUT xIn) // Single component outputs float4 oFog, oPts; // x is write-only on Xbox. Use float4 as some games use incorrect masks - oFog = oPts = 0; + oFog = 1; // Default to no fog. Test case: Lego Star Wars II + oPts = 0; // Address (index) register int1 a0 = 0; diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index 959fe0abd..597b6c169 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -57,6 +57,20 @@ void OutputHlsl(std::stringstream& hlsl, VSH_IMD_OUTPUT& dest) // Write the mask as a separate argument to the opcode defines // (No space, so that "dest,mask, ..." 
looks close to "dest.mask, ...") hlsl << ","; + + // Detect oFog masks other than x + // Test case: Lego Star Wars II (menu) + if (dest.Type == IMD_OUTPUT_O && + dest.Address == OREG_OFOG && + dest.Mask != MASK_X) + { + LOG_TEST_CASE("Vertex shader uses oFog mask other than x"); + EmuLog(LOG_LEVEL::WARNING, "oFog mask was %#x", dest.Mask); + hlsl << "x"; // write to x instead + return; + } + + // Write the mask if (dest.Mask & MASK_X) hlsl << "x"; if (dest.Mask & MASK_Y) hlsl << "y"; if (dest.Mask & MASK_Z) hlsl << "z"; From 3da5b9f5d966a4524a4571bed18e8644f911c0b4 Mon Sep 17 00:00:00 2001 From: Anthony Date: Tue, 20 Apr 2021 23:53:06 +1200 Subject: [PATCH 42/47] Include debug comment about how the final combiner hlsl was generated Make hasFinalCombiner correspond to DecodedHasFinalCombiner --- src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 21 ++++++++++++--------- src/core/hle/D3D8/XbPixelShader.cpp | 2 -- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 755648bd3..594f77255 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -203,10 +203,17 @@ void CombinerStageHlsl(std::stringstream& hlsl, RPSCombinerStageChannel& stage, hlsl << "); // " << opcode_comment[opcode][1]; } -void FinalCombinerStageHlsl(std::stringstream& hlsl, RPSFinalCombiner& fc) +void FinalCombinerStageHlsl(std::stringstream& hlsl, RPSFinalCombiner& fc, bool hasFinalCombiner) { std::stringstream arguments; + if (hasFinalCombiner) { + hlsl << "\n // Final combiner xfc was defined in the shader"; + } + else { + hlsl << "\n // Final combiner xfc generated from XD3D8 renderstates"; + } + for (unsigned i = 0; i < 7; i++) { // Generate A, B, C, D, E, F, G input arguments // Note : Most final combiner inputs are treated as RGB, but G is single-channel (.a or .b) bool isLast = (i == 6); @@ -331,11 +338,9 @@ void 
BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) hlsl << "\n#define PS_DOTMAPPING_" << i << " " << dotmapping_str[(unsigned)pShader->PSDotMapping[i]]; } - if (pShader->hasFinalCombiner) { - OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementV1, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1"); - OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementR0, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0"); - OutputDefineFlag(hlsl, pShader->FinalCombiner.ClampSum, "PS_FINALCOMBINERSETTING_CLAMP_SUM"); - } + OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementV1, "PS_FINALCOMBINERSETTING_COMPLEMENT_V1"); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ComplementR0, "PS_FINALCOMBINERSETTING_COMPLEMENT_R0"); + OutputDefineFlag(hlsl, pShader->FinalCombiner.ClampSum, "PS_FINALCOMBINERSETTING_CLAMP_SUM"); hlsl << hlsl_template[1]; hlsl << hlsl_template[2]; @@ -383,9 +388,7 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) CombinerStageHlsl(hlsl, pShader->Combiners[i].Alpha, channel_index_Alpha); } - if (pShader->hasFinalCombiner) { - FinalCombinerStageHlsl(hlsl, pShader->FinalCombiner); - } + FinalCombinerStageHlsl(hlsl, pShader->FinalCombiner, pShader->hasFinalCombiner); hlsl << hlsl_template[3]; // Finish with the HLSL template footer } diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index b5f6c770b..6b8364764 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -548,8 +548,6 @@ typedef struct s_CxbxPSDef { { if (RC.hasFinalCombiner) return; - RC.hasFinalCombiner = true; - // Since we're HLE'ing Xbox D3D, mimick how it configures the final combiner when PSDef doesn't : // TODO : Use the same final combiner when no pixel shader is set! Possibly by generating a DecodedRegisterCombiner with PSCombinerCount zero? 
// (This forms the entire Xbox fixed function pixel pipeline, which uses only two renderstates : X_D3DRS_SPECULARENABLE and X_D3DRS_SPECULARENABLE.) From d46abef9a193b49507341bea6a2c3d805a95b71b Mon Sep 17 00:00:00 2001 From: Anthony Date: Thu, 22 Apr 2021 01:44:51 +1200 Subject: [PATCH 43/47] Redefine undefined ALPHAOP behaviour for Crash WoC To aid debugging add a LOG_TEST_CASE , and include ALPHAARGs when ALPHAOP is disabled --- .../Direct3D9/FixedFunctionPixelShader.hlsl | 17 ++++++++--------- src/core/hle/D3D8/XbPixelShader.cpp | 14 +++++++------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl index 9f8784faf..bc676711a 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -74,13 +74,7 @@ float4 ExecuteTextureOp(float op, float4 arg1, float4 arg2, float4 arg0, Texture // Note if we use ifs here instead of else if // D3DCompile may stackoverflow at runtime - - // X_D3DTOP_DISABLE can only be reached by ALPHAOP - // It's documented as undefined behaviour - // Test case: DoA:Xtreme menu, GTA III logos - if (op == X_D3DTOP_DISABLE) - return 1; - else if (op == X_D3DTOP_SELECTARG1) + if (op == X_D3DTOP_SELECTARG1) return arg1; else if (op == X_D3DTOP_SELECTARG2) return arg2; @@ -213,9 +207,14 @@ TextureArgs ExecuteTextureStage( float4 aArg0 = GetArg(s.ALPHAARG0, ctx); // Execute texture operation - float4 value; + // ALPHAOP == X_D3DTOP_DISABLE is undefined behaviour + // Using an intermediate value matches known cases... 
+ // Test case: DoA:Xtreme (menu water), GTA III (logos), Crash Wrath of Cortex (relics UI) + static float4 value = 1; value.rgb = ExecuteTextureOp(s.COLOROP, cArg1, cArg2, cArg0, ctx, stage).rgb; - value.a = ExecuteTextureOp(s.ALPHAOP, aArg1, aArg2, aArg0, ctx, stage).a; + if (s.ALPHAOP != X_D3DTOP_DISABLE) { + value.a = ExecuteTextureOp(s.ALPHAOP, aArg1, aArg2, aArg0, ctx, stage).a; + } // Save the result // Note RESULTARG should either be CURRENT or TEMP diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 6b8364764..204c1f8ca 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -818,7 +818,10 @@ IDirect3DPixelShader9* GetFixedFunctionShader() states[i].COLORARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG1); states[i].COLORARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_COLORARG2); - states[i].ALPHAOP = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAOP); + auto alphaOp = XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAOP); + if (alphaOp == X_D3DTOP_DISABLE) LOG_TEST_CASE("Alpha stage disabled when colour stage is enabled"); + + states[i].ALPHAOP = (float)alphaOp; states[i].ALPHAARG0 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG0); states[i].ALPHAARG1 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG1); states[i].ALPHAARG2 = (float)XboxTextureStates.Get(i, xbox::X_D3DTSS_ALPHAARG2); @@ -879,18 +882,15 @@ IDirect3DPixelShader9* GetFixedFunctionShader() auto s = states[i]; stageSetup << target << "COLOROP = " << GetD3DTOPString(s.COLOROP) << ";\n"; - // TODO handle texture arg flags stageSetup << target << "COLORARG0 = " << GetD3DTASumString(s.COLORARG0) << ";\n"; stageSetup << target << "COLORARG1 = " << GetD3DTASumString(s.COLORARG1) << ";\n"; stageSetup << target << "COLORARG2 = " << GetD3DTASumString(s.COLORARG2) << ";\n"; stageSetup << target << "ALPHAOP = " << GetD3DTOPString(s.ALPHAOP) << ";\n"; - if (states[i].ALPHAOP != 
X_D3DTOP_DISABLE) { - stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n"; - stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n"; - stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n"; - } + stageSetup << target << "ALPHAARG0 = " << GetD3DTASumString(s.ALPHAARG0) << ";\n"; + stageSetup << target << "ALPHAARG1 = " << GetD3DTASumString(s.ALPHAARG1) << ";\n"; + stageSetup << target << "ALPHAARG2 = " << GetD3DTASumString(s.ALPHAARG2) << ";\n"; stageSetup << target << "RESULTARG = " << GetD3DTASumString(s.RESULTARG, false) << ";\n"; stageSetup << '\n'; From 061e38af6f073cd4265f9675bae0e8e8777e826c Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Wed, 14 Apr 2021 13:13:40 +0200 Subject: [PATCH 44/47] HLSL Pixel Shader tweaks; Fully declare stages, to avoid "error X4000: variable 'stages' used without having been completely initialized" Add alphakill support to fixed function Use macro-based alphakill declaration in template (same as in fixed function) Updated comments to differentiate between texture stage and combiner stage Rename st into ts (texture stage) where appropriate Fixed a typo in Brdf macro (was s1, must be ts now) Updated Reflect to use C0 (uppercase, instead of lowercase host c0) Initialized stage to zero for use in the C0 macro, now used by Reflect Fixed type of CombinerOutputMapping to actually be PS_COMBINEROUTPUT_OUTPUTMAPPING Introduced and used various PS_*_MASK defines Added some comments on pixel shader verification and test-case discovery.
--- .../Direct3D9/CxbxPixelShaderTemplate.hlsl | 99 ++++++++------- .../Direct3D9/FixedFunctionPixelShader.hlsl | 6 + .../Direct3D9/FixedFunctionPixelShader.hlsli | 4 + src/core/hle/D3D8/Direct3D9/PixelShader.cpp | 6 +- src/core/hle/D3D8/XbPixelShader.cpp | 115 ++++++++++-------- src/core/hle/D3D8/XbPixelShader.h | 18 ++- 6 files changed, 149 insertions(+), 99 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index 3ec0a0c71..d51819b6b 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -61,6 +61,10 @@ uniform const float FRONTFACE_FACTOR : register(c27); // Note : PSH_XBOX_CONSTA #if 0 // Compiler-defines/symbols which must be defined when their bit/value is set in the corresponding register : + // Generated by PixelShader.cpp::BuildShader() + + // Data from X_D3DTSS_ALPHAKILL : + #define ALPHAKILL {false, false, false, false} // Bits from PSCombinerCount (a.k.a. 
PSCombinerCountFlags) : #define PS_COMBINERCOUNT 2 @@ -74,10 +78,10 @@ uniform const float FRONTFACE_FACTOR : register(c27); // Note : PSH_XBOX_CONSTA #define PS_COMPAREMODE_2(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) #define PS_COMPAREMODE_3(in) CM_LT(in.x) CM_LT(in.y) CM_LT(in.z) CM_LT(in.w) - // Input texture register mappings for stage 1, 2 and 3 (stage 0 has no input-texture) + // Input texture register mappings for texture stage 1, 2 and 3 (stage 0 has no input-texture) static const int PS_INPUTTEXTURE_[4] = { -1, 0, 0, 0 }; - // Dot mappings for stage 1, 2 and 3 (stage 0 performs no dot product) + // Dot mappings for texture stage 1, 2 and 3 (stage 0 performs no dot product) #define PS_DOTMAPPING_1 PS_DOTMAPPING_MINUS1_TO_1_D3D #define PS_DOTMAPPING_2 PS_DOTMAPPING_MINUS1_TO_1_D3D #define PS_DOTMAPPING_3 PS_DOTMAPPING_MINUS1_TO_1_D3D @@ -93,14 +97,14 @@ uniform const float FRONTFACE_FACTOR : register(c27); // Note : PSH_XBOX_CONSTA // Second raw string : R"DELIMITER( -// PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use stage-specific constants c0_0 .. c0_7, or c0_0 for all stages +// PS_COMBINERCOUNT_UNIQUE_C0 steers whether for C0 to use combiner stage-specific constants c0_0 .. c0_7, or c0_0 for all stages #ifdef PS_COMBINERCOUNT_UNIQUE_C0 #define C0 c0_[stage] // concatenate stage to form c0_0 .. c0_7 #else // PS_COMBINERCOUNT_SAME_C0 #define C0 c0_[0] // always resolve to c0_0 #endif -// PS_COMBINERCOUNT_UNIQUE_C1 steers whether for C1 to use stage-specific constants c1_0 .. c1_7, or c1_0 for all stages +// PS_COMBINERCOUNT_UNIQUE_C1 steers whether for C1 to use combiner stage-specific constants c1_0 .. c1_7, or c1_0 for all stages #ifdef PS_COMBINERCOUNT_UNIQUE_C1 #define C1 c1_[stage] // concatenate stage to form c1_0 .. c1_7 #else // PS_COMBINERCOUNT_SAME_C1 @@ -218,35 +222,38 @@ float m21(const float input) // TODO : Generate sampler status? 
sampler samplers[4] : register(s0); -// Generated alphakill contents are based on X_D3DTSS_ALPHAKILL (we avoid using a constant, to allow false's to be optimized away) -// bool alphakill[4] = {false, false, false, false}; // Generated by PixelShader.cpp::BuildShader() +// Declare alphakill as a variable (avoiding a constant, to allow false's to be optimized away) : +#ifndef ALPHAKILL + #define ALPHAKILL {false, false, false, false} +#endif +static bool alphakill[4] = ALPHAKILL; -// Actual texture sampling per stage (always uses the s sampling vector variable as input) +// Actual texture sampling per texture stage (ts), using the sampling vector (s) as input, // abstracting away the specifics of accessing above sampler declarations (usefull for future Direct3D 10+ sampler arrays) -float4 Sample2D(int st, float3 s) +float4 Sample2D(int ts, float3 s) { - float4 result = tex2D(samplers[st], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) - if (alphakill[st]) + float4 result = tex2D(samplers[ts], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) + if (alphakill[ts]) if (result.a == 0) discard; return result; } -float4 Sample3D(int st, float3 s) +float4 Sample3D(int ts, float3 s) { - float4 result = tex3D(samplers[st], s.xyz); - if (alphakill[st]) + float4 result = tex3D(samplers[ts], s.xyz); + if (alphakill[ts]) if (result.a == 0) discard; return result; } -float4 Sample6F(int st, float3 s) +float4 Sample6F(int ts, float3 s) { - float4 result = texCUBE(samplers[st], s.xyz); - if (alphakill[st]) + float4 result = texCUBE(samplers[ts], s.xyz); + if (alphakill[ts]) if (result.a == 0) discard; @@ -260,44 +267,44 @@ float4 Sample6F(int st, float3 s) #define t3 t[3] // Resolve a stage number via 'input texture (index) mapping' to it's corresponding output texture register (rgba?) 
-#define src(st) t[PS_INPUTTEXTURE_[st]] +#define src(ts) t[PS_INPUTTEXTURE_[ts]] -// Calculate the dot result for a given stage. Since any given stage is input-mapped to always be less than or equal the stage it appears in, this won't cause read-ahead issues +// Calculate the dot result for a given texture stage. Since any given stage is input-mapped to always be less than or equal the stage it appears in, this won't cause read-ahead issues // Test case: BumpDemo demo -#define CalcDot(st) PS_DOTMAPPING_ ## st(src(st)); dot_[st] = dot(iT[st].xyz, dm) +#define CalcDot(ts) PS_DOTMAPPING_ ## ts(src(ts)); dot_[ts] = dot(iT[ts].xyz, dm) // Addressing operations -#define Passthru(st) float4(saturate(iT[st].xyz), 1) // Clamps input texture coordinates to the range [0..1] -#define Brdf(st) float3(t[st-2].y, t[s1-1].y, t[st-2].x - t[st-1].x) // TODO : Complete 16 bit phi/sigma retrieval from float4 texture register. Perhaps use CalcHiLo? -#define Normal2(st) float3(dot_[st-1], dot_[st], 0) // Preceding and current stage dot result. Will be input for Sample2D. -#define Normal3(st) float3(dot_[st-2], dot_[st-1], dot_[st]) // Two preceding and current stage dot result. +#define Passthru(ts) float4(saturate(iT[ts].xyz), 1) // Clamps input texture coordinates to the range [0..1] +#define Brdf(ts) float3(t[ts-2].y, t[ts-1].y, t[ts-2].x - t[ts-1].x) // TODO : Complete 16 bit phi/sigma retrieval from float4 texture register. Perhaps use CalcHiLo? +#define Normal2(ts) float3(dot_[ts-1], dot_[ts], 0) // Preceding and current stage dot result. Will be input for Sample2D. +#define Normal3(ts) float3(dot_[ts-2], dot_[ts-1], dot_[ts]) // Two preceding and current stage dot result. #define Eye float3(iT[1].w, iT[2].w, iT[3].w) // 4th (q) component of input texture coordinates 1, 2 and 3. Only used by texm3x3vspec/PS_TEXTUREMODES_DOT_RFLCT_SPEC, always at stage 3. TODO : Map iT[1/2/3] through PS_INPUTTEXTURE_[]? 
#define Reflect(n, e) 2 * (dot(n, e) / dot(n, n)) * n - e // https://documentation.help/directx8_c/texm3x3vspec.htm -#define BumpEnv(st) float3(iT[st].x + (BEM[st].x * src(st).r) + (BEM[st].y * src(st).g), iT[st].y + (BEM[st].z * src(st).r) + (BEM[st].w * src(st).g), 0) // Will be input for Sample2D. TODO : Compact into a regular 2x2 maxtrix multiplication. -#define LSO(st) (LUM[st].x * src(st).b) + LUM[st].y // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET +#define BumpEnv(ts) float3(iT[ts].x + (BEM[ts].x * src(ts).r) + (BEM[ts].y * src(ts).g), iT[ts].y + (BEM[ts].z * src(ts).r) + (BEM[ts].w * src(ts).g), 0) // Will be input for Sample2D. TODO : Compact into a regular 2x2 maxtrix multiplication. +#define LSO(ts) (LUM[ts].x * src(ts).b) + LUM[ts].y // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET // Implementations for all possible texture modes, with stage as argument (prefixed with valid stages and corresponding pixel shader 1.3 assembly texture addressing instructions) // For ease of understanding, all follow this plan : Optional specifics, or dot calculation (some with normal selection) and sampling vector determination. All end by deriving a value and assigning this to the stage's texture register. -/*0123 tex */ #define PS_TEXTUREMODES_NONE(st) v = black; t[st] = v // Seems to work -/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(st) s = iT[st].xyz; v = Sample2D(st, s); t[st] = v // Seems to work (are x/w and y/w implicit?) [1] -/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(st) s = iT[st].xyz; v = Sample3D(st, s); t[st] = v // Seems to work (is z/w implicit?) 
-/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(st) s = iT[st].xyz; v = Sample6F(st, s); t[st] = v // TODO : Test -/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(st) v = Passthru(st); t[st] = v // Seems to work -/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(st) PS_COMPAREMODE_ ## st(iT[st]); v = black; t[st] = v // Seems to work (setting black to texture register, in case it gets read) -/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(st) s = BumpEnv(st); v = Sample2D(st, s); t[st] = v // Seems to work -/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(st) PS_TEXTUREMODES_BUMPENVMAP(st); v.rgb *= LSO(st); t[st] = v // TODO : Test -/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(st) s = Brdf(st); v = Sample3D(st, s); t[st] = v // TODO : Test (t[st-2] is 16 bit eyePhi,eyeSigma; t[st-1] is lightPhi,lightSigma) -/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(st) CalcDot(st); n = Normal2(st); s = n; v = Sample2D(st, s); t[st] = v // TODO : Test -/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(st) CalcDot(st); n = Normal2(st); if (n.y==0) v=1;else v = n.x / n.y; t[st] = v // TODO : Make depth-check use result of division, but how? 
-/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st, s); t[st] = v // TODO : Test -/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(st) CalcDot(st); n = Normal3(st); s = Reflect(n, Eye); v = Sample6F(st, s); t[st] = v // TODO : Test -/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(st) CalcDot(st); n = Normal3(st); s = n; v = Sample3D(st, s); t[st] = v // TODO : Test -/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(st) CalcDot(st); n = Normal3(st); s = n; v = Sample6F(st, s); t[st] = v // TODO : Test -/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(st) s = src(st).arg; v = Sample2D(st, s); t[st] = v // TODO : Test [1] -/*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(st) s = src(st).gba; v = Sample2D(st, s); t[st] = v // TODO : Test [1] -// TODO replace dm with dot_[st]? Confirm BumpDemo 'Cubemap only' modes -/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(st) CalcDot(st); v = float4(dm, 0); t[st] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) -/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(st) CalcDot(st); n = Normal3(st); s = Reflect(n, c0); v = Sample6F(st, s); t[st] = v // TODO : Test +/*0123 tex */ #define PS_TEXTUREMODES_NONE(ts) v = black; t[ts] = v // Seems to work +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT2D(ts) s = iT[ts].xyz; v = Sample2D(ts, s); t[ts] = v // Seems to work (are x/w and y/w implicit?) [1] +/*0123 tex */ #define PS_TEXTUREMODES_PROJECT3D(ts) s = iT[ts].xyz; v = Sample3D(ts, s); t[ts] = v // Seems to work (is z/w implicit?) 
+/*0123 tex */ #define PS_TEXTUREMODES_CUBEMAP(ts) s = iT[ts].xyz; v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*0123 texcoord */ #define PS_TEXTUREMODES_PASSTHRU(ts) v = Passthru(ts); t[ts] = v // Seems to work +/*0123 texkill */ #define PS_TEXTUREMODES_CLIPPLANE(ts) PS_COMPAREMODE_ ## ts(iT[ts]); v = black; t[ts] = v // Seems to work (setting black to texture register, in case it gets read) +/*-123 texbem */ #define PS_TEXTUREMODES_BUMPENVMAP(ts) s = BumpEnv(ts); v = Sample2D(ts, s); t[ts] = v // Seems to work +/*-123 texbeml */ #define PS_TEXTUREMODES_BUMPENVMAP_LUM(ts) PS_TEXTUREMODES_BUMPENVMAP(ts); v.rgb *= LSO(ts); t[ts] = v // TODO : Test +/*--23 texbrdf */ #define PS_TEXTUREMODES_BRDF(ts) s = Brdf(ts); v = Sample3D(ts, s); t[ts] = v // TODO : Test (t[ts-2] is 16 bit eyePhi,eyeSigma; t[ts-1] is lightPhi,lightSigma) +/*--23 texm3x2tex */ #define PS_TEXTUREMODES_DOT_ST(ts) CalcDot(ts); n = Normal2(ts); s = n; v = Sample2D(ts, s); t[ts] = v // TODO : Test +/*--23 texm3x2depth */ #define PS_TEXTUREMODES_DOT_ZW(ts) CalcDot(ts); n = Normal2(ts); if (n.y==0) v=1;else v = n.x / n.y; t[ts] = v // TODO : Make depth-check use result of division, but how? 
+/*--2- texm3x3diff */ #define PS_TEXTUREMODES_DOT_RFLCT_DIFF(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*---3 texm3x3vspec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC(ts) CalcDot(ts); n = Normal3(ts); s = Reflect(n, Eye); v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_3D(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample3D(ts, s); t[ts] = v // TODO : Test +/*---3 texm3x3tex */ #define PS_TEXTUREMODES_DOT_STR_CUBE(ts) CalcDot(ts); n = Normal3(ts); s = n; v = Sample6F(ts, s); t[ts] = v // TODO : Test +/*-123 texreg2ar */ #define PS_TEXTUREMODES_DPNDNT_AR(ts) s = src(ts).arg; v = Sample2D(ts, s); t[ts] = v // TODO : Test [1] +/*-123 texreg2bg */ #define PS_TEXTUREMODES_DPNDNT_GB(ts) s = src(ts).gba; v = Sample2D(ts, s); t[ts] = v // TODO : Test [1] +// TODO replace dm with dot_[ts]? Confirm BumpDemo 'Cubemap only' modes +/*-12- texm3x2pad */ #define PS_TEXTUREMODES_DOTPRODUCT(ts) CalcDot(ts); v = float4(dm, 0); t[ts] = v // TODO : Test all dot mapping (setting texture register, in case it gets read - test-case : BumpDemo) +/*---3 texm3x3spec */ #define PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST(ts) CalcDot(ts); n = Normal3(ts); s = Reflect(n, C0); v = Sample6F(ts, s); t[ts] = v // TODO : Test // [1] Note : 3rd component set to s.z is just an (ignored) placeholder to produce a float3 (made unique, to avoid the potential complexity of repeated components) PS_OUTPUT main(const PS_INPUT xIn) @@ -318,7 +325,7 @@ PS_OUTPUT main(const PS_INPUT xIn) float4 sum, prod; // Special purpose registers for xfc (final combiner) operation // Helper variables - int stage; // Write-only variable, generated prefixing each 'opcode', for use in C0 and C1 macro's (and should thus get optimized away) + int stage = 0; // Write-only variable, emitted as prefix-comment before each 'opcode', used in C0 and C1 macro's (and should thus get optimized away), initialized to zero for use of C0 in 
PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST float4 tmp; float H, L; // HILO (high/low) temps float dot_[4]; diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl index bc676711a..fbe40b1e0 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsl @@ -183,6 +183,12 @@ TextureArgs ExecuteTextureStage( else if (type == SAMPLE_CUBE) t = texCUBE(samplers[i], TexCoords[i].xyz + offset.xyz); +#ifdef ENABLE_FF_ALPHAKILL + if (stage.ALPHAKILL) + if (t.a == 0) + discard; + +#endif // Assign the final value for TEXTURE ctx.TEXTURE = t * factor; diff --git a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli index 79c432a60..f2458947c 100644 --- a/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli +++ b/src/core/hle/D3D8/Direct3D9/FixedFunctionPixelShader.hlsli @@ -87,7 +87,11 @@ namespace FixedFunctionPixelShader { alignas(16) float COLORKEYOP; // Unimplemented Xbox extension! alignas(16) float COLORSIGN; // Unimplemented Xbox extension! +#ifdef ENABLE_FF_ALPHAKILL + alignas(16) float ALPHAKILL; // Xbox extension! +#else alignas(16) float ALPHAKILL; // Unimplemented Xbox extension! 
+#endif // TEXTURETRANSFORMFLAGS handled by the VS alignas(16) float BUMPENVMAT00; alignas(16) float BUMPENVMAT01; diff --git a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp index 594f77255..384780a70 100644 --- a/src/core/hle/D3D8/Direct3D9/PixelShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/PixelShader.cpp @@ -191,7 +191,7 @@ void CombinerStageHlsl(std::stringstream& hlsl, RPSCombinerStageChannel& stage, "d_bd2" // y = (x - 0.5) / 2 // PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L // Subtracts 0.5 from outputs and divides by 2 }; - std::string output_modifier = output_modifier_str[(stage.CombinerOutputMapping & 0x38) >> 3]; + std::string output_modifier = output_modifier_str[stage.CombinerOutputMapping >> 3]; // Concatenate it all together into an opcode 'call' (which resolves into macro expressions) hlsl << opcode_comment[opcode][0] << '(' << arguments.str() << ' ' << output_modifier; @@ -296,11 +296,11 @@ void BuildShader(DecodedRegisterCombiner* pShader, std::stringstream& hlsl) hlsl << hlsl_template[0]; // Start with the HLSL template header - hlsl << "\nstatic bool alphakill[4] = {" + hlsl << "\n#define ALPHAKILL {" << (pShader->AlphaKill[0] ? "true, " : "false, ") << (pShader->AlphaKill[1] ? "true, " : "false, ") << (pShader->AlphaKill[2] ? "true, " : "false, ") - << (pShader->AlphaKill[3] ? "true};" : "false};"); + << (pShader->AlphaKill[3] ? 
"true}" : "false}"); hlsl << "\n#define PS_COMBINERCOUNT " << pShader->NumberOfCombiners; if (pShader->NumberOfCombiners > 0) { diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index 204c1f8ca..bd38db034 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -102,9 +102,9 @@ extern XboxTextureStateConverter XboxTextureStates; // Declared in Direct3D9.cpp void RPSRegisterObject::Decode(uint8_t Value) { - Reg = (PS_REGISTER)(Value & PS_REGISTER_EF_PROD); // = mask = 0x0f + Reg = (PS_REGISTER)(Value & PS_REGISTER_MASK); // = 0x0f - // Validate correctness + // Validate correctness (see NOTE below) if (Reg == 6) LOG_TEST_CASE("Unknown PS_REGISTER : 6"); if (Reg == 7) LOG_TEST_CASE("Unknown PS_REGISTER : 7"); } @@ -115,8 +115,8 @@ void RPSInputRegister::Decode(uint8_t Value, unsigned stage_nr, bool isRGB) { RPSRegisterObject::Decode(Value); - Channel = (PS_CHANNEL)(Value & PS_CHANNEL_ALPHA); // = mask = 0x10 - InputMapping = (PS_INPUTMAPPING)(Value & PS_INPUTMAPPING_SIGNED_NEGATE); // = mask = 0xe0 + Channel = (PS_CHANNEL)(Value & PS_CHANNEL_MASK); // = 0x10 + InputMapping = (PS_INPUTMAPPING)(Value & PS_INPUTMAPPING_MASK); // = 0xe0 if (stage_nr == 9) { // In final combiner stage, convert C0 into FC0, and C1 into FC1, to discern them as separate registers @@ -124,7 +124,7 @@ void RPSInputRegister::Decode(uint8_t Value, unsigned stage_nr, bool isRGB) if (Reg == PS_REGISTER_C1) Reg = PS_REGISTER_FC1; } - // Validate correctness + // Validate correctness (see NOTE below) if (stage_nr <= xbox::X_PSH_COMBINECOUNT) { if (Reg == PS_REGISTER_FOG) { if (!isRGB) LOG_TEST_CASE("PS_REGISTER_FOG input not allowed in Alpha register combiner"); @@ -150,10 +150,10 @@ void RPSCombinerOutput::Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_ RPSRegisterObject::Decode(Value); // Decode PSAlphaInputs / PSRGBInputs : - Input[0].Decode((PSInputs >> 8) & 0xFF, stage_nr, isRGB); - Input[1].Decode((PSInputs >> 
0) & 0xFF, stage_nr, isRGB); + Input[0].Decode((uint8_t)(PSInputs >> 8), stage_nr, isRGB); + Input[1].Decode((uint8_t)(PSInputs >> 0), stage_nr, isRGB); - // Validate correctness + // Validate correctness (see NOTE below) if (Reg == PS_REGISTER_C0) LOG_TEST_CASE("PS_REGISTER_C0 not allowed as output"); if (Reg == PS_REGISTER_C1) LOG_TEST_CASE("PS_REGISTER_C1 not allowed as output"); if (Reg == PS_REGISTER_FOG) LOG_TEST_CASE("PS_REGISTER_FOG not allowed as output"); @@ -166,9 +166,9 @@ void RPSCombinerOutput::Decode(uint8_t Value, uint16_t PSInputs, unsigned stage_ void RPSCombinerStageChannel::Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB) { // Decode PSAlphaOutputs / PSRGBOutputs and PSAlphaInputs / PSRGBInputs : - OutputCD.Decode((PSOutputs >> 0) & 0xF, (PSInputs >> 0 ) & 0xFFFF, stage_nr, isRGB); - OutputAB.Decode((PSOutputs >> 4) & 0xF, (PSInputs >> 16) & 0xFFFF, stage_nr, isRGB); - OutputMUX_SUM.Decode((PSOutputs >> 8) & 0xF); + OutputCD.Decode((uint8_t)(PSOutputs >> 0), (uint16_t)(PSInputs >> 0 ), stage_nr, isRGB); + OutputAB.Decode((uint8_t)(PSOutputs >> 4), (uint16_t)(PSInputs >> 16), stage_nr, isRGB); + OutputMUX_SUM.Decode((uint8_t)(PSOutputs >> 8)); // Get the combiner output flags : PS_COMBINEROUTPUT CombinerOutputFlags = (PS_COMBINEROUTPUT)(PSOutputs >> 12); @@ -177,11 +177,11 @@ void RPSCombinerStageChannel::Decode(uint32_t PSInputs, uint32_t PSOutputs, unsi OutputCD.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_DOT_PRODUCT) > 0; // False=Multiply, True=DotProduct OutputAB.DotProduct = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_DOT_PRODUCT) > 0; // False=Multiply, True=DotProduct AB_CD_MUX = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_CD_MUX) > 0; // False=AB+CD, True=MUX(AB,CD) based on R0.a - CombinerOutputMapping = (PS_COMBINEROUTPUT)(CombinerOutputFlags & PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS); // = mask = 0x38 + CombinerOutputMapping = (PS_COMBINEROUTPUT_OUTPUTMAPPING)(CombinerOutputFlags & 
PS_COMBINEROUTPUT_OUTPUTMAPPING_MASK); // = 0x38 OutputCD.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_CD_BLUE_TO_ALPHA) >> 6; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha OutputAB.BlueToAlpha = (CombinerOutputFlags & PS_COMBINEROUTPUT_AB_BLUE_TO_ALPHA) >> 7; // 0=Alpha-to-Alpha, 1=Blue-to-Alpha - // Discover test-cases + // Discover test-cases (see TODO below) // Check for 'discard-all-outputs' if (OutputAB.DotProduct || OutputCD.DotProduct) { if ((OutputAB.Reg == PS_REGISTER_DISCARD) && (OutputCD.Reg == PS_REGISTER_DISCARD)) LOG_TEST_CASE("All two outputs discarded"); @@ -189,7 +189,7 @@ void RPSCombinerStageChannel::Decode(uint32_t PSInputs, uint32_t PSOutputs, unsi // if ((OutputAB.Reg == PS_REGISTER_DISCARD) && (OutputCD.Reg == PS_REGISTER_DISCARD) && (OutputMUX_SUM.Reg == PS_REGISTER_DISCARD)) LOG_TEST_CASE("All three outputs discarded"); // Test-case : XDK sample : Minnaert (on Stage2.Alpha) } - // Validate correctness + // Validate correctness (see NOTE below) if ((PSOutputs & ~0x000FFFFF) > 0) LOG_TEST_CASE("Unknown PS_COMBINEROUTPUT flag bits detected"); if (CombinerOutputMapping == PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS) LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTLEFT_2_BIAS unsupported on NV2A?"); if (CombinerOutputMapping == PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS) LOG_TEST_CASE("PS_COMBINEROUTPUT_SHIFTRIGHT_1_BIAS unsupported on NV2A?"); @@ -211,27 +211,27 @@ void RPSCombinerStageChannel::Decode(uint32_t PSInputs, uint32_t PSOutputs, unsi void RPSFinalCombiner::Decode(const uint32_t PSFinalCombinerInputsABCD, const uint32_t PSFinalCombinerInputsEFG) { - Input[0].Decode((PSFinalCombinerInputsABCD >> 24) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); - Input[1].Decode((PSFinalCombinerInputsABCD >> 16) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); - Input[2].Decode((PSFinalCombinerInputsABCD >> 8) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); - Input[3].Decode((PSFinalCombinerInputsABCD >> 0) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); - 
Input[4].Decode((PSFinalCombinerInputsEFG >> 24) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); - Input[5].Decode((PSFinalCombinerInputsEFG >> 16) & 0xFF, /*stage_nr=*/9, /*isRGB=*/true); - Input[6].Decode((PSFinalCombinerInputsEFG >> 8) & 0xFF, /*stage_nr=*/9, /*isRGB=*/false); // Note : Final combiner input G must be a single component, and must thus be decoded as Alpha + Input[0].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 24), /*stage_nr=*/9, /*isRGB=*/true); + Input[1].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 16), /*stage_nr=*/9, /*isRGB=*/true); + Input[2].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 8), /*stage_nr=*/9, /*isRGB=*/true); + Input[3].Decode((uint8_t)(PSFinalCombinerInputsABCD >> 0), /*stage_nr=*/9, /*isRGB=*/true); + Input[4].Decode((uint8_t)(PSFinalCombinerInputsEFG >> 24), /*stage_nr=*/9, /*isRGB=*/true); + Input[5].Decode((uint8_t)(PSFinalCombinerInputsEFG >> 16), /*stage_nr=*/9, /*isRGB=*/true); + Input[6].Decode((uint8_t)(PSFinalCombinerInputsEFG >> 8), /*stage_nr=*/9, /*isRGB=*/false); // Note : Final combiner input G must be a single component, and must thus be decoded as Alpha PS_FINALCOMBINERSETTING FinalCombinerSettingFlags = (PS_FINALCOMBINERSETTING)((PSFinalCombinerInputsEFG >> 0) & 0xFF); - ComplementV1 = FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1; - ComplementR0 = FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0; - ClampSum = FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_CLAMP_SUM; + ComplementV1 = (FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_V1) > 0; + ComplementR0 = (FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_COMPLEMENT_R0) > 0; + ClampSum = (FinalCombinerSettingFlags & PS_FINALCOMBINERSETTING_CLAMP_SUM) > 0; - // Discover test-cases + // Discover test-cases (see TODO below) // if (Input[0].Channel != PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_RGB/PS_CHANNEL_BLUE detected on final combiner A input"); // Note : test-case ModifyPixelShader uses 
PS_REGISTER_FOG.rgb and seems to expect .rgb handling (not PS_CHANNEL_BLUE's .b) if (Input[4].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner E input"); // Need test-case to determine how this should behave (calculating EF_PROD) : .aaa instead of .rgb? if (Input[5].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner F input"); // Need test-case to determine how this should behave (calculating EF_PROD) : .aaa instead of .rgb? // if (Input[6].Channel == PS_CHANNEL_BLUE) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner G input"); // PS_CHANNEL_BLUE (==0==PS_CHANNEL_RGB) uses G.b // if (Input[6].Channel == PS_CHANNEL_ALPHA) LOG_TEST_CASE("PS_CHANNEL_ALPHA detected on final combiner G input"); // PS_CHANNEL_ALPHA (==1) uses .a Test-case : XDK samples BumpDemo,BumpEarth,BumpLens,Explosion - // Validate correctness + // Validate correctness (see NOTE below) if ((FinalCombinerSettingFlags & ~0xE0) > 0) LOG_TEST_CASE("Unknown FinalCombinerSetting bits detected"); } @@ -240,9 +240,9 @@ void RPSFinalCombiner::Decode(const uint32_t PSFinalCombinerInputsABCD, const ui void DecodedRegisterCombiner::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, PS_TEXTUREMODES psTextureModes[xbox::X_D3DTS_STAGECOUNT]) { for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { - psTextureModes[i] = (PS_TEXTUREMODES)((pPSDef->PSTextureModes >> (i * 5)) & 0x1F); + psTextureModes[i] = (PS_TEXTUREMODES)((pPSDef->PSTextureModes >> (i * 5)) & PS_TEXTUREMODES_MASK); - // Discover test-cases + // Discover test-cases (see TODO below) // if (psTextureModes[i] == PS_TEXTUREMODES_NONE) LOG_TEST_CASE("PS_TEXTUREMODES_NONE"); // if (psTextureModes[i] == PS_TEXTUREMODES_PROJECT2D) LOG_TEST_CASE("PS_TEXTUREMODES_PROJECT2D"); if (psTextureModes[i] == PS_TEXTUREMODES_PROJECT3D) LOG_TEST_CASE("PS_TEXTUREMODES_PROJECT3D"); // Test-case: XDK sample TechCertGame,NoSortAlphaBlend,VolumeLight @@ -263,7 +263,7 @@ void 
DecodedRegisterCombiner::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDe if (psTextureModes[i] == PS_TEXTUREMODES_DOTPRODUCT) LOG_TEST_CASE("PS_TEXTUREMODES_DOTPRODUCT"); if (psTextureModes[i] == PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) LOG_TEST_CASE("PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST"); - // Validate correctness + // Validate correctness (see NOTE below) if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP only allowed in stage 1, 2 or 3"); if (psTextureModes[i] == PS_TEXTUREMODES_BUMPENVMAP_LUM) if (i < 1) LOG_TEST_CASE("PS_TEXTUREMODES_BUMPENVMAP_LUM only allowed in stage 1, 2 or 3"); if (psTextureModes[i] == PS_TEXTUREMODES_BRDF) if (i < 2) LOG_TEST_CASE("PS_TEXTUREMODES_BRDF only allowed in stage 2 or 3"); @@ -286,7 +286,7 @@ void DecodedRegisterCombiner::GetPSTextureModes(xbox::X_D3DPIXELSHADERDEF* pPSDe if (psTextureModes[i] > PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST) LOG_TEST_CASE("Invalid PS_TEXTUREMODES in stage?"); } - // Validate correctness + // Validate correctness (see NOTE below) if ((pPSDef->PSTextureModes & ~0x000FFFFF) > 0) LOG_TEST_CASE("Unknown PSTextureModes bits detected"); } @@ -294,9 +294,9 @@ void DecodedRegisterCombiner::GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, { psDotMapping[0] = (PS_DOTMAPPING)(0); for (int i = 1; i < xbox::X_D3DTS_STAGECOUNT; i++) { - psDotMapping[i] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> ((i - 1) * 4)) & 0x7); + psDotMapping[i] = (PS_DOTMAPPING)((pPSDef->PSDotMapping >> ((i - 1) * 4)) & PS_DOTMAPPING_MASK); - // Discover test-cases + // Discover test-cases (see TODO below) // if (psDotMapping[i] == PS_DOTMAPPING_ZERO_TO_ONE) LOG_TEST_CASE("PS_DOTMAPPING_ZERO_TO_ONE"); // Note : Most common scenario, no need for test-cases if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1_D3D) LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1_D3D"); // Test-case : XDK samples BumpDemo, Minnaert if (psDotMapping[i] == PS_DOTMAPPING_MINUS1_TO_1_GL) 
LOG_TEST_CASE("PS_DOTMAPPING_MINUS1_TO_1_GL"); @@ -307,22 +307,22 @@ void DecodedRegisterCombiner::GetPSDotMapping(xbox::X_D3DPIXELSHADERDEF* pPSDef, if (psDotMapping[i] == PS_DOTMAPPING_HILO_HEMISPHERE) LOG_TEST_CASE("PS_DOTMAPPING_HILO_HEMISPHERE"); } - // Validate correctness - if ((pPSDef->PSDotMapping & ~0x00000777) > 0) LOG_TEST_CASE("Unknown PSDotMapping bits detected"); + // Validate correctness (see NOTE below) + if ((pPSDef->PSDotMapping & ~0x00000777) > 0) LOG_TEST_CASE("Unknown PSDotMapping bits detected"); } void DecodedRegisterCombiner::GetPSCompareModes(xbox::X_D3DPIXELSHADERDEF* pPSDef, bool psCompareModes[xbox::X_D3DTS_STAGECOUNT][4]) { for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { - uint32_t CompareMode = (pPSDef->PSCompareMode >> (i * 4)) & 0xF; + uint32_t CompareMode = (pPSDef->PSCompareMode >> (i * 4)) & PS_COMPAREMODE_MASK; psCompareModes[i][0] = (CompareMode & PS_COMPAREMODE_S_GE) > 0; psCompareModes[i][1] = (CompareMode & PS_COMPAREMODE_T_GE) > 0; psCompareModes[i][2] = (CompareMode & PS_COMPAREMODE_R_GE) > 0; psCompareModes[i][3] = (CompareMode & PS_COMPAREMODE_Q_GE) > 0; } - // Validate correctness - if ((pPSDef->PSCompareMode & ~0x0000FFFF) > 0) LOG_TEST_CASE("Unknown PSCompareMode bits detected"); + // Validate correctness (see NOTE below) + if ((pPSDef->PSCompareMode & ~0x0000FFFF) > 0) LOG_TEST_CASE("Unknown PSCompareMode bits detected"); } void DecodedRegisterCombiner::GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDef, int psInputTexture[xbox::X_D3DTS_STAGECOUNT]) @@ -332,21 +332,22 @@ void DecodedRegisterCombiner::GetPSInputTexture(xbox::X_D3DPIXELSHADERDEF* pPSDe psInputTexture[2] = (pPSDef->PSInputTexture >> 16) & 0x1; // Stage 2 can use stage 0 or 1 psInputTexture[3] = (pPSDef->PSInputTexture >> 20) & 0x3; // Stage 3 can only use stage 0, 1 or 2 - // Discover test-cases + // Discover test-cases (see TODO below) + if (psInputTexture[2] == 0) LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 0"); // if (psInputTexture[2] == 1) 
LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 1"); // Test-case : XDK sample BumpEarth,Explosion,ZSprite - if (psInputTexture[2] == 2) LOG_TEST_CASE("PS_INPUTTEXTURE(2) uses texture 2"); + if (psInputTexture[3] == 0) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 0"); // if (psInputTexture[3] == 1) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 1"); // Test-case : XDK sample Explosion,ZSprite if (psInputTexture[3] == 2) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 2"); - if (psInputTexture[3] == 3) LOG_TEST_CASE("PS_INPUTTEXTURE(3) uses texture 3"); - // Validate correctness - if ((pPSDef->PSInputTexture & ~0x00310000) > 0) LOG_TEST_CASE("Unknown PSInputTexture bits detected"); + // Validate correctness (see NOTE below) + if (psInputTexture[3] == 3) LOG_TEST_CASE("PS_INPUTTEXTURE(3) incorrectly uses texture 3"); + if ((pPSDef->PSInputTexture & ~0x00310000) > 0) LOG_TEST_CASE("Unknown PSInputTexture bits detected"); } void DecodedRegisterCombiner::Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef) { NumberOfCombiners = (pPSDef->PSCombinerCount >> 0) & 0xF; - uint32_t CombinerCountFlags = (pPSDef->PSCombinerCount >> 8); + uint32_t CombinerCountFlags = (pPSDef->PSCombinerCount >> 8); // = PS_COMBINERCOUNTFLAGS CombinerMuxesOnMsb = (CombinerCountFlags & PS_COMBINERCOUNT_MUX_MSB) > 0; CombinerHasUniqueC0 = (CombinerCountFlags & PS_COMBINERCOUNT_UNIQUE_C0) > 0; @@ -368,18 +369,21 @@ void DecodedRegisterCombiner::Decode(xbox::X_D3DPIXELSHADERDEF *pPSDef) FinalCombiner.Decode(pPSDef->PSFinalCombinerInputsABCD, pPSDef->PSFinalCombinerInputsEFG); } - TexModeAdjust = (pPSDef->PSFinalCombinerConstants >> 8) & PS_GLOBALFLAGS_TEXMODE_ADJUST; + TexModeAdjust = ((pPSDef->PSFinalCombinerConstants >> PS_GLOBALFLAGS_SHIFT) & PS_GLOBALFLAGS_TEXMODE_ADJUST) > 0; - // Discover test-cases + // Discover test-cases (see TODO below) if (NumberOfCombiners == 0) LOG_TEST_CASE("NumberOfCombiners is zero"); if (!CombinerMuxesOnMsb) LOG_TEST_CASE("PS_COMBINERCOUNT_MUX_LSB detected"); // Test case 
required for how to implement the FCS_MUX check on LSB (see PS_COMBINERCOUNT_MUX_LSB in CxbxPixelShaderTemplate.hlsl) Note : test-case ModifyPixelShader hits this by mistake if (TexModeAdjust) LOG_TEST_CASE("PS_GLOBALFLAGS_TEXMODE_ADJUST detected"); - // Validate correctness + // Validate correctness (see NOTE below) if (NumberOfCombiners > 8) LOG_TEST_CASE("NumberOfCombiners bigger than maximum (of 8)"); if ((pPSDef->PSCombinerCount & ~0x0001110F) > 0) LOG_TEST_CASE("Unknown PSCombinerCount bits detected"); } +// * TODO : For all "Discover test-cases" LOG_TEST_CASE's that lack sufficient test-case mentions, find some, note them in an EOL comment, and comment out the entire check. +// * NOTE : For all "Validate correctness" LOG_TEST_CASE's that ever get hit, investigate what caused it, what should be done, implement that, and update the verification. + /* PSH_RECOMPILED_SHADER */ typedef struct s_CxbxPSDef { @@ -466,7 +470,7 @@ typedef struct s_CxbxPSDef { } // Pre-decode TexModeAdjust, which impacts AdjustTextureModes - DecodedTexModeAdjust = (PSDef.PSFinalCombinerConstants >> 8) & PS_GLOBALFLAGS_TEXMODE_ADJUST; + DecodedTexModeAdjust = ((PSDef.PSFinalCombinerConstants >> PS_GLOBALFLAGS_SHIFT) & PS_GLOBALFLAGS_TEXMODE_ADJUST) > 0; // Pre-decode hasFinalCombiner, which impacts AdjustFinalCombiner DecodedHasFinalCombiner = (PSDef.PSFinalCombinerInputsABCD > 0) || (PSDef.PSFinalCombinerInputsEFG > 0); @@ -669,7 +673,11 @@ std::string GetFixedFunctionShaderTemplate() { std::string_view GetD3DTOPString(int d3dtop) { static constexpr std::string_view opToString[] = { +#ifdef ENABLE_FF_ALPHAKILL + "X_D3DTOP_DISABLE", // 0 (Was UNDEFINED, but that doesn't compile) +#else "UNDEFINED", // 0 +#endif "X_D3DTOP_DISABLE", // 1 "X_D3DTOP_SELECTARG1", // 2 "X_D3DTOP_SELECTARG2", // 3 @@ -698,7 +706,11 @@ std::string_view GetD3DTOPString(int d3dtop) { "X_D3DTOP_BUMPENVMAPLUMINANCE", // 26 }; +#ifdef ENABLE_FF_ALPHAKILL + if (d3dtop < 0 || d3dtop > 26) { +#else if (d3dtop < 1 || 
d3dtop > 26) { +#endif EmuLog(LOG_LEVEL::ERROR2, "Unmapped texture operation %d", d3dtop); d3dtop = 0; // undefined } @@ -872,11 +884,16 @@ IDirect3DPixelShader9* GetFixedFunctionShader() stageSetup << '\n'; for (int i = 0; i < 4; i++) { +#ifdef ENABLE_FF_ALPHAKILL + // Even when a stage is disabled, we still have to fully initialize it's values, to prevent + // "error X4000: variable 'stages' used without having been completely initialized" +#else // The stage is initialized to be disabled // We don't have to output anything if (states[i].COLOROP == X_D3DTOP_DISABLE) continue; +#endif std::string target = "stages[" + std::to_string(i) + "]."; auto s = states[i]; @@ -1111,8 +1128,8 @@ void DxbxUpdateActivePixelShader() // NOPATCH LOG_TEST_CASE("Two sided lighting"); // VFACE is positive for clockwise faces // If Xbox designates counter-clockwise as front-facing, we invert VFACE - auto cwFrontface = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FRONTFACE) == 0x900; // clockwise; - frontfaceFactor = cwFrontface ? 1 : -1; + auto cwFrontface = XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_FRONTFACE) == 0x900; // clockwise; = NV097_SET_FRONT_FACE_V_CW = NV2A_FRONT_FACE_CW + frontfaceFactor = cwFrontface ? 
1.0 : -1.0; } fColor[PSH_XBOX_CONSTANT_FRONTFACE_FACTOR].r = frontfaceFactor; diff --git a/src/core/hle/D3D8/XbPixelShader.h b/src/core/hle/D3D8/XbPixelShader.h index 49e812259..737ba9da1 100644 --- a/src/core/hle/D3D8/XbPixelShader.h +++ b/src/core/hle/D3D8/XbPixelShader.h @@ -107,6 +107,8 @@ enum PS_TEXTUREMODES PS_TEXTUREMODES_DOTPRODUCT= 0x11L, // - * * - PSInputTexture PS_TEXTUREMODES_DOT_RFLCT_SPEC_CONST= 0x12L, // - - - * Sample, PSInputTexture, PSDotMapping // 0x13-0x1f reserved + + PS_TEXTUREMODES_MASK= 0x1fL }; // ========================================================================================================= @@ -137,6 +139,8 @@ enum PS_DOTMAPPING PS_DOTMAPPING_HILO_HEMISPHERE_D3D= 0x05L, // - * * * PS_DOTMAPPING_HILO_HEMISPHERE_GL= 0x06L, // - * * * PS_DOTMAPPING_HILO_HEMISPHERE= 0x07L, // - * * * + + PS_DOTMAPPING_MASK= 0x07L }; // ========================================================================================================= @@ -161,6 +165,8 @@ enum PS_COMPAREMODE PS_COMPAREMODE_Q_LT= 0x00L, PS_COMPAREMODE_Q_GE= 0x08L, + + PS_COMPAREMODE_MASK= 0x0fL }; // ========================================================================================================= @@ -284,6 +290,8 @@ enum PS_INPUTMAPPING PS_INPUTMAPPING_HALFBIAS_NEGATE= 0xa0L, // 1/2 - max(0,x) = -1*max(0,x) + 0.5 invalid for final combiner PS_INPUTMAPPING_SIGNED_IDENTITY= 0xc0L, // x = 1* x + 0.0 invalid for final combiner PS_INPUTMAPPING_SIGNED_NEGATE= 0xe0L, // -x = -1* x + 0.0 invalid for final combiner + + PS_INPUTMAPPING_MASK= 0xe0L }; enum PS_REGISTER @@ -304,6 +312,8 @@ enum PS_REGISTER PS_REGISTER_V1R0_SUM= 0x0eL, // r A.k.a. _REG_SPECLIT PS_REGISTER_EF_PROD= 0x0fL, // r A.k.a. 
_REG_EF_PROD + PS_REGISTER_MASK= 0x0fL, + // These constant values can be represented as a combination of 0, and an input modifier // But they're not registers // PS_REGISTER_ONE= PS_REGISTER_ZERO | PS_INPUTMAPPING_UNSIGNED_INVERT, // 0x20 r OK for final combiner @@ -326,6 +336,8 @@ enum PS_CHANNEL PS_CHANNEL_RGB= 0x00, // used as RGB source PS_CHANNEL_BLUE= 0x00, // used as ALPHA source PS_CHANNEL_ALPHA= 0x10, // used as RGB or ALPHA source + + PS_CHANNEL_MASK= 0x10 }; enum PS_FINALCOMBINERSETTING @@ -362,6 +374,8 @@ enum PS_COMBINEROUTPUT_OUTPUTMAPPING PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTLEFT_2_BIAS= 0x28L, // y = (x - 0.5) * 4 Note : Cxbx inferred method; May not be supported on NV2A PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1= 0x30L, // y = x / 2 PS_COMBINEROUTPUT_OUTPUTMAPPING_SHIFTRIGHT_1_BIAS= 0x38L, // y = (x - 0.5) / 2 Note : Cxbx inferred method; May not be supported on NV2A + + PS_COMBINEROUTPUT_OUTPUTMAPPING_MASK= 0x38L }; enum PS_COMBINEROUTPUT @@ -429,6 +443,8 @@ enum PS_GLOBALFLAGS PS_GLOBALFLAGS_NO_TEXMODE_ADJUST= 0x0000L, // don't adjust texture modes PS_GLOBALFLAGS_TEXMODE_ADJUST= 0x0001L, // adjust texture modes according to set texture + + PS_GLOBALFLAGS_SHIFT= 8 }; @@ -464,7 +480,7 @@ struct RPSCombinerStageChannel { RPSCombinerOutput OutputAB; // Contains InputA and InputB (as Input1 and Input2) RPSRegisterObject OutputMUX_SUM; bool AB_CD_MUX; // False=AB+CD, True=MUX(AB,CD) based on R0.a - PS_COMBINEROUTPUT CombinerOutputMapping; + PS_COMBINEROUTPUT_OUTPUTMAPPING CombinerOutputMapping; void Decode(uint32_t PSInputs, uint32_t PSOutputs, unsigned stage_nr, bool isRGB); }; From c03b2ed15169b13fcffcebf601843ad904ce36d5 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Fri, 23 Apr 2021 12:12:46 +0200 Subject: [PATCH 45/47] Fix DolphinClassic sample, by not making fogDepth absolute (thanks NZJenkins!) (This bug was introduced with PR #2163, based on some misleading code in xqemu, to cater for what now appear to be non-supported fog modes on NV2A.) 
--- src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index d2d3cdc00..477cbb9ac 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -332,7 +332,7 @@ R"DELIMITER( // Fogging // TODO deduplicate - const float fogDepth = abs(oFog.x); + const float fogDepth = oFog.x; // Don't abs this value! Test-case : DolphinClassic xdk sample const float fogTableMode = CxbxFogInfo.x; const float fogDensity = CxbxFogInfo.y; const float fogStart = CxbxFogInfo.z; From a8fae552991ed02b15baed3c7b083d99a7bd9801 Mon Sep 17 00:00:00 2001 From: PatrickvL Date: Sat, 15 May 2021 21:06:12 +0200 Subject: [PATCH 46/47] Fix HLSL PS bump environment mapping (most of the research done by medieval, thanks!) --- .../Direct3D9/CxbxPixelShaderTemplate.hlsl | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index d51819b6b..07e5292fb 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -228,36 +228,46 @@ sampler samplers[4] : register(s0); #endif static bool alphakill[4] = ALPHAKILL; +float4 PostProcessTexel(const int ts, float4 t) +{ + if (alphakill[ts]) + if (t.a == 0) + discard; + + return t; +} + // Actual texture sampling per texture stage (ts), using the sampling vector (s) as input, // abstracting away the specifics of accessing above sampler declarations (usefull for future Direct3D 10+ sampler arrays) float4 Sample2D(int ts, float3 s) { float4 result = tex2D(samplers[ts], s.xy); // Ignores s.z (and whatever it's set to, will be optimized away by the compiler, see [1] below) - if 
(alphakill[ts]) - if (result.a == 0) - discard; - - return result; + return PostProcessTexel(ts, result); } float4 Sample3D(int ts, float3 s) { float4 result = tex3D(samplers[ts], s.xyz); - if (alphakill[ts]) - if (result.a == 0) - discard; - - return result; + return PostProcessTexel(ts, result); } float4 Sample6F(int ts, float3 s) { float4 result = texCUBE(samplers[ts], s.xyz); - if (alphakill[ts]) - if (result.a == 0) - discard; + return PostProcessTexel(ts, result); +} - return result; +// Test-case JSRF (boost-dash effect). +float3 DoBumpEnv(const float4 TexCoord, const float4 BumpEnvMat, const float4 src) +{ + // Convert the input bump map (source texture) value range into two's complement signed values (from (0, +1) to (-1, +1), using s_bx2): + const float4 BumpMap = s_bx2(src); // Note : medieval discovered s_bias improved JSRF, PatrickvL changed it into s_bx2 thanks to http://www.rastertek.com/dx11tut20.html + // TODO : The above should be removed, and replaced by some form of COLORSIGN handling, which may not be possible inside this pixel shader, because filtering-during-sampling would cause artifacts. + + const float u = TexCoord.x + (BumpEnvMat.x * BumpMap.r) + (BumpEnvMat.z * BumpMap.g); // Or : TexCoord.x + dot(BumpEnvMat.xz, BumpMap.rg) + const float v = TexCoord.y + (BumpEnvMat.y * BumpMap.r) + (BumpEnvMat.w * BumpMap.g); // Or : TexCoord.y + dot(BumpEnvMat.yw, BumpMap.rg) + + return float3(u, v, 0); } // Map texture registers to their array elements. Having texture registers in an array allows indexed access to them @@ -280,7 +290,7 @@ float4 Sample6F(int ts, float3 s) #define Normal3(ts) float3(dot_[ts-2], dot_[ts-1], dot_[ts]) // Two preceding and current stage dot result. #define Eye float3(iT[1].w, iT[2].w, iT[3].w) // 4th (q) component of input texture coordinates 1, 2 and 3. Only used by texm3x3vspec/PS_TEXTUREMODES_DOT_RFLCT_SPEC, always at stage 3. TODO : Map iT[1/2/3] through PS_INPUTTEXTURE_[]? 
#define Reflect(n, e) 2 * (dot(n, e) / dot(n, n)) * n - e // https://documentation.help/directx8_c/texm3x3vspec.htm -#define BumpEnv(ts) float3(iT[ts].x + (BEM[ts].x * src(ts).r) + (BEM[ts].y * src(ts).g), iT[ts].y + (BEM[ts].z * src(ts).r) + (BEM[ts].w * src(ts).g), 0) // Will be input for Sample2D. TODO : Compact into a regular 2x2 maxtrix multiplication. +#define BumpEnv(ts) DoBumpEnv(iT[ts], BEM[ts], src(ts)) // Will be input for Sample2D. #define LSO(ts) (LUM[ts].x * src(ts).b) + LUM[ts].y // Uses PSH_XBOX_CONSTANT_LUM .x = D3DTSS_BUMPENVLSCALE .y = D3DTSS_BUMPENVLOFFSET // Implementations for all possible texture modes, with stage as argument (prefixed with valid stages and corresponding pixel shader 1.3 assembly texture addressing instructions) From fa0a114e0c27c65fad454af6828752d66ed954db Mon Sep 17 00:00:00 2001 From: Anthony Date: Wed, 19 May 2021 22:31:54 +1200 Subject: [PATCH 47/47] Fix passthru not passing through all components Previous behaviour aligned with ps_1_3 texcoord, where alpha = 1 but appears to be incorrect. 
Note ps_1_4 texcrd leaves alpha undefined Fixes Metal Arms menu clouds --- src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl index 07e5292fb..9edffe760 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxPixelShaderTemplate.hlsl @@ -284,7 +284,11 @@ float3 DoBumpEnv(const float4 TexCoord, const float4 BumpEnvMat, const float4 sr #define CalcDot(ts) PS_DOTMAPPING_ ## ts(src(ts)); dot_[ts] = dot(iT[ts].xyz, dm) // Addressing operations -#define Passthru(ts) float4(saturate(iT[ts].xyz), 1) // Clamps input texture coordinates to the range [0..1] + +// Clamps input texture coordinates to the range [0..1] +// Note alpha is passed through rather than set to one like ps_1_3 'texcoord' +// Test case: Metal Arms (menu skybox clouds, alpha is specifically set in the VS) +#define Passthru(ts) float4(saturate(iT[ts])) #define Brdf(ts) float3(t[ts-2].y, t[ts-1].y, t[ts-2].x - t[ts-1].x) // TODO : Complete 16 bit phi/sigma retrieval from float4 texture register. Perhaps use CalcHiLo? #define Normal2(ts) float3(dot_[ts-1], dot_[ts], 0) // Preceding and current stage dot result. Will be input for Sample2D. #define Normal3(ts) float3(dot_[ts-2], dot_[ts-1], dot_[ts]) // Two preceding and current stage dot result.