diff --git a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl index 3c31f936f..e8a03aa6a 100644 --- a/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl +++ b/src/core/hle/D3D8/Direct3D9/CxbxVertexShaderTemplate.hlsl @@ -32,8 +32,12 @@ uniform float4 C[X_D3DVS_CONSTREG_COUNT] : register(c0); uniform float4 vRegisterDefaultValues[16] : register(c192); uniform float4 vRegisterDefaultFlagsPacked[4] : register(c208); -uniform float4 xboxViewportScale : register(c212); -uniform float4 xboxViewportOffset : register(c213); +uniform float4 xboxViewportScaleInverse : register(c212); +uniform float4 xboxViewportOffset : register(c213); + +uniform float4 xboxTextureScale[4] : register(c214); + +uniform float4 xboxIsRHWTransformedPosition : register(c218); // Overloaded casts, assuring all inputs are treated as float4 float4 _tof4(float src) { return float4(src, src, src, src); } @@ -149,17 +153,6 @@ float _dph(float4 src0, float4 src1) // Xbox ILU Functions -// 2.14.1.10.6 RCP: Reciprocal -#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask -float _rcp(float src) -{ -#if 0 // TODO : Enable - if (src == 1) return 1; - if (src == 0) return 1.#INF; -#endif - return 1/ src; -} - // 2.14.1.10.7 RSQ: Reciprocal Square Root #define x_rsq(dest, mask, src0) dest.mask = _ssss(_rsq(_scalar(src0))).mask float _rsq(float src) @@ -251,6 +244,23 @@ float _rcc(float src) : clamp(r, -1.84467e+019f, -5.42101e-020f); // the IEEE 32-bit binary values 0xDF800000 and 0x9F800000 } +// 2.14.1.10.6 RCP: Reciprocal +#define x_rcp(dest, mask, src0) dest.mask = _ssss(_rcp(_scalar(src0))).mask +float _rcp(float src) +{ + // OpenGL/NVidia extension definition +#if 0 // TODO : Enable? 
+ if (src == 1) return 1; + if (src == 0) return 1.#INF; + return 1 / src; +#endif + // Forward to Xbox clamped reciprocal + // So we have defined behaviour with rcp(0) + // This prevents issues with XYZRHW modes + // where the w component may be 0 + return _rcc(src); +} + float4 reverseScreenspaceTransform(float4 oPos) { // On Xbox, oPos should contain the vertex position in screenspace @@ -261,13 +271,19 @@ float4 reverseScreenspaceTransform(float4 oPos) // mad oPos.xyz, r12, r1.x, c-37 // where c-37 and c-38 are reserved transform values + if (xboxIsRHWTransformedPosition.x) { + // Detect 0 w and avoid 0 division + if (oPos.w == 0) oPos.w = 1; // if else doesn't seem to work here + oPos.w = 1 / oPos.w; // flip rhw to w + } + // oPos.w and xboxViewportScale.z might be VERY big when a D24 depth buffer is used // and multiplying oPos.xyz by oPos.w may cause precision issues. - // Pre-divide them to help keep the values reasonably small. // Test case: Burnout 3 - float3 divisor = xboxViewportScale.xyz / oPos.w; + oPos.xyz -= xboxViewportOffset.xyz; // reverse offset - oPos.xyz /= divisor; // reverse scale and perspective divide + oPos.xyz *= oPos.w; // reverse perspective divide + oPos.xyz *= xboxViewportScaleInverse.xyz; // reverse scale return oPos; } @@ -315,14 +331,15 @@ R"DELIMITER( xOut.oPos = reverseScreenspaceTransform(oPos); xOut.oD0 = saturate(oD0); xOut.oD1 = saturate(oD1); - xOut.oFog = oFog.x; + xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader xOut.oPts = oPts.x; xOut.oB0 = saturate(oB0); xOut.oB1 = saturate(oB1); - xOut.oT0 = oT0; - xOut.oT1 = oT1; - xOut.oT2 = oT2; - xOut.oT3 = oT3; + // Scale textures (TODO : or should we apply this to the input register values?) 
+ xOut.oT0 = oT0 / xboxTextureScale[0]; + xOut.oT1 = oT1 / xboxTextureScale[1]; + xOut.oT2 = oT2 / xboxTextureScale[2]; + xOut.oT3 = oT3 / xboxTextureScale[3]; return xOut; } diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp index 6415dcc32..8f12aae9a 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp @@ -31,7 +31,7 @@ #else #define INCLUDE_DBG_CONSOLE #endif -#include "common\util\hasher.h" +#include "common\util\hasher.h" // For ComputeHash #include #include @@ -108,8 +108,10 @@ static DWORD g_OverlaySwap = 0; // Set in D3DDevice_UpdateOverlay static int g_iWireframe = 0; // wireframe toggle static bool g_bHack_UnlockFramerate = false; // ignore the xbox presentation interval static bool g_bHasDepth = false; // Does device have a Depth Buffer? + float g_ZScale = 1.0; static bool g_bHasStencil = false; // Does device have a Stencil Buffer? static DWORD g_dwPrimPerFrame = 0; // Number of primitives within one frame + bool g_bUsePassthroughHLSL = true; static float g_AspectRatioScale = 1.0f; static UINT g_AspectRatioScaleWidth = 0; static UINT g_AspectRatioScaleHeight = 0; @@ -154,9 +156,6 @@ static bool g_bHack_DisableHostGPUQueries = false; // TO static IDirect3DQuery *g_pHostQueryWaitForIdle = nullptr; static IDirect3DQuery *g_pHostQueryCallbackEvent = nullptr; -// Vertex buffer symbols, declared in XbVertexBuffer.cpp -extern void CxbxImpl_SetStreamSource(UINT StreamNumber, xbox::X_D3DVertexBuffer* pStreamData, UINT Stride); - static std::condition_variable g_VBConditionVariable; // Used in BlockUntilVerticalBlank static std::mutex g_VBConditionMutex; // Used in BlockUntilVerticalBlank static DWORD g_VBLastSwap = 0; @@ -194,8 +193,6 @@ static xbox::dword_xt *g_Xbox_D3DDevice; // TODO: This should b static DWORD g_dwVertexShaderUsage = 0; // Unused. 
If needed, move to XbVertexShader.cpp */ - xbox::dword_xt g_Xbox_VertexShader_Handle = 0; - // Static Function(s) static BOOL WINAPI EmuEnumDisplayDevices(GUID FAR *lpGUID, LPSTR lpDriverDescription, LPSTR lpDriverName, LPVOID lpContext, HMONITOR hm); static DWORD WINAPI EmuRenderWindow(LPVOID); @@ -266,57 +263,57 @@ struct EmuD3D8CreateDeviceProxyData g_EmuCDPD = {0}; // Declare trampolines -#define XB_TRAMPOLINES(XB_MACRO) \ - XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_CreateVertexShader, (CONST xbox::dword_xt*, CONST xbox::dword_xt*, xbox::dword_xt*, xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader, (xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetBackBuffer, (xbox::int_xt, D3DBACKBUFFER_TYPE, xbox::X_D3DSurface**) ); \ - XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetBackBuffer2, (xbox::int_xt) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetDepthStencilSurface, (xbox::X_D3DSurface**) ); \ - XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetDepthStencilSurface2, (xbox::void_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetDisplayMode, (xbox::X_D3DDISPLAYMODE*) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetRenderTarget, (xbox::X_D3DSurface**) ); \ - XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetRenderTarget2, (xbox::void_xt) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_LightEnable, (xbox::dword_xt, xbox::bool_xt) ); \ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_LoadVertexShader, (xbox::dword_xt, xbox::dword_xt) );*/\ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_LoadVertexShaderProgram, (CONST xbox::dword_xt*, xbox::dword_xt) );*/\ - /*XB_MACRO(xbox::void_xt, __stdcall, D3DDevice_LoadVertexShader_0, () );*/\ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_LoadVertexShader_4, (xbox::dword_xt) );*/\ - XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_PersistDisplay, (xbox::void_xt) ); \ - 
XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_Reset, (xbox::X_D3DPRESENT_PARAMETERS*) ); \ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SelectVertexShader, (xbox::dword_xt, xbox::dword_xt) );*/\ - /*XB_MACRO(xbox::void_xt, __stdcall, D3DDevice_SelectVertexShader_0, () );*/\ - /*XB_MACRO(xbox::void_xt, __stdcall, D3DDevice_SelectVertexShader_4, (xbox::dword_xt) );*/\ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetGammaRamp, (xbox::dword_xt, CONST X_D3DGAMMARAMP*) );*/\ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetIndices, (xbox::X_D3DIndexBuffer*, xbox::uint_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetIndices_4, (xbox::uint_xt) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_SetLight, (xbox::dword_xt, CONST xbox::X_D3DLIGHT8*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetPixelShader, (xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetPixelShader_0, () ); \ - XB_MACRO(xbox::void_xt, __fastcall, D3DDevice_SetRenderState_Simple, (xbox::dword_xt, xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetRenderTarget_0, () ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetStreamSource, (xbox::uint_xt, xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetStreamSource_4, (xbox::uint_xt, xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetStreamSource_8, (xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ - XB_MACRO(xbox::void_xt, __fastcall, D3DDevice_SetStreamSource_8__LTCG_edx_StreamNumber, (void*, xbox::uint_xt, xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTexture, (xbox::dword_xt, xbox::X_D3DBaseTexture*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTexture_4__LTCG_eax_pTexture, (xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTexture_4, 
(xbox::X_D3DBaseTexture*) ); \ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShader, (xbox::dword_xt) );*/\ - /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShaderInput, (xbox::dword_xt, xbox::uint_xt, xbox::X_STREAMINPUT*) );*/\ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetViewport, (CONST xbox::X_D3DVIEWPORT8*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform_0, () ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource, (xbox::X_D3DResource*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource__LTCG, (xbox::void_xt) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::dword_xt, xbox::X_D3DPRESENT_PARAMETERS*, IDirect3DDevice**) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_16__LTCG_eax_BehaviorFlags_ebx_ppReturnedDeviceInterface, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::X_D3DPRESENT_PARAMETERS*) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_16__LTCG_eax_BehaviorFlags_ecx_ppReturnedDeviceInterface, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::X_D3DPRESENT_PARAMETERS*) ); \ - XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_4, (xbox::X_D3DPRESENT_PARAMETERS*) ); \ - XB_MACRO(xbox::void_xt, WINAPI, Lock2DSurface, (xbox::X_D3DPixelContainer*, D3DCUBEMAP_FACES, xbox::uint_xt, D3DLOCKED_RECT*, RECT*, xbox::dword_xt) ); \ - XB_MACRO(xbox::void_xt, WINAPI, Lock3DSurface, (xbox::X_D3DPixelContainer*, xbox::uint_xt, D3DLOCKED_BOX*, D3DBOX*, xbox::dword_xt) ); \ +#define XB_TRAMPOLINES(XB_MACRO) \ + XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_CreateVertexShader, (CONST xbox::dword_xt*, CONST xbox::dword_xt*, xbox::dword_xt*, xbox::dword_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader, (xbox::dword_xt) ); \ + 
XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_DeleteVertexShader_0, () ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetBackBuffer, (xbox::int_xt, D3DBACKBUFFER_TYPE, xbox::X_D3DSurface**) ); \ + XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetBackBuffer2, (xbox::int_xt) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetDepthStencilSurface, (xbox::X_D3DSurface**) ); \ + XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetDepthStencilSurface2, (xbox::void_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_GetDisplayMode, (xbox::X_D3DDISPLAYMODE*) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_GetRenderTarget, (xbox::X_D3DSurface**) ); \ + XB_MACRO(xbox::X_D3DSurface*, WINAPI, D3DDevice_GetRenderTarget2, (xbox::void_xt) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_LightEnable, (xbox::dword_xt, xbox::bool_xt) ); \ + /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_LoadVertexShader, (xbox::dword_xt, xbox::dword_xt) );*/\ + /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_LoadVertexShaderProgram, (CONST xbox::dword_xt*, xbox::dword_xt) );*/\ + /*XB_MACRO(xbox::void_xt, __stdcall, D3DDevice_LoadVertexShader_0, () );*/\ + /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_LoadVertexShader_4, (xbox::dword_xt) );*/\ + XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_PersistDisplay, (xbox::void_xt) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_Reset, (xbox::X_D3DPRESENT_PARAMETERS*) ); \ + /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SelectVertexShader, (xbox::dword_xt, xbox::dword_xt) );*/\ + /*XB_MACRO(xbox::void_xt, __stdcall, D3DDevice_SelectVertexShader_0, () );*/\ + /*XB_MACRO(xbox::void_xt, __stdcall, D3DDevice_SelectVertexShader_4, (xbox::dword_xt) );*/\ + /*XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetGammaRamp, (xbox::dword_xt, CONST X_D3DGAMMARAMP*) );*/\ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetIndices, (xbox::X_D3DIndexBuffer*, xbox::uint_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetIndices_4, (xbox::uint_xt) ); \ + 
XB_MACRO(xbox::hresult_xt, WINAPI, D3DDevice_SetLight, (xbox::dword_xt, CONST xbox::X_D3DLIGHT8*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetPixelShader, (xbox::dword_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetPixelShader_0, () ); \ + XB_MACRO(xbox::void_xt, __fastcall, D3DDevice_SetRenderState_Simple, (xbox::dword_xt, xbox::dword_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetRenderTarget_0, () ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetStreamSource, (xbox::uint_xt, xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetStreamSource_4, (xbox::uint_xt, xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetStreamSource_8, (xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ + XB_MACRO(xbox::void_xt, __fastcall, D3DDevice_SetStreamSource_8__LTCG_edx_StreamNumber, (void*, xbox::uint_xt, xbox::X_D3DVertexBuffer*, xbox::uint_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTexture, (xbox::dword_xt, xbox::X_D3DBaseTexture*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTexture_4__LTCG_eax_pTexture, (xbox::dword_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTexture_4, (xbox::X_D3DBaseTexture*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShader, (xbox::dword_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetVertexShaderInput, (xbox::dword_xt, xbox::uint_xt, xbox::X_STREAMINPUT*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetViewport, (CONST xbox::X_D3DVIEWPORT8*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform, (D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_SetTransform_0, () ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3DDevice_MultiplyTransform, (D3DTRANSFORMSTATETYPE, CONST D3DMATRIX*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource, 
(xbox::X_D3DResource*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, D3D_DestroyResource__LTCG, (xbox::void_xt) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::dword_xt, xbox::X_D3DPRESENT_PARAMETERS*, IDirect3DDevice**) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_16__LTCG_eax_BehaviorFlags_ebx_ppReturnedDeviceInterface, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::X_D3DPRESENT_PARAMETERS*) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_16__LTCG_eax_BehaviorFlags_ecx_ppReturnedDeviceInterface, (xbox::uint_xt, D3DDEVTYPE, HWND, xbox::X_D3DPRESENT_PARAMETERS*) ); \ + XB_MACRO(xbox::hresult_xt, WINAPI, Direct3D_CreateDevice_4, (xbox::X_D3DPRESENT_PARAMETERS*) ); \ + XB_MACRO(xbox::void_xt, WINAPI, Lock2DSurface, (xbox::X_D3DPixelContainer*, D3DCUBEMAP_FACES, xbox::uint_xt, D3DLOCKED_RECT*, RECT*, xbox::dword_xt) ); \ + XB_MACRO(xbox::void_xt, WINAPI, Lock3DSurface, (xbox::X_D3DPixelContainer*, xbox::uint_xt, D3DLOCKED_BOX*, D3DBOX*, xbox::dword_xt) ); \ XB_MACRO(xbox::void_xt, WINAPI, D3D_CommonSetRenderTarget, (xbox::X_D3DSurface*, xbox::X_D3DSurface*, void*) ); \ XB_TRAMPOLINES(XB_trampoline_declare); @@ -1605,9 +1602,13 @@ uint8_t *ConvertD3DTextureToARGB( return pDst; } +extern void HLE_init_pgraph_plugins(); // implemented in XbPushBuffer.cpp + // Direct3D initialization (called before emulation begins) void EmuD3DInit() { + HLE_init_pgraph_plugins(); // TODO : Hook more nv_dma_map() result uses in EmuNV2A_PGRAPH.cpp + // create the create device proxy thread { HANDLE thread = CreateThread(nullptr, 0, EmuCreateDeviceProxy, nullptr, 0, nullptr); @@ -1974,6 +1975,10 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar // sometimes, so detect it and stop emulation from here too : SendMessage(hWnd, WM_CLOSE, 0, 0); // See StopEmulation(); } + else if (wParam == VK_F7) + { + g_bUsePassthroughHLSL = !g_bUsePassthroughHLSL; + } else if(wParam == VK_F8) { 
g_bPrintfOn = !g_bPrintfOn; @@ -2450,6 +2455,7 @@ static DWORD WINAPI EmuCreateDeviceProxy(LPVOID) CxbxKrnlCleanup("Could not initialize DirectDraw7"); hRet = g_pDD7->GetCaps(&g_DriverCaps, nullptr); + // TODO : Why does this call return DDERR_INVALIDPARAMS, even when passing in a second argument? DEBUG_D3DRESULT(hRet, "g_pDD7->GetCaps"); hRet = g_pDD7->SetCooperativeLevel(0, DDSCL_NORMAL); @@ -2540,9 +2546,9 @@ static DWORD WINAPI EmuCreateDeviceProxy(LPVOID) ); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexBuffer"); - for(int Streams = 0; Streams < 16; Streams++) + for(int HostStreamNumber = 0; HostStreamNumber < X_VSH_MAX_STREAMS; HostStreamNumber++) { - hRet = g_pD3DDevice->SetStreamSource(Streams, g_pDummyBuffer, + hRet = g_pD3DDevice->SetStreamSource(HostStreamNumber, g_pDummyBuffer, 0, // OffsetInBytes 1); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetStreamSource"); @@ -2988,7 +2994,7 @@ void SetXboxMultiSampleType(xbox::X_D3DMULTISAMPLE_TYPE value) g_Xbox_MultiSampleType = value; } -static inline float GetMultiSampleOffsetDelta() +float GetMultiSampleOffsetDelta() { // TODO : What impact does X_D3DMULTISAMPLE_SAMPLING_SUPER have on offset? return (g_Xbox_MultiSampleType & xbox::X_D3DMULTISAMPLE_SAMPLING_MULTI) ? 
0.0f : 0.5f; @@ -3006,7 +3012,7 @@ void GetMultiSampleOffsetAndScale(float& xScale, float& yScale, float& xOffset, GetMultiSampleOffset(xOffset, yOffset); } -static void ApplyXboxMultiSampleOffset(float& x, float& y) +void ApplyXboxMultiSampleOffset(float& x, float& y) { float d = GetMultiSampleOffsetDelta(); x += d; @@ -3117,15 +3123,15 @@ void Direct3D_CreateDevice_End() g_pXbox_BackBufferSurface = XB_TRMP(D3DDevice_GetRenderTarget2)(); } - // At this point, pRenderTarget should now point to a valid render target + // At this point, g_pXbox_BackBufferSurface should now point to a valid render target // if it still doesn't, we cannot continue without crashing at draw time if (g_pXbox_BackBufferSurface == xbox::zeroptr) { CxbxKrnlCleanup("Unable to determine default Xbox backbuffer"); } - // We must also call our SetRenderTarget patch to properly setup the host state + // We must also properly setup the host state // Update only the Back buffer - xbox::EMUPATCH(D3DDevice_SetRenderTarget)(g_pXbox_BackBufferSurface, xbox::zeroptr); + CxbxImpl_SetRenderTarget(g_pXbox_BackBufferSurface, xbox::zeroptr); } // Now do the same, but for the default depth stencil surface @@ -3143,7 +3149,7 @@ void Direct3D_CreateDevice_End() LOG_TEST_CASE("Unable to determine default Xbox depth stencil"); } else { // Update only the depth stencil - xbox::EMUPATCH(D3DDevice_SetRenderTarget)(xbox::zeroptr, g_pXbox_DefaultDepthStencilSurface); + CxbxImpl_SetRenderTarget(xbox::zeroptr, g_pXbox_DefaultDepthStencilSurface); } } } @@ -3734,16 +3740,30 @@ xbox::void_xt __stdcall xbox::EMUPATCH(D3DDevice_LoadVertexShader_0) // This uses a custom calling convention where parameter is passed in EAX // Test-case: Ninja Gaiden -xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_LoadVertexShader_4) +__declspec(naked) VOID xbox::EMUPATCH(D3DDevice_LoadVertexShader_4) ( dword_xt Address ) { - dword_xt Handle; - __asm mov Handle, eax; + dword_xt Handle; - LOG_FORWARD("D3DDevice_LoadVertexShader"); - return 
EMUPATCH(D3DDevice_LoadVertexShader)(Handle, Address); + // prologue + __asm + { + push ebp + mov ebp, esp + sub esp, __LOCAL_SIZE + mov Handle, eax // get parameter from eax + } + + CxbxImpl_LoadVertexShader(Handle, Address); + + // epilogue + __asm { + mov esp, ebp + pop ebp + ret 4 + } } // ****************************************************************** @@ -4083,7 +4103,7 @@ float GetZScaleForSurface(xbox::X_D3DSurface* pSurface) { // If no surface was present, fallback to 1 if (pSurface == xbox::zeroptr) { - return 1; + return 1.0f; } auto format = GetXboxPixelContainerFormat(pSurface); @@ -4109,15 +4129,18 @@ float GetZScaleForSurface(xbox::X_D3DSurface* pSurface) // Default to 1 if unknown depth format LOG_TEST_CASE("GetZScaleForSurface: Unknown Xbox Depth Format"); - return 1; + return 1.0f; } void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) { + // We need to transform back from screen space (Xbox Shader Output) to Clip space (Host Shader Output) + // If we know the scale and offset term used to get to Xbox screenspace, will allow us to reverse it + // Store viewport offset and scale in constant registers // used in shaders to transform back from screen space (Xbox Shader Output) to Clip space (Host Shader Output) - D3DVIEWPORT ViewPort; - g_pD3DDevice->GetViewport(&ViewPort); + D3DVIEWPORT HostViewPort; + g_pD3DDevice->GetViewport(&HostViewPort); // NOTE: Due to how our GPU emulation works, we need to account for MSAA here, by adjusting the ViewPort dimensions // This fixes the 'offset' models in GTA3 @@ -4125,68 +4148,80 @@ void GetViewPortOffsetAndScale(float (&vOffset)[4], float(&vScale)[4]) float xOffset, yOffset; GetMultiSampleOffsetAndScale(xScale, yScale, xOffset, yOffset); // Since Width and Height are DWORD, subtracting MultiSampleOffset 0.0f or 0.5f makes no sense - //ViewPort.Width -= xOffset; - //ViewPort.Height -= yOffset; - ViewPort.Width = static_cast(ViewPort.Width / xScale); - ViewPort.Height = 
static_cast(ViewPort.Height / yScale); + //HostViewPort.Width -= xOffset; + //HostViewPort.Height -= yOffset; + HostViewPort.Width = static_cast(HostViewPort.Width / xScale); + HostViewPort.Height = static_cast(HostViewPort.Height / yScale); // Calculate Width/Height scale & offset - float scaleWidth = (2.0f / ViewPort.Width) * g_RenderScaleFactor; - float scaleHeight = (2.0f / ViewPort.Height) * g_RenderScaleFactor; + float scaleWidth = (2.0f / HostViewPort.Width) * g_RenderScaleFactor; + float scaleHeight = (2.0f / HostViewPort.Height) * g_RenderScaleFactor; float offsetWidth = scaleWidth; float offsetHeight = scaleHeight; // Calculate Z scale & offset - float zScale = GetZScaleForSurface(g_pXbox_DepthStencil); - float scaleZ = zScale * (ViewPort.MaxZ - ViewPort.MinZ); - float offsetZ = zScale * ViewPort.MinZ; + float scaleZ = g_ZScale * (HostViewPort.MaxZ - HostViewPort.MinZ); +#if 0 // unused? + float offsetZ = g_ZScale * HostViewPort.MinZ; +#endif - // TODO will we need to do something here to support upscaling? - // TODO remove the code above as required + // NOTE: Due to how our GPU emulation works, we need to account for MSAA + float multisampleScaleX, multisampleScaleY; + float multisampleOffsetX, multisampleOffsetY; + GetMultiSampleOffsetAndScale(multisampleScaleX, multisampleScaleY, multisampleOffsetX, multisampleOffsetY); - // Reset to default scale (as we accounted for MSAA scale above) - // But don't reset the offset - xScale = 1.0f; - yScale = 1.0f; + // Scale by frontbuffer width and height + // The backbuffer will have been multipled by the multisample scale + // This fixes the 'offset' models in GTA3 + float xboxScreenWidth = GetPixelContainerWidth(g_pXbox_RenderTarget) / multisampleScaleX; + float xboxScreenHeight = GetPixelContainerHeight(g_pXbox_RenderTarget) / multisampleScaleY; - // Xbox correct values? 
- xOffset = xOffset + (1.0f / 32.0f); - yOffset = yOffset + (1.0f / 32.0f); - xScale = xScale * ViewPort.Width; - yScale = yScale * ViewPort.Height; + // Xbox correct values + multisampleOffsetX += (1.0f / 32.0f); + multisampleOffsetY += (1.0f / 32.0f); - // HACK: Add a host correction factor to these values - // So that after we reverse the screenspace transformation - // Pre-transformed 2d geometry is in the same space as the 3d geometry...? - - // Offset with a host correction - vOffset[0] = xOffset + (0.5f * (float)ViewPort.Width / (float)g_RenderScaleFactor); - vOffset[1] = yOffset + (0.5f * (float)ViewPort.Height / (float)g_RenderScaleFactor); + // Offset + vOffset[0] = multisampleOffsetX + xboxScreenWidth * 0.5f; + vOffset[1] = multisampleOffsetY + xboxScreenHeight * 0.5f; vOffset[2] = 0.0f; //offsetZ; vOffset[3] = 0.0f; - // Scale with a host correction - vScale[0] = xScale * (1.0f / ( 2.0f * (float)g_RenderScaleFactor)); - vScale[1] = yScale * (1.0f / (-2.0f * (float)g_RenderScaleFactor)); + // Scale + vScale[0] = xboxScreenWidth * 0.5f; + vScale[1] = xboxScreenHeight * -0.5f; vScale[2] = scaleZ; // ? vScale[3] = 1.0f; // ? } -void UpdateViewPortOffsetAndScaleConstants() +void CxbxUpdateHostViewPortOffsetAndScaleConstants() { + extern bool g_Xbox_VertexShader_IsPassthrough; + float vScaleOffset[2][4]; // 0 - scale 1 - offset GetViewPortOffsetAndScale(vScaleOffset[1], vScaleOffset[0]); - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_SCALE_MIRROR, reinterpret_cast(vScaleOffset), 2); - // Store viewport offset and scale in constant registers 58 (c-38) and // 59 (c-37) used for screen space transformation. 
- // We only do this if X_D3DSCM_NORESERVEDCONSTANTS is not set, since enabling this flag frees up these registers for shader used + // We only do this if X_D3DSCM_NORESERVEDCONSTANTS is not set, + // since enabling this flag frees up these registers for shader use // Treat this as a flag // Test Case: GTA III, Soldier of Fortune II if (!(g_Xbox_VertexShaderConstantMode & X_D3DSCM_NORESERVEDCONSTANTS)) { - g_pD3DDevice->SetVertexShaderConstantF(X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION, reinterpret_cast(vScaleOffset), 2); + g_pD3DDevice->SetVertexShaderConstantF(X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED, reinterpret_cast(vScaleOffset), 2); } + + // Get the inverse of the scale, to allow multiply instead of divide on GPU + float vScaleInverse[4] = { 1 / vScaleOffset[0][0], 1 / vScaleOffset[0][1], 1 / vScaleOffset[0][2], 1 / vScaleOffset[0][3] }; + float isRHWTransformedPosition[4] = { 0 }; + + if (g_Xbox_VertexShader_IsPassthrough) { + isRHWTransformedPosition[0] = 1.0f; + vScaleInverse[2] = 1.0f; // Passthrough should not scale Z + } + + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_SCALE_INVERSE_BASE, vScaleInverse, CXBX_D3DVS_VIEWPORT_SCALE_INVERSE_SIZE); + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR_BASE, vScaleOffset[1], CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR_SIZE); + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_IS_RHW_TRANSFORMED_POSITION_BASE, isRHWTransformedPosition, CXBX_D3DVS_IS_RHW_TRANSFORMED_POSITION_SIZE); } // ****************************************************************** @@ -4194,7 +4229,7 @@ void UpdateViewPortOffsetAndScaleConstants() // ****************************************************************** xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetViewport) ( - CONST X_D3DVIEWPORT8 *pViewport + X_D3DVIEWPORT8 *pViewport ) { LOG_FUNC_ONE_ARG(pViewport); @@ -4202,13 +4237,19 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetViewport) // Always call the Xbox SetViewPort to update D3D 
Internal State XB_TRMP(D3DDevice_SetViewport)(pViewport); + CxbxImpl_SetViewPort(pViewport); +} + +void CxbxImpl_SetViewPort(xbox::X_D3DVIEWPORT8* pViewport) +{ + LOG_INIT; + // Host does not support pViewPort = nullptr if (pViewport == nullptr) { LOG_TEST_CASE("pViewport = null"); return; } - D3DVIEWPORT XboxViewPort = *pViewport; D3DVIEWPORT HostViewPort = *pViewport; if (g_pXbox_RenderTarget) { @@ -4224,6 +4265,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetViewport) DWORD width = right - left; DWORD height = bottom - top; + D3DVIEWPORT XboxViewPort; XboxViewPort.X = left; XboxViewPort.Y = top; XboxViewPort.Width = width; @@ -4255,6 +4297,8 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetViewport) } // Apply MSAA scale and offset + HostViewPort.X = static_cast(HostViewPort.X * g_Xbox_MultiSampleXScale); + HostViewPort.Y = static_cast(HostViewPort.Y * g_Xbox_MultiSampleYScale); HostViewPort.Width = static_cast(HostViewPort.Width * g_Xbox_MultiSampleXScale); HostViewPort.Height = static_cast(HostViewPort.Height * g_Xbox_MultiSampleYScale); HostViewPort.X = static_cast(HostViewPort.X * g_Xbox_MultiSampleXScale); @@ -4263,8 +4307,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetViewport) HRESULT hRet = g_pD3DDevice->SetViewport(&HostViewPort); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetViewport"); - - UpdateViewPortOffsetAndScaleConstants(); } // LTCG specific D3DDevice_SetShaderConstantMode function... 
@@ -4294,55 +4336,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetShaderConstantMode) g_Xbox_VertexShaderConstantMode = Mode; } -// ****************************************************************** -// * patch: D3DDevice_CreateVertexShader -// ****************************************************************** -xbox::hresult_xt WINAPI xbox::EMUPATCH(D3DDevice_CreateVertexShader) -( - CONST dword_xt *pDeclaration, - CONST dword_xt *pFunction, - dword_xt *pHandle, - dword_xt Usage -) -{ - LOG_FUNC_BEGIN - LOG_FUNC_ARG(pDeclaration) - LOG_FUNC_ARG(pFunction) - LOG_FUNC_ARG(pHandle) - LOG_FUNC_ARG_TYPE(X_D3DUSAGE, Usage) - LOG_FUNC_END; - - // First, we must call the Xbox CreateVertexShader function and check for success - // This does the following: - // Allocates an Xbox VertexShader struct - // Sets reference count to 1 - // Puts Usage in VertexShader->Flags - // If pFunction is not null, it points to DWORDS shader type, length and a binary compiled xbox vertex shader - // If pDeclaration is not null, it's parsed, resulting in a number of constants - // Parse results are pushed to the push buffer - // Sets other fields - // pHandle recieves the addres of the new shader, or-ed with 1 (D3DFVF_RESERVED0) - - HRESULT hRet = D3D_OK; - - if (XB_TRMP(D3DDevice_CreateVertexShader)) { - HRESULT hRet = XB_TRMP(D3DDevice_CreateVertexShader)(pDeclaration, pFunction, pHandle, Usage); - if (FAILED(hRet)) { - LOG_TEST_CASE("D3DDevice_CreateVertexShader trampoline call returned failure"); - RETURN(hRet); - } - } else { - // Due to how our LoadVertexShader patch is implemented, it may call this function without the Xbox version existing - // As a result, we have to build our own vertex shader handle if the trampoline was not found - // We don't do the full steps listed above intentionally so: If this situation is reached, the game - // does not have a CreateVertexShader function, so those actions should not happen anyway! 
- LOG_TEST_CASE("CreateVertexShader with no trampoline"); - *pHandle = ((DWORD)malloc(sizeof(X_D3DVertexShader)) & D3DFVF_RESERVED0); - } - - return CxbxImpl_CreateVertexShader((DWORD *)pDeclaration, (DWORD *)pFunction, (DWORD *)pHandle, Usage); -} - // LTCG specific D3DDevice_SetVertexShaderConstant function... // This uses a custom calling convention where ConstantCount parameter is passed in EDX // Test-case: Murakumo @@ -4379,6 +4372,12 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetVertexShaderConstant) LOG_FUNC_ARG(ConstantCount) LOG_FUNC_END; + // TODO : Should we trampoline into Xbox code as well here, + // so that besides pushing NV2A commands, Xbox internal D3D + // state gets updated? + // Or better yet, remove all D3DDevice_SetVertexShaderConstant patches + // once CxbxUpdateHostVertexShaderConstants is reliable (ie. : when we're + // able to flush the NV2A push buffer) CxbxImpl_SetVertexShaderConstant(Register, pConstantData, ConstantCount); } @@ -4559,22 +4558,20 @@ xbox::void_xt __fastcall xbox::EMUPATCH(D3DDevice_SwitchTexture) LOG_FUNC_ARG(Format) LOG_FUNC_END; - DWORD StageLookup[xbox::X_D3DTS_STAGECOUNT] = { 0x00081b00, 0x00081b40, 0x00081b80, 0x00081bc0 }; - // This array contains D3DPUSH_ENCODE(NV2A_TX_OFFSET(v), 2) = 2 DWORD's, shifted left PUSH_COUNT_SHIFT (18) left DWORD Stage = -1; - for (int v = 0; v < xbox::X_D3DTS_STAGECOUNT; v++) { - if (StageLookup[v] == Method) { - Stage = v; - break; - } - } - - if (Stage == -1) { + switch (Method) { // Detect which of the 4 (X_D3DTS_STAGECOUNT) texture stages is given by the (NV2A) Method argument + // This code contains D3DPUSH_ENCODE(NV2A_TX_OFFSET(v), 2) = 2 DWORD's, shifted left PUSH_COUNT_SHIFT (18) left + case 0x00081b00: Stage = 0; break; + case 0x00081b40: Stage = 1; break; + case 0x00081b80: Stage = 2; break; + case 0x00081bc0: Stage = 3; break; + default: LOG_TEST_CASE("D3DDevice_SwitchTexture Unknown Method"); EmuLog(LOG_LEVEL::WARNING, "Unknown Method (0x%.08X)", Method); - } - else 
{ + } + + if (Stage >= 0) { // Switch Texture updates the data pointer of an active texture using pushbuffer commands if (g_pXbox_SetTexture[Stage] == xbox::zeroptr) { LOG_TEST_CASE("D3DDevice_SwitchTexture without an active texture"); @@ -4628,9 +4625,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_Begin) { LOG_FUNC_ONE_ARG(PrimitiveType); - g_InlineVertexBuffer_PrimitiveType = PrimitiveType; - g_InlineVertexBuffer_TableOffset = 0; - g_InlineVertexBuffer_FVF = 0; + CxbxImpl_Begin(PrimitiveType); } // ****************************************************************** @@ -4676,8 +4671,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetVertexData2s) } extern uint32_t HLE_read_NV2A_pgraph_register(const int reg); // Declared in PushBuffer.cpp -extern void HLE_write_NV2A_vertex_attribute_slot(unsigned slot, uint32_t parameter); // Declared in PushBuffer.cpp -extern uint32_t HLE_read_NV2A_vertex_attribute_slot(unsigned VertexSlot); // Declared in PushBuffer.cpp extern NV2ADevice* g_NV2A; @@ -4723,271 +4716,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetVertexData4f) LOG_FUNC_ARG(d) LOG_FUNC_END; - HRESULT hRet = D3D_OK; - - // Get the vertex shader flags (if any is active) : - uint32_t ActiveVertexAttributeFlags = 0; - if (VshHandleIsVertexShader(g_Xbox_VertexShader_Handle)) { - LOG_TEST_CASE("D3DDevice_SetVertexData4f with active VertexShader"); - X_D3DVertexShader *pXboxVertexShader = VshHandleToXboxVertexShader(g_Xbox_VertexShader_Handle); - if (!(pXboxVertexShader->Flags & 0x10/*=X_VERTEXSHADER_PROGRAM*/)) { - ActiveVertexAttributeFlags = pXboxVertexShader->Flags; - } - - // If we have an active vertex shader, we also write the input to a vertex shader constant - // This allows us to implement Xbox functionality where SetVertexData4f can be used to specify attributes - // not present in the vertex declaration. - // We use range 193 and up to store these values, as Xbox shaders stop at c192! 
- FLOAT values[] = {a,b,c,d}; - if (Register < 0) LOG_TEST_CASE("Register < 0"); - if (Register >= 16) LOG_TEST_CASE("Register >= 16"); - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + Register, values, 1); - } - - // Grow g_InlineVertexBuffer_Table to contain at least current, and a potentially next vertex - if (g_InlineVertexBuffer_TableLength <= g_InlineVertexBuffer_TableOffset + 1) { - UINT InlineVertexBuffer_TableLength_Original = g_InlineVertexBuffer_TableLength; - if (g_InlineVertexBuffer_TableLength == 0) { - g_InlineVertexBuffer_TableLength = PAGE_SIZE / sizeof(struct _D3DIVB); - } else { - g_InlineVertexBuffer_TableLength *= 2; - } - - for (unsigned i = 0; i < (g_InlineVertexBuffer_TableLength - InlineVertexBuffer_TableLength_Original); ++i) { - g_InlineVertexBuffer_Table.emplace_back(); - } - - EmuLog(LOG_LEVEL::DEBUG, "Expanded g_InlineVertexBuffer_Table to %u entries", g_InlineVertexBuffer_TableLength); - - // Sanity check: ensure that g_InlineVertexBuffer_Table is not growing indefinetly. This can happen if D3DDevice_Begin and D3DDevice_End - // are not patched, since they both reset g_InlineVertexBuffer_TableOffset back to zero, thus preventing further growth - if (g_InlineVertexBuffer_TableLength > 50000) { - LOG_TEST_CASE("g_InlineVertexBuffer_TableLength > 50000! This probably means that g_InlineVertexBuffer_Table is growing indefinitely."); - } - } - - // Is this the initial call after D3DDevice_Begin() ? 
- if (g_InlineVertexBuffer_FVF == 0) { - // Set first vertex to zero (preventing leaks from prior Begin/End calls) - g_InlineVertexBuffer_Table[0] = {}; - - // Handle persistent vertex attribute flags, by resetting non-persistent colors - // to their default value (and leaving the persistent colors alone - see the - // "Copy all attributes of the previous vertex" comment below) : - static const uint32_t ColorBlack = D3DCOLOR_ARGB(0, 0, 0, 0); - static const uint32_t ColorWhite = D3DCOLOR_ARGB(255, 255, 255, 255); - - // If needed, write default vertex colors to HLE NV2A pgraph : - if (!(ActiveVertexAttributeFlags & X_D3DUSAGE_PERSISTENTDIFFUSE)) { - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_DIFFUSE, ColorWhite); - } - - if (!(ActiveVertexAttributeFlags & X_D3DUSAGE_PERSISTENTSPECULAR)) { - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_SPECULAR, ColorBlack); - } - - if (!(ActiveVertexAttributeFlags & X_D3DUSAGE_PERSISTENTBACKDIFFUSE)) { - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_BACKDIFFUSE, ColorWhite); - } - - if (!(ActiveVertexAttributeFlags & X_D3DUSAGE_PERSISTENTBACKSPECULAR)) { - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_BACKSPECULAR, ColorBlack); - } - - // Read starting vertex colors from HLE NV2A pgraph : - g_InlineVertexBuffer_Table[0].Diffuse = HLE_read_NV2A_vertex_attribute_slot(X_D3DVSDE_DIFFUSE); - g_InlineVertexBuffer_Table[0].Specular = HLE_read_NV2A_vertex_attribute_slot(X_D3DVSDE_SPECULAR); - g_InlineVertexBuffer_Table[0].BackDiffuse = HLE_read_NV2A_vertex_attribute_slot(X_D3DVSDE_BACKDIFFUSE); - g_InlineVertexBuffer_Table[0].BackSpecular = HLE_read_NV2A_vertex_attribute_slot(X_D3DVSDE_BACKSPECULAR); - } - - int o = g_InlineVertexBuffer_TableOffset; - unsigned int FVFPosType = g_InlineVertexBuffer_FVF & D3DFVF_POSITION_MASK; - - switch(Register) - { - case X_D3DVSDE_VERTEX: - case X_D3DVSDE_POSITION: - { - // Note : Setting position signals completion of a vertex - g_InlineVertexBuffer_Table[o].Position.x = a; - 
g_InlineVertexBuffer_Table[o].Position.y = b; - g_InlineVertexBuffer_Table[o].Position.z = c; - g_InlineVertexBuffer_Table[o].Rhw = d; // Was : 1.0f; // Dxbx note : Why set Rhw to 1.0? And why ignore d? - - switch (g_InlineVertexBuffer_FVF & D3DFVF_POSITION_MASK) { - case 0: - // No position mask given yet, set it now : - if (g_InlineVertexBuffer_FVF & D3DFVF_NORMAL) { - // See https://msdn.microsoft.com/ru-ru/library/windows/desktop/bb172559(v=vs.85).aspx and DxbxFVFToVertexSizeInBytes - // D3DFVF_NORMAL cannot be combined with D3DFVF_XYZRHW : - g_InlineVertexBuffer_FVF |= D3DFVF_XYZ; - g_InlineVertexBuffer_Table[o].Rhw = 1.0f; // This, just to stay close to prior behaviour - } - else { - // Without D3DFVF_NORMAL, assume D3DFVF_XYZRHW - g_InlineVertexBuffer_FVF |= D3DFVF_XYZRHW; - } - break; - case D3DFVF_XYZ: - case D3DFVF_XYZRHW: - case D3DFVF_XYZB1: - // These are alright - break; - default: - EmuLog(LOG_LEVEL::WARNING, "D3DDevice_SetVertexData4f unexpected FVF when selecting D3DFVF_XYZ(RHW) : %x", g_InlineVertexBuffer_FVF); - // TODO : How to resolve this? - } - - // Start a new vertex - g_InlineVertexBuffer_TableOffset++; - // Copy all attributes of the previous vertex (if any) to the new vertex - g_InlineVertexBuffer_Table[g_InlineVertexBuffer_TableOffset] = g_InlineVertexBuffer_Table[o]; - - break; - } - - case X_D3DVSDE_BLENDWEIGHT: - { - g_InlineVertexBuffer_Table[o].Blend[0] = a; - g_InlineVertexBuffer_Table[o].Blend[1] = b; - g_InlineVertexBuffer_Table[o].Blend[2] = c; - g_InlineVertexBuffer_Table[o].Blend[3] = d; - // TODO: Test the above. - // Xbox supports up to 4 blendweights - - switch (g_InlineVertexBuffer_FVF & D3DFVF_POSITION_MASK) { - case 0: - // No position mask given yet, set it now : - g_InlineVertexBuffer_FVF |= D3DFVF_XYZB1; - // TODO: How to select blendweight D3DFVF_XYZB2 or up? 
- break; - case D3DFVF_XYZB1: - // These are alright - break; - default: - EmuLog(LOG_LEVEL::WARNING, "D3DDevice_SetVertexData4f unexpected FVF when processing X_D3DVSDE_BLENDWEIGHT : %x", g_InlineVertexBuffer_FVF); - g_InlineVertexBuffer_FVF &= ~D3DFVF_POSITION_MASK; // for now, remove prior position mask, leading to blending below - g_InlineVertexBuffer_FVF |= D3DFVF_XYZB1; - // TODO: How to select blendweight D3DFVF_XYZB2 or up? - // TODO : How to resolve this? - } - - break; - } - - case X_D3DVSDE_NORMAL: - { - g_InlineVertexBuffer_Table[o].Normal.x = a; - g_InlineVertexBuffer_Table[o].Normal.y = b; - g_InlineVertexBuffer_Table[o].Normal.z = c; - g_InlineVertexBuffer_FVF |= D3DFVF_NORMAL; - break; - } - - case X_D3DVSDE_DIFFUSE: - { - g_InlineVertexBuffer_Table[o].Diffuse = D3DCOLOR_COLORVALUE(a, b, c, d); - g_InlineVertexBuffer_FVF |= D3DFVF_DIFFUSE; - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_DIFFUSE, g_InlineVertexBuffer_Table[o].Diffuse); - break; - } - - case X_D3DVSDE_SPECULAR: - { - g_InlineVertexBuffer_Table[o].Specular = D3DCOLOR_COLORVALUE(a, b, c, d); - g_InlineVertexBuffer_FVF |= D3DFVF_SPECULAR; - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_SPECULAR, g_InlineVertexBuffer_Table[o].Specular); - break; - } - - case X_D3DVSDE_FOG: // Xbox extension - { - g_InlineVertexBuffer_Table[o].Fog = a; // TODO : What about the other (b, c and d) arguments? 
- //EmuLog(LOG_LEVEL::WARNING, "Host Direct3D8 doesn''t support FVF FOG"); - break; - } - - // Note : X_D3DVSDE_POINTSIZE: Maps to D3DFVF_PSIZE, which is not available on Xbox FVF's - - case X_D3DVSDE_BACKDIFFUSE: // Xbox extension - { - g_InlineVertexBuffer_Table[o].BackDiffuse = D3DCOLOR_COLORVALUE(a, b, c, d); - //EmuLog(LOG_LEVEL::WARNING, "Host Direct3D8 doesn''t support FVF BACKDIFFUSE"); - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_BACKDIFFUSE, g_InlineVertexBuffer_Table[o].BackDiffuse); - break; - } - - case X_D3DVSDE_BACKSPECULAR: // Xbox extension - { - g_InlineVertexBuffer_Table[o].BackSpecular = D3DCOLOR_COLORVALUE(a, b, c, d); - //EmuLog(LOG_LEVEL::WARNING, "Host Direct3D8 doesn''t support FVF BACKSPECULAR"); - HLE_write_NV2A_vertex_attribute_slot(X_D3DVSDE_BACKSPECULAR, g_InlineVertexBuffer_Table[o].BackSpecular); - break; - } - - case X_D3DVSDE_TEXCOORD0: - { - g_InlineVertexBuffer_Table[o].TexCoord[0].x = a; - g_InlineVertexBuffer_Table[o].TexCoord[0].y = b; - g_InlineVertexBuffer_Table[o].TexCoord[0].z = c; - g_InlineVertexBuffer_Table[o].TexCoord[0].w = d; - if ((g_InlineVertexBuffer_FVF & D3DFVF_TEXCOUNT_MASK) < D3DFVF_TEX1) { - // Dxbx fix : Use mask, else the format might get expanded incorrectly : - g_InlineVertexBuffer_FVF &= ~D3DFVF_TEXCOUNT_MASK; - g_InlineVertexBuffer_FVF |= D3DFVF_TEX1; - // Dxbx note : Correct usage of D3DFVF_TEX1 (and the other cases below) - // can be tested with "Daphne Xbox" (the Laserdisc Arcade Game Emulator). 
- } - - break; - } - - case X_D3DVSDE_TEXCOORD1: - { - g_InlineVertexBuffer_Table[o].TexCoord[1].x = a; - g_InlineVertexBuffer_Table[o].TexCoord[1].y = b; - g_InlineVertexBuffer_Table[o].TexCoord[1].z = c; - g_InlineVertexBuffer_Table[o].TexCoord[1].w = d; - if ((g_InlineVertexBuffer_FVF & D3DFVF_TEXCOUNT_MASK) < D3DFVF_TEX2) { - g_InlineVertexBuffer_FVF &= ~D3DFVF_TEXCOUNT_MASK; - g_InlineVertexBuffer_FVF |= D3DFVF_TEX2; - } - - break; - } - - case X_D3DVSDE_TEXCOORD2: - { - g_InlineVertexBuffer_Table[o].TexCoord[2].x = a; - g_InlineVertexBuffer_Table[o].TexCoord[2].y = b; - g_InlineVertexBuffer_Table[o].TexCoord[2].z = c; - g_InlineVertexBuffer_Table[o].TexCoord[2].w = d; - if ((g_InlineVertexBuffer_FVF & D3DFVF_TEXCOUNT_MASK) < D3DFVF_TEX3) { - g_InlineVertexBuffer_FVF &= ~D3DFVF_TEXCOUNT_MASK; - g_InlineVertexBuffer_FVF |= D3DFVF_TEX3; - } - - break; - } - - case X_D3DVSDE_TEXCOORD3: - { - g_InlineVertexBuffer_Table[o].TexCoord[3].x = a; - g_InlineVertexBuffer_Table[o].TexCoord[3].y = b; - g_InlineVertexBuffer_Table[o].TexCoord[3].z = c; - g_InlineVertexBuffer_Table[o].TexCoord[3].w = d; - if ((g_InlineVertexBuffer_FVF & D3DFVF_TEXCOUNT_MASK) < D3DFVF_TEX4) { - g_InlineVertexBuffer_FVF &= ~D3DFVF_TEXCOUNT_MASK; - g_InlineVertexBuffer_FVF |= D3DFVF_TEX4; - } - - break; - } - - default: - EmuLog(LOG_LEVEL::WARNING, "Unknown IVB Register : %d", Register); - } + CxbxImpl_SetVertexData4f(Register, a, b, c, d); } // ****************************************************************** @@ -5063,12 +4792,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_End)() { LOG_FUNC(); - if(g_InlineVertexBuffer_TableOffset > 0) - EmuFlushIVB(); - - // TODO: Should technically clean this up at some point..but on XP doesnt matter much -// g_VMManager.Deallocate((VAddr)g_InlineVertexBuffer_pData); -// g_VMManager.Deallocate((VAddr)g_InlineVertexBuffer_Table); + CxbxImpl_End(); } // ****************************************************************** @@ -6880,9 +6604,10 @@ xbox::void_xt 
WINAPI xbox::EMUPATCH(D3DDevice_SetVertexShader) { LOG_FUNC_ONE_ARG(Handle); - CxbxImpl_SetVertexShader(Handle); + if (XB_TRMP(D3DDevice_SetVertexShader)) + XB_TRMP(D3DDevice_SetVertexShader)(Handle); - UpdateViewPortOffsetAndScaleConstants(); + CxbxImpl_SetVertexShader(Handle); } // This uses a custom calling convention where Handle is passed in EBX @@ -6895,8 +6620,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetVertexShader_0)() LOG_FUNC_ONE_ARG(Handle); CxbxImpl_SetVertexShader(Handle); - - UpdateViewPortOffsetAndScaleConstants(); } // TODO : Move to own file @@ -6905,7 +6628,7 @@ constexpr unsigned int InputQuadsPerPage = ((IndicesPerPage * VERTICES_PER_QUAD) // TODO : Move to own file // Called by CxbxDrawPrimitiveUP (indirectly by D3DDevice_DrawVerticesUP, -// EmuExecutePushBufferRaw and EmuFlushIVB) when PrimitiveType == X_D3DPT_QUADLIST. +// EmuExecutePushBufferRaw and CxbxImpl_End) when PrimitiveType == X_D3DPT_QUADLIST. // Emulated by calling g_pD3DDevice->DrawIndexedPrimitiveUP with index data that maps // quads to triangles. This function creates the index buffer that is needed for this; // For every quad that must be drawn, we generate indices for two triangles. @@ -7071,7 +6794,6 @@ void CxbxDrawIndexed(CxbxDrawContext &DrawContext) assert(DrawContext.dwStartVertex == 0); assert(DrawContext.pXboxIndexData != nullptr); assert(DrawContext.dwVertexCount > 0); // TODO : If this fails, make responsible callers do an early-exit - assert(IsValidCurrentShader()); bool bConvertQuadListToTriangleList = (DrawContext.XboxPrimitiveType == xbox::X_D3DPT_QUADLIST); ConvertedIndexBuffer& CacheEntry = CxbxUpdateActiveIndexBuffer(DrawContext.pXboxIndexData, DrawContext.dwVertexCount, bConvertQuadListToTriangleList); @@ -7135,7 +6857,7 @@ void CxbxDrawIndexed(CxbxDrawContext &DrawContext) // TODO : Move to own file // Drawing function specifically for rendering Xbox draw calls supplying a 'User Pointer'. 
-// Called by D3DDevice_DrawVerticesUP, EmuExecutePushBufferRaw and EmuFlushIVB +// Called by D3DDevice_DrawVerticesUP, EmuExecutePushBufferRaw and CxbxImpl_End void CxbxDrawPrimitiveUP(CxbxDrawContext &DrawContext) { LOG_INIT // Allows use of DEBUG_D3DRESULT @@ -7199,7 +6921,7 @@ void CxbxDrawPrimitiveUP(CxbxDrawContext &DrawContext) IDirect3DBaseTexture* CxbxConvertXboxSurfaceToHostTexture(xbox::X_D3DBaseTexture* pBaseTexture) { - LOG_INIT; + LOG_INIT; // Allows use of DEBUG_D3DRESULT IDirect3DTexture* pNewHostTexture = nullptr; #if 0 // TODO : Complete, debug and activate (and then cleanup GetHostBaseTexture) @@ -7232,27 +6954,27 @@ IDirect3DBaseTexture* CxbxConvertXboxSurfaceToHostTexture(xbox::X_D3DBaseTexture return (IDirect3DBaseTexture*)pNewHostTexture; // return it as a base texture } -void EmuUpdateActiveTextureStages() +void CxbxUpdateHostTextures() { - LOG_INIT; + LOG_INIT; // Allows use of DEBUG_D3DRESULT - for (int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) - { - xbox::X_D3DBaseTexture *pBaseTexture = g_pXbox_SetTexture[i]; - IDirect3DBaseTexture *pHostBaseTexture = nullptr; + // Set the host texture for each stage + for (int stage = 0; stage < xbox::X_D3DTS_STAGECOUNT; stage++) { + auto pXboxBaseTexture = g_pXbox_SetTexture[stage]; + IDirect3DBaseTexture* pHostBaseTexture = nullptr; bool bNeedRelease = false; - if (pBaseTexture != xbox::zeroptr) { - DWORD Type = GetXboxCommonResourceType(pBaseTexture); - switch (Type) { + if (pXboxBaseTexture != xbox::zeroptr) { + DWORD XboxResourceType = GetXboxCommonResourceType(pXboxBaseTexture); + switch (XboxResourceType) { case X_D3DCOMMON_TYPE_TEXTURE: - pHostBaseTexture = GetHostBaseTexture(pBaseTexture, /*D3DUsage=*/0, i); + pHostBaseTexture = GetHostBaseTexture(pXboxBaseTexture, /*D3DUsage=*/0, stage); break; case X_D3DCOMMON_TYPE_SURFACE: // Surfaces can be set in the texture stages, instead of textures LOG_TEST_CASE("ActiveTexture set to a surface (non-texture) resource"); // Test cases : Burnout, Outrun 
2006 // We must wrap the surface before using it as a texture - pHostBaseTexture = CxbxConvertXboxSurfaceToHostTexture(pBaseTexture); + pHostBaseTexture = CxbxConvertXboxSurfaceToHostTexture(pXboxBaseTexture); // Release this texture (after SetTexture) when we succeeded in creating it : bNeedRelease = pHostBaseTexture != nullptr; break; @@ -7262,7 +6984,7 @@ void EmuUpdateActiveTextureStages() } } - HRESULT hRet = g_pD3DDevice->SetTexture(i, pHostBaseTexture); + HRESULT hRet = g_pD3DDevice->SetTexture(stage, pHostBaseTexture); DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetTexture"); if (bNeedRelease) { pHostBaseTexture->Release(); @@ -7270,31 +6992,124 @@ void EmuUpdateActiveTextureStages() } } +void CxbxUpdateHostTextureScaling() +{ + extern xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat(); // TMP glue + + // Xbox works with "Linear" and "Swizzled" texture formats + // Linear formats are not addressed with normalized coordinates (similar to https://www.khronos.org/opengl/wiki/Rectangle_Texture?) + // We want to use normalized coordinates in our shaders, so need to be able to scale the coordinates back + // Note texcoords aren't only used for texture lookups + // TODO store scaling per texture instead of per stage, and scale during lookup in the pixel shader + + // Each texture stage has one texture coordinate set associated with it + // We'll store scale factors for each texture coordinate set + std::array, xbox::X_D3DTS_STAGECOUNT> texcoordScales; + texcoordScales.fill({ 1, 1, 1, 1 }); + + for (int stage = 0; stage < xbox::X_D3DTS_STAGECOUNT; stage++) { + auto pXboxBaseTexture = g_pXbox_SetTexture[stage]; + + // No texture, no scaling to do + if (pXboxBaseTexture == xbox::zeroptr) { + continue; + } + + // Texcoord index. 
Just the texture stage unless fixed function mode + int texCoordIndex = stage; + if (g_Xbox_VertexShader_IsFixedFunction) { + // Get TEXCOORDINDEX for the current texture stage's state + // Stores both the texture stage index and information for generating coordinates + // See D3DTSS_TEXCOORDINDEX + auto texCoordIndexState = XboxTextureStates.Get(stage, xbox::X_D3DTSS_TEXCOORDINDEX); + + // If coordinates are generated, we don't have to worry about the coordinates coming from the title + bool isGenerated = texCoordIndexState >= X_D3DTSS_TCI_CAMERASPACENORMAL; + if (isGenerated) { + continue; + } + + // Determine the texture coordinate addressing this texture stage + texCoordIndex = (texCoordIndexState & 0x3); // 0 - 3 + } + + auto texCoordScale = &texcoordScales[texCoordIndex]; + + // Check for active linear textures. + xbox::X_D3DFORMAT XboxFormat = GetXboxPixelContainerFormat(pXboxBaseTexture); + if (EmuXBFormatIsLinear(XboxFormat)) { + // Test-case : This is often hit by the help screen in XDK samples. + // Set scaling factor for this texture, which will be applied to + // all texture-coordinates in CxbxVertexShaderTemplate.hlsl + // Note : Linear textures are two-dimensional at most (right?) + *texCoordScale = { + (float)GetPixelContainerWidth(pXboxBaseTexture), + (float)GetPixelContainerHeight(pXboxBaseTexture), + (float)CxbxGetPixelContainerDepth(pXboxBaseTexture), + 1.0f + }; + } + } + // Pass above determined texture scaling factors to our HLSL shader. + // Note : CxbxVertexShaderTemplate.hlsl applies texture scaling on + // output registers oT0 to oT3. It may be needed to move the scaling + // and apply it on input registers instead. In that case, we'd have to + // figure out which registers are used to pass texture-coordinates into + // the shader and allow scaling on any of the input registers (so we'd + // need to allow scaling on all 16 attributes, instead of just the four + // textures like we do right now). 
+ g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_TEXTURES_SCALE_BASE, (float*)texcoordScales.data(), CXBX_D3DVS_TEXTURES_SCALE_SIZE); +} + +extern float* HLE_get_NV2A_vertex_constant_float4_ptr(unsigned const_index); // TMP glue + +// TODO : Once we're able to flush the NV2A push buffer +// remove our patches on D3DDevice_SetVertexShaderConstant (and CxbxImpl_SetVertexShaderConstant) +void CxbxUpdateHostVertexShaderConstants() +{ + // Transfer all constants that have been flagged dirty to host + auto nv2a = g_NV2A->GetDeviceState(); + for (int i = 0; i < X_D3DVS_CONSTREG_COUNT; i++) { + if (nv2a->pgraph.vsh_constants_dirty[i]) { + nv2a->pgraph.vsh_constants_dirty[i] = false; + + float *constant_floats = HLE_get_NV2A_vertex_constant_float4_ptr(i); + // Note : If host SetVertexShaderConstantF has high overhead (unlikely), + // we could combine multiple adjacent constants into one call. + g_pD3DDevice->SetVertexShaderConstantF(i, constant_floats, 1); + } + } + + // FIXME our viewport constants don't match Xbox values + // If we write them to pgraph constants, like we do with constants set by the title, + // the Xbox could overwrite them (at any time?) and we get flickering geometry. + // For now, set our viewport constants directly in the call below, + // overwriting whatever was in pgraph + // Test case: + // Xbox dashboard (during initial fade from black) + // Need for Speed: Hot Pursuit 2 (car select) + CxbxUpdateHostViewPortOffsetAndScaleConstants(); +} + +extern void CxbxUpdateHostVertexDeclaration(); // TMP glue +extern void CxbxUpdateHostVertexShader(); // TMP glue + void CxbxUpdateNativeD3DResources() { // Before we start, make sure our resource cache stays limited in size PrunePaletizedTexturesCache(); // TODO : Could we move this to Swap instead? 
- EmuUpdateActiveTextureStages(); + CxbxUpdateHostVertexDeclaration(); - // Some titles set Vertex Shader constants directly via pushbuffers rather than through D3D - // We handle that case by updating any constants that have the dirty flag set on the nv2a. - auto nv2a = g_NV2A->GetDeviceState(); - for(int i = 0; i < X_D3DVS_CONSTREG_COUNT; i++) { - // Skip vOffset and vScale constants, we don't want our values to be overwritten by accident - if (i == X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED || i == X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED) { - continue; - } + CxbxUpdateHostVertexShader(); - if (nv2a->pgraph.vsh_constants_dirty[i]) { - g_pD3DDevice->SetVertexShaderConstantF(i, (float*)&nv2a->pgraph.vsh_constants[i][0], 1); - nv2a->pgraph.vsh_constants_dirty[i] = false; - } - } + CxbxUpdateHostVertexShaderConstants(); - // NOTE: Order is important here + // NOTE: Order is important here // Some Texture States depend on RenderState values (Point Sprites) // And some Pixel Shaders depend on Texture State values (BumpEnvMat, etc) + CxbxUpdateHostTextures(); + CxbxUpdateHostTextureScaling(); XboxRenderStates.Apply(); XboxTextureStates.Apply(); @@ -7303,9 +7118,8 @@ void CxbxUpdateNativeD3DResources() DxbxUpdateActivePixelShader(); } + /* TODO : Port these : - DxbxUpdateActiveVertexShader(); - DxbxUpdateActiveTextures(); DxbxUpdateDeferredStates(); // BeginPush sample shows us that this must come *after* texture update! 
DxbxUpdateActiveVertexBufferStreams(); DxbxUpdateActiveRenderTarget(); @@ -7566,7 +7380,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DrawVertices) // TODO : Call unpatched D3DDevice_SetStateVB(0); CxbxUpdateNativeD3DResources(); - if (IsValidCurrentShader()) { + CxbxDrawContext DrawContext = {}; DrawContext.XboxPrimitiveType = PrimitiveType; @@ -7658,7 +7472,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DrawVertices) // NOTE : We don't restore the previously active index buffer } } - } CxbxHandleXboxCallbacks(); } @@ -7690,16 +7503,14 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DrawVerticesUP) CxbxUpdateNativeD3DResources(); - if (IsValidCurrentShader()) { - CxbxDrawContext DrawContext = {}; + CxbxDrawContext DrawContext = {}; - DrawContext.XboxPrimitiveType = PrimitiveType; - DrawContext.dwVertexCount = VertexCount; - DrawContext.pXboxVertexStreamZeroData = pVertexStreamZeroData; - DrawContext.uiXboxVertexStreamZeroStride = VertexStreamZeroStride; + DrawContext.XboxPrimitiveType = PrimitiveType; + DrawContext.dwVertexCount = VertexCount; + DrawContext.pXboxVertexStreamZeroData = pVertexStreamZeroData; + DrawContext.uiXboxVertexStreamZeroStride = VertexStreamZeroStride; - CxbxDrawPrimitiveUP(DrawContext); - } + CxbxDrawPrimitiveUP(DrawContext); CxbxHandleXboxCallbacks(); } @@ -7751,18 +7562,16 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DrawIndexedVertices) CxbxUpdateNativeD3DResources(); - if (IsValidCurrentShader()) { - CxbxDrawContext DrawContext = {}; + CxbxDrawContext DrawContext = {}; - DrawContext.XboxPrimitiveType = PrimitiveType; - DrawContext.dwVertexCount = VertexCount; - DrawContext.dwBaseVertexIndex = g_Xbox_BaseVertexIndex; // Multiplied by vertex stride and added to the vertex buffer start - DrawContext.pXboxIndexData = pIndexData; // Used to derive VerticesInBuffer + DrawContext.XboxPrimitiveType = PrimitiveType; + DrawContext.dwVertexCount = VertexCount; + DrawContext.dwBaseVertexIndex = g_Xbox_BaseVertexIndex; // Multiplied by 
vertex stride and added to the vertex buffer start + DrawContext.pXboxIndexData = pIndexData; // Used to derive VerticesInBuffer - // Test case JSRF draws all geometry through this function (only sparks are drawn via another method) - // using X_D3DPT_TRIANGLELIST and X_D3DPT_TRIANGLESTRIP PrimitiveType - CxbxDrawIndexed(DrawContext); - } + // Test case JSRF draws all geometry through this function (only sparks are drawn via another method) + // using X_D3DPT_TRIANGLELIST and X_D3DPT_TRIANGLESTRIP PrimitiveType + CxbxDrawIndexed(DrawContext); CxbxHandleXboxCallbacks(); } @@ -7796,7 +7605,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DrawIndexedVerticesUP) CxbxUpdateNativeD3DResources(); - if (IsValidCurrentShader()) { CxbxDrawContext DrawContext = {}; INDEX16* pXboxIndexData = (INDEX16*)pIndexData; @@ -7866,7 +7674,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DrawIndexedVerticesUP) DrawContext.uiHostVertexStreamZeroStride ); } - } CxbxHandleXboxCallbacks(); } @@ -7943,7 +7750,6 @@ static void CxbxImpl_SetRenderTarget IDirect3DSurface *pHostRenderTarget = nullptr; IDirect3DSurface *pHostDepthStencil = nullptr; - // In Xbox titles, CreateDevice calls SetRenderTarget for the back buffer // We can use this to determine the Xbox backbuffer surface for later use! if (g_pXbox_BackBufferSurface == xbox::zeroptr) { @@ -7952,8 +7758,12 @@ static void CxbxImpl_SetRenderTarget // if that happens, we might need to skip the first one or two calls? } + // In Xbox titles, CreateDevice calls SetRenderTarget (our caller) for the depth stencil + // We can use this to determine the Xbox depth stencil surface for later use! if (g_pXbox_DefaultDepthStencilSurface == xbox::zeroptr) { g_pXbox_DefaultDepthStencilSurface = pNewZStencil; + // TODO : Some titles might set another depth stencil later on, + // if that happens, we might need to skip the first one or two calls? 
} // The current render target is only replaced if it's passed in here non-null @@ -7974,6 +7784,7 @@ static void CxbxImpl_SetRenderTarget // The currenct depth stencil is always replaced by whats passed in here (even a null) g_pXbox_DepthStencil = pNewZStencil; + g_ZScale = GetZScaleForSurface(g_pXbox_DepthStencil); // TODO : Discern between Xbox and host and do this in UpdateDepthStencilFlags? pHostDepthStencil = GetHostSurface(g_pXbox_DepthStencil, D3DUSAGE_DEPTHSTENCIL); HRESULT hRet; @@ -7997,13 +7808,12 @@ static void CxbxImpl_SetRenderTarget // Validate that our host render target is still the correct size DWORD HostRenderTarget_Width, HostRenderTarget_Height; - if (GetHostRenderTargetDimensions(&HostRenderTarget_Width, &HostRenderTarget_Height)) { + if (GetHostRenderTargetDimensions(&HostRenderTarget_Width, &HostRenderTarget_Height, pHostRenderTarget)) { DWORD XboxRenderTarget_Width = GetPixelContainerWidth(g_pXbox_RenderTarget); DWORD XboxRenderTarget_Height = GetPixelContainerHeight(g_pXbox_RenderTarget); ValidateRenderTargetDimensions(HostRenderTarget_Width, HostRenderTarget_Height, XboxRenderTarget_Width, XboxRenderTarget_Height); } - UpdateViewPortOffsetAndScaleConstants(); CalculateMultiSampleScaleForRenderTarget(pRenderTarget); } @@ -8150,7 +7960,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetPalette) if (Stage >= xbox::X_D3DTS_STAGECOUNT) { LOG_TEST_CASE("Stage out of bounds"); } else { - // Note : Actual update of paletized textures (X_D3DFMT_P8) happens in EmuUpdateActiveTextureStages! + // Note : Actual update of paletized textures (X_D3DFMT_P8) happens in CxbxUpdateHostTextures! g_pXbox_Palette_Data[Stage] = GetDataFromXboxResource(pPalette); g_Xbox_Palette_Size[Stage] = pPalette ? 
XboxD3DPaletteSizeToBytes(GetXboxPaletteSize(pPalette)) : 0; } @@ -8249,27 +8059,14 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_DeleteVertexShader) { LOG_FUNC_ONE_ARG(Handle); - XB_TRMP(D3DDevice_DeleteVertexShader)(Handle); - CxbxImpl_DeleteVertexShader(Handle); + + // When deleting, call trampoline *after* our implementation, + // so that we can still access it's fields before it gets deleted! + XB_TRMP(D3DDevice_DeleteVertexShader)(Handle); } -// ****************************************************************** -// * patch: D3DDevice_SelectVertexShaderDirect -// ****************************************************************** -xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SelectVertexShaderDirect) -( - X_VERTEXATTRIBUTEFORMAT *pVAF, - dword_xt Address -) -{ - LOG_FUNC_BEGIN - LOG_FUNC_ARG(pVAF) - LOG_FUNC_ARG(Address) - LOG_FUNC_END; - CxbxImpl_SelectVertexShaderDirect(pVAF, Address); -} // ****************************************************************** // * patch: D3DDevice_GetShaderConstantMode @@ -8333,50 +8130,6 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_GetVertexShaderConstant) DEBUG_D3DRESULT(hRet, "g_pD3DDevice->GetVertexShaderConstant"); } -// ****************************************************************** -// * patch: D3DDevice_SetVertexShaderInputDirect -// ****************************************************************** -xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetVertexShaderInputDirect) -( - X_VERTEXATTRIBUTEFORMAT *pVAF, - uint_xt StreamCount, - X_STREAMINPUT *pStreamInputs -) -{ - LOG_FUNC_BEGIN - LOG_FUNC_ARG(pVAF) - LOG_FUNC_ARG(StreamCount) - LOG_FUNC_ARG(pStreamInputs) - LOG_FUNC_END; - - // If pVAF is given, it's copied into a global Xbox VertexBuffer struct and - // D3DDevice_SetVertexShaderInput is called with Handle set to that address, or-ed with 1 (X_D3DFVF_RESERVED0) - // Otherwise, D3DDevice_SetVertexShaderInput is called with Handle 0. 
- - LOG_UNIMPLEMENTED(); -} - -// ****************************************************************** -// * patch: D3DDevice_GetVertexShaderInput -// ****************************************************************** -xbox::hresult_xt WINAPI xbox::EMUPATCH(D3DDevice_GetVertexShaderInput) -( - dword_xt *pHandle, - uint_xt *pStreamCount, - X_STREAMINPUT *pStreamInputs -) -{ - LOG_FUNC_BEGIN - LOG_FUNC_ARG(pHandle) - LOG_FUNC_ARG(pStreamCount) - LOG_FUNC_ARG(pStreamInputs) - LOG_FUNC_END; - - LOG_UNIMPLEMENTED(); - - return 0; -} - // ****************************************************************** // * patch: D3DDevice_SetVertexShaderInput // ****************************************************************** @@ -8403,7 +8156,24 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetVertexShaderInput) // each vertex attribute (as defined in the given VertexShader.VertexAttribute.Slots[]) to read // the attribute data from the pStreamInputs[slot].VertexBuffer + pStreamInputs[slot].Offset + VertexShader.VertexAttribute.Slots[slot].Offset + /* LOG_TEST_CASE("SetVertexShaderInput"); + /* Test-cases : + PushBuffer XDK sample + Halo 2-3ebe4439.ini:D3DDevice_SetVertexShaderInput = 0x3f7440 + Kung Fu Chaos-d9ab292c.ini:D3DDevice_SetVertexShaderInput = 0x2bc0e0 + NBA LIVE 2005-71d4eeb1.ini:D3DDevice_SetVertexShaderInput = 0x5cf810 + NBA LIVE 2005-71d4eeb1.ini:D3DDevice_SetVertexShaderInputDirect = 0x5ceba0 + Prince of Persia WW-4ccf7369.ini:D3DDevice_SetVertexShaderInput = 0x494830 + Prince of Persia WW-4ccf7369.ini:D3DDevice_SetVertexShaderInputDirect = 0x494280 + Spyro A Hero's Tail-b18e00e5.ini:D3DDevice_SetVertexShaderInput = 0x286cf0 + Spyro A Hero's Tail-b18e00e5.ini:D3DDevice_SetVertexShaderInputDirect = 0x286760 + */ + CxbxImpl_SetVertexShaderInput(Handle, StreamCount, pStreamInputs); + + // Call trampoline + if (XB_TRMP(D3DDevice_SetVertexShaderInput)) + XB_TRMP(D3DDevice_SetVertexShaderInput)(Handle, StreamCount, pStreamInputs); } // 
****************************************************************** @@ -8583,8 +8353,7 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetScreenSpaceOffset) LOG_FUNC_ARG(y) LOG_FUNC_END; - // No need to log this, it's safe to ignore. - //EmuLog(LOG_LEVEL::WARNING, "EmuD3DDevice_SetScreenSpaceOffset ignored"); + CxbxImpl_SetScreenSpaceOffset(x, y); } // ****************************************************************** diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp.unused-patches b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp.unused-patches index 3a3e658b6..457813b46 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp.unused-patches +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.cpp.unused-patches @@ -4195,3 +4195,64 @@ xbox::void_xt WINAPI xbox::EMUPATCH(D3DDevice_SetPixelShaderConstant_4) hRet = D3D_OK; } } + +// ****************************************************************** +// * patch: D3DDevice_SelectVertexShaderDirect +// ****************************************************************** +VOID WINAPI XTL::EMUPATCH(D3DDevice_SelectVertexShaderDirect) +( + X_VERTEXATTRIBUTEFORMAT *pVAF, + DWORD Address +) +{ + LOG_FUNC_BEGIN + LOG_FUNC_ARG(pVAF) + LOG_FUNC_ARG(Address) + LOG_FUNC_END; + + CxbxImpl_SelectVertexShaderDirect(pVAF, Address); +} + +// ****************************************************************** +// * patch: D3DDevice_SetVertexShaderInputDirect +// ****************************************************************** +VOID WINAPI XTL::EMUPATCH(D3DDevice_SetVertexShaderInputDirect) +( + X_VERTEXATTRIBUTEFORMAT *pVAF, + UINT StreamCount, + X_STREAMINPUT *pStreamInputs +) +{ + LOG_FUNC_BEGIN + LOG_FUNC_ARG(pVAF) + LOG_FUNC_ARG(StreamCount) + LOG_FUNC_ARG(pStreamInputs) + LOG_FUNC_END; + + // If pVAF is given, it's copied into a global Xbox VertexBuffer struct and + // D3DDevice_SetVertexShaderInput is called with Handle set to that address, or-ed with 1 (X_D3DFVF_RESERVED0) + // Otherwise, D3DDevice_SetVertexShaderInput is called with Handle 
0. + + LOG_UNIMPLEMENTED(); +} + +// ****************************************************************** +// * patch: D3DDevice_GetVertexShaderInput +// ****************************************************************** +HRESULT WINAPI XTL::EMUPATCH(D3DDevice_GetVertexShaderInput) +( + DWORD *pHandle, + UINT *pStreamCount, + X_STREAMINPUT *pStreamInputs +) +{ + LOG_FUNC_BEGIN + LOG_FUNC_ARG(pHandle) + LOG_FUNC_ARG(pStreamCount) + LOG_FUNC_ARG(pStreamInputs) + LOG_FUNC_END; + + LOG_UNIMPLEMENTED(); + + return 0; +} diff --git a/src/core/hle/D3D8/Direct3D9/Direct3D9.h b/src/core/hle/D3D8/Direct3D9/Direct3D9.h index a95602864..673dd2610 100644 --- a/src/core/hle/D3D8/Direct3D9/Direct3D9.h +++ b/src/core/hle/D3D8/Direct3D9/Direct3D9.h @@ -63,6 +63,9 @@ extern uint8_t *ConvertD3DTextureToARGB( void CxbxUpdateNativeD3DResources(); +void CxbxImpl_SetRenderTarget(xbox::X_D3DSurface* pRenderTarget, xbox::X_D3DSurface* pNewZStencil); +void CxbxImpl_SetViewPort(xbox::X_D3DVIEWPORT8* pViewport); + // initialize direct3d extern void EmuD3DInit(); @@ -214,7 +217,7 @@ xbox::void_xt WINAPI EMUPATCH(D3DDevice_LoadVertexShader) ); xbox::void_xt __stdcall EMUPATCH(D3DDevice_LoadVertexShader_0)(); -xbox::void_xt WINAPI EMUPATCH(D3DDevice_LoadVertexShader_4) +xbox::void_xt EMUPATCH(D3DDevice_LoadVertexShader_4) ( dword_xt Address ); @@ -332,7 +335,7 @@ xbox::void_xt WINAPI EMUPATCH(D3DDevice_GetBackBuffer) // ****************************************************************** xbox::void_xt WINAPI EMUPATCH(D3DDevice_SetViewport) ( - CONST X_D3DVIEWPORT8 *pViewport + X_D3DVIEWPORT8 *pViewport ); // ****************************************************************** @@ -1602,15 +1605,6 @@ xbox::void_xt WINAPI EMUPATCH(D3DDevice_DeleteVertexShader) xbox::void_xt WINAPI EMUPATCH(D3DDevice_DeleteVertexShader_0)(); -// ****************************************************************** -// * patch: D3DDevice_SelectVertexShaderDirect -// 
****************************************************************** -xbox::void_xt WINAPI EMUPATCH(D3DDevice_SelectVertexShaderDirect) -( - X_VERTEXATTRIBUTEFORMAT *pVAF, - dword_xt Address -); - // ****************************************************************** // * patch: D3DDevice_GetShaderConstantMode // ****************************************************************** @@ -1637,16 +1631,6 @@ xbox::void_xt WINAPI EMUPATCH(D3DDevice_GetVertexShaderConstant) dword_xt ConstantCount ); -// ****************************************************************** -// * patch: D3DDevice_SetVertexShaderInputDirect -// ****************************************************************** -xbox::void_xt WINAPI EMUPATCH(D3DDevice_SetVertexShaderInputDirect) -( - X_VERTEXATTRIBUTEFORMAT *pVAF, - uint_xt StreamCount, - X_STREAMINPUT *pStreamInputs -); - // ****************************************************************** // * patch: D3DDevice_GetVertexShaderInput // ****************************************************************** @@ -1658,7 +1642,7 @@ xbox::hresult_xt WINAPI EMUPATCH(D3DDevice_GetVertexShaderInput) ); // ****************************************************************** -// * patch: D3DDevice_GetVertexShaderInput +// * patch: D3DDevice_SetVertexShaderInput // ****************************************************************** xbox::void_xt WINAPI EMUPATCH(D3DDevice_SetVertexShaderInput) ( diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp index 4b1f3584f..3a581bb89 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.cpp +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.cpp @@ -77,7 +77,7 @@ TextureStateInfo CxbxTextureStateInfo[] = { bool XboxTextureStateConverter::Init(XboxRenderStateConverter* pState) { - // Deferred states start at 0, this menas that D3DDeferredTextureState IS D3D__TextureState + // Deferred states start at 0, this means that D3DDeferredTextureState IS D3D__TextureState // No further 
works is required to derive the offset if (g_SymbolAddresses.find("D3DDeferredTextureState") != g_SymbolAddresses.end()) { D3D__TextureState = (uint32_t*)g_SymbolAddresses["D3DDeferredTextureState"]; @@ -184,41 +184,89 @@ void XboxTextureStateConverter::Apply() switch (State) { // These types map 1:1 but have some unsupported values case xbox::X_D3DTSS_ADDRESSU: case xbox::X_D3DTSS_ADDRESSV: case xbox::X_D3DTSS_ADDRESSW: - if (Value == xbox::X_D3DTADDRESS_CLAMPTOEDGE) { - EmuLog(LOG_LEVEL::WARNING, "D3DTADDRESS_CLAMPTOEDGE is unsupported"); - // D3DTADDRESS_BORDER is the closest host match, CLAMPTOEDGE is identical - // Except it has additional restrictions. - Value = D3DTADDRESS_BORDER; - break; + switch (Value) { + case 0: // Let's ignore zero (its no known X_D3DTADDRESS_ mode, but logging this seems useless) + case xbox::X_D3DTADDRESS_WRAP: // = 1 = D3DTADDRESS_WRAP = 1, + case xbox::X_D3DTADDRESS_MIRROR: // = 2 = D3DTADDRESS_MIRROR = 2, + case xbox::X_D3DTADDRESS_CLAMP: // = 3 = D3DTADDRESS_CLAMP = 3, + case xbox::X_D3DTADDRESS_BORDER: // = 4 = D3DTADDRESS_BORDER = 4, + // These match host Direct3D 9 values, so no update necessary + break; + case xbox::X_D3DTADDRESS_CLAMPTOEDGE: // = 5 + LOG_TEST_CASE("X_D3DTADDRESS_CLAMPTOEDGE unsupported, falling back to D3DTADDRESS_BORDER"); + // D3DTADDRESS_BORDER is the closest host match, CLAMPTOEDGE is identical + // Except it has additional restrictions. + Value = D3DTADDRESS_BORDER; + break; + default: + EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_ADDRESS? 
value %x", Value); + Value = D3DTADDRESS_WRAP; + break; } break; case xbox::X_D3DTSS_MAGFILTER: case xbox::X_D3DTSS_MINFILTER: case xbox::X_D3DTSS_MIPFILTER: - if (Value == xbox::X_D3DTEXF_QUINCUNX) { - EmuLog(LOG_LEVEL::WARNING, "D3DTEXF_QUINCUNX is unsupported"); - // Fallback to D3DTEXF_ANISOTROPIC - Value = D3DTEXF_ANISOTROPIC; - break; - } - break; - case xbox::X_D3DTSS_TEXCOORDINDEX: switch (Value) { - case 0x00040000: - // This value is TCI_OBJECT on Xbox,which is not supported by the host - // In this case, we reset to 0. - EmuLog(LOG_LEVEL::WARNING, "EmuD3DDevice_SetTextureState_TexCoordIndex: D3DTSS_TCI_OBJECT is unsupported", Value); - Value = 0; + case xbox::X_D3DTEXF_NONE: // = 0 = D3DTEXF_NONE = 0, // filtering disabled (valid for mip filter only) + case xbox::X_D3DTEXF_POINT: // = 1 = D3DTEXF_POINT = 1, // nearest + case xbox::X_D3DTEXF_LINEAR: // = 2 = D3DTEXF_LINEAR = 2, // linear interpolation + case xbox::X_D3DTEXF_ANISOTROPIC: // = 3 = D3DTEXF_ANISOTROPIC = 3, // anisotropic + // These match host Direct3D 9 values, so no update necessary break; - case 0x00050000: - // This value is TCI_SPHERE on Xbox, let's map it to D3DTSS_TCI_SPHEREMAP for the host - Value = D3DTSS_TCI_SPHEREMAP; + case xbox::X_D3DTEXF_QUINCUNX: // = 4; // quincunx kernel (Xbox extension), also known as "flat cubic" + LOG_TEST_CASE("X_D3DTEXF_QUINCUNX unsupported, falling back to D3DTEXF_ANISOTROPIC"); + Value = D3DTEXF_ANISOTROPIC; + break; + case xbox::X_D3DTEXF_GAUSSIANCUBIC: // = 5 // Xbox extension, different cubic kernel + // Direct3D 9 alternatives : + // D3DTEXF_PYRAMIDALQUAD = 6, // 4-sample tent + // D3DTEXF_GAUSSIANQUAD = 7, // 4-sample gaussian + // D3DTEXF_CONVOLUTIONMONO = 8, // Convolution filter for monochrome textures + LOG_TEST_CASE("X_D3DTEXF_QUINCUNX unsupported, falling back to D3DTEXF_GAUSSIANQUAD"); + Value = D3DTEXF_GAUSSIANQUAD; + break; + default: + EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_M??FILTER value %x", Value); + Value = D3DTEXF_NONE; 
break; } break; + case xbox::X_D3DTSS_TEXCOORDINDEX: { + int texCoordIndex = Value & 0x0000FFFF; + if (texCoordIndex > 3) { + LOG_TEST_CASE("TEXCOORDINDEX out of bounds, masking to lowest 2 bits"); + texCoordIndex = Value & 3; + } + switch (Value & 0xFFFF0000) { + case X_D3DTSS_TCI_PASSTHRU: // = 0x00000000 + case X_D3DTSS_TCI_CAMERASPACENORMAL: // = 0x00010000 + case X_D3DTSS_TCI_CAMERASPACEPOSITION: // = 0x00020000 + case X_D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: // = 0x00030000 + // These match host Direct3D 9 values, so no update necessary + break; + case X_D3DTSS_TCI_OBJECT: // = 0x00040000 + // Collides with host Direct3D 9 D3DTSS_TCI_SPHEREMAP + // This value is not supported on host in Direct3D 9 + // It probably means "TexGen ObjectLinear", or '(untransformed) object space identity mapping' + LOG_TEST_CASE("Xbox D3DTSS_TCI_OBJECT unsupported on host"); + // Test-case : Terrain XDK sample + Value = texCoordIndex; + break; + case X_D3DTSS_TCI_SPHEREMAP: // = 0x00050000 + // Convert Xbox sphere mapping bit to host Direct3D 9 (which uses a different bit) + Value = D3DTSS_TCI_SPHEREMAP | texCoordIndex; + break; + default: + EmuLog(LOG_LEVEL::WARNING, "Unsupported X_D3DTSS_TEXCOORDINDEX value %x", Value); + Value = texCoordIndex; + break; + } + break; + } // These types require value remapping for all supported values case xbox::X_D3DTSS_COLOROP: case xbox::X_D3DTSS_ALPHAOP: Value = GetHostTextureOpValue(Value); break; - // These types require no conversion, so we just pass through as-is + // These types require no conversion, so we just pass through as-is case xbox::X_D3DTSS_COLORARG0: case xbox::X_D3DTSS_COLORARG1: case xbox::X_D3DTSS_COLORARG2: case xbox::X_D3DTSS_ALPHAARG0: case xbox::X_D3DTSS_ALPHAARG1: case xbox::X_D3DTSS_ALPHAARG2: case xbox::X_D3DTSS_RESULTARG: case xbox::X_D3DTSS_TEXTURETRANSFORMFLAGS: @@ -273,3 +321,12 @@ void XboxTextureStateConverter::Apply() // no need to actually copy here, since it was handled in the loop above } } + +uint32_t 
XboxTextureStateConverter::Get(int textureStage, DWORD xboxState) { + if (textureStage < 0 || textureStage > 3) + CxbxKrnlCleanup("Requested texture stage was out of range: %d", textureStage); + if (xboxState < xbox::X_D3DTSS_FIRST || xboxState > xbox::X_D3DTSS_LAST) + CxbxKrnlCleanup("Requested texture state was out of range: %d", xboxState); + + return D3D__TextureState[(textureStage * xbox::X_D3DTS_STAGESIZE) + xboxState]; +} diff --git a/src/core/hle/D3D8/Direct3D9/TextureStates.h b/src/core/hle/D3D8/Direct3D9/TextureStates.h index 36281757c..e02f08e9c 100644 --- a/src/core/hle/D3D8/Direct3D9/TextureStates.h +++ b/src/core/hle/D3D8/Direct3D9/TextureStates.h @@ -38,6 +38,7 @@ class XboxTextureStateConverter public: bool Init(XboxRenderStateConverter* state); void Apply(); + uint32_t Get(int textureStage, DWORD xboxState); private: void BuildTextureStateMappingTable(); diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp index df2ec62f1..5c7449210 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.cpp @@ -220,44 +220,21 @@ extern ShaderType EmuGetShaderInfo(IntermediateVertexShader* pIntermediateShader return ShaderType::Compilable; } -// recompile xbox vertex shader function -extern HRESULT EmuCompileShader -( - IntermediateVertexShader* pIntermediateShader, - ID3DBlob** ppHostShader -) +HRESULT CompileHlsl(const std::string& hlsl, ID3DBlob** ppHostShader, const char* pSourceName) { // TODO include header in vertex shader //xbox::X_VSH_SHADER_HEADER* pXboxVertexShaderHeader = (xbox::X_VSH_SHADER_HEADER*)pXboxFunction; ID3DBlob* pErrors = nullptr; + ID3DBlob* pErrorsCompatibility = nullptr; HRESULT hRet = 0; - // Include HLSL header and footer as raw strings : - static std::string hlsl_template[2] = { - #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" - }; - - auto hlsl_stream = std::stringstream(); - hlsl_stream << hlsl_template[0]; // Start 
with the HLSL template header - assert(pIntermediateShader->Instructions.size() > 0); - BuildShader(pIntermediateShader, hlsl_stream); - - hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer - std::string hlsl_str = hlsl_stream.str(); - - EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---"); - EmuLog(LOG_LEVEL::DEBUG, DebugPrependLineNumbers(hlsl_str).c_str()); - EmuLog(LOG_LEVEL::DEBUG, "-----------------------"); - - - UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3 | D3DCOMPILE_AVOID_FLOW_CONTROL; - + UINT flags1 = D3DCOMPILE_OPTIMIZATION_LEVEL3; hRet = D3DCompile( - hlsl_str.c_str(), - hlsl_str.length(), - nullptr, // pSourceName + hlsl.c_str(), + hlsl.length(), + pSourceName, // pSourceName nullptr, // pDefines - nullptr, // pInclude // TODO precompile x_* HLSL functions? + D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? "main", // shader entry poiint g_vs_model, // shader profile flags1, // flags1 @@ -266,25 +243,26 @@ extern HRESULT EmuCompileShader &pErrors // ppErrorMsgs out ); if (FAILED(hRet)) { + EmuLog(LOG_LEVEL::WARNING, "Shader compile failed. Recompiling in compatibility mode"); // Attempt to retry in compatibility mode, this allows some vertex-state shaders to compile // Test Case: Spy vs Spy - flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY; + flags1 |= D3DCOMPILE_ENABLE_BACKWARDS_COMPATIBILITY | D3DCOMPILE_AVOID_FLOW_CONTROL; hRet = D3DCompile( - hlsl_str.c_str(), - hlsl_str.length(), - nullptr, // pSourceName + hlsl.c_str(), + hlsl.length(), + pSourceName, // pSourceName nullptr, // pDefines - nullptr, // pInclude // TODO precompile x_* HLSL functions? + D3D_COMPILE_STANDARD_FILE_INCLUDE, // pInclude // TODO precompile x_* HLSL functions? 
"main", // shader entry poiint g_vs_model, // shader profile flags1, // flags1 0, // flags2 ppHostShader, // out - &pErrors // ppErrorMsgs out + &pErrorsCompatibility // ppErrorMsgs out ); if (FAILED(hRet)) { - LOG_TEST_CASE("Couldn't assemble recompiled vertex shader"); + LOG_TEST_CASE("Couldn't assemble vertex shader"); //EmuLog(LOG_LEVEL::WARNING, "Couldn't assemble recompiled vertex shader"); } } @@ -296,6 +274,10 @@ extern HRESULT EmuCompileShader EmuLog(hlslErrorLogLevel, "%s", (char*)(pErrors->GetBufferPointer())); pErrors->Release(); pErrors = nullptr; + if (pErrorsCompatibility != nullptr) { + pErrorsCompatibility->Release(); + pErrorsCompatibility = nullptr; + } } LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) @@ -317,3 +299,156 @@ extern HRESULT EmuCompileShader return hRet; } + +// recompile xbox vertex shader function +extern HRESULT EmuCompileShader +( + IntermediateVertexShader* pIntermediateShader, + ID3DBlob** ppHostShader +) +{ + // Include HLSL header and footer as raw strings : + static std::string hlsl_template[2] = { + #include "core\hle\D3D8\Direct3D9\CxbxVertexShaderTemplate.hlsl" + }; + + auto hlsl_stream = std::stringstream(); + hlsl_stream << hlsl_template[0]; // Start with the HLSL template header + assert(pIntermediateShader->Instructions.size() > 0); + BuildShader(pIntermediateShader, hlsl_stream); + + hlsl_stream << hlsl_template[1]; // Finish with the HLSL template footer + std::string hlsl_str = hlsl_stream.str(); + + EmuLog(LOG_LEVEL::DEBUG, "--- HLSL conversion ---"); + EmuLog(LOG_LEVEL::DEBUG, DebugPrependLineNumbers(hlsl_str).c_str()); + EmuLog(LOG_LEVEL::DEBUG, "-----------------------"); + + return CompileHlsl(hlsl_str, ppHostShader, "CxbxVertexShaderTemplate.hlsl"); +} + +static ID3DBlob* pPassthroughShader = nullptr; + +extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader) +{ + // TODO does this need to be thread safe? 
+ if (pPassthroughShader == nullptr) { + auto hlsl = +R"( +// Xbox HLSL pretransformed vertex shader + +// Default values for vertex registers, and whether to use them +uniform float4 vRegisterDefaultValues[16] : register(c192); +uniform float4 vRegisterDefaultFlagsPacked[4] : register(c208); + +uniform float4 xboxViewportScaleInverse : register(c212); +uniform float4 xboxViewportOffset : register(c213); + + +uniform float4 xboxTextureScale[4] : register(c214); + +uniform float4 xboxIsRHWTransformedPosition : register(c218); + +struct VS_INPUT +{ + float4 v[16] : TEXCOORD; +}; + +// Output registers +struct VS_OUTPUT +{ + float4 oPos : POSITION; // Homogeneous clip space position + float4 oD0 : COLOR0; // Primary color (front-facing) + float4 oD1 : COLOR1; // Secondary color (front-facing) + float oFog : FOG; // Fog coordinate + float oPts : PSIZE; // Point size + float4 oB0 : TEXCOORD4; // Back-facing primary color + float4 oB1 : TEXCOORD5; // Back-facing secondary color + float4 oT0 : TEXCOORD0; // Texture coordinate set 0 + float4 oT1 : TEXCOORD1; // Texture coordinate set 1 + float4 oT2 : TEXCOORD2; // Texture coordinate set 2 + float4 oT3 : TEXCOORD3; // Texture coordinate set 3 +}; + +float4 reverseScreenspaceTransform(float4 oPos) +{ + // Scale screenspace coordinates (0 to viewport width/height) to -1 to +1 range + + // On Xbox, oPos should contain the vertex position in screenspace + // We need to reverse this transformation + // Conventionally, each Xbox Vertex Shader includes instructions like this + // mul oPos.xyz, r12, c-38 + // +rcc r1.x, r12.w + // mad oPos.xyz, r12, r1.x, c-37 + // where c-37 and c-38 are reserved transform values + + if (xboxIsRHWTransformedPosition.x) { + // Detect 0 w and avoid 0 division + if (oPos.w == 0) oPos.w = 1; // if else doesn't seem to work here + oPos.w = 1 / oPos.w; // flip rhw to w + } + + oPos.xyz -= xboxViewportOffset.xyz; // reverse offset + oPos.xyz *= oPos.w; // reverse perspective divide + oPos.xyz *= 
xboxViewportScaleInverse.xyz; // reverse scale + + return oPos; +} + +VS_OUTPUT main(const VS_INPUT xIn) +{ + // Input registers + float4 v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; + + // Unpack 16 flags from 4 float4 constant registers + float vRegisterDefaultFlags[16] = (float[16])vRegisterDefaultFlagsPacked; + + // Initialize input registers from the vertex buffer data + // Or use the register's default value (which can be changed by the title) + #define init_v(i) v##i = lerp(xIn.v[i], vRegisterDefaultValues[i], vRegisterDefaultFlags[i]); + // Note : unroll manually instead of for-loop, because of the ## concatenation + init_v( 0); init_v( 1); init_v( 2); init_v( 3); + init_v( 4); init_v( 5); init_v( 6); init_v( 7); + init_v( 8); init_v( 9); init_v(10); init_v(11); + init_v(12); init_v(13); init_v(14); init_v(15); + + // For passthrough, map output variables to their corresponding input registers + float4 oPos = v0; + float4 oD0 = v3; + float4 oD1 = v4; + float4 oFog = v5; + float4 oPts = v6; + float4 oB0 = v7; + float4 oB1 = v8; + float4 oT0 = v9; + float4 oT1 = v10; + float4 oT2 = v11; + float4 oT3 = v12; + + // Copy variables to output struct + VS_OUTPUT xOut; + + xOut.oPos = reverseScreenspaceTransform(oPos); + xOut.oD0 = saturate(oD0); + xOut.oD1 = saturate(oD1); + xOut.oFog = oFog.x; // Note : Xbox clamps fog in pixel shader + xOut.oPts = oPts.x; + xOut.oB0 = saturate(oB0); + xOut.oB1 = saturate(oB1); + // Scale textures (TODO : or should we apply this to the input register values?) 
+ xOut.oT0 = oT0 / xboxTextureScale[0]; + xOut.oT1 = oT1 / xboxTextureScale[1]; + xOut.oT2 = oT2 / xboxTextureScale[2]; + xOut.oT3 = oT3 / xboxTextureScale[3]; + + return xOut; +} +)"; + + CompileHlsl(hlsl, &pPassthroughShader, "passthrough.hlsl"); + } + + *ppHostShader = pPassthroughShader; + + return 0; +} diff --git a/src/core/hle/D3D8/Direct3D9/VertexShader.h b/src/core/hle/D3D8/Direct3D9/VertexShader.h index 3e02ff0b5..a6ea258fd 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShader.h +++ b/src/core/hle/D3D8/Direct3D9/VertexShader.h @@ -22,4 +22,6 @@ extern HRESULT EmuCompileShader ID3DBlob** ppHostShader ); +extern HRESULT EmuCompileXboxPassthrough(ID3DBlob** ppHostShader); + #endif diff --git a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp index c1103bef0..af464d7cc 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp +++ b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.cpp @@ -43,7 +43,7 @@ ID3DBlob* AsyncCreateVertexShader(IntermediateVertexShader intermediateShader, S // Create a new shader // If the shader was already created, just increase its reference count -ShaderKey VertexShaderSource::CreateShader(const DWORD* pXboxFunction, DWORD *pXboxFunctionSize) { +ShaderKey VertexShaderSource::CreateShader(const xbox::dword_xt* pXboxFunction, DWORD *pXboxFunctionSize) { IntermediateVertexShader intermediateShader; // Parse into intermediate format diff --git a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h index 0ac1204e4..ab5675847 100644 --- a/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h +++ b/src/core/hle/D3D8/Direct3D9/VertexShaderSource.h @@ -11,7 +11,7 @@ typedef uint64_t ShaderKey; class VertexShaderSource { public: - ShaderKey CreateShader(const DWORD* pXboxFunction, DWORD* pXboxFunctionSize); + ShaderKey CreateShader(const xbox::dword_xt* pXboxFunction, DWORD* pXboxFunctionSize); IDirect3DVertexShader *GetShader(ShaderKey key); 
void ReleaseShader(ShaderKey key); diff --git a/src/core/hle/D3D8/XbConvert.cpp b/src/core/hle/D3D8/XbConvert.cpp index 43ce3776c..4a74aec61 100644 --- a/src/core/hle/D3D8/XbConvert.cpp +++ b/src/core/hle/D3D8/XbConvert.cpp @@ -929,7 +929,7 @@ static const FormatInfo FormatInfos[] = { /* 0x33 X_D3DFMT_V16U16 */ { 32, Swzzld, NoCmpnts, D3DFMT_V16U16 }, /* 0x34 undefined */ {}, /* 0x35 X_D3DFMT_LIN_L16 */ { 16, Linear, _____L16, D3DFMT_L16 }, - /* 0x36 X_D3DFMT_LIN_V16U16 */ { 32, Linear, NoCmpnts, D3DFMT_V16U16 }, // Note : Seems ununsed on Xbox + /* 0x36 X_D3DFMT_LIN_V16U16 */ { 32, Linear, NoCmpnts, D3DFMT_V16U16 }, // Note : Seems unused on Xbox /* 0x37 X_D3DFMT_LIN_L6V5U5 */ { 16, Linear, __R6G5B5, D3DFMT_L6V5U5 }, // Alias : X_D3DFMT_LIN_R6G5B5 /* 0x38 X_D3DFMT_R5G5B5A1 */ { 16, Swzzld, R5G5B5A1, D3DFMT_A1R5G5B5 , Texture, "X_D3DFMT_R5G5B5A1 -> D3DFMT_A1R5G5B5" }, /* 0x39 X_D3DFMT_R4G4B4A4 */ { 16, Swzzld, R4G4B4A4, D3DFMT_A4R4G4B4 , Texture, "X_D3DFMT_R4G4B4A4 -> D3DFMT_A4R4G4B4" }, diff --git a/src/core/hle/D3D8/XbD3D8Types.h b/src/core/hle/D3D8/XbD3D8Types.h index 906538f9c..5b94f4f5d 100644 --- a/src/core/hle/D3D8/XbD3D8Types.h +++ b/src/core/hle/D3D8/XbD3D8Types.h @@ -1020,9 +1020,22 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; // Special Registers, used to pass additional information to the shaders // TODO co-locate shader workaround constants with shader code #define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE (X_D3DVS_CONSTREG_COUNT) -#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + 16) -#define CXBX_D3DVS_VIEWPORT_SCALE_MIRROR (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE + 4) -#define CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR (CXBX_D3DVS_VIEWPORT_SCALE_MIRROR + 1) +#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE 16 + +#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_SIZE) +#define CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE 4 + +#define 
CXBX_D3DVS_VIEWPORT_SCALE_INVERSE_BASE (CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE + CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE) +#define CXBX_D3DVS_VIEWPORT_SCALE_INVERSE_SIZE 1 + +#define CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR_BASE (CXBX_D3DVS_VIEWPORT_SCALE_INVERSE_BASE + CXBX_D3DVS_VIEWPORT_SCALE_INVERSE_SIZE) +#define CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR_SIZE 1 + +#define CXBX_D3DVS_TEXTURES_SCALE_BASE (CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR_BASE + CXBX_D3DVS_VIEWPORT_OFFSET_MIRROR_SIZE) +#define CXBX_D3DVS_TEXTURES_SCALE_SIZE 4 + +#define CXBX_D3DVS_IS_RHW_TRANSFORMED_POSITION_BASE (CXBX_D3DVS_TEXTURES_SCALE_BASE + CXBX_D3DVS_TEXTURES_SCALE_SIZE) +#define CXBX_D3DVS_IS_RHW_TRANSFORMED_POSITION_SIZE 1 #define X_D3DSCM_RESERVED_CONSTANT_SCALE_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_SCALE + X_D3DSCM_CORRECTION) #define X_D3DSCM_RESERVED_CONSTANT_OFFSET_CORRECTED (X_D3DSCM_RESERVED_CONSTANT_OFFSET + X_D3DSCM_CORRECTION) @@ -1039,14 +1052,16 @@ typedef DWORD X_VERTEXSHADERCONSTANTMODE; #define X_VST_STATE 3 // Xbox vertex shader counts -#define X_VSH_MAX_ATTRIBUTES 16 -#define X_VSH_MAX_STREAMS 16 -#define X_VSH_MAX_INSTRUCTION_COUNT 136 // The maximum Xbox shader instruction count +#define X_VSH_MAX_ATTRIBUTES 16 +#define X_VSH_MAX_STREAMS 16 +#define X_VSH_MAX_INSTRUCTION_COUNT 136 // The maximum Xbox shader instruction count +#define X_VSH_INSTRUCTION_SIZE 4 +#define X_VSH_INSTRUCTION_SIZE_BYTES (X_VSH_INSTRUCTION_SIZE * sizeof(DWORD)) // Xbox Vertex Shader versions -#define VERSION_XVS 0x2078 // 'x ' Xbox vertex shader -#define VERSION_XVSS 0x7378 // 'xs' Xbox vertex state shader -#define VERSION_XVSW 0x7778 // 'xw' Xbox vertex read/write shader +#define VERSION_XVS 0x2078 // 'x ' Xbox vertex shader. Corresponds to X_VST_NORMAL +#define VERSION_XVSS 0x7378 // 'xs' Xbox vertex state shader. Corresponds to X_VST_STATE +#define VERSION_XVSW 0x7778 // 'xw' Xbox vertex read/write shader. 
Corresponds to X_VST_READWRITE /// nv2a microcode header typedef struct @@ -1056,22 +1071,30 @@ typedef struct } X_VSH_SHADER_HEADER; -#define X_VSH_INSTRUCTION_SIZE 4 -#define X_VSH_INSTRUCTION_SIZE_BYTES (X_VSH_INSTRUCTION_SIZE * sizeof(DWORD)) - // ****************************************************************** // * X_VERTEXSHADERINPUT // ****************************************************************** typedef struct _X_VERTEXSHADERINPUT { - DWORD IndexOfStream; + DWORD StreamIndex; DWORD Offset; DWORD Format; - BYTE TesselationType; - BYTE TesselationSource; + BYTE TessellationType; + BYTE TessellationSource; + BYTE Padding0; + BYTE Padding1; } X_VERTEXSHADERINPUT; +typedef struct { + DWORD StreamIndex; + DWORD Offset; + DWORD SizeAndType; + BYTE Flags; + BYTE Source; +} +X_VertexShaderSlot; + // ****************************************************************** // * X_VERTEXATTRIBUTEFORMAT // ****************************************************************** @@ -1092,34 +1115,64 @@ typedef struct _X_STREAMINPUT UINT Offset; } X_STREAMINPUT; +struct X_D3DVertexShader3948 +{ +#if 0 + DWORD Signature; // Note : Debug XBE's have a 'Vshd' DWORD signature prefix +#endif + DWORD RefCount; // Based on the observation this member is set to 1 in D3DDevice_CreateVertexShader and decreased in D3DDevice_DeleteVertexShader + DWORD Flags; // Seems to contain at solely the four X_D3DUSAGE_PERSISTENT* flags + DWORD MaxSlot; + DWORD TextureCount; + DWORD ProgramSize; + DWORD ProgramAndConstantsDwords; // Sum of ProgramSize + constant count, expressed in instruction slots, taking 4 DWORD's per slot (see X_VSH_INSTRUCTION_SIZE) + DWORD Dimensionality[4] ; // Guesswork, since all 4 bytes (for all 4 textures) are most often set to 0 (or 2 when a texture isn't used) and 1, 3 and 4 also occur (and nothing else) + X_VERTEXATTRIBUTEFORMAT VertexAttribute; + X_VertexShaderSlot Slot[4]; // Four more (for a total of 20) + DWORD ProgramAndConstants[1 /*declare more for debugging 
purposes */+ X_VSH_MAX_INSTRUCTION_COUNT]; // The binary function data and constants (contents continues futher outside this struct, up to ProgramAndConstantsDwords * 4 (=X_VSH_INSTRUCTION_SIZE) DWORD's) +}; + struct X_D3DVertexShader { - // Note : Debug XBE's have a 'Vshd' DWORD signature prefixing this! +#if 0 + DWORD Signature; // Note : Debug XBE's have a 'Vshd' DWORD signature prefix +#endif DWORD RefCount; // Based on the observation this member is set to 1 in D3DDevice_CreateVertexShader and decreased in D3DDevice_DeleteVertexShader - DWORD Flags; - DWORD FunctionSize; // ?Also known as ProgramSize? - DWORD TotalSize; // seems to include both the function and ?constants? - DWORD NumberOfDimensionsPerTexture; // Guesswork, since all 4 bytes (for all 4 textures) are most often set to 0 (or 2 when a texture isn't used) and 1, 3 and 4 also occur (and nothing else) + DWORD Flags; // Contains X_VERTEXSHADER_FLAG_* bits + DWORD ProgramSize; + DWORD ProgramAndConstantsDwords; // Sum of ProgramSize + constant count, expressed in instruction slots, taking 4 DWORD's per slot (see X_VSH_INSTRUCTION_SIZE) + BYTE Dimensionality[4] ; // Guesswork, since all 4 bytes (for all 4 textures) are most often set to 0 (or 2 when a texture isn't used) and 1, 3 and 4 also occur (and nothing else) X_VERTEXATTRIBUTEFORMAT VertexAttribute; - DWORD FunctionData[X_VSH_MAX_INSTRUCTION_COUNT]; // probably the binary function data and ?constants? 
(data continues futher outside this struct, up to TotalSize DWORD's) + DWORD ProgramAndConstants[X_VSH_MAX_INSTRUCTION_COUNT]; // The binary function data and constants (contents continues futher outside this struct, up to ProgramAndConstantsDwords * 4 (=X_VSH_INSTRUCTION_SIZE) DWORD's) }; +// X_D3DVertexShader.Flags values : +#define X_VERTEXSHADER_FLAG_WRITE (1 << 0) // = 0x0001 // Set for Xbox ShaderType != X_VST_NORMAL +#define X_VERTEXSHADER_FLAG_PASSTHROUGH (1 << 1) // = 0x0002 +#define X_VERTEXSHADER_FLAG_UNKNOWN (1 << 2) // = 0x0004 // Test case: Amped +#define X_VERTEXSHADER_FLAG_STATE (1 << 3) // = 0x0008 // Set for Xbox ShaderType == X_VST_STATE +#define X_VERTEXSHADER_FLAG_PROGRAM (1 << 4) // = 0x0010 // Set when X_D3DVertexShader was created with assigned function data +#define X_VERTEXSHADER_FLAG_HASDIFFUSE (1 << 10) // = 0x0400 Corresponds to X_D3DUSAGE_PERSISTENTDIFFUSE +#define X_VERTEXSHADER_FLAG_HASSPECULAR (1 << 11) // = 0x0800 Corresponds to X_D3DUSAGE_PERSISTENTSPECULAR +#define X_VERTEXSHADER_FLAG_HASBACKDIFFUSE (1 << 12) // = 0x1000 Corresponds to X_D3DUSAGE_PERSISTENTBACKDIFFUSE +#define X_VERTEXSHADER_FLAG_HASBACKSPECULAR (1 << 13) // = 0x2000 Corresponds to X_D3DUSAGE_PERSISTENTBACKSPECULAR + // vertex shader input registers for fixed function vertex shader // Name Register number D3DFVF -const int X_D3DVSDE_POSITION = 0; // Corresponds to D3DFVF_XYZ -const int X_D3DVSDE_BLENDWEIGHT = 1; // Corresponds to D3DFVF_XYZRHW -const int X_D3DVSDE_NORMAL = 2; // Corresponds to D3DFVF_NORMAL -const int X_D3DVSDE_DIFFUSE = 3; // Corresponds to D3DFVF_DIFFUSE -const int X_D3DVSDE_SPECULAR = 4; // Corresponds to D3DFVF_SPECULAR +const int X_D3DVSDE_POSITION = 0; // Corresponds to X_D3DFVF_XYZ +const int X_D3DVSDE_BLENDWEIGHT = 1; // Corresponds to X_D3DFVF_XYZB1? (was X_D3DFVF_XYZRHW?) 
+const int X_D3DVSDE_NORMAL = 2; // Corresponds to X_D3DFVF_NORMAL +const int X_D3DVSDE_DIFFUSE = 3; // Corresponds to X_D3DFVF_DIFFUSE +const int X_D3DVSDE_SPECULAR = 4; // Corresponds to X_D3DFVF_SPECULAR const int X_D3DVSDE_FOG = 5; // Xbox extension const int X_D3DVSDE_POINTSIZE = 6; // Dxbx addition const int X_D3DVSDE_BACKDIFFUSE = 7; // Xbox extension const int X_D3DVSDE_BACKSPECULAR = 8; // Xbox extension -const int X_D3DVSDE_TEXCOORD0 = 9; // Corresponds to D3DFVF_TEX1 (not D3DFVF_TEX0, which means no textures are present) -const int X_D3DVSDE_TEXCOORD1 = 10; // Corresponds to D3DFVF_TEX2 -const int X_D3DVSDE_TEXCOORD2 = 11; // Corresponds to D3DFVF_TEX3 -const int X_D3DVSDE_TEXCOORD3 = 12; // Corresponds to D3DFVF_TEX4 +const int X_D3DVSDE_TEXCOORD0 = 9; // Corresponds to X_D3DFVF_TEX1 (not X_D3DFVF_TEX0, which means no textures are present) +const int X_D3DVSDE_TEXCOORD1 = 10; // Corresponds to X_D3DFVF_TEX2 +const int X_D3DVSDE_TEXCOORD2 = 11; // Corresponds to X_D3DFVF_TEX3 +const int X_D3DVSDE_TEXCOORD3 = 12; // Corresponds to X_D3DFVF_TEX4 const int X_D3DVSDE_VERTEX = 0xFFFFFFFF; // Xbox extension for Begin/End drawing (data is a D3DVSDT_FLOAT4) //typedef X_D3DVSDE = X_D3DVSDE_POSITION..High(DWORD)-2; // Unique declaration to make overloads possible; @@ -1233,12 +1286,22 @@ typedef enum _X_D3DVSD_TOKENTYPE #define X_D3DFVF_TEXTUREFORMAT2 0x000 #define X_D3DFVF_TEXTUREFORMAT3 0x001 #define X_D3DFVF_TEXTUREFORMAT4 0x002 - +#define X_D3DFVF_TEXCOORDSIZE_SHIFT(Index) ((Index) * 2 + 16) #define X_D3DFVF_TEXCOORDSIZE1(Index) (X_D3DFVF_TEXTUREFORMAT1 << (Index * 2 + 16)) #define X_D3DFVF_TEXCOORDSIZE2(Index) (X_D3DFVF_TEXTUREFORMAT2) #define X_D3DFVF_TEXCOORDSIZE3(Index) (X_D3DFVF_TEXTUREFORMAT3 << (Index * 2 + 16)) #define X_D3DFVF_TEXCOORDSIZE4(Index) (X_D3DFVF_TEXTUREFORMAT4 << (Index * 2 + 16)) +// Values, used with D3DTSS_TEXCOORDINDEX, to specify that the vertex data (position +// and normal in the camera space) should be taken as texture coordinates. 
+// Low 16 bits are used to specify texture coordinate index, to take the WRAP mode from. +#define X_D3DTSS_TCI_PASSTHRU 0x00000000 +#define X_D3DTSS_TCI_CAMERASPACENORMAL 0x00010000 +#define X_D3DTSS_TCI_CAMERASPACEPOSITION 0x00020000 +#define X_D3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR 0x00030000 +#define X_D3DTSS_TCI_OBJECT 0x00040000 // Warning! Collides with host Direct3D 9 D3DTSS_TCI_SPHEREMAP +#define X_D3DTSS_TCI_SPHEREMAP 0x00050000 + typedef DWORD NV2AMETHOD; // diff --git a/src/core/hle/D3D8/XbPixelShader.cpp b/src/core/hle/D3D8/XbPixelShader.cpp index a4d7247a2..63766d7ec 100644 --- a/src/core/hle/D3D8/XbPixelShader.cpp +++ b/src/core/hle/D3D8/XbPixelShader.cpp @@ -66,8 +66,9 @@ #include #include -#include "Direct3D9\RenderStates.h" -extern XboxRenderStateConverter XboxRenderStates; +#include "Direct3D9\RenderStates.h" // For XboxRenderStateConverter + +extern XboxRenderStateConverter XboxRenderStates; // Declared in Direct3D9.cpp #define DbgPshPrintf \ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ diff --git a/src/core/hle/D3D8/XbPushBuffer.cpp b/src/core/hle/D3D8/XbPushBuffer.cpp index c6e9605e7..50aafbe01 100644 --- a/src/core/hle/D3D8/XbPushBuffer.cpp +++ b/src/core/hle/D3D8/XbPushBuffer.cpp @@ -40,82 +40,12 @@ #include "Logging.h" // TODO: Find somewhere to put this that doesn't conflict with xbox:: -extern void EmuUpdateActiveTextureStages(); +extern void CxbxUpdateHostTextures(); const char *NV2AMethodToString(DWORD dwMethod); // forward static void DbgDumpMesh(WORD *pIndexData, DWORD dwCount); -// Determine the size (in number of floating point texture coordinates) of the texture format (indexed 0 .. 3). -// This is the reverse of the D3DFVF_TEXCOORDSIZE[0..3] macros. 
-int DxbxFVF_GetNumberOfTextureCoordinates(DWORD dwFVF, int aTextureIndex) -{ - // See D3DFVF_TEXCOORDSIZE1() - switch ((dwFVF >> ((aTextureIndex * 2) + 16)) & 3) { - case D3DFVF_TEXTUREFORMAT1: return 1; // One floating point value - case D3DFVF_TEXTUREFORMAT2: return 2; // Two floating point values - case D3DFVF_TEXTUREFORMAT3: return 3; // Three floating point values - case D3DFVF_TEXTUREFORMAT4: return 4; // Four floating point values - default: - //assert(false || "DxbxFVF_GetNumberOfTextureCoordinates : Unhandled case"); - return 0; - } -} - -// Dxbx Note: This code appeared in EmuExecutePushBufferRaw and occured -// in EmuFlushIVB too, so it's generalize in this single implementation. -UINT DxbxFVFToVertexSizeInBytes(DWORD dwFVF, BOOL bIncludeTextures) -{ -/* - X_D3DFVF_POSITION_MASK = $00E; // Dec /2 #fl - - X_D3DFVF_XYZ = $002; // 2 > 1 > 3 - X_D3DFVF_XYZRHW = $004; // 4 > 2 > 4 - X_D3DFVF_XYZB1 = $006; // 6 > 3 > 4 - X_D3DFVF_XYZB2 = $008; // 8 > 4 > 5 - X_D3DFVF_XYZB3 = $00a; // 10 > 5 > 6 - X_D3DFVF_XYZB4 = $00c; // 12 > 6 > 7 -*/ - // Divide the D3DFVF by two, this gives almost the number of floats needed for the format : - UINT Result = (dwFVF & D3DFVF_POSITION_MASK) >> 1; - if (Result >= (D3DFVF_XYZB1 >> 1)) { - // Any format from D3DFVF_XYZB1 and above need 1 extra float : - Result++; - } - else { - // The other formats (XYZ and XYZRHW) need 2 extra floats : - Result += 2; - } - - // Express the size in bytes, instead of floats : - Result *= sizeof(FLOAT); - - // D3DFVF_NORMAL cannot be combined with D3DFVF_XYZRHW : - if ((dwFVF & D3DFVF_POSITION_MASK) != D3DFVF_XYZRHW) { - if (dwFVF & D3DFVF_NORMAL) { - Result += sizeof(FLOAT) * 3; - } - } - - if (dwFVF & D3DFVF_DIFFUSE) { - Result += sizeof(D3DCOLOR); - } - - if (dwFVF & D3DFVF_SPECULAR) { - Result += sizeof(D3DCOLOR); - } - - if (bIncludeTextures) { - int NrTextures = ((dwFVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT); - while (NrTextures > 0) { - NrTextures--; - Result += 
DxbxFVF_GetNumberOfTextureCoordinates(dwFVF, NrTextures) * sizeof(FLOAT); - } - } - - return Result; -} - void EmuExecutePushBuffer ( xbox::X_D3DPushBuffer *pPushBuffer, @@ -162,11 +92,11 @@ void EmuExecutePushBuffer return; } -DWORD CxbxGetStrideFromVertexShaderHandle(DWORD dwVertexShader) +DWORD CxbxGetStrideFromVertexDeclaration(CxbxVertexDeclaration* pCxbxVertexDeclaration) { DWORD Stride = 0; - if (VshHandleIsVertexShader(dwVertexShader)) { + if (pCxbxVertexDeclaration) { // Test-case : Crash 'n' Burn [45530014] // Test-case : CrimsonSea [4B4F0002] // Test-case : Freedom Fighters @@ -178,24 +108,16 @@ DWORD CxbxGetStrideFromVertexShaderHandle(DWORD dwVertexShader) // Test-case : SpyHunter 2 [4D57001B] //LOG_TEST_CASE("Non-FVF Vertex Shaders not yet (completely) supported for PushBuffer emulation!"); - CxbxVertexShader *pCxbxVertexShader = GetCxbxVertexShader(dwVertexShader); - if (pCxbxVertexShader) { - if (pCxbxVertexShader->Declaration.NumberOfVertexStreams == 1) { - // Note : This assumes that the only stream in use will be stream zero : - Stride = pCxbxVertexShader->Declaration.VertexStreams[0].HostVertexStride; - } - else { - LOG_TEST_CASE("Non-FVF Vertex Shaders with multiple streams not supported for PushBuffer emulation!"); - } + if (pCxbxVertexDeclaration->NumberOfVertexStreams == 1) { + // Note : This assumes that the only stream in use will be stream zero : + Stride = pCxbxVertexDeclaration->VertexStreams[0].HostVertexStride; + } + else { + LOG_TEST_CASE("Non-FVF Vertex Shaders with multiple streams not supported for PushBuffer emulation!"); } } else { - if (VshHandleIsFVF(dwVertexShader)) { - Stride = DxbxFVFToVertexSizeInBytes(dwVertexShader, /*bIncludeTextures=*/true); - } - else { - LOG_TEST_CASE("Invalid Vertex Shader not supported for PushBuffer emulation!"); - } + LOG_TEST_CASE("Missing Vertex Declaration not supported for PushBuffer emulation!"); } return Stride; @@ -233,7 +155,7 @@ void HLE_draw_inline_array(NV2AState *d) } // render 
vertices else { - DWORD dwVertexStride = CxbxGetStrideFromVertexShaderHandle(g_Xbox_VertexShader_Handle); + DWORD dwVertexStride = CxbxGetStrideFromVertexDeclaration(CxbxGetVertexDeclaration()); if (dwVertexStride > 0) { UINT VertexCount = (pg->inline_array_length * sizeof(DWORD)) / dwVertexStride; CxbxDrawContext DrawContext = {}; @@ -252,16 +174,14 @@ void HLE_draw_inline_elements(NV2AState *d) { PGRAPHState *pg = &d->pgraph; - if (IsValidCurrentShader()) { - unsigned int uiIndexCount = pg->inline_elements_length; - CxbxDrawContext DrawContext = {}; + unsigned int uiIndexCount = pg->inline_elements_length; + CxbxDrawContext DrawContext = {}; DrawContext.XboxPrimitiveType = (xbox::X_D3DPRIMITIVETYPE)pg->primitive_mode; - DrawContext.dwVertexCount = uiIndexCount; - DrawContext.pXboxIndexData = d->pgraph.inline_elements; + DrawContext.dwVertexCount = uiIndexCount; + DrawContext.pXboxIndexData = d->pgraph.inline_elements; - CxbxDrawIndexed(DrawContext); - } + CxbxDrawIndexed(DrawContext); } DWORD ABGR_to_ARGB(const uint32_t color) @@ -357,32 +277,38 @@ uint32_t HLE_read_NV2A_pgraph_register(const int reg) return pg->regs[reg]; } -void HLE_write_NV2A_vertex_attribute_slot(unsigned slot, uint32_t parameter) -{ - // Write value to LLE NV2A device - pgraph_handle_method(g_NV2A->GetDeviceState(), - /*subchannel=*/0, - /*method=*/NV097_SET_VERTEX_DATA4UB + (4 * slot), - parameter); -} - -uint32_t HLE_read_NV2A_vertex_attribute_slot(unsigned slot) +float *HLE_get_NV2A_vertex_attribute_value_pointer(unsigned slot) { NV2AState* dev = g_NV2A->GetDeviceState(); PGRAPHState *pg = &(dev->pgraph); // See CASE_16(NV097_SET_VERTEX_DATA4UB, 4) in LLE pgraph_handle_method() VertexAttribute *vertex_attribute = &pg->vertex_attributes[slot]; - // Inverse of D3DDevice_SetVertexDataColor - uint8_t a = uint8_t(vertex_attribute->inline_value[0] * 255.0f); - uint8_t b = uint8_t(vertex_attribute->inline_value[1] * 255.0f); - uint8_t c = uint8_t(vertex_attribute->inline_value[2] * 255.0f); - 
uint8_t d = uint8_t(vertex_attribute->inline_value[3] * 255.0f); - uint32_t value = a + (b << 8) + (c << 16) + (d << 24); + return vertex_attribute->inline_value; +} + +uint32_t HLE_read_NV2A_vertex_program_slot(unsigned program_load, unsigned slot) +{ + NV2AState* dev = g_NV2A->GetDeviceState(); + PGRAPHState* pg = &(dev->pgraph); + + // See CASE_32(NV097_SET_TRANSFORM_PROGRAM, 4) in LLE pgraph_handle_method() + assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + uint32_t value = pg->program_data[program_load][slot % 4]; return value; } +float *HLE_get_NV2A_vertex_constant_float4_ptr(unsigned const_index) +{ + NV2AState* dev = g_NV2A->GetDeviceState(); + PGRAPHState* pg = &(dev->pgraph); + + // See CASE_32(NV097_SET_TRANSFORM_CONSTANT, 4) in LLE pgraph_handle_method() + assert(const_index < NV2A_VERTEXSHADER_CONSTANTS); + return (float*)&(pg->vsh_constants[const_index][0]); +} + // For now, skip the cache, but handle the pgraph method directly // Note : Here's where the method gets multiplied by four! 
// Note 2 : d is read from local scope, and ni is unused (same in LLE) @@ -439,8 +365,6 @@ extern void EmuExecutePushBufferRaw uint32_t uSizeInBytes ) { - HLE_init_pgraph_plugins(); // TODO : Move to more approriate spot - // Test-case : Azurik (see https://github.com/Cxbx-Reloaded/Cxbx-Reloaded/issues/360) // Test-case : Crash 'n' Burn [45530014] // Test-case : CrimsonSea [4B4F0002] diff --git a/src/core/hle/D3D8/XbPushBuffer.h b/src/core/hle/D3D8/XbPushBuffer.h index 870f66ca8..e76a83b68 100644 --- a/src/core/hle/D3D8/XbPushBuffer.h +++ b/src/core/hle/D3D8/XbPushBuffer.h @@ -27,9 +27,6 @@ #include "core/hle/D3D8/XbVertexBuffer.h" // for CxbxDrawContext -extern int DxbxFVF_GetNumberOfTextureCoordinates(DWORD dwFVF, int aTextureIndex); -extern UINT DxbxFVFToVertexSizeInBytes(DWORD dwFVF, BOOL bIncludeTextures); - extern void CxbxDrawIndexed(CxbxDrawContext &DrawContext); extern void CxbxDrawPrimitiveUP(CxbxDrawContext &DrawContext); diff --git a/src/core/hle/D3D8/XbVertexBuffer.cpp b/src/core/hle/D3D8/XbVertexBuffer.cpp index 147c1589d..69d282379 100644 --- a/src/core/hle/D3D8/XbVertexBuffer.cpp +++ b/src/core/hle/D3D8/XbVertexBuffer.cpp @@ -34,7 +34,7 @@ #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For g_pD3DDevice #include "core\hle\D3D8\Direct3D9\WalkIndexBuffer.h" // for WalkIndexBuffer #include "core\hle\D3D8\ResourceTracker.h" -#include "core\hle\D3D8\XbPushBuffer.h" // for DxbxFVF_GetNumberOfTextureCoordinates +#include "core\hle\D3D8\XbPushBuffer.h" // For CxbxDrawPrimitiveUP #include "core\hle\D3D8\XbVertexBuffer.h" #include "core\hle\D3D8\XbConvert.h" @@ -45,16 +45,15 @@ #define MAX_STREAM_NOT_USED_TIME (2 * CLOCKS_PER_SEC) // TODO: Trim the not used time // Inline vertex buffer emulation -extern xbox::X_D3DPRIMITIVETYPE g_InlineVertexBuffer_PrimitiveType = xbox::X_D3DPT_INVALID; -extern DWORD g_InlineVertexBuffer_FVF = 0; - std::vector<_D3DIVB> g_InlineVertexBuffer_Table; -extern UINT g_InlineVertexBuffer_TableLength = 0; -extern UINT 
g_InlineVertexBuffer_TableOffset = 0; - -FLOAT *g_InlineVertexBuffer_pData = nullptr; -UINT g_InlineVertexBuffer_DataSize = 0; - -extern DWORD g_dwPrimPerFrame = 0; +xbox::X_D3DPRIMITIVETYPE g_InlineVertexBuffer_PrimitiveType = xbox::X_D3DPT_INVALID; +uint32_t g_InlineVertexBuffer_WrittenRegisters = 0; // A bitmask, indicating which registers have been set in g_InlineVertexBuffer_Table +xbox::X_VERTEXATTRIBUTEFORMAT g_InlineVertexBuffer_AttributeFormat = {}; +bool g_InlineVertexBuffer_DeclarationOverride = false; +std::vector g_InlineVertexBuffer_Table; +UINT g_InlineVertexBuffer_TableLength = 0; +UINT g_InlineVertexBuffer_TableOffset = 0; +FLOAT *g_InlineVertexBuffer_pData = nullptr; +UINT g_InlineVertexBuffer_DataSize = 0; // Copy of active Xbox D3D Vertex Streams (and strides), set by [D3DDevice|CxbxImpl]_SetStreamSource* xbox::X_STREAMINPUT g_Xbox_SetStreamSource[X_VSH_MAX_STREAMS] = { 0 }; // Note : .Offset member is never set (so always 0) @@ -63,29 +62,12 @@ extern xbox::X_D3DSurface* g_pXbox_RenderTarget; extern xbox::X_D3DSurface* g_pXbox_BackBufferSurface; extern xbox::X_D3DMULTISAMPLE_TYPE g_Xbox_MultiSampleType; +extern float *HLE_get_NV2A_vertex_attribute_value_pointer(unsigned VertexSlot); // Declared in PushBuffer.cpp + void *GetDataFromXboxResource(xbox::X_D3DResource *pXboxResource); bool GetHostRenderTargetDimensions(DWORD* pHostWidth, DWORD* pHostHeight, IDirect3DSurface* pHostRenderTarget = nullptr); uint32_t GetPixelContainerWidth(xbox::X_D3DPixelContainer* pPixelContainer); uint32_t GetPixelContainerHeight(xbox::X_D3DPixelContainer* pPixelContainer); -void ApplyXboxMultiSampleOffsetAndScale(float& x, float& y); - -_D3DIVB::_D3DIVB() -{ - Position.x = 0.0f; - Position.y = 0.0f; - Position.z = 0.0f; - Rhw = 0.0f; - std::fill(std::begin(Blend), std::end(Blend), 0.0f); - Normal.x = 0.0f; - Normal.y = 0.0f; - Normal.z = 0.0f; - Diffuse = 0u; - Specular = 0u; - Fog = 0.0f; - BackDiffuse = 0u; - BackSpecular = 0u; - std::fill(std::begin(TexCoord), 
std::end(TexCoord), D3DXVECTOR4{ 0.0f , 0.0f, 0.0f, 0.0f }); -} struct _D3DIVB &_D3DIVB::operator=(const struct _D3DIVB &Val) { @@ -100,14 +82,16 @@ struct _D3DIVB &_D3DIVB::operator=(const struct _D3DIVB &Val) Diffuse = Val.Diffuse; Specular = Val.Specular; Fog = Val.Fog; + PointSize = Val.PointSize; BackDiffuse = Val.BackDiffuse; BackSpecular = Val.BackSpecular; std::copy(std::begin(Val.TexCoord), std::end(Val.TexCoord), std::begin(TexCoord)); + std::copy(std::begin(Val.Reg13Up), std::end(Val.Reg13Up), std::begin(Reg13Up)); return *this; } -void CxbxPatchedStream::Activate(CxbxDrawContext *pDrawContext, UINT uiStream) const +void CxbxPatchedStream::Activate(CxbxDrawContext *pDrawContext, UINT HostStreamNumber) const { //LOG_INIT // Allows use of DEBUG_D3DRESULT @@ -119,7 +103,7 @@ void CxbxPatchedStream::Activate(CxbxDrawContext *pDrawContext, UINT uiStream) c } else { HRESULT hRet = g_pD3DDevice->SetStreamSource( - uiStream, + HostStreamNumber, pCachedHostVertexBuffer, 0, // OffsetInBytes uiCachedHostVertexStride); @@ -136,7 +120,7 @@ CxbxPatchedStream::CxbxPatchedStream() isValid = false; } -CxbxPatchedStream::~CxbxPatchedStream() +void CxbxPatchedStream::Clear() { if (bCachedHostVertexStreamZeroDataIsAllocated) { free(pCachedHostVertexStreamZeroData); @@ -151,26 +135,30 @@ CxbxPatchedStream::~CxbxPatchedStream() } } +CxbxPatchedStream::~CxbxPatchedStream() +{ + Clear(); +} + CxbxVertexBufferConverter::CxbxVertexBufferConverter() { m_uiNbrStreams = 0; m_pCxbxVertexDeclaration = nullptr; } +// TODO: CountActiveD3DStreams must be removed once we can rely on CxbxGetVertexDeclaration always being set int CountActiveD3DStreams() { - int lastStreamIndex = 0; - for (int i = 0; i < X_VSH_MAX_STREAMS; i++) { - if (g_Xbox_SetStreamSource[i].VertexBuffer != xbox::zeroptr) { - lastStreamIndex = i + 1; + int StreamCount = 0; + for (int XboxStreamNumber = 0; XboxStreamNumber < X_VSH_MAX_STREAMS; XboxStreamNumber++) { + if 
(GetXboxVertexStreamInput(XboxStreamNumber).VertexBuffer != xbox::zeroptr) { + StreamCount++; } } - return lastStreamIndex; + return StreamCount; } -CxbxVertexDeclaration *GetCxbxVertexDeclaration(DWORD XboxVertexShaderHandle); // forward - UINT CxbxVertexBufferConverter::GetNbrStreams(CxbxDrawContext *pDrawContext) { // Draw..Up always have one stream @@ -178,20 +166,12 @@ UINT CxbxVertexBufferConverter::GetNbrStreams(CxbxDrawContext *pDrawContext) return 1; } - if(VshHandleIsVertexShader(g_Xbox_VertexShader_Handle)) { - CxbxVertexDeclaration *pDecl = GetCxbxVertexDeclaration(g_Xbox_VertexShader_Handle); - if (pDecl) { - if (pDecl->NumberOfVertexStreams <= X_VSH_MAX_STREAMS) { - return pDecl->NumberOfVertexStreams; - } - - // If we reached here, pDecl was set,but with invalid data - LOG_TEST_CASE("NumberOfVertexStreams > 16"); - } - - return CountActiveD3DStreams(); + CxbxVertexDeclaration *pDecl = CxbxGetVertexDeclaration(); + if (pDecl) { + return pDecl->NumberOfVertexStreams; } + // TODO: This code and CountActiveD3DStreams must be removed once we can rely on CxbxGetVertexDeclaration always being set if (g_Xbox_VertexShader_Handle) { return CountActiveD3DStreams(); } @@ -261,60 +241,24 @@ void CxbxVertexBufferConverter::ConvertStream ) { extern D3DCAPS g_D3DCaps; - - bool bVshHandleIsFVF = VshHandleIsFVF(g_Xbox_VertexShader_Handle); - DWORD XboxFVF = bVshHandleIsFVF ? g_Xbox_VertexShader_Handle : 0; - // Texture normalization can only be set for FVF shaders - bool bNeedTextureNormalization = false; - struct { int NrTexCoords; bool bTexIsLinear; int Width; int Height; int Depth; } pActivePixelContainer[xbox::X_D3DTS_STAGECOUNT] = { 0 }; - - if (bVshHandleIsFVF) { - DWORD dwTexN = (XboxFVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT; - if (dwTexN > xbox::X_D3DTS_STAGECOUNT) { - LOG_TEST_CASE("FVF,dwTexN > X_D3DTS_STAGECOUNT"); - } - - // Check for active linear textures. 
//X_D3DBaseTexture *pLinearBaseTexture[xbox::X_D3DTS_STAGECOUNT]; - for (unsigned int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { - // Only normalize coordinates used by the FVF shader : - if (i + 1 <= dwTexN) { - pActivePixelContainer[i].NrTexCoords = DxbxFVF_GetNumberOfTextureCoordinates(XboxFVF, i); - // TODO : Use GetXboxBaseTexture() - xbox::X_D3DBaseTexture *pXboxBaseTexture = g_pXbox_SetTexture[i]; - if (pXboxBaseTexture != xbox::zeroptr) { - extern xbox::X_D3DFORMAT GetXboxPixelContainerFormat(const xbox::X_D3DPixelContainer *pXboxPixelContainer); // TODO : Move to XTL-independent header file - - xbox::X_D3DFORMAT XboxFormat = GetXboxPixelContainerFormat(pXboxBaseTexture); - if (EmuXBFormatIsLinear(XboxFormat)) { - // This is often hit by the help screen in XDK samples. - bNeedTextureNormalization = true; - // Remember linearity, width and height : - pActivePixelContainer[i].bTexIsLinear = true; - // TODO : Use DecodeD3DSize or GetPixelContainerWidth + GetPixelContainerHeight - pActivePixelContainer[i].Width = (pXboxBaseTexture->Size & X_D3DSIZE_WIDTH_MASK) + 1; - pActivePixelContainer[i].Height = ((pXboxBaseTexture->Size & X_D3DSIZE_HEIGHT_MASK) >> X_D3DSIZE_HEIGHT_SHIFT) + 1; - // TODO : Support 3D textures - } - } - } - } - } CxbxVertexShaderStreamInfo *pVertexShaderStreamInfo = nullptr; + UINT XboxStreamNumber = uiStream; if (m_pCxbxVertexDeclaration != nullptr) { - if (uiStream > m_pCxbxVertexDeclaration->NumberOfVertexStreams + 1) { + if (uiStream > m_pCxbxVertexDeclaration->NumberOfVertexStreams) { LOG_TEST_CASE("uiStream > NumberOfVertexStreams"); return; } pVertexShaderStreamInfo = &(m_pCxbxVertexDeclaration->VertexStreams[uiStream]); + XboxStreamNumber = pVertexShaderStreamInfo->XboxStreamIndex; } bool bNeedVertexPatching = (pVertexShaderStreamInfo != nullptr && pVertexShaderStreamInfo->NeedPatch); - bool bNeedRHWReset = bVshHandleIsFVF && ((XboxFVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZRHW); - bool bNeedStreamCopy = bNeedTextureNormalization || 
bNeedVertexPatching || bNeedRHWReset; + bool bNeedStreamCopy = bNeedVertexPatching; + UINT HostStreamNumber = XboxStreamNumber; // Use Xbox stream index on host uint8_t *pXboxVertexData = xbox::zeroptr; UINT uiXboxVertexStride = 0; UINT uiVertexCount = 0; @@ -325,7 +269,7 @@ void CxbxVertexBufferConverter::ConvertStream if (pDrawContext->pXboxVertexStreamZeroData != xbox::zeroptr) { // There should only be one stream (stream zero) in this case - if (uiStream != 0) { + if (XboxStreamNumber != 0) { CxbxKrnlCleanup("Trying to patch a Draw..UP with more than stream zero!"); } @@ -335,23 +279,24 @@ void CxbxVertexBufferConverter::ConvertStream uiHostVertexStride = (bNeedVertexPatching) ? pVertexShaderStreamInfo->HostVertexStride : uiXboxVertexStride; dwHostVertexDataSize = uiVertexCount * uiHostVertexStride; } else { - xbox::X_D3DVertexBuffer *pXboxVertexBuffer = g_Xbox_SetStreamSource[uiStream].VertexBuffer; + xbox::X_STREAMINPUT& XboxStreamInput = GetXboxVertexStreamInput(XboxStreamNumber); + xbox::X_D3DVertexBuffer *pXboxVertexBuffer = XboxStreamInput.VertexBuffer; pXboxVertexData = (uint8_t*)GetDataFromXboxResource(pXboxVertexBuffer); if (pXboxVertexData == xbox::zeroptr) { HRESULT hRet = g_pD3DDevice->SetStreamSource( - uiStream, + HostStreamNumber, nullptr, 0, // OffsetInBytes 0); // DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetStreamSource"); if (FAILED(hRet)) { - EmuLog(LOG_LEVEL::WARNING, "g_pD3DDevice->SetStreamSource(uiStream, nullptr, 0)"); + EmuLog(LOG_LEVEL::WARNING, "g_pD3DDevice->SetStreamSource(HostStreamNumber, nullptr, 0)"); } return; } - uiXboxVertexStride = g_Xbox_SetStreamSource[uiStream].Stride; + uiXboxVertexStride = XboxStreamInput.Stride; // Set a new (exact) vertex count uiVertexCount = pDrawContext->VerticesInBuffer; // Dxbx note : Don't overwrite pDrawContext.dwVertexCount with uiVertexCount, because an indexed draw @@ -376,7 +321,7 @@ void CxbxVertexBufferConverter::ConvertStream stream.uiCachedHostVertexStride = uiHostVertexStride; 
stream.bCacheIsStreamZeroDrawUP = true; stream.pCachedHostVertexStreamZeroData = pHostVertexData; - stream.Activate(pDrawContext, uiStream); + stream.Activate(pDrawContext, HostStreamNumber); return; } @@ -400,26 +345,17 @@ void CxbxVertexBufferConverter::ConvertStream patchedStream.uiCachedXboxVertexStride == uiXboxVertexStride && // Make sure the Xbox Stride didn't change patchedStream.uiCachedXboxVertexDataSize == xboxVertexDataSize ) { // Make sure the Xbox Data Size also didn't change m_TotalCacheHits++; - patchedStream.Activate(pDrawContext, uiStream); + patchedStream.Activate(pDrawContext, HostStreamNumber); return; } m_TotalCacheMisses++; // If execution reaches here, the cached vertex buffer was not valid and we must reconvert the data - if (patchedStream.isValid) { - pHostVertexData = (uint8_t*)patchedStream.pCachedHostVertexStreamZeroData; - pNewHostVertexBuffer = patchedStream.pCachedHostVertexBuffer; - - // Free the existing buffers - if (pHostVertexData != nullptr) { - free(pHostVertexData); - pHostVertexData = nullptr; - } else if (pNewHostVertexBuffer != nullptr) { - pNewHostVertexBuffer->Release(); - pNewHostVertexBuffer = nullptr; - } - } + // Free the existing buffers + patchedStream.Clear(); + assert(pHostVertexData == nullptr); + assert(pNewHostVertexBuffer == nullptr); // If dwHostVertexDataSize is zero, the allocation/creation will fail // This can be caused by a stride of 0, and 'other' invalid configurations @@ -478,9 +414,7 @@ void CxbxVertexBufferConverter::ConvertStream // Make it SHORT2N pHostVertexAsShort[0] = pXboxVertexAsShort[0]; pHostVertexAsShort[1] = 0; - } - else - { + } else { // Make it FLOAT1 pHostVertexAsFloat[0] = NormShortToFloat(pXboxVertexAsShort[0]); //pHostVertexAsFloat[1] = 0.0f; // Would be needed for FLOAT2 @@ -496,9 +430,7 @@ void CxbxVertexBufferConverter::ConvertStream // Make it SHORT2N pHostVertexAsShort[0] = pXboxVertexAsShort[0]; pHostVertexAsShort[1] = pXboxVertexAsShort[1]; - } - else - { + } else { // Make 
it FLOAT2 pHostVertexAsFloat[0] = NormShortToFloat(pXboxVertexAsShort[0]); pHostVertexAsFloat[1] = NormShortToFloat(pXboxVertexAsShort[1]); @@ -513,9 +445,7 @@ void CxbxVertexBufferConverter::ConvertStream pHostVertexAsShort[1] = pXboxVertexAsShort[1]; pHostVertexAsShort[2] = pXboxVertexAsShort[2]; pHostVertexAsShort[3] = 32767; // TODO : verify - } - else - { + } else { // Make it FLOAT3 pHostVertexAsFloat[0] = NormShortToFloat(pXboxVertexAsShort[0]); pHostVertexAsFloat[1] = NormShortToFloat(pXboxVertexAsShort[1]); @@ -534,9 +464,7 @@ void CxbxVertexBufferConverter::ConvertStream pHostVertexAsShort[1] = pXboxVertexAsShort[1]; pHostVertexAsShort[2] = pXboxVertexAsShort[2]; pHostVertexAsShort[3] = pXboxVertexAsShort[3]; - } - else - { + } else { // Make it FLOAT4 pHostVertexAsFloat[0] = NormShortToFloat(pXboxVertexAsShort[0]); pHostVertexAsFloat[1] = NormShortToFloat(pXboxVertexAsShort[1]); @@ -586,9 +514,7 @@ void CxbxVertexBufferConverter::ConvertStream pHostVertexAsByte[1] = 0; pHostVertexAsByte[2] = 0; pHostVertexAsByte[3] = 255; // TODO : Verify - } - else - { + } else { // Make it FLOAT1 pHostVertexAsFloat[0] = ByteToFloat(pXboxVertexAsByte[0]); } @@ -601,9 +527,7 @@ void CxbxVertexBufferConverter::ConvertStream pHostVertexAsByte[1] = pXboxVertexAsByte[1]; pHostVertexAsByte[2] = 0; pHostVertexAsByte[3] = 255; // TODO : Verify - } - else - { + } else { // Make it FLOAT2 pHostVertexAsFloat[0] = ByteToFloat(pXboxVertexAsByte[0]); pHostVertexAsFloat[1] = ByteToFloat(pXboxVertexAsByte[1]); @@ -618,9 +542,7 @@ void CxbxVertexBufferConverter::ConvertStream pHostVertexAsByte[1] = pXboxVertexAsByte[1]; pHostVertexAsByte[2] = pXboxVertexAsByte[2]; pHostVertexAsByte[3] = 255; // TODO : Verify - } - else - { + } else { // Make it FLOAT3 pHostVertexAsFloat[0] = ByteToFloat(pXboxVertexAsByte[0]); pHostVertexAsFloat[1] = ByteToFloat(pXboxVertexAsByte[1]); @@ -639,9 +561,7 @@ void CxbxVertexBufferConverter::ConvertStream pHostVertexAsByte[1] = pXboxVertexAsByte[1]; 
pHostVertexAsByte[2] = pXboxVertexAsByte[2]; pHostVertexAsByte[3] = pXboxVertexAsByte[3]; - } - else - { + } else { // Make it FLOAT4 pHostVertexAsFloat[0] = ByteToFloat(pXboxVertexAsByte[0]); pHostVertexAsFloat[1] = ByteToFloat(pXboxVertexAsByte[1]); @@ -685,105 +605,6 @@ void CxbxVertexBufferConverter::ConvertStream } } - // Xbox FVF shaders are identical to host Direct3D 8.1, however - // texture coordinates may need normalization if used with linear textures. - if (bNeedTextureNormalization || bNeedRHWReset) { - // assert(bVshHandleIsFVF); - - UINT uiTextureCoordinatesByteOffsetInVertex = 0; - - // Locate texture coordinate offset in vertex structure. - if (bNeedTextureNormalization) { - uiTextureCoordinatesByteOffsetInVertex = DxbxFVFToVertexSizeInBytes(XboxFVF, /*bIncludeTextures=*/false); - if (bNeedVertexPatching) { - LOG_TEST_CASE("Potential xbox vs host texture-offset difference! (bNeedVertexPatching within bNeedTextureNormalization)"); - } - // As long as vertices aren't resized / patched up until the texture coordinates, - // the uiTextureCoordinatesByteOffsetInVertex on host will match Xbox - } - - // If for some reason the Xbox Render Target is not set, fallback to the backbuffer - if (g_pXbox_RenderTarget == xbox::zeroptr) { - LOG_TEST_CASE("SetRenderTarget fallback to backbuffer"); - g_pXbox_RenderTarget = g_pXbox_BackBufferSurface; - } - - DWORD HostRenderTarget_Width, HostRenderTarget_Height; - DWORD XboxRenderTarget_Width = GetPixelContainerWidth(g_pXbox_RenderTarget); - DWORD XboxRenderTarget_Height = GetPixelContainerHeight(g_pXbox_RenderTarget); - if (!GetHostRenderTargetDimensions(&HostRenderTarget_Width, &HostRenderTarget_Height)) { - HostRenderTarget_Width = XboxRenderTarget_Width; - HostRenderTarget_Height = XboxRenderTarget_Height; - } - - bool bNeedRHWTransform = (g_Xbox_MultiSampleType > xbox::X_D3DMULTISAMPLE_NONE) || (XboxRenderTarget_Width < HostRenderTarget_Width && XboxRenderTarget_Height < HostRenderTarget_Height); - - for 
(uint32_t uiVertex = 0; uiVertex < uiVertexCount; uiVertex++) { - FLOAT *pVertexDataAsFloat = (FLOAT*)(&pHostVertexData[uiVertex * uiHostVertexStride]); - - // Handle pre-transformed vertices (which bypass the vertex shader pipeline) - if (bNeedRHWReset) { - // We need to transform these vertices only if the host render target was upscaled from the Xbox render target - // Transforming always breaks render to non-upscaled textures: Only surfaces are upscaled, intentionally so - if (bNeedRHWTransform) { - pVertexDataAsFloat[0] *= g_RenderScaleFactor; - pVertexDataAsFloat[1] *= g_RenderScaleFactor; - - ApplyXboxMultiSampleOffsetAndScale(pVertexDataAsFloat[0], pVertexDataAsFloat[1]); - } - -#if 0 - // Check Z. TODO : Why reset Z from 0.0 to 1.0 ? (Maybe fog-related?) - if (pVertexDataAsFloat[2] == 0.0f) { - // LOG_TEST_CASE("D3DFVF_XYZRHW (Z)"); // Test-case : Many XDK Samples (AlphaFog, PointSprites) - pVertexDataAsFloat[2] = 1.0f; - } -#endif -#if 1 - // Check RHW. TODO : Why reset from 0.0 to 1.0 ? 
(Maybe 1.0 indicates that the vertices are not to be transformed) - if (pVertexDataAsFloat[3] == 0.0f) { - // LOG_TEST_CASE("D3DFVF_XYZRHW (RHW)"); // Test-case : Many XDK Samples (AlphaFog, PointSprites) - pVertexDataAsFloat[3] = 1.0f; - } -#endif - } - - // Normalize texture coordinates in FVF stream if needed - if (uiTextureCoordinatesByteOffsetInVertex > 0) { // implies bNeedTextureNormalization (using one is more efficient than both) - FLOAT *pVertexUVData = (FLOAT*)((uintptr_t)pVertexDataAsFloat + uiTextureCoordinatesByteOffsetInVertex); - for (unsigned int i = 0; i < xbox::X_D3DTS_STAGECOUNT; i++) { - if (pActivePixelContainer[i].bTexIsLinear) { - switch (pActivePixelContainer[i].NrTexCoords) { - case 0: - LOG_TEST_CASE("Normalize 0D?"); - break; - case 1: - LOG_TEST_CASE("Normalize 1D"); - pVertexUVData[0] /= pActivePixelContainer[i].Width; - break; - case 2: - pVertexUVData[0] /= pActivePixelContainer[i].Width; - pVertexUVData[1] /= pActivePixelContainer[i].Height; - break; - case 3: - LOG_TEST_CASE("Normalize 3D"); - // Test case : HeatShimmer - pVertexUVData[0] /= pActivePixelContainer[i].Width; - pVertexUVData[1] /= pActivePixelContainer[i].Height; - pVertexUVData[2] /= pActivePixelContainer[i].Depth; - break; - default: - LOG_TEST_CASE("Normalize ?D"); - break; - } - } - - pVertexUVData += pActivePixelContainer[i].NrTexCoords; - } - } - } - } - patchedStream.isValid = true; patchedStream.XboxPrimitiveType = pDrawContext->XboxPrimitiveType; patchedStream.pCachedXboxVertexData = pXboxVertexData; @@ -802,7 +623,7 @@ void CxbxVertexBufferConverter::ConvertStream patchedStream.pCachedHostVertexBuffer = pNewHostVertexBuffer; } - patchedStream.Activate(pDrawContext, uiStream); + patchedStream.Activate(pDrawContext, HostStreamNumber); } void CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext) @@ -810,10 +631,7 @@ void CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext) if ((pDrawContext->XboxPrimitiveType < xbox::X_D3DPT_POINTLIST) || 
(pDrawContext->XboxPrimitiveType > xbox::X_D3DPT_POLYGON)) CxbxKrnlCleanup("Unknown primitive type: 0x%.02X\n", pDrawContext->XboxPrimitiveType); - m_pCxbxVertexDeclaration = nullptr; - if (VshHandleIsVertexShader(g_Xbox_VertexShader_Handle)) { - m_pCxbxVertexDeclaration = &(GetCxbxVertexShader(g_Xbox_VertexShader_Handle)->Declaration); - } + m_pCxbxVertexDeclaration = CxbxGetVertexDeclaration(); // If we are drawing from an offset, we know that the vertex count must have // 'offset' vertices before the first drawn vertices @@ -841,8 +659,8 @@ void CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext) m_uiNbrStreams = X_VSH_MAX_STREAMS; } - for(UINT uiStream = 0; uiStream < m_uiNbrStreams; uiStream++) { - ConvertStream(pDrawContext, uiStream); + for(UINT i = 0; i < m_uiNbrStreams; i++) { + ConvertStream(pDrawContext, i); } if (pDrawContext->XboxPrimitiveType == xbox::X_D3DPT_QUADSTRIP) { @@ -869,48 +687,128 @@ void CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext) } } -void EmuFlushIVB() +void CxbxSetVertexAttribute(int Register, FLOAT a, FLOAT b, FLOAT c, FLOAT d) { - CxbxUpdateNativeD3DResources(); - - // Parse IVB table with current FVF shader if possible. - bool bFVF = VshHandleIsFVF(g_Xbox_VertexShader_Handle); - DWORD dwCurFVF = (bFVF) ? g_Xbox_VertexShader_Handle : g_InlineVertexBuffer_FVF; - - EmuLog(LOG_LEVEL::DEBUG, "g_InlineVertexBuffer_TableOffset := %d", g_InlineVertexBuffer_TableOffset); - - // Check the given FVF - switch (dwCurFVF & D3DFVF_POSITION_MASK) { - case 0: // No position ? 
- if (bFVF) { - EmuLog(LOG_LEVEL::WARNING, "EmuFlushIVB(): g_Xbox_VertexShader_Handle isn't a valid FVF - using D3DFVF_XYZRHW instead!"); - dwCurFVF |= D3DFVF_XYZRHW; - } - else { - EmuLog(LOG_LEVEL::WARNING, "EmuFlushIVB(): using g_InlineVertexBuffer_FVF instead of current FVF!"); - dwCurFVF = g_InlineVertexBuffer_FVF; - } - break; - case D3DFVF_XYZRHW: - // D3DFVF_NORMAL isn't allowed in combination with D3DFVF_XYZRHW - if (dwCurFVF & D3DFVF_NORMAL) { - EmuLog(LOG_LEVEL::WARNING, "EmuFlushIVB(): Normal encountered while D3DFVF_XYZRHW is given - switching back to D3DFVF_XYZ!"); - dwCurFVF &= ~D3DFVF_POSITION_MASK; - dwCurFVF |= D3DFVF_XYZ; - } - break; + if (Register < 0) { + LOG_TEST_CASE("Register < 0"); + return; + } + if (Register >= 16) { + LOG_TEST_CASE("Register >= 16"); + return; } - DWORD dwPos = dwCurFVF & D3DFVF_POSITION_MASK; - DWORD dwTexN = (dwCurFVF & D3DFVF_TEXCOUNT_MASK) >> D3DFVF_TEXCOUNT_SHIFT; - size_t TexSize[xbox::X_D3DTS_STAGECOUNT]; // Xbox supports up to 4 textures + // Write these values to the NV2A registers, so that we read them back when needed + float* attribute_floats = HLE_get_NV2A_vertex_attribute_value_pointer(Register); + attribute_floats[0] = a; + attribute_floats[1] = b; + attribute_floats[2] = c; + attribute_floats[3] = d; - for (unsigned int i = 0; i < dwTexN; i++) { - TexSize[i] = DxbxFVF_GetNumberOfTextureCoordinates(dwCurFVF, i); + // Also, write the given register value to a matching host vertex shader constant + // This allows us to implement Xbox functionality where SetVertexData4f can be used to specify attributes + // not present in the vertex declaration. + // We use range 193 and up to store these values, as Xbox shaders stop at c192! 
+ g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VREGDEFAULTS_BASE + Register, attribute_floats, 1); +} + +DWORD Float4ToDWORD(float* floats) +{ + // Inverse of D3DDevice_SetVertexDataColor + uint8_t a = uint8_t(floats[0] * 255.0f); + uint8_t b = uint8_t(floats[1] * 255.0f); + uint8_t c = uint8_t(floats[2] * 255.0f); + uint8_t d = uint8_t(floats[3] * 255.0f); + uint32_t value = a + (b << 8) + (c << 16) + (d << 24); + return value; +} + +void CxbxImpl_Begin(xbox::X_D3DPRIMITIVETYPE PrimitiveType) +{ + g_InlineVertexBuffer_PrimitiveType = PrimitiveType; + g_InlineVertexBuffer_TableOffset = 0; + g_InlineVertexBuffer_WrittenRegisters = 0; +} + +void CxbxImpl_End() +{ + using namespace xbox; + +#ifndef FIELD_OFFSET +#define FIELD_OFFSET(type, field) ((LONG)(LONG_PTR)&(((type *)0)->field)) +#endif + static const LONG OffsetPerRegister[X_VSH_MAX_ATTRIBUTES] = { + /*X_D3DVSDE_POSITION = 0:*/FIELD_OFFSET(D3DIVB, Position), + /*X_D3DVSDE_BLENDWEIGHT = 1:*/FIELD_OFFSET(D3DIVB, Blend), + /*X_D3DVSDE_NORMAL = 2:*/FIELD_OFFSET(D3DIVB, Normal), + /*X_D3DVSDE_DIFFUSE = 3:*/FIELD_OFFSET(D3DIVB, Diffuse), + /*X_D3DVSDE_SPECULAR = 4:*/FIELD_OFFSET(D3DIVB, Specular), + /*X_D3DVSDE_FOG = 5:*/FIELD_OFFSET(D3DIVB, Fog), + /*X_D3DVSDE_POINTSIZE = 6:*/FIELD_OFFSET(D3DIVB, PointSize), + /*X_D3DVSDE_BACKDIFFUSE = 7:*/FIELD_OFFSET(D3DIVB, BackDiffuse), + /*X_D3DVSDE_BACKSPECULAR = 8:*/FIELD_OFFSET(D3DIVB, BackSpecular), + /*X_D3DVSDE_TEXCOORD0 = 9:*/FIELD_OFFSET(D3DIVB, TexCoord[0]), + /*X_D3DVSDE_TEXCOORD1 = 10:*/FIELD_OFFSET(D3DIVB, TexCoord[1]), + /*X_D3DVSDE_TEXCOORD2 = 11:*/FIELD_OFFSET(D3DIVB, TexCoord[2]), + /*X_D3DVSDE_TEXCOORD3 = 12:*/FIELD_OFFSET(D3DIVB, TexCoord[3]), + /* 13:*/FIELD_OFFSET(D3DIVB, Reg13Up[0]), + /* 14:*/FIELD_OFFSET(D3DIVB, Reg13Up[1]), + /* 15:*/FIELD_OFFSET(D3DIVB, Reg13Up[2]) + }; + static const LONG FormatPerRegister[X_VSH_MAX_ATTRIBUTES] = { + /*X_D3DVSDE_POSITION = 0:*/X_D3DVSDT_FLOAT4, + /*X_D3DVSDE_BLENDWEIGHT = 1:*/X_D3DVSDT_FLOAT4, + 
/*X_D3DVSDE_NORMAL = 2:*/X_D3DVSDT_FLOAT3, + /*X_D3DVSDE_DIFFUSE = 3:*/X_D3DVSDT_D3DCOLOR, + /*X_D3DVSDE_SPECULAR = 4:*/X_D3DVSDT_D3DCOLOR, + /*X_D3DVSDE_FOG = 5:*/X_D3DVSDT_FLOAT1, + /*X_D3DVSDE_POINTSIZE = 6:*/X_D3DVSDT_FLOAT1, + /*X_D3DVSDE_BACKDIFFUSE = 7:*/X_D3DVSDT_D3DCOLOR, + /*X_D3DVSDE_BACKSPECULAR = 8:*/X_D3DVSDT_D3DCOLOR, + /*X_D3DVSDE_TEXCOORD0 = 9:*/X_D3DVSDT_FLOAT4, + /*X_D3DVSDE_TEXCOORD1 = 10:*/X_D3DVSDT_FLOAT4, + /*X_D3DVSDE_TEXCOORD2 = 11:*/X_D3DVSDT_FLOAT4, + /*X_D3DVSDE_TEXCOORD3 = 12:*/X_D3DVSDT_FLOAT4, + /* 13:*/X_D3DVSDT_FLOAT4, + /* 14:*/X_D3DVSDT_FLOAT4, + /* 15:*/X_D3DVSDT_FLOAT4 + }; + static const DWORD SizePerRegister[X_VSH_MAX_ATTRIBUTES] = { + /*X_D3DVSDE_POSITION = 0:*/sizeof(float) * 4, + /*X_D3DVSDE_BLENDWEIGHT = 1:*/sizeof(float) * 4, + /*X_D3DVSDE_NORMAL = 2:*/sizeof(float) * 3, + /*X_D3DVSDE_DIFFUSE = 3:*/sizeof(DWORD), + /*X_D3DVSDE_SPECULAR = 4:*/sizeof(DWORD), + /*X_D3DVSDE_FOG = 5:*/sizeof(float) * 1, + /*X_D3DVSDE_POINTSIZE = 6:*/sizeof(float) * 1, + /*X_D3DVSDE_BACKDIFFUSE = 7:*/sizeof(DWORD), + /*X_D3DVSDE_BACKSPECULAR = 8:*/sizeof(DWORD), + /*X_D3DVSDE_TEXCOORD0 = 9:*/sizeof(float) * 4, + /*X_D3DVSDE_TEXCOORD1 = 10:*/sizeof(float) * 4, + /*X_D3DVSDE_TEXCOORD2 = 11:*/sizeof(float) * 4, + /*X_D3DVSDE_TEXCOORD3 = 12:*/sizeof(float) * 4, + /* 13:*/sizeof(float) * 4, + /* 14:*/sizeof(float) * 4, + /* 15:*/sizeof(float) * 4 + }; + + if (g_InlineVertexBuffer_TableOffset <= 0) { + return; + } + + // Compose an Xbox vertex attribute format according to the registers that have been written to : + UINT uiStride = 0; + g_InlineVertexBuffer_AttributeFormat = {}; + for (int reg = 0; reg < X_VSH_MAX_ATTRIBUTES; reg++) { + if (g_InlineVertexBuffer_WrittenRegisters & (1 << reg)) { + g_InlineVertexBuffer_AttributeFormat.Slots[reg].Format = FormatPerRegister[reg]; + g_InlineVertexBuffer_AttributeFormat.Slots[reg].Offset = uiStride; + uiStride += SizePerRegister[reg]; + } else { + g_InlineVertexBuffer_AttributeFormat.Slots[reg].Format = 
X_D3DVSDT_NONE; + } } - // Use a tooling function to determine the vertex stride : - UINT uiStride = DxbxFVFToVertexSizeInBytes(dwCurFVF, /*bIncludeTextures=*/true); // Make sure the output buffer is big enough UINT NeededSize = g_InlineVertexBuffer_TableOffset * uiStride; if (g_InlineVertexBuffer_DataSize < NeededSize) { @@ -922,102 +820,37 @@ void EmuFlushIVB() g_InlineVertexBuffer_pData = (FLOAT*)malloc(g_InlineVertexBuffer_DataSize); } - FLOAT *pVertexBufferData = g_InlineVertexBuffer_pData; - for(unsigned int v=0;v= 2) { - *pVertexBufferData++ = g_InlineVertexBuffer_Table[v].TexCoord[i].y; - if (TexSize[i] >= 3) { - *pVertexBufferData++ = g_InlineVertexBuffer_Table[v].TexCoord[i].z; - if (TexSize[i] >= 4) { - *pVertexBufferData++ = g_InlineVertexBuffer_Table[v].TexCoord[i].w; - } - } - } - - if (g_bPrintfOn) { - switch (TexSize[i]) { - case 1: EmuLog(LOG_LEVEL::DEBUG, "IVB TexCoord%d := {%f}", i + 1, g_InlineVertexBuffer_Table[v].TexCoord[i].x); break; - case 2: EmuLog(LOG_LEVEL::DEBUG, "IVB TexCoord%d := {%f, %f}", i + 1, g_InlineVertexBuffer_Table[v].TexCoord[i].x, g_InlineVertexBuffer_Table[v].TexCoord[i].y); break; - case 3: EmuLog(LOG_LEVEL::DEBUG, "IVB TexCoord%d := {%f, %f, %f}", i + 1, g_InlineVertexBuffer_Table[v].TexCoord[i].x, g_InlineVertexBuffer_Table[v].TexCoord[i].y, g_InlineVertexBuffer_Table[v].TexCoord[i].z); break; - case 4: EmuLog(LOG_LEVEL::DEBUG, "IVB TexCoord%d := {%f, %f, %f, %f}", i + 1, g_InlineVertexBuffer_Table[v].TexCoord[i].x, g_InlineVertexBuffer_Table[v].TexCoord[i].y, g_InlineVertexBuffer_Table[v].TexCoord[i].z, g_InlineVertexBuffer_Table[v].TexCoord[i].w); break; - } - } - } - - if (v == 0) { - unsigned int VertexBufferUsage = (uintptr_t)pVertexBufferData - (uintptr_t)g_InlineVertexBuffer_pData; - if (VertexBufferUsage != uiStride) { - CxbxKrnlCleanup("EmuFlushIVB uses wrong stride!"); + // For each register that ever got written, copy the data over from + // g_InlineVertexBuffer_Table to pVertexBufferData, but adjacent + // 
to eachother. This, so that host accepts this as a vertex declaration. + // Note, that if we figure out how to draw using vertex buffers that + // contain gaps, the following copy is no longer needed, and we could + // draw straight from g_InlineVertexBuffer_Table instead! + uint8_t* pVertexBufferData = (uint8_t*)g_InlineVertexBuffer_pData; + for (unsigned int v = 0; v < g_InlineVertexBuffer_TableOffset; v++) { + auto vertex_ptr = (uint8_t*)&(g_InlineVertexBuffer_Table[v]); + for (unsigned int reg = 0; reg < X_VSH_MAX_ATTRIBUTES; reg++) { + if (g_InlineVertexBuffer_WrittenRegisters & (1 << reg)) { + auto source = vertex_ptr + OffsetPerRegister[reg]; + auto size = SizePerRegister[reg]; + // Note, that g_InlineVertexBuffer_AttributeFormat is declared with + // data types (in g_InlineVertexBuffer_Table ) that are host-compatible, + // so we can do a straight copy here, there's no conversion needed : + memcpy(pVertexBufferData, source, size); + pVertexBufferData += size; } } } + // Arrange for g_InlineVertexBuffer_AttributeFormat to be returned in CxbxGetVertexDeclaration, + // so that our above composed declaration will be used for the next draw : + g_InlineVertexBuffer_DeclarationOverride = true; + // Note, that g_Xbox_VertexShader_IsFixedFunction should be left untouched, + // because except for the declaration override, the Xbox shader (either FVF + // or a program, or even passthrough shaders) should still be in effect! 
+ + CxbxUpdateNativeD3DResources(); + CxbxDrawContext DrawContext = {}; DrawContext.XboxPrimitiveType = g_InlineVertexBuffer_PrimitiveType; @@ -1025,20 +858,178 @@ void EmuFlushIVB() DrawContext.pXboxVertexStreamZeroData = g_InlineVertexBuffer_pData; DrawContext.uiXboxVertexStreamZeroStride = uiStride; - HRESULT hRet; - - if (bFVF) { - g_pD3DDevice->SetVertexShader(nullptr); - hRet = g_pD3DDevice->SetFVF(dwCurFVF); - //DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); - } - CxbxDrawPrimitiveUP(DrawContext); - if (bFVF) { - hRet = g_pD3DDevice->SetFVF(g_Xbox_VertexShader_Handle); - //DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); + + // Now that we've drawn, stop our override in CxbxGetVertexDeclaration : + g_InlineVertexBuffer_DeclarationOverride = false; + + // TODO: Should technically clean this up at some point..but on XP doesnt matter much + // g_VMManager.Deallocate((VAddr)g_InlineVertexBuffer_pData); + // g_VMManager.Deallocate((VAddr)g_InlineVertexBuffer_Table); +} + +void CxbxImpl_SetVertexData4f(int Register, FLOAT a, FLOAT b, FLOAT c, FLOAT d) +{ + using namespace xbox; + + HRESULT hRet = D3D_OK; + + // Always update our attribute storage with the most recently set register value + CxbxSetVertexAttribute(Register, a, b, c, d); + + // Grow g_InlineVertexBuffer_Table to contain at least current, and a potentially next vertex + if (g_InlineVertexBuffer_TableLength <= g_InlineVertexBuffer_TableOffset + 1) { + UINT InlineVertexBuffer_TableLength_Original = g_InlineVertexBuffer_TableLength; + if (g_InlineVertexBuffer_TableLength == 0) { + g_InlineVertexBuffer_TableLength = PAGE_SIZE / sizeof(D3DIVB); + } else { + g_InlineVertexBuffer_TableLength *= 2; + } + + for (unsigned i = 0; i < (g_InlineVertexBuffer_TableLength - InlineVertexBuffer_TableLength_Original); ++i) { + g_InlineVertexBuffer_Table.emplace_back(); + } + + EmuLog(LOG_LEVEL::DEBUG, "Expanded g_InlineVertexBuffer_Table to %u entries", g_InlineVertexBuffer_TableLength); + + // Sanity 
check: ensure that g_InlineVertexBuffer_Table is not growing indefinetly. This can happen if D3DDevice_Begin and D3DDevice_End + // are not patched, since they both reset g_InlineVertexBuffer_TableOffset back to zero, thus preventing further growth + if (g_InlineVertexBuffer_TableLength > 50000) { + LOG_TEST_CASE("g_InlineVertexBuffer_TableLength > 50000! This probably means that g_InlineVertexBuffer_Table is growing indefinitely."); + } + } + + // Is this the initial call after D3DDevice_Begin() ? + if (g_InlineVertexBuffer_WrittenRegisters == 0) { + // Read starting values for all inline vertex attributes from HLE NV2A pgraph (converting them to required types) : + g_InlineVertexBuffer_Table[0].Position = D3DXVECTOR3(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_POSITION)); + g_InlineVertexBuffer_Table[0].Rhw = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_POSITION)[3]; + g_InlineVertexBuffer_Table[0].Blend[0] = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_BLENDWEIGHT)[0]; + g_InlineVertexBuffer_Table[0].Blend[1] = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_BLENDWEIGHT)[1]; + g_InlineVertexBuffer_Table[0].Blend[2] = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_BLENDWEIGHT)[2]; + g_InlineVertexBuffer_Table[0].Blend[3] = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_BLENDWEIGHT)[3]; + g_InlineVertexBuffer_Table[0].Normal = D3DXVECTOR3(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_NORMAL)); + g_InlineVertexBuffer_Table[0].Diffuse = Float4ToDWORD(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_DIFFUSE)); + g_InlineVertexBuffer_Table[0].Specular = Float4ToDWORD(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_SPECULAR)); + g_InlineVertexBuffer_Table[0].Fog = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_FOG)[0]; + g_InlineVertexBuffer_Table[0].PointSize = HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_POINTSIZE)[0]; + g_InlineVertexBuffer_Table[0].BackDiffuse = 
Float4ToDWORD(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_BACKDIFFUSE)); + g_InlineVertexBuffer_Table[0].BackSpecular = Float4ToDWORD(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_BACKSPECULAR)); + g_InlineVertexBuffer_Table[0].TexCoord[0] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_TEXCOORD0)); + g_InlineVertexBuffer_Table[0].TexCoord[1] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_TEXCOORD1)); + g_InlineVertexBuffer_Table[0].TexCoord[2] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_TEXCOORD2)); + g_InlineVertexBuffer_Table[0].TexCoord[3] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(X_D3DVSDE_TEXCOORD3)); + g_InlineVertexBuffer_Table[0].Reg13Up[0] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(13)); + g_InlineVertexBuffer_Table[0].Reg13Up[1] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(14)); + g_InlineVertexBuffer_Table[0].Reg13Up[2] = D3DXVECTOR4(HLE_get_NV2A_vertex_attribute_value_pointer(15)); + // Note : Because all members are assigned an initial value, there's no need for a clearing constructor for _D3DIVB! 
+ } + + if (Register == X_D3DVSDE_VERTEX) + g_InlineVertexBuffer_WrittenRegisters |= (1 << X_D3DVSDE_POSITION); + else if (Register < 16) + g_InlineVertexBuffer_WrittenRegisters |= (1 << Register); + + unsigned o = g_InlineVertexBuffer_TableOffset; + + switch (Register) + { + case X_D3DVSDE_VERTEX: + case X_D3DVSDE_POSITION: + { + // Note : Setting position signals completion of a vertex + g_InlineVertexBuffer_Table[o].Position.x = a; + g_InlineVertexBuffer_Table[o].Position.y = b; + g_InlineVertexBuffer_Table[o].Position.z = c; + g_InlineVertexBuffer_Table[o].Rhw = d; + // Start a new vertex + g_InlineVertexBuffer_TableOffset++; + // Copy all attributes of the prior vertex to the new one, to simulate persistent attribute values + g_InlineVertexBuffer_Table[g_InlineVertexBuffer_TableOffset] = g_InlineVertexBuffer_Table[o]; + break; + } + + case X_D3DVSDE_BLENDWEIGHT: + { + g_InlineVertexBuffer_Table[o].Blend[0] = a; + g_InlineVertexBuffer_Table[o].Blend[1] = b; + g_InlineVertexBuffer_Table[o].Blend[2] = c; + g_InlineVertexBuffer_Table[o].Blend[3] = d; + break; + } + + case X_D3DVSDE_NORMAL: + { + g_InlineVertexBuffer_Table[o].Normal.x = a; + g_InlineVertexBuffer_Table[o].Normal.y = b; + g_InlineVertexBuffer_Table[o].Normal.z = c; + break; + } + + case X_D3DVSDE_DIFFUSE: + { + g_InlineVertexBuffer_Table[o].Diffuse = D3DCOLOR_COLORVALUE(a, b, c, d); + break; + } + + case X_D3DVSDE_SPECULAR: + { + g_InlineVertexBuffer_Table[o].Specular = D3DCOLOR_COLORVALUE(a, b, c, d); + break; + } + + case X_D3DVSDE_FOG: // Xbox extension + { + g_InlineVertexBuffer_Table[o].Fog = a; // TODO : What about the other (b, c and d) arguments? + break; + } + + case X_D3DVSDE_POINTSIZE: + { + g_InlineVertexBuffer_Table[o].PointSize = a; // TODO : What about the other (b, c and d) arguments? 
+ break; + } + + case X_D3DVSDE_BACKDIFFUSE: // Xbox extension + { + g_InlineVertexBuffer_Table[o].BackDiffuse = D3DCOLOR_COLORVALUE(a, b, c, d); + break; + } + + case X_D3DVSDE_BACKSPECULAR: // Xbox extension + { + g_InlineVertexBuffer_Table[o].BackSpecular = D3DCOLOR_COLORVALUE(a, b, c, d); + break; + } + + case X_D3DVSDE_TEXCOORD0: + case X_D3DVSDE_TEXCOORD1: + case X_D3DVSDE_TEXCOORD2: + case X_D3DVSDE_TEXCOORD3: + { + unsigned i = Register - X_D3DVSDE_TEXCOORD0; + g_InlineVertexBuffer_Table[o].TexCoord[i].x = a; + g_InlineVertexBuffer_Table[o].TexCoord[i].y = b; + g_InlineVertexBuffer_Table[o].TexCoord[i].z = c; + g_InlineVertexBuffer_Table[o].TexCoord[i].w = d; + break; + } + + case 13: + case 14: + case 15: + { + unsigned i = Register - 13; + g_InlineVertexBuffer_Table[o].Reg13Up[i].x = a; + g_InlineVertexBuffer_Table[o].Reg13Up[i].y = b; + g_InlineVertexBuffer_Table[o].Reg13Up[i].z = c; + g_InlineVertexBuffer_Table[o].Reg13Up[i].w = d; + break; + } + + default: + EmuLog(LOG_LEVEL::WARNING, "Unknown IVB Register : %d", Register); } - g_InlineVertexBuffer_TableOffset = 0; // Might not be needed (also cleared in D3DDevice_Begin) } void CxbxImpl_SetStreamSource(UINT StreamNumber, xbox::X_D3DVertexBuffer* pStreamData, UINT Stride) diff --git a/src/core/hle/D3D8/XbVertexBuffer.h b/src/core/hle/D3D8/XbVertexBuffer.h index 654560d36..4fea1cfb0 100644 --- a/src/core/hle/D3D8/XbVertexBuffer.h +++ b/src/core/hle/D3D8/XbVertexBuffer.h @@ -56,7 +56,8 @@ class CxbxPatchedStream public: CxbxPatchedStream(); ~CxbxPatchedStream(); - void Activate(CxbxDrawContext *pDrawContext, UINT uiStream) const; + void Clear(); + void Activate(CxbxDrawContext *pDrawContext, UINT HostStreamNumber) const; bool isValid = false; xbox::X_D3DPRIMITIVETYPE XboxPrimitiveType = xbox::X_D3DPT_NONE; PVOID pCachedXboxVertexData = xbox::zeroptr; @@ -99,39 +100,44 @@ class CxbxVertexBufferConverter void ConvertStream(CxbxDrawContext *pPatchDesc, UINT uiStream); }; -// inline vertex buffer emulation 
+// Inline vertex buffer emulation extern xbox::X_D3DPRIMITIVETYPE g_InlineVertexBuffer_PrimitiveType; -extern DWORD g_InlineVertexBuffer_FVF; -struct _D3DIVB +typedef struct _D3DIVB { D3DXVECTOR3 Position; // X_D3DVSDE_POSITION (*) > D3DFVF_XYZ / D3DFVF_XYZRHW - FLOAT Rhw; // X_D3DVSDE_VERTEX (*) > D3DFVF_XYZ / D3DFVF_XYZRHW - FLOAT Blend[4]; // X_D3DVSDE_BLENDWEIGHT > D3DFVF_XYZB1 (and 3 more up to D3DFVF_XYZB4) + FLOAT Rhw; // X_D3DVSDE_VERTEX (*) > D3DFVF_XYZ / D3DFVF_XYZRHW + FLOAT Blend[4]; // X_D3DVSDE_BLENDWEIGHT > D3DFVF_XYZB1 (and 3 more up to D3DFVF_XYZB4) D3DXVECTOR3 Normal; // X_D3DVSDE_NORMAL > D3DFVF_NORMAL - D3DCOLOR Diffuse; // X_D3DVSDE_DIFFUSE > D3DFVF_DIFFUSE - D3DCOLOR Specular; // X_D3DVSDE_SPECULAR > D3DFVF_SPECULAR - FLOAT Fog; // X_D3DVSDE_FOG > D3DFVF_FOG unavailable; TODO : How to handle? - D3DCOLOR BackDiffuse; // X_D3DVSDE_BACKDIFFUSE > D3DFVF_BACKDIFFUSE unavailable; TODO : How to handle? - D3DCOLOR BackSpecular; // X_D3DVSDE_BACKSPECULAR > D3DFVF_BACKSPECULAR unavailable; TODO : How to handle? - D3DXVECTOR4 TexCoord[4]; // X_D3DVSDE_TEXCOORD0 > D3DFVF_TEX1 (and 4 more up to D3DFVF_TEX4) - + D3DCOLOR Diffuse; // X_D3DVSDE_DIFFUSE > D3DFVF_DIFFUSE + D3DCOLOR Specular; // X_D3DVSDE_SPECULAR > D3DFVF_SPECULAR + FLOAT Fog; // X_D3DVSDE_FOG > D3DFVF_FOG unavailable; TODO : How to handle? + FLOAT PointSize; // X_D3DVSDE_POINTSIZE > D3DFVF_POINTSIZE unavailable; TODO : How to handle? + D3DCOLOR BackDiffuse; // X_D3DVSDE_BACKDIFFUSE > D3DFVF_BACKDIFFUSE unavailable; TODO : How to handle? + D3DCOLOR BackSpecular; // X_D3DVSDE_BACKSPECULAR > D3DFVF_BACKSPECULAR unavailable; TODO : How to handle? + D3DXVECTOR4 TexCoord[4]; // X_D3DVSDE_TEXCOORD0 > D3DFVF_TEX1, (and 3 more up to D3DFVF_TEX4) + D3DXVECTOR4 Reg13Up[3]; // (*) X_D3DVSDE_POSITION and X_D3DVSDE_VERTEX both set Position, but Rhw seems optional, // hence, selection for D3DFVF_XYZ or D3DFVF_XYZRHW is rather fuzzy. 
We DO know that once // D3DFVF_NORMAL is given, D3DFVF_XYZRHW is forbidden (see D3DDevice_SetVertexData4f) - _D3DIVB(); struct _D3DIVB &operator=(const struct _D3DIVB &Val); -}; -extern std::vector<_D3DIVB> g_InlineVertexBuffer_Table; +} D3DIVB; + +extern std::vector<D3DIVB> g_InlineVertexBuffer_Table; extern UINT g_InlineVertexBuffer_TableLength; extern UINT g_InlineVertexBuffer_TableOffset; -extern void EmuFlushIVB(); - extern void EmuUpdateActiveTexture(); +extern void CxbxSetVertexAttribute(int Register, FLOAT a, FLOAT b, FLOAT c, FLOAT d); + +extern void CxbxImpl_Begin(xbox::X_D3DPRIMITIVETYPE PrimitiveType); +extern void CxbxImpl_End(); +extern void CxbxImpl_SetStreamSource(UINT StreamNumber, xbox::X_D3DVertexBuffer* pStreamData, UINT Stride); +extern void CxbxImpl_SetVertexData4f(int Register, FLOAT a, FLOAT b, FLOAT c, FLOAT d); + extern DWORD g_dwPrimPerFrame; - + #endif diff --git a/src/core/hle/D3D8/XbVertexShader.cpp b/src/core/hle/D3D8/XbVertexShader.cpp index 8974ec05d..506422feb 100644 --- a/src/core/hle/D3D8/XbVertexShader.cpp +++ b/src/core/hle/D3D8/XbVertexShader.cpp @@ -29,13 +29,19 @@ //#define _DEBUG_TRACK_VS +#include "common\util\hasher.h" // For ComputeHash #include "core\kernel\init\CxbxKrnl.h" #include "core\kernel\support\Emu.h" #include "core\hle\D3D8\Direct3D9\Direct3D9.h" // For g_Xbox_VertexShader_Handle +#include "core\hle\D3D8\Direct3D9\RenderStates.h" // For XboxRenderStateConverter #include "core\hle\D3D8\Direct3D9\VertexShaderSource.h" // For g_VertexShaderSource +#include "core\hle\D3D8\XbVertexBuffer.h" // For CxbxImpl_SetVertexData4f #include "core\hle\D3D8\XbVertexShader.h" #include "core\hle\D3D8\XbD3D8Logging.h" // For DEBUG_D3DRESULT +#include "core\hle\D3D8\XbConvert.h" // For NV2A_VP_UPLOAD_INST, NV2A_VP_UPLOAD_CONST_ID, NV2A_VP_UPLOAD_CONST +#include "devices\video\nv2a.h" // For D3DPUSH_DECODE #include "common\Logging.h" // For LOG_INIT +#include "common\Settings.hpp" // for g_LibVersion_D3D8 #include "XbD3D8Types.h" // For 
X_D3DVSDE_* #include @@ -43,6 +49,303 @@ #include #include +// External symbols : +extern xbox::X_STREAMINPUT g_Xbox_SetStreamSource[X_VSH_MAX_STREAMS]; // Declared in XbVertexBuffer.cpp +extern XboxRenderStateConverter XboxRenderStates; // Declared in Direct3D9.cpp + +// Variables set by [D3DDevice|CxbxImpl]_SetVertexShaderInput() : + unsigned g_Xbox_SetVertexShaderInput_Count = 0; // Read by GetXboxVertexAttributes + xbox::X_STREAMINPUT g_Xbox_SetVertexShaderInput_Data[X_VSH_MAX_STREAMS] = { 0 }; // Active when g_Xbox_SetVertexShaderInput_Count > 0 + xbox::X_VERTEXATTRIBUTEFORMAT g_Xbox_SetVertexShaderInput_Attributes = { 0 }; // Read by GetXboxVertexAttributes when g_Xbox_SetVertexShaderInput_Count > 0 + +// Variables set by [D3DDevice|CxbxImpl]_SetVertexShader() and [D3DDevice|CxbxImpl]_SelectVertexShader() : + bool g_Xbox_VertexShader_IsFixedFunction = true; + bool g_Xbox_VertexShader_IsPassthrough = false; + xbox::dword_xt g_Xbox_VertexShader_Handle = 0; +#ifdef CXBX_USE_GLOBAL_VERTEXSHADER_POINTER // TODO : Would this be more accurate / simpler? 
+ xbox::X_D3DVertexShader *g_Xbox_VertexShader_Ptr = nullptr; +#endif + xbox::dword_xt g_Xbox_VertexShader_FunctionSlots_StartAddress = 0; + +// Variable set by [D3DDevice|CxbxImpl]_LoadVertexShader() / [D3DDevice|CxbxImpl]_LoadVertexShaderProgram() (both through CxbxCopyVertexShaderFunctionSlots): + xbox::dword_xt g_Xbox_VertexShader_FunctionSlots[(X_VSH_MAX_INSTRUCTION_COUNT + 1) * X_VSH_INSTRUCTION_SIZE] = { 0 }; // One extra for FLD_FINAL terminator + +// Variables set by [D3DDevice|CxbxImpl]_SetScreenSpaceOffset: + float g_Xbox_ScreenSpaceOffset_x = 0.0f; + float g_Xbox_ScreenSpaceOffset_y = 0.0f; + + +static xbox::X_D3DVertexShader g_Xbox_VertexShader_ForFVF = {}; + +// Converts an Xbox FVF shader handle to X_D3DVertexShader +// Note : Temporary, until we reliably locate the Xbox internal state for this +// See D3DXDeclaratorFromFVF docs https://docs.microsoft.com/en-us/windows/win32/direct3d9/d3dxdeclaratorfromfvf +// and https://github.com/reactos/wine/blob/2e8dfbb1ad71f24c41e8485a39df01bb9304127f/dlls/d3dx9_36/mesh.c#L2041 +static xbox::X_D3DVertexShader* XboxVertexShaderFromFVF(DWORD xboxFvf) +{ + using namespace xbox; + + // Note : FVFs don't tessellate, all slots read from stream zero, therefore + // the following zero-initialization of StreamIndex (like all other fields) + // is never updated below. 
+ g_Xbox_VertexShader_ForFVF = { 0 }; + + // Shorthand, glueing earlier implementation below to global g_Xbox_VertexShader_ForFVF variable : + X_VERTEXATTRIBUTEFORMAT &declaration = g_Xbox_VertexShader_ForFVF.VertexAttribute; + + static DWORD X_D3DVSDT_FLOAT[] = { 0, X_D3DVSDT_FLOAT1, X_D3DVSDT_FLOAT2, X_D3DVSDT_FLOAT3, X_D3DVSDT_FLOAT4 }; + + static const DWORD InvalidXboxFVFBits = X_D3DFVF_RESERVED0 | X_D3DFVF_RESERVED1 /* probably D3DFVF_PSIZE if detected */ + | 0x0000F000 // Bits between texture count and the texture formats + | 0xFF000000; // All bits above the four allowed texture formats + + if (xboxFvf & InvalidXboxFVFBits) { + // Test-case : JSRF (after "now Loading...") TODO : Figure out what's going on + // LOG_TEST_CASE("Invalid Xbox FVF bits detected!"); + } + + // Position & Blendweights + int nrPositionFloats = 3; + int nrBlendWeights = 0; + unsigned offset = 0; + DWORD position = (xboxFvf & X_D3DFVF_POSITION_MASK); + switch (position) { + case 0: nrPositionFloats = 0; LOG_TEST_CASE("FVF without position"); break; // Note : Remove logging if this occurs often + case X_D3DFVF_XYZ: /*nrPositionFloats is set to 3 by default*/ break; + case X_D3DFVF_XYZRHW: + g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_PASSTHROUGH; + nrPositionFloats = 4; + break; + case X_D3DFVF_XYZB1: nrBlendWeights = 1; break; + case X_D3DFVF_XYZB2: nrBlendWeights = 2; break; + case X_D3DFVF_XYZB3: nrBlendWeights = 3; break; + case X_D3DFVF_XYZB4: nrBlendWeights = 4; break; + case X_D3DFVF_POSITION_MASK: /*Keep nrPositionFloats set to 3*/ LOG_TEST_CASE("FVF invalid (5th blendweight?)"); break; + DEFAULT_UNREACHABLE; + } + + // Assign vertex element (attribute) slots + X_VERTEXSHADERINPUT* pSlot; + + // Write Position + if (nrPositionFloats > 0) { + pSlot = &declaration.Slots[X_D3DVSDE_POSITION]; + pSlot->Format = X_D3DVSDT_FLOAT[nrPositionFloats]; + pSlot->Offset = offset; + offset += sizeof(float) * nrPositionFloats; + // Write Blend Weights + if (nrBlendWeights > 0) { + 
pSlot = &declaration.Slots[X_D3DVSDE_BLENDWEIGHT]; + pSlot->Format = X_D3DVSDT_FLOAT[nrBlendWeights]; + pSlot->Offset = offset; + offset += sizeof(float) * nrBlendWeights; + } + } + + // Write Normal, Diffuse, and Specular + if (xboxFvf & X_D3DFVF_NORMAL) { + if (position == X_D3DFVF_XYZRHW) { + LOG_TEST_CASE("X_D3DFVF_NORMAL shouldn't use X_D3DFVF_XYZRHW"); + } + + pSlot = &declaration.Slots[X_D3DVSDE_NORMAL]; + pSlot->Format = X_D3DVSDT_FLOAT[3]; + pSlot->Offset = offset; + offset += sizeof(float) * 3; + } + + if (xboxFvf & X_D3DFVF_DIFFUSE) { + g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_HASDIFFUSE; + pSlot = &declaration.Slots[X_D3DVSDE_DIFFUSE]; + pSlot->Format = X_D3DVSDT_D3DCOLOR; + pSlot->Offset = offset; + offset += sizeof(DWORD) * 1; + } + + if (xboxFvf & X_D3DFVF_SPECULAR) { + g_Xbox_VertexShader_ForFVF.Flags |= X_VERTEXSHADER_FLAG_HASSPECULAR; + pSlot = &declaration.Slots[X_D3DVSDE_SPECULAR]; + pSlot->Format = X_D3DVSDT_D3DCOLOR; + pSlot->Offset = offset; + offset += sizeof(DWORD) * 1; + } + + // Write Texture Coordinates + int textureCount = (xboxFvf & X_D3DFVF_TEXCOUNT_MASK) >> X_D3DFVF_TEXCOUNT_SHIFT; + if (textureCount > 4) { + LOG_TEST_CASE("Limiting FVF to 4 textures"); + textureCount = 4; // Safeguard, since the X_D3DFVF_TEXCOUNT bitfield could contain invalid values (5 up to 15) + } + for (int i = 0; i < textureCount; i++) { + int numberOfCoordinates = 0; + auto FVFTextureFormat = (xboxFvf >> X_D3DFVF_TEXCOORDSIZE_SHIFT(i)) & 0x003; + switch (FVFTextureFormat) { + case X_D3DFVF_TEXTUREFORMAT1: numberOfCoordinates = 1; break; + case X_D3DFVF_TEXTUREFORMAT2: numberOfCoordinates = 2; break; + case X_D3DFVF_TEXTUREFORMAT3: numberOfCoordinates = 3; break; + case X_D3DFVF_TEXTUREFORMAT4: numberOfCoordinates = 4; break; + DEFAULT_UNREACHABLE; + } + + assert(numberOfCoordinates > 0); + pSlot = &declaration.Slots[X_D3DVSDE_TEXCOORD0 + i]; + pSlot->Format = X_D3DVSDT_FLOAT[numberOfCoordinates]; + pSlot->Offset = offset; + offset += 
sizeof(float) * numberOfCoordinates; + // Update the VertexShader texture Dimensionality field here as well + g_Xbox_VertexShader_ForFVF.Dimensionality[i] = numberOfCoordinates; + } + + // Make sure all unused slots have a X_D3DVSDT_NONE format + // TODO : Actually, maybe not, since this could avoid VshConvertToken_STREAMDATA_REG() calls! + for (unsigned i = 0; i < X_VSH_MAX_ATTRIBUTES; i++) { + pSlot = &declaration.Slots[i]; + if (pSlot->Format == 0) { + pSlot->Format = X_D3DVSDT_NONE; + } + } + + // Return the global g_Xbox_VertexShader_ForFVF variable + return &g_Xbox_VertexShader_ForFVF; +} + +static xbox::X_D3DVertexShader* CxbxGetXboxVertexShaderForHandle(DWORD Handle) +{ + if (VshHandleIsVertexShader(Handle)) { + return VshHandleToXboxVertexShader(Handle); + } else { + return XboxVertexShaderFromFVF(Handle); + } +} + +// TODO : Start using this function everywhere g_Xbox_VertexShader_Handle is accessed currently! +xbox::X_D3DVertexShader* GetXboxVertexShader() +{ + // LOG_INIT; // Allows use of DEBUG_D3DRESULT + + using namespace xbox; + + X_D3DVertexShader* pXboxVertexShader = xbox::zeroptr; +#if 0 // TODO : Retrieve vertex shader from actual Xbox D3D state + // Only when we're sure of the location of the Xbox Device.m_pVertexShader variable + if (XboxVertexShaders.g_XboxAddr_pVertexShader) { + // read that (so that we get access to internal vertex shaders, like those generated + // to contain the attribute-information for FVF shaders) : + pXboxVertexShader = (X_D3DVertexShader*)(*XboxVertexShaders.g_XboxAddr_pVertexShader); + } + else + { + LOG_TEST_CASE("Unknown pVertexShader symbol location!"); +#endif + // Otherwise, we have no choice but to use what we've last stored in the + // g_Xbox_VertexShader_Handle variable via our D3DDevice_SetVertexShader + // and D3DDevice_SelectVertexShader* patches. 
+ + // Note, that once we have a fail-safe way to determine the location of the + // Xbox Device.m_pVertexShader symbol, the FVF and the accompanying Address, + // we no longer need this statement block, nor patches on D3DDevice_SetVertexShader + // nor D3DDevice_SelectVertexShader* ! + + // Now, to convert, we do need to have a valid vertex shader : + if (g_Xbox_VertexShader_Handle == 0) { + LOG_TEST_CASE("Unassigned Xbox vertex shader!"); + return nullptr; + } + +#if 0 // TODO : Retrieve vertex shader from actual Xbox D3D state + if (!VshHandleIsVertexShader(g_Xbox_VertexShader_Handle)) { + LOG_TEST_CASE("Xbox vertex shader lacks X_D3DFVF_RESERVED0 bit!"); + return nullptr; + } +#endif + +#ifdef CXBX_USE_GLOBAL_VERTEXSHADER_POINTER + pXboxVertexShader = g_Xbox_VertexShader_Ptr; +#else + pXboxVertexShader = CxbxGetXboxVertexShaderForHandle(g_Xbox_VertexShader_Handle); +#endif + +#if 0 // TODO : Retrieve vertex shader from actual Xbox D3D state + } +#endif + + return pXboxVertexShader; +} + +static bool UseXboxD3DVertexShaderTypeForVersion3948(xbox::X_D3DVertexShader* pXboxVertexShader) +{ + // Don't check XDK version for our internal FVF vertex shader + // because g_Xbox_VertexShader_ForFVF is an internal variable + // that's compiled in as a xbox::X_D3DVertexShader + if (pXboxVertexShader == &g_Xbox_VertexShader_ForFVF) { + return false; + } + + return g_LibVersion_D3D8 <= 3948; +} + +static xbox::X_VERTEXATTRIBUTEFORMAT* CxbxGetVertexShaderAttributes(xbox::X_D3DVertexShader* pXboxVertexShader) +{ + if (UseXboxD3DVertexShaderTypeForVersion3948(pXboxVertexShader)) { + auto pXboxVertexShader3948 = (xbox::X_D3DVertexShader3948*)pXboxVertexShader; + return &(pXboxVertexShader3948->VertexAttribute); + } + + return &(pXboxVertexShader->VertexAttribute); +} + +static DWORD* CxbxGetVertexShaderTokens(xbox::X_D3DVertexShader* pXboxVertexShader, DWORD* pNrTokens) +{ + if (UseXboxD3DVertexShaderTypeForVersion3948(pXboxVertexShader)) { + auto pXboxVertexShader3948 = 
(xbox::X_D3DVertexShader3948*)pXboxVertexShader; + *pNrTokens = pXboxVertexShader3948->ProgramAndConstantsDwords; + return &pXboxVertexShader3948->ProgramAndConstants[0]; + } + + *pNrTokens = pXboxVertexShader->ProgramAndConstantsDwords; + return &pXboxVertexShader->ProgramAndConstants[0]; +} + +extern bool g_InlineVertexBuffer_DeclarationOverride; // TMP glue +extern xbox::X_VERTEXATTRIBUTEFORMAT g_InlineVertexBuffer_AttributeFormat; // TMP glue + +xbox::X_VERTEXATTRIBUTEFORMAT* GetXboxVertexAttributeFormat() +{ + // Special case for CxbxImpl_End() based drawing + if (g_InlineVertexBuffer_DeclarationOverride) { + return &g_InlineVertexBuffer_AttributeFormat; + } + + xbox::X_D3DVertexShader* pXboxVertexShader = GetXboxVertexShader(); + if (pXboxVertexShader == xbox::zeroptr) { + // GetXboxVertexShader() returned no shader : log it and fall back to the SetVertexShaderInput attributes + LOG_TEST_CASE("Xbox should always have a VertexShader set (even for FVF's)"); + return &g_Xbox_SetVertexShaderInput_Attributes; // WRONG result, but it's already strange this happens + } + + // If SetVertexShaderInput is active, its arguments overrule those of the active vertex shader + if (g_Xbox_SetVertexShaderInput_Count > 0) { + // Take overrides (on declarations and streaminputs, as optionally set by SetVertexShaderInput) into account : + // Test-case : Crazy taxi 3 + LOG_TEST_CASE("SetVertexShaderInput_Attributes override in effect!"); + return &g_Xbox_SetVertexShaderInput_Attributes; + } + + return CxbxGetVertexShaderAttributes(pXboxVertexShader); +} + +// Reads the active Xbox stream input values (containing VertexBuffer, Offset and Stride) for the given stream index. +// (These values are set through SetStreamSource and can be overridden by SetVertexShaderInput.) 
+xbox::X_STREAMINPUT& GetXboxVertexStreamInput(unsigned XboxStreamNumber) +{ + // If SetVertexShaderInput is active, its arguments overrule those of SetStreamSource + if (g_Xbox_SetVertexShaderInput_Count > 0) { + return g_Xbox_SetVertexShaderInput_Data[XboxStreamNumber]; + } + + return g_Xbox_SetStreamSource[XboxStreamNumber]; +} + #define DbgVshPrintf \ LOG_CHECK_ENABLED(LOG_LEVEL::DEBUG) \ if(g_bPrintfOn) printf @@ -341,500 +644,74 @@ class XboxVertexDeclarationConverter { protected: // Internal variables - CxbxVertexDeclaration* pVertexDeclarationToSet; + CxbxVertexDeclaration* pCurrentVertexDeclaration; CxbxVertexShaderStreamInfo* pCurrentVertexShaderStreamInfo = nullptr; bool IsFixedFunction; - D3DVERTEXELEMENT* pRecompiled; + D3DVERTEXELEMENT* pCurrentHostVertexElement; std::array RegVIsPresentInDeclaration; -public: - // Output - DWORD XboxDeclarationCount; - private: #define D3DDECLUSAGE_UNSUPPORTED ((D3DDECLUSAGE)-1) - static D3DDECLUSAGE Xb2PCRegisterType - ( - DWORD VertexRegister, - BYTE& PCUsageIndex - ) + D3DDECLUSAGE Xb2PCRegisterType(DWORD VertexRegister, BYTE &UsageIndex) { - D3DDECLUSAGE PCRegisterType; - PCUsageIndex = 0; - - switch (VertexRegister) - { - case (DWORD)xbox::X_D3DVSDE_VERTEX: // -1 - PCRegisterType = D3DDECLUSAGE_UNSUPPORTED; - break; - case xbox::X_D3DVSDE_POSITION: // 0 - PCRegisterType = D3DDECLUSAGE_POSITION; - break; - case xbox::X_D3DVSDE_BLENDWEIGHT: // 1 - PCRegisterType = D3DDECLUSAGE_BLENDWEIGHT; - break; - case xbox::X_D3DVSDE_NORMAL: // 2 - PCRegisterType = D3DDECLUSAGE_NORMAL; - break; - case xbox::X_D3DVSDE_DIFFUSE: // 3 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 0; - break; - case xbox::X_D3DVSDE_SPECULAR: // 4 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 1; - break; - case xbox::X_D3DVSDE_FOG: // 5 - PCRegisterType = D3DDECLUSAGE_FOG; - break; - case xbox::X_D3DVSDE_POINTSIZE: // 6 - PCRegisterType = D3DDECLUSAGE_PSIZE; - break; - case xbox::X_D3DVSDE_BACKDIFFUSE: // 7 - PCRegisterType = 
D3DDECLUSAGE_COLOR; PCUsageIndex = 2; - break; - case xbox::X_D3DVSDE_BACKSPECULAR: // 8 - PCRegisterType = D3DDECLUSAGE_COLOR; PCUsageIndex = 3; - break; - case xbox::X_D3DVSDE_TEXCOORD0: // 9 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 0; - break; - case xbox::X_D3DVSDE_TEXCOORD1: // 10 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 1; - break; - case xbox::X_D3DVSDE_TEXCOORD2: // 11 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 2; - break; - case xbox::X_D3DVSDE_TEXCOORD3: // 12 - PCRegisterType = D3DDECLUSAGE_TEXCOORD; PCUsageIndex = 3; - break; - default: - PCRegisterType = D3DDECLUSAGE_UNSUPPORTED; - break; - } - - return PCRegisterType; - } - - static char* XboxVertexRegisterAsString(DWORD VertexRegister) - { - switch (VertexRegister) - { - case (DWORD)xbox::X_D3DVSDE_VERTEX: // -1 - return "D3DVSDE_VERTEX /* xbox ext. */"; - case xbox::X_D3DVSDE_POSITION: // 0 - return "D3DVSDE_POSITION"; - case xbox::X_D3DVSDE_BLENDWEIGHT: // 1 - return "D3DVSDE_BLENDWEIGHT"; - case xbox::X_D3DVSDE_NORMAL: // 2 - return "D3DVSDE_NORMAL"; - case xbox::X_D3DVSDE_DIFFUSE: // 3 - return "D3DVSDE_DIFFUSE"; - case xbox::X_D3DVSDE_SPECULAR: // 4 - return "D3DVSDE_SPECULAR"; - case xbox::X_D3DVSDE_FOG: // 5 - return "D3DVSDE_FOG"; - case xbox::X_D3DVSDE_POINTSIZE: // 6 - return "D3DVDSE_POINTSIZE"; - case xbox::X_D3DVSDE_BACKDIFFUSE: // 7 - return "D3DVSDE_BACKDIFFUSE /* xbox ext. */"; - case xbox::X_D3DVSDE_BACKSPECULAR: // 8 - return "D3DVSDE_BACKSPECULAR /* xbox ext. 
*/"; - case xbox::X_D3DVSDE_TEXCOORD0: // 9 - return "D3DVSDE_TEXCOORD0"; - case xbox::X_D3DVSDE_TEXCOORD1: // 10 - return "D3DVSDE_TEXCOORD1"; - case xbox::X_D3DVSDE_TEXCOORD2: // 11 - return "D3DVSDE_TEXCOORD2"; - case xbox::X_D3DVSDE_TEXCOORD3: // 12 - return "D3DVSDE_TEXCOORD3"; - case 13: - return "13 /* unknown register */"; - case 14: - return "14 /* unknown register */"; - case 15: - return "15 /* unknown register */"; - default: - return "16 /* or higher, unknown register */"; + UsageIndex = 0; + switch (VertexRegister) { + case xbox::X_D3DVSDE_POSITION /*= 0*/: return D3DDECLUSAGE_POSITION; + case xbox::X_D3DVSDE_BLENDWEIGHT /*= 1*/: return D3DDECLUSAGE_BLENDWEIGHT; + case xbox::X_D3DVSDE_NORMAL /*= 2*/: return D3DDECLUSAGE_NORMAL; + case xbox::X_D3DVSDE_DIFFUSE /*= 3*/: return D3DDECLUSAGE_COLOR; + case xbox::X_D3DVSDE_SPECULAR /*= 4*/: UsageIndex = 1; return D3DDECLUSAGE_COLOR; + case xbox::X_D3DVSDE_FOG /*= 5*/: return D3DDECLUSAGE_FOG; + case xbox::X_D3DVSDE_POINTSIZE /*= 6*/: return D3DDECLUSAGE_PSIZE; + case xbox::X_D3DVSDE_BACKDIFFUSE /*= 7*/: UsageIndex = 2; return D3DDECLUSAGE_COLOR; + case xbox::X_D3DVSDE_BACKSPECULAR/*= 8*/: UsageIndex = 3; return D3DDECLUSAGE_COLOR; + case xbox::X_D3DVSDE_TEXCOORD0 /*= 9*/: return D3DDECLUSAGE_TEXCOORD; + case xbox::X_D3DVSDE_TEXCOORD1 /*=10*/: UsageIndex = 1; return D3DDECLUSAGE_TEXCOORD; + case xbox::X_D3DVSDE_TEXCOORD2 /*=11*/: UsageIndex = 2; return D3DDECLUSAGE_TEXCOORD; + case xbox::X_D3DVSDE_TEXCOORD3 /*=12*/: UsageIndex = 3; return D3DDECLUSAGE_TEXCOORD; + default /*13-15*/ : + return D3DDECLUSAGE_UNSUPPORTED; } } // VERTEX SHADER - static DWORD VshGetDeclarationCount(DWORD *pXboxDeclaration) + bool VshConvertToken_STREAMDATA_REG(DWORD VertexRegister, xbox::X_VERTEXSHADERINPUT &slot) { - DWORD Pos = 0; - while (pXboxDeclaration[Pos] != X_D3DVSD_END()) - { - Pos++; - } + DWORD XboxVertexElementDataType = slot.Format; - return Pos + 1; - } - - static inline DWORD VshGetTokenType(DWORD XboxToken) - { - 
return (XboxToken & X_D3DVSD_TOKENTYPEMASK) >> X_D3DVSD_TOKENTYPESHIFT; - } - - static inline WORD VshGetVertexStream(DWORD XboxToken) - { - return (XboxToken & X_D3DVSD_STREAMNUMBERMASK) >> X_D3DVSD_STREAMNUMBERSHIFT; - } - - static inline DWORD VshGetVertexRegister(DWORD XboxToken) - { - DWORD regNum = (XboxToken & X_D3DVSD_VERTEXREGMASK) >> X_D3DVSD_VERTEXREGSHIFT; - return regNum; - } - - static inline DWORD VshGetVertexRegisterIn(DWORD XboxToken) - { - DWORD regNum = (XboxToken & X_D3DVSD_VERTEXREGINMASK) >> X_D3DVSD_VERTEXREGINSHIFT; - return regNum; - } - - void VshDumpXboxDeclaration(DWORD* pXboxDeclaration) - { - DbgVshPrintf("DWORD dwVSHDecl[] =\n{\n"); - unsigned iNumberOfVertexStreams = 0; - bool bStreamNeedsPatching = false; - auto pXboxToken = pXboxDeclaration; - while (*pXboxToken != X_D3DVSD_END()) // X_D3DVSD_TOKEN_END - { - DWORD Step = 1; - - switch (VshGetTokenType(*pXboxToken)) { - case xbox::X_D3DVSD_TOKEN_NOP: { - DbgVshPrintf("\tD3DVSD_NOP(),\n"); - break; - } - case xbox::X_D3DVSD_TOKEN_STREAM: { - if (*pXboxToken & X_D3DVSD_STREAMTESSMASK) { - DbgVshPrintf("\tD3DVSD_STREAM_TESS(),\n"); - } else { - if (iNumberOfVertexStreams > 0) { - DbgVshPrintf("\t// NeedPatching: %d\n", bStreamNeedsPatching); - } - DWORD StreamNumber = VshGetVertexStream(*pXboxToken); - DbgVshPrintf("\tD3DVSD_STREAM(%u),\n", StreamNumber); - iNumberOfVertexStreams++; - bStreamNeedsPatching = false; - } - break; - } - case xbox::X_D3DVSD_TOKEN_STREAMDATA: { - if (*pXboxToken & X_D3DVSD_MASK_SKIP) { - WORD SkipCount = (*pXboxToken & X_D3DVSD_SKIPCOUNTMASK) >> X_D3DVSD_SKIPCOUNTSHIFT; - if (*pXboxToken & X_D3DVSD_MASK_SKIPBYTES) { - DbgVshPrintf("\tD3DVSD_SKIPBYTES(%d), /* xbox ext. 
*/\n", SkipCount); - } else { - DbgVshPrintf("\tD3DVSD_SKIP(%d),\n", SkipCount); - } - } else { - DWORD VertexRegister = VshGetVertexRegister(*pXboxToken); - if (IsFixedFunction) { - DbgVshPrintf("\t\tD3DVSD_REG(%s, ", XboxVertexRegisterAsString(VertexRegister)); - } else { - DbgVshPrintf("\t\tD3DVSD_REG(%d, ", (BYTE)VertexRegister); - } - - DWORD XboxVertexElementDataType = (*pXboxToken & X_D3DVSD_DATATYPEMASK) >> X_D3DVSD_DATATYPESHIFT; - switch (XboxVertexElementDataType) { - case xbox::X_D3DVSDT_FLOAT1: // 0x12: - DbgVshPrintf("D3DVSDT_FLOAT1"); - break; - case xbox::X_D3DVSDT_FLOAT2: // 0x22: - DbgVshPrintf("D3DVSDT_FLOAT2"); - break; - case xbox::X_D3DVSDT_FLOAT3: // 0x32: - DbgVshPrintf("D3DVSDT_FLOAT3"); - break; - case xbox::X_D3DVSDT_FLOAT4: // 0x42: - DbgVshPrintf("D3DVSDT_FLOAT4"); - break; - case xbox::X_D3DVSDT_D3DCOLOR: // 0x40: - DbgVshPrintf("D3DVSDT_D3DCOLOR"); - break; - case xbox::X_D3DVSDT_SHORT2: // 0x25: - DbgVshPrintf("D3DVSDT_SHORT2"); - break; - case xbox::X_D3DVSDT_SHORT4: // 0x45: - DbgVshPrintf("D3DVSDT_SHORT4"); - break; - case xbox::X_D3DVSDT_NORMSHORT1: // 0x11: - DbgVshPrintf("D3DVSDT_NORMSHORT1 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_NORMSHORT2: // 0x21: - if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT2N) { - DbgVshPrintf("D3DVSDT_NORMSHORT2"); - } else { - DbgVshPrintf("D3DVSDT_NORMSHORT2 /* xbox ext. */"); - bStreamNeedsPatching = true; - } - break; - case xbox::X_D3DVSDT_NORMSHORT3: // 0x31: - DbgVshPrintf("D3DVSDT_NORMSHORT3 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_NORMSHORT4: // 0x41: - if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT4N) { - DbgVshPrintf("D3DVSDT_NORMSHORT4"); - // No need for patching in D3D9 - } else { - DbgVshPrintf("D3DVSDT_NORMSHORT4 /* xbox ext. */"); - bStreamNeedsPatching = true; - } - break; - case xbox::X_D3DVSDT_NORMPACKED3: // 0x16: - DbgVshPrintf("D3DVSDT_NORMPACKED3 /* xbox ext. 
*/"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_SHORT1: // 0x15: - DbgVshPrintf("D3DVSDT_SHORT1 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_SHORT3: // 0x35: - DbgVshPrintf("D3DVSDT_SHORT3 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_PBYTE1: // 0x14: - DbgVshPrintf("D3DVSDT_PBYTE1 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_PBYTE2: // 0x24: - DbgVshPrintf("D3DVSDT_PBYTE2 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_PBYTE3: // 0x34: - DbgVshPrintf("D3DVSDT_PBYTE3 /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_PBYTE4: // 0x44: - if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { - DbgVshPrintf("D3DVSDT_PBYTE4"); - } else { - DbgVshPrintf("D3DVSDT_PBYTE4 /* xbox ext. */"); - bStreamNeedsPatching = true; - } - break; - case xbox::X_D3DVSDT_FLOAT2H: // 0x72: - DbgVshPrintf("D3DVSDT_FLOAT2H /* xbox ext. */"); - bStreamNeedsPatching = true; - break; - case xbox::X_D3DVSDT_NONE: // 0x02: - DbgVshPrintf("D3DVSDT_NONE /* xbox ext. 
*/"); - break; - default: - DbgVshPrintf("Unknown data type for D3DVSD_REG: 0x%02X\n", XboxVertexElementDataType); - break; - } - - DbgVshPrintf("),\n"); - }; - break; - } - case xbox::X_D3DVSD_TOKEN_TESSELLATOR: { - DWORD VertexRegisterOut = VshGetVertexRegister(*pXboxToken); - if (*pXboxToken & X_D3DVSD_MASK_TESSUV) { - DbgVshPrintf("\tD3DVSD_TESSUV(%s),\n", XboxVertexRegisterAsString(VertexRegisterOut)); - } else { // D3DVSD_TESSNORMAL - DWORD VertexRegisterIn = VshGetVertexRegisterIn(*pXboxToken); - DbgVshPrintf("\tD3DVSD_TESSNORMAL(%s, %s),\n", - XboxVertexRegisterAsString(VertexRegisterIn), - XboxVertexRegisterAsString(VertexRegisterOut)); - } - break; - } - case xbox::X_D3DVSD_TOKEN_CONSTMEM: { - DWORD ConstantAddress = (*pXboxToken & X_D3DVSD_CONSTADDRESSMASK) >> X_D3DVSD_CONSTADDRESSSHIFT; - DWORD Count = (*pXboxToken & X_D3DVSD_CONSTCOUNTMASK) >> X_D3DVSD_CONSTCOUNTSHIFT; - DbgVshPrintf("\tD3DVSD_CONST(%d, %d),\n", ConstantAddress, Count); - LOG_TEST_CASE("X_D3DVSD_TOKEN_CONSTMEM"); - Step = Count * 4 + 1; - break; - } - case xbox::X_D3DVSD_TOKEN_EXT: { - DWORD ExtInfo = (*pXboxToken & X_D3DVSD_EXTINFOMASK) >> X_D3DVSD_EXTINFOSHIFT; - DWORD Count = (*pXboxToken & X_D3DVSD_EXTCOUNTMASK) >> X_D3DVSD_EXTCOUNTSHIFT; - DbgVshPrintf("\tD3DVSD_EXT(%d, %d),\n", ExtInfo, Count); - LOG_TEST_CASE("X_D3DVSD_TOKEN_EXT"); - Step = Count * 4 + 1; // TODO : Is this correct? - break; - } + // Does this attribute use no storage present the vertex (check this as early as possible to avoid needless processing) ? + if (XboxVertexElementDataType == xbox::X_D3DVSDT_NONE) { + // Handle tessellating attributes + switch (slot.TessellationType) { + case 0: return false; // AUTONONE + case 1: // AUTONORMAL + // Note : .Stream, .Offset and .Type are copied from pAttributeSlot->TessellationSource in a post-processing step below, + // because these could all go through an Xbox to host conversion step, so must be copied over afterwards. 
+ pCurrentHostVertexElement->Method = D3DDECLMETHOD_CROSSUV; // for D3DVSD_TESSNORMAL + pCurrentHostVertexElement->Usage = D3DDECLUSAGE_NORMAL; // TODO : Is this correct? + pCurrentHostVertexElement->UsageIndex = 0; // Note : 1 would be wrong + return true; + case 2: // AUTOTEXCOORD + // pCurrentHostVertexElement->Stream = 0; // The input stream is unused (but must be set to 0), which is the current default value + // pCurrentHostVertexElement->Offset = 0; // The input offset is unused (but must be set to 0), which is the current default value + pCurrentHostVertexElement->Type = D3DDECLTYPE_UNUSED; // The input type for D3DDECLMETHOD_UV must be D3DDECLTYPE_UNUSED (the output type implied by D3DDECLMETHOD_UV is D3DDECLTYPE_FLOAT2) + pCurrentHostVertexElement->Method = D3DDECLMETHOD_UV; // For X_D3DVSD_MASK_TESSUV + pCurrentHostVertexElement->Usage = D3DDECLUSAGE_NORMAL; // Note : In Fixed Function Vertex Pipeline, D3DDECLMETHOD_UV must specify usage D3DDECLUSAGE_TEXCOORD or D3DDECLUSAGE_BLENDWEIGHT. TODO : So, what to do? + pCurrentHostVertexElement->UsageIndex = 1; // TODO ; Is this correct? 
+ return true; default: - DbgVshPrintf("Unknown token type: %d\n", VshGetTokenType(*pXboxToken)); - break; - } - - pXboxToken += Step; - } - - if (iNumberOfVertexStreams > 0) { - DbgVshPrintf("\t// NeedPatching: %d\n", bStreamNeedsPatching); - } - - DbgVshPrintf("\tD3DVSD_END()\n};\n"); - - DbgVshPrintf("// NbrStreams: %d\n", iNumberOfVertexStreams); - } - - static void VshConvertToken_NOP(DWORD *pXboxToken) - { - if(*pXboxToken != X_D3DVSD_NOP()) - { - LOG_TEST_CASE("Token NOP found, but extra parameters are given!"); - } - } - - static DWORD VshConvertToken_CONSTMEM(DWORD *pXboxToken) - { - // DWORD ConstantAddress = (*pXboxToken & X_D3DVSD_CONSTADDRESSMASK) >> X_D3DVSD_CONSTADDRESSSHIFT; - DWORD Count = (*pXboxToken & X_D3DVSD_CONSTCOUNTMASK) >> X_D3DVSD_CONSTCOUNTSHIFT; - LOG_TEST_CASE("CONST"); // TODO : Implement - return Count * 4 + 1; - } - - void VshConvertToken_TESSELATOR(DWORD *pXboxToken) - { - BYTE Index; - - if(*pXboxToken & X_D3DVSD_MASK_TESSUV) - { - DWORD VertexRegister = VshGetVertexRegister(*pXboxToken); - DWORD NewVertexRegister = VertexRegister; - - NewVertexRegister = Xb2PCRegisterType(VertexRegister, Index); - // TODO : Expand on the setting of this TESSUV register element : - pRecompiled->Usage = D3DDECLUSAGE(NewVertexRegister); - pRecompiled->UsageIndex = Index; - } - else // D3DVSD_TESSNORMAL - { - DWORD VertexRegisterIn = VshGetVertexRegisterIn(*pXboxToken); - DWORD VertexRegisterOut = VshGetVertexRegister(*pXboxToken); - - DWORD NewVertexRegisterIn = VertexRegisterIn; - DWORD NewVertexRegisterOut = VertexRegisterOut; - - NewVertexRegisterIn = Xb2PCRegisterType(VertexRegisterIn, Index); - // TODO : Expand on the setting of this TESSNORMAL input register element : - pRecompiled->Usage = D3DDECLUSAGE(NewVertexRegisterIn); - pRecompiled->UsageIndex = Index; - - NewVertexRegisterOut = Xb2PCRegisterType(VertexRegisterOut, Index); - // TODO : Expand on the setting of this TESSNORMAL output register element : - pRecompiled++; - 
pRecompiled->Usage = D3DDECLUSAGE(NewVertexRegisterOut); - pRecompiled->UsageIndex = Index; - } - } - - void VshConvertToken_STREAM(DWORD *pXboxToken) - { - // D3DVSD_STREAM_TESS - if(*pXboxToken & X_D3DVSD_STREAMTESSMASK) - { - // TODO - } - else // D3DVSD_STREAM - { - DWORD StreamNumber = VshGetVertexStream(*pXboxToken); - - // new stream - pCurrentVertexShaderStreamInfo = &(pVertexDeclarationToSet->VertexStreams[StreamNumber]); - pCurrentVertexShaderStreamInfo->NeedPatch = FALSE; - pCurrentVertexShaderStreamInfo->DeclPosition = FALSE; - pCurrentVertexShaderStreamInfo->CurrentStreamNumber = 0; - pCurrentVertexShaderStreamInfo->HostVertexStride = 0; - pCurrentVertexShaderStreamInfo->NumberOfVertexElements = 0; - - // Dxbx note : Use Dophin(s), FieldRender, MatrixPaletteSkinning and PersistDisplay as a testcase - - pCurrentVertexShaderStreamInfo->CurrentStreamNumber = VshGetVertexStream(*pXboxToken); - pVertexDeclarationToSet->NumberOfVertexStreams++; - // TODO : Keep a bitmask for all StreamNumber's seen? - } - } - - void VshConvert_RegisterVertexElement( - UINT XboxVertexElementDataType, - UINT XboxVertexElementByteSize, - UINT HostVertexElementByteSize, - BOOL NeedPatching) - { - CxbxVertexShaderStreamElement* pCurrentElement = &(pCurrentVertexShaderStreamInfo->VertexElements[pCurrentVertexShaderStreamInfo->NumberOfVertexElements]); - pCurrentElement->XboxType = XboxVertexElementDataType; - pCurrentElement->XboxByteSize = XboxVertexElementByteSize; - pCurrentElement->HostByteSize = HostVertexElementByteSize; - pCurrentVertexShaderStreamInfo->NumberOfVertexElements++; - pCurrentVertexShaderStreamInfo->NeedPatch |= NeedPatching; - } - - void VshConvert_SkipBytes(int SkipBytesCount) - { - if (SkipBytesCount % sizeof(DWORD)) { - LOG_TEST_CASE("D3DVSD_SKIPBYTES not divisble by 4!"); - } -#if 0 // Potential optimization, for now disabled for simplicity : - else { - // Skip size is a whole multiple of 4 bytes; - // Is stream patching not needed up until this element? 
- if (!pCurrentVertexShaderStreamInfo->NeedPatch) { - // Then we can get away with increasing the host stride, - // which avoids otherwise needless vertex buffer patching : - pCurrentVertexShaderStreamInfo->HostVertexStride += SkipBytesCount; - return; + LOG_TEST_CASE("invalid TessellationType"); + return false; } } -#endif - // Register a 'skip' element, so that Xbox data will be skipped - // without increasing host stride - this does require patching : - VshConvert_RegisterVertexElement(xbox::X_D3DVSDT_NONE, SkipBytesCount, /*HostSize=*/0, /*NeedPatching=*/TRUE); - } - - void VshConvertToken_STREAMDATA_SKIP(DWORD *pXboxToken) - { - WORD SkipCount = (*pXboxToken & X_D3DVSD_SKIPCOUNTMASK) >> X_D3DVSD_SKIPCOUNTSHIFT; - VshConvert_SkipBytes(SkipCount * sizeof(DWORD)); - } - - void VshConvertToken_STREAMDATA_SKIPBYTES(DWORD* pXboxToken) - { - WORD SkipBytesCount = (*pXboxToken & X_D3DVSD_SKIPCOUNTMASK) >> X_D3DVSD_SKIPCOUNTSHIFT; - VshConvert_SkipBytes(SkipBytesCount); - } - - void VshConvertToken_STREAMDATA_REG(DWORD *pXboxToken) - { - DWORD VertexRegister = VshGetVertexRegister(*pXboxToken); - BOOL NeedPatching = FALSE; - BYTE Index; - BYTE HostVertexRegisterType; - - if (IsFixedFunction) { - HostVertexRegisterType = Xb2PCRegisterType(VertexRegister, Index); - } else { - // D3DDECLUSAGE_TEXCOORD can be useds for any user-defined data - // We need this because there is no reliable way to detect the real usage - // Xbox has no concept of 'usage types', it only requires a list of attribute register numbers. - // So we treat them all as 'user-defined' with an Index of the Vertex Register Index - // this prevents information loss in shaders due to non-matching dcl types! 
- HostVertexRegisterType = D3DDECLUSAGE_TEXCOORD; - Index = (BYTE)VertexRegister; - } - - // Add this register to the list of declared registers - RegVIsPresentInDeclaration[VertexRegister] = true; - - DWORD XboxVertexElementDataType = (*pXboxToken & X_D3DVSD_DATATYPEMASK) >> X_D3DVSD_DATATYPESHIFT; - WORD XboxVertexElementByteSize = 0; + WORD XboxVertexElementByteSize = 0; // When set above 0, implies NeedPatching BYTE HostVertexElementDataType = 0; WORD HostVertexElementByteSize = 0; - switch (XboxVertexElementDataType) - { + switch (XboxVertexElementDataType) { case xbox::X_D3DVSDT_FLOAT1: // 0x12: HostVertexElementDataType = D3DDECLTYPE_FLOAT1; HostVertexElementByteSize = 1 * sizeof(FLOAT); @@ -867,112 +744,88 @@ private: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT2N) { HostVertexElementDataType = D3DDECLTYPE_SHORT2N; HostVertexElementByteSize = 2 * sizeof(SHORT); - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT1; HostVertexElementByteSize = 1 * sizeof(FLOAT); } XboxVertexElementByteSize = 1 * sizeof(xbox::short_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_NORMSHORT2: // 0x21: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT2N) { HostVertexElementDataType = D3DDECLTYPE_SHORT2N; HostVertexElementByteSize = 2 * sizeof(SHORT); // No need for patching in D3D9 - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT2; HostVertexElementByteSize = 2 * sizeof(FLOAT); XboxVertexElementByteSize = 2 * sizeof(xbox::short_xt); - NeedPatching = TRUE; } break; case xbox::X_D3DVSDT_NORMSHORT3: // 0x31: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT4N) { HostVertexElementDataType = D3DDECLTYPE_SHORT4N; HostVertexElementByteSize = 4 * sizeof(SHORT); - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT3; HostVertexElementByteSize = 3 * sizeof(FLOAT); } XboxVertexElementByteSize = 3 * sizeof(xbox::short_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_NORMSHORT4: // 0x41: if (g_D3DCaps.DeclTypes & D3DDTCAPS_SHORT4N) { 
HostVertexElementDataType = D3DDECLTYPE_SHORT4N; HostVertexElementByteSize = 4 * sizeof(SHORT); // No need for patching in D3D9 - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 4 * sizeof(xbox::short_xt); - NeedPatching = TRUE; } break; case xbox::X_D3DVSDT_NORMPACKED3: // 0x16: HostVertexElementDataType = D3DDECLTYPE_FLOAT3; HostVertexElementByteSize = 3 * sizeof(FLOAT); XboxVertexElementByteSize = 1 * sizeof(xbox::dword_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_SHORT1: // 0x15: HostVertexElementDataType = D3DDECLTYPE_SHORT2; HostVertexElementByteSize = 2 * sizeof(SHORT); XboxVertexElementByteSize = 1 * sizeof(xbox::short_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_SHORT3: // 0x35: HostVertexElementDataType = D3DDECLTYPE_SHORT4; HostVertexElementByteSize = 4 * sizeof(SHORT); XboxVertexElementByteSize = 3 * sizeof(xbox::short_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_PBYTE1: // 0x14: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { HostVertexElementDataType = D3DDECLTYPE_UBYTE4N; HostVertexElementByteSize = 4 * sizeof(BYTE); - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT1; HostVertexElementByteSize = 1 * sizeof(FLOAT); } XboxVertexElementByteSize = 1 * sizeof(xbox::byte_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_PBYTE2: // 0x24: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { HostVertexElementDataType = D3DDECLTYPE_UBYTE4N; HostVertexElementByteSize = 4 * sizeof(BYTE); - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT2; HostVertexElementByteSize = 2 * sizeof(FLOAT); } XboxVertexElementByteSize = 2 * sizeof(xbox::byte_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_PBYTE3: // 0x34: if (g_D3DCaps.DeclTypes & D3DDTCAPS_UBYTE4N) { HostVertexElementDataType = D3DDECLTYPE_UBYTE4N; HostVertexElementByteSize = 4 * sizeof(BYTE); - } - else - { + } else { 
HostVertexElementDataType = D3DDECLTYPE_FLOAT3; HostVertexElementByteSize = 3 * sizeof(FLOAT); } XboxVertexElementByteSize = 3 * sizeof(xbox::byte_xt); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_PBYTE4: // 0x44: // Test-case : Panzer @@ -980,225 +833,180 @@ private: HostVertexElementDataType = D3DDECLTYPE_UBYTE4N; HostVertexElementByteSize = 4 * sizeof(BYTE); // No need for patching when D3D9 supports D3DDECLTYPE_UBYTE4N - } - else - { + } else { HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 4 * sizeof(xbox::byte_xt); - NeedPatching = TRUE; } break; case xbox::X_D3DVSDT_FLOAT2H: // 0x72: HostVertexElementDataType = D3DDECLTYPE_FLOAT4; HostVertexElementByteSize = 4 * sizeof(FLOAT); XboxVertexElementByteSize = 3 * sizeof(FLOAT); - NeedPatching = TRUE; break; case xbox::X_D3DVSDT_NONE: // 0x02: - // No host element data, so no patching + assert(false); // Should already be handled above break; default: - //LOG_TEST_CASE("Unknown data type for D3DVSD_REG: 0x%02X\n", XboxVertexElementDataType); - break; + LOG_TEST_CASE("Unknown data type for D3DVSD_REG"); // TODO : Add : 0x % 02X\n", XboxVertexElementDataType); + return false; } - // On X_D3DVSDT_NONE skip this token - if (XboxVertexElementDataType == xbox::X_D3DVSDT_NONE) - { - // Xbox elements with X_D3DVSDT_NONE have size zero, so there's no need to register those. - // Note, that for skip tokens, we DO call VshConvert_RegisterVertexElement with a X_D3DVSDT_NONE! 
- return; + assert(HostVertexElementDataType > 0); + assert(HostVertexElementByteSize > 0); + + // Select new stream, if needed + if ((pCurrentVertexShaderStreamInfo == nullptr) + || (pCurrentVertexShaderStreamInfo->XboxStreamIndex != slot.StreamIndex)) { + assert(slot.StreamIndex < X_VSH_MAX_STREAMS); + assert(pCurrentVertexDeclaration->NumberOfVertexStreams < X_VSH_MAX_STREAMS); + + pCurrentVertexShaderStreamInfo = + &(pCurrentVertexDeclaration->VertexStreams[ + pCurrentVertexDeclaration->NumberOfVertexStreams++]); + pCurrentVertexShaderStreamInfo->NeedPatch = FALSE; + pCurrentVertexShaderStreamInfo->XboxStreamIndex = (WORD)slot.StreamIndex; + pCurrentVertexShaderStreamInfo->HostVertexStride = (WORD)slot.Offset; + pCurrentVertexShaderStreamInfo->NumberOfVertexElements = 0; + // Dxbx note : Use Dophin(s), FieldRender, MatrixPaletteSkinning and PersistDisplay as a testcase } // save patching information - VshConvert_RegisterVertexElement( - XboxVertexElementDataType, - NeedPatching ? XboxVertexElementByteSize : HostVertexElementByteSize, - HostVertexElementByteSize, - NeedPatching); + assert(pCurrentVertexShaderStreamInfo->NumberOfVertexElements < X_VSH_MAX_ATTRIBUTES); - pRecompiled->Stream = pCurrentVertexShaderStreamInfo->CurrentStreamNumber; - pRecompiled->Offset = pCurrentVertexShaderStreamInfo->HostVertexStride; - pRecompiled->Type = HostVertexElementDataType; - pRecompiled->Method = D3DDECLMETHOD_DEFAULT; - pRecompiled->Usage = HostVertexRegisterType; - pRecompiled->UsageIndex = Index; + CxbxVertexShaderStreamElement* pCurrentVertexShaderStreamElementInfo = + &(pCurrentVertexShaderStreamInfo->VertexElements[ + pCurrentVertexShaderStreamInfo->NumberOfVertexElements++]); - pRecompiled++; + if (XboxVertexElementByteSize == 0) { + XboxVertexElementByteSize = HostVertexElementByteSize; + } else { + pCurrentVertexShaderStreamInfo->NeedPatch |= TRUE; + } + + pCurrentVertexShaderStreamElementInfo->XboxType = XboxVertexElementDataType; + 
pCurrentVertexShaderStreamElementInfo->XboxByteSize = XboxVertexElementByteSize; + pCurrentVertexShaderStreamElementInfo->HostDataType = HostVertexElementDataType; + pCurrentVertexShaderStreamElementInfo->HostByteSize = HostVertexElementByteSize; + + // Convert to host vertex element + pCurrentHostVertexElement->Stream = pCurrentVertexShaderStreamInfo->XboxStreamIndex; // Use Xbox stream index on host + // FIXME Don't assume vertex elements are contiguous! + pCurrentHostVertexElement->Offset = pCurrentVertexShaderStreamInfo->HostVertexStride; + pCurrentHostVertexElement->Type = pCurrentVertexShaderStreamElementInfo->HostDataType; + pCurrentHostVertexElement->Method = D3DDECLMETHOD_DEFAULT; + if (IsFixedFunction) { + pCurrentHostVertexElement->Usage = Xb2PCRegisterType(VertexRegister, /*&*/pCurrentHostVertexElement->UsageIndex); + } + else { + // D3DDECLUSAGE_TEXCOORD can be useds for any user-defined data + // We need this because there is no reliable way to detect the real usage + // Xbox has no concept of 'usage types', it only requires a list of attribute register numbers. + // So we treat them all as 'user-defined' with an Index of the Vertex Register Index + // this prevents information loss in shaders due to non-matching dcl types! + pCurrentHostVertexElement->Usage = D3DDECLUSAGE_TEXCOORD; + pCurrentHostVertexElement->UsageIndex = (BYTE)VertexRegister; + } pCurrentVertexShaderStreamInfo->HostVertexStride += HostVertexElementByteSize; - } - void VshConvertToken_STREAMDATA(DWORD *pXboxToken) - { - if (*pXboxToken & X_D3DVSD_MASK_SKIP) - { - // For D3D9, use D3DDECLTYPE_UNUSED ? 
- if (*pXboxToken & X_D3DVSD_MASK_SKIPBYTES) { - VshConvertToken_STREAMDATA_SKIPBYTES(pXboxToken); - } else { - VshConvertToken_STREAMDATA_SKIP(pXboxToken); - } - } - else // D3DVSD_REG - { - VshConvertToken_STREAMDATA_REG(pXboxToken); - } - } - - DWORD VshRecompileToken(DWORD *pXboxToken) - { - DWORD Step = 1; - - switch(VshGetTokenType(*pXboxToken)) - { - case xbox::X_D3DVSD_TOKEN_NOP: - VshConvertToken_NOP(pXboxToken); - break; - case xbox::X_D3DVSD_TOKEN_STREAM: - { - VshConvertToken_STREAM(pXboxToken); - break; - } - case xbox::X_D3DVSD_TOKEN_STREAMDATA: - { - VshConvertToken_STREAMDATA(pXboxToken); - break; - } - case xbox::X_D3DVSD_TOKEN_TESSELLATOR: - { - VshConvertToken_TESSELATOR(pXboxToken); - break; - } - case xbox::X_D3DVSD_TOKEN_CONSTMEM: - { - Step = VshConvertToken_CONSTMEM(pXboxToken); - break; - } - default: - //LOG_TEST_CASE("Unknown token type: %d\n", VshGetTokenType(*pXboxToken)); - break; - } - - return Step; - } - - static DWORD* RemoveXboxDeclarationRedefinition(DWORD* pXboxDeclaration) - { - // Detect and remove register redefinitions by preprocessing the Xbox Vertex Declaration - // Test Case: King Kong - - // Find the last token - DWORD* pXboxToken = pXboxDeclaration; - while (*pXboxToken != X_D3DVSD_END()){ - pXboxToken++; - } - - // Operate on a copy of the Xbox declaration, rather than messing with the Xbox's memory - auto declarationBytes = sizeof(DWORD) * (pXboxToken - pXboxDeclaration + 1); - auto pXboxDeclarationCopy = (DWORD*)malloc(declarationBytes); - memcpy(pXboxDeclarationCopy, pXboxDeclaration, declarationBytes); - pXboxToken = pXboxDeclarationCopy + (pXboxToken - pXboxDeclaration); // Move to end of the copy - - // Remember if we've seen a given output register - std::bitset<16> seen; - - // We want to keep later definitions, and remove earlier ones - // Scan back from the end of the declaration, and replace redefinitions with nops - while (pXboxToken > pXboxDeclarationCopy) { - auto type = VshGetTokenType(*pXboxToken); - if 
(type == xbox::X_D3DVSD_TOKEN_STREAMDATA && !(*pXboxToken & X_D3DVSD_MASK_SKIP) || - type == xbox::X_D3DVSD_TOKEN_TESSELLATOR) - { - auto outputRegister = VshGetVertexRegister(*pXboxToken); - if (seen[outputRegister]) - { - // Blank out tokens for mapped registers - *pXboxToken = X_D3DVSD_NOP(); - EmuLog(LOG_LEVEL::DEBUG, "Replacing duplicate definition of register %d with D3DVSD_NOP", outputRegister); - } - else - { - // Mark register as seen - seen[outputRegister] = true; - } - } - - pXboxToken--; - } - - return pXboxDeclarationCopy; + return true; } public: - D3DVERTEXELEMENT *Convert(DWORD* pXboxDeclaration, bool bIsFixedFunction, CxbxVertexDeclaration* pCxbxVertexDeclaration) + D3DVERTEXELEMENT* Convert(xbox::X_VERTEXATTRIBUTEFORMAT* pXboxDeclaration, bool bIsFixedFunction, CxbxVertexDeclaration* pCxbxVertexDeclaration) { // Get a preprocessed copy of the original Xbox Vertex Declaration - auto pXboxVertexDeclarationCopy = RemoveXboxDeclarationRedefinition(pXboxDeclaration); - - pVertexDeclarationToSet = pCxbxVertexDeclaration; + pCurrentVertexDeclaration = pCxbxVertexDeclaration; IsFixedFunction = bIsFixedFunction; RegVIsPresentInDeclaration.fill(false); - // First of all some info: - // We have to figure out which flags are set and then - // we have to patch their params + // Mapping between Xbox register and the resulting host vertex element + D3DVERTEXELEMENT* HostVertexElementPerRegister[X_VSH_MAX_ATTRIBUTES] = { 0 }; - // some token values - // 0xFFFFFFFF - end of the declaration - // 0x00000000 - nop (means that this value is ignored) + // For Direct3D9, we need to reserve the maximum number of elements, plus one token for D3DDECL_END : + unsigned HostDeclarationSize = (X_VSH_MAX_ATTRIBUTES + 1) * sizeof(D3DVERTEXELEMENT); - // Calculate size of declaration - XboxDeclarationCount = VshGetDeclarationCount(pXboxVertexDeclarationCopy); - // For Direct3D9, we need to reserve at least twice the number of elements, as one token can generate two registers (in 
and out) : - unsigned HostDeclarationSize = XboxDeclarationCount * sizeof(D3DVERTEXELEMENT) * 2; - - D3DVERTEXELEMENT *Result = (D3DVERTEXELEMENT *)calloc(1, HostDeclarationSize); - pRecompiled = Result; - uint8_t *pRecompiledBufferOverflow = ((uint8_t*)pRecompiled) + HostDeclarationSize; + D3DVERTEXELEMENT* HostVertexElements = (D3DVERTEXELEMENT*)calloc(1, HostDeclarationSize); + pCurrentHostVertexElement = HostVertexElements; - VshDumpXboxDeclaration(pXboxDeclaration); + std::array orderedRegisterIndices; + for (byte i = 0; i < orderedRegisterIndices.size(); i++) + orderedRegisterIndices[i] = i; - auto pXboxToken = pXboxVertexDeclarationCopy; - while (*pXboxToken != X_D3DVSD_END()) - { - if ((uint8_t*)pRecompiled >= pRecompiledBufferOverflow) { - DbgVshPrintf("Detected buffer-overflow, breaking out...\n"); - break; + // Make sure we convert registers in order of offset, per stream + // TODO fix elements with identical positions? + std::sort(orderedRegisterIndices.begin(), orderedRegisterIndices.end(), + [pXboxDeclaration](const auto& x, const auto& y) + { + auto regX = pXboxDeclaration->Slots[x]; + auto regY = pXboxDeclaration->Slots[y]; + return std::tie(regX.StreamIndex, regX.Offset) + < std::tie(regY.StreamIndex, regY.Offset); + }); + + EmuLog(LOG_LEVEL::DEBUG, "Parsing vertex declaration"); + for (size_t i = 0; i < orderedRegisterIndices.size(); i++) { + auto regIndex = orderedRegisterIndices[i]; + auto &slot = pXboxDeclaration->Slots[regIndex]; + if (slot.Format > xbox::X_D3DVSDT_NONE) { + // Set Direct3D9 vertex element (declaration) members : + if (VshConvertToken_STREAMDATA_REG(regIndex, slot)) { + // Add this register to the list of declared registers + RegVIsPresentInDeclaration[regIndex] = true; + // Remember a pointer to this register + HostVertexElementPerRegister[regIndex] = pCurrentHostVertexElement; + pCurrentHostVertexElement++; + + EmuLog(LOG_LEVEL::DEBUG, "\tXbox Stream %d, Offset %d, Format %d, Slot %d", + slot.StreamIndex, slot.Offset, 
slot.Format, regIndex); + EmuLog(LOG_LEVEL::DEBUG, "\tHost Stream %d, Offset %d, Format %d, Usage %d-%d", + pCurrentHostVertexElement->Stream, pCurrentHostVertexElement->Offset, pCurrentHostVertexElement->Type, pCurrentHostVertexElement->Usage, pCurrentHostVertexElement->UsageIndex); + } } - - DWORD Step = VshRecompileToken(pXboxToken); - pXboxToken += Step; } - *pRecompiled = D3DDECL_END(); + *pCurrentHostVertexElement = D3DDECL_END(); + + // Post-process host vertex elements that have a D3DDECLMETHOD_CROSSUV method : + for (int AttributeIndex = 0; AttributeIndex < X_VSH_MAX_ATTRIBUTES; AttributeIndex++) { + auto pHostElement = HostVertexElementPerRegister[AttributeIndex]; + if (pHostElement == nullptr) continue; + if (pHostElement->Method == D3DDECLMETHOD_CROSSUV) { + int TessellationSource = pXboxDeclaration->Slots[AttributeIndex].TessellationSource; + auto pSourceElement = HostVertexElementPerRegister[TessellationSource]; + // Copy over the Stream, Offset and Type of the host vertex element that serves as 'TessellationSource' : + pHostElement->Stream = pSourceElement->Stream; + pHostElement->Offset = pSourceElement->Offset; + pHostElement->Type = pSourceElement->Type; + // Note, the input type for D3DDECLMETHOD_CROSSUV can be D3DDECLTYPE_FLOAT[43], D3DDECLTYPE_D3DCOLOR, D3DDECLTYPE_UBYTE4, or D3DDECLTYPE_SHORT4 + // (the output type implied by D3DDECLMETHOD_CROSSUV is D3DDECLTYPE_FLOAT3). + // TODO : Should we assert this? + } + } // Ensure valid ordering of the vertex declaration (http://doc.51windows.net/Directx9_SDK/graphics/programmingguide/gettingstarted/vertexdeclaration/vertexdeclaration.htm) // In particular "All vertex elements for a stream must be consecutive and sorted by offset" // Test case: King Kong (due to register redefinition) - std::sort(Result, pRecompiled, [] (const auto& x, const auto& y) + // Note : Xbox slots might use non-ordered stream indices, so we can't rely on the output ordering of our converted elements! 
+ std::sort(/*First=*/HostVertexElements, /*Last=*/pCurrentHostVertexElement, /*Pred=*/[] (const auto& x, const auto& y) { return std::tie(x.Stream, x.Method, x.Offset) < std::tie(y.Stream, y.Method, y.Offset); }); - // Free the preprocessed declaration copy - free(pXboxVertexDeclarationCopy); - // Record which registers are in the vertex declaration for (size_t i = 0; i < RegVIsPresentInDeclaration.size(); i++) { pCxbxVertexDeclaration->vRegisterInDeclaration[i] = RegVIsPresentInDeclaration[i]; } - return Result; + return HostVertexElements; } }; D3DVERTEXELEMENT *EmuRecompileVshDeclaration ( - DWORD *pXboxDeclaration, + xbox::X_VERTEXATTRIBUTEFORMAT* pXboxDeclaration, bool bIsFixedFunction, - DWORD *pXboxDeclarationCount, CxbxVertexDeclaration *pCxbxVertexDeclaration ) { @@ -1206,224 +1014,329 @@ D3DVERTEXELEMENT *EmuRecompileVshDeclaration D3DVERTEXELEMENT* pHostVertexElements = Converter.Convert(pXboxDeclaration, bIsFixedFunction, pCxbxVertexDeclaration); - *pXboxDeclarationCount = Converter.XboxDeclarationCount; - return pHostVertexElements; } -extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader) +static bool FreeCxbxVertexDeclaration(CxbxVertexDeclaration *pCxbxVertexDeclaration) { - pVertexShader->Declaration.NumberOfVertexStreams = 0; + LOG_INIT; // Allows use of DEBUG_D3DRESULT + + if (pCxbxVertexDeclaration) { + if (pCxbxVertexDeclaration->pHostVertexDeclaration) { + HRESULT hRet = pCxbxVertexDeclaration->pHostVertexDeclaration->Release(); + DEBUG_D3DRESULT(hRet, "g_pD3DDevice->DeleteVertexShader(pHostVertexDeclaration)"); + } + free(pCxbxVertexDeclaration); + return true; + } + + return false; } -// Checks for failed vertex shaders, and shaders that would need patching -boolean VshHandleIsValidShader(DWORD XboxVertexShaderHandle) -{ -#if 0 - //printf( "VS = 0x%.08X\n", XboxVertexShaderHandle ); - - CxbxVertexShader *pCxbxVertexShader = GetCxbxVertexShader(XboxVertexShaderHandle); - if (pCxbxVertexShader) { - if 
(pCxbxVertexShader->XboxStatus != 0) - { - return FALSE; - } - /* - for (uint32 i = 0; i < pCxbxVertexShader->VertexShaderInfo.NumberOfVertexStreams; i++) - { - if (pCxbxVertexShader->VertexShaderInfo.VertexStreams[i].NeedPatch) - { - // Just for caching purposes - pCxbxVertexShader->XboxStatus = 0x80000001; - return FALSE; - } - } - */ - } -#endif - return TRUE; -} - -extern boolean IsValidCurrentShader(void) -{ - // Dxbx addition : There's no need to call - // XTL_EmuIDirect3DDevice_GetVertexShader, just check g_Xbox_VertexShader_Handle : - return VshHandleIsValidShader(g_Xbox_VertexShader_Handle); -} - -// Vertex shader state -static DWORD g_CxbxVertexShaderSlotAddress = 0; -static DWORD g_CxbxVertexShaderSlots[X_VSH_MAX_INSTRUCTION_COUNT * X_VSH_INSTRUCTION_SIZE] = { 0 }; - -DWORD* GetCxbxVertexShaderSlotPtr(const DWORD SlotIndexAddress) +xbox::dword_xt* GetCxbxVertexShaderSlotPtr(const DWORD SlotIndexAddress) { if (SlotIndexAddress < X_VSH_MAX_INSTRUCTION_COUNT) { - return &g_CxbxVertexShaderSlots[SlotIndexAddress * X_VSH_INSTRUCTION_SIZE]; + return &g_Xbox_VertexShader_FunctionSlots[SlotIndexAddress * X_VSH_INSTRUCTION_SIZE]; } else { LOG_TEST_CASE("SlotIndexAddress out of range"); // FIXME : extend with value (once supported by LOG_TEST_CASE) return nullptr; } } -CxbxVertexDeclaration *GetCxbxVertexDeclaration(DWORD XboxVertexShaderHandle) +VertexDeclarationKey GetXboxVertexAttributesKey(xbox::X_VERTEXATTRIBUTEFORMAT* pXboxVertexAttributeFormat) { - CxbxVertexShader *pCxbxVertexShader = GetCxbxVertexShader(XboxVertexShaderHandle); - - for (uint32_t i = 0; i < pCxbxVertexShader->Declaration.NumberOfVertexStreams; i++) - { - if (pCxbxVertexShader->Declaration.VertexStreams[i].NeedPatch) - { - return &pCxbxVertexShader->Declaration; - } - } - return nullptr; + auto attributeHash = ComputeHash((void*)pXboxVertexAttributeFormat, sizeof(xbox::X_VERTEXATTRIBUTEFORMAT)); + // For now, we use different declarations depending on if the fixed function pipeline + // is 
in use, even if the attributes are the same + return attributeHash ^ (VertexDeclarationKey)g_Xbox_VertexShader_IsFixedFunction; } -std::unordered_map g_CxbxVertexShaders; +std::unordered_map g_CxbxVertexDeclarations; -CxbxVertexShader* GetCxbxVertexShader(DWORD XboxVertexShaderHandle) +void RegisterCxbxVertexDeclaration(VertexDeclarationKey CacheKey, CxbxVertexDeclaration* pCxbxVertexDeclaration) { - if (VshHandleIsVertexShader(XboxVertexShaderHandle)) { - auto it = g_CxbxVertexShaders.find(XboxVertexShaderHandle); - if (it != g_CxbxVertexShaders.end()) { - return it->second; - } + auto it = g_CxbxVertexDeclarations.find(CacheKey); + if (it != g_CxbxVertexDeclarations.end() && it->second != nullptr) { + LOG_TEST_CASE("Overwriting existing Vertex Declaration"); + FreeCxbxVertexDeclaration(it->second); // Avoid memory leak + } + + g_CxbxVertexDeclarations[CacheKey] = pCxbxVertexDeclaration; +} + +CxbxVertexDeclaration* FetchCachedCxbxVertexDeclaration(VertexDeclarationKey CacheKey) +{ + auto it = g_CxbxVertexDeclarations.find(CacheKey); + if (it != g_CxbxVertexDeclarations.end()) { + return it->second; } return nullptr; } -void RegisterCxbxVertexShader(DWORD XboxVertexShaderHandle, CxbxVertexShader* shader) +IDirect3DVertexDeclaration* CxbxCreateHostVertexDeclaration(D3DVERTEXELEMENT *pDeclaration) { - auto it = g_CxbxVertexShaders.find(XboxVertexShaderHandle); - if (it != g_CxbxVertexShaders.end() && it->second != nullptr && shader != nullptr) { - LOG_TEST_CASE("Overwriting existing Vertex Shader"); - } + LOG_INIT; // Allows use of DEBUG_D3DRESULT - g_CxbxVertexShaders[XboxVertexShaderHandle] = shader; + IDirect3DVertexDeclaration* pHostVertexDeclaration = nullptr; + HRESULT hRet = g_pD3DDevice->CreateVertexDeclaration(pDeclaration, &pHostVertexDeclaration); + DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexDeclaration"); + + return pHostVertexDeclaration; } -void SetCxbxVertexDeclaration(CxbxVertexDeclaration& pCxbxVertexDeclaration) { - LOG_INIT +static 
IDirect3DVertexShader* passthroughshader; +void CxbxUpdateHostVertexShader() +{ + extern bool g_bUsePassthroughHLSL; // TMP glue - HRESULT hRet; + // TODO Call this when state is dirty + // Rather than every time state changes - // Set vertex declaration - hRet = g_pD3DDevice->SetVertexDeclaration(pCxbxVertexDeclaration.pHostVertexDeclaration); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexDeclaration"); + LOG_INIT; // Allows use of DEBUG_D3DRESULT + + if (g_Xbox_VertexShader_IsFixedFunction) { + HRESULT hRet = g_pD3DDevice->SetVertexShader(nullptr); + DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); + // TODO : Once available, start using host Fixed Function HLSL shader + // instead of using deprecated host fixed function (by setting a null + // vertex shader). + // As for the required host vertex declaration : + // CxbxUpdateHostVertexDeclaration already been + // called, which sets host vertex declaration based on the + // declaration that XboxVertexShaderFromFVF generated. + } + else if (g_Xbox_VertexShader_IsPassthrough && g_bUsePassthroughHLSL) { + if (passthroughshader == nullptr) { + ID3DBlob* pBlob = nullptr; + EmuCompileXboxPassthrough(&pBlob); + if (pBlob) { + g_pD3DDevice->CreateVertexShader((DWORD*)pBlob->GetBufferPointer(), &passthroughshader); + } + } + + HRESULT hRet = g_pD3DDevice->SetVertexShader(passthroughshader); + } + else { + auto pTokens = GetCxbxVertexShaderSlotPtr(g_Xbox_VertexShader_FunctionSlots_StartAddress); + assert(pTokens); + // Create a vertex shader from the tokens + DWORD shaderSize; + auto VertexShaderKey = g_VertexShaderSource.CreateShader(pTokens, &shaderSize); + IDirect3DVertexShader* pHostVertexShader = g_VertexShaderSource.GetShader(VertexShaderKey); + HRESULT hRet = g_pD3DDevice->SetVertexShader(pHostVertexShader); + DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); + } +} + +void CxbxSetVertexShaderSlots(DWORD* pTokens, DWORD Address, DWORD NrInstructions) +{ + int upToSlot = Address + NrInstructions; + if 
(upToSlot > X_VSH_MAX_INSTRUCTION_COUNT) { + LOG_TEST_CASE("Shader does not fit in vertex shader slots"); + return; + } + + auto CxbxVertexShaderSlotPtr = GetCxbxVertexShaderSlotPtr(Address); + if (CxbxVertexShaderSlotPtr == nullptr) { + return; + } + + memcpy(CxbxVertexShaderSlotPtr, pTokens, NrInstructions * X_VSH_INSTRUCTION_SIZE_BYTES); + + // Make sure slot parsing in EmuParseVshFunction (VshConvertToIntermediate) stops after the last slot; + // Just setting bit 0 in 3rd DWORD suffices (see XboxVertexShaderDecoder.VshGetField.FieldMapping[FLD_FINAL]) : + g_Xbox_VertexShader_FunctionSlots[(X_VSH_MAX_INSTRUCTION_COUNT * X_VSH_INSTRUCTION_SIZE) + 3] = 1; +} + +static void CxbxSetVertexShaderPassthroughProgram() +{ + static DWORD XboxShaderBinaryPassthrough[] = { + 0, 0x0020001B, 0x0836106C, 0x2F100FF8, + 0, 0x0420061B, 0x083613FC, 0x5011F818, + 0, 0x002008FF, 0x0836106C, 0x2070F828, + 0, 0x0240081B, 0x1436186C, 0x2F20F824, + 0, 0x0060201B, 0x2436106C, 0x3070F800, + 0, 0x00200200, 0x0836106C, 0x2070F830, + 0, 0x00200E1B, 0x0836106C, 0x2070F838, + 0, 0x0020101B, 0x0836106C, 0x2070F840, + 0, 0x0020121B, 0x0836106C, 0x2070F848, + 0, 0x0020141B, 0x0836106C, 0x2070F850, + 0, 0x0020161B, 0x0836106C, 0x2070F858, + 0, 0x0020181B, 0x0836106C, 0x2070F861 // FLD_FINAL is set here! 
+ }; + + // LOG_TEST_CASE("Setting Xbox passthrough shader"); + // Test cases : Many XDK samples & games + + // TODO : Xbox uses three variants; + // one for FOGTABLEMODE NONE + // one for FOGSOURCEZ + // one for WFOG + + CxbxSetVertexShaderSlots(&XboxShaderBinaryPassthrough[0], 0, sizeof(XboxShaderBinaryPassthrough) / X_VSH_INSTRUCTION_SIZE_BYTES); + + extern float g_ZScale; // TMP glue + extern float GetMultiSampleOffsetDelta(); // TMP glue + + // Passthrough programs require scale and offset to be set in constants zero and one (both minus 96) + // (Note, these are different from GetMultiSampleOffsetAndScale / GetViewPortOffsetAndScale) + float scale[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; + float offset[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; +#if 0 // Based on (regular) BeginPush XDK and (multisampled) AntiAlias samples, scale and offset should just use above defaults, with both render scale factor 1, but also 2 and higher. + scale[0] = (float)g_RenderScaleFactor; + scale[1] = (float)g_RenderScaleFactor; + scale[2] = 1.0f; // Passthrough should not scale Z (so don't use g_ZScale) + scale[3] = 1.0f; + + float MultiSampleBias = 0.0f; + if (XboxRenderStates.GetXboxRenderState(xbox::X_D3DRS_MULTISAMPLEANTIALIAS) > 0) { + MultiSampleBias = GetMultiSampleOffsetDelta(); + } + + offset[0] = g_Xbox_ScreenSpaceOffset_x - MultiSampleBias; + offset[1] = g_Xbox_ScreenSpaceOffset_y - MultiSampleBias; + offset[2] = 0.0f; + offset[3] = 0.0f; +#endif + + // Test-case : XDK Ripple sample + + // TODO : Apparently, offset and scale are swapped in some XDK versions, but which? 
+ CxbxImpl_SetVertexShaderConstant(0 - X_D3DSCM_CORRECTION, scale, 1); + CxbxImpl_SetVertexShaderConstant(1 - X_D3DSCM_CORRECTION, offset, 1); +} + +CxbxVertexDeclaration* CxbxGetVertexDeclaration() +{ + LOG_INIT; // Allows use of DEBUG_D3DRESULT + + xbox::X_VERTEXATTRIBUTEFORMAT *pXboxVertexAttributeFormat = GetXboxVertexAttributeFormat(); + + auto XboxVertexAttributesKey = GetXboxVertexAttributesKey(pXboxVertexAttributeFormat); + CxbxVertexDeclaration* pCxbxVertexDeclaration = FetchCachedCxbxVertexDeclaration(XboxVertexAttributesKey); + if (pCxbxVertexDeclaration == nullptr) { + pCxbxVertexDeclaration = (CxbxVertexDeclaration*)calloc(1, sizeof(CxbxVertexDeclaration)); + + // Convert Xbox vertex attributes towards host Direct3D 9 vertex element + D3DVERTEXELEMENT* pRecompiledVertexElements = EmuRecompileVshDeclaration( + pXboxVertexAttributeFormat, + g_Xbox_VertexShader_IsFixedFunction, + pCxbxVertexDeclaration); + + // Create the vertex declaration + pCxbxVertexDeclaration->pHostVertexDeclaration = CxbxCreateHostVertexDeclaration(pRecompiledVertexElements); + + free(pRecompiledVertexElements); + + // Cache resulting declarations from given inputs + pCxbxVertexDeclaration->Key = XboxVertexAttributesKey; + RegisterCxbxVertexDeclaration(XboxVertexAttributesKey, pCxbxVertexDeclaration); + } + + return pCxbxVertexDeclaration; +} + +void CxbxUpdateHostVertexDeclaration() +{ + CxbxVertexDeclaration* pCxbxVertexDeclaration = CxbxGetVertexDeclaration(); + HRESULT hRet = g_pD3DDevice->SetVertexDeclaration(pCxbxVertexDeclaration->pHostVertexDeclaration); // Titles can specify default values for registers via calls like SetVertexData4f // HLSL shaders need to know whether to use vertex data or default vertex shader values // Any register not in the vertex declaration should be set to the default value float vertexDefaultFlags[X_VSH_MAX_ATTRIBUTES]; for (int i = 0; i < X_VSH_MAX_ATTRIBUTES; i++) { - vertexDefaultFlags[i] = pCxbxVertexDeclaration.vRegisterInDeclaration[i] ? 
0.0f : 1.0f; + vertexDefaultFlags[i] = pCxbxVertexDeclaration->vRegisterInDeclaration[i] ? 0.0f : 1.0f; } - g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE, vertexDefaultFlags, 4); + g_pD3DDevice->SetVertexShaderConstantF(CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_BASE, vertexDefaultFlags, CXBX_D3DVS_CONSTREG_VREGDEFAULTS_FLAG_SIZE); } -// TODO Call this when state is dirty in UpdateNativeD3DResources -// Rather than every time state changes -void SetVertexShaderFromSlots() { - LOG_INIT - - auto pTokens = GetCxbxVertexShaderSlotPtr(g_CxbxVertexShaderSlotAddress); - if (pTokens) { - // Create a vertex shader from the tokens - DWORD shaderSize; - auto shaderKey = g_VertexShaderSource.CreateShader(pTokens, &shaderSize); - HRESULT hRet = g_pD3DDevice->SetVertexShader(g_VertexShaderSource.GetShader(shaderKey)); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); - } -} - -void SetCxbxVertexShaderHandle(CxbxVertexShader* pCxbxVertexShader) +void CxbxImpl_SetScreenSpaceOffset(float x, float y) { - LOG_INIT + // See https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#3.3.1%20Pixel%20Coordinate%20System + static float PixelOffset = 0.53125f; // 0.5 for pixel center + 1/16? - HRESULT hRet; - - // Get vertex shader if we have a key - auto pHostShader = pCxbxVertexShader->VertexShaderKey - ? 
g_VertexShaderSource.GetShader(pCxbxVertexShader->VertexShaderKey) - : nullptr; - - // Set vertex shader - hRet = g_pD3DDevice->SetVertexShader(pHostShader); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); - - SetCxbxVertexDeclaration(pCxbxVertexShader->Declaration); + g_Xbox_ScreenSpaceOffset_x = x + PixelOffset; + g_Xbox_ScreenSpaceOffset_y = y + PixelOffset; } -void CxbxImpl_SetVertexShaderInput -( - DWORD Handle, - UINT StreamCount, - xbox::X_STREAMINPUT* pStreamInputs -) +// Note : SetVertexShaderInputDirect needs no EMUPATCH CxbxImpl_..., since it just calls SetVertexShaderInput + +void CxbxImpl_SetVertexShaderInput(DWORD Handle, UINT StreamCount, xbox::X_STREAMINPUT* pStreamInputs) { - LOG_INIT + using namespace xbox; // If Handle is NULL, all VertexShader input state is cleared. // Otherwise, Handle is the address of an Xbox VertexShader struct, or-ed with 1 (X_D3DFVF_RESERVED0) // (Thus, a FVF handle is an invalid argument.) - // - LOG_UNIMPLEMENTED(); + if (Handle == NULL) + { + // Xbox doesn't remember a null-handle - this may be an XDK bug! + // (Although, if that's skipped intentionally, we'd need to be very carefull about that!) + // StreamCount and pStreamInputs arguments are ignored + g_Xbox_SetVertexShaderInput_Count = 0; + } + else + { + assert(VshHandleIsVertexShader(Handle)); + assert(StreamCount > 0); + assert(StreamCount <= X_VSH_MAX_STREAMS); + assert(pStreamInputs != xbox::zeroptr); + + X_D3DVertexShader* pXboxVertexShader = VshHandleToXboxVertexShader(Handle); + assert(pXboxVertexShader); + + // Xbox DOES store the Handle, but since it merely returns this through (unpatched) D3DDevice_GetVertexShaderInput, we don't have to. 
+ + g_Xbox_SetVertexShaderInput_Count = StreamCount; // This > 0 indicates g_Xbox_SetVertexShaderInput_Data has to be used + memcpy(g_Xbox_SetVertexShaderInput_Data, pStreamInputs, StreamCount * sizeof(xbox::X_STREAMINPUT)); // Make a copy of the supplied StreamInputs array + + g_Xbox_SetVertexShaderInput_Attributes = *CxbxGetVertexShaderAttributes(pXboxVertexShader); // Copy this vertex shaders's attribute slots + } } -void CxbxImpl_SelectVertexShaderDirect -( - xbox::X_VERTEXATTRIBUTEFORMAT* pVAF, - DWORD Address -) -{ - LOG_INIT; - - // When pVAF is non-null, this vertex attribute format takes precedence over the the one - LOG_UNIMPLEMENTED(); -} +// Note : SelectVertexShaderDirect needs no EMUPATCH CxbxImpl_..., since it just calls SelectVertexShader void CxbxImpl_SelectVertexShader(DWORD Handle, DWORD Address) { + LOG_INIT; // Allows use of DEBUG_D3DRESULT + // Address always indicates a previously loaded vertex shader slot (from where the program is used). // Handle can be null if the current Xbox VertexShader is assigned // Handle can be an address of an Xbox VertexShader struct, or-ed with 1 (X_D3DFVF_RESERVED0) // If Handle is assigned, it becomes the new current Xbox VertexShader, // which resets a bit of state (nv2a execution mode, viewport, ?) 
// Either way, the given address slot is selected as the start of the current vertex shader program - g_Xbox_VertexShader_Handle = Handle; - g_CxbxVertexShaderSlotAddress = Address; + g_Xbox_VertexShader_FunctionSlots_StartAddress = Address; - if (VshHandleIsVertexShader(Handle)) - { - auto pCxbxVertexShader = GetCxbxVertexShader(Handle); - if (pCxbxVertexShader == nullptr) { - LOG_TEST_CASE("Shader handle has not been created"); - } - else { - // Set the shader handle declaration - SetCxbxVertexDeclaration(pCxbxVertexShader->Declaration); - } + if (Handle) { + if (!VshHandleIsVertexShader(Handle)) + LOG_TEST_CASE("Non-zero handle must be a VertexShader!"); + +#ifdef CXBX_USE_GLOBAL_VERTEXSHADER_POINTER + g_Xbox_VertexShader_Ptr = VshHandleToXboxVertexShader(Handle); +#endif + g_Xbox_VertexShader_Handle = Handle; + g_Xbox_VertexShader_IsFixedFunction = false; + g_Xbox_VertexShader_IsPassthrough = false; } - - SetVertexShaderFromSlots(); } void CxbxImpl_LoadVertexShaderProgram(CONST DWORD* pFunction, DWORD Address) { + // pFunction is a X_VSH_SHADER_HEADER pointer // D3DDevice_LoadVertexShaderProgram splits the given function buffer into batch-wise pushes to the NV2A + // However, we can suffice by copying the program into our slots (and make sure these slots get converted into a vertex shader) // Copy shader instructions to shader slots - auto CxbxVertexShaderSlotPtr = GetCxbxVertexShaderSlotPtr(Address); - if (CxbxVertexShaderSlotPtr == nullptr) - return; - auto shaderHeader = *((xbox::X_VSH_SHADER_HEADER*) pFunction); - auto tokens = &pFunction[1]; - memcpy(CxbxVertexShaderSlotPtr, tokens, shaderHeader.NumInst * X_VSH_INSTRUCTION_SIZE_BYTES); + if (shaderHeader.Version != VERSION_XVS) + LOG_TEST_CASE("Non-regular (state or read/write) shader detected!"); - SetVertexShaderFromSlots(); + auto tokens = (DWORD*)&pFunction[1]; + CxbxSetVertexShaderSlots(tokens, Address, shaderHeader.NumInst); } void CxbxImpl_LoadVertexShader(DWORD Handle, DWORD Address) @@ -1431,36 
+1344,56 @@ void CxbxImpl_LoadVertexShader(DWORD Handle, DWORD Address) // Handle is always address of an X_D3DVertexShader struct, thus always or-ed with 1 (X_D3DFVF_RESERVED0) // Address is the slot (offset) from which the program must be written onwards (as whole DWORDS) // D3DDevice_LoadVertexShader pushes the program contained in the Xbox VertexShader struct to the NV2A - auto CxbxVertexShaderSlotPtr = GetCxbxVertexShaderSlotPtr(Address); - if (CxbxVertexShaderSlotPtr) { - CxbxVertexShader* pCxbxVertexShader = GetCxbxVertexShader(Handle); - if (pCxbxVertexShader) { - // Make sure there is a shader function to load - // from the shader handle - if (pCxbxVertexShader->pXboxFunctionCopy == nullptr) { - LOG_TEST_CASE("LoadVertexShader with FVF shader handle"); - return; - } - int upToSlot = Address + pCxbxVertexShader->XboxNrAddressSlots; - if (upToSlot > X_VSH_MAX_INSTRUCTION_COUNT) { - LOG_TEST_CASE("Shader does not fit in vertex shader slots"); - return; - } + xbox::X_D3DVertexShader* pXboxVertexShader = VshHandleToXboxVertexShader(Handle); - // Skip the header DWORD at the beginning - auto pTokens = &pCxbxVertexShader->pXboxFunctionCopy[1]; - memcpy(CxbxVertexShaderSlotPtr, pTokens, pCxbxVertexShader->XboxNrAddressSlots * X_VSH_INSTRUCTION_SIZE_BYTES); + DWORD NrTokens; + DWORD* pNV2ATokens = CxbxGetVertexShaderTokens(pXboxVertexShader, &NrTokens); + +#if 1 // TODO : Remove dirty hack (?once CreateVertexShader trampolines to Xbox code that sets ProgramAndConstantsDwords correctly?) 
: + if (NrTokens == 0) + NrTokens = 10000; +#endif + + static unsigned ConstantAddress = 0; + DWORD* pEnd = pNV2ATokens + NrTokens; + while (pNV2ATokens < pEnd) { + DWORD dwMethod, dwSubChannel, nrDWORDS; + D3DPUSH_DECODE(*pNV2ATokens++, dwMethod, dwSubChannel, nrDWORDS); + if (nrDWORDS == 0) { LOG_TEST_CASE("Zero-length NV2A method detected!"); break; } + switch (dwMethod) { + case NV2A_VP_UPLOAD_INST(0): { // = 0x00000B00 + if ((nrDWORDS & 3) != 0) LOG_TEST_CASE("NV2A_VP_UPLOAD_INST arguments should be a multiple of 4!"); + unsigned nrSlots = nrDWORDS / X_VSH_INSTRUCTION_SIZE; + CxbxSetVertexShaderSlots(pNV2ATokens, Address, nrSlots); + Address += nrSlots; + break; } - else { - LOG_TEST_CASE("LoadVertexShader called with unrecognized handle"); // FIXME : extend with value (once supported by LOG_TEST_CASE) + case NV2A_VP_UPLOAD_CONST_ID: // = 0x00001EA4 + if (nrDWORDS != 1) LOG_TEST_CASE("NV2A_VP_UPLOAD_CONST_ID should have one argument!"); + ConstantAddress = *pNV2ATokens; + break; + case NV2A_VP_UPLOAD_CONST(0): { // = 0x00000B80 + if ((nrDWORDS & 3) != 0) LOG_TEST_CASE("NV2A_VP_UPLOAD_CONST arguments should be a multiple of 4!"); + unsigned nrConstants = nrDWORDS / X_VSH_INSTRUCTION_SIZE; + // TODO : FIXME : Implement and call SetVertexShaderConstants(pNV2ATokens, ConstantAddress, nrConstants); + ConstantAddress += nrConstants; + break; } + default: + // TODO : Remove this break-out hack once NrTokens is reliable and instead have: DEFAULT_UNREACHABLE; + LOG_TEST_CASE("Stopping at unexpected NV2A method"); + pEnd = pNV2ATokens; + break; + } + + pNV2ATokens += nrDWORDS; } } void CxbxImpl_SetVertexShader(DWORD Handle) { - LOG_INIT // Allows use of DEBUG_D3DRESULT + LOG_INIT; // Allows use of DEBUG_D3DRESULT // Checks if the Handle has bit 0 set - if not, it's a FVF // which is converted to a global Xbox Vertex Shader struct @@ -1473,209 +1406,126 @@ void CxbxImpl_SetVertexShader(DWORD Handle) HRESULT hRet = D3D_OK; - g_Xbox_VertexShader_Handle = Handle; - - if 
(VshHandleIsVertexShader(Handle)) { - CxbxVertexShader* pCxbxVertexShader = GetCxbxVertexShader(Handle); - if (pCxbxVertexShader) { - SetCxbxVertexShaderHandle(pCxbxVertexShader); - - // If the shader handle has a shader function - // copy it to the shader slots - if (pCxbxVertexShader->pXboxFunctionCopy != nullptr) { - g_CxbxVertexShaderSlotAddress = 0; - auto CxbxVertexShaderSlotPtr = GetCxbxVertexShaderSlotPtr(g_CxbxVertexShaderSlotAddress); - if (CxbxVertexShaderSlotPtr) { - // Skip the header DWORD at the beginning - auto pTokens = &pCxbxVertexShader->pXboxFunctionCopy[1]; - memcpy(CxbxVertexShaderSlotPtr, pTokens, pCxbxVertexShader->XboxNrAddressSlots * X_VSH_INSTRUCTION_SIZE_BYTES); - } - } + xbox::X_D3DVertexShader* pXboxVertexShader = CxbxGetXboxVertexShaderForHandle(Handle); + g_Xbox_VertexShader_IsPassthrough = false; + if (pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_PROGRAM) { +#if 0 // Since the D3DDevice_SetVertexShader patch already called it's trampoline, these calls have already been executed : + CxbxImpl_LoadVertexShader(Handle, 0); + CxbxImpl_SelectVertexShader(Handle, 0); +#else // So let's check if that indeed happened : + bool bHackCallSelectAgain = false; + if (g_Xbox_VertexShader_Handle != Handle) { + LOG_TEST_CASE("g_Xbox_VertexShader_Handle != Handle"); + bHackCallSelectAgain = true; } - else { - EmuLog(LOG_LEVEL::DEBUG, "SetVertexShader with shader handle that has not been created"); + if (g_Xbox_VertexShader_FunctionSlots_StartAddress != 0) { + LOG_TEST_CASE("g_Xbox_VertexShader_FunctionSlots_StartAddress != 0"); + bHackCallSelectAgain = true; + } + if (g_Xbox_VertexShader_IsFixedFunction != false) { + LOG_TEST_CASE("g_Xbox_VertexShader_IsFixedFunction != false"); + bHackCallSelectAgain = true; } - } - else { - hRet = g_pD3DDevice->SetVertexShader(nullptr); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShader"); - hRet = g_pD3DDevice->SetFVF(Handle); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetFVF"); - } -} -HRESULT 
CxbxImpl_CreateVertexShader(CONST DWORD *pDeclaration, CONST DWORD *pFunction, DWORD *pHandle, DWORD Usage) -{ - LOG_INIT // Allows use of DEBUG_D3DRESULT - - HRESULT hRet = D3D_OK; - - if (g_pD3DDevice == nullptr) { - LOG_TEST_CASE("D3DDevice_CreateVertexShader called before Direct3D_CreateDevice"); - // We lie to allow the game to continue for now, but it probably won't work well - return 0; // == xbox::status_success - } - - // HACK: TODO: support this situation - if (pDeclaration == nullptr) { - LOG_TEST_CASE("Vertex shader without declaration"); - *pHandle = xbox::zero; - return D3D_OK; - } - - // Now, we can create the host vertex shader - DWORD XboxDeclarationCount = 0; - CxbxVertexShader* pCxbxVertexShader = (CxbxVertexShader*)calloc(1, sizeof(CxbxVertexShader)); - D3DVERTEXELEMENT* pRecompiledDeclaration = nullptr; - - pRecompiledDeclaration = EmuRecompileVshDeclaration((DWORD*)pDeclaration, - /*bIsFixedFunction=*/pFunction == xbox::zeroptr, - &XboxDeclarationCount, - &pCxbxVertexShader->Declaration); - - // Create the vertex declaration - hRet = g_pD3DDevice->CreateVertexDeclaration(pRecompiledDeclaration, &pCxbxVertexShader->Declaration.pHostVertexDeclaration); - free(pRecompiledDeclaration); - - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->CreateVertexDeclaration"); - - if (FAILED(hRet)) { - // NOTE: This is a fatal error because it ALWAYS triggers a crash within DrawVertices if not set - CxbxKrnlCleanup("Failed to create Vertex Declaration"); - } -#if 0 // Creating a vertex shader doesn't imply activating it! - hRet = g_pD3DDevice->SetVertexDeclaration(pCxbxVertexShader->Declaration.pHostVertexDeclaration); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexDeclaration"); - if (FAILED(hRet)) { - CxbxKrnlCleanup("Failed to set Vertex Declaration"); - } + if (bHackCallSelectAgain) { + // If any of the above test-cases was hit, perhaps our patch on + // _SelectVertexShader isn't applied; + // 'solve' that by calling it here instead. 
+ CxbxImpl_SelectVertexShader(Handle, 0); + g_Xbox_VertexShader_IsFixedFunction = false; + } #endif + } else { + // A shader without a program won't call LoadVertexShader nor SelectVertexShader + // +#ifdef CXBX_USE_GLOBAL_VERTEXSHADER_POINTER + g_Xbox_VertexShader_Ptr = pXboxVertexShader; +#endif + g_Xbox_VertexShader_Handle = Handle; + g_Xbox_VertexShader_FunctionSlots_StartAddress = 0; - // Set handle declaration properties - pCxbxVertexShader->Declaration.pXboxDeclarationCopy = (DWORD*)malloc(XboxDeclarationCount * sizeof(DWORD)); - memcpy(pCxbxVertexShader->Declaration.pXboxDeclarationCopy, pDeclaration, XboxDeclarationCount * sizeof(DWORD)); - pCxbxVertexShader->Declaration.XboxDeclarationCount = XboxDeclarationCount; - - if (pFunction != xbox::zeroptr) - { - // Parse and compile the shader - DWORD xboxFunctionSize = 0; - pCxbxVertexShader->VertexShaderKey = g_VertexShaderSource.CreateShader(pFunction, &xboxFunctionSize); - - // Set handle shader function properties - pCxbxVertexShader->XboxFunctionSize = xboxFunctionSize; - pCxbxVertexShader->pXboxFunctionCopy = (DWORD*)malloc(xboxFunctionSize); - memcpy(pCxbxVertexShader->pXboxFunctionCopy, pFunction, xboxFunctionSize); - pCxbxVertexShader->XboxNrAddressSlots = (xboxFunctionSize - sizeof(xbox::X_VSH_SHADER_HEADER)) / X_VSH_INSTRUCTION_SIZE_BYTES; - pCxbxVertexShader->XboxVertexShaderType = X_VST_NORMAL; // TODO : This can vary - } - - // Save the status, to remove things later - // pCxbxVertexShader->XboxStatus = hRet; // Not even used by VshHandleIsValidShader() - - RegisterCxbxVertexShader(*pHandle, pCxbxVertexShader); - - if (FAILED(hRet)) - { -#ifdef _DEBUG_TRACK_VS - if (pFunction) - { - char pFileName[30]; - static int FailedShaderCount = 0; - xbox::X_VSH_SHADER_HEADER* pHeader = (xbox::X_VSH_SHADER_HEADER*)pFunction; - EmuLog(LOG_LEVEL::WARNING, "Couldn't create vertex shader!"); - sprintf(pFileName, "failed%05d.xvu", FailedShaderCount); - FILE* f = fopen(pFileName, "wb"); - if (f) - { - 
fwrite(pFunction, sizeof(xbox::X_VSH_SHADER_HEADER) + pHeader->NumInst * 16, 1, f); - fclose(f); - } - FailedShaderCount++; + // Only when there's no program, set default values for attributes missing from vertex shader + // Note : We avoid calling CxbxImpl_SetVertexData4f here, as that would + // start populating g_InlineVertexBuffer_Table, which is not our intend here. + if (!(pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_HASDIFFUSE)) { + CxbxSetVertexAttribute(xbox::X_D3DVSDE_DIFFUSE, 1, 1, 1, 1); + } + if (!(pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_HASSPECULAR)) { + CxbxSetVertexAttribute(xbox::X_D3DVSDE_SPECULAR, 0, 0, 0, 0); + } + if (!(pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_HASBACKDIFFUSE)) { + CxbxSetVertexAttribute(xbox::X_D3DVSDE_BACKDIFFUSE, 1, 1, 1, 1); + } + if (!(pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_HASBACKSPECULAR)) { + CxbxSetVertexAttribute(xbox::X_D3DVSDE_BACKSPECULAR, 0, 0, 0, 0); } -#endif // _DEBUG_TRACK_VS - //hRet = D3D_OK; - } - return hRet; + // Switch to passthrough program, if so required + if (pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_PASSTHROUGH) { + CxbxSetVertexShaderPassthroughProgram(); + g_Xbox_VertexShader_IsFixedFunction = false; + g_Xbox_VertexShader_IsPassthrough = true; + } else if (pXboxVertexShader->Flags & X_VERTEXSHADER_FLAG_UNKNOWN) { + // Test-case : Amped + LOG_TEST_CASE("unknown vertex shader flag (4)"); + } else { + // Test-case : Many XDK samples, Crazy taxi 3 + //LOG_TEST_CASE("Other or no vertex shader flags"); + g_Xbox_VertexShader_IsFixedFunction = true; + } + } } void CxbxImpl_DeleteVertexShader(DWORD Handle) { - LOG_INIT // Allows use of DEBUG_D3DRESULT + LOG_INIT; // Allows use of DEBUG_D3DRESULT // Handle is always address of an Xbox VertexShader struct, or-ed with 1 (X_D3DFVF_RESERVED0) // It's reference count is lowered. If it reaches zero (0), the struct is freed. 
- if (VshHandleIsVertexShader(Handle)) - { - CxbxVertexShader* pCxbxVertexShader = GetCxbxVertexShader(Handle); // Fetch from cache - if (pCxbxVertexShader == nullptr) { - return; // Avoid crash if no shader was cached yet - } - - RegisterCxbxVertexShader(Handle, nullptr); // Remove from cache - - if (pCxbxVertexShader->Declaration.pHostVertexDeclaration) { - HRESULT hRet = pCxbxVertexShader->Declaration.pHostVertexDeclaration->Release(); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->DeleteVertexShader(pHostVertexDeclaration)"); - } - - // Release the host vertex shader - g_VertexShaderSource.ReleaseShader(pCxbxVertexShader->VertexShaderKey); - - if (pCxbxVertexShader->Declaration.pXboxDeclarationCopy) - { - free(pCxbxVertexShader->Declaration.pXboxDeclarationCopy); - } - - if (pCxbxVertexShader->pXboxFunctionCopy) - { - free(pCxbxVertexShader->pXboxFunctionCopy); - } - - FreeVertexDynamicPatch(pCxbxVertexShader); - - free(pCxbxVertexShader); + xbox::X_D3DVertexShader* pXboxVertexShader = VshHandleToXboxVertexShader(Handle); + if (pXboxVertexShader == nullptr) { + return; } + + if (pXboxVertexShader->RefCount > 1) { + return; + } + +#if 0 // TODO : Decide and implement what parts to free + RegisterCxbxVertexDeclaration(pCxbxVertexDeclaration->Key, nullptr); // Remove from cache (which will free present pCxbxVertexDeclaration) + + // Release the host vertex shader + g_VertexShaderSource.ReleaseShader(pCxbxVertexShader->Key); +#endif } +// TODO : Remove SetVertexShaderConstant implementation and the patch once +// CxbxUpdateHostVertexShaderConstants is reliable (ie. 
: when we're able to flush the NV2A push buffer) void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD ConstantCount) { - LOG_INIT // Allows use of DEBUG_D3DRESULT + LOG_INIT; // Allows use of DEBUG_D3DRESULT -/*#ifdef _DEBUG_TRACK_VS_CONST - for (uint32_t i = 0; i < ConstantCount; i++) - { - printf("SetVertexShaderConstant, c%d = { %f, %f, %f, %f }\n", - Register + i, - *((float*)pConstantData + 4 * i), - *((float*)pConstantData + 4 * i + 1), - *((float*)pConstantData + 4 * i + 2), - *((float*)pConstantData + 4 * i + 3)); - } -#endif*/ // _DEBUG_TRACK_VS_CONST - -// Xbox vertex shader constants range from -96 to 95 -// The host does not support negative, so we adjust to 0..191 + // Xbox vertex shader constants range from -96 to 95 + // The host does not support negative, so we adjust to 0..191 Register += X_D3DSCM_CORRECTION; if (Register < 0) LOG_TEST_CASE("Register < 0"); if (Register + ConstantCount > X_D3DVS_CONSTREG_COUNT) LOG_TEST_CASE("Register + ConstantCount > X_D3DVS_CONSTREG_COUNT"); - HRESULT hRet; - hRet = g_pD3DDevice->SetVertexShaderConstantF( - Register, - (float*)pConstantData, - ConstantCount - ); - DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetVertexShaderConstant"); - if (FAILED(hRet)) - { - EmuLog(LOG_LEVEL::WARNING, "We're lying about setting a vertex shader constant!"); - hRet = D3D_OK; + // Write Vertex Shader constants in nv2a + extern float* HLE_get_NV2A_vertex_constant_float4_ptr(unsigned const_index); // TMP glue + float* constant_floats = HLE_get_NV2A_vertex_constant_float4_ptr(Register); + memcpy(constant_floats, pConstantData, ConstantCount * sizeof(float) * 4); + + // Mark the constant as dirty, so that CxbxUpdateHostVertexShaderConstants will pick it up + extern NV2ADevice* g_NV2A; // TMP glue + auto nv2a = g_NV2A->GetDeviceState(); + for (DWORD i = 0; i < ConstantCount; i++) { + nv2a->pgraph.vsh_constants_dirty[Register + i] = true; } } + // parse xbox vertex shader function into an intermediate format extern 
void EmuParseVshFunction ( @@ -1704,6 +1554,8 @@ extern void EmuParseVshFunction pShader->Header.NumInst = (uint16_t)pShader->Instructions.size(); // Decode until we hit a token marked final + // Note : CxbxSetVertexShaderSlots makes sure this always stops + // after X_VSH_MAX_INSTRUCTION_COUNT, by setting FLD_FINAL in there. while (VshDecoder.VshConvertToIntermediate(pCurToken, pShader)) { pCurToken += X_VSH_INSTRUCTION_SIZE; } diff --git a/src/core/hle/D3D8/XbVertexShader.h b/src/core/hle/D3D8/XbVertexShader.h index de234b02e..5db13d815 100644 --- a/src/core/hle/D3D8/XbVertexShader.h +++ b/src/core/hle/D3D8/XbVertexShader.h @@ -40,9 +40,10 @@ typedef struct _CxbxVertexShaderStreamElement { - UINT XboxType; // The stream element data types (xbox) - UINT XboxByteSize; // The stream element data sizes (xbox) - UINT HostByteSize; // The stream element data sizes (pc) + UINT XboxType; // The stream element data type (xbox) + UINT XboxByteSize; // The stream element data size (xbox) + BYTE HostDataType; // The stream element data type (pc) + UINT HostByteSize; // The stream element data size (pc) } CxbxVertexShaderStreamElement; @@ -60,53 +61,25 @@ CxbxVertexShaderStreamElement; typedef struct _CxbxVertexShaderStreamInfo { BOOL NeedPatch; // This is to know whether it's data which must be patched - BOOL DeclPosition; WORD HostVertexStride; DWORD NumberOfVertexElements; // Number of the stream data types - WORD CurrentStreamNumber; - CxbxVertexShaderStreamElement VertexElements[X_VSH_MAX_ATTRIBUTES + 16]; // TODO : Why 16 extra host additions?) + WORD XboxStreamIndex; + CxbxVertexShaderStreamElement VertexElements[X_VSH_MAX_ATTRIBUTES]; } CxbxVertexShaderStreamInfo; +typedef uint64_t VertexDeclarationKey; + typedef struct _CxbxVertexDeclaration { - CxbxVertexShaderStreamInfo VertexStreams[X_VSH_MAX_STREAMS]; + VertexDeclarationKey Key; + CxbxVertexShaderStreamInfo VertexStreams[X_VSH_MAX_STREAMS]; // Note : VertexStreams is indexed by a counter, NOT StreamIndex! 
IDirect3DVertexDeclaration* pHostVertexDeclaration; - DWORD* pXboxDeclarationCopy; - DWORD XboxDeclarationCount; // Xbox's number of DWORD-sized X_D3DVSD* tokens - UINT NumberOfVertexStreams; // The number of streams the vertex shader uses + UINT NumberOfVertexStreams; // The number of streams the vertex shader uses bool vRegisterInDeclaration[X_VSH_MAX_ATTRIBUTES]; } CxbxVertexDeclaration; - -typedef struct _CxbxVertexShader -{ - // These are the parameters given by the XBE, - // we save them to be able to return them when necessary. - DWORD XboxFunctionSize; - DWORD* pXboxFunctionCopy; - UINT XboxNrAddressSlots; - DWORD XboxVertexShaderType; - // DWORD XboxStatus; // Used by VshHandleIsValidShader() - - // The resulting host variables - uint64_t VertexShaderKey; - - // Needed for dynamic stream patching - CxbxVertexDeclaration Declaration; -} -CxbxVertexShader; - -// recompile xbox vertex shader declaration -extern D3DVERTEXELEMENT *EmuRecompileVshDeclaration -( - DWORD *pXboxDeclaration, - bool bIsFixedFunction, - DWORD *pXboxDeclarationCount, - CxbxVertexDeclaration *pCxbxVertexDeclaration -); - // Intermediate vertex shader structures enum VSH_OREG_NAME { @@ -224,25 +197,21 @@ extern void EmuParseVshFunction IntermediateVertexShader* pShader ); -extern void FreeVertexDynamicPatch(CxbxVertexShader *pVertexShader); - -// Checks for failed vertex shaders, and shaders that would need patching -extern boolean IsValidCurrentShader(void); - inline boolean VshHandleIsVertexShader(DWORD Handle) { return (Handle & X_D3DFVF_RESERVED0) ? 
TRUE : FALSE; } -inline boolean VshHandleIsFVF(DWORD Handle) { return !VshHandleIsVertexShader(Handle); } inline xbox::X_D3DVertexShader *VshHandleToXboxVertexShader(DWORD Handle) { return (xbox::X_D3DVertexShader *)(Handle & ~X_D3DFVF_RESERVED0);} -extern CxbxVertexShader* GetCxbxVertexShader(DWORD XboxVertexShaderHandle); +extern bool g_Xbox_VertexShader_IsFixedFunction; +extern CxbxVertexDeclaration* CxbxGetVertexDeclaration(); +extern xbox::X_STREAMINPUT& GetXboxVertexStreamInput(unsigned XboxStreamNumber); + +extern void CxbxImpl_SetScreenSpaceOffset(float x, float y); extern void CxbxImpl_LoadVertexShaderProgram(CONST DWORD* pFunction, DWORD Address); extern void CxbxImpl_LoadVertexShader(DWORD Handle, DWORD Address); extern void CxbxImpl_SetVertexShader(DWORD Handle); -extern void CxbxImpl_SelectVertexShaderDirect(xbox::X_VERTEXATTRIBUTEFORMAT* pVAF, DWORD Address); extern void CxbxImpl_SelectVertexShader(DWORD Handle, DWORD Address); extern void CxbxImpl_SetVertexShaderInput(DWORD Handle, UINT StreamCount, xbox::X_STREAMINPUT* pStreamInputs); extern void CxbxImpl_SetVertexShaderConstant(INT Register, PVOID pConstantData, DWORD ConstantCount); -extern HRESULT CxbxImpl_CreateVertexShader(CONST DWORD *pDeclaration, CONST DWORD *pFunction, DWORD *pHandle, DWORD Usage); extern void CxbxImpl_DeleteVertexShader(DWORD Handle); #endif diff --git a/src/core/hle/Patches.cpp b/src/core/hle/Patches.cpp index 0bf3ab1d6..88dee8860 100644 --- a/src/core/hle/Patches.cpp +++ b/src/core/hle/Patches.cpp @@ -64,7 +64,7 @@ std::map g_PatchTable = { PATCH_ENTRY("D3DDevice_BlockUntilVerticalBlank", xbox::EMUPATCH(D3DDevice_BlockUntilVerticalBlank), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_Clear", xbox::EMUPATCH(D3DDevice_Clear), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_CopyRects", xbox::EMUPATCH(D3DDevice_CopyRects), PATCH_HLE_D3D), - PATCH_ENTRY("D3DDevice_CreateVertexShader", xbox::EMUPATCH(D3DDevice_CreateVertexShader), PATCH_HLE_D3D), + // 
PATCH_ENTRY("D3DDevice_CreateVertexShader", xbox::EMUPATCH(D3DDevice_CreateVertexShader), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_DeleteVertexShader", xbox::EMUPATCH(D3DDevice_DeleteVertexShader), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_DeleteVertexShader_0", xbox::EMUPATCH(D3DDevice_DeleteVertexShader_0), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_DrawIndexedVertices", xbox::EMUPATCH(D3DDevice_DrawIndexedVertices), PATCH_HLE_D3D), @@ -95,7 +95,7 @@ std::map g_PatchTable = { PATCH_ENTRY("D3DDevice_GetVertexShaderConstant", xbox::EMUPATCH(D3DDevice_GetVertexShaderConstant), PATCH_HLE_D3D), //PATCH_ENTRY("D3DDevice_GetVertexShaderDeclaration", xbox::EMUPATCH(D3DDevice_GetVertexShaderDeclaration), PATCH_HLE_D3D), //PATCH_ENTRY("D3DDevice_GetVertexShaderFunction", xbox::EMUPATCH(D3DDevice_GetVertexShaderFunction), PATCH_HLE_D3D), - PATCH_ENTRY("D3DDevice_GetVertexShaderInput", xbox::EMUPATCH(D3DDevice_GetVertexShaderInput), PATCH_HLE_D3D), + //PATCH_ENTRY("D3DDevice_GetVertexShaderInput", xbox::EMUPATCH(D3DDevice_GetVertexShaderInput), PATCH_HLE_D3D), //PATCH_ENTRY("D3DDevice_GetVertexShaderSize", xbox::EMUPATCH(D3DDevice_GetVertexShaderSize), PATCH_HLE_D3D), //PATCH_ENTRY("D3DDevice_GetVertexShaderType", xbox::EMUPATCH(D3DDevice_GetVertexShaderType), PATCH_HLE_D3D), //PATCH_ENTRY("D3DDevice_GetViewportOffsetAndScale", xbox::EMUPATCH(D3DDevice_GetViewportOffsetAndScale), PATCH_HLE_D3D), @@ -117,7 +117,7 @@ std::map g_PatchTable = { PATCH_ENTRY("D3DDevice_RunPushBuffer", xbox::EMUPATCH(D3DDevice_RunPushBuffer), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_RunVertexStateShader", xbox::EMUPATCH(D3DDevice_RunVertexStateShader), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SelectVertexShader", xbox::EMUPATCH(D3DDevice_SelectVertexShader), PATCH_HLE_D3D), - PATCH_ENTRY("D3DDevice_SelectVertexShaderDirect", xbox::EMUPATCH(D3DDevice_SelectVertexShaderDirect), PATCH_HLE_D3D), + //PATCH_ENTRY("D3DDevice_SelectVertexShaderDirect", xbox::EMUPATCH(D3DDevice_SelectVertexShaderDirect), PATCH_HLE_D3D), 
PATCH_ENTRY("D3DDevice_SelectVertexShader_0", xbox::EMUPATCH(D3DDevice_SelectVertexShader_0), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SelectVertexShader_4", xbox::EMUPATCH(D3DDevice_SelectVertexShader_4), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SetBackBufferScale", xbox::EMUPATCH(D3DDevice_SetBackBufferScale), PATCH_HLE_D3D), @@ -173,7 +173,7 @@ std::map g_PatchTable = { PATCH_ENTRY("D3DDevice_SetVertexShaderConstantNotInlineFast", xbox::EMUPATCH(D3DDevice_SetVertexShaderConstantNotInlineFast), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SetVertexShaderConstant_8", xbox::EMUPATCH(D3DDevice_SetVertexShaderConstant_8), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SetVertexShaderInput", xbox::EMUPATCH(D3DDevice_SetVertexShaderInput), PATCH_HLE_D3D), - PATCH_ENTRY("D3DDevice_SetVertexShaderInputDirect", xbox::EMUPATCH(D3DDevice_SetVertexShaderInputDirect), PATCH_HLE_D3D), + //PATCH_ENTRY("D3DDevice_SetVertexShaderInputDirect", xbox::EMUPATCH(D3DDevice_SetVertexShaderInputDirect), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SetVerticalBlankCallback", xbox::EMUPATCH(D3DDevice_SetVerticalBlankCallback), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_SetViewport", xbox::EMUPATCH(D3DDevice_SetViewport), PATCH_HLE_D3D), PATCH_ENTRY("D3DDevice_Swap", xbox::EMUPATCH(D3DDevice_Swap), PATCH_HLE_D3D), diff --git a/src/devices/video/EmuNV2A_PGRAPH.cpp b/src/devices/video/EmuNV2A_PGRAPH.cpp index 2402b8c62..c37eac00f 100644 --- a/src/devices/video/EmuNV2A_PGRAPH.cpp +++ b/src/devices/video/EmuNV2A_PGRAPH.cpp @@ -441,22 +441,78 @@ static uint64_t fnv_hash(const uint8_t *data, size_t len); static uint64_t fast_hash(const uint8_t *data, size_t len, unsigned int samples); /* PGRAPH - accelerated 2d/3d drawing engine */ + +static uint32_t pgraph_rdi_read(PGRAPHState *pg, + unsigned int select, unsigned int address) +{ + uint32_t r = 0; + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + r = pg->vsh_constants[address / 4][3 
- address % 4]; + break; + default: + fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", + select, address); + assert(false); + break; + } + return r; +} + +static void pgraph_rdi_write(PGRAPHState *pg, + unsigned int select, unsigned int address, + uint32_t val) +{ + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert(false); /* Untested */ + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + pg->vsh_constants_dirty[address / 4] |= + (val != pg->vsh_constants[address / 4][3 - address % 4]); + pg->vsh_constants[address / 4][3 - address % 4] = val; + break; + default: + NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", + select, address, val); + break; + } +} + DEVICE_READ32(PGRAPH) { qemu_mutex_lock(&d->pgraph.pgraph_lock); + PGRAPHState *pg = &d->pgraph; DEVICE_READ32_SWITCH() { case NV_PGRAPH_INTR: - result = d->pgraph.pending_interrupts; + result = pg->pending_interrupts; break; case NV_PGRAPH_INTR_EN: - result = d->pgraph.enabled_interrupts; + result = pg->enabled_interrupts; break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], + NV_PGRAPH_RDI_INDEX_SELECT); + int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], + NV_PGRAPH_RDI_INDEX_ADDRESS); + + result = pgraph_rdi_read(pg, select, address); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } default: DEVICE_READ32_REG(pgraph); // Was : DEBUG_READ32_UNHANDLED(PGRAPH); } - qemu_mutex_unlock(&d->pgraph.pgraph_lock); + qemu_mutex_unlock(&pg->pgraph_lock); // reg_log_read(NV_PGRAPH, addr, r); @@ -465,36 +521,53 @@ DEVICE_READ32(PGRAPH) DEVICE_WRITE32(PGRAPH) { + PGRAPHState *pg = &d->pgraph; // reg_log_write(NV_PGRAPH, addr, val); - qemu_mutex_lock(&d->pgraph.pgraph_lock); + qemu_mutex_lock(&pg->pgraph_lock); switch (addr) { case NV_PGRAPH_INTR: - d->pgraph.pending_interrupts &= ~value; - qemu_cond_broadcast(&d->pgraph.interrupt_cond); + pg->pending_interrupts &= ~value; + qemu_cond_broadcast(&pg->interrupt_cond); break; case NV_PGRAPH_INTR_EN: - d->pgraph.enabled_interrupts = value; + pg->enabled_interrupts = value; break; case NV_PGRAPH_INCREMENT: if (value & NV_PGRAPH_INCREMENT_READ_3D) { - SET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], + SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D, - (GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], + (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D) + 1) - % GET_MASK(d->pgraph.regs[NV_PGRAPH_SURFACE], + % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D)); - qemu_cond_broadcast(&d->pgraph.flip_3d); + qemu_cond_broadcast(&pg->flip_3d); } break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], + NV_PGRAPH_RDI_INDEX_SELECT); + int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], + NV_PGRAPH_RDI_INDEX_ADDRESS); + + pgraph_rdi_write(pg, select, address, value); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { xbox::addr_xt context_address = - GET_MASK(d->pgraph.regs[NV_PGRAPH_CHANNEL_CTX_POINTER], NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; + GET_MASK(pg->regs[NV_PGRAPH_CHANNEL_CTX_POINTER], + NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; if (value & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { unsigned pgraph_channel_id = - GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); + GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", pgraph_channel_id, context_address); @@ -504,7 +577,7 @@ DEVICE_WRITE32(PGRAPH) NV2A_DPRINTF(" - CTX_USER = 0x%08X\n", context_user); - d->pgraph.regs[NV_PGRAPH_CTX_USER] = context_user; + pg->regs[NV_PGRAPH_CTX_USER] = context_user; // pgraph_set_context_user(d, context_user); } if (value & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { @@ -521,11 +594,11 @@ DEVICE_WRITE32(PGRAPH) // events switch (addr) { case NV_PGRAPH_FIFO: - qemu_cond_broadcast(&d->pgraph.fifo_access_cond); + qemu_cond_broadcast(&pg->fifo_access_cond); break; } - qemu_mutex_unlock(&d->pgraph.pgraph_lock); + qemu_mutex_unlock(&pg->pgraph_lock); DEVICE_WRITE32_END(PGRAPH); } @@ -1407,16 +1480,20 @@ void pgraph_handle_method(NV2AState *d, GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); pg->surface_zeta.pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; break; case NV097_SET_SURFACE_COLOR_OFFSET: pgraph_update_surface(d, false, true, true); pg->surface_color.offset = parameter; + pg->surface_color.buffer_dirty = true; break; case NV097_SET_SURFACE_ZETA_OFFSET: pgraph_update_surface(d, false, true, true); pg->surface_zeta.offset = parameter; + pg->surface_zeta.buffer_dirty 
= true; break; CASE_8(NV097_SET_COMBINER_ALPHA_ICW, 4) : @@ -2596,17 +2673,12 @@ void pgraph_handle_method(NV2AState *d, assert(false); /* FIXME: Untested! */ VertexAttribute *vertex_attribute = &pg->vertex_attributes[slot]; pgraph_allocate_inline_buffer_vertices(pg, slot); - /* FIXME: Is mapping to [-1,+1] correct? */ - vertex_attribute->inline_value[0] = ((int16_t)(parameter & 0xFFFF) * 2.0f + 1) - / 65535.0f; - vertex_attribute->inline_value[1] = ((int16_t)(parameter >> 16) * 2.0f + 1) - / 65535.0f; - /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? */ + vertex_attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); + vertex_attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); vertex_attribute->inline_value[2] = 0.0f; vertex_attribute->inline_value[3] = 1.0f; if (slot == 0) { pgraph_finish_inline_buffer_vertex(pg); - assert(false); /* FIXME: Untested */ } break; } @@ -2638,7 +2710,6 @@ void pgraph_handle_method(NV2AState *d, * 2.0f + 1) / 65535.0f; if ((slot == 0) && (part == 1)) { pgraph_finish_inline_buffer_vertex(pg); - assert(false); /* FIXME: Untested */ } break; } @@ -3364,7 +3435,15 @@ static void pgraph_bind_shaders(PGRAPHState *pg) last_y = y; } - for (i = 0; i < 8; i++) { + /* FIXME: We should memset(state, 0x00, sizeof(state)) instead */ + memset(state.psh.rgb_inputs, 0, sizeof(state.psh.rgb_inputs)); + memset(state.psh.rgb_outputs, 0, sizeof(state.psh.rgb_outputs)); + memset(state.psh.alpha_inputs, 0, sizeof(state.psh.alpha_inputs)); + memset(state.psh.alpha_outputs, 0, sizeof(state.psh.alpha_outputs)); + + /* Copy content of enabled combiner stages */ + unsigned int num_stages = pg->regs[NV_PGRAPH_COMBINECTL] & 0xFF; + for (i = 0; i < num_stages; i++) { state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; @@ -3439,7 +3518,7 @@ static void 
pgraph_bind_shaders(PGRAPHState *pg) pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); glUniform4i(pg->shader_binding->clip_region_loc[i], - x_min, y_min, x_max, y_max); + x_min, y_min, x_max + 1, y_max + 1); } pgraph_update_shader_constants(pg, pg->shader_binding, binding_changed, @@ -3787,10 +3866,10 @@ static void pgraph_update_surface(NV2AState *d, bool upload, glDeleteTextures(1, &pg->gl_zeta_buffer); pg->gl_zeta_buffer = 0; } - } - memcpy(&pg->last_surface_shape, &pg->surface_shape, + memcpy(&pg->last_surface_shape, &pg->surface_shape, sizeof(SurfaceShape)); + } if ((color_write || (!upload && pg->surface_color.write_enabled_cache)) && (upload || pg->surface_color.draw_dirty)) { diff --git a/src/devices/video/nv2a.cpp b/src/devices/video/nv2a.cpp index ddc8cef06..07fb7e7c0 100644 --- a/src/devices/video/nv2a.cpp +++ b/src/devices/video/nv2a.cpp @@ -1141,7 +1141,7 @@ void CxbxReserveNV2AMemory(NV2AState *d) } } - printf("[0x%.4X] INIT: Allocated %d MiB of Xbox NV2A PRAMIN memory at 0x%.8p to 0x%.8p\n", + printf("[0x%.4X] INIT: Allocated %d MiB of Xbox NV2A PRAMIN memory at 0x%.8x to 0x%.8x\n", GetCurrentThreadId(), d->pramin.ramin_size / ONE_MB, (uintptr_t)d->pramin.ramin_ptr, (uintptr_t)d->pramin.ramin_ptr + d->pramin.ramin_size - 1); } diff --git a/src/devices/video/nv2a.h b/src/devices/video/nv2a.h index fb774c3c3..43b64c052 100644 --- a/src/devices/video/nv2a.h +++ b/src/devices/video/nv2a.h @@ -95,7 +95,7 @@ public: void Reset(); // State Getter: Used for HLE reading of device state - NV2AState* GetDeviceState() { return m_nv2a_state; }; + inline NV2AState* GetDeviceState() { return m_nv2a_state; }; uint32_t IORead(int barIndex, uint32_t port, unsigned size); void IOWrite(int barIndex, uint32_t port, uint32_t value, unsigned size); diff --git a/src/devices/video/nv2a_regs.h b/src/devices/video/nv2a_regs.h index f34561ed4..80e4ae2f2 100644 --- a/src/devices/video/nv2a_regs.h +++ b/src/devices/video/nv2a_regs.h @@ -335,6 +335,8 @@ #define 
NV_PGRAPH_FIFO 0x00000720 # define NV_PGRAPH_FIFO_ACCESS (1 << 0) #define NV_PGRAPH_RDI_INDEX 0x00000750 +# define NV_PGRAPH_RDI_INDEX_ADDRESS 0x00001FFC +# define NV_PGRAPH_RDI_INDEX_SELECT 0x01FF0000 #define NV_PGRAPH_RDI_DATA 0x00000754 #define NV_PGRAPH_FFINTFC_ST2 0x00000764 #define NV_PGRAPH_CHANNEL_CTX_TABLE 0x00000780 @@ -691,6 +693,7 @@ #define NV_PCRTC_CONFIG 0x00000804 #define NV_PCRTC_RASTER 0x00000808 + #define NV_PVIDEO_DEBUG_2 0x00000088 #define NV_PVIDEO_DEBUG_3 0x0000008C #define NV_PVIDEO_INTR 0x00000100 @@ -832,7 +835,7 @@ # define NV_PRAMDAC_PLL_TEST_COUNTER_NVPLL_LOCK (1 << 29) # define NV_PRAMDAC_PLL_TEST_COUNTER_MPLL_LOCK (1 << 30) # define NV_PRAMDAC_PLL_TEST_COUNTER_VPLL_LOCK (1 << 31) -#define NV_PRAMDAC_GENERAL_CONTROL 0x00680600 +#define NV_PRAMDAC_GENERAL_CONTROL 0x00000600 # define NV_PRAMDAC_GENERAL_CONTROL_PIXMIX_ON (3 << 4) # define NV_PRAMDAC_GENERAL_CONTROL_VGA_STATE_SEL (1 << 8) # define NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL (1 << 12) @@ -840,6 +843,14 @@ # define NV_PRAMDAC_GENERAL_CONTROL_BPC_8BITS (1 << 20) # define NV_PRAMDAC_GENERAL_CONTROL_PIPE_LONG (2 << 28) +#define NV_PRAMDAC_FP_VDISPLAY_END 0x00000800 +#define NV_PRAMDAC_FP_VCRTC 0x00000808 +#define NV_PRAMDAC_FP_VSYNC_END 0x00000810 +#define NV_PRAMDAC_FP_VVALID_END 0x00000818 +#define NV_PRAMDAC_FP_HDISPLAY_END 0x00000820 +#define NV_PRAMDAC_FP_HCRTC 0x00000828 +#define NV_PRAMDAC_FP_HVALID_END 0x00000838 + #define NV_PRMCIO_ARX 0x006013c0 #define NV_PRMCIO_AR__WRITE 0x006013c0 #define NV_PRMCIO_AR__READ 0x006013c1 @@ -1795,6 +1806,14 @@ #define NV_IGRAPH_XF_LTC1_L6 0x12 #define NV_IGRAPH_XF_LTC1_L7 0x13 +/* These RDI select values appear to be named by MS. + * nvidia seems to refer to RDI_INDEX_VTX_CONSTANTS0 by RDI_RAMSEL_XL_XFCTX. + * However, we don't have other nvidia names; so we use these aliases for now. + * Eventually we'll probably adopt nouveau names for these internals. 
+ */ +#define RDI_INDEX_VTX_CONSTANTS0 0x17 +#define RDI_INDEX_VTX_CONSTANTS1 0xCC + #define NV2A_VERTEX_ATTR_POSITION 0 #define NV2A_VERTEX_ATTR_WEIGHT 1