From d4232df76676994fc3e9fbf8fae014558789445d Mon Sep 17 00:00:00 2001 From: Luke Usher Date: Wed, 14 Mar 2018 10:12:58 +0000 Subject: [PATCH] Cache Vertex Buffer Objects, but not their content. This gives a performance boost by avoiding the need to constantly Allocate and Free vertex buffers every single usage. --- src/CxbxKrnl/EmuD3D8.cpp | 8 - src/CxbxKrnl/EmuD3D8/PushBuffer.cpp | 6 - src/CxbxKrnl/EmuD3D8/VertexBuffer.cpp | 376 +++++--------------------- src/CxbxKrnl/EmuD3D8/VertexBuffer.h | 34 --- 4 files changed, 63 insertions(+), 361 deletions(-) diff --git a/src/CxbxKrnl/EmuD3D8.cpp b/src/CxbxKrnl/EmuD3D8.cpp index 6b45bda73..39ba899f4 100755 --- a/src/CxbxKrnl/EmuD3D8.cpp +++ b/src/CxbxKrnl/EmuD3D8.cpp @@ -6212,8 +6212,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawVertices) #ifdef _DEBUG_TRACK_VB } #endif - - VertPatch.Restore(); } // Execute callback procedure @@ -6283,8 +6281,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawVerticesUP) g_dwPrimPerFrame += VPDesc.dwHostPrimitiveCount; } - - VertPatch.Restore(); } // Execute callback procedure @@ -6408,8 +6404,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawIndexedVertices) g_dwPrimPerFrame += VPDesc.dwHostPrimitiveCount; } - VertPatch.Restore(); - g_pD3DDevice8->SetIndices(NULL, 0); } @@ -6488,8 +6482,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawIndexedVerticesUP) #ifdef _DEBUG_TRACK_VB } #endif - - VertPatch.Restore(); } // Execute callback procedure diff --git a/src/CxbxKrnl/EmuD3D8/PushBuffer.cpp b/src/CxbxKrnl/EmuD3D8/PushBuffer.cpp index 106154e4d..460e67bc6 100644 --- a/src/CxbxKrnl/EmuD3D8/PushBuffer.cpp +++ b/src/CxbxKrnl/EmuD3D8/PushBuffer.cpp @@ -330,8 +330,6 @@ extern void XTL::EmuExecutePushBufferRaw ); g_dwPrimPerFrame += VPDesc.dwHostPrimitiveCount; - - VertPatch.Restore(); } pdwPushData--; @@ -444,8 +442,6 @@ extern void XTL::EmuExecutePushBufferRaw } #endif - VertPatch.Restore(); - g_pD3DDevice8->SetIndices(0, 0); } } @@ -605,8 +601,6 @@ extern void XTL::EmuExecutePushBufferRaw } #endif - VertPatch.Restore(); - g_pD3DDevice8->SetIndices(0, 0); } } diff --git a/src/CxbxKrnl/EmuD3D8/VertexBuffer.cpp b/src/CxbxKrnl/EmuD3D8/VertexBuffer.cpp index 713cfabad..fd983a985 100755 --- a/src/CxbxKrnl/EmuD3D8/VertexBuffer.cpp +++ b/src/CxbxKrnl/EmuD3D8/VertexBuffer.cpp @@ -46,10 +46,11 @@ extern void EmuUpdateActiveTextureStages(); #include +#include +#include #define HASH_SEED 0 -#define VERTEX_BUFFER_CACHE_SIZE 256 #define MAX_STREAM_NOT_USED_TIME (2 * CLOCKS_PER_SEC) // TODO: Trim the not used time // Inline vertex buffer emulation @@ -70,6 +71,59 @@ extern UINT g_D3DStreamStrides[16]; void *GetDataFromXboxResource(XTL::X_D3DResource *pXboxResource); extern XTL::IDirect3DVertexBuffer8 *GetHostVertexBuffer(XTL::X_D3DResource *pXboxResource, DWORD dwSize); + +typedef struct { + XTL::IDirect3DVertexBuffer8* pHostVertexBuffer; + size_t uiSize; + std::chrono::time_point lastUsed; +} cached_vertex_buffer_object; + +std::unordered_map g_HostVertexBuffers; + +// This caches Vertex Buffer Objects, but not the containing data +// This prevents unnecessary allocation and releasing of Vertex Buffers when +// we can use an existing just fine. This gives a (slight) performance boost +XTL::IDirect3DVertexBuffer8* GetCachedVertexBufferObject(DWORD pXboxDataPtr, DWORD size) +{ + // TODO: If the vertex buffer object cache becomes too large, + // free the least recently used vertex buffers + + auto it = g_HostVertexBuffers.find(pXboxDataPtr); + if (it == g_HostVertexBuffers.end()) { + // Create new vertex buffer and return + cached_vertex_buffer_object newBuffer; + newBuffer.uiSize = size; + newBuffer.lastUsed = std::chrono::high_resolution_clock::now(); + + HRESULT hRet = g_pD3DDevice8->CreateVertexBuffer(size, D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &newBuffer.pHostVertexBuffer); + if (FAILED(hRet)) { + CxbxKrnlCleanup("Failed to create vertex buffer"); + } + + g_HostVertexBuffers[pXboxDataPtr] = newBuffer; + + return newBuffer.pHostVertexBuffer; + } + + auto buffer = &it->second; + buffer->lastUsed = std::chrono::high_resolution_clock::now(); + + // Return the existing vertex buffer, if possible + if (size <= buffer->uiSize) { + return buffer->pHostVertexBuffer; + } + + // If execution reached here, we need to release and re-create the vertex buffer.. + buffer->pHostVertexBuffer->Release(); + buffer->uiSize = size; + HRESULT hRet = g_pD3DDevice8->CreateVertexBuffer(size, D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &buffer->pHostVertexBuffer); + if (FAILED(hRet)) { + CxbxKrnlCleanup("Failed to create vertex buffer"); + } + + return buffer->pHostVertexBuffer; +} + XTL::VertexPatcher::VertexPatcher() { this->m_uiNbrStreams = 0; @@ -84,27 +138,6 @@ XTL::VertexPatcher::~VertexPatcher() { } -void XTL::VertexPatcher::DumpCache(void) -{ - printf("--- Dumping streams cache ---\n"); - RTNode *pNode = g_PatchedStreamsCache.getHead(); - while(pNode) - { - CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)pNode->pResource; - if(pCachedStream) - { - // TODO: Write nicer dump presentation - printf("Key: 0x%.08X Cache Hits: %d IsUP: %s OrigStride: %d NewStride: %d HashCount: %d HashFreq: %d Length: %d Hash: 0x%.08X\n", - pNode->uiKey, pCachedStream->uiCacheHit, pCachedStream->bIsUP ? "YES" : "NO", - pCachedStream->Stream.uiOrigStride, pCachedStream->Stream.uiNewStride, - pCachedStream->uiCount, pCachedStream->uiCheckFrequency, - pCachedStream->uiLength, pCachedStream->uiHash); - } - - pNode = pNode->pNext; - } -} - size_t GetVertexBufferSize(DWORD dwVertexCount, DWORD dwStride, PWORD pIndexData, DWORD dwOffset, DWORD dwIndexBase) { // If we are drawing from an offset, we know that the vertex count must have offset vertices @@ -128,230 +161,6 @@ size_t GetVertexBufferSize(DWORD dwVertexCount, DWORD dwStride, PWORD pIndexData return (highestVertexIndex + dwIndexBase + 1) * dwStride; } -void XTL::VertexPatcher::CacheStream(VertexPatchDesc *pPatchDesc, - UINT uiStream, - uint32_t uiHash) -{ - UINT uiStride; - XTL::X_D3DVertexBuffer *pOrigVertexBuffer = nullptr; - void *pCalculateData = NULL; - uint32 uiKey; - UINT uiLength; - CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)calloc(1, sizeof(CACHEDSTREAM)); - - // Check if the cache is full, if so, throw away the least used stream - if(g_PatchedStreamsCache.get_count() > VERTEX_BUFFER_CACHE_SIZE) - { - uint32 uiKey = 0; - uint32 uiMinHit = 0xFFFFFFFF; - - RTNode *pNode = g_PatchedStreamsCache.getHead(); - while(pNode) - { - if(pNode->pResource) - { - // First, check if there is an "expired" stream in the cache (not recently used) - if(((CACHEDSTREAM *)pNode->pResource)->lLastUsed < (clock() + MAX_STREAM_NOT_USED_TIME)) - { - printf("!!!Found an old stream, %2.2f\n", ((FLOAT)((clock() + MAX_STREAM_NOT_USED_TIME) - ((CACHEDSTREAM *)pNode->pResource)->lLastUsed)) / (FLOAT)CLOCKS_PER_SEC); - uiKey = pNode->uiKey; - break; - } - // Find the least used cached stream - if((uint32)((CACHEDSTREAM *)pNode->pResource)->uiCacheHit < uiMinHit) - { - uiMinHit = ((CACHEDSTREAM *)pNode->pResource)->uiCacheHit; - uiKey = pNode->uiKey; - } - } - pNode = pNode->pNext; - } - if(uiKey != 0) - { - printf("!!!Removing stream\n\n"); - FreeCachedStream((void*)uiKey); - } - } - - // Start the actual stream caching - if(!pPatchDesc->pXboxVertexStreamZeroData) - { - pOrigVertexBuffer = m_pStreams[uiStream].pOriginalStream; - m_pStreams[uiStream].pPatchedStream->AddRef(); - - pCalculateData = (void*)GetDataFromXboxResource(pOrigVertexBuffer); - - uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, g_D3DStreamStrides[uiStream], pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase); - pCachedStream->bIsUP = false; - uiKey = (uint32)pOrigVertexBuffer; - } - else - { - // There should only be one stream (stream zero) in this case - if(uiStream != 0) - { - CxbxKrnlCleanup("Trying to patch a Draw..UP with more than stream zero!"); - } - uiStride = pPatchDesc->uiXboxVertexStreamZeroStride; - pCalculateData = (uint08 *)pPatchDesc->pXboxVertexStreamZeroData; - uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase); - pCachedStream->bIsUP = true; - pCachedStream->pStreamUP = pCalculateData; - uiKey = (uint32)pCalculateData; - } - - // If we weren't given a known hash, calculate a new one - if (uiHash == 0) { - uiHash = XXHash32::hash((void *)pCalculateData, uiLength, HASH_SEED); - } - - pCachedStream->uiHash = uiHash; - pCachedStream->Stream = m_pStreams[uiStream]; - pCachedStream->uiCheckFrequency = 1; // Start with checking every 1th Draw.. - pCachedStream->uiCount = 0; - pCachedStream->uiLength = uiLength; - pCachedStream->uiCacheHit = 0; - pCachedStream->dwHostPrimitiveCount = pPatchDesc->dwHostPrimitiveCount; - pCachedStream->lLastUsed = clock(); - g_PatchedStreamsCache.insert(uiKey, pCachedStream); -} - -void XTL::VertexPatcher::FreeCachedStream(void *pStream) -{ - g_PatchedStreamsCache.Lock(); - CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)g_PatchedStreamsCache.get(pStream); - if(pCachedStream) - { - if(pCachedStream->bIsUP && pCachedStream->pStreamUP) - { - free(pCachedStream->pStreamUP); - } - - if(pCachedStream->Stream.pPatchedStream) - { - pCachedStream->Stream.pPatchedStream->Release(); - } - free(pCachedStream); - } - g_PatchedStreamsCache.Unlock(); - g_PatchedStreamsCache.remove(pStream); -} - -bool XTL::VertexPatcher::ApplyCachedStream(VertexPatchDesc *pPatchDesc, - UINT uiStream, - bool *pbFatalError, - uint32_t *pHash) -{ - UINT uiStride; - XTL::X_D3DVertexBuffer *pOrigVertexBuffer = nullptr; - void *pCalculateData = NULL; - UINT uiLength; - bool bApplied = false; - uint32 uiKey; - //CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)malloc(sizeof(CACHEDSTREAM)); - - if(!pPatchDesc->pXboxVertexStreamZeroData) - { - pOrigVertexBuffer = g_D3DStreams[uiStream]; - uiStride = g_D3DStreamStrides[uiStream]; - uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase); - uiKey = (uint32)pOrigVertexBuffer; - //pCachedStream->bIsUP = false; - } - else - { - // There should only be one stream (stream zero) in this case - if(uiStream != 0) - { - CxbxKrnlCleanup("Trying to find a cached Draw..UP with more than stream zero!"); - } - - uiStride = pPatchDesc->uiXboxVertexStreamZeroStride; - pCalculateData = (uint08 *)pPatchDesc->pXboxVertexStreamZeroData; - uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase); - uiKey = (uint32)pCalculateData; - //pCachedStream->bIsUP = true; - //pCachedStream->pStreamUP = pCalculateData; - } - g_PatchedStreamsCache.Lock(); - - CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)g_PatchedStreamsCache.get(uiKey); - if(pCachedStream) - { - pCachedStream->lLastUsed = clock(); - pCachedStream->uiCacheHit++; - bool bMismatch = false; - if(pCachedStream->uiCount == (pCachedStream->uiCheckFrequency - 1)) - { - if(pOrigVertexBuffer) { - pCalculateData = (void*)GetDataFromXboxResource(pOrigVertexBuffer); - } - - uint32_t uiHash = XXHash32::hash((void *)pCalculateData, uiLength, HASH_SEED); - *pHash = uiHash; - - if(uiHash == pCachedStream->uiHash) - { - // Take a while longer to check - if(pCachedStream->uiCheckFrequency < 32*1024) - { - pCachedStream->uiCheckFrequency *= 2; - } - pCachedStream->uiCount = 0; - } - else - { - // TODO: Do something about this - if(pCachedStream->bIsUP) - { - FreeCachedStream(pCachedStream->pStreamUP); - } - else - { - FreeCachedStream(pCachedStream->Stream.pOriginalStream); - } - pCachedStream = NULL; - bMismatch = true; - } - } - else - { - pCachedStream->uiCount++; - } - - if(!bMismatch) - { - if(!pCachedStream->bIsUP) - { - m_pStreams[uiStream].pOriginalStream = pOrigVertexBuffer; - m_pStreams[uiStream].uiOrigStride = uiStride; - g_pD3DDevice8->SetStreamSource(uiStream, pCachedStream->Stream.pPatchedStream, pCachedStream->Stream.uiNewStride); - pCachedStream->Stream.pPatchedStream->AddRef(); - m_pStreams[uiStream].pPatchedStream = pCachedStream->Stream.pPatchedStream; - m_pStreams[uiStream].uiNewStride = pCachedStream->Stream.uiNewStride; - } - else - { - pPatchDesc->pXboxVertexStreamZeroData = pCachedStream->pStreamUP; - pPatchDesc->uiXboxVertexStreamZeroStride = pCachedStream->Stream.uiNewStride; - } - - if(pCachedStream->dwHostPrimitiveCount) - { - // The primitives were patched, draw with the correct number of primimtives from the cache - pPatchDesc->dwHostPrimitiveCount = pCachedStream->dwHostPrimitiveCount; - } - - bApplied = true; - m_bPatched = true; - } - } - - g_PatchedStreamsCache.Unlock(); - - return bApplied; -} - int CountActiveD3DStreams() { int lastStreamIndex; @@ -438,7 +247,8 @@ bool XTL::VertexPatcher::PatchStream(VertexPatchDesc *pPatchDesc, dwNewSize = uiVertexCount * pStreamPatch->ConvertedStride; pOrigData = (uint08*)GetDataFromXboxResource(pOrigVertexBuffer); - g_pD3DDevice8->CreateVertexBuffer(dwNewSize, D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &pNewVertexBuffer); + pNewVertexBuffer = GetCachedVertexBufferObject(pOrigVertexBuffer->Data, dwNewSize); + if(FAILED(pNewVertexBuffer->Lock(0, 0, &pNewData, D3DLOCK_DISCARD))) { CxbxKrnlCleanup("Couldn't lock the new buffer"); @@ -603,11 +413,7 @@ bool XTL::VertexPatcher::PatchStream(VertexPatchDesc *pPatchDesc, { CxbxKrnlCleanup("Failed to set the type patched buffer as the new stream source!\n"); } - if(pStream->pPatchedStream) - { - // The stream was already primitive patched, release the previous vertex buffer to avoid memory leaks - pStream->pPatchedStream->Release(); - } + pStream->pPatchedStream = pNewVertexBuffer; } else @@ -678,7 +484,7 @@ bool XTL::VertexPatcher::NormalizeTexCoords(VertexPatchDesc *pPatchDesc, UINT ui uint08 *pOrigData = (uint08*)GetDataFromXboxResource(pOrigVertexBuffer); - g_pD3DDevice8->CreateVertexBuffer(uiLength, D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &pNewVertexBuffer); + pNewVertexBuffer = GetCachedVertexBufferObject(pOrigVertexBuffer->Data, uiLength); if(FAILED(pNewVertexBuffer->Lock(0, 0, &pData, D3DLOCK_DISCARD))) { CxbxKrnlCleanup("Couldn't lock new FVF buffer."); @@ -770,10 +576,6 @@ bool XTL::VertexPatcher::NormalizeTexCoords(VertexPatchDesc *pPatchDesc, UINT ui { CxbxKrnlCleanup("Failed to set the texcoord patched FVF buffer as the new stream source."); } - if(pStream->pPatchedStream) - { - pStream->pPatchedStream->Release(); - } pStream->pPatchedStream = pNewVertexBuffer; pStream->uiOrigStride = uiStride; @@ -881,18 +683,13 @@ bool XTL::VertexPatcher::PatchPrimitive(VertexPatchDesc *pPatchDesc, if(pPatchDesc->pXboxVertexStreamZeroData == nullptr) { - HRESULT hRet = g_pD3DDevice8->CreateVertexBuffer(dwNewSize, D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &pStream->pPatchedStream); - if (FAILED(hRet)) { - EmuWarning("CreateVertexBuffer Failed. Size: %d", dwNewSize); - } - - - if(pStream->pOriginalStream != 0) + if(pStream->pOriginalStream != nullptr) { pOrigVertexData = (XTL::BYTE*)GetDataFromXboxResource(pStream->pOriginalStream); + pStream->pPatchedStream = GetCachedVertexBufferObject(pStream->pOriginalStream->Data, dwNewSize); } - if(pStream->pPatchedStream != 0) + if(pStream->pPatchedStream != nullptr) { pStream->pPatchedStream->Lock(0, 0, &pPatchedVertexData, D3DLOCK_DISCARD); } @@ -990,22 +787,10 @@ bool XTL::VertexPatcher::Apply(VertexPatchDesc *pPatchDesc, bool *pbFatalError) bool LocalPatched = false; uint32_t uiHash = 0; - /*if(ApplyCachedStream(pPatchDesc, uiStream, pbFatalError, &uiHash)) - { - m_pStreams[uiStream].bUsedCached = true; - continue; - }*/ LocalPatched |= PatchPrimitive(pPatchDesc, uiStream); LocalPatched |= PatchStream(pPatchDesc, uiStream); - /* - if(LocalPatched && !pPatchDesc->pXboxVertexStreamZeroData) - { - // Insert the patched stream in the cache - CacheStream(pPatchDesc, uiStream, uiHash); - m_pStreams[uiStream].bUsedCached = true; - } - */ + Patched |= LocalPatched; // If we didn't patch the stream, use a non-patched stream @@ -1020,39 +805,6 @@ bool XTL::VertexPatcher::Apply(VertexPatchDesc *pPatchDesc, bool *pbFatalError) return Patched; } -bool XTL::VertexPatcher::Restore() -{ - if(!this->m_bPatched) - return false; - - for(UINT uiStream = 0; uiStream < m_uiNbrStreams; uiStream++) - { - if(m_pStreams[uiStream].pPatchedStream != NULL) - { - UINT b = m_pStreams[uiStream].pPatchedStream->Release(); - } - - if(!m_pStreams[uiStream].bUsedCached) - { - - if(this->m_bAllocatedStreamZeroData) - { - free(m_pNewVertexStreamZeroData); - // Cleanup, just to be sure : - m_pNewVertexStreamZeroData = nullptr; - this->m_bAllocatedStreamZeroData = false; - } - } - else - { - m_pStreams[uiStream].bUsedCached = false; - } - - } - - return true; -} - VOID XTL::EmuFlushIVB() { XTL::EmuUpdateDeferredStates(); @@ -1239,8 +991,6 @@ VOID XTL::EmuFlushIVB() g_pD3DDevice8->SetVertexShader(g_CurrentVertexShader); } - VertPatch.Restore(); - g_InlineVertexBuffer_TableOffset = 0; return; diff --git a/src/CxbxKrnl/EmuD3D8/VertexBuffer.h b/src/CxbxKrnl/EmuD3D8/VertexBuffer.h index 78463ef46..eb700f4a0 100755 --- a/src/CxbxKrnl/EmuD3D8/VertexBuffer.h +++ b/src/CxbxKrnl/EmuD3D8/VertexBuffer.h @@ -62,23 +62,8 @@ typedef struct _PATCHEDSTREAM XTL::IDirect3DVertexBuffer8 *pPatchedStream; UINT uiOrigStride; UINT uiNewStride; - bool bUsedCached; } PATCHEDSTREAM; -typedef struct _CACHEDSTREAM -{ - uint32_t uiHash; - uint32 uiCheckFrequency; - uint32 uiCacheHit; - bool bIsUP; - PATCHEDSTREAM Stream; - void *pStreamUP; // Draw..UP (instead of pOriginalStream) - uint32 uiLength; // The length of the stream - uint32 uiCount; // XXHash32::hash() check count - uint32 dwHostPrimitiveCount; - long lLastUsed; // For cache removal purposes -} CACHEDSTREAM; - class VertexPatcher { public: @@ -86,11 +71,6 @@ class VertexPatcher ~VertexPatcher(); bool Apply(VertexPatchDesc *pPatchDesc, bool *pbFatalError); - bool Restore(); - - // Dumps the cache to the console - static void DumpCache(void); - private: UINT m_uiNbrStreams; @@ -106,20 +86,6 @@ class VertexPatcher // Returns the number of streams of a patch UINT GetNbrStreams(VertexPatchDesc *pPatchDesc); - // Caches a patched stream - void CacheStream(VertexPatchDesc *pPatchDesc, - UINT uiStream, - uint32_t uiHash); - - // Frees a cached, patched stream - void FreeCachedStream(void *pStream); - - // Tries to apply a previously patched stream from the cache - bool ApplyCachedStream(VertexPatchDesc *pPatchDesc, - UINT uiStream, - bool *pbFatalError, - uint32_t *uiHash); - // Patches the types of the stream bool PatchStream(VertexPatchDesc *pPatchDesc, UINT uiStream);