Merge pull request #992 from LukeUsher/various-tweaks-and-fixes

Various tweaks and fixes
This commit is contained in:
Luke Usher 2018-03-16 15:29:30 +00:00 committed by GitHub
commit 226607d1ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 1110 additions and 1453 deletions

View File

@ -654,6 +654,7 @@
<ClCompile Include="..\..\src\devices\video\nv2a_vsh.cpp" />
<ClCompile Include="..\..\src\devices\video\swizzle.cpp" />
<ClCompile Include="..\..\src\devices\Xbox.cpp" />
<ClCompile Include="..\..\src\HighPerformanceGraphicsEnabler.c" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="subhook.vcxproj">

File diff suppressed because it is too large Load Diff

View File

@ -2545,3 +2545,62 @@ XTL::X_D3DPalette * WINAPI XTL::EMUPATCH(D3DDevice_CreatePalette2)
return pPalette;
}
// ******************************************************************
// * patch: IDirect3DVertexBuffer8_Lock
// ******************************************************************
// Patch for D3DVertexBuffer_Lock. The call and all of its arguments are
// logged, forwarded unchanged to the trampolined Xbox implementation, and
// the vertex buffer is then passed to ForceResourceRehash.
//
// pVertexBuffer - Xbox vertex buffer resource the title wants to lock
// OffsetToLock  - forwarded as-is to the Xbox implementation
// SizeToLock    - forwarded as-is to the Xbox implementation
// ppbData       - forwarded as-is (presumably receives the locked data pointer - TODO confirm)
// Flags         - forwarded as-is to the Xbox implementation
VOID WINAPI XTL::EMUPATCH(D3DVertexBuffer_Lock)
(
X_D3DVertexBuffer *pVertexBuffer,
UINT OffsetToLock,
UINT SizeToLock,
BYTE **ppbData,
DWORD Flags
)
{
FUNC_EXPORTS
LOG_FUNC_BEGIN
LOG_FUNC_ARG(pVertexBuffer)
LOG_FUNC_ARG(OffsetToLock)
LOG_FUNC_ARG(SizeToLock)
LOG_FUNC_ARG(ppbData)
LOG_FUNC_ARG(Flags)
LOG_FUNC_END;
// Pass through to the Xbox implementation of this function
XB_trampoline(VOID, WINAPI, D3DVertexBuffer_Lock, (X_D3DVertexBuffer*, UINT, UINT, BYTE**, DWORD));
XB_D3DVertexBuffer_Lock(pVertexBuffer, OffsetToLock, SizeToLock, ppbData, Flags);
// Mark the resource as modified
// (done after the Xbox call; presumably because the title may now write
// through the locked pointer - TODO confirm)
ForceResourceRehash(pVertexBuffer);
}
// ******************************************************************
// * patch: IDirect3DVertexBuffer8_Lock2
// ******************************************************************
// Patch for D3DVertexBuffer_Lock2. The call is logged, forwarded unchanged
// to the trampolined Xbox implementation, the vertex buffer is passed to
// ForceResourceRehash, and the Xbox implementation's result is returned.
//
// pVertexBuffer - Xbox vertex buffer resource the title wants to lock
// Flags         - forwarded as-is to the Xbox implementation
// Returns the BYTE* produced by the Xbox implementation.
BYTE* WINAPI XTL::EMUPATCH(D3DVertexBuffer_Lock2)
(
X_D3DVertexBuffer *pVertexBuffer,
DWORD Flags
)
{
FUNC_EXPORTS
LOG_FUNC_BEGIN
LOG_FUNC_ARG(pVertexBuffer)
LOG_FUNC_ARG(Flags)
LOG_FUNC_END;
// Pass through to the Xbox implementation of this function
XB_trampoline(BYTE*, WINAPI, D3DVertexBuffer_Lock2, (X_D3DVertexBuffer*, DWORD));
BYTE* pRet = XB_D3DVertexBuffer_Lock2(pVertexBuffer, Flags);
// Mark the resource as modified
// (done after the Xbox call; presumably because the title may now write
// through the returned pointer - TODO confirm)
ForceResourceRehash(pVertexBuffer);
RETURN(pRet);
}

View File

@ -62,6 +62,7 @@ namespace xboxkrnl
#include <assert.h>
#include <process.h>
#include <clocale>
#include <unordered_map>
// Allow use of time duration literals (making 16ms, etc possible)
using namespace std::literals::chrono_literals;
@ -746,7 +747,7 @@ typedef struct {
std::chrono::time_point<std::chrono::high_resolution_clock> lastUpdate;
} host_resource_info_t;
std::map <resource_key_t, host_resource_info_t> g_HostResources;
std::unordered_map <resource_key_t, host_resource_info_t> g_HostResources;
resource_key_t GetHostResourceKey(XTL::X_D3DResource* pXboxResource)
{
@ -856,7 +857,7 @@ bool HostResourceRequiresUpdate(resource_key_t key, DWORD dwSize)
// Currently, we only dynamically update Textures and Surfaces, so if our resource
// isn't of these types, do nothing
DWORD type = GetXboxCommonResourceType(it->second.pXboxResource);
if (type != X_D3DCOMMON_TYPE_SURFACE && type != X_D3DCOMMON_TYPE_TEXTURE && type != X_D3DCOMMON_TYPE_VERTEXBUFFER) {
if (type != X_D3DCOMMON_TYPE_SURFACE && type != X_D3DCOMMON_TYPE_TEXTURE) {
return false;
}
@ -977,16 +978,6 @@ XTL::IDirect3DIndexBuffer8 *GetHostIndexBuffer(XTL::X_D3DResource *pXboxResource
return (XTL::IDirect3DIndexBuffer8*)GetHostResource(pXboxResource);;
}
// Looks up the host vertex buffer that belongs to an Xbox vertex buffer
// resource by delegating to GetHostResource.
//
// pXboxResource - Xbox resource to look up; a null pointer yields nullptr
// dwSize        - forwarded unchanged to GetHostResource
// Returns the GetHostResource result cast to a host vertex buffer pointer.
XTL::IDirect3DVertexBuffer8 *GetHostVertexBuffer(XTL::X_D3DResource *pXboxResource, DWORD dwSize)
{
	XTL::IDirect3DVertexBuffer8 *pHostVertexBuffer = nullptr;

	if (pXboxResource != NULL) {
		// This accessor is only meant for vertex buffer resources
		assert(GetXboxCommonResourceType(pXboxResource) == X_D3DCOMMON_TYPE_VERTEXBUFFER);

		pHostVertexBuffer = (XTL::IDirect3DVertexBuffer8*)GetHostResource(pXboxResource, true, dwSize);
	}

	return pHostVertexBuffer;
}
void SetHostSurface(XTL::X_D3DResource *pXboxResource, XTL::IDirect3DSurface8 *pHostSurface)
{
assert(pXboxResource != NULL);
@ -1026,15 +1017,6 @@ void SetHostIndexBuffer(XTL::X_D3DResource *pXboxResource, XTL::IDirect3DIndexBu
SetHostResource(pXboxResource, (XTL::IDirect3DResource8*)pHostIndexBuffer);
}
// Registers a host vertex buffer for an Xbox vertex buffer resource by
// delegating to SetHostResource.
//
// pXboxResource     - Xbox resource; must be non-null and of vertex buffer type (asserted)
// pHostVertexBuffer - host buffer to associate with the Xbox resource
// dwSize            - forwarded unchanged to SetHostResource
void SetHostVertexBuffer(XTL::X_D3DResource *pXboxResource, XTL::IDirect3DVertexBuffer8 *pHostVertexBuffer, DWORD dwSize)
{
	// Preconditions, checked in debug builds only
	assert(pXboxResource != NULL);
	assert(GetXboxCommonResourceType(pXboxResource) == X_D3DCOMMON_TYPE_VERTEXBUFFER);

	XTL::IDirect3DResource8 *pHostResource = (XTL::IDirect3DResource8*)pHostVertexBuffer;
	SetHostResource(pXboxResource, pHostResource, dwSize);
}
int XboxD3DPaletteSizeToBytes(const XTL::X_D3DPALETTESIZE Size)
{
static int lk[4] =
@ -2325,7 +2307,7 @@ typedef struct {
XTL::IDirect3DIndexBuffer8* pHostIndexBuffer = nullptr;
} ConvertedIndexBuffer;
std::map<PWORD, ConvertedIndexBuffer> g_ConvertedIndexBuffers;
std::unordered_map<PWORD, ConvertedIndexBuffer> g_ConvertedIndexBuffers;
void CxbxRemoveIndexBuffer(PWORD pData)
{
@ -4175,76 +4157,7 @@ VOID WINAPI CreateHostResource
{
//
case X_D3DCOMMON_TYPE_INDEXBUFFER: return;
case X_D3DCOMMON_TYPE_VERTEXBUFFER:
{
DbgPrintf("EmuIDirect3DResource8_Register : Creating VertexBuffer...\n");
X_D3DVertexBuffer *pVertexBuffer = (X_D3DVertexBuffer*)pResource;
XTL::IDirect3DVertexBuffer8 *pNewHostVertexBuffer = nullptr;
// Vertex buffers live in Physical Memory Region
void* pVirtualAddr = GetDataFromXboxResource(pResource);
// create vertex buffer
{
// If we didn't get a size passed in, use QuerySize
if (dwSize == 0) {
g_VMManager.QuerySize((VAddr)pVirtualAddr);
}
// If we still didn't get a valid size, make a wild guess
if(dwSize == 0)
{
// TODO: once this is known to be working, remove the warning
EmuWarning("Vertex buffer allocation size unknown");
dwSize = PAGE_SIZE; // temporarily assign a small buffer, which will be increased later
/*hRet = E_FAIL;
goto fail;*/
}
hRet = g_pD3DDevice8->CreateVertexBuffer
(
dwSize, 0, 0, D3DPOOL_MANAGED,
&pNewHostVertexBuffer
);
DEBUG_D3DRESULT(hRet, "g_pD3DDevice8->CreateVertexBuffer");
if(FAILED(hRet))
{
char szString[256];
sprintf( szString, "CreateVertexBuffer Failed!\n\nVB Size = 0x%X\n\nError: \nDesc: ", dwSize/*,
DXGetErrorString8A(hRet)*//*, DXGetErrorDescription8A(hRet)*/);
EmuWarning( szString );
}
SetHostVertexBuffer(pResource, pNewHostVertexBuffer, dwSize);
#ifdef _DEBUG_TRACK_VB
g_VBTrackTotal.insert(pNewHostVertexBuffer);
#endif
BYTE *pNativeData = nullptr;
hRet = pNewHostVertexBuffer->Lock(
/*OffsetToLock=*/0,
/*SizeToLock=*/0/*=entire buffer*/,
&pNativeData,
/*Flags=*/0);
DEBUG_D3DRESULT(hRet, "pNewHostVertexBuffer->Lock");
if(FAILED(hRet))
CxbxKrnlCleanup("VertexBuffer Lock Failed!\n\nError: \nDesc: "/*,
DXGetErrorString8A(hRet)*//*, DXGetErrorDescription8A(hRet)*/);
memcpy(pNativeData, (void*)pVirtualAddr, dwSize);
pNewHostVertexBuffer->Unlock();
}
DbgPrintf("EmuIDirect3DResource8_Register : Successfully Created VertexBuffer (0x%.08X)\n", pNewHostVertexBuffer);
}
break;
case X_D3DCOMMON_TYPE_VERTEXBUFFER: return;
case X_D3DCOMMON_TYPE_PUSHBUFFER:
{
DbgPrintf("EmuIDirect3DResource8_Register :-> PushBuffer...\n");
@ -4609,7 +4522,7 @@ VOID WINAPI CreateHostResource
}
else if (CacheFormat != 0) // Do we need to convert to ARGB?
{
EmuWarning("Unsupported texture format, expanding to D3DFMT_A8R8G8B8");
DbgPrintf("Unsupported texture format, expanding to D3DFMT_A8R8G8B8");
uint8 *pSrc = (BYTE*)GetDataFromXboxResource(pResource);
uint8 *pDest = (uint8 *)LockedRect.pBits;
@ -5974,66 +5887,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_SetStreamSource)
}
}
// ******************************************************************
// * patch: IDirect3DVertexBuffer8_Lock
// ******************************************************************
// Patch for D3DVertexBuffer_Lock. The call and all of its arguments are
// logged, forwarded unchanged to the trampolined Xbox implementation, and
// the vertex buffer is then passed to ForceResourceRehash.
//
// pVertexBuffer - Xbox vertex buffer resource the title wants to lock
// OffsetToLock  - forwarded as-is to the Xbox implementation
// SizeToLock    - forwarded as-is to the Xbox implementation
// ppbData       - forwarded as-is (presumably receives the locked data pointer - TODO confirm)
// Flags         - forwarded as-is to the Xbox implementation
VOID WINAPI XTL::EMUPATCH(D3DVertexBuffer_Lock)
(
X_D3DVertexBuffer *pVertexBuffer,
UINT OffsetToLock,
UINT SizeToLock,
BYTE **ppbData,
DWORD Flags
)
{
FUNC_EXPORTS
LOG_FUNC_BEGIN
LOG_FUNC_ARG(pVertexBuffer)
LOG_FUNC_ARG(OffsetToLock)
LOG_FUNC_ARG(SizeToLock)
LOG_FUNC_ARG(ppbData)
LOG_FUNC_ARG(Flags)
LOG_FUNC_END;
// Pass through to the Xbox implementation of this function
XB_trampoline(VOID, WINAPI, D3DVertexBuffer_Lock, (X_D3DVertexBuffer*, UINT, UINT, BYTE**, DWORD));
XB_D3DVertexBuffer_Lock(pVertexBuffer, OffsetToLock, SizeToLock, ppbData, Flags);
// Mark the resource as modified
// (done after the Xbox call; presumably because the title may now write
// through the locked pointer - TODO confirm)
ForceResourceRehash(pVertexBuffer);
}
// ******************************************************************
// * patch: IDirect3DVertexBuffer8_Lock2
// ******************************************************************
// Patch for D3DVertexBuffer_Lock2. The call is logged, forwarded unchanged
// to the trampolined Xbox implementation, the vertex buffer is passed to
// ForceResourceRehash, and the Xbox implementation's result is returned.
//
// pVertexBuffer - Xbox vertex buffer resource the title wants to lock
// Flags         - forwarded as-is to the Xbox implementation
// Returns the BYTE* produced by the Xbox implementation.
BYTE* WINAPI XTL::EMUPATCH(D3DVertexBuffer_Lock2)
(
X_D3DVertexBuffer *pVertexBuffer,
DWORD Flags
)
{
FUNC_EXPORTS
LOG_FUNC_BEGIN
LOG_FUNC_ARG(pVertexBuffer)
LOG_FUNC_ARG(Flags)
LOG_FUNC_END;
// Pass through to the Xbox implementation of this function
XB_trampoline(BYTE*, WINAPI, D3DVertexBuffer_Lock2, (X_D3DVertexBuffer*, DWORD));
BYTE* pRet = XB_D3DVertexBuffer_Lock2(pVertexBuffer, Flags);
// Mark the resource as modified
// (done after the Xbox call; presumably because the title may now write
// through the returned pointer - TODO confirm)
ForceResourceRehash(pVertexBuffer);
RETURN(pRet);
}
// ******************************************************************
// * patch: D3DDevice_SetVertexShader
// ******************************************************************
@ -6211,8 +6064,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawVertices)
#ifdef _DEBUG_TRACK_VB
}
#endif
VertPatch.Restore();
}
// Execute callback procedure
@ -6282,8 +6133,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawVerticesUP)
g_dwPrimPerFrame += VPDesc.dwHostPrimitiveCount;
}
VertPatch.Restore();
}
// Execute callback procedure
@ -6407,8 +6256,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawIndexedVertices)
g_dwPrimPerFrame += VPDesc.dwHostPrimitiveCount;
}
VertPatch.Restore();
g_pD3DDevice8->SetIndices(NULL, 0);
}
@ -6487,8 +6334,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawIndexedVerticesUP)
#ifdef _DEBUG_TRACK_VB
}
#endif
VertPatch.Restore();
}
// Execute callback procedure
@ -6990,7 +6835,7 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_RunVertexStateShader)
// Maps pFunction definitions to pre-compiled shaders
// to reduce the speed impact of LoadVertexShaderProgram
typedef uint64_t load_shader_program_key_t;
std::map<load_shader_program_key_t, DWORD> g_LoadVertexShaderProgramCache;
std::unordered_map<load_shader_program_key_t, DWORD> g_LoadVertexShaderProgramCache;
// ******************************************************************
// * patch: D3DDevice_LoadVertexShaderProgram

View File

@ -1036,8 +1036,9 @@ XTL::D3DFORMAT XTL::EmuXB2PC_D3DFormat(X_D3DFORMAT Format)
if (Format <= X_D3DFMT_LIN_R8G8B8A8 && Format != -1 /*X_D3DFMT_UNKNOWN*/) // The last bit prevents crashing (Metal Slug 3)
{
const FormatInfo *info = &FormatInfos[Format];
if (info->warning != nullptr)
EmuWarning(info->warning);
if (info->warning != nullptr) {
DbgPrintf("%s", info->warning);
}
return info->pc;
}

View File

@ -330,8 +330,6 @@ extern void XTL::EmuExecutePushBufferRaw
);
g_dwPrimPerFrame += VPDesc.dwHostPrimitiveCount;
VertPatch.Restore();
}
pdwPushData--;
@ -444,8 +442,6 @@ extern void XTL::EmuExecutePushBufferRaw
}
#endif
VertPatch.Restore();
g_pD3DDevice8->SetIndices(0, 0);
}
}
@ -605,8 +601,6 @@ extern void XTL::EmuExecutePushBufferRaw
}
#endif
VertPatch.Restore();
g_pD3DDevice8->SetIndices(0, 0);
}
}

View File

@ -46,10 +46,11 @@
extern void EmuUpdateActiveTextureStages();
#include <ctime>
#include <unordered_map>
#include <chrono>
#define HASH_SEED 0
#define VERTEX_BUFFER_CACHE_SIZE 256
#define MAX_STREAM_NOT_USED_TIME (2 * CLOCKS_PER_SEC) // TODO: Trim the not used time
// Inline vertex buffer emulation
@ -68,7 +69,62 @@ extern DWORD XTL::g_dwPrimPerFrame = 0;
extern XTL::X_D3DVertexBuffer*g_D3DStreams[16];
extern UINT g_D3DStreamStrides[16];
void *GetDataFromXboxResource(XTL::X_D3DResource *pXboxResource);
extern XTL::IDirect3DVertexBuffer8 *GetHostVertexBuffer(XTL::X_D3DResource *pXboxResource, DWORD dwSize);
// Bookkeeping for one host vertex buffer held in g_HostVertexBuffers.
// Every member has a default initializer, so a default-constructed entry
// never carries an indeterminate pointer or size.
struct cached_vertex_buffer_object {
	// Host-side vertex buffer object (nullptr until one has been created)
	XTL::IDirect3DVertexBuffer8* pHostVertexBuffer = nullptr;
	// Size that was requested when pHostVertexBuffer was created
	size_t uiSize = 0;
	// Time of the most recent lookup of this entry
	std::chrono::time_point<std::chrono::high_resolution_clock> lastUsed{};
};

// Cache of host vertex buffer objects, keyed by the DWORD the callers pass
// to GetCachedVertexBufferObject (the Xbox resource's Data value).
std::unordered_map<DWORD, cached_vertex_buffer_object> g_HostVertexBuffers;
// Vertex Buffer Object cache: only the host buffer objects are cached, never
// their contents. Re-using an existing, large enough buffer avoids repeatedly
// allocating and releasing vertex buffers, which gives a (slight) speed-up.
//
// pXboxDataPtr  - cache key
// size          - minimum size the returned buffer must have
// pVertexBuffer - receives the host vertex buffer to use
// Returns true only if a cached buffer was too small and had to be released
// and re-created (the previously handed-out buffer is then invalid); returns
// false when a buffer was newly created or an existing one was re-used.
bool GetCachedVertexBufferObject(DWORD pXboxDataPtr, DWORD size, XTL::IDirect3DVertexBuffer8** pVertexBuffer)
{
	// TODO: If the vertex buffer object cache becomes too large,
	// free the least recently used vertex buffers
	auto found = g_HostVertexBuffers.find(pXboxDataPtr);

	if (found != g_HostVertexBuffers.end()) {
		cached_vertex_buffer_object &entry = found->second;
		entry.lastUsed = std::chrono::high_resolution_clock::now();

		if (size > entry.uiSize) {
			// The cached buffer is too small: release it and create a bigger one in its place
			entry.pHostVertexBuffer->Release();
			entry.uiSize = size;

			HRESULT hRet = g_pD3DDevice8->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &entry.pHostVertexBuffer);
			if (FAILED(hRet)) {
				CxbxKrnlCleanup("Failed to create vertex buffer");
			}

			*pVertexBuffer = entry.pHostVertexBuffer;
			return true;
		}

		// The cached buffer is large enough, hand it out as-is
		*pVertexBuffer = entry.pHostVertexBuffer;
		return false;
	}

	// Nothing cached under this key yet: create a buffer and remember it
	cached_vertex_buffer_object fresh;
	fresh.uiSize = size;
	fresh.lastUsed = std::chrono::high_resolution_clock::now();

	HRESULT hRet = g_pD3DDevice8->CreateVertexBuffer(size, D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC, 0, XTL::D3DPOOL_DEFAULT, &fresh.pHostVertexBuffer);
	if (FAILED(hRet)) {
		CxbxKrnlCleanup("Failed to create vertex buffer");
	}

	g_HostVertexBuffers[pXboxDataPtr] = fresh;
	*pVertexBuffer = fresh.pHostVertexBuffer;
	return false;
}
XTL::VertexPatcher::VertexPatcher()
{
@ -84,27 +140,6 @@ XTL::VertexPatcher::~VertexPatcher()
{
}
void XTL::VertexPatcher::DumpCache(void)
{
printf("--- Dumping streams cache ---\n");
RTNode *pNode = g_PatchedStreamsCache.getHead();
while(pNode)
{
CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)pNode->pResource;
if(pCachedStream)
{
// TODO: Write nicer dump presentation
printf("Key: 0x%.08X Cache Hits: %d IsUP: %s OrigStride: %d NewStride: %d HashCount: %d HashFreq: %d Length: %d Hash: 0x%.08X\n",
pNode->uiKey, pCachedStream->uiCacheHit, pCachedStream->bIsUP ? "YES" : "NO",
pCachedStream->Stream.uiOrigStride, pCachedStream->Stream.uiNewStride,
pCachedStream->uiCount, pCachedStream->uiCheckFrequency,
pCachedStream->uiLength, pCachedStream->uiHash);
}
pNode = pNode->pNext;
}
}
size_t GetVertexBufferSize(DWORD dwVertexCount, DWORD dwStride, PWORD pIndexData, DWORD dwOffset, DWORD dwIndexBase)
{
// If we are drawing from an offset, we know that the vertex count must have offset vertices
@ -128,230 +163,6 @@ size_t GetVertexBufferSize(DWORD dwVertexCount, DWORD dwStride, PWORD pIndexData
return (highestVertexIndex + dwIndexBase + 1) * dwStride;
}
// Inserts the (already patched) stream uiStream into g_PatchedStreamsCache.
//
// pPatchDesc - description of the draw call the stream was patched for
// uiStream   - index into m_pStreams of the stream to cache
// uiHash     - hash of the source vertex data if already known, or 0 to have
//              it calculated here
//
// When the cache holds more than VERTEX_BUFFER_CACHE_SIZE entries, one entry
// is evicted first: the first entry found that has not been used for longer
// than MAX_STREAM_NOT_USED_TIME, otherwise the entry with the fewest cache
// hits. The cache key is the original Xbox vertex buffer pointer, or the
// vertex data pointer for Draw..UP calls.
void XTL::VertexPatcher::CacheStream(VertexPatchDesc *pPatchDesc,
                                     UINT             uiStream,
                                     uint32_t         uiHash)
{
	void *pCalculateData = NULL;
	uint32 uiKey;
	UINT uiLength;

	CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)calloc(1, sizeof(CACHEDSTREAM));
	if(pCachedStream == nullptr)
	{
		// Without an entry there is nothing to cache; never dereference a null entry below
		CxbxKrnlCleanup("Failed to allocate a cached stream entry");
		return;
	}

	// Check if the cache is full, if so, throw away the least used stream
	if(g_PatchedStreamsCache.get_count() > VERTEX_BUFFER_CACHE_SIZE)
	{
		uint32 uiEvictKey = 0; // 0 means "no candidate found"
		uint32 uiMinHit = 0xFFFFFFFF;
		const clock_t now = clock();

		for(RTNode *pNode = g_PatchedStreamsCache.getHead(); pNode; pNode = pNode->pNext)
		{
			CACHEDSTREAM *pCandidate = (CACHEDSTREAM *)pNode->pResource;
			if(!pCandidate)
			{
				continue;
			}

			// First, check if there is an "expired" stream in the cache (not recently used).
			// The age is measured from the last use up to now; comparing lLastUsed against
			// a time in the future would flag every entry as expired.
			const clock_t idleTime = now - pCandidate->lLastUsed;
			if(idleTime > MAX_STREAM_NOT_USED_TIME)
			{
				printf("!!!Found an old stream, %2.2f\n", ((FLOAT)idleTime) / (FLOAT)CLOCKS_PER_SEC);
				uiEvictKey = pNode->uiKey;
				break;
			}

			// Find the least used cached stream
			if((uint32)pCandidate->uiCacheHit < uiMinHit)
			{
				uiMinHit = pCandidate->uiCacheHit;
				uiEvictKey = pNode->uiKey;
			}
		}

		if(uiEvictKey != 0)
		{
			printf("!!!Removing stream\n\n");
			FreeCachedStream((void*)uiEvictKey);
		}
	}

	// Start the actual stream caching
	if(!pPatchDesc->pXboxVertexStreamZeroData)
	{
		XTL::X_D3DVertexBuffer *pOrigVertexBuffer = m_pStreams[uiStream].pOriginalStream;

		// The cache entry keeps its own reference on the patched host buffer
		m_pStreams[uiStream].pPatchedStream->AddRef();

		pCalculateData = (void*)GetDataFromXboxResource(pOrigVertexBuffer);
		uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, g_D3DStreamStrides[uiStream], pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
		pCachedStream->bIsUP = false;
		uiKey = (uint32)pOrigVertexBuffer;
	}
	else
	{
		// There should only be one stream (stream zero) in this case
		if(uiStream != 0)
		{
			CxbxKrnlCleanup("Trying to patch a Draw..UP with more than stream zero!");
		}

		const UINT uiStride = pPatchDesc->uiXboxVertexStreamZeroStride;
		pCalculateData = (uint08 *)pPatchDesc->pXboxVertexStreamZeroData;
		uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
		pCachedStream->bIsUP = true;
		pCachedStream->pStreamUP = pCalculateData;
		uiKey = (uint32)pCalculateData;
	}

	// If we weren't given a known hash, calculate a new one
	if(uiHash == 0)
	{
		uiHash = XXHash32::hash((void *)pCalculateData, uiLength, HASH_SEED);
	}

	pCachedStream->uiHash = uiHash;
	pCachedStream->Stream = m_pStreams[uiStream];
	pCachedStream->uiCheckFrequency = 1; // Start by re-checking the hash on every Draw..
	pCachedStream->uiCount = 0;
	pCachedStream->uiLength = uiLength;
	pCachedStream->uiCacheHit = 0;
	pCachedStream->dwHostPrimitiveCount = pPatchDesc->dwHostPrimitiveCount;
	pCachedStream->lLastUsed = clock();

	g_PatchedStreamsCache.insert(uiKey, pCachedStream);
}
void XTL::VertexPatcher::FreeCachedStream(void *pStream)
{
g_PatchedStreamsCache.Lock();
CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)g_PatchedStreamsCache.get(pStream);
if(pCachedStream)
{
if(pCachedStream->bIsUP && pCachedStream->pStreamUP)
{
free(pCachedStream->pStreamUP);
}
if(pCachedStream->Stream.pPatchedStream)
{
pCachedStream->Stream.pPatchedStream->Release();
}
free(pCachedStream);
}
g_PatchedStreamsCache.Unlock();
g_PatchedStreamsCache.remove(pStream);
}
// Tries to satisfy stream uiStream from g_PatchedStreamsCache instead of
// patching it again.
//
// pPatchDesc   - description of the draw call; for Draw..UP entries its
//                stream-zero data pointer and stride are replaced with the
//                cached ones, and dwHostPrimitiveCount is overwritten when
//                the cached value is non-zero
// uiStream     - stream index to look up
// pbFatalError - not referenced by this function
// pHash        - receives the freshly calculated hash, but only on calls in
//                which the source data is actually re-hashed; otherwise it
//                is left untouched
// Returns true if a cached stream was applied, false on a cache miss or when
// the cached entry no longer matched the source data (the entry is freed in
// that case).
bool XTL::VertexPatcher::ApplyCachedStream(VertexPatchDesc *pPatchDesc,
UINT uiStream,
bool *pbFatalError,
uint32_t *pHash)
{
UINT uiStride;
XTL::X_D3DVertexBuffer *pOrigVertexBuffer = nullptr;
void *pCalculateData = NULL;
UINT uiLength;
bool bApplied = false;
uint32 uiKey;
//CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)malloc(sizeof(CACHEDSTREAM));
// Determine the cache key and the length of the source data:
// regular draws are keyed by the Xbox vertex buffer pointer,
// Draw..UP calls by the user-supplied vertex data pointer
if(!pPatchDesc->pXboxVertexStreamZeroData)
{
pOrigVertexBuffer = g_D3DStreams[uiStream];
uiStride = g_D3DStreamStrides[uiStream];
uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
uiKey = (uint32)pOrigVertexBuffer;
//pCachedStream->bIsUP = false;
}
else
{
// There should only be one stream (stream zero) in this case
if(uiStream != 0)
{
CxbxKrnlCleanup("Trying to find a cached Draw..UP with more than stream zero!");
}
uiStride = pPatchDesc->uiXboxVertexStreamZeroStride;
pCalculateData = (uint08 *)pPatchDesc->pXboxVertexStreamZeroData;
uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
uiKey = (uint32)pCalculateData;
//pCachedStream->bIsUP = true;
//pCachedStream->pStreamUP = pCalculateData;
}
g_PatchedStreamsCache.Lock();
CACHEDSTREAM *pCachedStream = (CACHEDSTREAM *)g_PatchedStreamsCache.get(uiKey);
if(pCachedStream)
{
// Update the usage statistics that cache eviction looks at
pCachedStream->lLastUsed = clock();
pCachedStream->uiCacheHit++;
bool bMismatch = false;
// The source data is only re-hashed on every uiCheckFrequency-th lookup
if(pCachedStream->uiCount == (pCachedStream->uiCheckFrequency - 1))
{
if(pOrigVertexBuffer) {
pCalculateData = (void*)GetDataFromXboxResource(pOrigVertexBuffer);
}
uint32_t uiHash = XXHash32::hash((void *)pCalculateData, uiLength, HASH_SEED);
*pHash = uiHash;
if(uiHash == pCachedStream->uiHash)
{
// Take a while longer to check
// (the interval doubles after each successful check, until it reaches 32*1024)
if(pCachedStream->uiCheckFrequency < 32*1024)
{
pCachedStream->uiCheckFrequency *= 2;
}
pCachedStream->uiCount = 0;
}
else
{
// The source data changed since it was cached: drop the stale entry
// TODO: Do something about this
// NOTE(review): FreeCachedStream is called while the cache lock taken above
// is still held and it calls Lock() itself - this is only safe if the lock
// is re-entrant; confirm.
if(pCachedStream->bIsUP)
{
FreeCachedStream(pCachedStream->pStreamUP);
}
else
{
FreeCachedStream(pCachedStream->Stream.pOriginalStream);
}
pCachedStream = NULL;
bMismatch = true;
}
}
else
{
pCachedStream->uiCount++;
}
if(!bMismatch)
{
if(!pCachedStream->bIsUP)
{
// Bind the cached host vertex buffer and record it as this stream's patched stream;
// the AddRef accounts for the additional pointer stored in m_pStreams
m_pStreams[uiStream].pOriginalStream = pOrigVertexBuffer;
m_pStreams[uiStream].uiOrigStride = uiStride;
g_pD3DDevice8->SetStreamSource(uiStream, pCachedStream->Stream.pPatchedStream, pCachedStream->Stream.uiNewStride);
pCachedStream->Stream.pPatchedStream->AddRef();
m_pStreams[uiStream].pPatchedStream = pCachedStream->Stream.pPatchedStream;
m_pStreams[uiStream].uiNewStride = pCachedStream->Stream.uiNewStride;
}
else
{
// Draw..UP: point the draw call at the cached data and stride
pPatchDesc->pXboxVertexStreamZeroData = pCachedStream->pStreamUP;
pPatchDesc->uiXboxVertexStreamZeroStride = pCachedStream->Stream.uiNewStride;
}
if(pCachedStream->dwHostPrimitiveCount)
{
// The primitives were patched, draw with the correct number of primitives from the cache
pPatchDesc->dwHostPrimitiveCount = pCachedStream->dwHostPrimitiveCount;
}
bApplied = true;
m_bPatched = true;
}
}
g_PatchedStreamsCache.Unlock();
return bApplied;
}
int CountActiveD3DStreams()
{
int lastStreamIndex;
@ -426,7 +237,7 @@ bool XTL::VertexPatcher::PatchStream(VertexPatchDesc *pPatchDesc,
{
pOrigVertexBuffer = g_D3DStreams[uiStream];
uiStride = g_D3DStreamStrides[uiStream];
uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
uiLength = pPatchDesc->uiSize;
// Set a new (exact) vertex count
uiVertexCount = uiLength / uiStride;
@ -438,8 +249,9 @@ bool XTL::VertexPatcher::PatchStream(VertexPatchDesc *pPatchDesc,
dwNewSize = uiVertexCount * pStreamPatch->ConvertedStride;
pOrigData = (uint08*)GetDataFromXboxResource(pOrigVertexBuffer);
g_pD3DDevice8->CreateVertexBuffer(dwNewSize, 0, 0, XTL::D3DPOOL_MANAGED, &pNewVertexBuffer);
if(FAILED(pNewVertexBuffer->Lock(0, 0, &pNewData, 0)))
GetCachedVertexBufferObject(pOrigVertexBuffer->Data, dwNewSize, &pNewVertexBuffer);
if(FAILED(pNewVertexBuffer->Lock(0, 0, &pNewData, D3DLOCK_DISCARD)))
{
CxbxKrnlCleanup("Couldn't lock the new buffer");
}
@ -459,7 +271,7 @@ bool XTL::VertexPatcher::PatchStream(VertexPatchDesc *pPatchDesc,
uiStride = pPatchDesc->uiXboxVertexStreamZeroStride;
pStreamPatch->ConvertedStride = max(pStreamPatch->ConvertedStride, uiStride); // ??
pOrigData = (uint08 *)pPatchDesc->pXboxVertexStreamZeroData;
uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
uiLength = pPatchDesc->uiSize;
uiVertexCount = uiLength / uiStride;
dwNewSize = uiVertexCount * pStreamPatch->ConvertedStride;
pNewVertexBuffer = NULL;
@ -603,11 +415,7 @@ bool XTL::VertexPatcher::PatchStream(VertexPatchDesc *pPatchDesc,
{
CxbxKrnlCleanup("Failed to set the type patched buffer as the new stream source!\n");
}
if(pStream->pPatchedStream)
{
// The stream was already primitive patched, release the previous vertex buffer to avoid memory leaks
pStream->pPatchedStream->Release();
}
pStream->pPatchedStream = pNewVertexBuffer;
}
else
@ -664,7 +472,7 @@ bool XTL::VertexPatcher::NormalizeTexCoords(VertexPatchDesc *pPatchDesc, UINT ui
pNewVertexBuffer = 0;
pData = (uint08 *)pPatchDesc->pXboxVertexStreamZeroData;
uiStride = pPatchDesc->uiXboxVertexStreamZeroStride;
DWORD uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
DWORD uiLength = pPatchDesc->uiSize;
uiVertexCount = uiLength / uiStride;
}
else
@ -672,14 +480,14 @@ bool XTL::VertexPatcher::NormalizeTexCoords(VertexPatchDesc *pPatchDesc, UINT ui
// Copy stream for patching and caching.
pOrigVertexBuffer = g_D3DStreams[uiStream];
uiStride = g_D3DStreamStrides[uiStream];
UINT uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, uiStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
UINT uiLength = pPatchDesc->uiSize;
uiVertexCount = uiLength / uiStride;
uint08 *pOrigData = (uint08*)GetDataFromXboxResource(pOrigVertexBuffer);
g_pD3DDevice8->CreateVertexBuffer(uiLength, 0, 0, XTL::D3DPOOL_MANAGED, &pNewVertexBuffer);
if(FAILED(pNewVertexBuffer->Lock(0, 0, &pData, 0)))
GetCachedVertexBufferObject(pOrigVertexBuffer->Data, uiLength, &pNewVertexBuffer);
if(FAILED(pNewVertexBuffer->Lock(0, 0, &pData, D3DLOCK_DISCARD)))
{
CxbxKrnlCleanup("Couldn't lock new FVF buffer.");
}
@ -770,10 +578,6 @@ bool XTL::VertexPatcher::NormalizeTexCoords(VertexPatchDesc *pPatchDesc, UINT ui
{
CxbxKrnlCleanup("Failed to set the texcoord patched FVF buffer as the new stream source.");
}
if(pStream->pPatchedStream)
{
pStream->pPatchedStream->Release();
}
pStream->pPatchedStream = pNewVertexBuffer;
pStream->uiOrigStride = uiStride;
@ -806,7 +610,7 @@ bool XTL::VertexPatcher::PatchPrimitive(VertexPatchDesc *pPatchDesc,
pStream->uiOrigStride = pPatchDesc->uiXboxVertexStreamZeroStride;
}
DWORD uiLength = GetVertexBufferSize(pPatchDesc->dwVertexCount, pStream->uiOrigStride, pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
DWORD uiLength = pPatchDesc->uiSize;
DWORD uiVertexCount = uiLength / pStream->uiOrigStride;
// Unsupported primitives that don't need deep patching.
@ -881,20 +685,15 @@ bool XTL::VertexPatcher::PatchPrimitive(VertexPatchDesc *pPatchDesc,
if(pPatchDesc->pXboxVertexStreamZeroData == nullptr)
{
HRESULT hRet = g_pD3DDevice8->CreateVertexBuffer(dwNewSize, 0, 0, XTL::D3DPOOL_MANAGED, &pStream->pPatchedStream);
if (FAILED(hRet)) {
EmuWarning("CreateVertexBuffer Failed. Size: %d", dwNewSize);
}
if(pStream->pOriginalStream != 0)
if(pStream->pOriginalStream != nullptr)
{
pOrigVertexData = (XTL::BYTE*)GetDataFromXboxResource(pStream->pOriginalStream);
GetCachedVertexBufferObject(pStream->pOriginalStream->Data, dwNewSize, &pStream->pPatchedStream);
}
if(pStream->pPatchedStream != 0)
if(pStream->pPatchedStream != nullptr)
{
pStream->pPatchedStream->Lock(0, 0, &pPatchedVertexData, 0);
pStream->pPatchedStream->Lock(0, 0, &pPatchedVertexData, D3DLOCK_DISCARD);
}
}
else
@ -908,9 +707,6 @@ bool XTL::VertexPatcher::PatchPrimitive(VertexPatchDesc *pPatchDesc,
pPatchDesc->pXboxVertexStreamZeroData = pPatchedVertexData;
}
// Copy the nonmodified data
memcpy(pPatchedVertexData, pOrigVertexData, dwOriginalSize);
// Quad list
if(pPatchDesc->XboxPrimitiveType == X_D3DPT_QUADLIST)
{
@ -962,6 +758,11 @@ bool XTL::VertexPatcher::PatchPrimitive(VertexPatchDesc *pPatchDesc,
memcpy(&pPatchedVertexData[0], &pOrigVertexData[0], dwOriginalSize);
memcpy(&pPatchedVertexData[dwOriginalSize], &pOrigVertexData[0], pStream->uiOrigStride);
}
else
{
// Copy the nonmodified data
memcpy(pPatchedVertexData, pOrigVertexData, dwOriginalSize);
}
if(pPatchDesc->pXboxVertexStreamZeroData == nullptr)
{
@ -985,72 +786,48 @@ bool XTL::VertexPatcher::Apply(VertexPatchDesc *pPatchDesc, bool *pbFatalError)
{
m_pDynamicPatch = &((VERTEX_SHADER *)VshHandleGetVertexShader(pPatchDesc->hVertexShader)->Handle)->VertexDynamicPatch;
}
for(UINT uiStream = 0; uiStream < m_uiNbrStreams; uiStream++)
{
bool LocalPatched = false;
uint32_t uiHash = 0;
if(ApplyCachedStream(pPatchDesc, uiStream, pbFatalError, &uiHash))
{
m_pStreams[uiStream].bUsedCached = true;
continue;
}
LocalPatched |= PatchPrimitive(pPatchDesc, uiStream);
pPatchDesc->uiSize = GetVertexBufferSize(
pPatchDesc->dwVertexCount,
pPatchDesc->pXboxVertexStreamZeroData == nullptr ? g_D3DStreamStrides[uiStream] : pPatchDesc->uiXboxVertexStreamZeroStride,
pPatchDesc->pIndexData,
pPatchDesc->dwStartVertex,
pPatchDesc->dwIndexBase
);
// TODO: Check for cached vertex buffer, and use it if possible
LocalPatched |= PatchPrimitive(pPatchDesc, uiStream);
LocalPatched |= PatchStream(pPatchDesc, uiStream);
if(LocalPatched && !pPatchDesc->pXboxVertexStreamZeroData)
{
// Insert the patched stream in the cache
CacheStream(pPatchDesc, uiStream, uiHash);
m_pStreams[uiStream].bUsedCached = true;
}
Patched |= LocalPatched;
// If we didn't patch the stream, use a non-patched stream
// TODO: Update the conversion/patching code to make a host copy even when no patching is required
// Doing this will fully remove the need to call _Register on Vertex Buffers
if (!Patched && pPatchDesc->pXboxVertexStreamZeroData == nullptr) {
DWORD dwSize = GetVertexBufferSize(pPatchDesc->dwVertexCount, g_D3DStreamStrides[uiStream], pPatchDesc->pIndexData, pPatchDesc->dwStartVertex, pPatchDesc->dwIndexBase);
g_pD3DDevice8->SetStreamSource(uiStream, GetHostVertexBuffer(g_D3DStreams[uiStream], dwSize), g_D3DStreamStrides[uiStream]);
// Fetch or Create the host Vertex Buffer
XTL::IDirect3DVertexBuffer8* pHostVertexBuffer;
GetCachedVertexBufferObject(g_D3DStreams[uiStream]->Data, pPatchDesc->uiSize, &pHostVertexBuffer);
// Copy xbox data to the host vertex buffer
BYTE* pVertexDataData;
if (FAILED(pHostVertexBuffer->Lock(0, 0, &pVertexDataData, D3DLOCK_DISCARD))) {
CxbxKrnlCleanup("Couldn't lock Vertex Buffer");
}
memcpy(pVertexDataData, GetDataFromXboxResource(g_D3DStreams[uiStream]), pPatchDesc->uiSize);
pHostVertexBuffer->Unlock();
// Set the buffer as a stream source
g_pD3DDevice8->SetStreamSource(uiStream, pHostVertexBuffer, g_D3DStreamStrides[uiStream]);
}
// TODO: Cache Vertex Buffer Data
}
return Patched;
}
// Undoes the per-draw state set up by patching.
//
// For every stream, the reference held on its patched host buffer is
// released. Streams that were served from the cache only get their
// bUsedCached flag cleared; for the others, the separately allocated
// stream-zero data (if any) is freed.
// Returns false without doing anything when nothing was patched, true otherwise.
bool XTL::VertexPatcher::Restore()
{
	if(this->m_bPatched)
	{
		for(UINT uiStream = 0; uiStream < m_uiNbrStreams; uiStream++)
		{
			auto &stream = m_pStreams[uiStream];

			if(stream.pPatchedStream != NULL)
			{
				stream.pPatchedStream->Release();
			}

			if(stream.bUsedCached)
			{
				stream.bUsedCached = false;
			}
			else if(this->m_bAllocatedStreamZeroData)
			{
				free(m_pNewVertexStreamZeroData);
				// Cleanup, just to be sure :
				m_pNewVertexStreamZeroData = nullptr;
				this->m_bAllocatedStreamZeroData = false;
			}
		}

		return true;
	}

	return false;
}
VOID XTL::EmuFlushIVB()
{
XTL::EmuUpdateDeferredStates();
@ -1237,8 +1014,6 @@ VOID XTL::EmuFlushIVB()
g_pD3DDevice8->SetVertexShader(g_CurrentVertexShader);
}
VertPatch.Restore();
g_InlineVertexBuffer_TableOffset = 0;
return;

View File

@ -48,6 +48,7 @@ typedef struct _VertexPatchDesc
IN DWORD hVertexShader;
IN PWORD pIndexData = nullptr;
IN DWORD dwIndexBase = 0;
IN size_t uiSize;
// Data if Draw...UP call
IN PVOID pXboxVertexStreamZeroData;
IN UINT uiXboxVertexStreamZeroStride;
@ -62,23 +63,8 @@ typedef struct _PATCHEDSTREAM
XTL::IDirect3DVertexBuffer8 *pPatchedStream;
UINT uiOrigStride;
UINT uiNewStride;
bool bUsedCached;
} PATCHEDSTREAM;
// One entry of the patched-streams cache. Entries are filled in by
// VertexPatcher::CacheStream and consulted by VertexPatcher::ApplyCachedStream.
typedef struct _CACHEDSTREAM
{
uint32_t uiHash; // XXHash32 of the source vertex data when the entry was cached
uint32 uiCheckFrequency; // Source data is re-hashed on every uiCheckFrequency-th lookup
uint32 uiCacheHit; // Number of lookups that found this entry
bool bIsUP; // true if the entry was created for a Draw..UP call
PATCHEDSTREAM Stream; // Copy of the stream's patch state taken when caching
void *pStreamUP; // Draw..UP (instead of pOriginalStream)
uint32 uiLength; // The length of the stream
uint32 uiCount; // XXHash32::hash() check count
uint32 dwHostPrimitiveCount; // Copied from the patch description; a non-zero value is written back on a cache hit
long lLastUsed; // For cache removal purposes
} CACHEDSTREAM;
class VertexPatcher
{
public:
@ -86,11 +72,6 @@ class VertexPatcher
~VertexPatcher();
bool Apply(VertexPatchDesc *pPatchDesc, bool *pbFatalError);
bool Restore();
// Dumps the cache to the console
static void DumpCache(void);
private:
UINT m_uiNbrStreams;
@ -106,20 +87,6 @@ class VertexPatcher
// Returns the number of streams of a patch
UINT GetNbrStreams(VertexPatchDesc *pPatchDesc);
// Caches a patched stream
void CacheStream(VertexPatchDesc *pPatchDesc,
UINT uiStream,
uint32_t uiHash);
// Frees a cached, patched stream
void FreeCachedStream(void *pStream);
// Tries to apply a previously patched stream from the cache
bool ApplyCachedStream(VertexPatchDesc *pPatchDesc,
UINT uiStream,
bool *pbFatalError,
uint32_t *uiHash);
// Patches the types of the stream
bool PatchStream(VertexPatchDesc *pPatchDesc, UINT uiStream);

View File

@ -188,6 +188,11 @@ void CxbxInitAudio()
}
#endif
// Tick rate, in Hz, that the emulated DirectSound sample time is scaled to
#define DSOUND_PERFORMANCE_FREQUENCY 48000 // GetSampleTime needs to tick at 48Khz
// Host performance counter frequency; defined in another translation unit
extern LARGE_INTEGER NativePerformanceFrequency;
// Host performance counter value captured when DirectSoundCreate is called
LARGE_INTEGER DSoundInitialPerformanceCounter;
// DSOUND_PERFORMANCE_FREQUENCY divided by the host counter frequency;
// stays 0 until DirectSoundCreate has calculated it
double NativeToXboxDSound_FactorForPerformanceFrequency = 0;
// ******************************************************************
// * patch: DirectSoundCreate
// ******************************************************************
@ -215,6 +220,10 @@ HRESULT WINAPI XTL::EMUPATCH(DirectSoundCreate)
enterCriticalSection;
// Measure current host performance counter and frequency
QueryPerformanceCounter(&DSoundInitialPerformanceCounter);
NativeToXboxDSound_FactorForPerformanceFrequency = (double)DSOUND_PERFORMANCE_FREQUENCY / NativePerformanceFrequency.QuadPart;
// Set this flag when this function is called
g_bDSoundCreateCalled = TRUE;
@ -3163,19 +3172,19 @@ DWORD WINAPI XTL::EMUPATCH(DirectSoundGetSampleTime)()
LOG_FUNC();
// FIXME: This is the best I could think of for now.
// Check the XDK documentation for the description of what this function
// can actually do. BTW, this function accesses the NVIDIA SoundStorm APU
// register directly (0xFE80200C).
DWORD dwRet;
::LARGE_INTEGER PerformanceCounter;
QueryPerformanceCounter(&PerformanceCounter);
// TODO: Handle reset at certain event?
// TODO: Wait until a DirectSoundBuffer/Stream is being played?
static DWORD dwStart = GetTickCount();
DWORD dwRet = GetTickCount() - dwStart;
// Re-Base on the time DirectSoundCreate was called
PerformanceCounter.QuadPart -= DSoundInitialPerformanceCounter.QuadPart;
// Apply a delta to make it appear to tick at 48khz
PerformanceCounter.QuadPart = (ULONGLONG)(NativeToXboxDSound_FactorForPerformanceFrequency * PerformanceCounter.QuadPart);
dwRet = PerformanceCounter.QuadPart;
leaveCriticalSection;
return 0;
return dwRet;
}
// ******************************************************************

View File

@ -370,8 +370,8 @@ __declspec(naked) void EmuFS_MovFs00Esp()
__declspec(naked) void EmuFS_PushDwordPtrFs00()
{
uint32 returnAddr;
uint32 temp;
static uint32 returnAddr;
static uint32 temp;
__asm
{
@ -388,8 +388,8 @@ __declspec(naked) void EmuFS_PushDwordPtrFs00()
__declspec(naked) void EmuFS_PopDwordPtrFs00()
{
uint32 returnAddr;
uint32 temp;
static uint32 returnAddr;
static uint32 temp;
__asm
{

View File

@ -460,7 +460,7 @@ void EmuHLEIntercept(Xbe::Header *pXbeHeader)
// Read address of D3DRS_CULLMODE from D3DDevice_SetRenderState_CullMode
// TODO : Simplify this when XREF_D3D_RenderState_CullMode derivation is deemed stable
{
if (BuildVersion >= 3911 && BuildVersion < 4034) {
if (BuildVersion < 4034) {
DerivedAddr_D3DRS_CULLMODE = *(xbaddr*)(pFunc + 0x25);
Decrement = 0x1FC; // TODO: Clean up (?)
Increment = 82 * 4;
@ -469,17 +469,17 @@ void EmuHLEIntercept(Xbe::Header *pXbeHeader)
//Decrement = 0x19F; // TODO: Clean up (?)
//Increment = 72 * 4;
//patchOffset = 142*4; // TODO: Verify
} else if (BuildVersion >= 4034 && BuildVersion <= 4361) {
} else if (BuildVersion <= 4361) {
DerivedAddr_D3DRS_CULLMODE = *(xbaddr*)(pFunc + 0x2B);
Decrement = 0x200;
Increment = 82 * 4;
patchOffset = 142 * 4;
} else if (BuildVersion >= 4432 && BuildVersion < 4627) {
} else if (BuildVersion < 4627) {
DerivedAddr_D3DRS_CULLMODE = *(xbaddr*)(pFunc + 0x2B);
Decrement = 0x204;
Increment = 83 * 4;
patchOffset = 143 * 4;
} else if (BuildVersion >= 4627 && BuildVersion <= 5933) {
} else { // 4627-5933
DerivedAddr_D3DRS_CULLMODE = *(xbaddr*)(pFunc + 0x2B);
Decrement = 0x24C;
Increment = 92 * 4;

View File

@ -0,0 +1,5 @@
#include <Windows.h>
// Default to High Performance Mode on machines with dual graphics
// Both variables are exported from the binary and set to 1; nothing in this
// file reads them.
// NOTE(review): the export names are presumably the ones the AMD PowerXpress
// and NVIDIA Optimus drivers look for when choosing the GPU - confirm against
// the vendors' documentation before renaming or retyping them.
__declspec(dllexport) int AmdPowerXpressRequestHighPerformance = 1; // AMD
__declspec(dllexport) DWORD NvOptimusEnablement = 0x00000001; // NVIDIA