Implement Vertex Buffer Caching
This commit is contained in:
parent
a76bac4205
commit
f36cd0540a
|
@ -205,6 +205,7 @@ file (GLOB CXBXR_SOURCE_COMMON
|
|||
"${CXBXR_ROOT_DIR}/src/common/Timer.cpp"
|
||||
"${CXBXR_ROOT_DIR}/src/common/util/crc32c.cpp"
|
||||
"${CXBXR_ROOT_DIR}/src/common/util/CxbxUtil.cpp"
|
||||
"${CXBXR_ROOT_DIR}/src/common/util/hasher.cpp"
|
||||
"${CXBXR_ROOT_DIR}/src/common/win32/DInputController.cpp"
|
||||
"${CXBXR_ROOT_DIR}/src/common/win32/EmuShared.cpp"
|
||||
"${CXBXR_ROOT_DIR}/src/common/win32/InlineFunc.cpp"
|
||||
|
|
|
@ -32,10 +32,9 @@ link_directories(
|
|||
|
||||
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
|
||||
add_compile_definitions(
|
||||
_CRT_SECURE_NO_WARNINGS
|
||||
_CRT_SECURE_NO_WARNINGS
|
||||
# Windows 7 minimum requirement
|
||||
_WIN32_WINNT=0x0601
|
||||
|
||||
LTM_DESC
|
||||
USE_LTM
|
||||
LTC_NO_TEST
|
||||
|
@ -45,6 +44,9 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
|
|||
LTC_NO_PRNGS
|
||||
LTC_NO_MISC
|
||||
LTC_NO_PROTOTYPES
|
||||
|
||||
# Use inline XXHash version
|
||||
XXH_INLINE_ALL
|
||||
)
|
||||
|
||||
# Reference: https://docs.microsoft.com/en-us/cpp/build/reference/compiler-options-listed-alphabetically
|
||||
|
|
|
@ -0,0 +1,42 @@
|
|||
#include "hasher.h"
|
||||
|
||||
#include "xxhash.h"
|
||||
#include "crc32c.h"
|
||||
#include <cstdio>
|
||||
|
||||
// Identifiers for the hashing back-ends ComputeHash() can dispatch to.
enum {
    HASH_NONE   = 0, // nothing selected yet; ComputeHash() calls InitHasher() in this state
    HASH_XXH3   = 1, // hash with XXH3_64bits()
    HASH_CRC32C = 2  // hash with crc32c_append()
};

// Back-end currently in use. Starts as HASH_NONE and is set by InitHasher().
static int g_HashAlgorithm = HASH_NONE;
|
||||
|
||||
void InitHasher()
|
||||
{
|
||||
// Detect the best hashing algorithm to use for the host machine
|
||||
// TODO/Future Improvement: This could be expanded to support even more hash algorithims
|
||||
// And we could hash a random buffer to calculate the fastest hash to use on a given host
|
||||
printf("Selecting hash algorithm: ");
|
||||
if (crc32c_hw_available()) {
|
||||
printf("CRC32C\n");
|
||||
g_HashAlgorithm = HASH_CRC32C;
|
||||
} else {
|
||||
printf("XXH3\n");
|
||||
g_HashAlgorithm = HASH_XXH3;
|
||||
}
|
||||
}
|
||||
|
||||
// Hashes the `len` bytes starting at `data` with the currently selected
// back-end and returns the result as a 64-bit value.
//
// If no back-end has been chosen yet (g_HashAlgorithm == HASH_NONE),
// InitHasher() is invoked first. Returns 0 when the selected value matches
// neither HASH_XXH3 nor HASH_CRC32C.
//
// NOTE(review): hasher.h declares this function `extern __forceinline` while
// the body lives in this .cpp; callers in other translation units presumably
// cannot inline it and rely on an out-of-line copy being emitted here.
// Confirm this is intended.
__forceinline uint64_t ComputeHash(void* data, size_t len)
{
    // Lazy one-time selection of the algorithm.
    if (g_HashAlgorithm == HASH_NONE) {
        InitHasher();
    }

    const int algorithm = g_HashAlgorithm;

    if (algorithm == HASH_XXH3) {
        return XXH3_64bits(data, len);
    }

    if (algorithm == HASH_CRC32C) {
        // CRC is started from 0; the result is converted to the uint64_t return type.
        return crc32c_append(0, (uint8_t*)data, len);
    }

    // No recognised algorithm selected.
    return 0;
}
|
|
@ -0,0 +1,34 @@
|
|||
// This is an open source non-commercial project. Dear PVS-Studio, please check it.
|
||||
// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
|
||||
// ******************************************************************
|
||||
// *
|
||||
// * This file is part of the Cxbx project.
|
||||
// *
|
||||
// * Cxbx and Cxbe are free software; you can redistribute them
|
||||
// * and/or modify them under the terms of the GNU General Public
|
||||
// * License as published by the Free Software Foundation; either
|
||||
// * version 2 of the license, or (at your option) any later version.
|
||||
// *
|
||||
// * This program is distributed in the hope that it will be useful,
|
||||
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// * GNU General Public License for more details.
|
||||
// *
|
||||
// * You should have received a copy of the GNU General Public License
|
||||
// * along with this program; see the file COPYING.
|
||||
// * If not, write to the Free Software Foundation, Inc.,
|
||||
// * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
// *
|
||||
// * (c) 2019 - Luke Usher
|
||||
// *
|
||||
// * All rights reserved
|
||||
// *
|
||||
// ******************************************************************
|
||||
#ifndef _HASHER_H
|
||||
#define _HASHER_H
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Returns a 64-bit hash of the `len` bytes starting at `data`.
// The definition in hasher.cpp picks the algorithm (XXH3 or CRC32C) on first
// use and returns 0 if no known algorithm is selected.
// NOTE(review): declared __forceinline but defined in a separate .cpp, so
// includers of this header presumably cannot inline it - confirm intent.
extern __forceinline uint64_t ComputeHash(void* data, size_t len);
|
||||
|
||||
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,512 @@
|
|||
/*
|
||||
xxHash - Extremely Fast Hash algorithm
|
||||
Header File
|
||||
Copyright (C) 2012-2016, Yann Collet.
|
||||
|
||||
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above
|
||||
copyright notice, this list of conditions and the following disclaimer
|
||||
in the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
You can contact the author at :
|
||||
- xxHash source repository : https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
/* Notice extracted from xxHash homepage :
|
||||
|
||||
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
|
||||
It also successfully passes all tests from the SMHasher suite.
|
||||
|
||||
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
|
||||
|
||||
Name Speed Q.Score Author
|
||||
xxHash 5.4 GB/s 10
|
||||
CrapWow 3.2 GB/s 2 Andrew
|
||||
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
|
||||
SpookyHash 2.0 GB/s 10 Bob Jenkins
|
||||
SBox 1.4 GB/s 9 Bret Mulvey
|
||||
Lookup3 1.2 GB/s 9 Bob Jenkins
|
||||
SuperFastHash 1.2 GB/s 1 Paul Hsieh
|
||||
CityHash64 1.05 GB/s 10 Pike & Alakuijala
|
||||
FNV 0.55 GB/s 5 Fowler, Noll, Vo
|
||||
CRC32 0.43 GB/s 9
|
||||
MD5-32 0.33 GB/s 10 Ronald L. Rivest
|
||||
SHA1-32 0.28 GB/s 10
|
||||
|
||||
Q.Score is a measure of quality of the hash function.
|
||||
It depends on successfully passing SMHasher test set.
|
||||
10 is a perfect score.
|
||||
|
||||
A 64-bit version, named XXH64, is available since r35.
|
||||
It offers much better speed, but for 64-bit applications only.
|
||||
Name Speed on 64 bits Speed on 32 bits
|
||||
XXH64 13.8 GB/s 1.9 GB/s
|
||||
XXH32 6.8 GB/s 6.0 GB/s
|
||||
*/
|
||||
|
||||
#ifndef XXHASH_H_5627135585666179
|
||||
#define XXHASH_H_5627135585666179 1
|
||||
|
||||
#if defined (__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* ****************************
|
||||
* Definitions
|
||||
******************************/
|
||||
#include <stddef.h> /* size_t */
|
||||
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
|
||||
|
||||
|
||||
/* ****************************
|
||||
* API modifier
|
||||
******************************/
|
||||
/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
|
||||
* This is useful to include xxhash functions in `static` mode
|
||||
* in order to inline them, and remove their symbol from the public list.
|
||||
* Inlining can offer dramatic performance improvement on small keys.
|
||||
* Methodology :
|
||||
* #define XXH_INLINE_ALL
|
||||
* #include "xxhash.h"
|
||||
* `xxhash.c` is automatically included.
|
||||
* It's not useful to compile and link it as a separate module.
|
||||
*/
|
||||
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
|
||||
# ifndef XXH_STATIC_LINKING_ONLY
|
||||
# define XXH_STATIC_LINKING_ONLY
|
||||
# endif
|
||||
# if defined(__GNUC__)
|
||||
# define XXH_PUBLIC_API static __inline __attribute__((unused))
|
||||
# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
|
||||
# define XXH_PUBLIC_API static inline
|
||||
# elif defined(_MSC_VER)
|
||||
# define XXH_PUBLIC_API static __inline
|
||||
# else
|
||||
/* this version may generate warnings for unused static functions */
|
||||
# define XXH_PUBLIC_API static
|
||||
# endif
|
||||
#else
|
||||
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
|
||||
# ifdef XXH_EXPORT
|
||||
# define XXH_PUBLIC_API __declspec(dllexport)
|
||||
# elif XXH_IMPORT
|
||||
# define XXH_PUBLIC_API __declspec(dllimport)
|
||||
# endif
|
||||
# else
|
||||
# define XXH_PUBLIC_API /* do nothing */
|
||||
# endif
|
||||
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
|
||||
|
||||
/*! XXH_NAMESPACE, aka Namespace Emulation :
|
||||
*
|
||||
* If you want to include _and expose_ xxHash functions from within your own library,
|
||||
* but also want to avoid symbol collisions with other libraries which may also include xxHash,
|
||||
*
|
||||
* you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
|
||||
* with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
|
||||
*
|
||||
* Note that no change is required within the calling program as long as it includes `xxhash.h` :
|
||||
* regular symbol name will be automatically translated by this header.
|
||||
*/
|
||||
#ifdef XXH_NAMESPACE
|
||||
# define XXH_CAT(A,B) A##B
|
||||
# define XXH_NAME2(A,B) XXH_CAT(A,B)
|
||||
# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
|
||||
# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
|
||||
# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
|
||||
# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
|
||||
# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
|
||||
# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
|
||||
# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
|
||||
# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
|
||||
# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
|
||||
# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
|
||||
# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
|
||||
# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
|
||||
# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
|
||||
# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
|
||||
# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
|
||||
# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
|
||||
# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
|
||||
# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
|
||||
# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
|
||||
#endif
|
||||
|
||||
|
||||
/* *************************************
|
||||
* Version
|
||||
***************************************/
|
||||
#define XXH_VERSION_MAJOR 0
|
||||
#define XXH_VERSION_MINOR 7
|
||||
#define XXH_VERSION_RELEASE 0
|
||||
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
|
||||
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
|
||||
|
||||
|
||||
/*-**********************************************************************
|
||||
* 32-bit hash
|
||||
************************************************************************/
|
||||
#if !defined (__VMS) \
|
||||
&& (defined (__cplusplus) \
|
||||
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
||||
# include <stdint.h>
|
||||
typedef uint32_t XXH32_hash_t;
|
||||
#else
|
||||
typedef unsigned int XXH32_hash_t;
|
||||
#endif
|
||||
|
||||
/*! XXH32() :
|
||||
Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
|
||||
The memory between input & input+length must be valid (allocated and read-accessible).
|
||||
"seed" can be used to alter the result predictably.
|
||||
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
|
||||
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
|
||||
|
||||
/*====== Streaming ======*/
|
||||
typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
|
||||
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
|
||||
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
|
||||
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
|
||||
XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
|
||||
|
||||
/*
|
||||
* Streaming functions generate the xxHash of an input provided in multiple segments.
|
||||
* Note that, for small input, they are slower than single-call functions, due to state management.
|
||||
* For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
|
||||
*
|
||||
* XXH state must first be allocated, using XXH*_createState() .
|
||||
*
|
||||
* Start a new hash by initializing state with a seed, using XXH*_reset().
|
||||
*
|
||||
* Then, feed the hash state by calling XXH*_update() as many times as necessary.
|
||||
* The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
|
||||
*
|
||||
* Finally, a hash value can be produced anytime, by using XXH*_digest().
|
||||
* This function returns the nn-bits hash as an int or long long.
|
||||
*
|
||||
* It's still possible to continue inserting input into the hash state after a digest,
|
||||
* and generate some new hashes later on, by calling again XXH*_digest().
|
||||
*
|
||||
* When done, free XXH state space if it was allocated dynamically.
|
||||
*/
|
||||
|
||||
/*====== Canonical representation ======*/
|
||||
|
||||
typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
|
||||
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
|
||||
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
|
||||
|
||||
/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
|
||||
* The canonical representation uses human-readable write convention, aka big-endian (large digits first).
|
||||
* These functions allow transformation of hash result into and from its canonical format.
|
||||
* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef XXH_NO_LONG_LONG
|
||||
/*-**********************************************************************
|
||||
* 64-bit hash
|
||||
************************************************************************/
|
||||
#if !defined (__VMS) \
|
||||
&& (defined (__cplusplus) \
|
||||
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
|
||||
# include <stdint.h>
|
||||
typedef uint64_t XXH64_hash_t;
|
||||
#else
|
||||
typedef unsigned long long XXH64_hash_t;
|
||||
#endif
|
||||
|
||||
/*! XXH64() :
|
||||
Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
|
||||
"seed" can be used to alter the result predictably.
|
||||
This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
|
||||
*/
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
|
||||
|
||||
/*====== Streaming ======*/
|
||||
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
|
||||
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
|
||||
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
|
||||
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
|
||||
|
||||
/*====== Canonical representation ======*/
|
||||
typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
|
||||
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
|
||||
|
||||
|
||||
#endif /* XXH_NO_LONG_LONG */
|
||||
|
||||
|
||||
|
||||
#ifdef XXH_STATIC_LINKING_ONLY
|
||||
|
||||
/* ================================================================================================
|
||||
This section contains declarations which are not guaranteed to remain stable.
|
||||
They may change in future versions, becoming incompatible with a different version of the library.
|
||||
These declarations should only be used with static linking.
|
||||
Never use them in association with dynamic linking !
|
||||
=================================================================================================== */
|
||||
|
||||
/* These definitions are only present to allow
|
||||
* static allocation of XXH state, on stack or in a struct for example.
|
||||
* Never **ever** use members directly. */
|
||||
|
||||
struct XXH32_state_s {
|
||||
XXH32_hash_t total_len_32;
|
||||
XXH32_hash_t large_len;
|
||||
XXH32_hash_t v1;
|
||||
XXH32_hash_t v2;
|
||||
XXH32_hash_t v3;
|
||||
XXH32_hash_t v4;
|
||||
XXH32_hash_t mem32[4];
|
||||
XXH32_hash_t memsize;
|
||||
XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */
|
||||
}; /* typedef'd to XXH32_state_t */
|
||||
|
||||
#ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
|
||||
struct XXH64_state_s {
|
||||
XXH64_hash_t total_len;
|
||||
XXH64_hash_t v1;
|
||||
XXH64_hash_t v2;
|
||||
XXH64_hash_t v3;
|
||||
XXH64_hash_t v4;
|
||||
XXH64_hash_t mem64[4];
|
||||
XXH32_hash_t memsize;
|
||||
XXH32_hash_t reserved[2]; /* never read nor write, might be removed in a future version */
|
||||
}; /* typedef'd to XXH64_state_t */
|
||||
#endif /* XXH_NO_LONG_LONG */
|
||||
|
||||
|
||||
/*-**********************************************************************
|
||||
* XXH3
|
||||
* New experimental hash
|
||||
************************************************************************/
|
||||
#ifndef XXH_NO_LONG_LONG
|
||||
|
||||
|
||||
/* ============================================
|
||||
* XXH3 is a new hash algorithm,
|
||||
* featuring vastly improved speed performance
|
||||
* for both small and large inputs.
|
||||
* See full speed analysis at : http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
|
||||
* In general, expect XXH3 to run about ~2x faster on large inputs,
|
||||
* and >3x faster on small ones, though exact difference depend on platform.
|
||||
*
|
||||
* The algorithm is portable, will generate the same hash on all platforms.
|
||||
* It benefits greatly from vectorization units, but does not require it.
|
||||
*
|
||||
* XXH3 offers 2 variants, _64bits and _128bits.
|
||||
* When only 64 bits are needed, prefer calling the _64bits variant :
|
||||
* it reduces the amount of mixing, resulting in faster speed on small inputs.
|
||||
* It's also generally simpler to manipulate a scalar return type than a struct.
|
||||
*
|
||||
* The XXH3 algorithm is still considered experimental.
|
||||
* Produced results can still change between versions.
|
||||
* It's possible to use it for ephemeral data, but avoid storing long-term values for later re-use.
|
||||
*
|
||||
* The API currently supports one-shot hashing only.
|
||||
* The full version will include streaming capability, and canonical representation.
|
||||
*
|
||||
* There are still a number of opened questions that community can influence during the experimental period.
|
||||
* I'm trying to list a few of them below, though don't consider this list as complete.
|
||||
*
|
||||
* - 128-bits output type : currently defined as a structure of 2 64-bits fields.
|
||||
* That's because 128-bit values do not exist in C standard.
|
||||
* Note that it means that, at byte level, result is not identical depending on endianness.
|
||||
* However, at field level, they are identical on all platforms.
|
||||
* The canonical representation will solve the issue of identical byte-level representation across platforms,
|
||||
* which is necessary for serialization.
|
||||
* Would there be a better representation for a 128-bit hash result ?
|
||||
* Are the names of the inner 64-bit fields important ? Should they be changed ?
|
||||
*
|
||||
* - Canonical representation : for the 64-bit variant, canonical representation is the same as XXH64() (aka big-endian).
|
||||
* What should it be for the 128-bit variant ?
|
||||
* Since it's no longer a scalar value, big-endian representation is no longer an obvious choice.
|
||||
* One possibility : represent it as the concatenation of two 64-bits canonical representation (aka 2x big-endian)
|
||||
* Another one : represent it in the same order as natural order in the struct for little-endian platforms.
|
||||
* Less consistent with existing convention for XXH32/XXH64, but may be more natural for little-endian platforms.
|
||||
*
|
||||
* - Associated functions for 128-bit hash : simple things, such as checking if 2 hashes are equal, become more difficult with struct.
|
||||
* Granted, it's not terribly difficult to create a comparator, but it's still a workload.
|
||||
* Would it be beneficial to declare and define a comparator function for XXH128_hash_t ?
|
||||
* Are there other operations on XXH128_hash_t which would be desirable ?
|
||||
*
|
||||
* - Seed type for 128-bits variant : currently, it's a single 64-bit value, like the 64-bit variant.
|
||||
* It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash.
|
||||
* Although it's also more difficult to use, since it requires to declare and pass a structure instead of a value.
|
||||
* It would either replace current choice, or add a new one.
|
||||
* Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`).
|
||||
* If both 64-bit and 128-bit seeds are possible, which variant should be called XXH128 ?
|
||||
*
|
||||
* - Result for len==0 : Currently, the result of hashing a zero-length input is `0`.
|
||||
* It seems okay as a return value when using all "default" secret and seed (it used to be a request for XXH32/XXH64).
|
||||
* But is it still fine to return `0` when secret or seed are non-default ?
|
||||
* Are there use case which would depend on a different hash result when the secret is different ?
|
||||
*/
|
||||
|
||||
#ifdef XXH_NAMESPACE
|
||||
# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
|
||||
# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
|
||||
# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
|
||||
|
||||
# define XXH3_64bits_createState XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_createState)
|
||||
# define XXH3_64bits_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_freeState)
|
||||
# define XXH3_64bits_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_copyState)
|
||||
# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
|
||||
# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
|
||||
# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
|
||||
# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
|
||||
# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
|
||||
|
||||
# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
|
||||
# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
|
||||
# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
|
||||
#endif
|
||||
|
||||
/* XXH3_64bits() :
|
||||
* default 64-bit variant, using default secret and default seed of 0.
|
||||
* it's also the fastest one. */
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
|
||||
|
||||
/* XXH3_64bits_withSecret() :
|
||||
* It's possible to provide any blob of bytes as a "secret" to generate the hash.
|
||||
* This makes it more difficult for an external actor to prepare an intentional collision.
|
||||
* The secret *must* be large enough (>= XXH_SECRET_SIZE_MIN).
|
||||
* It should consist of random bytes.
|
||||
* Avoid repeating same character, and especially avoid swathes of \0.
|
||||
* Avoid repeating sequences of bytes within the secret.
|
||||
* Failure to respect these conditions will result in a bad quality hash.
|
||||
*/
|
||||
#define XXH_SECRET_SIZE_MIN 136
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
|
||||
|
||||
/* XXH3_64bits_withSeed() :
|
||||
* This variant generates on the fly a custom secret,
|
||||
* based on the default secret, altered using the `seed` value.
|
||||
* While this operation is decently fast, note that it's not completely free.
|
||||
* note : seed==0 produces same results as XXH3_64bits() */
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
|
||||
|
||||
|
||||
/* streaming 64-bit */
|
||||
|
||||
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */
|
||||
# include <stdalign.h>
|
||||
# define XXH_ALIGN(n) alignas(n)
|
||||
#elif defined(__GNUC__)
|
||||
# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
|
||||
#elif defined(_MSC_VER)
|
||||
# define XXH_ALIGN(n) __declspec(align(n))
|
||||
#else
|
||||
# define XXH_ALIGN(n) /* disabled */
|
||||
#endif
|
||||
|
||||
typedef struct XXH3_state_s XXH3_state_t;
|
||||
|
||||
#define XXH3_SECRET_DEFAULT_SIZE 192 /* minimum XXH_SECRET_SIZE_MIN */
|
||||
#define XXH3_INTERNALBUFFER_SIZE 128
|
||||
struct XXH3_state_s {
|
||||
XXH_ALIGN(64) XXH64_hash_t acc[8];
|
||||
XXH_ALIGN(64) char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
|
||||
XXH_ALIGN(64) char buffer[XXH3_INTERNALBUFFER_SIZE];
|
||||
const void* secret;
|
||||
XXH32_hash_t bufferedSize;
|
||||
XXH32_hash_t nbStripesPerBlock;
|
||||
XXH32_hash_t nbStripesSoFar;
|
||||
XXH32_hash_t reserved32;
|
||||
XXH32_hash_t reserved32_2;
|
||||
XXH32_hash_t secretLimit;
|
||||
XXH64_hash_t totalLen;
|
||||
XXH64_hash_t seed;
|
||||
XXH64_hash_t reserved64;
|
||||
}; /* typedef'd to XXH3_state_t */
|
||||
|
||||
/* Streaming requires state maintenance.
|
||||
* This operation costs memory and cpu.
|
||||
* As a consequence, streaming is slower than one-shot hashing.
|
||||
* For better performance, prefer using one-shot functions whenever possible. */
|
||||
|
||||
XXH_PUBLIC_API XXH3_state_t* XXH3_64bits_createState(void);
|
||||
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_freeState(XXH3_state_t* statePtr);
|
||||
XXH_PUBLIC_API void XXH3_64bits_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
|
||||
|
||||
/* XXH3_64bits_reset() :
|
||||
* initialize with default parameters.
|
||||
* result will be equivalent to `XXH3_64bits()` */
|
||||
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
|
||||
/* XXH3_64bits_reset_withSeed() :
|
||||
* generate a custom secret from `seed`, and store it into state.
|
||||
* digest will be equivalent to `XXH3_64bits_withSeed()` */
|
||||
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
|
||||
/* XXH3_64bits_reset_withSecret() :
|
||||
* `secret` is referenced, and must outlive the hash streaming session.
|
||||
* secretSize must be >= XXH_SECRET_SIZE_MIN.
|
||||
*/
|
||||
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
|
||||
|
||||
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
|
||||
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
|
||||
|
||||
|
||||
/* 128-bit */
|
||||
|
||||
typedef struct {
|
||||
XXH64_hash_t low64;
|
||||
XXH64_hash_t high64;
|
||||
} XXH128_hash_t;
|
||||
|
||||
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
|
||||
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); /* == XXH128() */
|
||||
XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
|
||||
|
||||
|
||||
|
||||
#endif /* XXH_NO_LONG_LONG */
|
||||
|
||||
|
||||
/*-**********************************************************************
|
||||
* XXH_INLINE_ALL
|
||||
************************************************************************/
|
||||
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
|
||||
# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif /* XXH_STATIC_LINKING_ONLY */
|
||||
|
||||
|
||||
#if defined (__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* XXHASH_H_5627135585666179 */
|
|
@ -1,167 +0,0 @@
|
|||
// //////////////////////////////////////////////////////////
|
||||
// xxhash32.h
|
||||
// Copyright (c) 2016 Stephan Brumme. All rights reserved.
|
||||
// see https://create.stephan-brumme.com/disclaimer.html
|
||||
//
|
||||
#pragma once
|
||||
#include <stdint.h> // for uint32_t and uint64_t
|
||||
#include "crc32c.h"
|
||||
/// XXHash (32 bit), based on Yann Collet's descriptions, see https://cyan4973.github.io/xxHash/
|
||||
/** How to use:
|
||||
uint32_t myseed = 0;
|
||||
XXHash32 myhash(myseed);
|
||||
myhash.add(pointerToSomeBytes, numberOfBytes);
|
||||
myhash.add(pointerToSomeMoreBytes, numberOfMoreBytes); // call add() as often as you like to ...
|
||||
// and compute hash:
|
||||
uint32_t result = myhash.hash();
|
||||
// or all of the above in one single line:
|
||||
uint32_t result2 = XXHash32::hash(mypointer, numBytes, myseed);
|
||||
Note: my code is NOT endian-aware !
|
||||
**/
|
||||
class XXHash32
|
||||
{
|
||||
public:
|
||||
/// create new XXHash (32 bit)
|
||||
/** @param seed your seed value, even zero is a valid seed and e.g. used by LZ4 **/
|
||||
explicit XXHash32(uint32_t seed)
|
||||
{
|
||||
state[0] = seed + Prime1 + Prime2;
|
||||
state[1] = seed + Prime2;
|
||||
state[2] = seed;
|
||||
state[3] = seed - Prime1;
|
||||
bufferSize = 0;
|
||||
totalLength = 0;
|
||||
}
|
||||
/// add a chunk of bytes
|
||||
/** @param input pointer to a continuous block of data
|
||||
@param length number of bytes
|
||||
@return false if parameters are invalid / zero **/
|
||||
bool add(const void* input, uint64_t length)
|
||||
{
|
||||
// no data ?
|
||||
if (!input || length == 0)
|
||||
return false;
|
||||
totalLength += length;
|
||||
// byte-wise access
|
||||
const unsigned char* data = (const unsigned char*)input;
|
||||
// unprocessed old data plus new data still fit in temporary buffer ?
|
||||
if (bufferSize + length < MaxBufferSize)
|
||||
{
|
||||
// just add new data
|
||||
while (length-- > 0)
|
||||
buffer[bufferSize++] = *data++;
|
||||
return true;
|
||||
}
|
||||
// point beyond last byte
|
||||
const unsigned char* stop = data + length;
|
||||
const unsigned char* stopBlock = stop - MaxBufferSize;
|
||||
// some data left from previous update ?
|
||||
if (bufferSize > 0)
|
||||
{
|
||||
// make sure temporary buffer is full (16 bytes)
|
||||
while (bufferSize < MaxBufferSize)
|
||||
buffer[bufferSize++] = *data++;
|
||||
// process these 16 bytes (4x4)
|
||||
process(buffer, state[0], state[1], state[2], state[3]);
|
||||
}
|
||||
// copying state to local variables helps optimizer A LOT
|
||||
uint32_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];
|
||||
// 16 bytes at once
|
||||
while (data <= stopBlock)
|
||||
{
|
||||
// local variables s0..s3 instead of state[0]..state[3] are much faster
|
||||
process(data, s0, s1, s2, s3);
|
||||
data += 16;
|
||||
}
|
||||
// copy back
|
||||
state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;
|
||||
// copy remainder to temporary buffer
|
||||
bufferSize = stop - data;
|
||||
for (unsigned int i = 0; i < bufferSize; i++)
|
||||
buffer[i] = data[i];
|
||||
// done
|
||||
return true;
|
||||
}
|
||||
/// get current hash
|
||||
/** @return 32 bit XXHash **/
|
||||
uint32_t hash() const
|
||||
{
|
||||
uint32_t result = (uint32_t)totalLength;
|
||||
// fold 128 bit state into one single 32 bit value
|
||||
if (totalLength >= MaxBufferSize)
|
||||
result += rotateLeft(state[0], 1) +
|
||||
rotateLeft(state[1], 7) +
|
||||
rotateLeft(state[2], 12) +
|
||||
rotateLeft(state[3], 18);
|
||||
else
|
||||
// internal state wasn't set in add(), therefore original seed is still stored in state2
|
||||
result += state[2] + Prime5;
|
||||
// process remaining bytes in temporary buffer
|
||||
const unsigned char* data = buffer;
|
||||
// point beyond last byte
|
||||
const unsigned char* stop = data + bufferSize;
|
||||
// at least 4 bytes left ? => eat 4 bytes per step
|
||||
for (; data + 4 <= stop; data += 4)
|
||||
result = rotateLeft(result + *(uint32_t*)data * Prime3, 17) * Prime4;
|
||||
// take care of remaining 0..3 bytes, eat 1 byte per step
|
||||
while (data != stop)
|
||||
result = rotateLeft(result + (*data++) * Prime5, 11) * Prime1;
|
||||
// mix bits
|
||||
result ^= result >> 15;
|
||||
result *= Prime2;
|
||||
result ^= result >> 13;
|
||||
result *= Prime3;
|
||||
result ^= result >> 16;
|
||||
return result;
|
||||
}
|
||||
/// combine constructor, add() and hash() in one static function (C style)
|
||||
/** @param input pointer to a continuous block of data
|
||||
@param length number of bytes
|
||||
@param seed your seed value, e.g. zero is a valid seed and used by LZ4
|
||||
@return 32 bit XXHash **/
|
||||
static uint32_t hash(const void* input, uint64_t length, uint32_t seed)
|
||||
{
|
||||
// Some modern CPUs support hardware accellerated CRC32
|
||||
// This is significantly faster than xxHash, in some cases, by more than double
|
||||
// So now we check for this capability and use it if it exists.
|
||||
// This significantly reduces the impact of hashing on CPUs supporting SSE4.2
|
||||
// but also keeps xxHash present as a fast fallback, for those who don't support it
|
||||
static bool bHardwareCrc32 = crc32c_hw_available(); // Cache the result in a static variable to avoid _cpuid every call
|
||||
static bool bCrc32Init = false;
|
||||
if (bHardwareCrc32) {
|
||||
return crc32c_append(seed, (uint8_t*)input, (size_t)length);
|
||||
}
|
||||
|
||||
XXHash32 hasher(seed);
|
||||
hasher.add(input, length);
|
||||
return hasher.hash();
|
||||
}
|
||||
private:
|
||||
/// magic constants :-)
|
||||
static const uint32_t Prime1 = 2654435761U;
|
||||
static const uint32_t Prime2 = 2246822519U;
|
||||
static const uint32_t Prime3 = 3266489917U;
|
||||
static const uint32_t Prime4 = 668265263U;
|
||||
static const uint32_t Prime5 = 374761393U;
|
||||
/// temporarily store up to 15 bytes between multiple add() calls
|
||||
static const uint32_t MaxBufferSize = 15 + 1;
|
||||
// internal state and temporary buffer
|
||||
uint32_t state[4]; // state[2] == seed if totalLength < MaxBufferSize
|
||||
unsigned char buffer[MaxBufferSize];
|
||||
unsigned int bufferSize;
|
||||
uint64_t totalLength;
|
||||
/// rotate bits, should compile to a single CPU instruction (ROL)
|
||||
static inline uint32_t rotateLeft(uint32_t x, unsigned char bits)
|
||||
{
|
||||
return (x << bits) | (x >> (32 - bits));
|
||||
}
|
||||
/// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm
|
||||
static inline void process(const void* data, uint32_t& state0, uint32_t& state1, uint32_t& state2, uint32_t& state3)
|
||||
{
|
||||
const uint32_t* block = (const uint32_t*)data;
|
||||
state0 = rotateLeft(state0 + block[0] * Prime2, 13) * Prime1;
|
||||
state1 = rotateLeft(state1 + block[1] * Prime2, 13) * Prime1;
|
||||
state2 = rotateLeft(state2 + block[2] * Prime2, 13) * Prime1;
|
||||
state3 = rotateLeft(state3 + block[3] * Prime2, 13) * Prime1;
|
||||
}
|
||||
};
|
|
@ -27,7 +27,7 @@
|
|||
#define _XBOXKRNL_DEFEXTRN_
|
||||
#define LOG_PREFIX CXBXR_MODULE::D3D8
|
||||
|
||||
#include "common\util\xxhash32.h"
|
||||
#include "common\util\hasher.h"
|
||||
#include <condition_variable>
|
||||
|
||||
// prevent name collisions
|
||||
|
@ -150,6 +150,8 @@ static DWORD g_VBLastSwap = 0;
|
|||
static XTL::D3DSWAPDATA g_SwapData = {0};
|
||||
static DWORD g_SwapLast = 0;
|
||||
|
||||
static XTL::CxbxVertexBufferConverter VertexBufferConverter = {};
|
||||
|
||||
// cached Direct3D state variable(s)
|
||||
static XTL::IDirect3DIndexBuffer *pClosingLineLoopIndexBuffer = nullptr;
|
||||
|
||||
|
@ -716,7 +718,7 @@ typedef struct {
|
|||
DWORD dwXboxResourceType = 0;
|
||||
void* pXboxData = nullptr;
|
||||
size_t szXboxDataSize = 0;
|
||||
uint32_t hash = 0;
|
||||
uint64_t hash = 0;
|
||||
bool forceRehash = false;
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> nextHashTime;
|
||||
std::chrono::milliseconds hashLifeTime = 1ms;
|
||||
|
@ -866,8 +868,8 @@ bool HostResourceRequiresUpdate(resource_key_t key, DWORD dwSize)
|
|||
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
if (now > it->second.nextHashTime || it->second.forceRehash) {
|
||||
uint32_t oldHash = it->second.hash;
|
||||
it->second.hash = XXHash32::hash(it->second.pXboxData, it->second.szXboxDataSize, 0);
|
||||
uint64_t oldHash = it->second.hash;
|
||||
it->second.hash = ComputeHash(it->second.pXboxData, it->second.szXboxDataSize);
|
||||
|
||||
if (it->second.hash != oldHash) {
|
||||
// The data changed, so reset the hash lifetime
|
||||
|
@ -905,7 +907,7 @@ void SetHostResource(XTL::X_D3DResource* pXboxResource, XTL::IDirect3DResource*
|
|||
resourceInfo.dwXboxResourceType = GetXboxCommonResourceType(pXboxResource);
|
||||
resourceInfo.pXboxData = GetDataFromXboxResource(pXboxResource);
|
||||
resourceInfo.szXboxDataSize = dwSize > 0 ? dwSize : GetXboxResourceSize(pXboxResource);
|
||||
resourceInfo.hash = XXHash32::hash(resourceInfo.pXboxData, resourceInfo.szXboxDataSize, 0);
|
||||
resourceInfo.hash = ComputeHash(resourceInfo.pXboxData, resourceInfo.szXboxDataSize);
|
||||
resourceInfo.hashLifeTime = 1ms;
|
||||
resourceInfo.lastUpdate = std::chrono::high_resolution_clock::now();
|
||||
resourceInfo.nextHashTime = resourceInfo.lastUpdate + resourceInfo.hashLifeTime;
|
||||
|
@ -1640,6 +1642,10 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar
|
|||
ToggleFauxFullscreen(hWnd);
|
||||
}
|
||||
}
|
||||
else if (wParam == VK_F1)
|
||||
{
|
||||
VertexBufferConverter.PrintStats();
|
||||
}
|
||||
else if (wParam == VK_F6)
|
||||
{
|
||||
// For some unknown reason, F6 isn't handled in WndMain::WndProc
|
||||
|
@ -2332,7 +2338,7 @@ static void EmuVerifyResourceIsRegistered(XTL::X_D3DResource *pResource, DWORD D
|
|||
}
|
||||
|
||||
typedef struct {
|
||||
DWORD Hash = 0;
|
||||
uint64_t Hash = 0;
|
||||
DWORD IndexCount = 0;
|
||||
XTL::IDirect3DIndexBuffer* pHostIndexBuffer = nullptr;
|
||||
} ConvertedIndexBuffer;
|
||||
|
@ -2388,7 +2394,7 @@ void CxbxUpdateActiveIndexBuffer
|
|||
}
|
||||
|
||||
// If the data needs updating, do so
|
||||
uint32_t uiHash = XXHash32::hash(pIndexData, IndexCount * 2, 0);
|
||||
uint64_t uiHash = ComputeHash(pIndexData, IndexCount * 2);
|
||||
if (uiHash != indexBuffer.Hash) {
|
||||
// Update the Index Count and the hash
|
||||
indexBuffer.IndexCount = IndexCount;
|
||||
|
@ -7053,13 +7059,11 @@ void XTL::CxbxDrawIndexed(CxbxDrawContext &DrawContext)
|
|||
|
||||
CxbxUpdateActiveIndexBuffer(DrawContext.pIndexData, DrawContext.dwVertexCount);
|
||||
|
||||
CxbxVertexBufferConverter VertexBufferConverter = {};
|
||||
|
||||
//Walk through index buffer
|
||||
// Determine highest and lowest index in use :
|
||||
INDEX16 LowIndex, HighIndex;
|
||||
WalkIndexBuffer(LowIndex, HighIndex, &(DrawContext.pIndexData[DrawContext.dwStartVertex]), DrawContext.dwVertexCount);
|
||||
VertexBufferConverter.Apply(&DrawContext, LowIndex);
|
||||
VertexBufferConverter.Apply(&DrawContext);
|
||||
|
||||
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
|
||||
UINT uiStartIndex = 0;
|
||||
|
@ -7135,7 +7139,6 @@ void XTL::CxbxDrawPrimitiveUP(CxbxDrawContext &DrawContext)
|
|||
assert(DrawContext.uiXboxVertexStreamZeroStride > 0);
|
||||
assert(DrawContext.dwIndexBase == 0); // No IndexBase under Draw*UP
|
||||
|
||||
CxbxVertexBufferConverter VertexBufferConverter = {};
|
||||
VertexBufferConverter.Apply(&DrawContext);
|
||||
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
|
||||
// LOG_TEST_CASE("X_D3DPT_QUADLIST"); // X-Marbles and XDK Sample PlayField hits this case
|
||||
|
@ -7329,7 +7332,7 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawVertices)
|
|||
DrawContext.dwVertexCount = VertexCount;
|
||||
DrawContext.dwStartVertex = StartVertex;
|
||||
DrawContext.hVertexShader = g_CurrentXboxVertexShaderHandle;
|
||||
CxbxVertexBufferConverter VertexBufferConverter = {};
|
||||
|
||||
VertexBufferConverter.Apply(&DrawContext);
|
||||
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
|
||||
// LOG_TEST_CASE("X_D3DPT_QUADLIST"); // ?X-Marbles and XDK Sample (Cartoon, ?maybe PlayField?) hits this case
|
||||
|
@ -7538,7 +7541,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawIndexedVerticesUP)
|
|||
DrawContext.hVertexShader = g_CurrentXboxVertexShaderHandle;
|
||||
// Don't set DrawContext.pIndexData = (INDEX16*)pIndexData; // Used by GetVerticesInBuffer
|
||||
|
||||
CxbxVertexBufferConverter VertexBufferConverter = {};
|
||||
VertexBufferConverter.Apply(&DrawContext);
|
||||
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
|
||||
// Indexed quadlist can be drawn using unpatched indexes via multiple draws of 2 'strip' triangles :
|
||||
|
|
|
@ -28,19 +28,17 @@
|
|||
#define _XBOXKRNL_DEFEXTRN_
|
||||
#define LOG_PREFIX CXBXR_MODULE::VTXB
|
||||
|
||||
#include <unordered_map>
|
||||
#include "core\kernel\memory-manager\VMManager.h"
|
||||
#include "common\util\xxhash32.h" // For XXHash32::hash()
|
||||
#include "common\util\hasher.h"
|
||||
#include "core\kernel\support\Emu.h"
|
||||
#include "core\kernel\support\EmuXTL.h"
|
||||
#include "core\hle\D3D8\ResourceTracker.h"
|
||||
|
||||
#include <ctime>
|
||||
#include <unordered_map>
|
||||
#include <chrono>
|
||||
#include <algorithm>
|
||||
|
||||
#define HASH_SEED 0
|
||||
|
||||
#define MAX_STREAM_NOT_USED_TIME (2 * CLOCKS_PER_SEC) // TODO: Trim the not used time
|
||||
|
||||
// Inline vertex buffer emulation
|
||||
|
@ -60,137 +58,54 @@ extern XTL::X_D3DVertexBuffer*g_D3DStreams[16];
|
|||
extern UINT g_D3DStreamStrides[16];
|
||||
void *GetDataFromXboxResource(XTL::X_D3DResource *pXboxResource);
|
||||
|
||||
// One entry of the host vertex buffer object cache (g_HostVertexBuffers, declared right after this struct)
typedef struct {
|
||||
// Host Direct3D vertex buffer object held by this cache entry
XTL::IDirect3DVertexBuffer* pHostVertexBuffer;
|
||||
// Size (in bytes) that pHostVertexBuffer was created with
size_t uiSize;
|
||||
// Time of the most recent lookup of this entry
std::chrono::time_point<std::chrono::high_resolution_clock> lastUsed;
|
||||
} cached_vertex_buffer_object;
|
||||
|
||||
// Cache of host vertex buffer objects, keyed by the DWORD passed to GetCachedVertexBufferObject
std::unordered_map<DWORD, cached_vertex_buffer_object> g_HostVertexBuffers;
|
||||
|
||||
// This caches Vertex Buffer Objects, but not the containing data
|
||||
// This prevents unnecessary allocation and releasing of Vertex Buffers when
|
||||
// we can use an existing just fine. This gives a (slight) performance boost
|
||||
// Returns true if the existing vertex buffer was trashed/made invalid
|
||||
bool GetCachedVertexBufferObject(DWORD pXboxDataPtr, DWORD size, XTL::IDirect3DVertexBuffer** pVertexBuffer)
|
||||
{
|
||||
// TODO: If the vertex buffer object cache becomes too large,
|
||||
// free the least recently used vertex buffers
|
||||
|
||||
auto it = g_HostVertexBuffers.find(pXboxDataPtr);
|
||||
if (it == g_HostVertexBuffers.end()) {
|
||||
// Create new vertex buffer and return
|
||||
cached_vertex_buffer_object newBuffer;
|
||||
newBuffer.uiSize = size;
|
||||
newBuffer.lastUsed = std::chrono::high_resolution_clock::now();
|
||||
|
||||
HRESULT hRet = g_pD3DDevice->CreateVertexBuffer(
|
||||
size,
|
||||
D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC,
|
||||
0,
|
||||
XTL::D3DPOOL_DEFAULT,
|
||||
&newBuffer.pHostVertexBuffer,
|
||||
nullptr
|
||||
);
|
||||
if (FAILED(hRet)) {
|
||||
CxbxKrnlCleanup("Failed to create vertex buffer");
|
||||
}
|
||||
|
||||
g_HostVertexBuffers[pXboxDataPtr] = newBuffer;
|
||||
|
||||
*pVertexBuffer = newBuffer.pHostVertexBuffer;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto buffer = &it->second;
|
||||
buffer->lastUsed = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// Return the existing vertex buffer, if possible
|
||||
if (size <= buffer->uiSize) {
|
||||
*pVertexBuffer = buffer->pHostVertexBuffer;
|
||||
return false;
|
||||
}
|
||||
|
||||
// If execution reached here, we need to release and re-create the vertex buffer..
|
||||
buffer->pHostVertexBuffer->Release();
|
||||
buffer->uiSize = size;
|
||||
HRESULT hRet = g_pD3DDevice->CreateVertexBuffer(
|
||||
size,
|
||||
D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC,
|
||||
0,
|
||||
XTL::D3DPOOL_DEFAULT,
|
||||
&buffer->pHostVertexBuffer,
|
||||
nullptr
|
||||
);
|
||||
if (FAILED(hRet)) {
|
||||
CxbxKrnlCleanup("Failed to create vertex buffer");
|
||||
}
|
||||
|
||||
*pVertexBuffer = buffer->pHostVertexBuffer;
|
||||
return true;
|
||||
}
|
||||
|
||||
void ActivatePatchedStream
|
||||
(
|
||||
XTL::CxbxDrawContext *pDrawContext,
|
||||
UINT uiStream,
|
||||
XTL::CxbxPatchedStream *pPatchedStream,
|
||||
bool bRelease
|
||||
)
|
||||
void XTL::CxbxPatchedStream::Activate(XTL::CxbxDrawContext *pDrawContext, UINT uiStream) const
|
||||
{
|
||||
//LOG_INIT // Allows use of DEBUG_D3DRESULT
|
||||
|
||||
// Use the cached stream values on the host
|
||||
if (pPatchedStream->bCacheIsStreamZeroDrawUP) {
|
||||
if (bCacheIsStreamZeroDrawUP) {
|
||||
// Set the UserPointer variables in the drawing context
|
||||
pDrawContext->pHostVertexStreamZeroData = pPatchedStream->pCachedHostVertexStreamZeroData;
|
||||
pDrawContext->uiHostVertexStreamZeroStride = pPatchedStream->uiCachedHostVertexStride;
|
||||
pDrawContext->pHostVertexStreamZeroData = pCachedHostVertexStreamZeroData;
|
||||
pDrawContext->uiHostVertexStreamZeroStride = uiCachedHostVertexStride;
|
||||
}
|
||||
else {
|
||||
HRESULT hRet = g_pD3DDevice->SetStreamSource(
|
||||
uiStream,
|
||||
pPatchedStream->pCachedHostVertexBuffer,
|
||||
pCachedHostVertexBuffer,
|
||||
0, // OffsetInBytes
|
||||
pPatchedStream->uiCachedHostVertexStride);
|
||||
uiCachedHostVertexStride);
|
||||
//DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetStreamSource");
|
||||
if (FAILED(hRet)) {
|
||||
CxbxKrnlCleanup("Failed to set the type patched buffer as the new stream source!\n");
|
||||
// TODO : Cartoon hits the above case when the vertex cache size is 0.
|
||||
}
|
||||
|
||||
// TODO : The following doesn't fix that - find out why and fix it for real
|
||||
if (bRelease) {
|
||||
// Always release to prevent leaks when it wasn't read from cache:
|
||||
pPatchedStream->pCachedHostVertexBuffer->Release();
|
||||
// NOTE : Even this doesn't prevent Cartoon breaking : g_pD3DDevice->ResourceManagerDiscardBytes(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// A newly constructed patched stream is explicitly marked as not valid
XTL::CxbxPatchedStream::CxbxPatchedStream()
    : isValid(false)
{
}
|
||||
|
||||
void ReleasePatchedStream(XTL::CxbxPatchedStream *pPatchedStream)
|
||||
XTL::CxbxPatchedStream::~CxbxPatchedStream()
|
||||
{
|
||||
if (pPatchedStream->bCachedHostVertexStreamZeroDataIsAllocated) {
|
||||
free(pPatchedStream->pCachedHostVertexStreamZeroData);
|
||||
pPatchedStream->bCachedHostVertexStreamZeroDataIsAllocated = false;
|
||||
}
|
||||
pPatchedStream->pCachedHostVertexStreamZeroData = nullptr;
|
||||
pPatchedStream->pCachedHostVertexBuffer = nullptr; // g_HostVertexBuffers owns these nowadays
|
||||
}
|
||||
if (bCachedHostVertexStreamZeroDataIsAllocated) {
|
||||
free(pCachedHostVertexStreamZeroData);
|
||||
bCachedHostVertexStreamZeroDataIsAllocated = false;
|
||||
}
|
||||
|
||||
pCachedHostVertexStreamZeroData = nullptr;
|
||||
|
||||
if (pCachedHostVertexBuffer != nullptr) {
|
||||
pCachedHostVertexBuffer->Release();
|
||||
pCachedHostVertexBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Puts the converter in its empty state : no streams, no shader info, no stream zero data
XTL::CxbxVertexBufferConverter::CxbxVertexBufferConverter()
{
    // Clear every patched stream slot in one go
    ZeroMemory(m_PatchedStreams, sizeof(CxbxPatchedStream) * MAX_NBR_STREAMS);

    m_uiNbrStreams = 0;
    m_pVertexShaderInfo = nullptr;
    m_pNewVertexStreamZeroData = nullptr;
    m_bAllocatedStreamZeroData = false;
}
|
||||
|
||||
// Releases every patched stream slot and resets the stream bookkeeping
XTL::CxbxVertexBufferConverter::~CxbxVertexBufferConverter()
{
    for (auto &patchedStream : m_PatchedStreams) {
        ReleasePatchedStream(&patchedStream);
    }

    m_pVertexShaderInfo = nullptr;
    m_uiNbrStreams = 0;
}
|
||||
|
||||
size_t GetVerticesInBuffer(DWORD dwOffset, DWORD dwVertexCount, PWORD pIndexData, DWORD dwIndexBase)
|
||||
|
@ -198,7 +113,7 @@ size_t GetVerticesInBuffer(DWORD dwOffset, DWORD dwVertexCount, PWORD pIndexData
|
|||
// If we are drawing from an offset, we know that the vertex count must have offset vertices
|
||||
// before the first drawn vertices
|
||||
dwVertexCount += dwOffset;
|
||||
if (pIndexData == nullptr) {
|
||||
if (pIndexData == xbnullptr) {
|
||||
return dwVertexCount;
|
||||
}
|
||||
|
||||
|
@ -218,7 +133,7 @@ int CountActiveD3DStreams()
|
|||
{
|
||||
int lastStreamIndex = 0;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
if (g_D3DStreams[i] != nullptr) {
|
||||
if (g_D3DStreams[i] != xbnullptr) {
|
||||
lastStreamIndex = i + 1;
|
||||
}
|
||||
}
|
||||
|
@ -231,7 +146,7 @@ XTL::CxbxVertexShaderInfo *GetCxbxVertexShaderInfo(DWORD Handle); // forward
|
|||
UINT XTL::CxbxVertexBufferConverter::GetNbrStreams(CxbxDrawContext *pDrawContext)
|
||||
{
|
||||
// Draw..Up always have one stream
|
||||
if (pDrawContext->pXboxVertexStreamZeroData != nullptr) {
|
||||
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -274,13 +189,47 @@ inline FLOAT NormShortToFloat(const SHORT value)
|
|||
// Converts a byte to a float by dividing it by 255
inline FLOAT ByteToFloat(const BYTE value)
{
    const FLOAT asFloat = static_cast<FLOAT>(value);
    return asFloat / 255.0f;
}
|
||||
|
||||
}
|
||||
|
||||
// Returns the cached patched stream stored under the given key, creating an empty
// (isValid == false) entry when none exists yet.
// The usage list is kept in most-recently-used order (front = newest); the map only
// holds iterators into that list for fast lookup.
// key : hash of the Xbox vertex data the stream was (or will be) converted from
XTL::CxbxPatchedStream& XTL::CxbxVertexBufferConverter::GetPatchedStream(uint64_t key)
{
    // First, attempt to fetch an existing patched stream
    auto it = m_PatchedStreams.find(key);
    if (it != m_PatchedStreams.end()) {
        // Cache hit : move the entry to the front of the usage list (splice keeps iterators valid)
        m_PatchedStreamUsageList.splice(m_PatchedStreamUsageList.begin(), m_PatchedStreamUsageList, it->second);
        return *it->second;
    }

    // We didn't find an existing patched stream, so we must insert one and get a reference to it
    m_PatchedStreamUsageList.emplace_front();
    CxbxPatchedStream& stream = m_PatchedStreamUsageList.front();

    // Record the key in the entry right away : eviction below erases map entries by
    // uiVertexDataHash, so an entry that never had this field filled in would
    // otherwise leave a dangling iterator behind in the lookup map
    stream.uiVertexDataHash = key;

    // Insert a reference iterator into the fast lookup map
    m_PatchedStreams[key] = m_PatchedStreamUsageList.begin();

    // If the cache has exceeded its upper bound, discard the oldest entries in the cache
    if (m_PatchedStreams.size() > (m_MaxCacheSize + m_CacheElasticity)) {
        // Never discard the front entry : it is the one being returned to the caller
        while (m_PatchedStreams.size() > m_MaxCacheSize && m_PatchedStreamUsageList.size() > 1) {
            m_PatchedStreams.erase(m_PatchedStreamUsageList.back().uiVertexDataHash);
            m_PatchedStreamUsageList.pop_back();
        }
    }

    return stream;
}
|
||||
|
||||
void XTL::CxbxVertexBufferConverter::PrintStats()
|
||||
{
|
||||
printf("Vertex Buffer Cache Status: \n");
|
||||
printf("- Cache Size: %d\n", m_PatchedStreams.size());
|
||||
printf("- Hits: %d\n", m_TotalCacheHits);
|
||||
printf("- Misses: %d\n", m_TotalCacheMisses);
|
||||
}
|
||||
|
||||
void XTL::CxbxVertexBufferConverter::ConvertStream
|
||||
(
|
||||
CxbxDrawContext *pDrawContext,
|
||||
UINT uiStream,
|
||||
DWORD StartIndex
|
||||
UINT uiStream
|
||||
)
|
||||
{
|
||||
extern XTL::D3DCAPS g_D3DCaps;
|
||||
|
@ -337,12 +286,12 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
|
|||
bool bNeedRHWReset = bVshHandleIsFVF && ((XboxFVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZRHW);
|
||||
bool bNeedStreamCopy = bNeedTextureNormalization || bNeedVertexPatching || bNeedRHWReset;
|
||||
|
||||
uint8_t *pXboxVertexData;
|
||||
UINT uiXboxVertexStride;
|
||||
UINT uiVertexCount;
|
||||
UINT uiHostVertexStride;
|
||||
DWORD dwHostVertexDataSize;
|
||||
uint8_t *pHostVertexData;
|
||||
uint8_t *pXboxVertexData = xbnullptr;
|
||||
UINT uiXboxVertexStride = 0;
|
||||
UINT uiVertexCount = 0;
|
||||
UINT uiHostVertexStride = 0;
|
||||
DWORD dwHostVertexDataSize = 0;
|
||||
uint8_t *pHostVertexData = nullptr;
|
||||
IDirect3DVertexBuffer *pNewHostVertexBuffer = nullptr;
|
||||
|
||||
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr) {
|
||||
|
@ -356,17 +305,7 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
|
|||
uiVertexCount = pDrawContext->VerticesInBuffer;
|
||||
uiHostVertexStride = (bNeedVertexPatching) ? pVertexShaderStreamInfo->HostVertexStride : uiXboxVertexStride;
|
||||
dwHostVertexDataSize = uiVertexCount * uiHostVertexStride;
|
||||
if (bNeedStreamCopy) {
|
||||
pHostVertexData = (uint8_t*)malloc(dwHostVertexDataSize);
|
||||
if (pHostVertexData == nullptr) {
|
||||
CxbxKrnlCleanup("Couldn't allocate the new stream zero buffer");
|
||||
}
|
||||
}
|
||||
else {
|
||||
pHostVertexData = pXboxVertexData;
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
XTL::X_D3DVertexBuffer *pXboxVertexBuffer = g_D3DStreams[uiStream];
|
||||
pXboxVertexData = (uint8_t*)GetDataFromXboxResource(pXboxVertexBuffer);
|
||||
if (pXboxVertexData == NULL) {
|
||||
|
@ -392,19 +331,99 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
|
|||
|
||||
uiHostVertexStride = (bNeedVertexPatching) ? pVertexShaderStreamInfo->HostVertexStride : uiXboxVertexStride;
|
||||
dwHostVertexDataSize = uiVertexCount * uiHostVertexStride;
|
||||
GetCachedVertexBufferObject(pXboxVertexBuffer->Data, dwHostVertexDataSize, &pNewHostVertexBuffer);
|
||||
|
||||
if (FAILED(pNewHostVertexBuffer->Lock(0, 0, (D3DLockData **)&pHostVertexData, D3DLOCK_DISCARD))) {
|
||||
CxbxKrnlCleanup("Couldn't lock the new buffer");
|
||||
}
|
||||
|
||||
// Copy stream for patching and caching.
|
||||
bNeedStreamCopy = true;
|
||||
}
|
||||
|
||||
// FAST PATH: If this draw is a zerostream based draw, and does not require patching, we can use it directly
|
||||
// No need to hash or patch at all in this case!
|
||||
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr && !bNeedStreamCopy) {
|
||||
pHostVertexData = pXboxVertexData;
|
||||
|
||||
CxbxPatchedStream stream;
|
||||
stream.isValid = true;
|
||||
stream.XboxPrimitiveType = pDrawContext->XboxPrimitiveType;
|
||||
stream.uiCachedHostVertexStride = uiHostVertexStride;
|
||||
stream.bCacheIsStreamZeroDrawUP = true;
|
||||
stream.pCachedHostVertexStreamZeroData = pHostVertexData;
|
||||
stream.Activate(pDrawContext, uiStream);
|
||||
return;
|
||||
}
|
||||
|
||||
// Now we have enough information to hash the existing resource and find it in our cache!
|
||||
DWORD xboxVertexDataSize = uiVertexCount * uiXboxVertexStride;
|
||||
uint64_t vertexDataHash = ComputeHash(pXboxVertexData, xboxVertexDataSize);
|
||||
uint64_t pVertexShaderSteamInfoHash = 0;
|
||||
|
||||
if (pVertexShaderStreamInfo != nullptr) {
|
||||
pVertexShaderSteamInfoHash = ComputeHash(pVertexShaderStreamInfo, sizeof(CxbxVertexShaderStreamInfo));
|
||||
}
|
||||
|
||||
// Lookup implicitly inserts a new entry if none exists, so this always works
|
||||
CxbxPatchedStream& patchedStream = GetPatchedStream(vertexDataHash);
|
||||
|
||||
// We check a few fields of the patched stream to protect against hash collisions (rare)
|
||||
// but also to protect against games using the exact same vertex data for different vertex formats (Test Case: Burnout)
|
||||
if (patchedStream.isValid && // Check that we found a cached stream
|
||||
patchedStream.uiVertexStreamInformationHash == pVertexShaderSteamInfoHash && // Check that the vertex conversion is valid
|
||||
patchedStream.uiCachedHostVertexStride == patchedStream.uiCachedHostVertexStride && // Make sure the host stride didn't change
|
||||
patchedStream.uiCachedXboxVertexStride == uiXboxVertexStride && // Make sure the Xbox Stride didn't change
|
||||
patchedStream.uiCachedXboxVertexDataSize == xboxVertexDataSize ) { // Make sure the Xbox Data Size also didn't change
|
||||
m_TotalCacheHits++;
|
||||
patchedStream.Activate(pDrawContext, uiStream);
|
||||
return;
|
||||
}
|
||||
|
||||
m_TotalCacheMisses++;
|
||||
|
||||
// If execution reaches here, the cached vertex buffer was not valid and we must reconvert the data
|
||||
if (patchedStream.isValid) {
|
||||
pHostVertexData = (uint8_t*)patchedStream.pCachedHostVertexStreamZeroData;
|
||||
pNewHostVertexBuffer = patchedStream.pCachedHostVertexBuffer;
|
||||
|
||||
// Free the existing buffers
|
||||
if (pHostVertexData != nullptr) {
|
||||
free(pHostVertexData);
|
||||
pHostVertexData = nullptr;
|
||||
} else if (pNewHostVertexBuffer != nullptr) {
|
||||
pNewHostVertexBuffer->Release();
|
||||
pNewHostVertexBuffer = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate new buffers
|
||||
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr) {
|
||||
pHostVertexData = (uint8_t*)malloc(dwHostVertexDataSize);
|
||||
|
||||
if (pHostVertexData == nullptr) {
|
||||
CxbxKrnlCleanup("Couldn't allocate the new stream zero buffer");
|
||||
}
|
||||
} else {
|
||||
HRESULT hRet = g_pD3DDevice->CreateVertexBuffer(
|
||||
dwHostVertexDataSize,
|
||||
D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC,
|
||||
0,
|
||||
XTL::D3DPOOL_DEFAULT,
|
||||
&pNewHostVertexBuffer,
|
||||
nullptr
|
||||
);
|
||||
|
||||
if (FAILED(hRet)) {
|
||||
CxbxKrnlCleanup("Failed to create vertex buffer");
|
||||
}
|
||||
}
|
||||
|
||||
// If we need to lock a host vertex buffer, do so now
|
||||
if (pHostVertexData == nullptr && pNewHostVertexBuffer != nullptr) {
|
||||
if (FAILED(pNewHostVertexBuffer->Lock(0, 0, (D3DLockData **)&pHostVertexData, D3DLOCK_DISCARD))) {
|
||||
CxbxKrnlCleanup("Couldn't lock vertex buffer");
|
||||
}
|
||||
}
|
||||
|
||||
if (bNeedVertexPatching) {
|
||||
// assert(bNeedStreamCopy || "bNeedVertexPatching implies bNeedStreamCopy (but copies via conversions");
|
||||
for (uint32_t uiVertex = StartIndex; uiVertex < uiVertexCount; uiVertex++) {
|
||||
for (uint32_t uiVertex = 0; uiVertex < uiVertexCount; uiVertex++) {
|
||||
uint8_t *pXboxVertexAsByte = &pXboxVertexData[uiVertex * uiXboxVertexStride];
|
||||
uint8_t *pHostVertexAsByte = &pHostVertexData[uiVertex * uiHostVertexStride];
|
||||
for (UINT uiElement = 0; uiElement < pVertexShaderStreamInfo->NumberOfVertexElements; uiElement++) {
|
||||
|
@ -659,7 +678,7 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
|
|||
// the uiTextureCoordinatesByteOffsetInVertex on host will match Xbox
|
||||
}
|
||||
|
||||
for (uint32_t uiVertex = StartIndex; uiVertex < uiVertexCount; uiVertex++) {
|
||||
for (uint32_t uiVertex = 0; uiVertex < uiVertexCount; uiVertex++) {
|
||||
FLOAT *pVertexDataAsFloat = (FLOAT*)(&pHostVertexData[uiVertex * uiHostVertexStride]);
|
||||
|
||||
// Handle pre-transformed vertices (which bypass the vertex shader pipeline)
|
||||
|
@ -715,38 +734,34 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
patchedStream.isValid = true;
|
||||
patchedStream.XboxPrimitiveType = pDrawContext->XboxPrimitiveType;
|
||||
patchedStream.pCachedXboxVertexData = pXboxVertexData;
|
||||
patchedStream.uiCachedXboxVertexDataSize = xboxVertexDataSize;
|
||||
patchedStream.uiVertexDataHash = vertexDataHash;
|
||||
patchedStream.uiVertexStreamInformationHash = pVertexShaderSteamInfoHash;
|
||||
patchedStream.uiCachedXboxVertexStride = uiXboxVertexStride;
|
||||
patchedStream.uiCachedHostVertexStride = uiHostVertexStride;
|
||||
patchedStream.bCacheIsStreamZeroDrawUP = (pDrawContext->pXboxVertexStreamZeroData != NULL);
|
||||
if (patchedStream.bCacheIsStreamZeroDrawUP) {
|
||||
patchedStream.pCachedHostVertexStreamZeroData = pHostVertexData;
|
||||
patchedStream.bCachedHostVertexStreamZeroDataIsAllocated = bNeedStreamCopy;
|
||||
} else {
|
||||
// assert(pNewHostVertexBuffer != nullptr);
|
||||
pNewHostVertexBuffer->Unlock();
|
||||
patchedStream.pCachedHostVertexBuffer = pNewHostVertexBuffer;
|
||||
}
|
||||
|
||||
CxbxPatchedStream *pPatchedStream = &m_PatchedStreams[uiStream];
|
||||
|
||||
#if 0 // new
|
||||
pPatchedStream->pCachedXboxVertexData = pXboxVertexData; // TODO : For hashing & caching purposes
|
||||
#endif
|
||||
pPatchedStream->uiCachedXboxVertexStride = uiXboxVertexStride;
|
||||
#if 0 // new
|
||||
pPatchedStream->uiCachedXboxVertexDataSize = uiVertexCount * uiXboxVertexStride; // TODO : For hashing & caching purposes
|
||||
#endif
|
||||
pPatchedStream->uiCachedHostVertexStride = uiHostVertexStride;
|
||||
pPatchedStream->bCacheIsStreamZeroDrawUP = (pDrawContext->pXboxVertexStreamZeroData != NULL);
|
||||
if (pPatchedStream->bCacheIsStreamZeroDrawUP) {
|
||||
pPatchedStream->pCachedHostVertexStreamZeroData = pHostVertexData;
|
||||
pPatchedStream->bCachedHostVertexStreamZeroDataIsAllocated = bNeedStreamCopy;
|
||||
}
|
||||
else {
|
||||
// assert(pNewHostVertexBuffer != nullptr);
|
||||
|
||||
pNewHostVertexBuffer->Unlock();
|
||||
pPatchedStream->pCachedHostVertexBuffer = pNewHostVertexBuffer;
|
||||
}
|
||||
|
||||
ActivatePatchedStream(pDrawContext, uiStream, pPatchedStream,
|
||||
/*Release=*/!bNeedStreamCopy); // Release when it won't get cached
|
||||
patchedStream.Activate(pDrawContext, uiStream);
|
||||
}
|
||||
|
||||
void XTL::CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext, DWORD StartIndex)
|
||||
void XTL::CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext)
|
||||
{
|
||||
if ((pDrawContext->XboxPrimitiveType < X_D3DPT_POINTLIST) || (pDrawContext->XboxPrimitiveType > X_D3DPT_POLYGON))
|
||||
CxbxKrnlCleanup("Unknown primitive type: 0x%.02X\n", pDrawContext->XboxPrimitiveType);
|
||||
|
||||
m_pVertexShaderInfo = nullptr;
|
||||
if (VshHandleIsVertexShader(pDrawContext->hVertexShader)) {
|
||||
m_pVertexShaderInfo = &(GetCxbxVertexShader(pDrawContext->hVertexShader)->VertexShaderInfo);
|
||||
}
|
||||
|
@ -766,11 +781,7 @@ void XTL::CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext, DWORD
|
|||
}
|
||||
|
||||
for(UINT uiStream = 0; uiStream < m_uiNbrStreams; uiStream++) {
|
||||
// TODO: Check for cached vertex buffer, and use it if possible
|
||||
|
||||
ConvertStream(pDrawContext, uiStream, StartIndex);
|
||||
|
||||
// TODO: Cache Vertex Buffer Data
|
||||
ConvertStream(pDrawContext, uiStream);
|
||||
}
|
||||
|
||||
if (pDrawContext->XboxPrimitiveType == X_D3DPT_QUADSTRIP) {
|
||||
|
|
|
@ -24,8 +24,9 @@
|
|||
// ******************************************************************
|
||||
#ifndef XBVERTEXBUFFER_H
|
||||
#define XBVERTEXBUFFER_H
|
||||
|
||||
#include "Cxbx.h"
|
||||
|
||||
#include "Cxbx.h"
|
||||
//#include <ctime> // Conflict with io.h
|
||||
|
||||
#define MAX_NBR_STREAMS 16
|
||||
|
@ -50,31 +51,44 @@ typedef struct _CxbxDrawContext
|
|||
}
|
||||
CxbxDrawContext;
|
||||
|
||||
// Per-stream record of the host-side vertex data produced for an Xbox vertex stream
typedef struct _CxbxPatchedStream
|
||||
{
|
||||
// Vertex stride (in bytes) of the Xbox source data
UINT uiCachedXboxVertexStride;
|
||||
// Vertex stride (in bytes) of the host data
UINT uiCachedHostVertexStride;
|
||||
// True when the stream comes from a Draw*UP call (user-pointer data) rather than a vertex buffer
bool bCacheIsStreamZeroDrawUP;
|
||||
// Host vertex data pointer, used when bCacheIsStreamZeroDrawUP is set
void *pCachedHostVertexStreamZeroData;
|
||||
// True when pCachedHostVertexStreamZeroData was allocated and has to be freed on release
bool bCachedHostVertexStreamZeroDataIsAllocated;
|
||||
// Host vertex buffer, used when bCacheIsStreamZeroDrawUP is not set
XTL::IDirect3DVertexBuffer *pCachedHostVertexBuffer;
|
||||
} CxbxPatchedStream;
|
||||
|
||||
// One cached, patched vertex stream. CxbxVertexBufferConverter keeps objects
// of this type by value in a std::list and looks them up through a 64-bit key.
// Every data member has an in-class default, so a newly constructed entry is
// empty and has isValid == false.
class CxbxPatchedStream
|
||||
{
|
||||
public:
|
||||
CxbxPatchedStream();
|
||||
// NOTE(review): the destructor body is not visible in this header; presumably
// it releases the cached host resources below - confirm before copying entries.
~CxbxPatchedStream();
|
||||
// Invoked once per stream at the end of stream conversion with the current
// draw context and stream index. Declared const, so it cannot modify the
// (non-mutable) members of this entry. Implementation is not part of this header.
void Activate(XTL::CxbxDrawContext *pDrawContext, UINT uiStream) const;
|
||||
bool isValid = false; // false until the entry has been filled in (presumably by ConvertStream - TODO confirm)
|
||||
XTL::X_D3DPRIMITIVETYPE XboxPrimitiveType = XTL::X_D3DPT_NONE; // Xbox primitive type recorded for this entry; X_D3DPT_NONE when unset
|
||||
PVOID pCachedXboxVertexData = xbnullptr; // presumably the address of the Xbox vertex data this entry was built from - TODO confirm
|
||||
UINT uiCachedXboxVertexDataSize = 0; // presumably the size of that Xbox vertex data - TODO confirm unit (bytes?)
|
||||
uint64_t uiVertexDataHash = 0; // 64-bit hash; presumably of the Xbox vertex data, used to detect changes - confirm
|
||||
uint64_t uiVertexStreamInformationHash = 0; // 64-bit hash; presumably of the stream/declaration info - confirm
|
||||
UINT uiCachedXboxVertexStride = 0; // presumably the Xbox-side vertex stride at caching time - TODO confirm
|
||||
UINT uiCachedHostVertexStride = 0; // presumably the stride of the converted host vertices - TODO confirm
|
||||
bool bCacheIsStreamZeroDrawUP = false; // NOTE(review): looks like it marks entries built for a DrawPrimitiveUP-style draw - confirm
|
||||
void *pCachedHostVertexStreamZeroData = nullptr; // untyped pointer to host vertex data; presumably only meaningful for the DrawUP case
|
||||
bool bCachedHostVertexStreamZeroDataIsAllocated = false; // NOTE(review): presumably tells whether the pointer above owns its memory - confirm
|
||||
XTL::IDirect3DVertexBuffer *pCachedHostVertexBuffer = nullptr; // host vertex buffer holding the converted vertices; null when none is cached
|
||||
};
|
||||
|
||||
// Converts the vertex streams of a draw call: Apply() runs ConvertStream()
// once for every stream index below m_uiNbrStreams. Patched streams are kept
// in a size-limited cache: a hash map from a 64-bit key to an iterator into a
// usage-ordered list, with the least recently used entry last.
class CxbxVertexBufferConverter
|
||||
{
|
||||
public:
|
||||
CxbxVertexBufferConverter();
|
||||
~CxbxVertexBufferConverter();
|
||||
|
||||
void Apply(CxbxDrawContext *pPatchDesc, DWORD StartIndex = 0);
|
||||
void Apply(CxbxDrawContext *pPatchDesc);
|
||||
void PrintStats();
|
||||
private:
|
||||
UINT m_uiNbrStreams;
|
||||
|
||||
// Cache statistics: running totals of cache hits and misses
// (presumably what PrintStats() reports - confirm in the implementation)
|
||||
ULONG m_TotalCacheHits = 0;
|
||||
ULONG m_TotalCacheMisses = 0;
|
||||
|
||||
UINT m_uiNbrStreams;
|
||||
CxbxPatchedStream m_PatchedStreams[MAX_NBR_STREAMS];
|
||||
|
||||
PVOID m_pNewVertexStreamZeroData;
|
||||
|
||||
bool m_bAllocatedStreamZeroData;
|
||||
UINT m_MaxCacheSize = 2000; // Maximum number of entries in the cache
|
||||
UINT m_CacheElasticity = 200; // Cache is allowed to grow this much more than maximum before being purged to maximum
|
||||
std::unordered_map<uint64_t, std::list<CxbxPatchedStream>::iterator> m_PatchedStreams; // Stores references to patched streams for fast lookup
|
||||
std::list<CxbxPatchedStream> m_PatchedStreamUsageList; // Linked list of vertex streams, least recently used is last in the list
|
||||
CxbxPatchedStream& GetPatchedStream(uint64_t); // Fetches (or inserts) a patched stream associated with the given key
|
||||
|
||||
// Vertex shader info for the current draw; Apply() resets it to nullptr and
// sets it only when the draw context's handle is a vertex shader
XTL::CxbxVertexShaderInfo *m_pVertexShaderInfo;
|
||||
|
||||
|
@ -82,7 +96,7 @@ class CxbxVertexBufferConverter
|
|||
UINT GetNbrStreams(CxbxDrawContext *pPatchDesc);
|
||||
|
||||
// Patches the types of the stream
|
||||
void ConvertStream(CxbxDrawContext *pPatchDesc, UINT uiStream, DWORD StartIndex);
|
||||
void ConvertStream(CxbxDrawContext *pPatchDesc, UINT uiStream);
|
||||
};
|
||||
|
||||
// inline vertex buffer emulation
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
#include "..\..\import\XbSymbolDatabase\XbSymbolDatabase.h"
|
||||
#include "Intercept.hpp"
|
||||
#include "Patches.hpp"
|
||||
#include "common\util\xxhash32.h"
|
||||
#include "common\util\hasher.h"
|
||||
#include <Shlwapi.h>
|
||||
#include <shlobj.h>
|
||||
#include <unordered_map>
|
||||
|
@ -385,7 +385,7 @@ void EmuHLEIntercept(Xbe::Header *pXbeHeader)
|
|||
}
|
||||
|
||||
// Hash the loaded XBE's header, use it as a filename
|
||||
uint32_t uiHash = XXHash32::hash((void*)&CxbxKrnl_Xbe->m_Header, sizeof(Xbe::Header), 0);
|
||||
uint64_t uiHash = ComputeHash((void*)&CxbxKrnl_Xbe->m_Header, sizeof(Xbe::Header));
|
||||
std::stringstream sstream;
|
||||
char tAsciiTitle[40] = "Unknown";
|
||||
std::setlocale(LC_ALL, "English");
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#define EMUXTL_H
|
||||
|
||||
#include <vector> // Needed for EmuDSound.h file, must be outside of XTL namespace.
|
||||
#include <unordered_map>
|
||||
|
||||
namespace XTL
|
||||
{
|
||||
|
|
|
@ -59,7 +59,7 @@
|
|||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <fcntl.h> // for _O_TEXT
|
||||
#include "common\util\xxhash32.h" // for XXHash32::hash
|
||||
#include "common\util\hasher.h"
|
||||
|
||||
#define XBOX_LED_FLASH_PERIOD 176 // if you know a more accurate value, put it here
|
||||
|
||||
|
@ -1063,7 +1063,7 @@ LRESULT CALLBACK WndMain::WndProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lP
|
|||
std::string cacheDir = g_Settings->GetDataLocation() + "\\SymbolCache\\";
|
||||
|
||||
// Hash the loaded XBE's header, use it as a filename
|
||||
uint32_t uiHash = XXHash32::hash((void*)&m_Xbe->m_Header, sizeof(Xbe::Header), 0);
|
||||
uint64_t uiHash = ComputeHash((void*)&m_Xbe->m_Header, sizeof(Xbe::Header));
|
||||
std::stringstream sstream;
|
||||
std::string szTitleName(m_Xbe->m_szAsciiTitle);
|
||||
m_Xbe->PurgeBadChar(szTitleName);
|
||||
|
|
Loading…
Reference in New Issue