Implement Vertex Buffer Caching

This commit is contained in:
Luke Usher 2019-06-12 23:08:28 +01:00 committed by PatrickvL
parent a76bac4205
commit f36cd0540a
14 changed files with 3079 additions and 381 deletions

View File

@ -205,6 +205,7 @@ file (GLOB CXBXR_SOURCE_COMMON
"${CXBXR_ROOT_DIR}/src/common/Timer.cpp"
"${CXBXR_ROOT_DIR}/src/common/util/crc32c.cpp"
"${CXBXR_ROOT_DIR}/src/common/util/CxbxUtil.cpp"
"${CXBXR_ROOT_DIR}/src/common/util/hasher.cpp"
"${CXBXR_ROOT_DIR}/src/common/win32/DInputController.cpp"
"${CXBXR_ROOT_DIR}/src/common/win32/EmuShared.cpp"
"${CXBXR_ROOT_DIR}/src/common/win32/InlineFunc.cpp"

View File

@ -32,10 +32,9 @@ link_directories(
if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
add_compile_definitions(
_CRT_SECURE_NO_WARNINGS
_CRT_SECURE_NO_WARNINGS
# Windows 7 minimum requirement
_WIN32_WINNT=0x0601
LTM_DESC
USE_LTM
LTC_NO_TEST
@ -45,6 +44,9 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC")
LTC_NO_PRNGS
LTC_NO_MISC
LTC_NO_PROTOTYPES
# Use inline XXHash version
XXH_INLINE_ALL
)
# Reference: https://docs.microsoft.com/en-us/cpp/build/reference/compiler-options-listed-alphabetically

View File

@ -0,0 +1,42 @@
#include "hasher.h"
#include "xxhash.h"
#include "crc32c.h"
#include <cstdio>
// Identifiers for the hash back-ends that ComputeHash() can dispatch to.
enum {
    HASH_NONE   = 0, // nothing selected yet
    HASH_XXH3   = 1, // xxHash XXH3, 64-bit variant
    HASH_CRC32C = 2  // CRC32C
};

// Back-end currently in use; remains HASH_NONE until InitHasher() assigns one.
static int g_HashAlgorithm = HASH_NONE;
// Selects the hash algorithm used by ComputeHash() on this host and stores it
// in g_HashAlgorithm. CRC32C is chosen when crc32c_hw_available() reports
// support; XXH3 is the fallback. The selection is printed to stdout.
//
// TODO/Future improvement: support additional hash algorithms, and hash a
// random buffer at start-up to measure which one is fastest on the host.
void InitHasher()
{
    printf("Selecting hash algorithm: ");

    const bool hasHardwareCrc32c = crc32c_hw_available();
    if (!hasHardwareCrc32c) {
        printf("XXH3\n");
        g_HashAlgorithm = HASH_XXH3;
        return;
    }

    printf("CRC32C\n");
    g_HashAlgorithm = HASH_CRC32C;
}
// Returns a hash of the `len` bytes starting at `data`, computed with the
// algorithm held in g_HashAlgorithm. If no algorithm has been selected yet,
// InitHasher() is run first. Yields 0 when the selected value matches neither
// known algorithm.
//
// NOTE(review): this definition is __forceinline while hasher.h declares it
// `extern __forceinline`; confirm the toolchain still emits a definition that
// other translation units can link against.
__forceinline uint64_t ComputeHash(void* data, size_t len)
{
    // Lazy one-time selection on first use
    if (g_HashAlgorithm == HASH_NONE) {
        InitHasher();
    }

    const int algorithm = g_HashAlgorithm;
    if (algorithm == HASH_XXH3) {
        return XXH3_64bits(data, len);
    }
    if (algorithm == HASH_CRC32C) {
        return crc32c_append(0, (uint8_t*)data, len);
    }

    return 0;
}

34
src/common/util/hasher.h Normal file
View File

@ -0,0 +1,34 @@
// This is an open source non-commercial project. Dear PVS-Studio, please check it.
// PVS-Studio Static Code Analyzer for C, C++ and C#: http://www.viva64.com
// ******************************************************************
// *
// * This file is part of the Cxbx project.
// *
// * Cxbx and Cxbe are free software; you can redistribute them
// * and/or modify them under the terms of the GNU General Public
// * License as published by the Free Software Foundation; either
// * version 2 of the license, or (at your option) any later version.
// *
// * This program is distributed in the hope that it will be useful,
// * but WITHOUT ANY WARRANTY; without even the implied warranty of
// * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// * GNU General Public License for more details.
// *
// * You should have received a copy of the GNU General Public License
// * along with this program; see the file COPYING.
// * If not, write to the Free Software Foundation, Inc.,
// * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// *
// * (c) 2019 - Luke Usher
// *
// * All rights reserved
// *
// ******************************************************************
#ifndef _HASHER_H
#define _HASHER_H
#include <stddef.h> // size_t
#include <stdint.h> // uint64_t

// Computes a hash of the `len` bytes starting at `data` and returns it as a
// 64-bit value.
//
// The implementation in hasher.cpp selects the algorithm on first use:
// CRC32C when crc32c_hw_available() reports support, XXH3 (64-bit) otherwise.
// In the CRC32C case the result is the return value of crc32c_append()
// converted to uint64_t.
//
// NOTE(review): because the algorithm depends on the host, hash values are
// presumably only comparable within one run on one machine - confirm before
// persisting them.
extern __forceinline uint64_t ComputeHash(void* data, size_t len);
#endif

1222
src/common/util/xxh3.h Normal file

File diff suppressed because it is too large Load Diff

1024
src/common/util/xxhash.c Normal file

File diff suppressed because it is too large Load Diff

512
src/common/util/xxhash.h Normal file
View File

@ -0,0 +1,512 @@
/*
xxHash - Extremely Fast Hash algorithm
Header File
Copyright (C) 2012-2016, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- xxHash source repository : https://github.com/Cyan4973/xxHash
*/
/* Notice extracted from xxHash homepage :
xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
It also successfully passes all tests from the SMHasher suite.
Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
Name Speed Q.Score Author
xxHash 5.4 GB/s 10
CrapWow 3.2 GB/s 2 Andrew
MurmurHash 3a 2.7 GB/s 10 Austin Appleby
SpookyHash 2.0 GB/s 10 Bob Jenkins
SBox 1.4 GB/s 9 Bret Mulvey
Lookup3 1.2 GB/s 9 Bob Jenkins
SuperFastHash 1.2 GB/s 1 Paul Hsieh
CityHash64 1.05 GB/s 10 Pike & Alakuijala
FNV 0.55 GB/s 5 Fowler, Noll, Vo
CRC32 0.43 GB/s 9
MD5-32 0.33 GB/s 10 Ronald L. Rivest
SHA1-32 0.28 GB/s 10
Q.Score is a measure of quality of the hash function.
It depends on successfully passing SMHasher test set.
10 is a perfect score.
A 64-bit version, named XXH64, is available since r35.
It offers much better speed, but for 64-bit applications only.
Name Speed on 64 bits Speed on 32 bits
XXH64 13.8 GB/s 1.9 GB/s
XXH32 6.8 GB/s 6.0 GB/s
*/
#ifndef XXHASH_H_5627135585666179
#define XXHASH_H_5627135585666179 1
#if defined (__cplusplus)
extern "C" {
#endif
/* ****************************
* Definitions
******************************/
#include <stddef.h> /* size_t */
typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
/* ****************************
* API modifier
******************************/
/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
* This is useful to include xxhash functions in `static` mode
* in order to inline them, and remove their symbol from the public list.
* Inlining can offer dramatic performance improvement on small keys.
* Methodology :
* #define XXH_INLINE_ALL
* #include "xxhash.h"
* `xxhash.c` is automatically included.
* It's not useful to compile and link it as a separate module.
*/
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
# ifndef XXH_STATIC_LINKING_ONLY
# define XXH_STATIC_LINKING_ONLY
# endif
# if defined(__GNUC__)
# define XXH_PUBLIC_API static __inline __attribute__((unused))
# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# define XXH_PUBLIC_API static inline
# elif defined(_MSC_VER)
# define XXH_PUBLIC_API static __inline
# else
/* this version may generate warnings for unused static functions */
# define XXH_PUBLIC_API static
# endif
#else
# if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
# ifdef XXH_EXPORT
# define XXH_PUBLIC_API __declspec(dllexport)
# elif XXH_IMPORT
# define XXH_PUBLIC_API __declspec(dllimport)
# endif
# else
# define XXH_PUBLIC_API /* do nothing */
# endif
#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
/*! XXH_NAMESPACE, aka Namespace Emulation :
*
* If you want to include _and expose_ xxHash functions from within your own library,
* but also want to avoid symbol collisions with other libraries which may also include xxHash,
*
* you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
* with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
*
* Note that no change is required within the calling program as long as it includes `xxhash.h` :
* regular symbol name will be automatically translated by this header.
*/
#ifdef XXH_NAMESPACE
# define XXH_CAT(A,B) A##B
# define XXH_NAME2(A,B) XXH_CAT(A,B)
# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
#endif
/* *************************************
* Version
***************************************/
#define XXH_VERSION_MAJOR 0
#define XXH_VERSION_MINOR 7
#define XXH_VERSION_RELEASE 0
#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
XXH_PUBLIC_API unsigned XXH_versionNumber (void);
/*-**********************************************************************
* 32-bit hash
************************************************************************/
#if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint32_t XXH32_hash_t;
#else
typedef unsigned int XXH32_hash_t;
#endif
/*! XXH32() :
Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
The memory between input & input+length must be valid (allocated and read-accessible).
"seed" can be used to alter the result predictably.
Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
/*====== Streaming ======*/
typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
/*
* Streaming functions generate the xxHash of an input provided in multiple segments.
* Note that, for small input, they are slower than single-call functions, due to state management.
* For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
*
* XXH state must first be allocated, using XXH*_createState() .
*
* Start a new hash by initializing state with a seed, using XXH*_reset().
*
* Then, feed the hash state by calling XXH*_update() as many times as necessary.
* The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
*
* Finally, a hash value can be produced anytime, by using XXH*_digest().
* This function returns the nn-bits hash as an int or long long.
*
* It's still possible to continue inserting input into the hash state after a digest,
* and generate some new hashes later on, by calling again XXH*_digest().
*
* When done, free XXH state space if it was allocated dynamically.
*/
/*====== Canonical representation ======*/
typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
* The canonical representation uses human-readable write convention, aka big-endian (large digits first).
* These functions allow transformation of hash result into and from its canonical format.
* This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
*/
#ifndef XXH_NO_LONG_LONG
/*-**********************************************************************
* 64-bit hash
************************************************************************/
#if !defined (__VMS) \
&& (defined (__cplusplus) \
|| (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
# include <stdint.h>
typedef uint64_t XXH64_hash_t;
#else
typedef unsigned long long XXH64_hash_t;
#endif
/*! XXH64() :
Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
"seed" can be used to alter the result predictably.
This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
*/
XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
/*====== Streaming ======*/
typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
/*====== Canonical representation ======*/
typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
#endif /* XXH_NO_LONG_LONG */
#ifdef XXH_STATIC_LINKING_ONLY
/* ================================================================================================
This section contains declarations which are not guaranteed to remain stable.
They may change in future versions, becoming incompatible with a different version of the library.
These declarations should only be used with static linking.
Never use them in association with dynamic linking !
=================================================================================================== */
/* These definitions are only present to allow
* static allocation of XXH state, on stack or in a struct for example.
* Never **ever** use members directly. */
/* Concrete layout behind the XXH32_state_t typedef used by the XXH32 streaming API.
 * It is only visible when XXH_STATIC_LINKING_ONLY is defined, so that a state can be
 * allocated statically; the members are not meant to be accessed by callers.
 * Every member is an XXH32_hash_t.
 * NOTE(review): the role of each member is defined by xxhash.c, which is not shown here. */
struct XXH32_state_s {
XXH32_hash_t total_len_32;
XXH32_hash_t large_len;
XXH32_hash_t v1;
XXH32_hash_t v2;
XXH32_hash_t v3;
XXH32_hash_t v4;
XXH32_hash_t mem32[4];
XXH32_hash_t memsize;
XXH32_hash_t reserved; /* never read nor write, might be removed in a future version */
}; /* typedef'd to XXH32_state_t */
#ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
/* Concrete layout behind the XXH64_state_t typedef used by the XXH64 streaming API.
 * Like XXH32_state_s, it is exposed only for static allocation; do not touch members directly.
 * Note the mixed widths: the length, v1..v4 and mem64 are XXH64_hash_t, while
 * memsize and reserved are XXH32_hash_t.
 * NOTE(review): the role of each member is defined by xxhash.c, which is not shown here. */
struct XXH64_state_s {
XXH64_hash_t total_len;
XXH64_hash_t v1;
XXH64_hash_t v2;
XXH64_hash_t v3;
XXH64_hash_t v4;
XXH64_hash_t mem64[4];
XXH32_hash_t memsize;
XXH32_hash_t reserved[2]; /* never read nor write, might be removed in a future version */
}; /* typedef'd to XXH64_state_t */
#endif /* XXH_NO_LONG_LONG */
/*-**********************************************************************
* XXH3
* New experimental hash
************************************************************************/
#ifndef XXH_NO_LONG_LONG
/* ============================================
* XXH3 is a new hash algorithm,
* featuring vastly improved speed performance
* for both small and large inputs.
* See full speed analysis at : http://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
* In general, expect XXH3 to run about ~2x faster on large inputs,
* and >3x faster on small ones, though exact difference depend on platform.
*
* The algorithm is portable, will generate the same hash on all platforms.
* It benefits greatly from vectorization units, but does not require it.
*
* XXH3 offers 2 variants, _64bits and _128bits.
* When only 64 bits are needed, prefer calling the _64bits variant :
* it reduces the amount of mixing, resulting in faster speed on small inputs.
* It's also generally simpler to manipulate a scalar return type than a struct.
*
* The XXH3 algorithm is still considered experimental.
* Produced results can still change between versions.
* It's possible to use it for ephemeral data, but avoid storing long-term values for later re-use.
*
* The API currently supports one-shot hashing only.
* The full version will include streaming capability, and canonical representation.
*
* There are still a number of opened questions that community can influence during the experimental period.
* I'm trying to list a few of them below, though don't consider this list as complete.
*
* - 128-bits output type : currently defined as a structure of 2 64-bits fields.
* That's because 128-bit values do not exist in C standard.
* Note that this means that, at byte level, the result differs depending on endianness.
* However, at field level, they are identical on all platforms.
* The canonical representation will solve the issue of identical byte-level representation across platforms,
* which is necessary for serialization.
* Would there be a better representation for a 128-bit hash result ?
* Are the names of the inner 64-bit fields important ? Should they be changed ?
*
* - Canonical representation : for the 64-bit variant, canonical representation is the same as XXH64() (aka big-endian).
* What should it be for the 128-bit variant ?
* Since it's no longer a scalar value, big-endian representation is no longer an obvious choice.
* One possibility : represent it as the concatenation of two 64-bits canonical representation (aka 2x big-endian)
* Another one : represent it in the same order as natural order in the struct for little-endian platforms.
* Less consistent with existing convention for XXH32/XXH64, but may be more natural for little-endian platforms.
*
* - Associated functions for 128-bit hash : simple things, such as checking if 2 hashes are equal, become more difficult with struct.
* Granted, it's not terribly difficult to create a comparator, but it's still a workload.
* Would it be beneficial to declare and define a comparator function for XXH128_hash_t ?
* Are there other operations on XXH128_hash_t which would be desirable ?
*
* - Seed type for 128-bits variant : currently, it's a single 64-bit value, like the 64-bit variant.
* It could be argued that it's more logical to offer a 128-bit seed input parameter for a 128-bit hash.
* Although it's also more difficult to use, since it requires to declare and pass a structure instead of a value.
* It would either replace current choice, or add a new one.
* Farmhash, for example, offers both variants (the 128-bits seed variant is called `doubleSeed`).
* If both 64-bit and 128-bit seeds are possible, which variant should be called XXH128 ?
*
* - Result for len==0 : Currently, the result of hashing a zero-length input is `0`.
* It seems okay as a return value when using all "default" secret and seed (it used to be a request for XXH32/XXH64).
* But is it still fine to return `0` when secret or seed are non-default ?
* Are there use case which would depend on a different hash result when the secret is different ?
*/
#ifdef XXH_NAMESPACE
# define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
# define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
# define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
# define XXH3_64bits_createState XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_createState)
# define XXH3_64bits_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_freeState)
# define XXH3_64bits_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_copyState)
# define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
# define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
# define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
# define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
# define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
# define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
# define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
# define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
#endif
/* XXH3_64bits() :
* default 64-bit variant, using default secret and default seed of 0.
* it's also the fastest one. */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
/* XXH3_64bits_withSecret() :
* It's possible to provide any blob of bytes as a "secret" to generate the hash.
* This makes it more difficult for an external actor to prepare an intentional collision.
* The secret *must* be large enough (>= XXH_SECRET_SIZE_MIN).
* It should consist of random bytes.
* Avoid repeating same character, and especially avoid swathes of \0.
* Avoid repeating sequences of bytes within the secret.
* Failure to respect these conditions will result in a bad quality hash.
*/
#define XXH_SECRET_SIZE_MIN 136
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
/* XXH3_64bits_withSeed() :
* This variant generates on the fly a custom secret,
* based on the default secret, altered using the `seed` value.
* While this operation is decently fast, note that it's not completely free.
* note : seed==0 produces same results as XXH3_64bits() */
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
/* streaming 64-bit */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* C11+ */
# include <stdalign.h>
# define XXH_ALIGN(n) alignas(n)
#elif defined(__GNUC__)
# define XXH_ALIGN(n) __attribute__ ((aligned(n)))
#elif defined(_MSC_VER)
# define XXH_ALIGN(n) __declspec(align(n))
#else
# define XXH_ALIGN(n) /* disabled */
#endif
typedef struct XXH3_state_s XXH3_state_t;
#define XXH3_SECRET_DEFAULT_SIZE 192 /* minimum XXH_SECRET_SIZE_MIN */
#define XXH3_INTERNALBUFFER_SIZE 128
/* Concrete layout behind the XXH3_state_t typedef used by the XXH3 64-bit streaming API.
 * acc, customSecret and buffer each request 64-byte alignment through XXH_ALIGN(64)
 * (which expands to nothing on compilers where XXH_ALIGN is disabled).
 * customSecret holds XXH3_SECRET_DEFAULT_SIZE (192) bytes and buffer holds
 * XXH3_INTERNALBUFFER_SIZE (128) bytes.
 * NOTE(review): `secret` presumably points at either customSecret or a caller-supplied
 * secret (see XXH3_64bits_reset_withSecret) - confirm against the implementation. */
struct XXH3_state_s {
XXH_ALIGN(64) XXH64_hash_t acc[8];
XXH_ALIGN(64) char customSecret[XXH3_SECRET_DEFAULT_SIZE]; /* used to store a custom secret generated from the seed. Makes state larger. Design might change */
XXH_ALIGN(64) char buffer[XXH3_INTERNALBUFFER_SIZE];
const void* secret;
XXH32_hash_t bufferedSize;
XXH32_hash_t nbStripesPerBlock;
XXH32_hash_t nbStripesSoFar;
XXH32_hash_t reserved32;
XXH32_hash_t reserved32_2;
XXH32_hash_t secretLimit;
XXH64_hash_t totalLen;
XXH64_hash_t seed;
XXH64_hash_t reserved64;
}; /* typedef'd to XXH3_state_t */
/* Streaming requires state maintenance.
* This operation costs memory and cpu.
* As a consequence, streaming is slower than one-shot hashing.
* For better performance, prefer one-shot functions whenever possible. */
XXH_PUBLIC_API XXH3_state_t* XXH3_64bits_createState(void);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_freeState(XXH3_state_t* statePtr);
XXH_PUBLIC_API void XXH3_64bits_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
/* XXH3_64bits_reset() :
* initialize with default parameters.
* result will be equivalent to `XXH3_64bits()` */
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
/* XXH3_64bits_reset_withSeed() :
* generate a custom secret from `seed`, and store it into state.
* digest will be equivalent to `XXH3_64bits_withSeed()` */
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
/* XXH3_64bits_reset_withSecret() :
* `secret` is referenced, and must outlive the hash streaming session.
* secretSize must be >= XXH_SECRET_SIZE_MIN.
*/
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* statePtr);
/* 128-bit */
typedef struct {
XXH64_hash_t low64;
XXH64_hash_t high64;
} XXH128_hash_t;
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed); /* == XXH128() */
XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
#endif /* XXH_NO_LONG_LONG */
/*-**********************************************************************
* XXH_INLINE_ALL
************************************************************************/
#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
#endif
#endif /* XXH_STATIC_LINKING_ONLY */
#if defined (__cplusplus)
}
#endif
#endif /* XXHASH_H_5627135585666179 */

View File

@ -1,167 +0,0 @@
// //////////////////////////////////////////////////////////
// xxhash32.h
// Copyright (c) 2016 Stephan Brumme. All rights reserved.
// see https://create.stephan-brumme.com/disclaimer.html
//
#pragma once
#include <stdint.h> // for uint32_t and uint64_t
#include "crc32c.h"
/// XXHash (32 bit), based on Yann Collet's descriptions, see https://cyan4973.github.io/xxHash/
/** How to use:
uint32_t myseed = 0;
XXHash32 myhash(myseed);
myhash.add(pointerToSomeBytes, numberOfBytes);
myhash.add(pointerToSomeMoreBytes, numberOfMoreBytes); // call add() as often as you like to ...
// and compute hash:
uint32_t result = myhash.hash();
// or all of the above in one single line:
uint32_t result2 = XXHash32::hash(mypointer, numBytes, myseed);
Note: my code is NOT endian-aware !
**/
class XXHash32
{
public:
    /// create new XXHash (32 bit)
    /** @param seed your seed value, even zero is a valid seed and e.g. used by LZ4 **/
    explicit XXHash32(uint32_t seed)
    {
        state[0] = seed + Prime1 + Prime2;
        state[1] = seed + Prime2;
        state[2] = seed;
        state[3] = seed - Prime1;
        bufferSize = 0;
        totalLength = 0;
    }

    /// add a chunk of bytes
    /** @param input pointer to a continuous block of data
        @param length number of bytes
        @return false if input is null or length is zero (nothing is added), true otherwise **/
    bool add(const void* input, uint64_t length)
    {
        // no data ?
        if (!input || length == 0)
            return false;

        totalLength += length;

        // byte-wise access
        const unsigned char* data = (const unsigned char*)input;

        // unprocessed old data plus new data still fit in temporary buffer ?
        if (bufferSize + length < MaxBufferSize)
        {
            // just add new data
            while (length-- > 0)
                buffer[bufferSize++] = *data++;
            return true;
        }

        // point beyond last byte
        const unsigned char* stop = data + length;
        const unsigned char* stopBlock = stop - MaxBufferSize;

        // some data left from previous update ?
        if (bufferSize > 0)
        {
            // make sure temporary buffer is full (16 bytes)
            while (bufferSize < MaxBufferSize)
                buffer[bufferSize++] = *data++;

            // process these 16 bytes (4x4)
            process(buffer, state[0], state[1], state[2], state[3]);
        }

        // copying state to local variables helps optimizer A LOT
        uint32_t s0 = state[0], s1 = state[1], s2 = state[2], s3 = state[3];

        // 16 bytes at once
        while (data <= stopBlock)
        {
            // local variables s0..s3 instead of state[0]..state[3] are much faster
            process(data, s0, s1, s2, s3);
            data += 16;
        }

        // copy back
        state[0] = s0; state[1] = s1; state[2] = s2; state[3] = s3;

        // copy remainder to temporary buffer
        bufferSize = stop - data;
        for (unsigned int i = 0; i < bufferSize; i++)
            buffer[i] = data[i];

        // done
        return true;
    }

    /// get current hash
    /** Does not modify the object, so more data can be added afterwards.
        @return 32 bit XXHash **/
    uint32_t hash() const
    {
        uint32_t result = (uint32_t)totalLength;

        // fold 128 bit state into one single 32 bit value
        if (totalLength >= MaxBufferSize)
            result += rotateLeft(state[0], 1) +
                      rotateLeft(state[1], 7) +
                      rotateLeft(state[2], 12) +
                      rotateLeft(state[3], 18);
        else
            // internal state wasn't set in add(), therefore original seed is still stored in state2
            result += state[2] + Prime5;

        // process remaining bytes in temporary buffer
        const unsigned char* data = buffer;
        // point beyond last byte
        const unsigned char* stop = data + bufferSize;

        // at least 4 bytes left ? => eat 4 bytes per step
        for (; data + 4 <= stop; data += 4)
            result = rotateLeft(result + *(uint32_t*)data * Prime3, 17) * Prime4;

        // take care of remaining 0..3 bytes, eat 1 byte per step
        while (data != stop)
            result = rotateLeft(result + (*data++) * Prime5, 11) * Prime1;

        // mix bits
        result ^= result >> 15;
        result *= Prime2;
        result ^= result >> 13;
        result *= Prime3;
        result ^= result >> 16;
        return result;
    }

    /// one-shot hash of a buffer (C style)
    /** @param input pointer to a continuous block of data
        @param length number of bytes
        @param seed your seed value, e.g. zero is a valid seed and used by LZ4
        @return 32 bit hash: the result of crc32c_append(seed, ...) when
                crc32c_hw_available() reported support, otherwise the XXHash32
                of the data. The value therefore differs between hosts. **/
    static uint32_t hash(const void* input, uint64_t length, uint32_t seed)
    {
        // Some modern CPUs support hardware accelerated CRC32
        // This is significantly faster than xxHash, in some cases, by more than double
        // So now we check for this capability and use it if it exists.
        // This significantly reduces the impact of hashing on CPUs supporting SSE4.2
        // but also keeps xxHash present as a fast fallback, for those who don't support it
        static bool bHardwareCrc32 = crc32c_hw_available(); // Cache the result in a static variable to avoid _cpuid every call

        if (bHardwareCrc32) {
            return crc32c_append(seed, (uint8_t*)input, (size_t)length);
        }

        // software fallback: constructor, add() and hash() combined
        XXHash32 hasher(seed);
        hasher.add(input, length);
        return hasher.hash();
    }

private:
    /// magic constants :-)
    static const uint32_t Prime1 = 2654435761U;
    static const uint32_t Prime2 = 2246822519U;
    static const uint32_t Prime3 = 3266489917U;
    static const uint32_t Prime4 = 668265263U;
    static const uint32_t Prime5 = 374761393U;

    /// temporarily store up to 15 bytes between multiple add() calls
    static const uint32_t MaxBufferSize = 15 + 1;

    // internal state and temporary buffer
    uint32_t state[4]; // state[2] == seed if totalLength < MaxBufferSize
    unsigned char buffer[MaxBufferSize];
    unsigned int bufferSize;
    uint64_t totalLength;

    /// rotate bits, should compile to a single CPU instruction (ROL)
    static inline uint32_t rotateLeft(uint32_t x, unsigned char bits)
    {
        return (x << bits) | (x >> (32 - bits));
    }

    /// process a block of 4x4 bytes, this is the main part of the XXHash32 algorithm
    /** reads four 32-bit words from data and updates the four state words in place **/
    static inline void process(const void* data, uint32_t& state0, uint32_t& state1, uint32_t& state2, uint32_t& state3)
    {
        const uint32_t* block = (const uint32_t*)data;
        state0 = rotateLeft(state0 + block[0] * Prime2, 13) * Prime1;
        state1 = rotateLeft(state1 + block[1] * Prime2, 13) * Prime1;
        state2 = rotateLeft(state2 + block[2] * Prime2, 13) * Prime1;
        state3 = rotateLeft(state3 + block[3] * Prime2, 13) * Prime1;
    }
};

View File

@ -27,7 +27,7 @@
#define _XBOXKRNL_DEFEXTRN_
#define LOG_PREFIX CXBXR_MODULE::D3D8
#include "common\util\xxhash32.h"
#include "common\util\hasher.h"
#include <condition_variable>
// prevent name collisions
@ -150,6 +150,8 @@ static DWORD g_VBLastSwap = 0;
static XTL::D3DSWAPDATA g_SwapData = {0};
static DWORD g_SwapLast = 0;
static XTL::CxbxVertexBufferConverter VertexBufferConverter = {};
// cached Direct3D state variable(s)
static XTL::IDirect3DIndexBuffer *pClosingLineLoopIndexBuffer = nullptr;
@ -716,7 +718,7 @@ typedef struct {
DWORD dwXboxResourceType = 0;
void* pXboxData = nullptr;
size_t szXboxDataSize = 0;
uint32_t hash = 0;
uint64_t hash = 0;
bool forceRehash = false;
std::chrono::time_point<std::chrono::high_resolution_clock> nextHashTime;
std::chrono::milliseconds hashLifeTime = 1ms;
@ -866,8 +868,8 @@ bool HostResourceRequiresUpdate(resource_key_t key, DWORD dwSize)
auto now = std::chrono::high_resolution_clock::now();
if (now > it->second.nextHashTime || it->second.forceRehash) {
uint32_t oldHash = it->second.hash;
it->second.hash = XXHash32::hash(it->second.pXboxData, it->second.szXboxDataSize, 0);
uint64_t oldHash = it->second.hash;
it->second.hash = ComputeHash(it->second.pXboxData, it->second.szXboxDataSize);
if (it->second.hash != oldHash) {
// The data changed, so reset the hash lifetime
@ -905,7 +907,7 @@ void SetHostResource(XTL::X_D3DResource* pXboxResource, XTL::IDirect3DResource*
resourceInfo.dwXboxResourceType = GetXboxCommonResourceType(pXboxResource);
resourceInfo.pXboxData = GetDataFromXboxResource(pXboxResource);
resourceInfo.szXboxDataSize = dwSize > 0 ? dwSize : GetXboxResourceSize(pXboxResource);
resourceInfo.hash = XXHash32::hash(resourceInfo.pXboxData, resourceInfo.szXboxDataSize, 0);
resourceInfo.hash = ComputeHash(resourceInfo.pXboxData, resourceInfo.szXboxDataSize);
resourceInfo.hashLifeTime = 1ms;
resourceInfo.lastUpdate = std::chrono::high_resolution_clock::now();
resourceInfo.nextHashTime = resourceInfo.lastUpdate + resourceInfo.hashLifeTime;
@ -1640,6 +1642,10 @@ static LRESULT WINAPI EmuMsgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lPar
ToggleFauxFullscreen(hWnd);
}
}
else if (wParam == VK_F1)
{
VertexBufferConverter.PrintStats();
}
else if (wParam == VK_F6)
{
// For some unknown reason, F6 isn't handled in WndMain::WndProc
@ -2332,7 +2338,7 @@ static void EmuVerifyResourceIsRegistered(XTL::X_D3DResource *pResource, DWORD D
}
typedef struct {
DWORD Hash = 0;
uint64_t Hash = 0;
DWORD IndexCount = 0;
XTL::IDirect3DIndexBuffer* pHostIndexBuffer = nullptr;
} ConvertedIndexBuffer;
@ -2388,7 +2394,7 @@ void CxbxUpdateActiveIndexBuffer
}
// If the data needs updating, do so
uint32_t uiHash = XXHash32::hash(pIndexData, IndexCount * 2, 0);
uint64_t uiHash = ComputeHash(pIndexData, IndexCount * 2);
if (uiHash != indexBuffer.Hash) {
// Update the Index Count and the hash
indexBuffer.IndexCount = IndexCount;
@ -7053,13 +7059,11 @@ void XTL::CxbxDrawIndexed(CxbxDrawContext &DrawContext)
CxbxUpdateActiveIndexBuffer(DrawContext.pIndexData, DrawContext.dwVertexCount);
CxbxVertexBufferConverter VertexBufferConverter = {};
//Walk through index buffer
// Determine highest and lowest index in use :
INDEX16 LowIndex, HighIndex;
WalkIndexBuffer(LowIndex, HighIndex, &(DrawContext.pIndexData[DrawContext.dwStartVertex]), DrawContext.dwVertexCount);
VertexBufferConverter.Apply(&DrawContext, LowIndex);
VertexBufferConverter.Apply(&DrawContext);
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
UINT uiStartIndex = 0;
@ -7135,7 +7139,6 @@ void XTL::CxbxDrawPrimitiveUP(CxbxDrawContext &DrawContext)
assert(DrawContext.uiXboxVertexStreamZeroStride > 0);
assert(DrawContext.dwIndexBase == 0); // No IndexBase under Draw*UP
CxbxVertexBufferConverter VertexBufferConverter = {};
VertexBufferConverter.Apply(&DrawContext);
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
// LOG_TEST_CASE("X_D3DPT_QUADLIST"); // X-Marbles and XDK Sample PlayField hits this case
@ -7329,7 +7332,7 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawVertices)
DrawContext.dwVertexCount = VertexCount;
DrawContext.dwStartVertex = StartVertex;
DrawContext.hVertexShader = g_CurrentXboxVertexShaderHandle;
CxbxVertexBufferConverter VertexBufferConverter = {};
VertexBufferConverter.Apply(&DrawContext);
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
// LOG_TEST_CASE("X_D3DPT_QUADLIST"); // ?X-Marbles and XDK Sample (Cartoon, ?maybe PlayField?) hits this case
@ -7538,7 +7541,6 @@ VOID WINAPI XTL::EMUPATCH(D3DDevice_DrawIndexedVerticesUP)
DrawContext.hVertexShader = g_CurrentXboxVertexShaderHandle;
// Don't set DrawContext.pIndexData = (INDEX16*)pIndexData; // Used by GetVerticesInBuffer
CxbxVertexBufferConverter VertexBufferConverter = {};
VertexBufferConverter.Apply(&DrawContext);
if (DrawContext.XboxPrimitiveType == X_D3DPT_QUADLIST) {
// Indexed quadlist can be drawn using unpatched indexes via multiple draws of 2 'strip' triangles :

View File

@ -28,19 +28,17 @@
#define _XBOXKRNL_DEFEXTRN_
#define LOG_PREFIX CXBXR_MODULE::VTXB
#include <unordered_map>
#include "core\kernel\memory-manager\VMManager.h"
#include "common\util\xxhash32.h" // For XXHash32::hash()
#include "common\util\hasher.h"
#include "core\kernel\support\Emu.h"
#include "core\kernel\support\EmuXTL.h"
#include "core\hle\D3D8\ResourceTracker.h"
#include <ctime>
#include <unordered_map>
#include <chrono>
#include <algorithm>
#define HASH_SEED 0
#define MAX_STREAM_NOT_USED_TIME (2 * CLOCKS_PER_SEC) // TODO: Trim the not used time
// Inline vertex buffer emulation
@ -60,137 +58,54 @@ extern XTL::X_D3DVertexBuffer*g_D3DStreams[16];
extern UINT g_D3DStreamStrides[16];
void *GetDataFromXboxResource(XTL::X_D3DResource *pXboxResource);
// Bookkeeping record for one host vertex buffer object that is kept alive for reuse
struct cached_vertex_buffer_object {
    XTL::IDirect3DVertexBuffer* pHostVertexBuffer; // The host-side vertex buffer object
    size_t uiSize; // Size (as passed to CreateVertexBuffer) the host buffer was created with
    std::chrono::time_point<std::chrono::high_resolution_clock> lastUsed; // Last time this entry was requested
};

// All cached host vertex buffer objects, keyed by the value passed as pXboxDataPtr
// to GetCachedVertexBufferObject
std::unordered_map<DWORD, cached_vertex_buffer_object> g_HostVertexBuffers;
// Caches host Vertex Buffer Objects (the objects themselves, not their contents),
// so that an existing buffer can be reused instead of being released and
// re-allocated on every draw. This gives a (slight) performance boost.
//
// pXboxDataPtr  : key identifying the Xbox vertex data the buffer belongs to
// size          : minimum size required for the host buffer
// pVertexBuffer : receives the host vertex buffer to use
//
// Returns true if the existing vertex buffer was trashed/made invalid (it was too
// small and had to be re-created); false if a new or still-suitable buffer is returned.
bool GetCachedVertexBufferObject(DWORD pXboxDataPtr, DWORD size, XTL::IDirect3DVertexBuffer** pVertexBuffer)
{
    // TODO: If the vertex buffer object cache becomes too large,
    // free the least recently used vertex buffers

    // Creates a dynamic, write-only host vertex buffer of the requested size;
    // terminates emulation through CxbxKrnlCleanup when creation fails
    const auto createHostBuffer = [size](XTL::IDirect3DVertexBuffer** ppHostBuffer) {
        HRESULT hRet = g_pD3DDevice->CreateVertexBuffer(
            size,
            D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC,
            0,
            XTL::D3DPOOL_DEFAULT,
            ppHostBuffer,
            nullptr
        );
        if (FAILED(hRet)) {
            CxbxKrnlCleanup("Failed to create vertex buffer");
        }
    };

    auto found = g_HostVertexBuffers.find(pXboxDataPtr);
    if (found != g_HostVertexBuffers.end()) {
        cached_vertex_buffer_object& entry = found->second;
        entry.lastUsed = std::chrono::high_resolution_clock::now();

        // The existing buffer can only be reused when it is large enough
        const bool mustRecreate = size > entry.uiSize;
        if (mustRecreate) {
            // Too small: release the old buffer and create a bigger one in its place
            entry.pHostVertexBuffer->Release();
            entry.uiSize = size;
            createHostBuffer(&entry.pHostVertexBuffer);
        }

        *pVertexBuffer = entry.pHostVertexBuffer;
        return mustRecreate;
    }

    // Nothing cached for this key yet: create a new vertex buffer and remember it
    cached_vertex_buffer_object fresh;
    fresh.uiSize = size;
    fresh.lastUsed = std::chrono::high_resolution_clock::now();
    createHostBuffer(&fresh.pHostVertexBuffer);

    g_HostVertexBuffers[pXboxDataPtr] = fresh;
    *pVertexBuffer = fresh.pHostVertexBuffer;
    return false;
}
void ActivatePatchedStream
(
XTL::CxbxDrawContext *pDrawContext,
UINT uiStream,
XTL::CxbxPatchedStream *pPatchedStream,
bool bRelease
)
void XTL::CxbxPatchedStream::Activate(XTL::CxbxDrawContext *pDrawContext, UINT uiStream) const
{
//LOG_INIT // Allows use of DEBUG_D3DRESULT
// Use the cached stream values on the host
if (pPatchedStream->bCacheIsStreamZeroDrawUP) {
if (bCacheIsStreamZeroDrawUP) {
// Set the UserPointer variables in the drawing context
pDrawContext->pHostVertexStreamZeroData = pPatchedStream->pCachedHostVertexStreamZeroData;
pDrawContext->uiHostVertexStreamZeroStride = pPatchedStream->uiCachedHostVertexStride;
pDrawContext->pHostVertexStreamZeroData = pCachedHostVertexStreamZeroData;
pDrawContext->uiHostVertexStreamZeroStride = uiCachedHostVertexStride;
}
else {
HRESULT hRet = g_pD3DDevice->SetStreamSource(
uiStream,
pPatchedStream->pCachedHostVertexBuffer,
pCachedHostVertexBuffer,
0, // OffsetInBytes
pPatchedStream->uiCachedHostVertexStride);
uiCachedHostVertexStride);
//DEBUG_D3DRESULT(hRet, "g_pD3DDevice->SetStreamSource");
if (FAILED(hRet)) {
CxbxKrnlCleanup("Failed to set the type patched buffer as the new stream source!\n");
// TODO : Cartoon hits the above case when the vertex cache size is 0.
}
// TODO : The following doesn't fix that - find out why and fix it for real
if (bRelease) {
// Always release to prevent leaks when it wasn't read from cache:
pPatchedStream->pCachedHostVertexBuffer->Release();
// NOTE : Even this doesn't prevent Cartoon breaking : g_pD3DDevice->ResourceManagerDiscardBytes(0);
}
}
}
// A newly constructed patched stream holds no converted data yet, so it starts out invalid
XTL::CxbxPatchedStream::CxbxPatchedStream()
    : isValid(false)
{
}
void ReleasePatchedStream(XTL::CxbxPatchedStream *pPatchedStream)
XTL::CxbxPatchedStream::~CxbxPatchedStream()
{
if (pPatchedStream->bCachedHostVertexStreamZeroDataIsAllocated) {
free(pPatchedStream->pCachedHostVertexStreamZeroData);
pPatchedStream->bCachedHostVertexStreamZeroDataIsAllocated = false;
}
pPatchedStream->pCachedHostVertexStreamZeroData = nullptr;
pPatchedStream->pCachedHostVertexBuffer = nullptr; // g_HostVertexBuffers owns these nowadays
}
if (bCachedHostVertexStreamZeroDataIsAllocated) {
free(pCachedHostVertexStreamZeroData);
bCachedHostVertexStreamZeroDataIsAllocated = false;
}
pCachedHostVertexStreamZeroData = nullptr;
if (pCachedHostVertexBuffer != nullptr) {
pCachedHostVertexBuffer->Release();
pCachedHostVertexBuffer = nullptr;
}
}
// Constructs a converter with no active streams, a zero-filled patched-stream table,
// no stream-zero scratch data and no vertex shader info selected.
XTL::CxbxVertexBufferConverter::CxbxVertexBufferConverter()
{
this->m_uiNbrStreams = 0;
// NOTE(review): zero-filling is only valid while m_PatchedStreams is a plain array of
// MAX_NBR_STREAMS trivially-constructible entries. The class declaration in this change
// also shows m_PatchedStreams as a std::unordered_map, for which ZeroMemory would
// corrupt the container - confirm which declaration this constructor belongs to.
ZeroMemory(this->m_PatchedStreams, sizeof(CxbxPatchedStream) * MAX_NBR_STREAMS);
this->m_bAllocatedStreamZeroData = false;
this->m_pNewVertexStreamZeroData = NULL;
this->m_pVertexShaderInfo = NULL;
}
// Calls ReleasePatchedStream on every patched-stream slot, then resets the stream
// count and the selected vertex shader info.
XTL::CxbxVertexBufferConverter::~CxbxVertexBufferConverter()
{
// Visit all MAX_NBR_STREAMS slots, whether or not they were ever used
// NOTE(review): indexing with [i] presumes m_PatchedStreams is the fixed-size array
// form of the member - confirm against the class declaration.
for (int i = 0; i < MAX_NBR_STREAMS; i++) {
ReleasePatchedStream(&m_PatchedStreams[i]);
}
m_uiNbrStreams = 0;
m_pVertexShaderInfo = nullptr;
}
size_t GetVerticesInBuffer(DWORD dwOffset, DWORD dwVertexCount, PWORD pIndexData, DWORD dwIndexBase)
@ -198,7 +113,7 @@ size_t GetVerticesInBuffer(DWORD dwOffset, DWORD dwVertexCount, PWORD pIndexData
// If we are drawing from an offset, we know that the vertex count must have offset vertices
// before the first drawn vertices
dwVertexCount += dwOffset;
if (pIndexData == nullptr) {
if (pIndexData == xbnullptr) {
return dwVertexCount;
}
@ -218,7 +133,7 @@ int CountActiveD3DStreams()
{
int lastStreamIndex = 0;
for (int i = 0; i < 16; i++) {
if (g_D3DStreams[i] != nullptr) {
if (g_D3DStreams[i] != xbnullptr) {
lastStreamIndex = i + 1;
}
}
@ -231,7 +146,7 @@ XTL::CxbxVertexShaderInfo *GetCxbxVertexShaderInfo(DWORD Handle); // forward
UINT XTL::CxbxVertexBufferConverter::GetNbrStreams(CxbxDrawContext *pDrawContext)
{
// Draw..Up always have one stream
if (pDrawContext->pXboxVertexStreamZeroData != nullptr) {
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr) {
return 1;
}
@ -274,13 +189,47 @@ inline FLOAT NormShortToFloat(const SHORT value)
inline FLOAT ByteToFloat(const BYTE value)
{
return ((FLOAT)value) / 255.0f;
}
}
// Fetches the patched stream cached under 'key', inserting a new (invalid) entry
// when none exists yet. The returned entry is always moved to / created at the
// front of the usage list, so the least recently used entry is last in the list.
//
// key : hash of the Xbox vertex data, used as the cache key
//
// Returns a reference to the cache-owned entry. It stays valid until the entry
// is evicted by a later call.
XTL::CxbxPatchedStream& XTL::CxbxVertexBufferConverter::GetPatchedStream(uint64_t key)
{
    // First, attempt to fetch an existing patched stream
    auto it = m_PatchedStreams.find(key);
    if (it != m_PatchedStreams.end()) {
        // Mark it as most recently used; splice relinks the node without invalidating iterators
        m_PatchedStreamUsageList.splice(m_PatchedStreamUsageList.begin(), m_PatchedStreamUsageList, it->second);
        return *it->second;
    }

    // We didn't find an existing patched stream, so we must insert one and get a reference to it
    m_PatchedStreamUsageList.emplace_front();
    CxbxPatchedStream& stream = m_PatchedStreamUsageList.front();

    // Record the key in the entry right away: eviction below looks map entries up via
    // uiVertexDataHash, so an entry that the caller has not (yet) filled in must not be
    // left with a hash that doesn't match its key in the lookup map
    stream.uiVertexDataHash = key;

    // Insert a reference iterator into the fast lookup map
    m_PatchedStreams[key] = m_PatchedStreamUsageList.begin();

    // If the cache has exceeded its upper bound, discard the oldest entries in the cache
    if (m_PatchedStreams.size() > (m_MaxCacheSize + m_CacheElasticity)) {
        // Stop before the list would lose its front element: that is the entry being returned
        while (m_PatchedStreams.size() > m_MaxCacheSize && m_PatchedStreamUsageList.size() > 1) {
            m_PatchedStreams.erase(m_PatchedStreamUsageList.back().uiVertexDataHash);
            m_PatchedStreamUsageList.pop_back();
        }
    }

    return stream;
}
// Prints the current size of the vertex buffer cache and the accumulated
// hit / miss counters to stdout.
void XTL::CxbxVertexBufferConverter::PrintStats()
{
    printf("Vertex Buffer Cache Status: \n");
    // size() returns a size_t and the counters are ULONG (unsigned long); "%d" would
    // mismatch the argument types, so use the matching conversion specifiers
    printf("- Cache Size: %zu\n", m_PatchedStreams.size());
    printf("- Hits: %lu\n", m_TotalCacheHits);
    printf("- Misses: %lu\n", m_TotalCacheMisses);
}
void XTL::CxbxVertexBufferConverter::ConvertStream
(
CxbxDrawContext *pDrawContext,
UINT uiStream,
DWORD StartIndex
UINT uiStream
)
{
extern XTL::D3DCAPS g_D3DCaps;
@ -337,12 +286,12 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
bool bNeedRHWReset = bVshHandleIsFVF && ((XboxFVF & D3DFVF_POSITION_MASK) == D3DFVF_XYZRHW);
bool bNeedStreamCopy = bNeedTextureNormalization || bNeedVertexPatching || bNeedRHWReset;
uint8_t *pXboxVertexData;
UINT uiXboxVertexStride;
UINT uiVertexCount;
UINT uiHostVertexStride;
DWORD dwHostVertexDataSize;
uint8_t *pHostVertexData;
uint8_t *pXboxVertexData = xbnullptr;
UINT uiXboxVertexStride = 0;
UINT uiVertexCount = 0;
UINT uiHostVertexStride = 0;
DWORD dwHostVertexDataSize = 0;
uint8_t *pHostVertexData = nullptr;
IDirect3DVertexBuffer *pNewHostVertexBuffer = nullptr;
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr) {
@ -356,17 +305,7 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
uiVertexCount = pDrawContext->VerticesInBuffer;
uiHostVertexStride = (bNeedVertexPatching) ? pVertexShaderStreamInfo->HostVertexStride : uiXboxVertexStride;
dwHostVertexDataSize = uiVertexCount * uiHostVertexStride;
if (bNeedStreamCopy) {
pHostVertexData = (uint8_t*)malloc(dwHostVertexDataSize);
if (pHostVertexData == nullptr) {
CxbxKrnlCleanup("Couldn't allocate the new stream zero buffer");
}
}
else {
pHostVertexData = pXboxVertexData;
}
}
else {
} else {
XTL::X_D3DVertexBuffer *pXboxVertexBuffer = g_D3DStreams[uiStream];
pXboxVertexData = (uint8_t*)GetDataFromXboxResource(pXboxVertexBuffer);
if (pXboxVertexData == NULL) {
@ -392,19 +331,99 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
uiHostVertexStride = (bNeedVertexPatching) ? pVertexShaderStreamInfo->HostVertexStride : uiXboxVertexStride;
dwHostVertexDataSize = uiVertexCount * uiHostVertexStride;
GetCachedVertexBufferObject(pXboxVertexBuffer->Data, dwHostVertexDataSize, &pNewHostVertexBuffer);
if (FAILED(pNewHostVertexBuffer->Lock(0, 0, (D3DLockData **)&pHostVertexData, D3DLOCK_DISCARD))) {
CxbxKrnlCleanup("Couldn't lock the new buffer");
}
// Copy stream for patching and caching.
bNeedStreamCopy = true;
}
// FAST PATH: If this draw is a zerostream based draw, and does not require patching, we can use it directly
// No need to hash or patch at all in this case!
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr && !bNeedStreamCopy) {
pHostVertexData = pXboxVertexData;
CxbxPatchedStream stream;
stream.isValid = true;
stream.XboxPrimitiveType = pDrawContext->XboxPrimitiveType;
stream.uiCachedHostVertexStride = uiHostVertexStride;
stream.bCacheIsStreamZeroDrawUP = true;
stream.pCachedHostVertexStreamZeroData = pHostVertexData;
stream.Activate(pDrawContext, uiStream);
return;
}
// Now we have enough information to hash the existing resource and find it in our cache!
DWORD xboxVertexDataSize = uiVertexCount * uiXboxVertexStride;
uint64_t vertexDataHash = ComputeHash(pXboxVertexData, xboxVertexDataSize);
uint64_t pVertexShaderSteamInfoHash = 0;
if (pVertexShaderStreamInfo != nullptr) {
pVertexShaderSteamInfoHash = ComputeHash(pVertexShaderStreamInfo, sizeof(CxbxVertexShaderStreamInfo));
}
// The lookup implicitly inserts a new entry if none exists, so this always works
CxbxPatchedStream& patchedStream = GetPatchedStream(vertexDataHash);
// We check a few fields of the patched stream to protect against hash collisions (rare)
// but also to protect against games using the exact same vertex data for different vertex formats (Test Case: Burnout)
// NOTE: The host stride must be compared against the stride computed for THIS draw; comparing the
// cached value against itself is always true and would let a stale conversion be reused.
if (patchedStream.isValid && // Check that we found a cached stream
    patchedStream.uiVertexStreamInformationHash == pVertexShaderSteamInfoHash && // Check that the vertex conversion is valid
    patchedStream.uiCachedHostVertexStride == uiHostVertexStride && // Make sure the host stride didn't change
    patchedStream.uiCachedXboxVertexStride == uiXboxVertexStride && // Make sure the Xbox Stride didn't change
    patchedStream.uiCachedXboxVertexDataSize == xboxVertexDataSize ) { // Make sure the Xbox Data Size also didn't change
    m_TotalCacheHits++;
    patchedStream.Activate(pDrawContext, uiStream);
    return;
}
m_TotalCacheMisses++;
// If execution reaches here, the cached vertex buffer was not valid and we must reconvert the data
if (patchedStream.isValid) {
    // Free the existing buffers. The pointers stored in the cache entry itself are cleared
    // too: only one of them gets overwritten further down (depending on whether this is a
    // Draw*UP draw), so a stale one would otherwise be freed/released a second time by
    // the CxbxPatchedStream destructor.
    if (patchedStream.pCachedHostVertexStreamZeroData != nullptr) {
        // Only memory that the cache allocated itself may be freed
        if (patchedStream.bCachedHostVertexStreamZeroDataIsAllocated) {
            free(patchedStream.pCachedHostVertexStreamZeroData);
        }
        patchedStream.pCachedHostVertexStreamZeroData = nullptr;
        patchedStream.bCachedHostVertexStreamZeroDataIsAllocated = false;
    }

    if (patchedStream.pCachedHostVertexBuffer != nullptr) {
        patchedStream.pCachedHostVertexBuffer->Release();
        patchedStream.pCachedHostVertexBuffer = nullptr;
    }

    // Nothing is left to reuse; the buffers are allocated from scratch below
    pHostVertexData = nullptr;
    pNewHostVertexBuffer = nullptr;
}
// Allocate new buffers
if (pDrawContext->pXboxVertexStreamZeroData != xbnullptr) {
pHostVertexData = (uint8_t*)malloc(dwHostVertexDataSize);
if (pHostVertexData == nullptr) {
CxbxKrnlCleanup("Couldn't allocate the new stream zero buffer");
}
} else {
HRESULT hRet = g_pD3DDevice->CreateVertexBuffer(
dwHostVertexDataSize,
D3DUSAGE_WRITEONLY | D3DUSAGE_DYNAMIC,
0,
XTL::D3DPOOL_DEFAULT,
&pNewHostVertexBuffer,
nullptr
);
if (FAILED(hRet)) {
CxbxKrnlCleanup("Failed to create vertex buffer");
}
}
// If we need to lock a host vertex buffer, do so now
if (pHostVertexData == nullptr && pNewHostVertexBuffer != nullptr) {
if (FAILED(pNewHostVertexBuffer->Lock(0, 0, (D3DLockData **)&pHostVertexData, D3DLOCK_DISCARD))) {
CxbxKrnlCleanup("Couldn't lock vertex buffer");
}
}
if (bNeedVertexPatching) {
// assert(bNeedStreamCopy || "bNeedVertexPatching implies bNeedStreamCopy (but copies via conversions");
for (uint32_t uiVertex = StartIndex; uiVertex < uiVertexCount; uiVertex++) {
for (uint32_t uiVertex = 0; uiVertex < uiVertexCount; uiVertex++) {
uint8_t *pXboxVertexAsByte = &pXboxVertexData[uiVertex * uiXboxVertexStride];
uint8_t *pHostVertexAsByte = &pHostVertexData[uiVertex * uiHostVertexStride];
for (UINT uiElement = 0; uiElement < pVertexShaderStreamInfo->NumberOfVertexElements; uiElement++) {
@ -659,7 +678,7 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
// the uiTextureCoordinatesByteOffsetInVertex on host will match Xbox
}
for (uint32_t uiVertex = StartIndex; uiVertex < uiVertexCount; uiVertex++) {
for (uint32_t uiVertex = 0; uiVertex < uiVertexCount; uiVertex++) {
FLOAT *pVertexDataAsFloat = (FLOAT*)(&pHostVertexData[uiVertex * uiHostVertexStride]);
// Handle pre-transformed vertices (which bypass the vertex shader pipeline)
@ -715,38 +734,34 @@ void XTL::CxbxVertexBufferConverter::ConvertStream
}
}
}
patchedStream.isValid = true;
patchedStream.XboxPrimitiveType = pDrawContext->XboxPrimitiveType;
patchedStream.pCachedXboxVertexData = pXboxVertexData;
patchedStream.uiCachedXboxVertexDataSize = xboxVertexDataSize;
patchedStream.uiVertexDataHash = vertexDataHash;
patchedStream.uiVertexStreamInformationHash = pVertexShaderSteamInfoHash;
patchedStream.uiCachedXboxVertexStride = uiXboxVertexStride;
patchedStream.uiCachedHostVertexStride = uiHostVertexStride;
patchedStream.bCacheIsStreamZeroDrawUP = (pDrawContext->pXboxVertexStreamZeroData != NULL);
if (patchedStream.bCacheIsStreamZeroDrawUP) {
patchedStream.pCachedHostVertexStreamZeroData = pHostVertexData;
patchedStream.bCachedHostVertexStreamZeroDataIsAllocated = bNeedStreamCopy;
} else {
// assert(pNewHostVertexBuffer != nullptr);
pNewHostVertexBuffer->Unlock();
patchedStream.pCachedHostVertexBuffer = pNewHostVertexBuffer;
}
CxbxPatchedStream *pPatchedStream = &m_PatchedStreams[uiStream];
#if 0 // new
pPatchedStream->pCachedXboxVertexData = pXboxVertexData; // TODO : For hashing & caching purposes
#endif
pPatchedStream->uiCachedXboxVertexStride = uiXboxVertexStride;
#if 0 // new
pPatchedStream->uiCachedXboxVertexDataSize = uiVertexCount * uiXboxVertexStride; // TODO : For hashing & caching purposes
#endif
pPatchedStream->uiCachedHostVertexStride = uiHostVertexStride;
pPatchedStream->bCacheIsStreamZeroDrawUP = (pDrawContext->pXboxVertexStreamZeroData != NULL);
if (pPatchedStream->bCacheIsStreamZeroDrawUP) {
pPatchedStream->pCachedHostVertexStreamZeroData = pHostVertexData;
pPatchedStream->bCachedHostVertexStreamZeroDataIsAllocated = bNeedStreamCopy;
}
else {
// assert(pNewHostVertexBuffer != nullptr);
pNewHostVertexBuffer->Unlock();
pPatchedStream->pCachedHostVertexBuffer = pNewHostVertexBuffer;
}
ActivatePatchedStream(pDrawContext, uiStream, pPatchedStream,
/*Release=*/!bNeedStreamCopy); // Release when it won't get cached
patchedStream.Activate(pDrawContext, uiStream);
}
void XTL::CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext, DWORD StartIndex)
void XTL::CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext)
{
if ((pDrawContext->XboxPrimitiveType < X_D3DPT_POINTLIST) || (pDrawContext->XboxPrimitiveType > X_D3DPT_POLYGON))
CxbxKrnlCleanup("Unknown primitive type: 0x%.02X\n", pDrawContext->XboxPrimitiveType);
m_pVertexShaderInfo = nullptr;
if (VshHandleIsVertexShader(pDrawContext->hVertexShader)) {
m_pVertexShaderInfo = &(GetCxbxVertexShader(pDrawContext->hVertexShader)->VertexShaderInfo);
}
@ -766,11 +781,7 @@ void XTL::CxbxVertexBufferConverter::Apply(CxbxDrawContext *pDrawContext, DWORD
}
for(UINT uiStream = 0; uiStream < m_uiNbrStreams; uiStream++) {
// TODO: Check for cached vertex buffer, and use it if possible
ConvertStream(pDrawContext, uiStream, StartIndex);
// TODO: Cache Vertex Buffer Data
ConvertStream(pDrawContext, uiStream);
}
if (pDrawContext->XboxPrimitiveType == X_D3DPT_QUADSTRIP) {

View File

@ -24,8 +24,9 @@
// ******************************************************************
#ifndef XBVERTEXBUFFER_H
#define XBVERTEXBUFFER_H
#include "Cxbx.h"
#include "Cxbx.h"
//#include <ctime> // Conflict with io.h
#define MAX_NBR_STREAMS 16
@ -50,31 +51,44 @@ typedef struct _CxbxDrawContext
}
CxbxDrawContext;
// Plain-struct record describing the converted ("patched") host-side data of one vertex stream
typedef struct _CxbxPatchedStream
{
UINT uiCachedXboxVertexStride; // Stride (in bytes) of a vertex in the Xbox source data
UINT uiCachedHostVertexStride; // Stride (in bytes) of a vertex in the converted host data
bool bCacheIsStreamZeroDrawUP; // True when the data is drawn from a user pointer (Draw*UP) instead of a vertex buffer
void *pCachedHostVertexStreamZeroData; // Host vertex data used in the Draw*UP case
bool bCachedHostVertexStreamZeroDataIsAllocated; // True when the pointer above was allocated by the converter and must be free()d
XTL::IDirect3DVertexBuffer *pCachedHostVertexBuffer; // Host vertex buffer used in the non-Draw*UP case
} CxbxPatchedStream;
// One entry of the vertex buffer cache: the converted ("patched") host-side data of a
// vertex stream, together with the properties of the Xbox source data it was converted
// from (used to validate a cache hit).
// NOTE(review): the destructor frees/releases the host resources held below, while the
// class remains implicitly copyable - confirm that populated instances are never copied.
class CxbxPatchedStream
{
public:
CxbxPatchedStream();
~CxbxPatchedStream();
// Makes this stream's host data current for the given stream index: either fills in the
// user-pointer fields of the draw context (Draw*UP) or sets the host stream source
void Activate(XTL::CxbxDrawContext *pDrawContext, UINT uiStream) const;
bool isValid = false; // True once the entry holds converted data
XTL::X_D3DPRIMITIVETYPE XboxPrimitiveType = XTL::X_D3DPT_NONE; // Primitive type of the draw that filled this entry
PVOID pCachedXboxVertexData = xbnullptr; // Xbox source vertex data the entry was converted from
UINT uiCachedXboxVertexDataSize = 0; // Size (in bytes) of the Xbox source vertex data
uint64_t uiVertexDataHash = 0; // Hash of the Xbox source vertex data; doubles as the cache key
uint64_t uiVertexStreamInformationHash = 0; // Hash of the vertex shader stream info used for the conversion (0 when there was none)
UINT uiCachedXboxVertexStride = 0; // Stride (in bytes) of a vertex in the Xbox source data
UINT uiCachedHostVertexStride = 0; // Stride (in bytes) of a vertex in the converted host data
bool bCacheIsStreamZeroDrawUP = false; // True when the data is drawn from a user pointer (Draw*UP) instead of a vertex buffer
void *pCachedHostVertexStreamZeroData = nullptr; // Host vertex data used in the Draw*UP case
bool bCachedHostVertexStreamZeroDataIsAllocated = false; // True when the pointer above must be free()d by the destructor
XTL::IDirect3DVertexBuffer *pCachedHostVertexBuffer = nullptr; // Host vertex buffer used in the non-Draw*UP case; released by the destructor
};
class CxbxVertexBufferConverter
{
public:
CxbxVertexBufferConverter();
~CxbxVertexBufferConverter();
void Apply(CxbxDrawContext *pPatchDesc, DWORD StartIndex = 0);
void Apply(CxbxDrawContext *pPatchDesc);
void PrintStats();
private:
UINT m_uiNbrStreams;
// Stack tracking
ULONG m_TotalCacheHits = 0;
ULONG m_TotalCacheMisses = 0;
UINT m_uiNbrStreams;
CxbxPatchedStream m_PatchedStreams[MAX_NBR_STREAMS];
PVOID m_pNewVertexStreamZeroData;
bool m_bAllocatedStreamZeroData;
UINT m_MaxCacheSize = 2000; // Maximum number of entries in the cache
UINT m_CacheElasticity = 200; // Cache is allowed to grow this much more than maximum before being purged to maximum
std::unordered_map<uint64_t, std::list<CxbxPatchedStream>::iterator> m_PatchedStreams; // Stores references to patched streams for fast lookup
std::list<CxbxPatchedStream> m_PatchedStreamUsageList; // Linked list of vertex streams, least recently used is last in the list
CxbxPatchedStream& GetPatchedStream(uint64_t); // Fetches (or inserts) a patched stream associated with the given key
XTL::CxbxVertexShaderInfo *m_pVertexShaderInfo;
@ -82,7 +96,7 @@ class CxbxVertexBufferConverter
UINT GetNbrStreams(CxbxDrawContext *pPatchDesc);
// Patches the types of the stream
void ConvertStream(CxbxDrawContext *pPatchDesc, UINT uiStream, DWORD StartIndex);
void ConvertStream(CxbxDrawContext *pPatchDesc, UINT uiStream);
};
// inline vertex buffer emulation

View File

@ -45,7 +45,7 @@
#include "..\..\import\XbSymbolDatabase\XbSymbolDatabase.h"
#include "Intercept.hpp"
#include "Patches.hpp"
#include "common\util\xxhash32.h"
#include "common\util\hasher.h"
#include <Shlwapi.h>
#include <shlobj.h>
#include <unordered_map>
@ -385,7 +385,7 @@ void EmuHLEIntercept(Xbe::Header *pXbeHeader)
}
// Hash the loaded XBE's header, use it as a filename
uint32_t uiHash = XXHash32::hash((void*)&CxbxKrnl_Xbe->m_Header, sizeof(Xbe::Header), 0);
uint64_t uiHash = ComputeHash((void*)&CxbxKrnl_Xbe->m_Header, sizeof(Xbe::Header));
std::stringstream sstream;
char tAsciiTitle[40] = "Unknown";
std::setlocale(LC_ALL, "English");

View File

@ -26,6 +26,7 @@
#define EMUXTL_H
#include <vector> // Needed for EmuDSound.h file, must be outside of XTL namespace.
#include <unordered_map>
namespace XTL
{

View File

@ -59,7 +59,7 @@
#include <fstream>
#include <iostream>
#include <fcntl.h> // for _O_TEXT
#include "common\util\xxhash32.h" // for XXHash32::hash
#include "common\util\hasher.h"
#define XBOX_LED_FLASH_PERIOD 176 // if you know a more accurate value, put it here
@ -1063,7 +1063,7 @@ LRESULT CALLBACK WndMain::WndProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lP
std::string cacheDir = g_Settings->GetDataLocation() + "\\SymbolCache\\";
// Hash the loaded XBE's header, use it as a filename
uint32_t uiHash = XXHash32::hash((void*)&m_Xbe->m_Header, sizeof(Xbe::Header), 0);
uint64_t uiHash = ComputeHash((void*)&m_Xbe->m_Header, sizeof(Xbe::Header));
std::stringstream sstream;
std::string szTitleName(m_Xbe->m_szAsciiTitle);
m_Xbe->PurgeBadChar(szTitleName);