pcsx2/common/include/Utilities/HashMap.h

/*  PCSX2 - PS2 Emulator for PCs
*  Copyright (C) 2002-2010  PCSX2 Dev Team
*
*  PCSX2 is free software: you can redistribute it and/or modify it under the terms
*  of the GNU Lesser General Public License as published by the Free Software Found-
*  ation, either version 3 of the License, or (at your option) any later version.
*
*  PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
*  without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
*  PURPOSE.  See the GNU General Public License for more details.
*
*  You should have received a copy of the GNU General Public License along with PCSX2.
*  If not, see <http://www.gnu.org/licenses/>.
*/

#pragma once

#include <cstring>
#include <string>

// They move include file in version 2.0.2 of google sparsehash...
#ifdef SPARSEHASH_NEW_INCLUDE_DIR
#include <sparsehash/type_traits.h>
#include <sparsehash/dense_hash_set>
#include <sparsehash/dense_hash_map>
#include <sparsehash/internal/densehashtable.h>
#else
#include <google/type_traits.h>
#include <google/dense_hash_set>
#include <google/dense_hash_map>
#include <google/sparsehash/densehashtable.h>
#endif

#include <wx/string.h>

namespace HashTools {

/// Expands to a friend declaration for <c>HashMap&lt;Key,T&gt;</c>, for use inside a class body.
/// The expansion carries no trailing semicolon, so the use site must supply one.
/// Note: <c>HashMap</c> itself is not declared in this part of the file.
#define HashFriend(Key,T) friend class HashMap<Key,T>

/// Defines an equality comparison unary method (an <c>operator()</c> overload taking two keys).
/// Both operands are taken by const reference, so keys are not copied for every comparison;
/// <c>Type</c> must therefore provide an <c>Equals</c> member that is callable on a const instance.
/// Generally intended for internal use only.
#define _EQUALS_UNARY_OP( Type ) bool operator()(const Type& s1, const Type& s2) const { return s1.Equals( s2 ); }

/// Defines a hash code unary method (an <c>operator()</c> overload returning <c>hash_key_t</c>).
/// <c>Type</c> must provide a <c>GetHashCode</c> member that is callable on a const instance.
/// Generally intended for internal use only.
#define _HASHCODE_UNARY_OP( Type ) hash_key_t operator()( const Type& val ) const { return val.GetHashCode(); }

/// <summary>
///   Defines an equality comparison method within an encapsulating struct, using the 'unary method' approach.
/// </summary>
/// <remarks>
///   <para>
///     This macro is a shortcut helper to implementing types usable as keys in <see cref="HashMap"/>s.
///     Normally you will want to use <see cref="DEFINE_HASH_API"/> instead as it defines both
///     the HashCode predicate and Compare predicate.
///   </para>
///   The code generated by this macro is equivalent to this:
///   <code>
///		// where 'Type' is the parameter used in the macro.
///		struct UnaryEquals
///		{
///			bool operator()(const Type& s1, const Type& s2) const
///			{
///				return s1.Equals( s2 );			// this member function must be implemented by the user.
///			}
///		};
///   </code>
///   Note:
///   In this file, the term 'unary method' refers to a method that is implemented as an overload of the
///   <c>operator ()</c>, such that the object instance itself acts as a method (a function object).
///   Note:
///   This methodology is similar to C# / .NET's <c>object.Equals()</c> method: The class member method
///   implementation of <c>Equals</c> should *not* throw exceptions -- it should instead return <c>false</c>
///   if either side of the comparison is not a matching type.  See <see cref="IHashable" /> for details.
///   Note:
///   The reason for this (perhaps seemingly) hogwash red tape is that you can define custom
///   equality behavior for individual hashmaps, independent of the type used.  One scenario
///   where such a feature is useful is a hashmap keyed on strings that compares its keys
///   case-insensitively.
/// </remarks>
/// <seealso cref="DEFINE_HASHCODE_UNARY"/>
/// <seealso cref="DEFINE_HASH_API"/>
/// <seealso cref="IHashable"/>
/// <seealso cref="HashMap"/>
#define DEFINE_EQUALS_UNARY( Type ) struct UnaryEquals{ _EQUALS_UNARY_OP( Type ) }

/// <summary>
///   Defines a hash code predicate within an encapsulating struct; for use in hashable user datatypes
/// </summary>
/// <remarks>
///   <para>
///     This macro is a shortcut helper to implementing types usable as keys in <see cref="HashMap"/>s.
///     Normally you will want to use <see cref="DEFINE_HASH_API"/> instead as it defines both
///     the HashCode predicate and Compare predicate.
///   </para>
///   The code generated by this macro is equivalent to this:
///   <code>
///		// where 'Type' is the parameter used in the macro.
///		struct UnaryHashCode
///		{
///			hash_key_t operator()( const Type& val ) const
///			{
///				return val.GetHashCode();		// this member function must be implemented by the user.
///			}
///		};
///   </code>
/// </remarks>
/// <seealso cref="DEFINE_EQUALS_UNARY"/>
/// <seealso cref="DEFINE_HASH_API"/>
/// <seealso cref="IHashable"/>
/// <seealso cref="HashMap"/>
#define DEFINE_HASHCODE_UNARY( Type ) struct UnaryHashCode{ _HASHCODE_UNARY_OP( Type ) }

/// <summary>
///   Defines the API for hashcode and comparison unary methods; for use in hashable user datatypes
/// </summary>
/// <remarks>
///   This macro creates APIs that allow the class or struct to be used as a key in a <see cref="HashMap"/>.
///   It requires that the data type implement the following items:
///    * A public const instance member method <c>Equals</c>, taking another instance of the type.
///    * A public const instance member method <c>GetHashCode</c>.
///   The code generated by this macro is equivalent to this:
///   <code>
///		// where 'Type' is the parameter used in the macro.
///		struct UnaryHashCode
///		{
///			hash_key_t operator()( const Type& val ) const
///			{
///				return val.GetHashCode();		// this member function must be implemented by the user.
///			}
///		};
///
///		struct UnaryEquals
///		{
///			bool operator()(const Type& s1, const Type& s2) const
///			{
///				return s1.Equals( s2 );			// this member function must be implemented by the user.
///			}
///		};
///   </code>
///   Note:
///   The expansion already ends in a semicolon, so a semicolon written after the macro invocation
///   is redundant (but harmless inside a class body).
///   Note:
///   In this file, the term 'unary method' refers to a method that is implemented as an overload of the
///   <c>operator ()</c>, such that the object instance itself acts as a method (a function object).
///   Note:
///   For class types you can use the <see cref="IHashable"/> interface, which also allows you to group
///   multiple types of objects into a single complex HashMap.
///   Note:
///   Generally speaking, you do not use the <c>IHashable</c> interface on simple C-style structs, since it
///   would incur the overhead of a vtbl and could potentially break code that assumes the structs to have
///   1-to-1 data-to-declaration correlations.
///   Note:
///   Internally, using this macro is functionally equivalent to using both <see cref="DEFINE_HASHCODE_UNARY"/>
///   and <see cref="DEFINE_EQUALS_UNARY"/>.
/// </remarks>
/// <seealso cref="IHashable"/>
/// <seealso cref="DEFINE_HASHCODE_UNARY"/>
/// <seealso cref="DEFINE_EQUALS_UNARY"/>
/// <seealso cref="HashMap"/>
#define DEFINE_HASH_API( Type ) DEFINE_HASHCODE_UNARY( Type ); DEFINE_EQUALS_UNARY( Type );

/// <summary>
///   A helper macro for creating custom types that can be used as <see cref="HashMap" /> keys.
/// </summary>
/// <remarks>
///   Use of this macro is only needed if the hashable type in question is a struct that is a private
///   member type nested inside a containing class.
///   The macro expands to two declarations, intended for use inside that containing class:
///    * a typedef naming <c>SpecializedHashMap&lt;Key, T&gt;</c> as <c>[Key]HashMap</c> (the name is built by
///      token-pasting, so <c>Key</c> must be a single identifier rather than a qualified or template name);
///    * a friend declaration granting that map type access to the containing class.
///   Note: <c>SpecializedHashMap</c> is not declared in this part of the file.
/// </remarks>
#define PRIVATE_HASHMAP( Key, T ) \
	typedef SpecializedHashMap<Key, T> Key##HashMap; \
	friend Key##HashMap;

/// <summary>
///   Type that represents a hashcode; returned by all hash functions.
/// </summary>
/// <remarks>
///   Currently an alias of <c>u32</c>.
///   In theory this could be changed to a 64-bit value in the future, although many of the hash algorithms
///   would have to be changed to take advantage of the larger data type.
/// </remarks>
typedef u32 hash_key_t;

/// Computes a hash code for a raw buffer.  Callers in this header pass a pointer to character
/// data as <c>data</c> and the size of that data in bytes as <c>len</c>.
/// Declaration only; the definition is not in this part of the file.
hash_key_t Hash(const char* data, int len);

// Forward declaration, so that the shared instance below can be declared before the
// struct itself is defined.
struct CommonHashClass;
// A const CommonHashClass instance with external linkage.  Only the declaration lives
// here; the definition is not in this part of the file.
extern const CommonHashClass GetCommonHash;

/// <summary>
///   A unary-style set of methods for getting the hash code of C++ fundamental types.
/// </summary>
/// <remarks>
///   This class is used to pass hash functions into the <see cref="HashMap"/> class and
///   its siblings.  It houses methods for most of the fundamental types of C++ and the STL,
///   such as all int and float types, and also <c>std::string</c>.  All functions can be
///   accessed via the () overload on an instance of the class, such as:
///   <code>
///		const CommonHashClass GetHash;
///		int v = 27;
///		std::string s = "Joe's World!";
///		hash_key_t hashV = GetHash( v );
///		hash_key_t hashS = GetHash( s );
///   </code>
///   Note:
///   In this file, the term 'unary method' refers to a method that is implemented as an overload of the
///   <c>operator ()</c>, such that the object instance itself acts as a method (a function object).
///   Note:
///   The class holds no data members, so every instance produces the same hash codes.
/// </remarks>
/// <seealso cref="GetCommonHash"/>
struct CommonHashClass
{
public:
	// GCC needs empty constructors on const instances, because it likes pointlessness.
	CommonHashClass() {}

	/// <summary>
	///   Mixes the bits of a 32 bit integer and returns the mixed value as its hash code.
	/// </summary>
	/// <remarks>
	///   This is the shared implementation behind the 32 bit integer and <c>wchar_t</c> overloads.
	///   All arithmetic is done on an unsigned 32 bit value, so overflow wraps around.
	/// </remarks>
	hash_key_t DoInt( u32 val ) const
	{
		u32 key = val;
		key = ~key + (key << 15);
		key = key ^ (key >> 12);
		key = key + (key << 2);
		key = key ^ (key >> 4);
		key = key * 2057;
		key = key ^ (key >> 16);

		// Return the mixed key; returning the raw input would discard all of the work above.
		return key;
	}

	/// Hashes the bytes of a narrow string.
	hash_key_t operator()(const std::string& src) const
	{
		return Hash( src.data(), static_cast<int>( src.length() ) );
	}

	/// Hashes the raw bytes of a wide string (length in characters times <c>sizeof(wchar_t)</c>).
	hash_key_t operator()( const std::wstring& src ) const
	{
		return Hash( reinterpret_cast<const char*>( src.data() ), static_cast<int>( src.length() * sizeof( wchar_t ) ) );
	}

	/// Hashes the raw bytes of a wxString's <c>wxChar</c> buffer.
	hash_key_t operator()( const wxString& src ) const
	{
		// Obtain a typed wxChar pointer first, instead of casting the accessor's result straight
		// to const char*.  NOTE(review): in wxWidgets versions where c_str()/data() return a proxy
		// object, a direct cast to const char* would presumably select a narrow-string conversion,
		// which no longer matches the wxChar-based byte count below -- confirm against the
		// wxWidgets version in use.
		const wxChar* chars = src.c_str();
		return Hash( reinterpret_cast<const char*>( chars ), static_cast<int>( src.length() * sizeof( wxChar ) ) );
	}

	// Returns a hashcode for a character.
	// This hash function replicates the (offset) character value into all four bytes of
	// the result, so that the codes are spread across the range of an int value.  In theory
	// that should be more rewarding to hashtable performance than a straight up char lookup.
	hash_key_t operator()( const char c1 ) const
	{
		// Most chars contain values between 0 and 128, so let's mix it up a bit.
		// The shifts and the sum are done on an unsigned value: in signed arithmetic the
		// result overflows for most letters, which is undefined behavior.
		const u32 cs = static_cast<u32>( static_cast<int>( c1 ) + 64 );
		return ( cs + ( cs << 8 ) + ( cs << 16 ) + ( cs << 24 ) );
	}

	/// Returns a hashcode for a wide character, by feeding its value into the 32 bit integer hash.
	hash_key_t operator()( const wchar_t wc1 ) const
	{
		// Most unicode values are between 0 and 128, with 0-1024
		// making up the bulk of the rest.  Everything else is sparsely used.
		/*int wcs = (int) ( wc1 + 0x2000 );
		return wcs ^ ( wcs + 0x19000 );*/

		// or maybe I'll just feed it into the int hash:
		return DoInt( static_cast<u32>( wc1 ) );
	}

	/// <summary>
	///   Gets the hash code for an unsigned 32 bit integer.
	/// </summary>
	/// <remarks>
	///   This method performs a very fast algorithm optimized for typical integral
	///   dispersion patterns (which tend to favor a bit heavy on the lower-range of values while
	///   leaving the extremes un-used).
	///   Note:
	///   Implementation is based on an article found here: http://www.concentric.net/~Ttwang/tech/inthash.htm
	/// </remarks>
	hash_key_t operator()( const u32 val ) const
	{
		return DoInt( val );
	}

	/// <summary>
	///   Gets the hash code for a signed 32 bit integer.
	/// </summary>
	/// <remarks>
	///   The value is converted to unsigned and hashed exactly like the unsigned 32 bit overload.
	///   Note:
	///   Implementation is based on an article found here: http://www.concentric.net/~Ttwang/tech/inthash.htm
	/// </remarks>
	hash_key_t operator()( const s32 val ) const
	{
		return DoInt( static_cast<u32>( val ) );
	}

	/// <summary>
	///   Gets the hash code for an unsigned 64 bit integer.
	/// </summary>
	/// <remarks>
	///   This method performs a very fast algorithm optimized for typical integral
	///   dispersion patterns (which tend to favor a bit heavy on the lower-range of values while
	///   leaving the extremes un-used).  The 64 bit result of the mixing is truncated to its
	///   low 32 bits.
	///   Note:
	///   Implementation is based on an article found here: http://www.concentric.net/~Ttwang/tech/inthash.htm
	/// </remarks>
	hash_key_t operator()( const u64 val ) const
	{
		u64 key = val;
		key = (~key) + (key << 18);
		key = key ^ (key >> 31);
		key = key * 21;  // key = (key + (key << 2)) + (key << 4);
		key = key ^ (key >> 11);
		key = key + (key << 6);
		key = key ^ (key >> 22);
		return static_cast<hash_key_t>( key );
	}

	/// <summary>
	///   Gets the hash code for a signed 64 bit integer.
	/// </summary>
	/// <remarks>
	///   The value is converted to unsigned and hashed exactly like the unsigned 64 bit overload.
	/// </remarks>
	hash_key_t operator()( const s64 val ) const
	{
		return (*this)( static_cast<u64>( val ) );
	}

	/// Gets the hash code for a float: the bit pattern of the value itself.
	hash_key_t operator()( const float val ) const
	{
		// floats do a fine enough job of being scattered about
		// the universe, so the raw bit pattern is used as-is.
		// memcpy is used to read the bits, as dereferencing a casted pointer breaks the
		// aliasing rules.  Assumes float and hash_key_t have the same size.
		hash_key_t bits = 0;
		std::memcpy( &bits, &val, sizeof( bits ) );
		return bits;
	}

	/// Gets the hash code for a double, by hashing its bit pattern as a 64 bit integer.
	hash_key_t operator()( const double val ) const
	{
		// doubles have to be compressed into a 32 bit value.
		// memcpy is used to read the bits, as dereferencing a casted pointer breaks the
		// aliasing rules.  Assumes double and u64 have the same size.
		u64 bits = 0;
		std::memcpy( &bits, &val, sizeof( bits ) );
		return (*this)( bits );
	}

	/// <summary>
	///   Calculates the hash of a pointer.
	/// </summary>
	/// <remarks>
	///   This method has been optimized to give typical 32 bit pointers a reasonably
	///   wide spread across the integer spectrum.
	///   Note:
	///   This method is optimized for 32 bit pointers only.
	///   64 bit pointer support is implemented but not optimized: when <c>_ARCH_64</c> is defined
	///   the address is simply fed into the 64 bit integer hash.
	/// </remarks>
	hash_key_t operator()( const void* addr ) const
	{
#ifdef _ARCH_64
		return (*this)( reinterpret_cast<u64>( addr ) );
#else
		// The low 3 bits are dropped before the multiply.
		const hash_key_t key = reinterpret_cast<hash_key_t>( addr );
		return static_cast<hash_key_t>( (key >> 3) * 2654435761ul );
#endif
	}
};

}