mirror of https://github.com/PCSX2/pcsx2.git
Made a clean fix for the memzero.h template expansion bug/warning (caused by msvc treating template parameters like macros).
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1556 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
ba0f21463c
commit
bd91426bab
|
@ -1,7 +1,7 @@
|
|||
<?xml version="1.0" encoding="windows-1253"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="9,00"
|
||||
Version="9.00"
|
||||
Name="pcsx2"
|
||||
ProjectGUID="{1CEFD830-2B76-4596-A4EE-BCD7280A60BD}"
|
||||
RootNamespace="pcsx2"
|
||||
|
@ -20,7 +20,6 @@
|
|||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
InheritedPropertySheets=".\vsprops\common.vsprops;..\..\..\common\vsprops\BaseProperties.vsprops;..\..\..\common\vsprops\3rdpartyDeps.vsprops;..\..\..\common\vsprops\pthreads.vsprops;.\vsprops\devbuild.vsprops;..\..\..\common\vsprops\CodeGen_Debug.vsprops;..\..\..\common\vsprops\IncrementalLinking.vsprops"
|
||||
UseOfMFC="0"
|
||||
|
|
|
@ -16,8 +16,7 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
|
||||
*/
|
||||
|
||||
#ifndef _WIN_MEMZERO_H_
|
||||
#define _WIN_MEMZERO_H_
|
||||
#pragma once
|
||||
|
||||
// These functions are meant for memset operations of constant length only.
|
||||
// For dynamic length clears, use the C-compiler provided memset instead.
|
||||
|
@ -36,6 +35,11 @@
|
|||
// structures, which are constant in size, thus allowing us to generate optimal compile-
|
||||
// time code for each use of the function.
|
||||
|
||||
// Use of CLD (Clear Direction Flag):
|
||||
// On Windows platforms the ABI declares that the direction flag should be cleared upon
|
||||
// entry of *any* function. Therefore there is no need to have CLD prior to our use of
|
||||
// rep strosd here.
|
||||
|
||||
// Notes on XMM0's "storage" area (_xmm_backup):
|
||||
// Unfortunately there's no way to guarantee alignment for this variable. If I use the
|
||||
// __declspec(aligned(16)) decorator, MSVC fails to inline the function since stack
|
||||
|
@ -43,26 +47,35 @@
|
|||
// alignment of the stack at compile time, so I'm forced to use movups to store and
|
||||
// retrieve xmm0.
|
||||
|
||||
// MSVC Template Issue:
|
||||
// MSVC treats int template parameters like macro insertions. That is, if you have a
|
||||
// a template parameter in the form of "func<10-5>()", MSVC inserts 10-5 into the
|
||||
// templated function, causing order-of-operation problems (sigh). The normal fix would
|
||||
// be to assign the template parameter to a static const int inside each function, but that
|
||||
// won't fly with the enums optimization. So in order to fix the problem I define a macro
|
||||
// that encapsulates the template parameter inside parenthesis for us:
|
||||
|
||||
#define MZFbytes (_bytes)
|
||||
|
||||
// This is an implementation of the memzero_ptr fast memset routine (for zero-clears only).
|
||||
template< size_t bytes >
|
||||
template< size_t _bytes >
|
||||
static __forceinline void memzero_ptr( void *dest )
|
||||
{
|
||||
if( bytes == 0 ) return;
|
||||
if( MZFbytes == 0 ) return;
|
||||
|
||||
// This function only works on 32-bit alignments. For anything else we just fall back
|
||||
// on the compiler-provided implementation of memset...
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
if( (MZFbytes & 0x3) != 0 )
|
||||
{
|
||||
memset( dest, 0, bytes );
|
||||
memset( dest, 0, MZFbytes );
|
||||
return;
|
||||
}
|
||||
|
||||
enum
|
||||
{
|
||||
remainder = bytes & 127,
|
||||
bytes128 = bytes / 128
|
||||
remainder = MZFbytes & 127,
|
||||
bytes128 = MZFbytes / 128
|
||||
};
|
||||
|
||||
// Initial check -- if the length is not a multiple of 16 then fall back on
|
||||
|
@ -70,7 +83,7 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
// manner isn't necessary in pcsx2 (meaning they aren't used in speed-critical
|
||||
// scenarios).
|
||||
|
||||
if( (bytes & 0xf) == 0 )
|
||||
if( (MZFbytes & 0xf) == 0 )
|
||||
{
|
||||
u64 _xmm_backup[2];
|
||||
|
||||
|
@ -173,12 +186,12 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
}
|
||||
|
||||
// This function only works on 32-bit alignments.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
jASSUME( (MZFbytes & 0x3) == 0 );
|
||||
jASSUME( ((uptr)dest & 0x3) == 0 );
|
||||
|
||||
enum
|
||||
{
|
||||
remdat = (bytes>>2)
|
||||
remdat = MZFbytes >> 2
|
||||
};
|
||||
|
||||
// This case statement handles 5 special-case sizes (small blocks)
|
||||
|
@ -197,7 +210,6 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
case 3:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
stosd
|
||||
|
@ -209,7 +221,6 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
case 4:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
stosd
|
||||
|
@ -222,7 +233,6 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
case 5:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
stosd
|
||||
|
@ -236,7 +246,6 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
default:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov ecx, remdat
|
||||
mov edi, dest
|
||||
xor eax, eax
|
||||
|
@ -247,28 +256,28 @@ static __forceinline void memzero_ptr( void *dest )
|
|||
}
|
||||
|
||||
// An optimized memset for 8 bit destination data.
|
||||
template< u8 data, size_t bytes >
|
||||
template< u8 data, size_t _bytes >
|
||||
static __forceinline void memset_8( void *dest )
|
||||
{
|
||||
if( bytes == 0 ) return;
|
||||
if( MZFbytes == 0 ) return;
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
if( (MZFbytes & 0x3) != 0 )
|
||||
{
|
||||
// unaligned data length. No point in doing an optimized inline version (too complicated!)
|
||||
// So fall back on the compiler implementation:
|
||||
|
||||
memset( dest, data, bytes );
|
||||
memset( dest, data, MZFbytes );
|
||||
return;
|
||||
}
|
||||
|
||||
//u64 _xmm_backup[2];
|
||||
|
||||
/*static const size_t remainder = bytes & 127;
|
||||
static const size_t bytes128 = bytes / 128;
|
||||
/*static const size_t remainder = MZFbytes & 127;
|
||||
static const size_t bytes128 = MZFbytes / 128;
|
||||
if( bytes128 > 32 )
|
||||
{
|
||||
// This function only works on 128-bit alignments.
|
||||
jASSUME( (bytes & 0xf) == 0 );
|
||||
jASSUME( (MZFbytes & 0xf) == 0 );
|
||||
jASSUME( ((uptr)dest & 0xf) == 0 );
|
||||
|
||||
__asm
|
||||
|
@ -313,11 +322,11 @@ static __forceinline void memset_8( void *dest )
|
|||
}*/
|
||||
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
jASSUME( (MZFbytes & 0x3) == 0 );
|
||||
|
||||
enum
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
remdat = MZFbytes >> 2,
|
||||
data32 = data + (data<<8) + (data<<16) + (data<<24)
|
||||
};
|
||||
|
||||
|
@ -336,7 +345,6 @@ static __forceinline void memset_8( void *dest )
|
|||
case 3:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -348,7 +356,6 @@ static __forceinline void memset_8( void *dest )
|
|||
case 4:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -361,7 +368,6 @@ static __forceinline void memset_8( void *dest )
|
|||
case 5:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -375,7 +381,6 @@ static __forceinline void memset_8( void *dest )
|
|||
default:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov ecx, remdat;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
|
@ -385,31 +390,31 @@ static __forceinline void memset_8( void *dest )
|
|||
}
|
||||
}
|
||||
|
||||
template< u16 data, size_t bytes >
|
||||
template< u16 data, size_t _bytes >
|
||||
static __forceinline void memset_16( void *dest )
|
||||
{
|
||||
if( bytes == 0 ) return;
|
||||
if( MZFbytes == 0 ) return;
|
||||
|
||||
if( (bytes & 0x1) != 0 )
|
||||
if( (MZFbytes & 0x1) != 0 )
|
||||
throw Exception::LogicError( "Invalid parameter passed to memset_16 - data length is not a multiple of 16 or 32 bits." );
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
if( (MZFbytes & 0x3) != 0 )
|
||||
{
|
||||
// Unaligned data length. No point in doing an optimized inline version (too complicated with
|
||||
// remainders and such).
|
||||
|
||||
_memset16_unaligned( dest, data, bytes );
|
||||
_memset16_unaligned( dest, data, MZFbytes );
|
||||
return;
|
||||
}
|
||||
|
||||
//u64 _xmm_backup[2];
|
||||
|
||||
// This function only works on 32-bit alignments of data copied.
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
jASSUME( (MZFbytes & 0x3) == 0 );
|
||||
|
||||
enum
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
remdat = MZFbytes >> 2,
|
||||
data32 = data + (data<<16)
|
||||
};
|
||||
|
||||
|
@ -428,7 +433,6 @@ static __forceinline void memset_16( void *dest )
|
|||
case 3:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -440,7 +444,6 @@ static __forceinline void memset_16( void *dest )
|
|||
case 4:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -453,7 +456,6 @@ static __forceinline void memset_16( void *dest )
|
|||
case 5:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -467,7 +469,6 @@ static __forceinline void memset_16( void *dest )
|
|||
default:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov ecx, remdat;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
|
@ -477,12 +478,12 @@ static __forceinline void memset_16( void *dest )
|
|||
}
|
||||
}
|
||||
|
||||
template< u32 data, size_t bytes >
|
||||
template< u32 data, size_t MZFbytes >
|
||||
static __forceinline void memset_32( void *dest )
|
||||
{
|
||||
if( bytes == 0 ) return;
|
||||
if( MZFbytes == 0 ) return;
|
||||
|
||||
if( (bytes & 0x3) != 0 )
|
||||
if( (MZFbytes & 0x3) != 0 )
|
||||
throw Exception::LogicError( "Invalid parameter passed to memset_32 - data length is not a multiple of 32 bits." );
|
||||
|
||||
|
||||
|
@ -492,11 +493,11 @@ static __forceinline void memset_32( void *dest )
|
|||
// If the data length is not a factor of 32 bits, the C++ optimizing compiler will
|
||||
// probably just generate mysteriously broken code in Release builds. ;)
|
||||
|
||||
jASSUME( (bytes & 0x3) == 0 );
|
||||
jASSUME( (MZFbytes & 0x3) == 0 );
|
||||
|
||||
enum
|
||||
{
|
||||
remdat = bytes>>2,
|
||||
remdat = MZFbytes>>2,
|
||||
data32 = data
|
||||
};
|
||||
|
||||
|
@ -515,7 +516,6 @@ static __forceinline void memset_32( void *dest )
|
|||
case 3:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -527,7 +527,6 @@ static __forceinline void memset_32( void *dest )
|
|||
case 4:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -540,7 +539,6 @@ static __forceinline void memset_32( void *dest )
|
|||
case 5:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
stosd;
|
||||
|
@ -554,7 +552,6 @@ static __forceinline void memset_32( void *dest )
|
|||
default:
|
||||
__asm
|
||||
{
|
||||
cld;
|
||||
mov ecx, remdat;
|
||||
mov edi, dest;
|
||||
mov eax, data32;
|
||||
|
@ -594,5 +591,4 @@ static __forceinline void memset32_obj( T& object )
|
|||
memset_32<data, sizeof(T)>( &object );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#undef MZFbytes
|
||||
|
|
Loading…
Reference in New Issue