pcsx2/3rdparty/w32pthreads/ptw32_InterlockedCompareExc...

/*
 * ptw32_InterlockedCompareExchange.c
 *
 * Description:
 * This translation unit implements routines which are private to
 * the implementation and may be used throughout it.
 *
 * --------------------------------------------------------------------------
 *
 *      Pthreads-win32 - POSIX Threads Library for Win32
 *      Copyright(C) 1998 John E. Bossom
 *      Copyright(C) 1999,2005 Pthreads-win32 contributors
 * 
 *      Contact Email: rpj@callisto.canberra.edu.au
 * 
 *      The current list of contributors is contained
 *      in the file CONTRIBUTORS included with the source
 *      code distribution. The list can also be seen at the
 *      following World Wide Web location:
 *      http://sources.redhat.com/pthreads-win32/contributors.html
 * 
 *      This library is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU Lesser General Public
 *      License as published by the Free Software Foundation; either
 *      version 2 of the License, or (at your option) any later version.
 * 
 *      This library is distributed in the hope that it will be useful,
 *      but WITHOUT ANY WARRANTY; without even the implied warranty of
 *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *      Lesser General Public License for more details.
 * 
 *      You should have received a copy of the GNU Lesser General Public
 *      License along with this library in the file COPYING.LIB;
 *      if not, write to the Free Software Foundation, Inc.,
 *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */

#include "pthread.h"
#include "implement.h"


/*
 * ptw32_InterlockedCompareExchange --
 *
 * Compare-and-swap on a 32-bit location: if *location equals comparand,
 * value is stored into *location. On x86 builds with a supported compiler
 * the value that *location held before the operation is returned.
 *
 * Originally needed because W9x doesn't support InterlockedCompareExchange.
 * We now use this version wherever possible so we can inline it.
 *
 * When the processor or compiler is not supported no exchange is performed,
 * HAVE_INLINABLE_INTERLOCKED_CMPXCHG is left undefined and 0 is returned.
 */

INLINE PTW32_INTERLOCKED_LONG WINAPI
ptw32_InterlockedCompareExchange (volatile PTW32_INTERLOCKED_LPLONG location,
                                  PTW32_INTERLOCKED_LONG value,
                                  PTW32_INTERLOCKED_LONG comparand)
{

#if defined(__WATCOMC__)
/* Don't report that result is not assigned a value before being referenced */
#pragma disable_message (200)
#endif

  PTW32_INTERLOCKED_LONG result;

  /*
   * Using the LOCK prefix on uni-processor machines is significantly slower
   * and it is not necessary, which is why an unlocked variant exists in the
   * else branch below.
   *
   * NOTE: the ptw32_smp_system test is currently disabled (the condition is
   * hard-wired to 1), so the LOCK-prefixed path is always taken and the
   * unlocked else branch is compiled but never executed.
   */
  if (1) //ptw32_smp_system)

/* *INDENT-OFF* */

#if defined(_M_IX86) || defined(_X86_)

#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32))
#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG
    {
      _asm {
        //PUSH         ecx
        //PUSH         edx
        MOV          ecx,dword ptr [location]
        MOV          edx,dword ptr [value]
        MOV          eax,dword ptr [comparand]
        LOCK CMPXCHG dword ptr [ecx],edx
        MOV          dword ptr [result], eax
        //POP          edx
        //POP          ecx
      }
    }
  else
    {
      _asm {
        //PUSH         ecx
        //PUSH         edx
        MOV          ecx,dword ptr [location]
        MOV          edx,dword ptr [value]
        MOV          eax,dword ptr [comparand]
        CMPXCHG      dword ptr [ecx],edx
        MOV          dword ptr [result], eax
        //POP          edx
        //POP          ecx
      }
    }

#elif defined(__GNUC__)
#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG

    /*
     * *location is written by cmpxchgl, so it is declared as a read-write
     * ("+m") operand. The "memory" clobber stops the compiler from caching
     * or reordering other memory accesses across the operation, and "cc"
     * records that the instruction modifies the flags.
     */
    {
      __asm__ __volatile__
        (
         "lock\n\t"
         "cmpxchgl       %2,%1"      /* if (EAX == [location])  */
                                     /*   [location] = value    */
                                     /* else                    */
                                     /*   EAX = [location]      */
         :"=a" (result), "+m" (*location)
         :"r" (value), "a" (comparand)
         :"memory", "cc");
    }
  else
    {
      __asm__ __volatile__
        (
         "cmpxchgl       %2,%1"      /* if (EAX == [location])  */
                                     /*   [location] = value    */
                                     /* else                    */
                                     /*   EAX = [location]      */
         :"=a" (result), "+m" (*location)
         :"r" (value), "a" (comparand)
         :"memory", "cc");
    }

#else

    /*
     * x86 target but no inline assembler syntax we know how to use.
     * Give the 'if' above a body and make sure result is initialised.
     */
    {
      result = 0;
    }

#endif

#else

  /*
   * If execution gets to here then we're running on a currently
   * unsupported processor or compiler.
   */

  result = 0;

#endif

/* *INDENT-ON* */

  return result;

#if defined(__WATCOMC__)
#pragma enable_message (200)
#endif

}

/*
 * ptw32_InterlockedExchange --
 *
 * Stores value into *location and, on x86 builds with a supported
 * compiler, returns the value that *location held beforehand.
 *
 * We now use this version wherever possible so we can inline it.
 *
 * When the processor or compiler is not supported no exchange is performed,
 * HAVE_INLINABLE_INTERLOCKED_XCHG is left undefined and 0 is returned.
 */

INLINE LONG WINAPI
ptw32_InterlockedExchange (volatile PTW32_INTERLOCKED_LPLONG location,
                           LONG value)
{

#if defined(__WATCOMC__)
/* Don't report that result is not assigned a value before being referenced */
#pragma disable_message (200)
#endif

  LONG result;

  /*
   * The XCHG instruction always locks the bus with or without the
   * LOCKED prefix. This makes it significantly slower than CMPXCHG on
   * uni-processor machines. The Windows InterlockedExchange function
   * is nearly 3 times faster than the XCHG instruction, so this routine
   * is not yet very useful for speeding up pthreads.
   *
   * NOTE: the ptw32_smp_system test is currently disabled (the condition is
   * hard-wired to 1), so the XCHG path is always taken and the CMPXCHG
   * loop in the else branch is compiled but never executed.
   */
  if (1) //ptw32_smp_system)

/* *INDENT-OFF* */

#if defined(_M_IX86) || defined(_X86_)

#if defined(_MSC_VER) || defined(__WATCOMC__) || (defined(__BORLANDC__) && defined(HAVE_TASM32))
#define HAVE_INLINABLE_INTERLOCKED_XCHG

    {
      _asm {
        //PUSH         ecx
        MOV          ecx,dword ptr [location]
        MOV          eax,dword ptr [value]
        XCHG         dword ptr [ecx],eax
        MOV          dword ptr [result], eax
        //POP          ecx
      }
    }
  else
    {
      /*
       * Faster version of XCHG for uni-processor systems because
       * it doesn't lock the bus. If an interrupt or context switch
       * occurs between the MOV and the CMPXCHG then the value in
       * 'location' may have changed, in which case we will loop
       * back to do the MOV again.
       *
       * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
       *
       * Tests show that this routine has almost identical timing
       * to Win32's InterlockedExchange(), which is much faster than
       * using the inlined 'xchg' instruction above, so it's probably
       * doing something similar to this (on UP systems).
       *
       * The PUSH/POP instructions that used to save ecx/edx are
       * commented out below.
       */
      _asm {
        //PUSH         ecx
        //PUSH         edx
        MOV          ecx,dword ptr [location]
        MOV          edx,dword ptr [value]
L1:     MOV          eax,dword ptr [ecx]
        CMPXCHG      dword ptr [ecx],edx
        JNZ          L1
        MOV          dword ptr [result], eax
        //POP          edx
        //POP          ecx
      }
    }

#elif defined(__GNUC__)
#define HAVE_INLINABLE_INTERLOCKED_XCHG

    /*
     * *location is written by the exchange, so it is declared as a
     * read-write ("+m") operand, and the "memory" clobber stops the
     * compiler from caching or reordering other memory accesses across
     * the operation. %2 is tied to %0, so the register that carries
     * 'value' in receives the previous contents of *location.
     */
    {
      __asm__ __volatile__
        (
         "xchgl          %2,%1"
         :"=r" (result), "+m" (*location)
         :"0" (value)
         :"memory");
    }
  else
    {
      /*
       * Faster version of XCHG for uni-processor systems because
       * it doesn't lock the bus. If an interrupt or context switch
       * occurs between the movl and the cmpxchgl then the value in
       * 'location' may have changed, in which case we will loop
       * back to do the movl again.
       *
       * FIXME! Need memory barriers for the MOV+CMPXCHG combo?
       *
       * Tests show that this routine has almost identical timing
       * to Win32's InterlockedExchange(), which is much faster than
       * using an inlined 'xchg' instruction, so it's probably
       * doing something similar to this (on UP systems).
       *
       * cmpxchgl modifies the flags, hence the additional "cc" clobber.
       */
      __asm__ __volatile__
        (
         "0:\n\t"
         "movl           %1,%%eax\n\t"
         "cmpxchgl       %2,%1\n\t"
         "jnz            0b"
         :"=&a" (result), "+m" (*location)
         :"r" (value)
         :"memory", "cc");
    }

#else

    /*
     * x86 target but no inline assembler syntax we know how to use.
     * Give the 'if' above a body and make sure result is initialised.
     */
    {
      result = 0;
    }

#endif

#else

  /*
   * If execution gets to here then we're running on a currently
   * unsupported processor or compiler.
   */

  result = 0;

#endif

/* *INDENT-ON* */

  return result;

#if defined(__WATCOMC__)
#pragma enable_message (200)
#endif

}


/*
 * For inlined builds, point the library's interlocked macros at the
 * routines defined in this file, but only for those routines for which
 * an inlinable implementation was actually compiled above.
 */
#ifdef PTW32_BUILD_INLINED

#  ifdef HAVE_INLINABLE_INTERLOCKED_CMPXCHG
#    undef PTW32_INTERLOCKED_COMPARE_EXCHANGE
#    define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange
#  endif

#  ifdef HAVE_INLINABLE_INTERLOCKED_XCHG
#    undef PTW32_INTERLOCKED_EXCHANGE
#    define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange
#  endif

#endif /* PTW32_BUILD_INLINED */
Re-Added eol-style:native properties to the repository. The settings got lost when we merged from Playground to Official. Added interface.cpp (plugin/pcsx2 interface) and savestate.cpp to SPU2ghz, to help clean up SPU2.cpp. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@463 96395faa-99c1-11dd-bbfe-3dabce05a288 2009-02-09 21:15:56 +00:00			`/*`
			`* ptw32_InterlockedCompareExchange.c`
			`*`
			`* Description:`
			`* This translation unit implements routines which are private to`
			`* the implementation and may be used throughout it.`
			`*`
			`* --------------------------------------------------------------------------`
			`*`
			`* Pthreads-win32 - POSIX Threads Library for Win32`
			`* Copyright(C) 1998 John E. Bossom`
			`* Copyright(C) 1999,2005 Pthreads-win32 contributors`
			`*`
			`* Contact Email: rpj@callisto.canberra.edu.au`
			`*`
			`* The current list of contributors is contained`
			`* in the file CONTRIBUTORS included with the source`
			`* code distribution. The list can also be seen at the`
			`* following World Wide Web location:`
			`* http://sources.redhat.com/pthreads-win32/contributors.html`
			`*`
			`* This library is free software; you can redistribute it and/or`
			`* modify it under the terms of the GNU Lesser General Public`
			`* License as published by the Free Software Foundation; either`
			`* version 2 of the License, or (at your option) any later version.`
			`*`
			`* This library is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`* Lesser General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU Lesser General Public`
			`* License along with this library in the file COPYING.LIB;`
			`* if not, write to the Free Software Foundation, Inc.,`
			`* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA`
			`*/`

			`#include "pthread.h"`
			`#include "implement.h"`


			`/*`
			`* ptw32_InterlockedCompareExchange --`
			`*`
			`* Originally needed because W9x doesn't support InterlockedCompareExchange.`
			`* We now use this version wherever possible so we can inline it.`
			`*/`

			`INLINE PTW32_INTERLOCKED_LONG WINAPI`
			`ptw32_InterlockedCompareExchange (volatile PTW32_INTERLOCKED_LPLONG location,`
			`PTW32_INTERLOCKED_LONG value,`
			`PTW32_INTERLOCKED_LONG comparand)`
			`{`

			`#if defined(__WATCOMC__)`
			`/* Don't report that result is not assigned a value before being referenced */`
			`#pragma disable_message (200)`
			`#endif`

			`PTW32_INTERLOCKED_LONG result;`

			`/*`
			`* Using the LOCK prefix on uni-processor machines is significantly slower`
			`* and it is not necessary. The overhead of the conditional below is`
			`* negligible in comparison. Since an optimised DLL will inline this`
			`* routine, this will be faster than calling the system supplied`
			`* Interlocked routine, which appears to avoid the LOCK prefix on`
			`* uniprocessor systems. So one DLL works for all systems.`
			`*/`
			`if (1) //ptw32_smp_system)`

			`/* INDENT-OFF */`

			`#if defined(_M_IX86) \|\| defined(_X86_)`

			`#if defined(_MSC_VER) \|\| defined(__WATCOMC__) \|\| (defined(__BORLANDC__) && defined(HAVE_TASM32))`
			`#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG`
			`{`
			`_asm {`
			`//PUSH ecx`
			`//PUSH edx`
			`MOV ecx,dword ptr [location]`
			`MOV edx,dword ptr [value]`
			`MOV eax,dword ptr [comparand]`
			`LOCK CMPXCHG dword ptr [ecx],edx`
			`MOV dword ptr [result], eax`
			`//POP edx`
			`//POP ecx`
			`}`
			`}`
			`else`
			`{`
			`_asm {`
			`//PUSH ecx`
			`//PUSH edx`
			`MOV ecx,dword ptr [location]`
			`MOV edx,dword ptr [value]`
			`MOV eax,dword ptr [comparand]`
			`CMPXCHG dword ptr [ecx],edx`
			`MOV dword ptr [result], eax`
			`//POP edx`
			`//POP ecx`
			`}`
			`}`

			`#elif defined(__GNUC__)`
			`#define HAVE_INLINABLE_INTERLOCKED_CMPXCHG`

			`{`
			`__asm__ __volatile__`
			`(`
			`"lock\n\t"`
			`"cmpxchgl %2,%1" /* if (EAX == [location]) */`
			`/* [location] = value */`
			`/* else */`
			`/* EAX = [location] */`
			`:"=a" (result)`
			`:"m" (*location), "r" (value), "a" (comparand));`
			`}`
			`else`
			`{`
			`__asm__ __volatile__`
			`(`
			`"cmpxchgl %2,%1" /* if (EAX == [location]) */`
			`/* [location] = value */`
			`/* else */`
			`/* EAX = [location] */`
			`:"=a" (result)`
			`:"m" (*location), "r" (value), "a" (comparand));`
			`}`

			`#endif`

			`#else`

			`/*`
			`* If execution gets to here then we're running on a currently`
			`* unsupported processor or compiler.`
			`*/`

			`result = 0;`

			`#endif`

			`/* INDENT-ON */`

			`return result;`

			`#if defined(__WATCOMC__)`
			`#pragma enable_message (200)`
			`#endif`

			`}`

			`/*`
			`* ptw32_InterlockedExchange --`
			`*`
			`* We now use this version wherever possible so we can inline it.`
			`*/`

			`INLINE LONG WINAPI`
			`ptw32_InterlockedExchange (volatile PTW32_INTERLOCKED_LPLONG location,`
			`LONG value)`
			`{`

			`#if defined(__WATCOMC__)`
			`/* Don't report that result is not assigned a value before being referenced */`
			`#pragma disable_message (200)`
			`#endif`

			`LONG result;`

			`/*`
			`* The XCHG instruction always locks the bus with or without the`
			`* LOCKED prefix. This makes it significantly slower than CMPXCHG on`
			`* uni-processor machines. The Windows InterlockedExchange function`
			`* is nearly 3 times faster than the XCHG instruction, so this routine`
			`* is not yet very useful for speeding up pthreads.`
			`*/`
			`if (1) //ptw32_smp_system)`

			`/* INDENT-OFF */`

			`#if defined(_M_IX86) \|\| defined(_X86_)`

			`#if defined(_MSC_VER) \|\| defined(__WATCOMC__) \|\| (defined(__BORLANDC__) && defined(HAVE_TASM32))`
			`#define HAVE_INLINABLE_INTERLOCKED_XCHG`

			`{`
			`_asm {`
			`//PUSH ecx`
			`MOV ecx,dword ptr [location]`
			`MOV eax,dword ptr [value]`
			`XCHG dword ptr [ecx],eax`
			`MOV dword ptr [result], eax`
			`//POP ecx`
			`}`
			`}`
			`else`
			`{`
			`/*`
			`* Faster version of XCHG for uni-processor systems because`
			`* it doesn't lock the bus. If an interrupt or context switch`
			`* occurs between the MOV and the CMPXCHG then the value in`
			`* 'location' may have changed, in which case we will loop`
			`* back to do the MOV again.`
			`*`
			`* FIXME! Need memory barriers for the MOV+CMPXCHG combo?`
			`*`
			`* Tests show that this routine has almost identical timing`
			`* to Win32's InterlockedExchange(), which is much faster than`
			`* using the inlined 'xchg' instruction above, so it's probably`
			`* doing something similar to this (on UP systems).`
			`*`
			`* Can we do without the PUSH/POP instructions?`
			`*/`
			`_asm {`
			`//PUSH ecx`
			`//PUSH edx`
			`MOV ecx,dword ptr [location]`
			`MOV edx,dword ptr [value]`
			`L1: MOV eax,dword ptr [ecx]`
			`CMPXCHG dword ptr [ecx],edx`
			`JNZ L1`
			`MOV dword ptr [result], eax`
			`//POP edx`
			`//POP ecx`
			`}`
			`}`

			`#elif defined(__GNUC__)`
			`#define HAVE_INLINABLE_INTERLOCKED_XCHG`

			`{`
			`__asm__ __volatile__`
			`(`
			`"xchgl %2,%1"`
			`:"=r" (result)`
			`:"m" (*location), "0" (value));`
			`}`
			`else`
			`{`
			`/*`
			`* Faster version of XCHG for uni-processor systems because`
			`* it doesn't lock the bus. If an interrupt or context switch`
			`* occurs between the movl and the cmpxchgl then the value in`
			`* 'location' may have changed, in which case we will loop`
			`* back to do the movl again.`
			`*`
			`* FIXME! Need memory barriers for the MOV+CMPXCHG combo?`
			`*`
			`* Tests show that this routine has almost identical timing`
			`* to Win32's InterlockedExchange(), which is much faster than`
			`* using the an inlined 'xchg' instruction, so it's probably`
			`* doing something similar to this (on UP systems).`
			`*/`
			`__asm__ __volatile__`
			`(`
			`"0:\n\t"`
			`"movl %1,%%eax\n\t"`
			`"cmpxchgl %2,%1\n\t"`
			`"jnz 0b"`
			`:"=&a" (result)`
			`:"m" (*location), "r" (value));`
			`}`

			`#endif`

			`#else`

			`/*`
			`* If execution gets to here then we're running on a currently`
			`* unsupported processor or compiler.`
			`*/`

			`result = 0;`

			`#endif`

			`/* INDENT-ON */`

			`return result;`

			`#if defined(__WATCOMC__)`
			`#pragma enable_message (200)`
			`#endif`

			`}`


			`#if 1`

			`#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_CMPXCHG)`
			`#undef PTW32_INTERLOCKED_COMPARE_EXCHANGE`
			`#define PTW32_INTERLOCKED_COMPARE_EXCHANGE ptw32_InterlockedCompareExchange`
			`#endif`

			`#if defined(PTW32_BUILD_INLINED) && defined(HAVE_INLINABLE_INTERLOCKED_XCHG)`
			`#undef PTW32_INTERLOCKED_EXCHANGE`
			`#define PTW32_INTERLOCKED_EXCHANGE ptw32_InterlockedExchange`
			`#endif`

			`#endif`