I missed some cases in r5901:

Because we only ever call Pos_ReadDirect (and through that, DataRead<T>)
from JIT-generated code, the compiler may not get the heads-up to properly
prepare for run-time instantiation of those template functions.

Explicitly instantiating Pos_ReadDirect gets around that issue.

Also force the DataRead* functions inline, as gcc didn't always inline them
itself when they were in turn called from (other) template functions.


git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5902 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Soren Jorvang 2010-07-18 08:30:40 +00:00
parent faf586e8f1
commit b0041f00a3
3 changed files with 35 additions and 22 deletions

View File

@ -121,9 +121,12 @@ inline u16 swap16(u16 _data) {return bswap_16(_data);}
inline u32 swap32(u32 _data) {return bswap_32(_data);} inline u32 swap32(u32 _data) {return bswap_32(_data);}
inline u64 swap64(u64 _data) {return bswap_64(_data);} inline u64 swap64(u64 _data) {return bswap_64(_data);}
#elif __APPLE__ #elif __APPLE__
inline u16 swap16(u16 _data) {return (_data >> 8) | (_data << 8);} inline __attribute__((always_inline)) u16 swap16(u16 _data)
inline u32 swap32(u32 _data) {return __builtin_bswap32(_data);} {return (_data >> 8) | (_data << 8);}
inline u64 swap64(u64 _data) {return __builtin_bswap64(_data);} inline __attribute__((always_inline)) u32 swap32(u32 _data)
{return __builtin_bswap32(_data);}
inline __attribute__((always_inline)) u64 swap64(u64 _data)
{return __builtin_bswap64(_data);}
#else #else
// Slow generic implementation. // Slow generic implementation.
inline u16 swap16(u16 data) {return (data >> 8) | (data << 8);} inline u16 swap16(u16 data) {return (data >> 8) | (data << 8);}

View File

@ -33,14 +33,14 @@
extern int colIndex; extern int colIndex;
extern int colElements[2]; extern int colElements[2];
inline void _SetCol(u32 val) __forceinline void _SetCol(u32 val)
{ {
*(u32*)VertexManager::s_pCurBufferPointer = val; *(u32*)VertexManager::s_pCurBufferPointer = val;
VertexManager::s_pCurBufferPointer += 4; VertexManager::s_pCurBufferPointer += 4;
colIndex++; colIndex++;
} }
void _SetCol4444(u16 val) __forceinline void _SetCol4444(u16 val)
{ {
u32 col = Convert4To8(val & 0xF) << ASHIFT; u32 col = Convert4To8(val & 0xF) << ASHIFT;
col |= Convert4To8((val >> 12) & 0xF) << RSHIFT; col |= Convert4To8((val >> 12) & 0xF) << RSHIFT;
@ -49,7 +49,7 @@ void _SetCol4444(u16 val)
_SetCol(col); _SetCol(col);
} }
void _SetCol6666(u32 val) __forceinline void _SetCol6666(u32 val)
{ {
u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT; u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT;
col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT; col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT;
@ -58,7 +58,7 @@ void _SetCol6666(u32 val)
_SetCol(col); _SetCol(col);
} }
void _SetCol565(u16 val) __forceinline void _SetCol565(u16 val)
{ {
u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT; u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT;
col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT; col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT;
@ -67,12 +67,12 @@ void _SetCol565(u16 val)
} }
inline u32 _Read24(const u8 *addr) __forceinline u32 _Read24(const u8 *addr)
{ {
return addr[0] | (addr[1] << 8) | (addr[2] << 16) | 0xFF000000; return addr[0] | (addr[1] << 8) | (addr[2] << 16) | 0xFF000000;
} }
inline u32 _Read32(const u8 *addr) __forceinline u32 _Read32(const u8 *addr)
{ {
return *(const u32 *)addr; return *(const u32 *)addr;
} }

View File

@ -92,17 +92,6 @@ void Pos_ReadDirect()
VertexManager::s_pCurBufferPointer += 12; VertexManager::s_pCurBufferPointer += 12;
} }
// Explicitly instantiate these functions to decrease the possibility of
// symbol binding problems when (only) calling them from JIT compiled code.
template void Pos_ReadDirect<u8, true>();
template void Pos_ReadDirect<s8, true>();
template void Pos_ReadDirect<u16, true>();
template void Pos_ReadDirect<s16, true>();
template void Pos_ReadDirect<u8, false>();
template void Pos_ReadDirect<s8, false>();
template void Pos_ReadDirect<u16, false>();
template void Pos_ReadDirect<s16, false>();
void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); } void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); }
void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); } void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); }
void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); } void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
@ -162,7 +151,7 @@ inline void Pos_ReadIndex_Short(int Index)
} }
template<bool three> template<bool three>
inline void Pos_ReadIndex_Float(int Index) void Pos_ReadIndex_Float(int Index)
{ {
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
@ -180,7 +169,7 @@ static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x0
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
template<bool three> template<bool three>
inline void Pos_ReadIndex_Float_SSSE3(int Index) void Pos_ReadIndex_Float_SSSE3(int Index)
{ {
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
const __m128i a = _mm_loadu_si128((__m128i*)pData); const __m128i a = _mm_loadu_si128((__m128i*)pData);
@ -191,6 +180,27 @@ inline void Pos_ReadIndex_Float_SSSE3(int Index)
} }
#endif #endif
// Explicitly instantiate these functions to decrease the possibility of
// symbol binding problems when (only) calling them from JIT compiled code.
template void Pos_ReadDirect<u8, true>();
template void Pos_ReadDirect<s8, true>();
template void Pos_ReadDirect<u16, true>();
template void Pos_ReadDirect<s16, true>();
template void Pos_ReadDirect<u8, false>();
template void Pos_ReadDirect<s8, false>();
template void Pos_ReadDirect<u16, false>();
template void Pos_ReadDirect<s16, false>();
template void Pos_ReadIndex_Byte<u8, true>(int Index);
template void Pos_ReadIndex_Byte<s8, true>(int Index);
template void Pos_ReadIndex_Short<u16, true>(int Index);
template void Pos_ReadIndex_Short<s16, true>(int Index);
template void Pos_ReadIndex_Float<true>(int Index);
template void Pos_ReadIndex_Byte<u8, false>(int Index);
template void Pos_ReadIndex_Byte<s8, false>(int Index);
template void Pos_ReadIndex_Short<u16, false>(int Index);
template void Pos_ReadIndex_Short<s16, false>(int Index);
template void Pos_ReadIndex_Float<false>(int Index);
// ============================================================================== // ==============================================================================
// Index 8 // Index 8
// ============================================================================== // ==============================================================================